mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-06-27 17:01:10 -04:00
ramulator memory addressing bug fix + platform memory refactoring
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
parent
e80ee2c819
commit
22398c991d
33 changed files with 310 additions and 281 deletions
|
@ -301,11 +301,11 @@ config2()
|
||||||
|
|
||||||
# test single-bank memory
|
# test single-bank memory
|
||||||
if [ "$XLEN" == "64" ]; then
|
if [ "$XLEN" == "64" ]; then
|
||||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress
|
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress
|
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||||
else
|
else
|
||||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress
|
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress
|
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# test larger memory address
|
# test larger memory address
|
||||||
|
@ -322,10 +322,10 @@ config2()
|
||||||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
|
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||||
|
|
||||||
# test memory ports
|
# test memory ports
|
||||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
|
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
|
||||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||||
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
|
||||||
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
|
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
|
||||||
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8
|
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8
|
||||||
|
|
||||||
|
|
|
@ -172,8 +172,26 @@
|
||||||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef PLATFORM_MEMORY_BANKS
|
// Platform memory parameters
|
||||||
`define PLATFORM_MEMORY_BANKS 2
|
|
||||||
|
`ifndef PLATFORM_MEMORY_NUM_BANKS
|
||||||
|
`define PLATFORM_MEMORY_NUM_BANKS 2
|
||||||
|
`endif
|
||||||
|
|
||||||
|
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
|
||||||
|
`ifdef XLEN_64
|
||||||
|
`define PLATFORM_MEMORY_ADDR_WIDTH 48
|
||||||
|
`else
|
||||||
|
`define PLATFORM_MEMORY_ADDR_WIDTH 32
|
||||||
|
`endif
|
||||||
|
`endif
|
||||||
|
|
||||||
|
`ifndef PLATFORM_MEMORY_DATA_SIZE
|
||||||
|
`define PLATFORM_MEMORY_DATA_SIZE 64
|
||||||
|
`endif
|
||||||
|
|
||||||
|
`ifndef PLATFORM_MEMORY_INTERLEAVE
|
||||||
|
`define PLATFORM_MEMORY_INTERLEAVE 1
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifdef XLEN_64
|
`ifdef XLEN_64
|
||||||
|
@ -656,9 +674,9 @@
|
||||||
// Number of Memory Ports
|
// Number of Memory Ports
|
||||||
`ifndef L1_MEM_PORTS
|
`ifndef L1_MEM_PORTS
|
||||||
`ifdef L1_DISABLE
|
`ifdef L1_DISABLE
|
||||||
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||||
`else
|
`else
|
||||||
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||||
`endif
|
`endif
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
@ -735,9 +753,9 @@
|
||||||
// Number of Memory Ports
|
// Number of Memory Ports
|
||||||
`ifndef L2_MEM_PORTS
|
`ifndef L2_MEM_PORTS
|
||||||
`ifdef L2_ENABLE
|
`ifdef L2_ENABLE
|
||||||
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||||
`else
|
`else
|
||||||
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||||
`endif
|
`endif
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
@ -796,9 +814,9 @@
|
||||||
// Number of Memory Ports
|
// Number of Memory Ports
|
||||||
`ifndef L3_MEM_PORTS
|
`ifndef L3_MEM_PORTS
|
||||||
`ifdef L3_ENABLE
|
`ifdef L3_ENABLE
|
||||||
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||||
`else
|
`else
|
||||||
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_NUM_BANKS)
|
||||||
`endif
|
`endif
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
|
|
@ -193,7 +193,7 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
||||||
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
|
.TAG_WIDTH_OUT (AXI_TID_WIDTH),
|
||||||
.NUM_PORTS_IN (`VX_MEM_PORTS),
|
.NUM_PORTS_IN (`VX_MEM_PORTS),
|
||||||
.NUM_BANKS_OUT (AXI_NUM_BANKS),
|
.NUM_BANKS_OUT (AXI_NUM_BANKS),
|
||||||
.INTERLEAVE (0),
|
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
|
||||||
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
|
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
|
||||||
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || AXI_NUM_BANKS > 1) ? 2 : 0)
|
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || AXI_NUM_BANKS > 1) ? 2 : 0)
|
||||||
) axi_adapter (
|
) axi_adapter (
|
||||||
|
|
|
@ -28,18 +28,18 @@
|
||||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
// POSSIBILITY OF SUCH DAMAGE.
|
// POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
//`include "platform_afu_top_config.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
|
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
|
||||||
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8))
|
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH ((`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_NUM_BANKS)) - $clog2(`PLATFORM_MEMORY_DATA_SIZE))
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
|
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
|
||||||
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH
|
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH (`PLATFORM_MEMORY_DATA_SIZE * 8)
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
|
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
|
||||||
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH
|
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
package local_mem_cfg_pkg;
|
package local_mem_cfg_pkg;
|
||||||
|
|
|
@ -11,18 +11,14 @@
|
||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
`ifndef NOPAE
|
`ifndef NOPAE
|
||||||
`include "afu_json_info.vh"
|
`include "afu_json_info.vh"
|
||||||
`else
|
`else
|
||||||
`include "vortex_afu.vh"
|
`include "vortex_afu.vh"
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
`include "VX_define.vh"
|
|
||||||
|
|
||||||
`ifndef PLATFORM_MEMORY_INTERLEAVE
|
|
||||||
`define PLATFORM_MEMORY_INTERLEAVE 1
|
|
||||||
`endif
|
|
||||||
|
|
||||||
module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #(
|
module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #(
|
||||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||||
) (
|
) (
|
||||||
|
|
|
@ -134,10 +134,12 @@ module VX_afu_ctrl #(
|
||||||
RSTATE_RESP = 2'd2,
|
RSTATE_RESP = 2'd2,
|
||||||
RSTATE_WIDTH = 2;
|
RSTATE_WIDTH = 2;
|
||||||
|
|
||||||
|
localparam MEMORY_BANK_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH - `CLOG2(`PLATFORM_MEMORY_NUM_BANKS);
|
||||||
|
|
||||||
// device caps
|
// device caps
|
||||||
wire [63:0] dev_caps = {8'b0,
|
wire [63:0] dev_caps = {8'b0,
|
||||||
5'(`PLATFORM_MEMORY_ADDR_WIDTH-20),
|
5'(MEMORY_BANK_ADDR_WIDTH-20),
|
||||||
3'(`CLOG2(`PLATFORM_MEMORY_BANKS)),
|
3'(`CLOG2(`PLATFORM_MEMORY_NUM_BANKS)),
|
||||||
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0),
|
||||||
16'(`NUM_CORES * `NUM_CLUSTERS),
|
16'(`NUM_CORES * `NUM_CLUSTERS),
|
||||||
8'(`NUM_WARPS),
|
8'(`NUM_WARPS),
|
||||||
|
|
|
@ -31,7 +31,7 @@ module VX_afu_wrap #(
|
||||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||||
`else
|
`else
|
||||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||||
`endif
|
`endif
|
||||||
// AXI4-Lite slave interface
|
// AXI4-Lite slave interface
|
||||||
input wire s_axi_ctrl_awvalid,
|
input wire s_axi_ctrl_awvalid,
|
||||||
|
@ -58,11 +58,7 @@ module VX_afu_wrap #(
|
||||||
|
|
||||||
output wire interrupt
|
output wire interrupt
|
||||||
);
|
);
|
||||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH - $clog2(C_M_AXI_MEM_NUM_BANKS);
|
||||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS);
|
|
||||||
`else
|
|
||||||
localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH;
|
|
||||||
`endif
|
|
||||||
|
|
||||||
typedef enum logic [1:0] {
|
typedef enum logic [1:0] {
|
||||||
STATE_IDLE = 0,
|
STATE_IDLE = 0,
|
||||||
|
@ -71,8 +67,8 @@ module VX_afu_wrap #(
|
||||||
STATE_DONE = 3
|
STATE_DONE = 3
|
||||||
} state_e;
|
} state_e;
|
||||||
|
|
||||||
localparam PENDING_SIZEW = 12; // max outstanding requests size
|
localparam PENDING_WR_SIZEW = 12; // max outstanding requests size
|
||||||
localparam C_M_AXI_MEM_NUM_BANKS_SW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
|
localparam NUM_MEM_BANKS_SIZEW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
|
||||||
|
|
||||||
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
|
||||||
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
|
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
|
||||||
|
@ -108,11 +104,11 @@ module VX_afu_wrap #(
|
||||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||||
`REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
`REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||||
`else
|
`else
|
||||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON);
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||||
reg [PENDING_SIZEW-1:0] vx_pending_writes;
|
reg [PENDING_WR_SIZEW-1:0] vx_pending_writes;
|
||||||
reg vx_reset = 1; // asserted at initialization
|
reg vx_reset = 1; // asserted at initialization
|
||||||
wire vx_busy;
|
wire vx_busy;
|
||||||
|
|
||||||
|
@ -200,7 +196,7 @@ module VX_afu_wrap #(
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire;
|
wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire;
|
||||||
wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps;
|
wire [NUM_MEM_BANKS_SIZEW-1:0] cur_wr_reqs, cur_wr_rsps;
|
||||||
|
|
||||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_m_axi_wr_req_fire
|
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_m_axi_wr_req_fire
|
||||||
VX_axi_write_ack axi_write_ack (
|
VX_axi_write_ack axi_write_ack (
|
||||||
|
@ -224,14 +220,14 @@ module VX_afu_wrap #(
|
||||||
`POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire);
|
`POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire);
|
||||||
`POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire);
|
`POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire);
|
||||||
|
|
||||||
wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) -
|
wire signed [NUM_MEM_BANKS_SIZEW:0] reqs_sub = (NUM_MEM_BANKS_SIZEW+1)'(cur_wr_reqs) -
|
||||||
(C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps);
|
(NUM_MEM_BANKS_SIZEW+1)'(cur_wr_rsps);
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
vx_pending_writes <= '0;
|
vx_pending_writes <= '0;
|
||||||
end else begin
|
end else begin
|
||||||
vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub);
|
vx_pending_writes <= vx_pending_writes + PENDING_WR_SIZEW'(reqs_sub);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -270,7 +266,7 @@ module VX_afu_wrap #(
|
||||||
.ap_ready (ap_ready),
|
.ap_ready (ap_ready),
|
||||||
.ap_idle (ap_idle),
|
.ap_idle (ap_idle),
|
||||||
.interrupt (interrupt),
|
.interrupt (interrupt),
|
||||||
|
|
||||||
.ap_ctrl_read (ap_ctrl_read),
|
.ap_ctrl_read (ap_ctrl_read),
|
||||||
|
|
||||||
`ifdef SCOPE
|
`ifdef SCOPE
|
||||||
|
|
|
@ -17,12 +17,12 @@ module vortex_afu #(
|
||||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
parameter C_M_AXI_MEM_DATA_WIDTH = (`PLATFORM_MEMORY_DATA_SIZE * 8),
|
||||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
||||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||||
parameter C_M_AXI_MEM_NUM_BANKS = 1
|
parameter C_M_AXI_MEM_NUM_BANKS = 1
|
||||||
`else
|
`else
|
||||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_NUM_BANKS
|
||||||
`endif
|
`endif
|
||||||
) (
|
) (
|
||||||
// System signals
|
// System signals
|
||||||
|
@ -33,7 +33,7 @@ module vortex_afu #(
|
||||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||||
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
`REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA),
|
||||||
`else
|
`else
|
||||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// AXI4-Lite slave interface
|
// AXI4-Lite slave interface
|
||||||
|
@ -75,7 +75,7 @@ module vortex_afu #(
|
||||||
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
`ifdef PLATFORM_MERGED_MEMORY_INTERFACE
|
||||||
`REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA),
|
`REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||||
`else
|
`else
|
||||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||||
`endif
|
`endif
|
||||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||||
|
@ -94,7 +94,7 @@ module vortex_afu #(
|
||||||
.s_axi_ctrl_rready (s_axi_ctrl_rready),
|
.s_axi_ctrl_rready (s_axi_ctrl_rready),
|
||||||
.s_axi_ctrl_rdata (s_axi_ctrl_rdata),
|
.s_axi_ctrl_rdata (s_axi_ctrl_rdata),
|
||||||
.s_axi_ctrl_rresp (s_axi_ctrl_rresp),
|
.s_axi_ctrl_rresp (s_axi_ctrl_rresp),
|
||||||
|
|
||||||
.s_axi_ctrl_bvalid (s_axi_ctrl_bvalid),
|
.s_axi_ctrl_bvalid (s_axi_ctrl_bvalid),
|
||||||
.s_axi_ctrl_bready (s_axi_ctrl_bready),
|
.s_axi_ctrl_bready (s_axi_ctrl_bready),
|
||||||
.s_axi_ctrl_bresp (s_axi_ctrl_bresp),
|
.s_axi_ctrl_bresp (s_axi_ctrl_bresp),
|
||||||
|
|
|
@ -14,18 +14,6 @@
|
||||||
`ifndef VORTEX_AFU_VH
|
`ifndef VORTEX_AFU_VH
|
||||||
`define VORTEX_AFU_VH
|
`define VORTEX_AFU_VH
|
||||||
|
|
||||||
`ifndef PLATFORM_MEMORY_BANKS
|
|
||||||
`define PLATFORM_MEMORY_BANKS 2
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifndef PLATFORM_MEMORY_ADDR_WIDTH
|
|
||||||
`define PLATFORM_MEMORY_ADDR_WIDTH 31
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifndef PLATFORM_MEMORY_DATA_WIDTH
|
|
||||||
`define PLATFORM_MEMORY_DATA_WIDTH 512
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifndef PLATFORM_MEMORY_OFFSET
|
`ifndef PLATFORM_MEMORY_OFFSET
|
||||||
`define PLATFORM_MEMORY_OFFSET 0
|
`define PLATFORM_MEMORY_OFFSET 0
|
||||||
`endif
|
`endif
|
||||||
|
|
|
@ -221,7 +221,7 @@ module VX_async_ram_patch #(
|
||||||
VX_placeholder #(
|
VX_placeholder #(
|
||||||
.O (1)
|
.O (1)
|
||||||
) placeholder2 (
|
) placeholder2 (
|
||||||
.in (),
|
.in (1'b0),
|
||||||
.out (is_raddr_reg)
|
.out (is_raddr_reg)
|
||||||
);
|
);
|
||||||
wire [DATAW-1:0] rdata_a;
|
wire [DATAW-1:0] rdata_a;
|
||||||
|
|
|
@ -280,7 +280,13 @@ module VX_axi_adapter #(
|
||||||
end
|
end
|
||||||
|
|
||||||
assign m_axi_arvalid[i] = req_xbar_valid_out[i] && ~xbar_rw_out;
|
assign m_axi_arvalid[i] = req_xbar_valid_out[i] && ~xbar_rw_out;
|
||||||
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(xbar_addr_out) << LOG2_DATA_SIZE;
|
|
||||||
|
// convert address to byte-addressable space
|
||||||
|
if (INTERLEAVE) begin : g_m_axi_araddr_i
|
||||||
|
assign m_axi_araddr[i] = (ADDR_WIDTH_OUT'(xbar_addr_out) << (BANK_SEL_BITS + LOG2_DATA_SIZE)) | (ADDR_WIDTH_OUT'(i) << LOG2_DATA_SIZE);
|
||||||
|
end else begin : g_m_axi_araddr_ni
|
||||||
|
assign m_axi_araddr[i] = (ADDR_WIDTH_OUT'(xbar_addr_out) << LOG2_DATA_SIZE) | (ADDR_WIDTH_OUT'(i) << (BANK_ADDR_WIDTH + LOG2_DATA_SIZE));
|
||||||
|
end
|
||||||
assign m_axi_arid[i] = TAG_WIDTH_OUT'(xbar_tag_r_out);
|
assign m_axi_arid[i] = TAG_WIDTH_OUT'(xbar_tag_r_out);
|
||||||
assign m_axi_arlen[i] = 8'b00000000;
|
assign m_axi_arlen[i] = 8'b00000000;
|
||||||
assign m_axi_arsize[i] = 3'(LOG2_DATA_SIZE);
|
assign m_axi_arsize[i] = 3'(LOG2_DATA_SIZE);
|
||||||
|
|
|
@ -7,22 +7,6 @@ include ../../common.mk
|
||||||
# AFU parameters
|
# AFU parameters
|
||||||
CONFIGS += -DNOPAE
|
CONFIGS += -DNOPAE
|
||||||
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
|
||||||
ifeq ($(XLEN),64)
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
|
||||||
else
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
|
|
||||||
endif
|
|
||||||
|
|
||||||
#CONFIGS += -DNUM_CORES=2
|
#CONFIGS += -DNUM_CORES=2
|
||||||
#CONFIGS += -DNUM_WARPS=32
|
#CONFIGS += -DNUM_WARPS=32
|
||||||
|
|
|
@ -99,7 +99,7 @@ ifdef PERF
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# ast dump flags
|
# ast dump flags
|
||||||
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI
|
XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_NUM_BANKS=1 -DNOPAE -DSV_DPI
|
||||||
|
|
||||||
all: swconfig ip-gen setup build
|
all: swconfig ip-gen setup build
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ foreach def $vdefines_list {
|
||||||
if { $name == "CHIPSCOPE" } {
|
if { $name == "CHIPSCOPE" } {
|
||||||
set chipscope 1
|
set chipscope 1
|
||||||
}
|
}
|
||||||
if { $name == "PLATFORM_MEMORY_BANKS" } {
|
if { $name == "PLATFORM_MEMORY_NUM_BANKS" } {
|
||||||
set num_banks [lindex $fields 1]
|
set num_banks [lindex $fields 1]
|
||||||
}
|
}
|
||||||
if { $name == "PLATFORM_MERGED_MEMORY_INTERFACE" } {
|
if { $name == "PLATFORM_MERGED_MEMORY_INTERFACE" } {
|
||||||
|
|
|
@ -5,31 +5,36 @@ CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
||||||
|
|
||||||
ifeq ($(DEV_ARCH), zynquplus)
|
ifeq ($(DEV_ARCH), zynquplus)
|
||||||
# zynquplus
|
# zynquplus
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||||
else ifeq ($(DEV_ARCH), versal)
|
else ifeq ($(DEV_ARCH), versal)
|
||||||
# versal
|
# versal
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||||
ifneq ($(findstring xilinx_vck5000,$(XSA)),)
|
ifneq ($(findstring xilinx_vck5000,$(XSA)),)
|
||||||
CONFIGS += -DPLATFORM_MEMORY_OFFSET=40'hC000000000
|
CONFIGS += -DPLATFORM_MEMORY_OFFSET=40'hC000000000
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
# alveo
|
# alveo
|
||||||
ifneq ($(findstring xilinx_u55c,$(XSA)),)
|
ifneq ($(findstring xilinx_u55c,$(XSA)),)
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=29
|
# 16 GB of HBM2 with 32 channels (512 MB per channel)
|
||||||
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=34
|
||||||
CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE
|
CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE
|
||||||
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
||||||
#VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)])
|
#VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)])
|
||||||
else ifneq ($(findstring xilinx_u50,$(XSA)),)
|
else ifneq ($(findstring xilinx_u50,$(XSA)),)
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28
|
# 8 GB of HBM2 with 32 channels (256 MB per channel)
|
||||||
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=33
|
||||||
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
||||||
else ifneq ($(findstring xilinx_u280,$(XSA)),)
|
else ifneq ($(findstring xilinx_u280,$(XSA)),)
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28
|
# 8 GB of HBM2 with 32 channels (256 MB per channel)
|
||||||
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=33
|
||||||
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31]
|
||||||
else ifneq ($(findstring xilinx_u250,$(XSA)),)
|
else ifneq ($(findstring xilinx_u250,$(XSA)),)
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34
|
# 64 GB of DDR4 with 4 channels (16 GB per channel)
|
||||||
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=36
|
||||||
else ifneq ($(findstring xilinx_u200,$(XSA)),)
|
else ifneq ($(findstring xilinx_u200,$(XSA)),)
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34
|
# 64 GB of DDR4 with 4 channels (16 GB per channel)
|
||||||
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=36
|
||||||
else
|
else
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
CONFIGS += -DPLATFORM_MEMORY_NUM_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -78,10 +78,10 @@ public:
|
||||||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||||
break;
|
break;
|
||||||
case VX_CAPS_NUM_MEM_BANKS:
|
case VX_CAPS_NUM_MEM_BANKS:
|
||||||
_value = PLATFORM_MEMORY_BANKS;
|
_value = PLATFORM_MEMORY_NUM_BANKS;
|
||||||
break;
|
break;
|
||||||
case VX_CAPS_MEM_BANK_SIZE:
|
case VX_CAPS_MEM_BANK_SIZE:
|
||||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_NUM_BANKS);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||||
|
|
|
@ -113,10 +113,10 @@ public:
|
||||||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||||
break;
|
break;
|
||||||
case VX_CAPS_NUM_MEM_BANKS:
|
case VX_CAPS_NUM_MEM_BANKS:
|
||||||
_value = PLATFORM_MEMORY_BANKS;
|
_value = PLATFORM_MEMORY_NUM_BANKS;
|
||||||
break;
|
break;
|
||||||
case VX_CAPS_MEM_BANK_SIZE:
|
case VX_CAPS_MEM_BANK_SIZE:
|
||||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_NUM_BANKS);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||||
|
|
|
@ -29,19 +29,54 @@ using namespace vortex;
|
||||||
|
|
||||||
class DramSim::Impl {
|
class DramSim::Impl {
|
||||||
private:
|
private:
|
||||||
|
struct mem_req_t {
|
||||||
|
uint64_t addr;
|
||||||
|
bool is_write;
|
||||||
|
ResponseCallback callback;
|
||||||
|
void* arg;
|
||||||
|
};
|
||||||
|
|
||||||
Ramulator::IFrontEnd* ramulator_frontend_;
|
Ramulator::IFrontEnd* ramulator_frontend_;
|
||||||
Ramulator::IMemorySystem* ramulator_memorysystem_;
|
Ramulator::IMemorySystem* ramulator_memorysystem_;
|
||||||
|
uint32_t cpu_channel_size_;
|
||||||
|
uint64_t cpu_cycles_;
|
||||||
|
uint32_t scaled_dram_cycles_;
|
||||||
|
static const uint32_t tick_cycles_ = 1000;
|
||||||
|
static const uint32_t dram_channel_size_ = 16; // 128 bits
|
||||||
|
std::queue<mem_req_t> pending_reqs_;
|
||||||
|
|
||||||
|
void handle_pending_requests() {
|
||||||
|
if (pending_reqs_.empty())
|
||||||
|
return;
|
||||||
|
auto& req = pending_reqs_.front();
|
||||||
|
auto req_type = req.is_write ? Ramulator::Request::Type::Write : Ramulator::Request::Type::Read;
|
||||||
|
std::function<void(Ramulator::Request&)> callback = nullptr;
|
||||||
|
if (req.callback) {
|
||||||
|
callback = [req_callback = std::move(req.callback), req_arg = std::move(req.arg)](Ramulator::Request& /*dram_req*/) {
|
||||||
|
req_callback(req_arg);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (ramulator_frontend_->receive_external_requests(req_type, req.addr, 0, callback)) {
|
||||||
|
if (req.is_write) {
|
||||||
|
// Ramulator does not handle write responses, so we fire the callback ourselves.
|
||||||
|
if (req.callback) {
|
||||||
|
req.callback(req.arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pending_reqs_.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Impl(int clock_ratio) {
|
Impl(uint32_t num_channels, uint32_t channel_size, float clock_ratio) {
|
||||||
YAML::Node dram_config;
|
YAML::Node dram_config;
|
||||||
dram_config["Frontend"]["impl"] = "GEM5";
|
dram_config["Frontend"]["impl"] = "GEM5";
|
||||||
dram_config["MemorySystem"]["impl"] = "GenericDRAM";
|
dram_config["MemorySystem"]["impl"] = "GenericDRAM";
|
||||||
dram_config["MemorySystem"]["clock_ratio"] = clock_ratio;
|
dram_config["MemorySystem"]["clock_ratio"] = 1;
|
||||||
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
|
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
|
||||||
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
|
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
|
||||||
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
|
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
|
||||||
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
|
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = num_channels;
|
||||||
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
|
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
|
||||||
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
|
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
|
||||||
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
|
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
|
||||||
|
@ -59,6 +94,10 @@ public:
|
||||||
ramulator_memorysystem_ = Ramulator::Factory::create_memory_system(dram_config);
|
ramulator_memorysystem_ = Ramulator::Factory::create_memory_system(dram_config);
|
||||||
ramulator_frontend_->connect_memory_system(ramulator_memorysystem_);
|
ramulator_frontend_->connect_memory_system(ramulator_memorysystem_);
|
||||||
ramulator_memorysystem_->connect_frontend(ramulator_frontend_);
|
ramulator_memorysystem_->connect_frontend(ramulator_frontend_);
|
||||||
|
|
||||||
|
cpu_channel_size_ = channel_size;
|
||||||
|
scaled_dram_cycles_ = static_cast<uint64_t>(clock_ratio * tick_cycles_);
|
||||||
|
this->reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
~Impl() {
|
~Impl() {
|
||||||
|
@ -66,41 +105,49 @@ public:
|
||||||
auto original_buf = std::cout.rdbuf();
|
auto original_buf = std::cout.rdbuf();
|
||||||
std::cout.rdbuf(nullstream.rdbuf());
|
std::cout.rdbuf(nullstream.rdbuf());
|
||||||
ramulator_frontend_->finalize();
|
ramulator_frontend_->finalize();
|
||||||
ramulator_memorysystem_->finalize();
|
ramulator_memorysystem_->finalize();
|
||||||
std::cout.rdbuf(original_buf);
|
std::cout.rdbuf(original_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void reset() {
|
void reset() {
|
||||||
//--
|
cpu_cycles_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void tick() {
|
void tick() {
|
||||||
ramulator_memorysystem_->tick();
|
cpu_cycles_ += tick_cycles_;
|
||||||
|
while (cpu_cycles_ >= scaled_dram_cycles_) {
|
||||||
|
this->handle_pending_requests();
|
||||||
|
ramulator_memorysystem_->tick();
|
||||||
|
cpu_cycles_ -= scaled_dram_cycles_;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback response_cb, void* arg) {
|
void send_request(uint64_t addr, bool is_write, ResponseCallback response_cb, void* arg) {
|
||||||
if (!ramulator_frontend_->receive_external_requests(
|
// enqueue the request
|
||||||
is_write ? Ramulator::Request::Type::Write : Ramulator::Request::Type::Read,
|
if (cpu_channel_size_ > dram_channel_size_) {
|
||||||
addr,
|
uint32_t n = cpu_channel_size_ / dram_channel_size_;
|
||||||
source_id,
|
for (uint32_t i = 0; i < n; ++i) {
|
||||||
[callback_ = std::move(response_cb), arg_ = std::move(arg)](Ramulator::Request& /*dram_req*/) {
|
uint64_t dram_byte_addr = (addr / cpu_channel_size_) * dram_channel_size_ + (i * dram_channel_size_);
|
||||||
callback_(arg_);
|
if (i == 0) {
|
||||||
|
pending_reqs_.push({dram_byte_addr, is_write, response_cb, arg});
|
||||||
|
} else {
|
||||||
|
pending_reqs_.push({dram_byte_addr, is_write, nullptr, nullptr});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
)) {
|
} else if (cpu_channel_size_ < dram_channel_size_) {
|
||||||
return false;
|
uint64_t dram_byte_addr = (addr / cpu_channel_size_) * dram_channel_size_;
|
||||||
|
pending_reqs_.push({dram_byte_addr, is_write, response_cb, arg});
|
||||||
|
} else {
|
||||||
|
uint64_t dram_byte_addr = addr;
|
||||||
|
pending_reqs_.push({dram_byte_addr, is_write, response_cb, arg});
|
||||||
}
|
}
|
||||||
if (is_write) {
|
}
|
||||||
// Ramulator does not handle write responses, so we call the callback ourselves
|
|
||||||
response_cb(arg);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
DramSim::DramSim(int clock_ratio)
|
DramSim::DramSim(uint32_t num_channels, uint32_t channel_size, float clock_ratio)
|
||||||
: impl_(new Impl(clock_ratio))
|
: impl_(new Impl(num_channels, channel_size, clock_ratio))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
DramSim::~DramSim() {
|
DramSim::~DramSim() {
|
||||||
|
@ -115,6 +162,6 @@ void DramSim::tick() {
|
||||||
impl_->tick();
|
impl_->tick();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DramSim::send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) {
|
void DramSim::send_request(uint64_t addr, bool is_write, ResponseCallback callback, void* arg) {
|
||||||
return impl_->send_request(is_write, addr, source_id, callback, arg);
|
impl_->send_request(addr, is_write, callback, arg);
|
||||||
}
|
}
|
|
@ -19,14 +19,15 @@ class DramSim {
|
||||||
public:
|
public:
|
||||||
typedef void (*ResponseCallback)(void *arg);
|
typedef void (*ResponseCallback)(void *arg);
|
||||||
|
|
||||||
DramSim(int clock_ratio);
|
DramSim(uint32_t num_channels, uint32_t channel_size, float clock_ratio);
|
||||||
~DramSim();
|
~DramSim();
|
||||||
|
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
void tick();
|
void tick();
|
||||||
|
|
||||||
bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback response_cb, void* arg);
|
// addr: per-channel block address
|
||||||
|
void send_request(uint64_t addr, bool is_write, ResponseCallback response_cb, void* arg);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Impl;
|
class Impl;
|
||||||
|
|
|
@ -31,24 +31,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
|
||||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
||||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||||
|
|
||||||
# Platform parameters
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
|
||||||
ifeq ($(XLEN),64)
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
|
||||||
else
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4
|
|
||||||
endif
|
|
||||||
|
|
||||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||||
|
|
||||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||||
|
|
|
@ -35,8 +35,6 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <util.h>
|
#include <util.h>
|
||||||
|
|
||||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
|
||||||
|
|
||||||
#ifndef MEM_CLOCK_RATIO
|
#ifndef MEM_CLOCK_RATIO
|
||||||
#define MEM_CLOCK_RATIO 1
|
#define MEM_CLOCK_RATIO 1
|
||||||
#endif
|
#endif
|
||||||
|
@ -66,6 +64,8 @@
|
||||||
|
|
||||||
using namespace vortex;
|
using namespace vortex;
|
||||||
|
|
||||||
|
static uint32_t g_mem_bank_addr_width = (PLATFORM_MEMORY_ADDR_WIDTH - log2ceil(PLATFORM_MEMORY_NUM_BANKS));
|
||||||
|
|
||||||
static uint64_t timestamp = 0;
|
static uint64_t timestamp = 0;
|
||||||
|
|
||||||
double sc_time_stamp() {
|
double sc_time_stamp() {
|
||||||
|
@ -95,7 +95,7 @@ public:
|
||||||
Impl()
|
Impl()
|
||||||
: device_(nullptr)
|
: device_(nullptr)
|
||||||
, ram_(nullptr)
|
, ram_(nullptr)
|
||||||
, dram_sim_(MEM_CLOCK_RATIO)
|
, dram_sim_(PLATFORM_MEMORY_NUM_BANKS, PLATFORM_MEMORY_DATA_SIZE, MEM_CLOCK_RATIO)
|
||||||
, stop_(false)
|
, stop_(false)
|
||||||
, host_buffer_ids_(0)
|
, host_buffer_ids_(0)
|
||||||
#ifdef VCD_OUTPUT
|
#ifdef VCD_OUTPUT
|
||||||
|
@ -146,9 +146,6 @@ public:
|
||||||
// allocate RAM
|
// allocate RAM
|
||||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||||
|
|
||||||
// calculate memory bank size
|
|
||||||
mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH;
|
|
||||||
|
|
||||||
// reset the device
|
// reset the device
|
||||||
this->reset();
|
this->reset();
|
||||||
|
|
||||||
|
@ -274,16 +271,15 @@ private:
|
||||||
|
|
||||||
if (!dram_queue_.empty()) {
|
if (!dram_queue_.empty()) {
|
||||||
auto mem_req = dram_queue_.front();
|
auto mem_req = dram_queue_.front();
|
||||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, mem_req->bank_id, [](void* arg) {
|
dram_sim_.send_request(mem_req->addr, mem_req->write, [](void* arg) {
|
||||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||||
if (orig_req->ready) {
|
if (orig_req->ready) {
|
||||||
delete orig_req;
|
delete orig_req;
|
||||||
} else {
|
} else {
|
||||||
orig_req->ready = true;
|
orig_req->ready = true;
|
||||||
}
|
}
|
||||||
}, mem_req)) {
|
}, mem_req);
|
||||||
dram_queue_.pop();
|
dram_queue_.pop();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
dram_sim_.tick();
|
dram_sim_.tick();
|
||||||
|
@ -407,14 +403,14 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void avs_bus_reset() {
|
void avs_bus_reset() {
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
device_->avs_readdatavalid[b] = 0;
|
device_->avs_readdatavalid[b] = 0;
|
||||||
device_->avs_waitrequest[b] = 0;
|
device_->avs_waitrequest[b] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void avs_bus_eval() {
|
void avs_bus_eval() {
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
// process memory responses
|
// process memory responses
|
||||||
device_->avs_readdatavalid[b] = 0;
|
device_->avs_readdatavalid[b] = 0;
|
||||||
if (!pending_mem_reqs_[b].empty()
|
if (!pending_mem_reqs_[b].empty()
|
||||||
|
@ -430,7 +426,12 @@ private:
|
||||||
|
|
||||||
// process memory requests
|
// process memory requests
|
||||||
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
||||||
uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE;
|
#if PLATFORM_MEMORY_INTERLEAVE == 1
|
||||||
|
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_NUM_BANKS + b) * PLATFORM_MEMORY_DATA_SIZE;
|
||||||
|
#else
|
||||||
|
uint64_t byte_addr = (uint64_t(device_->avs_address[b]) + (b << g_mem_bank_addr_width)) * PLATFORM_MEMORY_DATA_SIZE;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (device_->avs_write[b]) {
|
if (device_->avs_write[b]) {
|
||||||
// process write request
|
// process write request
|
||||||
uint64_t byteen = device_->avs_byteenable[b];
|
uint64_t byteen = device_->avs_byteenable[b];
|
||||||
|
@ -515,9 +516,8 @@ private:
|
||||||
|
|
||||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||||
uint64_t host_buffer_ids_;
|
uint64_t host_buffer_ids_;
|
||||||
uint64_t mem_bank_size_;
|
|
||||||
|
|
||||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
std::list<cci_rd_req_t> cci_reads_;
|
std::list<cci_rd_req_t> cci_reads_;
|
||||||
std::list<cci_wr_req_t> cci_writes_;
|
std::list<cci_wr_req_t> cci_writes_;
|
||||||
|
|
|
@ -78,22 +78,22 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
|
||||||
output t_ccip_mmioData af2cp_sTxPort_c2_data,
|
output t_ccip_mmioData af2cp_sTxPort_c2_data,
|
||||||
|
|
||||||
// Avalon signals for local memory access
|
// Avalon signals for local memory access
|
||||||
output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS],
|
output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS],
|
input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS],
|
output t_local_mem_addr avs_address [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS],
|
input logic avs_waitrequest [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
output logic avs_write [`PLATFORM_MEMORY_BANKS],
|
output logic avs_write [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
output logic avs_read [`PLATFORM_MEMORY_BANKS],
|
output logic avs_read [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS],
|
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS],
|
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_NUM_BANKS],
|
||||||
input avs_readdatavalid [`PLATFORM_MEMORY_BANKS]
|
input avs_readdatavalid [`PLATFORM_MEMORY_NUM_BANKS]
|
||||||
);
|
);
|
||||||
|
|
||||||
t_if_ccip_Rx cp2af_sRxPort;
|
t_if_ccip_Rx cp2af_sRxPort;
|
||||||
t_if_ccip_Tx af2cp_sTxPort;
|
t_if_ccip_Tx af2cp_sTxPort;
|
||||||
|
|
||||||
vortex_afu #(
|
vortex_afu #(
|
||||||
.NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS)
|
.NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_NUM_BANKS)
|
||||||
) afu (
|
) afu (
|
||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
|
|
|
@ -24,21 +24,6 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||||
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
||||||
|
|
||||||
# Platform parameters
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
|
||||||
ifeq ($(XLEN),64)
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
|
||||||
else
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
|
||||||
endif
|
|
||||||
|
|
||||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||||
|
|
||||||
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||||
|
|
|
@ -35,8 +35,6 @@
|
||||||
#include <dram_sim.h>
|
#include <dram_sim.h>
|
||||||
#include <util.h>
|
#include <util.h>
|
||||||
|
|
||||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
|
||||||
|
|
||||||
#ifndef MEM_CLOCK_RATIO
|
#ifndef MEM_CLOCK_RATIO
|
||||||
#define MEM_CLOCK_RATIO 1
|
#define MEM_CLOCK_RATIO 1
|
||||||
#endif
|
#endif
|
||||||
|
@ -66,6 +64,8 @@ typedef uint64_t Word;
|
||||||
|
|
||||||
using namespace vortex;
|
using namespace vortex;
|
||||||
|
|
||||||
|
static uint32_t g_mem_bank_addr_width = (PLATFORM_MEMORY_ADDR_WIDTH - log2ceil(PLATFORM_MEMORY_NUM_BANKS));
|
||||||
|
|
||||||
static uint64_t timestamp = 0;
|
static uint64_t timestamp = 0;
|
||||||
|
|
||||||
double sc_time_stamp() {
|
double sc_time_stamp() {
|
||||||
|
@ -93,7 +93,7 @@ void sim_trace_enable(bool enable) {
|
||||||
|
|
||||||
class Processor::Impl {
|
class Processor::Impl {
|
||||||
public:
|
public:
|
||||||
Impl() : dram_sim_(MEM_CLOCK_RATIO) {
|
Impl() : dram_sim_(PLATFORM_MEMORY_NUM_BANKS, PLATFORM_MEMORY_DATA_SIZE, MEM_CLOCK_RATIO) {
|
||||||
// force random values for uninitialized signals
|
// force random values for uninitialized signals
|
||||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||||
Verilated::randSeed(50);
|
Verilated::randSeed(50);
|
||||||
|
@ -154,7 +154,7 @@ public:
|
||||||
|
|
||||||
// start
|
// start
|
||||||
device_->reset = 0;
|
device_->reset = 0;
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
device_->mem_req_ready[b] = 1;
|
device_->mem_req_ready[b] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,7 +195,7 @@ private:
|
||||||
reqs.clear();
|
reqs.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
std::queue<mem_req_t*> empty;
|
std::queue<mem_req_t*> empty;
|
||||||
std::swap(dram_queue_[b], empty);
|
std::swap(dram_queue_[b], empty);
|
||||||
}
|
}
|
||||||
|
@ -224,17 +224,15 @@ private:
|
||||||
|
|
||||||
dram_sim_.tick();
|
dram_sim_.tick();
|
||||||
|
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
if (!dram_queue_[b].empty()) {
|
if (!dram_queue_[b].empty()) {
|
||||||
auto mem_req = dram_queue_[b].front();
|
auto mem_req = dram_queue_[b].front();
|
||||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
|
dram_sim_.send_request(mem_req->addr, mem_req->write, [](void* arg) {
|
||||||
// mark completed request as ready
|
// mark completed request as ready
|
||||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||||
orig_req->ready = true;
|
orig_req->ready = true;
|
||||||
}, mem_req)) {
|
}, mem_req);
|
||||||
// was successfully sent to dram, remove from queue
|
dram_queue_[b].pop();
|
||||||
dram_queue_[b].pop();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -254,7 +252,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_bus_reset() {
|
void mem_bus_reset() {
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
device_->mem_req_ready[b] = 0;
|
device_->mem_req_ready[b] = 0;
|
||||||
device_->mem_rsp_valid[b] = 0;
|
device_->mem_rsp_valid[b] = 0;
|
||||||
}
|
}
|
||||||
|
@ -262,13 +260,13 @@ private:
|
||||||
|
|
||||||
void mem_bus_eval(bool clk) {
|
void mem_bus_eval(bool clk) {
|
||||||
if (!clk) {
|
if (!clk) {
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
mem_rd_rsp_ready_[b] = device_->mem_rsp_ready[b];
|
mem_rd_rsp_ready_[b] = device_->mem_rsp_ready[b];
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
// process memory responses
|
// process memory responses
|
||||||
if (device_->mem_rsp_valid[b] && mem_rd_rsp_ready_[b]) {
|
if (device_->mem_rsp_valid[b] && mem_rd_rsp_ready_[b]) {
|
||||||
device_->mem_rsp_valid[b] = 0;
|
device_->mem_rsp_valid[b] = 0;
|
||||||
|
@ -293,11 +291,16 @@ private:
|
||||||
|
|
||||||
// process memory requests
|
// process memory requests
|
||||||
if (device_->mem_req_valid[b] && device_->mem_req_ready[b]) {
|
if (device_->mem_req_valid[b] && device_->mem_req_ready[b]) {
|
||||||
uint64_t byte_addr = (device_->mem_req_addr[b] * PLATFORM_MEMORY_DATA_SIZE);
|
#if PLATFORM_MEMORY_INTERLEAVE == 1
|
||||||
|
uint64_t byte_addr = (uint64_t(device_->mem_req_addr[b]) * PLATFORM_MEMORY_NUM_BANKS + b) * PLATFORM_MEMORY_DATA_SIZE;
|
||||||
|
#else
|
||||||
|
uint64_t byte_addr = (uint64_t(device_->mem_req_addr[b]) + (b << g_mem_bank_addr_width)) * PLATFORM_MEMORY_DATA_SIZE;
|
||||||
|
#endif
|
||||||
|
// check read/write
|
||||||
if (device_->mem_req_rw[b]) {
|
if (device_->mem_req_rw[b]) {
|
||||||
auto byteen = device_->mem_req_byteen[b];
|
auto byteen = device_->mem_req_byteen[b];
|
||||||
auto data = VDataCast<uint8_t*, PLATFORM_MEMORY_DATA_SIZE>::get(device_->mem_req_data[b]);
|
auto data = VDataCast<uint8_t*, PLATFORM_MEMORY_DATA_SIZE>::get(device_->mem_req_data[b]);
|
||||||
// check address range
|
// check if console output address
|
||||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||||
// process console output
|
// process console output
|
||||||
|
@ -313,21 +316,23 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// process writes
|
// process memory writes
|
||||||
/*printf("%0ld: [sim] MEM Wr Req[%d]: addr=0x%0lx, tag=0x%0lx, byteen=0x", timestamp, b, byte_addr, device_->mem_req_tag[b]);
|
/*printf("%0ld: [sim] MEM Wr Req[%d]: addr=0x%0lx, tag=0x%0lx, byteen=0x", timestamp, b, byte_addr, device_->mem_req_tag[b]);
|
||||||
for (int i = (PLATFORM_MEMORY_DATA_SIZE/4)-1; i >= 0; --i) {
|
for (int i = (PLATFORM_MEMORY_DATA_SIZE/4)-1; i >= 0; --i) {
|
||||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||||
}
|
}
|
||||||
printf(", data=0x");
|
printf(", data=0x");
|
||||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||||
printf("%d=%02x,", i, data[i]);
|
printf("%02x", data[i]);
|
||||||
}
|
}
|
||||||
printf("\n");*/
|
printf("\n");*/
|
||||||
|
|
||||||
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
|
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
|
||||||
if ((byteen >> i) & 0x1) {
|
if ((byteen >> i) & 0x1) {
|
||||||
(*ram_)[byte_addr + i] = data[i];
|
(*ram_)[byte_addr + i] = data[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto mem_req = new mem_req_t();
|
auto mem_req = new mem_req_t();
|
||||||
mem_req->tag = device_->mem_req_tag[b];
|
mem_req->tag = device_->mem_req_tag[b];
|
||||||
mem_req->addr = byte_addr;
|
mem_req->addr = byte_addr;
|
||||||
|
@ -341,7 +346,7 @@ private:
|
||||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// process reads
|
// process memory reads
|
||||||
auto mem_req = new mem_req_t();
|
auto mem_req = new mem_req_t();
|
||||||
mem_req->tag = device_->mem_req_tag[b];
|
mem_req->tag = device_->mem_req_tag[b];
|
||||||
mem_req->addr = byte_addr;
|
mem_req->addr = byte_addr;
|
||||||
|
@ -388,11 +393,11 @@ private:
|
||||||
|
|
||||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||||
|
|
||||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
std::queue<mem_req_t*> dram_queue_[PLATFORM_MEMORY_BANKS];
|
std::queue<mem_req_t*> dram_queue_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
std::array<bool, PLATFORM_MEMORY_BANKS> mem_rd_rsp_ready_;
|
std::array<bool, PLATFORM_MEMORY_NUM_BANKS> mem_rd_rsp_ready_;
|
||||||
|
|
||||||
DramSim dram_sim_;
|
DramSim dram_sim_;
|
||||||
|
|
||||||
|
|
|
@ -14,9 +14,9 @@
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
module rtlsim_shim import VX_gpu_pkg::*; #(
|
module rtlsim_shim import VX_gpu_pkg::*; #(
|
||||||
parameter MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
parameter MEM_DATA_WIDTH = (`PLATFORM_MEMORY_DATA_SIZE * 8),
|
||||||
parameter MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH,
|
parameter MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_NUM_BANKS),
|
||||||
parameter MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS,
|
parameter MEM_NUM_BANKS = `PLATFORM_MEMORY_NUM_BANKS,
|
||||||
parameter MEM_TAG_WIDTH = 64
|
parameter MEM_TAG_WIDTH = 64
|
||||||
) (
|
) (
|
||||||
`SCOPE_IO_DECL
|
`SCOPE_IO_DECL
|
||||||
|
@ -159,7 +159,7 @@ module rtlsim_shim import VX_gpu_pkg::*; #(
|
||||||
.TAG_WIDTH_OUT (MEM_TAG_WIDTH),
|
.TAG_WIDTH_OUT (MEM_TAG_WIDTH),
|
||||||
.NUM_PORTS_IN (`VX_MEM_PORTS),
|
.NUM_PORTS_IN (`VX_MEM_PORTS),
|
||||||
.NUM_BANKS_OUT (MEM_NUM_BANKS),
|
.NUM_BANKS_OUT (MEM_NUM_BANKS),
|
||||||
.INTERLEAVE (0),
|
.INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE),
|
||||||
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
|
.REQ_OUT_BUF ((`VX_MEM_PORTS > 1) ? 2 : 0),
|
||||||
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || MEM_NUM_BANKS > 1) ? 2 : 0)
|
.RSP_OUT_BUF ((`VX_MEM_PORTS > 1 || MEM_NUM_BANKS > 1) ? 2 : 0)
|
||||||
) mem_bank_adapter (
|
) mem_bank_adapter (
|
||||||
|
|
|
@ -43,8 +43,13 @@ public:
|
||||||
|
|
||||||
char sname[100];
|
char sname[100];
|
||||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||||
uint32_t wsel_bits = log2ceil(config_.line_size);
|
uint32_t lg2_line_size = log2ceil(config_.line_size);
|
||||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::Priority, config.num_reqs, (1 << config.B), wsel_bits);
|
uint32_t num_banks = 1 << config.B;
|
||||||
|
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::Priority, config.num_reqs, num_banks, 1,
|
||||||
|
[lg2_line_size, num_banks](const MemCrossBar::ReqType& req) {
|
||||||
|
// Custom logic to calculate the output index using bank interleaving
|
||||||
|
return (uint32_t)((req.addr >> lg2_line_size) & (num_banks-1));
|
||||||
|
});
|
||||||
for (uint32_t i = 0; i < config.num_reqs; ++i) {
|
for (uint32_t i = 0; i < config.num_reqs; ++i) {
|
||||||
simobject->Inputs.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
simobject->Inputs.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||||
mem_xbar_->RspIn.at(i).bind(&simobject->Outputs.at(i));
|
mem_xbar_->RspIn.at(i).bind(&simobject->Outputs.at(i));
|
||||||
|
|
|
@ -30,7 +30,6 @@ private:
|
||||||
MemCrossBar::Ptr mem_xbar_;
|
MemCrossBar::Ptr mem_xbar_;
|
||||||
DramSim dram_sim_;
|
DramSim dram_sim_;
|
||||||
mutable PerfStats perf_stats_;
|
mutable PerfStats perf_stats_;
|
||||||
|
|
||||||
struct DramCallbackArgs {
|
struct DramCallbackArgs {
|
||||||
MemSim::Impl* memsim;
|
MemSim::Impl* memsim;
|
||||||
MemReq request;
|
MemReq request;
|
||||||
|
@ -41,11 +40,15 @@ public:
|
||||||
Impl(MemSim* simobject, const Config& config)
|
Impl(MemSim* simobject, const Config& config)
|
||||||
: simobject_(simobject)
|
: simobject_(simobject)
|
||||||
, config_(config)
|
, config_(config)
|
||||||
, dram_sim_(MEM_CLOCK_RATIO)
|
, dram_sim_(config.num_banks, config.block_size, config.clock_ratio)
|
||||||
{
|
{
|
||||||
char sname[100];
|
char sname[100];
|
||||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks);
|
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks, 1,
|
||||||
|
[lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) {
|
||||||
|
// Custom logic to calculate the output index using bank interleaving
|
||||||
|
return (uint32_t)((req.addr >> lg2_block_size) & (num_banks-1));
|
||||||
|
});
|
||||||
for (uint32_t i = 0; i < config.num_ports; ++i) {
|
for (uint32_t i = 0; i < config.num_ports; ++i) {
|
||||||
simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||||
mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
|
mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
|
||||||
|
@ -74,16 +77,15 @@ public:
|
||||||
|
|
||||||
auto& mem_req = mem_xbar_->ReqOut.at(i).front();
|
auto& mem_req = mem_xbar_->ReqOut.at(i).front();
|
||||||
|
|
||||||
// try to enqueue the request to the memory system
|
// enqueue the request to the memory system
|
||||||
auto req_args = new DramCallbackArgs{this, mem_req, i};
|
auto req_args = new DramCallbackArgs{this, mem_req, i};
|
||||||
auto enqueue_success = dram_sim_.send_request(
|
dram_sim_.send_request(
|
||||||
mem_req.write,
|
|
||||||
mem_req.addr,
|
mem_req.addr,
|
||||||
0,
|
mem_req.write,
|
||||||
[](void* arg) {
|
[](void* arg) {
|
||||||
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
|
auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
|
||||||
// only send a response for read requests
|
|
||||||
if (!rsp_args->request.write) {
|
if (!rsp_args->request.write) {
|
||||||
|
// only send a response for read requests
|
||||||
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
|
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
|
||||||
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
|
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
|
||||||
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp[" << rsp_args->bank_id << "]: " << mem_rsp);
|
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp[" << rsp_args->bank_id << "]: " << mem_rsp);
|
||||||
|
@ -93,14 +95,7 @@ public:
|
||||||
req_args
|
req_args
|
||||||
);
|
);
|
||||||
|
|
||||||
// check if the request was enqueued successfully
|
|
||||||
if (!enqueue_success) {
|
|
||||||
delete req_args;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
DT(3, simobject_->name() << "-mem-req[" << i << "]: " << mem_req);
|
DT(3, simobject_->name() << "-mem-req[" << i << "]: " << mem_req);
|
||||||
|
|
||||||
mem_xbar_->ReqOut.at(i).pop();
|
mem_xbar_->ReqOut.at(i).pop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,8 @@ public:
|
||||||
struct Config {
|
struct Config {
|
||||||
uint32_t num_banks;
|
uint32_t num_banks;
|
||||||
uint32_t num_ports;
|
uint32_t num_ports;
|
||||||
|
uint32_t block_size;
|
||||||
|
float clock_ratio;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PerfStats {
|
struct PerfStats {
|
||||||
|
|
|
@ -22,10 +22,14 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
||||||
{
|
{
|
||||||
SimPlatform::instance().initialize();
|
SimPlatform::instance().initialize();
|
||||||
|
|
||||||
|
assert(PLATFORM_MEMORY_DATA_SIZE == MEM_BLOCK_SIZE);
|
||||||
|
|
||||||
// create memory simulator
|
// create memory simulator
|
||||||
memsim_ = MemSim::Create("dram", MemSim::Config{
|
memsim_ = MemSim::Create("dram", MemSim::Config{
|
||||||
PLATFORM_MEMORY_BANKS,
|
PLATFORM_MEMORY_NUM_BANKS,
|
||||||
L3_MEM_PORTS
|
L3_MEM_PORTS,
|
||||||
|
MEM_BLOCK_SIZE,
|
||||||
|
MEM_CLOCK_RATIO
|
||||||
});
|
});
|
||||||
|
|
||||||
// create clusters
|
// create clusters
|
||||||
|
|
|
@ -398,6 +398,8 @@ inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class HashTable {
|
class HashTable {
|
||||||
public:
|
public:
|
||||||
|
typedef T DataType;
|
||||||
|
|
||||||
HashTable(uint32_t capacity)
|
HashTable(uint32_t capacity)
|
||||||
: entries_(capacity)
|
: entries_(capacity)
|
||||||
, size_(0)
|
, size_(0)
|
||||||
|
@ -470,6 +472,8 @@ private:
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
class Arbiter : public SimObject<Arbiter<Type>> {
|
class Arbiter : public SimObject<Arbiter<Type>> {
|
||||||
public:
|
public:
|
||||||
|
typedef Type ReqType;
|
||||||
|
|
||||||
std::vector<SimPort<Type>> Inputs;
|
std::vector<SimPort<Type>> Inputs;
|
||||||
std::vector<SimPort<Type>> Outputs;
|
std::vector<SimPort<Type>> Outputs;
|
||||||
|
|
||||||
|
@ -556,6 +560,8 @@ protected:
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
class CrossBar : public SimObject<CrossBar<Type>> {
|
class CrossBar : public SimObject<CrossBar<Type>> {
|
||||||
public:
|
public:
|
||||||
|
typedef Type ReqType;
|
||||||
|
|
||||||
std::vector<SimPort<Type>> Inputs;
|
std::vector<SimPort<Type>> Inputs;
|
||||||
std::vector<SimPort<Type>> Outputs;
|
std::vector<SimPort<Type>> Outputs;
|
||||||
|
|
||||||
|
@ -565,8 +571,8 @@ public:
|
||||||
ArbiterType type,
|
ArbiterType type,
|
||||||
uint32_t num_inputs,
|
uint32_t num_inputs,
|
||||||
uint32_t num_outputs = 1,
|
uint32_t num_outputs = 1,
|
||||||
uint32_t addr_start = 0,
|
uint32_t delay = 1,
|
||||||
uint32_t delay = 1
|
std::function<uint32_t(const Type& req)> output_sel = nullptr
|
||||||
)
|
)
|
||||||
: SimObject<CrossBar<Type>>(ctx, name)
|
: SimObject<CrossBar<Type>>(ctx, name)
|
||||||
, Inputs(num_inputs, this)
|
, Inputs(num_inputs, this)
|
||||||
|
@ -576,12 +582,18 @@ public:
|
||||||
, grants_(num_outputs, 0)
|
, grants_(num_outputs, 0)
|
||||||
, lg2_inputs_(log2ceil(num_inputs))
|
, lg2_inputs_(log2ceil(num_inputs))
|
||||||
, lg2_outputs_(log2ceil(num_outputs))
|
, lg2_outputs_(log2ceil(num_outputs))
|
||||||
, addr_start_(addr_start)
|
|
||||||
, collisions_(0) {
|
, collisions_(0) {
|
||||||
assert(delay != 0);
|
assert(delay != 0);
|
||||||
assert(num_inputs <= 64);
|
assert(num_inputs <= 64);
|
||||||
assert(num_outputs <= 64);
|
assert(num_outputs <= 64);
|
||||||
assert(ispow2(num_outputs));
|
assert(ispow2(num_outputs));
|
||||||
|
if (output_sel != nullptr) {
|
||||||
|
output_sel_ = output_sel;
|
||||||
|
} else {
|
||||||
|
output_sel_ = [this](const Type& req) {
|
||||||
|
return (uint32_t)bit_getw(req.addr, 0, (lg2_outputs_-1));
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reset() {
|
void reset() {
|
||||||
|
@ -609,7 +621,8 @@ public:
|
||||||
auto& req = req_in.front();
|
auto& req = req_in.front();
|
||||||
uint32_t output_idx = 0;
|
uint32_t output_idx = 0;
|
||||||
if (lg2_outputs_ != 0) {
|
if (lg2_outputs_ != 0) {
|
||||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, addr_start_ + (lg2_outputs_-1));
|
// select output index
|
||||||
|
output_idx = output_sel_(req);
|
||||||
// skip if input is not going to current output
|
// skip if input is not going to current output
|
||||||
if (output_idx != o)
|
if (output_idx != o)
|
||||||
continue;
|
continue;
|
||||||
|
@ -649,7 +662,7 @@ protected:
|
||||||
std::vector<uint32_t> grants_;
|
std::vector<uint32_t> grants_;
|
||||||
uint32_t lg2_inputs_;
|
uint32_t lg2_inputs_;
|
||||||
uint32_t lg2_outputs_;
|
uint32_t lg2_outputs_;
|
||||||
uint32_t addr_start_;
|
std::function<uint32_t(const Type& req)> output_sel_;
|
||||||
uint64_t collisions_;
|
uint64_t collisions_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -658,6 +671,9 @@ protected:
|
||||||
template <typename Req, typename Rsp>
|
template <typename Req, typename Rsp>
|
||||||
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
|
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
|
||||||
public:
|
public:
|
||||||
|
typedef Req ReqType;
|
||||||
|
typedef Rsp RspType;
|
||||||
|
|
||||||
std::vector<SimPort<Req>> ReqIn;
|
std::vector<SimPort<Req>> ReqIn;
|
||||||
std::vector<SimPort<Rsp>> RspIn;
|
std::vector<SimPort<Rsp>> RspIn;
|
||||||
|
|
||||||
|
@ -771,6 +787,9 @@ protected:
|
||||||
template <typename Req, typename Rsp>
|
template <typename Req, typename Rsp>
|
||||||
class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
|
class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
|
||||||
public:
|
public:
|
||||||
|
typedef Req ReqType;
|
||||||
|
typedef Rsp RspType;
|
||||||
|
|
||||||
std::vector<SimPort<Req>> ReqIn;
|
std::vector<SimPort<Req>> ReqIn;
|
||||||
std::vector<SimPort<Rsp>> RspIn;
|
std::vector<SimPort<Rsp>> RspIn;
|
||||||
|
|
||||||
|
@ -783,8 +802,8 @@ public:
|
||||||
ArbiterType type,
|
ArbiterType type,
|
||||||
uint32_t num_inputs,
|
uint32_t num_inputs,
|
||||||
uint32_t num_outputs = 1,
|
uint32_t num_outputs = 1,
|
||||||
uint32_t addr_start = 0,
|
uint32_t delay = 1,
|
||||||
uint32_t delay = 1
|
std::function<uint32_t(const Req& req)> output_sel = nullptr
|
||||||
)
|
)
|
||||||
: SimObject<TxCrossBar<Req, Rsp>>(ctx, name)
|
: SimObject<TxCrossBar<Req, Rsp>>(ctx, name)
|
||||||
, ReqIn(num_inputs, this)
|
, ReqIn(num_inputs, this)
|
||||||
|
@ -797,7 +816,6 @@ public:
|
||||||
, rsp_grants_(num_inputs, 0)
|
, rsp_grants_(num_inputs, 0)
|
||||||
, lg2_inputs_(log2ceil(num_inputs))
|
, lg2_inputs_(log2ceil(num_inputs))
|
||||||
, lg2_outputs_(log2ceil(num_outputs))
|
, lg2_outputs_(log2ceil(num_outputs))
|
||||||
, addr_start_(addr_start)
|
|
||||||
, req_collisions_(0)
|
, req_collisions_(0)
|
||||||
, rsp_collisions_(0) {
|
, rsp_collisions_(0) {
|
||||||
assert(delay != 0);
|
assert(delay != 0);
|
||||||
|
@ -805,6 +823,13 @@ public:
|
||||||
assert(num_outputs <= 64);
|
assert(num_outputs <= 64);
|
||||||
assert(ispow2(num_inputs));
|
assert(ispow2(num_inputs));
|
||||||
assert(ispow2(num_outputs));
|
assert(ispow2(num_outputs));
|
||||||
|
if (output_sel != nullptr) {
|
||||||
|
output_sel_ = output_sel;
|
||||||
|
} else {
|
||||||
|
output_sel_ = [this](const Req& req) {
|
||||||
|
return (uint32_t)bit_getw(req.addr, 0, (lg2_outputs_-1));
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reset() {
|
void reset() {
|
||||||
|
@ -875,7 +900,8 @@ public:
|
||||||
auto& req = req_in.front();
|
auto& req = req_in.front();
|
||||||
uint32_t output_idx = 0;
|
uint32_t output_idx = 0;
|
||||||
if (lg2_outputs_ != 0) {
|
if (lg2_outputs_ != 0) {
|
||||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, addr_start_ + (lg2_outputs_-1));
|
// select output index
|
||||||
|
output_idx = output_sel_(req);
|
||||||
// skip if request is not going to current output
|
// skip if request is not going to current output
|
||||||
if (output_idx != o)
|
if (output_idx != o)
|
||||||
continue;
|
continue;
|
||||||
|
@ -929,7 +955,7 @@ protected:
|
||||||
std::vector<uint32_t> rsp_grants_;
|
std::vector<uint32_t> rsp_grants_;
|
||||||
uint32_t lg2_inputs_;
|
uint32_t lg2_inputs_;
|
||||||
uint32_t lg2_outputs_;
|
uint32_t lg2_outputs_;
|
||||||
uint32_t addr_start_;
|
std::function<uint32_t(const Req& req)> output_sel_;
|
||||||
uint64_t req_collisions_;
|
uint64_t req_collisions_;
|
||||||
uint64_t rsp_collisions_;
|
uint64_t rsp_collisions_;
|
||||||
};
|
};
|
||||||
|
|
|
@ -31,21 +31,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
|
||||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
||||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||||
|
|
||||||
# Platform parameters
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_BANKS=2
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
|
||||||
ifeq ($(XLEN),64)
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47
|
|
||||||
else
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
|
||||||
CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512
|
|
||||||
endif
|
|
||||||
|
|
||||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||||
|
|
||||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||||
|
|
|
@ -17,16 +17,16 @@ module vortex_afu_shim #(
|
||||||
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
parameter C_S_AXI_CTRL_ADDR_WIDTH = 8,
|
||||||
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
|
||||||
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH,
|
||||||
parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH,
|
parameter C_M_AXI_MEM_DATA_WIDTH = (`PLATFORM_MEMORY_DATA_SIZE * 8),
|
||||||
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
parameter C_M_AXI_MEM_ADDR_WIDTH = 64,
|
||||||
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS
|
parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_NUM_BANKS
|
||||||
) (
|
) (
|
||||||
// System signals
|
// System signals
|
||||||
input wire ap_clk,
|
input wire ap_clk,
|
||||||
input wire ap_rst_n,
|
input wire ap_rst_n,
|
||||||
|
|
||||||
// AXI4 master interface
|
// AXI4 master interface
|
||||||
`REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA),
|
||||||
|
|
||||||
// AXI4-Lite slave interface
|
// AXI4-Lite slave interface
|
||||||
input wire s_axi_ctrl_awvalid,
|
input wire s_axi_ctrl_awvalid,
|
||||||
|
@ -61,7 +61,7 @@ module vortex_afu_shim #(
|
||||||
.clk (ap_clk),
|
.clk (ap_clk),
|
||||||
.reset (~ap_rst_n),
|
.reset (~ap_rst_n),
|
||||||
|
|
||||||
`REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
`REPEAT (`PLATFORM_MEMORY_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA),
|
||||||
|
|
||||||
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
.s_axi_ctrl_awvalid (s_axi_ctrl_awvalid),
|
||||||
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
.s_axi_ctrl_awready (s_axi_ctrl_awready),
|
||||||
|
|
|
@ -37,8 +37,6 @@
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8)
|
|
||||||
|
|
||||||
#ifndef MEM_CLOCK_RATIO
|
#ifndef MEM_CLOCK_RATIO
|
||||||
#define MEM_CLOCK_RATIO 1
|
#define MEM_CLOCK_RATIO 1
|
||||||
#endif
|
#endif
|
||||||
|
@ -61,10 +59,10 @@
|
||||||
|
|
||||||
#define CPU_GPU_LATENCY 200
|
#define CPU_GPU_LATENCY 200
|
||||||
|
|
||||||
#if PLATFORM_MEMORY_DATA_WIDTH > 64
|
#if PLATFORM_MEMORY_DATA_SIZE > 8
|
||||||
typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t;
|
typedef VlWide<(PLATFORM_MEMORY_DATA_SIZE/4)> Vl_m_data_t;
|
||||||
#else
|
#else
|
||||||
#if PLATFORM_MEMORY_DATA_WIDTH > 32
|
#if PLATFORM_MEMORY_DATA_SIZE > 4
|
||||||
typedef QData Vl_m_data_t;
|
typedef QData Vl_m_data_t;
|
||||||
#else
|
#else
|
||||||
typedef IData Vl_m_data_t;
|
typedef IData Vl_m_data_t;
|
||||||
|
@ -130,7 +128,7 @@ public:
|
||||||
Impl()
|
Impl()
|
||||||
: device_(nullptr)
|
: device_(nullptr)
|
||||||
, ram_(nullptr)
|
, ram_(nullptr)
|
||||||
, dram_sim_(MEM_CLOCK_RATIO)
|
, dram_sim_(PLATFORM_MEMORY_NUM_BANKS, PLATFORM_MEMORY_DATA_SIZE, MEM_CLOCK_RATIO)
|
||||||
, stop_(false)
|
, stop_(false)
|
||||||
#ifdef VCD_OUTPUT
|
#ifdef VCD_OUTPUT
|
||||||
, tfp_(nullptr)
|
, tfp_(nullptr)
|
||||||
|
@ -142,7 +140,7 @@ public:
|
||||||
if (future_.valid()) {
|
if (future_.valid()) {
|
||||||
future_.wait();
|
future_.wait();
|
||||||
}
|
}
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
delete mem_alloc_[b];
|
delete mem_alloc_[b];
|
||||||
}
|
}
|
||||||
if (ram_) {
|
if (ram_) {
|
||||||
|
@ -178,16 +176,16 @@ public:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// calculate memory bank size
|
// calculate memory bank size
|
||||||
mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH;
|
mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_NUM_BANKS;
|
||||||
|
|
||||||
// allocate RAM
|
// allocate RAM
|
||||||
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
ram_ = new RAM(0, RAM_PAGE_SIZE);
|
||||||
|
|
||||||
// initialize AXI memory interfaces
|
// initialize AXI memory interfaces
|
||||||
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
|
MP_M_AXI_MEM(PLATFORM_MEMORY_NUM_BANKS);
|
||||||
|
|
||||||
// initialize memory allocator
|
// initialize memory allocator
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
mem_alloc_[b] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
mem_alloc_[b] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -209,13 +207,13 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
|
int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) {
|
||||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||||
return -1;
|
return -1;
|
||||||
return mem_alloc_[bank_id]->allocate(size, addr);
|
return mem_alloc_[bank_id]->allocate(size, addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
int mem_free(uint32_t bank_id, uint64_t addr) {
|
int mem_free(uint32_t bank_id, uint64_t addr) {
|
||||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||||
return -1;
|
return -1;
|
||||||
return mem_alloc_[bank_id]->release(addr);
|
return mem_alloc_[bank_id]->release(addr);
|
||||||
}
|
}
|
||||||
|
@ -223,7 +221,7 @@ public:
|
||||||
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
|
int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) {
|
||||||
std::lock_guard<std::mutex> guard(mutex_);
|
std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
|
||||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||||
return -1;
|
return -1;
|
||||||
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
||||||
ram_->write(data, base_addr, size);
|
ram_->write(data, base_addr, size);
|
||||||
|
@ -238,7 +236,7 @@ public:
|
||||||
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
|
int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) {
|
||||||
std::lock_guard<std::mutex> guard(mutex_);
|
std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
|
||||||
if (bank_id >= PLATFORM_MEMORY_BANKS)
|
if (bank_id >= PLATFORM_MEMORY_NUM_BANKS)
|
||||||
return -1;
|
return -1;
|
||||||
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
uint64_t base_addr = bank_id * mem_bank_size_ + addr;
|
||||||
ram_->read(data, base_addr, size);
|
ram_->read(data, base_addr, size);
|
||||||
|
@ -321,7 +319,7 @@ private:
|
||||||
reqs.clear();
|
reqs.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
std::queue<mem_req_t*> empty;
|
std::queue<mem_req_t*> empty;
|
||||||
std::swap(dram_queues_[b], empty);
|
std::swap(dram_queues_[b], empty);
|
||||||
}
|
}
|
||||||
|
@ -338,7 +336,7 @@ private:
|
||||||
device_->ap_rst_n = 1;
|
device_->ap_rst_n = 1;
|
||||||
|
|
||||||
// this AXI device is always ready to accept new requests
|
// this AXI device is always ready to accept new requests
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
*m_axi_mem_[b].arready = 1;
|
*m_axi_mem_[b].arready = 1;
|
||||||
*m_axi_mem_[b].awready = 1;
|
*m_axi_mem_[b].awready = 1;
|
||||||
*m_axi_mem_[b].wready = 1;
|
*m_axi_mem_[b].wready = 1;
|
||||||
|
@ -358,19 +356,18 @@ private:
|
||||||
|
|
||||||
dram_sim_.tick();
|
dram_sim_.tick();
|
||||||
|
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
if (!dram_queues_[b].empty()) {
|
if (!dram_queues_[b].empty()) {
|
||||||
auto mem_req = dram_queues_[b].front();
|
auto mem_req = dram_queues_[b].front();
|
||||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
|
dram_sim_.send_request(mem_req->addr, mem_req->write, [](void* arg) {
|
||||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||||
if (orig_req->ready) {
|
if (orig_req->ready) {
|
||||||
delete orig_req;
|
delete orig_req;
|
||||||
} else {
|
} else {
|
||||||
orig_req->ready = true;
|
orig_req->ready = true;
|
||||||
}
|
}
|
||||||
}, mem_req)) {
|
}, mem_req);
|
||||||
dram_queues_[b].pop();
|
dram_queues_[b].pop();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -411,7 +408,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void axi_mem_bus_reset() {
|
void axi_mem_bus_reset() {
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
// read request address
|
// read request address
|
||||||
*m_axi_mem_[b].arready = 0;
|
*m_axi_mem_[b].arready = 0;
|
||||||
|
|
||||||
|
@ -435,14 +432,14 @@ private:
|
||||||
|
|
||||||
void axi_mem_bus_eval(bool clk) {
|
void axi_mem_bus_eval(bool clk) {
|
||||||
if (!clk) {
|
if (!clk) {
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
m_axi_states_[b].read_rsp_ready = *m_axi_mem_[b].rready;
|
m_axi_states_[b].read_rsp_ready = *m_axi_mem_[b].rready;
|
||||||
m_axi_states_[b].write_rsp_ready = *m_axi_mem_[b].bready;
|
m_axi_states_[b].write_rsp_ready = *m_axi_mem_[b].bready;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
for (int b = 0; b < PLATFORM_MEMORY_NUM_BANKS; ++b) {
|
||||||
// handle read responses
|
// handle read responses
|
||||||
if (*m_axi_mem_[b].rvalid && m_axi_states_[b].read_rsp_ready) {
|
if (*m_axi_mem_[b].rvalid && m_axi_states_[b].read_rsp_ready) {
|
||||||
*m_axi_mem_[b].rvalid = 0;
|
*m_axi_mem_[b].rvalid = 0;
|
||||||
|
@ -607,15 +604,15 @@ private:
|
||||||
|
|
||||||
std::mutex mutex_;
|
std::mutex mutex_;
|
||||||
|
|
||||||
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_BANKS];
|
std::list<mem_req_t*> pending_mem_reqs_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS];
|
m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_BANKS];
|
MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS];
|
m_axi_state_t m_axi_states_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
std::queue<mem_req_t*> dram_queues_[PLATFORM_MEMORY_BANKS];
|
std::queue<mem_req_t*> dram_queues_[PLATFORM_MEMORY_NUM_BANKS];
|
||||||
|
|
||||||
#ifdef VCD_OUTPUT
|
#ifdef VCD_OUTPUT
|
||||||
VerilatedVcdC* tfp_;
|
VerilatedVcdC* tfp_;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue