mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'bug_fixes'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
commit
18ae57cc7f
38 changed files with 1359 additions and 831 deletions
|
@ -105,7 +105,7 @@ regression()
|
|||
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
|
||||
|
||||
# test for matmul
|
||||
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
|
||||
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
@ -322,6 +322,10 @@ config2()
|
|||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
|
||||
|
||||
# test memory ports
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo
|
||||
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo --threads=32
|
||||
|
||||
echo "configuration-2 tests done!"
|
||||
}
|
||||
|
||||
|
|
|
@ -14,8 +14,6 @@
|
|||
`ifndef VX_CONFIG_VH
|
||||
`define VX_CONFIG_VH
|
||||
|
||||
|
||||
|
||||
`ifndef MIN
|
||||
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
@ -170,8 +168,8 @@
|
|||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef MEMORY_BANKS
|
||||
`define MEMORY_BANKS 2
|
||||
`ifndef PLATFORM_MEMORY_BANKS
|
||||
`define PLATFORM_MEMORY_BANKS 1
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
@ -193,7 +191,7 @@
|
|||
`endif
|
||||
|
||||
`ifdef VM_ENABLE
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`define PAGE_TABLE_BASE_ADDR 64'h0F0000000
|
||||
`endif
|
||||
|
||||
|
@ -218,7 +216,7 @@
|
|||
`endif
|
||||
|
||||
`ifdef VM_ENABLE
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`define PAGE_TABLE_BASE_ADDR 32'hF0000000
|
||||
`endif
|
||||
|
||||
|
@ -303,13 +301,13 @@
|
|||
`ifndef VM_ADDR_MODE
|
||||
`define VM_ADDR_MODE SV32 //or BARE
|
||||
`endif
|
||||
`ifndef PT_LEVEL
|
||||
`ifndef PT_LEVEL
|
||||
`define PT_LEVEL (2)
|
||||
`endif
|
||||
`ifndef PTE_SIZE
|
||||
`define PTE_SIZE (4)
|
||||
`endif
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`define NUM_PTE_ENTRY (1024)
|
||||
`endif
|
||||
`ifndef PT_SIZE_LIMIT
|
||||
|
@ -319,13 +317,13 @@
|
|||
`ifndef VM_ADDR_MODE
|
||||
`define VM_ADDR_MODE SV39 //or BARE
|
||||
`endif
|
||||
`ifndef PT_LEVEL
|
||||
`ifndef PT_LEVEL
|
||||
`define PT_LEVEL (3)
|
||||
`endif
|
||||
`ifndef PTE_SIZE
|
||||
`define PTE_SIZE (8)
|
||||
`endif
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`define NUM_PTE_ENTRY (512)
|
||||
`endif
|
||||
`ifndef PT_SIZE_LIMIT
|
||||
|
@ -604,7 +602,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef DCACHE_NUM_BANKS
|
||||
`define DCACHE_NUM_BANKS `MIN(`NUM_LSU_LANES, 4)
|
||||
`define DCACHE_NUM_BANKS `MIN(DCACHE_NUM_REQS, 16)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -647,6 +645,15 @@
|
|||
`define DCACHE_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports
|
||||
`ifndef L1_MEM_PORTS
|
||||
`ifdef L1_DISABLE
|
||||
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`else
|
||||
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LMEM_DISABLE
|
||||
|
@ -674,7 +681,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef L2_NUM_BANKS
|
||||
`define L2_NUM_BANKS `MIN(4, `NUM_SOCKETS)
|
||||
`define L2_NUM_BANKS `MIN(L2_NUM_REQS, 16)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -717,6 +724,15 @@
|
|||
`define L2_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports
|
||||
`ifndef L2_MEM_PORTS
|
||||
`ifdef L2_ENABLE
|
||||
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`else
|
||||
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Cache Size
|
||||
|
@ -726,7 +742,7 @@
|
|||
|
||||
// Number of Banks
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
|
||||
`define L3_NUM_BANKS `MIN(L3_NUM_REQS, 16)
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
|
@ -769,9 +785,13 @@
|
|||
`define L3_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports from LLC
|
||||
`ifndef NUM_MEM_PORTS
|
||||
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
|
||||
// Number of Memory Ports
|
||||
`ifndef L3_MEM_PORTS
|
||||
`ifdef L3_ENABLE
|
||||
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
|
||||
`else
|
||||
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -163,6 +163,7 @@ endgenerate
|
|||
`define USE_BLOCK_BRAM (* ramstyle = "block" *)
|
||||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams on" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_NET (* preserve *)
|
||||
`define BLACKBOX_CELL (* black_box *)
|
||||
|
@ -173,6 +174,7 @@ endgenerate
|
|||
`define USE_BLOCK_BRAM (* ram_style = "block" *)
|
||||
`define USE_FAST_BRAM (* ram_style = "distributed" *)
|
||||
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
|
||||
`define RW_RAM_CHECK (* rw_addr_collision = "yes" *)
|
||||
`define DISABLE_BRAM (* ram_style = "registers" *)
|
||||
`define PRESERVE_NET (* keep = "true" *)
|
||||
`define BLACKBOX_CELL (* black_box *)
|
||||
|
@ -183,6 +185,7 @@ endgenerate
|
|||
`define USE_BLOCK_BRAM
|
||||
`define USE_FAST_BRAM
|
||||
`define NO_RW_RAM_CHECK
|
||||
`define RW_RAM_CHECK
|
||||
`define DISABLE_BRAM
|
||||
`define PRESERVE_NET
|
||||
`define BLACKBOX_CELL
|
||||
|
|
|
@ -47,7 +47,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches
|
||||
VX_lmem_switch #(
|
||||
.REQ0_OUT_BUF (3),
|
||||
.REQ0_OUT_BUF (1),
|
||||
.REQ1_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (1),
|
||||
.ARBITER ("P")
|
||||
|
@ -78,7 +78,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
|
|||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.ARBITER ("P"),
|
||||
.REQ_OUT_BUF (3),
|
||||
.RSP_OUT_BUF (0)
|
||||
.RSP_OUT_BUF (2)
|
||||
) lmem_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -13,12 +13,6 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end
|
||||
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin : g_init \
|
||||
if (INIT_FILE != "") begin : g_file \
|
||||
|
@ -32,14 +26,93 @@
|
|||
end \
|
||||
end
|
||||
|
||||
`define RAM_BYPASS(__d) \
|
||||
reg [DATAW-1:0] bypass_data_r; \
|
||||
reg bypass_valid_r; \
|
||||
`define SYNC_RAM_WF_BLOCK(__d, __re, __we, __ra, __wa) \
|
||||
`RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \
|
||||
`RAM_INITIALIZATION \
|
||||
reg [ADDRW-1:0] raddr_r; \
|
||||
always @(posedge clk) begin \
|
||||
bypass_valid_r <= read_s && write && (raddr_s == waddr); \
|
||||
bypass_data_r <= wdata; \
|
||||
if (__re || __we) begin \
|
||||
if (__we) begin \
|
||||
ram[__wa] <= wdata; \
|
||||
end \
|
||||
raddr_r <= __ra; \
|
||||
end \
|
||||
end \
|
||||
assign __d = bypass_valid_r ? bypass_data_r : rdata_r
|
||||
assign __d = ram[raddr_r]
|
||||
|
||||
`define SYNC_RAM_WF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \
|
||||
`RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \
|
||||
`RAM_INITIALIZATION \
|
||||
reg [ADDRW-1:0] raddr_r; \
|
||||
always @(posedge clk) begin \
|
||||
if (__re || __we) begin \
|
||||
if (__we) begin \
|
||||
for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end \
|
||||
end \
|
||||
raddr_r <= __ra; \
|
||||
end \
|
||||
end \
|
||||
assign __d = ram[raddr_r]
|
||||
|
||||
`define SYNC_RAM_RF_BLOCK(__d, __re, __we, __ra, __wa) \
|
||||
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
|
||||
`RAM_INITIALIZATION \
|
||||
reg [DATAW-1:0] rdata_r; \
|
||||
always @(posedge clk) begin \
|
||||
if (__re || __we) begin \
|
||||
if (__we) begin \
|
||||
ram[__wa] <= wdata; \
|
||||
end \
|
||||
rdata_r <= ram[__ra]; \
|
||||
end \
|
||||
end \
|
||||
assign __d = rdata_r
|
||||
|
||||
`define SYNC_RAM_RF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \
|
||||
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
|
||||
`RAM_INITIALIZATION \
|
||||
reg [DATAW-1:0] rdata_r; \
|
||||
always @(posedge clk) begin \
|
||||
if (__re || __we) begin \
|
||||
if (__we) begin \
|
||||
for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end \
|
||||
end \
|
||||
rdata_r <= ram[__ra]; \
|
||||
end \
|
||||
end \
|
||||
assign __d = rdata_r
|
||||
|
||||
`define ASYNC_RAM_BLOCK(__d, __we, __ra, __wa) \
|
||||
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
|
||||
`RAM_INITIALIZATION \
|
||||
always @(posedge clk) begin \
|
||||
if (__we) begin \
|
||||
ram[__wa] <= wdata; \
|
||||
end \
|
||||
end \
|
||||
assign __d = ram[__ra]
|
||||
|
||||
`define ASYNC_RAM_BLOCK_WREN(__d, __we, __ra, __wa) \
|
||||
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
|
||||
`RAM_INITIALIZATION \
|
||||
always @(posedge clk) begin \
|
||||
if (__we) begin \
|
||||
for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end \
|
||||
end \
|
||||
end \
|
||||
assign __d = ram[__ra]
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_async_ram_patch #(
|
||||
|
@ -47,6 +120,8 @@ module VX_async_ram_patch #(
|
|||
parameter SIZE = 1,
|
||||
parameter WRENW = 1,
|
||||
parameter DUAL_PORT = 0,
|
||||
parameter FORCE_BRAM = 0,
|
||||
parameter WRITE_FIRST = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
|
@ -79,77 +154,102 @@ module VX_async_ram_patch #(
|
|||
.out ({raddr_s, read_s, is_raddr_reg})
|
||||
);
|
||||
|
||||
// synchronous ram
|
||||
wire [DATAW-1:0] rdata_s, rdata_a;
|
||||
|
||||
wire [DATAW-1:0] rdata_s;
|
||||
|
||||
if (WRENW != 1) begin : g_wren_sync_ram
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (read_s || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
if (1) begin : g_sync_ram
|
||||
if (WRENW != 1) begin : g_wren
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
|
||||
`SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
|
||||
`SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
end else begin : g_lutram
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES
|
||||
`SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES
|
||||
`SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
end
|
||||
end else begin : g_no_wren
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
|
||||
`SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
|
||||
`SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
end else begin : g_lutram
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES
|
||||
`SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES
|
||||
`SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
rdata_r <= ram[raddr_s];
|
||||
end
|
||||
end
|
||||
`RAM_BYPASS(rdata_s);
|
||||
end else begin : g_no_wren_sync_ram
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`RAM_INITIALIZATION
|
||||
`UNUSED_VAR (wren)
|
||||
always @(posedge clk) begin
|
||||
if (read_s || write) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[raddr_s];
|
||||
end
|
||||
end
|
||||
`RAM_BYPASS(rdata_s);
|
||||
end
|
||||
|
||||
// asynchronous ram (fallback)
|
||||
|
||||
wire [DATAW-1:0] rdata_a;
|
||||
|
||||
if (DUAL_PORT != 0) begin : g_dp_async_ram
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (WRENW != 1) begin : g_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
if (1) begin : g_async_ram
|
||||
if (DUAL_PORT != 0) begin : g_dp
|
||||
if (WRENW != 1) begin : g_wren
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES `RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
end else begin : g_no_wren
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES `RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
end
|
||||
end else begin : g_no_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end else begin : g_sp
|
||||
if (WRENW != 1) begin : g_wren
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES `RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
end else begin : g_no_wren
|
||||
if (WRITE_FIRST) begin : g_write_first
|
||||
`define RAM_ATTRIBUTES `RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end else begin : g_read_first
|
||||
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
|
||||
`ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr);
|
||||
`undef RAM_ATTRIBUTES
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_a = ram[raddr];
|
||||
end else begin : g_sp_async_ram
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (WRENW != 1) begin : g_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
end else begin : g_no_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_a = ram[waddr];
|
||||
end
|
||||
|
||||
assign rdata = is_raddr_reg ? rdata_s : rdata_a;
|
||||
|
|
|
@ -80,7 +80,7 @@ module VX_dp_ram #(
|
|||
if (FORCE_BRAM) begin : g_bram
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
|
@ -93,7 +93,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr_r];
|
||||
end else begin : g_no_wren
|
||||
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
|
@ -166,7 +166,7 @@ module VX_dp_ram #(
|
|||
end else begin : g_auto
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
(* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN
|
||||
`RW_RAM_CHECK `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
|
@ -179,7 +179,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr_r];
|
||||
end else begin : g_no_wren
|
||||
(* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
|
@ -220,7 +220,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else begin
|
||||
end else begin : g_undefined
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
|
@ -253,30 +253,32 @@ module VX_dp_ram #(
|
|||
end else begin : g_async
|
||||
`UNUSED_VAR (read)
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
`ifdef VIVADO
|
||||
VX_async_ram_patch #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.WRENW (WRENW),
|
||||
.DUAL_PORT (1),
|
||||
.FORCE_BRAM (FORCE_BRAM),
|
||||
.WRITE_FIRST(RDW_MODE == "W"),
|
||||
.INIT_ENABLE(INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE)
|
||||
) async_ram_patch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (wdata),
|
||||
.raddr (raddr),
|
||||
.rdata (rdata)
|
||||
);
|
||||
`else
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
`ifdef VIVADO
|
||||
VX_async_ram_patch #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.WRENW (WRENW),
|
||||
.DUAL_PORT (1),
|
||||
.INIT_ENABLE(INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE)
|
||||
) async_ram_patch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (wdata),
|
||||
.raddr (raddr),
|
||||
.rdata (rdata)
|
||||
);
|
||||
`else
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -285,7 +287,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -294,7 +296,6 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
`endif
|
||||
end else begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
|
@ -316,10 +317,11 @@ module VX_dp_ram #(
|
|||
assign rdata = ram[raddr];
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end else begin : g_auto
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RW_RAM_CHECK `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -328,7 +330,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
|
|
@ -90,9 +90,6 @@ module VX_fifo_queue #(
|
|||
end
|
||||
end
|
||||
|
||||
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
|
||||
wire bypass = push && (empty || (going_empty && pop));
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
|
@ -101,7 +98,7 @@ module VX_fifo_queue #(
|
|||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (~bypass),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
.wren (1'b1),
|
||||
.raddr (rd_ptr_r),
|
||||
|
@ -112,6 +109,8 @@ module VX_fifo_queue #(
|
|||
|
||||
if (OUT_REG != 0) begin : g_out_reg
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
|
||||
wire bypass = push && (empty || (going_empty && pop));
|
||||
always @(posedge clk) begin
|
||||
if (bypass) begin
|
||||
data_out_r <= data_in;
|
||||
|
|
|
@ -485,7 +485,7 @@ module VX_rr_arbiter #(
|
|||
.D (NUM_REQS)
|
||||
) grant_decoder (
|
||||
.sel_in (grant_index),
|
||||
.data_in (1'b1),
|
||||
.data_in (grant_valid),
|
||||
.data_out (grant_onehot)
|
||||
);
|
||||
|
||||
|
|
|
@ -77,37 +77,9 @@ module VX_sp_ram #(
|
|||
localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM);
|
||||
if (OUT_REG) begin : g_sync
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
if (RDW_MODE == "R") begin : g_read_first
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] addr_r;
|
||||
always @(posedge clk) begin
|
||||
|
@ -135,6 +107,34 @@ module VX_sp_ram #(
|
|||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "R") begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "N") begin : g_no_change
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
|
@ -165,7 +165,7 @@ module VX_sp_ram #(
|
|||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "U") begin : g_unknown
|
||||
end else if (RDW_MODE == "U") begin : g_undefined
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
|
@ -195,35 +195,7 @@ module VX_sp_ram #(
|
|||
end
|
||||
end
|
||||
end else begin : g_auto
|
||||
if (RDW_MODE == "R") begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "W") begin : g_write_first
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
|
@ -253,6 +225,34 @@ module VX_sp_ram #(
|
|||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "R") begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "N") begin : g_no_change
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
|
@ -283,7 +283,7 @@ module VX_sp_ram #(
|
|||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "U") begin : g_unknown
|
||||
end else if (RDW_MODE == "U") begin : g_undefined
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
|
@ -316,30 +316,32 @@ module VX_sp_ram #(
|
|||
end else begin : g_async
|
||||
`UNUSED_VAR (read)
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
`ifdef VIVADO
|
||||
VX_async_ram_patch #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.WRENW (WRENW),
|
||||
.DUAL_PORT (0),
|
||||
.FORCE_BRAM (FORCE_BRAM),
|
||||
.WRITE_FIRST(RDW_MODE == "W"),
|
||||
.INIT_ENABLE(INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE)
|
||||
) async_ram_patch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
.waddr (addr),
|
||||
.wdata (wdata),
|
||||
.raddr (addr),
|
||||
.rdata (rdata)
|
||||
);
|
||||
`else
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
`ifdef VIVADO
|
||||
VX_async_ram_patch #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.WRENW (WRENW),
|
||||
.DUAL_PORT (0),
|
||||
.INIT_ENABLE(INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE)
|
||||
) async_ram_patch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
.waddr (addr),
|
||||
.wdata (wdata),
|
||||
.raddr (addr),
|
||||
.rdata (rdata)
|
||||
);
|
||||
`else
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -348,7 +350,7 @@ module VX_sp_ram #(
|
|||
end
|
||||
assign rdata = ram[addr];
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -357,7 +359,6 @@ module VX_sp_ram #(
|
|||
end
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
`endif
|
||||
end else begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
|
@ -379,10 +380,11 @@ module VX_sp_ram #(
|
|||
assign rdata = ram[addr];
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end else begin : g_auto
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RW_RAM_CHECK `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -391,7 +393,7 @@ module VX_sp_ram #(
|
|||
end
|
||||
assign rdata = ram[addr];
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
|
@ -443,15 +445,7 @@ module VX_sp_ram #(
|
|||
end
|
||||
|
||||
if (OUT_REG) begin : g_sync
|
||||
if (RDW_MODE == "R") begin : g_read_first
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else if (RDW_MODE == "W") begin : g_write_first
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
reg [ADDRW-1:0] addr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
|
@ -459,6 +453,14 @@ module VX_sp_ram #(
|
|||
end
|
||||
end
|
||||
assign rdata = ram[addr_r];
|
||||
end else if (RDW_MODE == "R") begin : g_read_first
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else if (RDW_MODE == "N") begin : g_no_change
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously
|
||||
// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously
|
||||
// It has the following benefits:
|
||||
// + full-bandwidth throughput
|
||||
// + ready_in and ready_out are decoupled
|
||||
|
@ -45,79 +45,66 @@ module VX_stream_buffer #(
|
|||
assign valid_out = valid_in;
|
||||
assign data_out = data_in;
|
||||
|
||||
end else if (OUT_REG != 0) begin : g_out_reg
|
||||
end else begin : g_buffer
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [DATAW-1:0] buffer;
|
||||
reg valid_out_r;
|
||||
reg no_buffer;
|
||||
reg [DATAW-1:0] data_out_r, buffer_r;
|
||||
reg valid_out_r, valid_in_r;
|
||||
|
||||
wire fire_in = valid_in && ready_in;
|
||||
wire flow_out = ready_out || ~valid_out;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
no_buffer <= 1;
|
||||
end else begin
|
||||
if (flow_out) begin
|
||||
no_buffer <= 1;
|
||||
end else if (valid_in) begin
|
||||
no_buffer <= 0;
|
||||
end
|
||||
if (flow_out) begin
|
||||
valid_out_r <= valid_in || ~no_buffer;
|
||||
end
|
||||
valid_in_r <= 1'b1;
|
||||
end else if (valid_in || flow_out) begin
|
||||
valid_in_r <= flow_out;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fire_in) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (flow_out) begin
|
||||
data_out_r <= no_buffer ? data_in : buffer;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = no_buffer;
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = data_out_r;
|
||||
|
||||
end else begin : g_no_out_reg
|
||||
|
||||
reg [1:0][DATAW-1:0] shift_reg;
|
||||
reg [1:0] fifo_state, fifo_state_n;
|
||||
|
||||
wire fire_in = valid_in && ready_in;
|
||||
wire fire_out = valid_out && ready_out;
|
||||
|
||||
always @(*) begin
|
||||
case ({fire_in, fire_out})
|
||||
2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10
|
||||
2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00
|
||||
default: fifo_state_n = fifo_state;
|
||||
endcase
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
fifo_state <= 2'b00;
|
||||
end else begin
|
||||
fifo_state <= fifo_state_n;
|
||||
valid_out_r <= 1'b0;
|
||||
end else if (flow_out) begin
|
||||
valid_out_r <= valid_in || ~valid_in_r;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fire_in) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
if (OUT_REG != 0) begin : g_out_reg
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fire_in) begin
|
||||
buffer_r <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (flow_out) begin
|
||||
data_out_r <= valid_in_r ? data_in : buffer_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = data_out_r;
|
||||
|
||||
end else begin : g_no_out_reg
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fire_in) begin
|
||||
data_out_r <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fire_in) begin
|
||||
buffer_r <= data_out_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = valid_in_r ? data_out_r : buffer_r;
|
||||
|
||||
end
|
||||
|
||||
assign ready_in = ~fifo_state[1];
|
||||
assign valid_out = fifo_state[0];
|
||||
assign data_out = shift_reg[fifo_state[1]];
|
||||
assign valid_out = valid_out_r;
|
||||
assign ready_in = valid_in_r;
|
||||
|
||||
end
|
||||
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
namespace eval vortex {
|
||||
|
||||
variable debug 0
|
||||
|
@ -17,6 +30,25 @@ proc str_replace {str match repl} {
|
|||
return $result
|
||||
}
|
||||
|
||||
# Return 'str' with every regular-expression metacharacter prefixed by a
# backslash, so the result can be embedded in a pattern and match 'str'
# literally. The escaped characters are: backslash, caret, dot, square
# brackets, dollar, parentheses, pipe, star, plus, question mark and braces.
proc regex_escape {str} {
    # Build the substitution table: each metacharacter maps to itself
    # preceded by one backslash. All keys are distinct single characters,
    # so their order in the table does not affect the result.
    set escape_map {}
    foreach meta [list \\ ^ . \[ \] \$ ( ) | * + ? \{ \}] {
        lappend escape_map $meta "\\$meta"
    }
    return [string map $escape_map $str]
}
|
||||
|
||||
proc unique_cell_name {name} {
|
||||
if {[get_cells -quiet $name] == {}} { return $name }
|
||||
set index 0
|
||||
|
@ -31,31 +63,60 @@ proc unique_net_name {name} {
|
|||
return ${name}_${index}
|
||||
}
|
||||
|
||||
proc find_nested_cells {parent name_match {should_exist 1}} {
|
||||
# Group cells by the value of their PARENT property.
#   all_cells - list of cell objects to index
# Returns a dict mapping each non-empty PARENT value to the list of cells
# that report it. Cells whose PARENT property is empty are left out.
proc build_parent_child_map {all_cells} {
    set children_of [dict create]
    foreach cell $all_cells {
        set owner [get_property PARENT $cell]
        if {$owner eq ""} {
            continue
        }
        # 'dict lappend' starts a fresh list when the key is not present yet,
        # so the first child and later children take the same path.
        dict lappend children_of $owner $cell
    }
    return $children_of
}
|
||||
|
||||
# Collect every cell reachable below 'parent_cell' in 'parent_child_map'
# (a dict of parent -> list of direct children).
# The result is in depth-first pre-order: each child is immediately followed
# by its own descendants. Returns an empty list when 'parent_cell' has no
# entry in the map.
proc find_cell_descendants_recursive {parent_cell parent_child_map} {
    if {![dict exists $parent_child_map $parent_cell]} {
        return {}
    }
    set collected {}
    foreach child [dict get $parent_child_map $parent_cell] {
        # Append the child itself, then everything found underneath it.
        lappend collected $child {*}[find_cell_descendants_recursive $child $parent_child_map]
    }
    return $collected
}
|
||||
|
||||
# Return all cells nested below 'parent_cell' in the current design.
# Indexes every hierarchical cell by its parent, then walks that index
# starting at 'parent_cell'.
proc find_cell_descendants {parent_cell} {
    set hierarchy [build_parent_child_map [get_cells -hierarchical]]
    return [find_cell_descendants_recursive $parent_cell $hierarchy]
}
|
||||
|
||||
proc find_nested_cells {parent_cell name_match {should_exist 1}} {
|
||||
set hier_sep [get_hierarchy_separator]
|
||||
set matching_cells {}
|
||||
foreach cell [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"] {
|
||||
set name [get_property NAME $cell]
|
||||
if {[regexp $name_match $name]} {
|
||||
foreach cell [find_cell_descendants $parent_cell] {
|
||||
set parent_name [get_property PARENT $cell]
|
||||
set cell_name [get_property NAME $cell]
|
||||
set name_prefix [regex_escape "${parent_name}${hier_sep}"]
|
||||
set pattern "${name_prefix}${name_match}"
|
||||
if {[regexp $pattern $cell_name]} {
|
||||
lappend matching_cells $cell
|
||||
}
|
||||
}
|
||||
if {[llength $matching_cells] == 0} {
|
||||
print_error "No matching cell found for '$parent' matching '$name_match'." $should_exist
|
||||
print_error "No matching cell found for '$parent_cell' matching '$name_match'." $should_exist
|
||||
}
|
||||
return $matching_cells
|
||||
}
|
||||
|
||||
proc find_nested_cell {parent name_match} {
|
||||
foreach cell [get_cells -hierarchical -filter "PARENT == $parent"] {
|
||||
set name [get_property NAME $cell]
|
||||
if {$name == $name_match} {
|
||||
return $cell
|
||||
}
|
||||
}
|
||||
puts "ERROR: No matching cell found for '$parent' matching '$name_match'."
|
||||
exit -1
|
||||
}
|
||||
|
||||
proc find_cell_nets {cell name_match {should_exist 1}} {
|
||||
set matching_nets {}
|
||||
foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] {
|
||||
|
@ -70,22 +131,23 @@ proc find_cell_nets {cell name_match {should_exist 1}} {
|
|||
return $matching_nets
|
||||
}
|
||||
|
||||
proc get_cell_net {cell name_match} {
|
||||
foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] {
|
||||
set name [get_property NAME $net]
|
||||
if {$name == $name_match} {
|
||||
return $net
|
||||
}
|
||||
proc get_cell_net {cell name} {
|
||||
set net [get_nets -hierarchical -filter "PARENT_CELL == $cell && NAME == $name"]
|
||||
if {[llength $net] == 0} {
|
||||
puts "ERROR: No matching net found for '$cell' matching '$name'."
|
||||
exit -1
|
||||
}
|
||||
puts "ERROR: No matching net found for '$cell' matching '$name_match'."
|
||||
exit -1
|
||||
return $net;
|
||||
}
|
||||
|
||||
proc find_cell_pins {cell name_match {should_exist 1}} {
|
||||
set hier_sep [get_hierarchy_separator]
|
||||
set matching_pins {}
|
||||
foreach pin [get_pins -of_objects $cell] {
|
||||
set name [get_property NAME $pin]
|
||||
if {[regexp $name_match $name]} {
|
||||
set name_prefix [regex_escape "${cell}${hier_sep}"]
|
||||
set pattern "${name_prefix}${name_match}"
|
||||
if {[regexp $pattern $name]} {
|
||||
lappend matching_pins $pin
|
||||
}
|
||||
}
|
||||
|
@ -95,15 +157,31 @@ proc find_cell_pins {cell name_match {should_exist 1}} {
|
|||
return $matching_pins
|
||||
}
|
||||
|
||||
proc get_cell_pin {cell name_match} {
|
||||
foreach pin [get_pins -of_objects $cell] {
|
||||
set name [get_property NAME $pin]
|
||||
if {$name == $name_match} {
|
||||
return $pin
|
||||
}
|
||||
proc get_cell_pin {cell name} {
|
||||
set pin [get_pins -of_objects $cell -filter "NAME == $name"]
|
||||
if {[llength $pin] == 0} {
|
||||
puts "ERROR: No matching pin found for '$cell' matching '$name'."
|
||||
exit -1
|
||||
}
|
||||
puts "ERROR: No matching pin found for '$cell' matching '$name_match'."
|
||||
exit -1
|
||||
return $pin
|
||||
}
|
||||
|
||||
# Remove 'cell' from the netlist, logging the action. A confirmation line is
# printed as well when the namespace variable 'debug' is true.
proc remove_cell_from_netlist {cell} {
    variable debug

    puts "INFO: Removing cell '$cell' from the netlist."

    # The per-pin net disconnection below is currently commented out; the
    # cell is removed directly.
    #foreach pin [get_pins -quiet -of_objects $cell] {
    #    foreach net [get_nets -quiet -of_objects $pin] {
    #        disconnect_net -net $net -objects $pin
    #        if {$debug} {puts "DEBUG: Disconnected net '$net' from pin '$pin'."}
    #    }
    #}

    remove_cell $cell
    if {$debug} {
        puts "DEBUG: Cell '$cell' was removed successfully."
    }
}
|
||||
|
||||
proc replace_pin_source {pin source_pin} {
|
||||
|
@ -141,10 +219,42 @@ proc replace_pin_source {pin source_pin} {
|
|||
if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."}
|
||||
}
|
||||
|
||||
proc create_register_next {reg_cell prefix_name} {
|
||||
# Locate the driver of 'input_net'.
# Leaf pins with DIRECTION == "OUT" are tried first; if there are none, ports
# with DIRECTION == "IN" attached to the net are used instead.
#   input_net    - net to inspect
#   should_exist - forwarded to print_error when no driver is found
#                  (print_error is defined outside this proc)
# Returns the driver pin or port. When several candidates exist a warning is
# printed and the first one is returned. If print_error returns, the result
# is the empty port list.
proc find_net_driver {input_net {should_exist 1}} {
    set out_pins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}]
    set pin_count [llength $out_pins]
    if {$pin_count > 1} {
        puts "WARNING: Multiple driver pins found for '$input_net'."
        return [lindex $out_pins 0]
    }
    if {$pin_count == 1} {
        return $out_pins
    }

    # No driving cell pin: fall back to ports attached to the net.
    set in_ports [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}]
    set port_count [llength $in_ports]
    if {$port_count > 1} {
        puts "WARNING: Multiple driver ports found for '$input_net'."
        return [lindex $in_ports 0]
    }
    if {$port_count == 0} {
        print_error "No driver found for '$input_net'." $should_exist
    }
    return $in_ports
}
|
||||
|
||||
# Locate the driver of the net attached to 'input_pin'.
#   input_pin    - pin whose source is wanted
#   should_exist - forwarded to print_error when the pin has no net
#                  (print_error is defined outside this proc)
# Returns whatever find_net_driver yields for the attached net, or an empty
# string when no net is attached and print_error returns. Exits the script
# when more than one net is attached.
# Note: find_net_driver is called with its default 'should_exist'.
proc find_pin_driver {input_pin {should_exist 1}} {
    set attached [get_nets -quiet -of_objects $input_pin]
    set net_count [llength $attached]
    if {$net_count > 1} {
        puts "ERROR: Multiple nets connected to pin '$input_pin'."
        exit -1
    }
    if {$net_count == 0} {
        print_error "No net connected to pin '$input_pin'." $should_exist
        return ""
    }
    return [find_net_driver $attached]
}
|
||||
|
||||
proc create_register_next {parent reg_cell} {
|
||||
variable debug
|
||||
|
||||
set reg_d_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/D"}]
|
||||
set hier_sep [get_hierarchy_separator]
|
||||
|
||||
set reg_d_pin [get_pins "${reg_cell}${hier_sep}D"]
|
||||
if {[llength $reg_d_pin] == 0} {
|
||||
puts "ERROR: No D pin found on register cell '$reg_cell'."
|
||||
exit -1
|
||||
|
@ -167,7 +277,7 @@ proc create_register_next {reg_cell prefix_name} {
|
|||
|
||||
set register_type [get_property REF_NAME $reg_cell]
|
||||
if {$register_type == "FDRE"} {
|
||||
set reg_r_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/R"}]
|
||||
set reg_r_pin [get_pins "${reg_cell}${hier_sep}R"]
|
||||
if {[llength $reg_r_pin] == 0} {
|
||||
puts "ERROR: No R pin found on FDRE cell '$reg_cell'."
|
||||
exit -1
|
||||
|
@ -184,7 +294,7 @@ proc create_register_next {reg_cell prefix_name} {
|
|||
exit -1
|
||||
}
|
||||
} elseif {$register_type == "FDSE"} {
|
||||
set reg_s_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/S"}]
|
||||
set reg_s_pin [get_pins "${reg_cell}${hier_sep}S"]
|
||||
if {[llength $reg_s_pin] == 0} {
|
||||
puts "ERROR: No S pin found on FDSE cell '$reg_cell'."
|
||||
exit -1
|
||||
|
@ -229,7 +339,7 @@ proc create_register_next {reg_cell prefix_name} {
|
|||
# Use a 2x1 LUT to describe the logic:
|
||||
# FDRE: O = I1 ? 0 : I0; where I0=D, I1=R
|
||||
# FDSE: O = I1 ? 1 : I0; where I0=D, I1=S
|
||||
set lut_name [unique_cell_name $prefix_name]
|
||||
set lut_name [unique_cell_name "${parent}${hier_sep}raddr_next"]
|
||||
set lut_cell [create_cell -reference LUT2 $lut_name]
|
||||
puts "INFO: Created lut cell: '$lut_cell'"
|
||||
|
||||
|
@ -242,7 +352,7 @@ proc create_register_next {reg_cell prefix_name} {
|
|||
exit 1
|
||||
}
|
||||
|
||||
set lut_i0_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I0"}]
|
||||
set lut_i0_pin [get_pins "${lut_cell}${hier_sep}I0"]
|
||||
if {[llength $lut_i0_pin] == 0} {
|
||||
puts "ERROR: No I0 pin found on FDSE cell '$lut_cell'."
|
||||
exit -1
|
||||
|
@ -251,7 +361,7 @@ proc create_register_next {reg_cell prefix_name} {
|
|||
exit -1
|
||||
}
|
||||
|
||||
set lut_i1_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I1"}]
|
||||
set lut_i1_pin [get_pins "${lut_cell}${hier_sep}I1"]
|
||||
if {[llength $lut_i1_pin] == 0} {
|
||||
puts "ERROR: No I1 pin found on FDSE cell '$lut_cell'."
|
||||
exit -1
|
||||
|
@ -260,7 +370,7 @@ proc create_register_next {reg_cell prefix_name} {
|
|||
exit -1
|
||||
}
|
||||
|
||||
set lut_o_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/O"}]
|
||||
set lut_o_pin [get_pins "${lut_cell}${hier_sep}O"]
|
||||
if {[llength $lut_o_pin] == 0} {
|
||||
puts "ERROR: No O pin found on FDSE cell '$lut_cell'."
|
||||
exit -1
|
||||
|
@ -278,19 +388,22 @@ proc create_register_next {reg_cell prefix_name} {
|
|||
return $lut_o_pin
|
||||
}
|
||||
|
||||
proc getOrCreateVCCPin {prefix_name} {
|
||||
proc getOrCreateVCCPin {parent} {
|
||||
variable debug
|
||||
|
||||
set vcc_cell ""
|
||||
set vcc_cells [get_cells -quiet -filter {REF_NAME == VCC}]
|
||||
if {[llength $vcc_cells] == 0} {
|
||||
set cell_name [unique_cell_name $prefix_name]
|
||||
set hier_sep [get_hierarchy_separator]
|
||||
set cell_name "${parent}${hier_sep}VCC"
|
||||
|
||||
set vcc_cell [get_cells -quiet $cell_name]
|
||||
if {[llength $vcc_cell] == 0} {
|
||||
set vcc_cell [create_cell -reference VCC $cell_name]
|
||||
puts "INFO: Created VCC cell: '$vcc_cell'"
|
||||
} else {
|
||||
set vcc_cell [lindex $vcc_cells 0]
|
||||
} elseif {[llength $vcc_cell] > 1} {
|
||||
puts "ERROR: Multiple VCC cells found with name '$cell_name'."
|
||||
exit -1
|
||||
}
|
||||
set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}]
|
||||
|
||||
set vcc_pin [get_pins "${vcc_cell}${hier_sep}P"]
|
||||
if {[llength $vcc_pin] == 0} {
|
||||
puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'."
|
||||
exit -1
|
||||
|
@ -298,22 +411,26 @@ proc getOrCreateVCCPin {prefix_name} {
|
|||
puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'."
|
||||
exit -1
|
||||
}
|
||||
|
||||
return $vcc_pin
|
||||
}
|
||||
|
||||
proc getOrCreateGNDPin {prefix_name} {
|
||||
proc getOrCreateGNDPin {parent} {
|
||||
variable debug
|
||||
|
||||
set gnd_cell ""
|
||||
set gnd_cells [get_cells -quiet -filter {REF_NAME == GND}]
|
||||
if {[llength $gnd_cells] == 0} {
|
||||
set cell_name [unique_cell_name $prefix_name]
|
||||
set hier_sep [get_hierarchy_separator]
|
||||
set cell_name "${parent}${hier_sep}GND"
|
||||
|
||||
set gnd_cell [get_cells -quiet $cell_name]
|
||||
if {[llength $gnd_cell] == 0} {
|
||||
set gnd_cell [create_cell -reference GND $cell_name]
|
||||
puts "INFO: Created GND cell: '$gnd_cell'"
|
||||
} else {
|
||||
set gnd_cell [lindex $gnd_cells 0]
|
||||
} elseif {[llength $gnd_cell] > 1} {
|
||||
puts "ERROR: Multiple GND cells found with name '$cell_name'."
|
||||
exit -1
|
||||
}
|
||||
set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}]
|
||||
|
||||
set gnd_pin [get_pins "${gnd_cell}${hier_sep}G"]
|
||||
if {[llength $gnd_pin] == 0} {
|
||||
puts "ERROR: No GND pin found on GND cell '$gnd_cell'."
|
||||
exit -1
|
||||
|
@ -321,6 +438,7 @@ proc getOrCreateGNDPin {prefix_name} {
|
|||
puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'."
|
||||
exit -1
|
||||
}
|
||||
|
||||
return $gnd_pin
|
||||
}
|
||||
|
||||
|
@ -338,35 +456,6 @@ proc find_net_sinks {input_net {should_exist 1}} {
|
|||
return $sink_pins
|
||||
}
|
||||
|
||||
proc find_net_driver {input_net {should_exist 1}} {
|
||||
set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}]
|
||||
if {[llength $driverPins] == 0} {
|
||||
set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}]
|
||||
if {[llength $driverPorts] == 0} {
|
||||
print_error "No driver found for '$input_net'." $should_exist
|
||||
} elseif {[llength $driverPorts] > 1} {
|
||||
puts "WARNING: Multiple driver ports found for '$input_net'."
|
||||
return [lindex $driverPorts 0]
|
||||
}
|
||||
return $driverPorts
|
||||
} elseif {[llength $driverPins] > 1} {
|
||||
puts "WARNING: Multiple driver pins found for '$input_net'."
|
||||
return [lindex $driverPins 0]
|
||||
}
|
||||
return $driverPins
|
||||
}
|
||||
|
||||
proc find_pin_driver {input_pin {should_exist 1}} {
|
||||
set net [get_nets -quiet -of_objects $input_pin]
|
||||
if {[llength $net] == 0} {
|
||||
print_error "No net connected to pin '$input_pin'." $should_exist
|
||||
} elseif {[llength $net] > 1} {
|
||||
puts "ERROR: Multiple nets connected to pin '$input_pin'."
|
||||
exit -1
|
||||
}
|
||||
return [find_net_driver $net]
|
||||
}
|
||||
|
||||
proc find_matching_nets {cell nets match repl} {
|
||||
set matching_nets {}
|
||||
foreach net $nets {
|
||||
|
@ -386,6 +475,25 @@ proc find_matching_nets {cell nets match repl} {
|
|||
return $matching_nets
|
||||
}
|
||||
|
||||
# Map each entry of 'pins' to a pin of 'cell' by rewriting its name.
#   cell  - cell whose pins are looked up
#   pins  - source pins (their string form is the name that gets rewritten)
#   match - text to look for in each pin name
#   repl  - replacement text
# Each name is transformed with str_replace and resolved with get_cell_pin.
# Returns the resolved pins in the same order as 'pins'. Exits the script when
# nothing was resolved or when the number of resolved pins differs from the
# number of input pins.
proc find_matching_pins {cell pins match repl} {
    set resolved {}
    foreach pin $pins {
        set candidate [get_cell_pin $cell [str_replace $pin $match $repl]]
        if {$candidate == ""} {
            continue
        }
        lappend resolved $candidate
    }

    set found [llength $resolved]
    if {$found == 0} {
        puts "ERROR: No matching pins found for '$pins'."
        exit -1
    }
    if {$found != [llength $pins]} {
        puts "ERROR: Mismatch in number of matching pins."
        exit -1
    }
    return $resolved
}
|
||||
|
||||
proc replace_net_source {net source_pin} {
|
||||
foreach pin [find_net_sinks $net 0] {
|
||||
replace_pin_source $pin $source_pin
|
||||
|
@ -397,6 +505,8 @@ proc resolve_async_bram {inst} {
|
|||
|
||||
puts "INFO: Resolving asynchronous BRAM patch: '$inst'."
|
||||
|
||||
set hier_sep [get_hierarchy_separator]
|
||||
|
||||
set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"]
|
||||
set read_s_net [find_cell_nets $inst "read_s$"]
|
||||
set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"]
|
||||
|
@ -433,7 +543,7 @@ proc resolve_async_bram {inst} {
|
|||
}
|
||||
|
||||
# Create register next cell and return output pin
|
||||
set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"]
|
||||
set reg_next_pin [create_register_next $inst $raddr_src_cell]
|
||||
if {$reg_next_pin == ""} {
|
||||
puts "ERROR: failed to create register next value for '$raddr_src_cell'."
|
||||
exit -1
|
||||
|
@ -444,7 +554,7 @@ proc resolve_async_bram {inst} {
|
|||
|
||||
# Find the CE pin on raddr_src_cell
|
||||
if {$reg_ce_src_pin == ""} {
|
||||
set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}]
|
||||
set reg_ce_pin [get_pins "${raddr_src_cell}${hier_sep}CE"]
|
||||
if {[llength $reg_ce_pin] == 0} {
|
||||
puts "ERROR: No CE pin found on register cell '$raddr_src_cell'."
|
||||
exit -1
|
||||
|
@ -466,9 +576,10 @@ proc resolve_async_bram {inst} {
|
|||
# do we have a fully registered read address?
|
||||
if {[llength $reg_next_pins] == [llength $raddr_w_nets]} {
|
||||
puts "INFO: Fully registered read address detected."
|
||||
|
||||
# Connect all reg_next_pins to all input pins attached to raddr_s_nets
|
||||
set addr_width [llength $raddr_w_nets]
|
||||
for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} {
|
||||
set raddr_w_net [lindex $raddr_w_nets $addr_idx]
|
||||
set raddr_s_net [lindex $raddr_s_nets $addr_idx]
|
||||
set reg_next_pin [lindex $reg_next_pins $addr_idx]
|
||||
puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins."
|
||||
|
@ -481,26 +592,35 @@ proc resolve_async_bram {inst} {
|
|||
replace_net_source $read_s_net $reg_ce_src_pin
|
||||
|
||||
# Create Const<1>'s pin
|
||||
set vcc_pin [getOrCreateVCCPin "$inst/VCC"]
|
||||
set vcc_pin [getOrCreateVCCPin $inst]
|
||||
|
||||
# Connect vcc_pin to all input pins attached to is_raddr_reg_net
|
||||
puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins."
|
||||
replace_net_source $is_raddr_reg_net $vcc_pin
|
||||
|
||||
# Remove all async_ram cells
|
||||
foreach cell [find_nested_cells $inst "g_async_ram.*" 0] {
|
||||
remove_cell_from_netlist $cell
|
||||
}
|
||||
} else {
|
||||
puts "WARNING: Not all read addresses are registered!"
|
||||
|
||||
# Create Const<0>'s pin
|
||||
set gnd_pin [getOrCreateGNDPin "$inst/GND"]
|
||||
set gnd_pin [getOrCreateGNDPin $inst]
|
||||
|
||||
# Connect gnd_pin to all input pins attached to is_raddr_reg_net
|
||||
puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins."
|
||||
replace_net_source $is_raddr_reg_net $gnd_pin
|
||||
|
||||
# Remove all sync_ram cells
|
||||
foreach cell [find_nested_cells $inst "g_sync_ram.*" 0] {
|
||||
remove_cell_from_netlist $cell
|
||||
}
|
||||
}
|
||||
|
||||
# Remove all placeholder cells
|
||||
# Remove placeholder cell
|
||||
foreach cell [find_nested_cells $inst "placeholder$"] {
|
||||
remove_cell $cell
|
||||
if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."}
|
||||
remove_cell_from_netlist $cell
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -519,7 +639,26 @@ proc resolve_async_brams {} {
|
|||
}
|
||||
}
|
||||
|
||||
# Print every cell whose REF_NAME matches "*VX_async_ram_patch*", followed by
# each of its descendant cells and that descendant's REF_NAME. Prints a single
# informational line when the design contains no such cell.
proc dump_async_bram_cells {} {
    set patch_cells [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}]
    if {[llength $patch_cells] == 0} {
        puts "INFO: No async BRAM patch cells found in the design."
        return
    }
    foreach cell $patch_cells {
        puts "INFO: Found async BRAM patch cell: '$cell'."
        foreach child [find_cell_descendants $cell] {
            set type [get_property REF_NAME $child]
            puts "INFO: child cell: '$child', type: '$type'"
        }
    }
}
|
||||
|
||||
}
|
||||
|
||||
# Invoke the procedure to resolve async BRAM
|
||||
vortex::resolve_async_brams
|
||||
|
||||
# dump async bram cells
|
||||
#vortex::dump_async_bram_cells
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Function to export netlist to a Graphviz DOT file
|
||||
proc export_netlist {dot_file_name} {
|
||||
# Open the DOT file for writing
|
||||
|
|
|
@ -47,6 +47,9 @@ TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope
|
|||
# analyze build report
|
||||
vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary
|
||||
|
||||
# resuming build for routing
|
||||
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.route_design" make > build.log 2>&1 &
|
||||
|
||||
# running test
|
||||
FPGA_BIN_DIR=<bin_dir> TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo
|
||||
FPGA_BIN_DIR=<bin_dir> TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo
|
||||
|
|
|
@ -180,6 +180,7 @@ ifeq ($(TARGET), hw)
|
|||
cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin
|
||||
cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin
|
||||
cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin
|
||||
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_utilization_placed.rpt $(BUILD_DIR)/bin
|
||||
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin
|
||||
endif
|
||||
|
||||
|
|
|
@ -78,10 +78,10 @@ public:
|
|||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
_value = PLATFORM_MEMORY_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
|
|
|
@ -65,7 +65,7 @@ public:
|
|||
~vx_device() {
|
||||
#ifdef VM_ENABLE
|
||||
global_mem_.release(PAGE_TABLE_BASE_ADDR);
|
||||
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
|
||||
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
|
||||
// page_table_mem_->release(i->second << MEM_PAGE_SIZE);
|
||||
delete virtual_mem_;
|
||||
delete page_table_mem_;
|
||||
|
@ -113,10 +113,10 @@ public:
|
|||
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
|
||||
break;
|
||||
case VX_CAPS_NUM_MEM_BANKS:
|
||||
_value = MEMORY_BANKS;
|
||||
_value = PLATFORM_MEMORY_BANKS;
|
||||
break;
|
||||
case VX_CAPS_MEM_BANK_SIZE:
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
|
||||
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
|
@ -164,7 +164,7 @@ public:
|
|||
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
|
||||
return 0;
|
||||
|
||||
// Now all conditions are not met. Return true because the address needs translation
|
||||
// Now all conditions are not met. Return true because the address needs translation
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -277,7 +277,7 @@ public:
|
|||
#ifdef VM_ENABLE
|
||||
uint64_t pAddr = page_table_walk(dest_addr);
|
||||
// uint64_t pAddr;
|
||||
// try {
|
||||
// try {
|
||||
// pAddr = page_table_walk(dest_addr);
|
||||
// } catch ( Page_Fault_Exception ) {
|
||||
// // HW: place holder
|
||||
|
@ -466,18 +466,18 @@ public:
|
|||
CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), {
|
||||
return err;
|
||||
});
|
||||
|
||||
|
||||
if (virtual_mem_ == nullptr) {
|
||||
// virtual_mem_ does not interfere with physical mem, so no need to free space
|
||||
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
if (VM_ADDR_MODE == BARE)
|
||||
DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)");
|
||||
else
|
||||
CHECK_ERR(alloc_page_table(&pt_addr),{return err;});
|
||||
|
||||
|
||||
CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;});
|
||||
return 0;
|
||||
}
|
||||
|
@ -604,7 +604,7 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
// Leaf node found.
|
||||
// Leaf node found.
|
||||
// Check RWX permissions according to access type.
|
||||
if (pte.r == 0)
|
||||
{
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -27,9 +27,9 @@ class SimObjectBase;
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SimPortBase {
|
||||
public:
|
||||
public:
|
||||
virtual ~SimPortBase() {}
|
||||
|
||||
|
||||
SimObjectBase* module() const {
|
||||
return module_;
|
||||
}
|
||||
|
@ -92,7 +92,7 @@ public:
|
|||
auto cycles = queue_.front().cycles;
|
||||
queue_.pop();
|
||||
return cycles;
|
||||
}
|
||||
}
|
||||
|
||||
void tx_callback(const TxCallback& callback) {
|
||||
tx_cb_ = callback;
|
||||
|
@ -137,7 +137,7 @@ public:
|
|||
typedef std::shared_ptr<SimEventBase> Ptr;
|
||||
|
||||
virtual ~SimEventBase() {}
|
||||
|
||||
|
||||
virtual void fire() const = 0;
|
||||
|
||||
uint64_t cycles() const {
|
||||
|
@ -161,7 +161,7 @@ public:
|
|||
|
||||
typedef std::function<void (const Pkt&)> Func;
|
||||
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, func_(func)
|
||||
, pkt_(pkt)
|
||||
|
@ -194,8 +194,8 @@ public:
|
|||
const_cast<SimPort<Pkt>*>(port_)->transfer(pkt_, cycles_);
|
||||
}
|
||||
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, port_(port)
|
||||
, pkt_(pkt)
|
||||
{}
|
||||
|
@ -209,7 +209,7 @@ public:
|
|||
}
|
||||
|
||||
protected:
|
||||
const SimPort<Pkt>* port_;
|
||||
const SimPort<Pkt>* port_;
|
||||
Pkt pkt_;
|
||||
|
||||
static MemoryPool<SimPortEvent<Pkt>> allocator_;
|
||||
|
@ -230,11 +230,11 @@ public:
|
|||
|
||||
const std::string& name() const {
|
||||
return name_;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
SimObjectBase(const SimContext& ctx, const char* name);
|
||||
SimObjectBase(const SimContext& ctx, const std::string& name);
|
||||
|
||||
private:
|
||||
|
||||
|
@ -259,8 +259,8 @@ public:
|
|||
|
||||
protected:
|
||||
|
||||
SimObject(const SimContext& ctx, const char* name)
|
||||
: SimObjectBase(ctx, name)
|
||||
SimObject(const SimContext& ctx, const std::string& name)
|
||||
: SimObjectBase(ctx, name)
|
||||
{}
|
||||
|
||||
private:
|
||||
|
@ -283,9 +283,9 @@ private:
|
|||
};
|
||||
|
||||
class SimContext {
|
||||
private:
|
||||
private:
|
||||
SimContext() {}
|
||||
|
||||
|
||||
friend class SimPlatform;
|
||||
};
|
||||
|
||||
|
@ -320,10 +320,10 @@ public:
|
|||
|
||||
template <typename Pkt>
|
||||
void schedule(const typename SimCallEvent<Pkt>::Func& callback,
|
||||
const Pkt& pkt,
|
||||
uint64_t delay) {
|
||||
const Pkt& pkt,
|
||||
uint64_t delay) {
|
||||
assert(delay != 0);
|
||||
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
|
||||
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
|
||||
events_.emplace_back(evt);
|
||||
}
|
||||
|
||||
|
@ -341,10 +341,10 @@ public:
|
|||
auto evt_it_end = events_.end();
|
||||
while (evt_it != evt_it_end) {
|
||||
auto& event = *evt_it;
|
||||
if (cycles_ >= event->cycles()) {
|
||||
if (cycles_ >= event->cycles()) {
|
||||
event->fire();
|
||||
evt_it = events_.erase(evt_it);
|
||||
} else {
|
||||
} else {
|
||||
++evt_it;
|
||||
}
|
||||
}
|
||||
|
@ -352,7 +352,7 @@ public:
|
|||
for (auto& object : objects_) {
|
||||
object->do_tick();
|
||||
}
|
||||
// advance clock
|
||||
// advance clock
|
||||
++cycles_;
|
||||
}
|
||||
|
||||
|
@ -390,8 +390,8 @@ private:
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline SimObjectBase::SimObjectBase(const SimContext&, const char* name)
|
||||
: name_(name)
|
||||
inline SimObjectBase::SimObjectBase(const SimContext&, const std::string& name)
|
||||
: name_(name)
|
||||
{}
|
||||
|
||||
template <typename Impl>
|
||||
|
@ -403,8 +403,8 @@ typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args) {
|
|||
template <typename Pkt>
|
||||
void SimPort<Pkt>::push(const Pkt& pkt, uint64_t delay) const {
|
||||
if (peer_ && !tx_cb_) {
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
|
||||
} else {
|
||||
SimPlatform::instance().schedule(this, pkt, delay);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -47,7 +47,7 @@ public:
|
|||
, indent_(indent, ' ')
|
||||
, owner_(nullptr)
|
||||
{}
|
||||
|
||||
|
||||
explicit IndentStream(std::ostream& dest, int indent = 4)
|
||||
: dest_(dest.rdbuf())
|
||||
, isBeginLine_(true)
|
||||
|
@ -76,3 +76,14 @@ private:
|
|||
std::string indent_;
|
||||
std::ostream* owner_;
|
||||
};
|
||||
|
||||
/// Builds a std::string from a printf-style format string and its arguments.
///
/// The text is produced with std::snprintf in two passes: the first call
/// measures the required length, the second renders into a buffer of that
/// length plus one byte for the terminating '\0'.
///
/// @param fmt  printf-style format string.
/// @param args values forwarded to std::snprintf as the variadic arguments.
/// @return the formatted text, without the trailing '\0'.
/// @throws std::runtime_error if std::snprintf reports a negative length.
template <typename... Args>
std::string StrFormat(const std::string& fmt, Args... args) {
  // Measuring pass: a null buffer of size 0 only returns the needed length.
  const int length = std::snprintf(nullptr, 0, fmt.c_str(), args...);
  if (length < 0) {
    throw std::runtime_error("Error during formatting.");
  }
  // Rendering pass: reserve one extra byte for the terminator.
  std::vector<char> storage(static_cast<size_t>(length) + 1);
  std::snprintf(storage.data(), storage.size(), fmt.c_str(), args...);
  // Drop the terminator when converting to std::string.
  return std::string(storage.begin(), storage.end() - 1);
}
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,81 +21,77 @@ class CacheCluster : public SimObject<CacheCluster> {
|
|||
public:
|
||||
std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
|
||||
std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
std::vector<SimPort<MemReq>> MemReqPorts;
|
||||
std::vector<SimPort<MemRsp>> MemRspPorts;
|
||||
|
||||
CacheCluster(const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_caches,
|
||||
uint32_t num_requests,
|
||||
const CacheSim::Config& cache_config)
|
||||
CacheCluster(const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_units,
|
||||
const CacheSim::Config& cache_config)
|
||||
: SimObject(ctx, name)
|
||||
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(num_requests, this))
|
||||
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(num_requests, this))
|
||||
, MemReqPort(this)
|
||||
, MemRspPort(this)
|
||||
, caches_(MAX(num_caches, 0x1)) {
|
||||
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(cache_config.num_inputs, this))
|
||||
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(cache_config.num_inputs, this))
|
||||
, MemReqPorts(cache_config.mem_ports, this)
|
||||
, MemRspPorts(cache_config.mem_ports, this)
|
||||
, caches_(MAX(num_units, 0x1)) {
|
||||
|
||||
CacheSim::Config cache_config2(cache_config);
|
||||
if (0 == num_caches) {
|
||||
num_caches = 1;
|
||||
if (0 == num_units) {
|
||||
num_units = 1;
|
||||
cache_config2.bypass = true;
|
||||
}
|
||||
|
||||
char sname[100];
|
||||
|
||||
std::vector<MemSwitch::Ptr> input_arbs(num_inputs);
|
||||
for (uint32_t j = 0; j < num_inputs; ++j) {
|
||||
snprintf(sname, 100, "%s-input-arb%d", name, j);
|
||||
input_arbs.at(j) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, cache_config.num_inputs);
|
||||
for (uint32_t i = 0; i < num_requests; ++i) {
|
||||
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(j)->ReqIn.at(i));
|
||||
input_arbs.at(j)->RspIn.at(i).bind(&this->CoreRspPorts.at(j).at(i));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<MemSwitch::Ptr> mem_arbs(cache_config.num_inputs);
|
||||
// Arbitrate incoming core interfaces
|
||||
std::vector<MemArbiter::Ptr> input_arbs(cache_config.num_inputs);
|
||||
for (uint32_t i = 0; i < cache_config.num_inputs; ++i) {
|
||||
snprintf(sname, 100, "%s-mem-arb%d", name, i);
|
||||
mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_inputs, num_caches);
|
||||
snprintf(sname, 100, "%s-input-arb%d", name, i);
|
||||
input_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_inputs, num_units);
|
||||
for (uint32_t j = 0; j < num_inputs; ++j) {
|
||||
input_arbs.at(j)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(j));
|
||||
mem_arbs.at(i)->RspIn.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
|
||||
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(i)->ReqIn.at(j));
|
||||
input_arbs.at(i)->RspIn.at(j).bind(&this->CoreRspPorts.at(j).at(i));
|
||||
}
|
||||
}
|
||||
|
||||
snprintf(sname, 100, "%s-cache-arb", name);
|
||||
auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
|
||||
// Arbitrate outgoing memory interfaces
|
||||
std::vector<MemArbiter::Ptr> mem_arbs(cache_config.mem_ports);
|
||||
for (uint32_t i = 0; i < cache_config.mem_ports; ++i) {
|
||||
snprintf(sname, 100, "%s-mem-arb%d", name, i);
|
||||
mem_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_units, 1);
|
||||
mem_arbs.at(i)->ReqOut.at(0).bind(&this->MemReqPorts.at(i));
|
||||
this->MemRspPorts.at(i).bind(&mem_arbs.at(i)->RspOut.at(0));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < num_caches; ++i) {
|
||||
// Connect caches
|
||||
for (uint32_t i = 0; i < num_units; ++i) {
|
||||
snprintf(sname, 100, "%s-cache%d", name, i);
|
||||
caches_.at(i) = CacheSim::Create(sname, cache_config2);
|
||||
|
||||
for (uint32_t j = 0; j < cache_config.num_inputs; ++j) {
|
||||
mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
|
||||
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
|
||||
input_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
|
||||
caches_.at(i)->CoreRspPorts.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
|
||||
}
|
||||
|
||||
caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
|
||||
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
|
||||
for (uint32_t j = 0; j < cache_config.mem_ports; ++j) {
|
||||
caches_.at(i)->MemReqPorts.at(j).bind(&mem_arbs.at(j)->ReqIn.at(i));
|
||||
mem_arbs.at(j)->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(j));
|
||||
}
|
||||
}
|
||||
|
||||
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
|
||||
this->MemRspPort.bind(&cache_arb->RspOut.at(0));
|
||||
}
|
||||
|
||||
~CacheCluster() {}
|
||||
|
||||
void reset() {}
|
||||
|
||||
|
||||
void tick() {}
|
||||
|
||||
CacheSim::PerfStats perf_stats() const {
|
||||
CacheSim::PerfStats perf;
|
||||
for (auto cache : caches_) {
|
||||
perf += cache->perf_stats();
|
||||
}
|
||||
}
|
||||
return perf;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
#include <vector>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
#include <string.h>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
|
@ -305,8 +304,8 @@ private:
|
|||
Config config_;
|
||||
params_t params_;
|
||||
std::vector<bank_t> banks_;
|
||||
MemSwitch::Ptr bank_switch_;
|
||||
MemSwitch::Ptr bypass_switch_;
|
||||
MemArbiter::Ptr bank_arb_;
|
||||
std::vector<MemArbiter::Ptr> nc_arbs_;
|
||||
std::vector<SimPort<MemReq>> mem_req_ports_;
|
||||
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
|
||||
std::vector<bank_req_t> pipeline_reqs_;
|
||||
|
@ -322,88 +321,51 @@ public:
|
|||
, config_(config)
|
||||
, params_(config)
|
||||
, banks_((1 << config.B), {config, params_})
|
||||
, nc_arbs_(config.mem_ports)
|
||||
, mem_req_ports_((1 << config.B), simobject)
|
||||
, mem_rsp_ports_((1 << config.B), simobject)
|
||||
, pipeline_reqs_((1 << config.B), config.ports_per_bank)
|
||||
{
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
|
||||
|
||||
if (config_.bypass) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
|
||||
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
|
||||
auto bypass_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, config_.num_inputs, config_.mem_ports);
|
||||
for (uint32_t i = 0; i < config_.num_inputs; ++i) {
|
||||
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
|
||||
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
||||
simobject->CoreReqPorts.at(i).bind(&bypass_arb->ReqIn.at(i));
|
||||
bypass_arb->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
|
||||
}
|
||||
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
|
||||
bypass_arb->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i));
|
||||
simobject->MemRspPorts.at(i).bind(&bypass_arb->RspOut.at(i));
|
||||
}
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
return;
|
||||
}
|
||||
|
||||
if (strcmp(simobject->name().c_str(), "l3cache")) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
// create non-cacheable arbiter
|
||||
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
|
||||
snprintf(sname, 100, "%s-nc-arb%d", simobject->name().c_str(), i);
|
||||
nc_arbs_.at(i) = MemArbiter::Create(sname, ArbiterType::Priority, 2, 1);
|
||||
}
|
||||
|
||||
if (config.B != 0) {
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
} else {
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
} else {
|
||||
// TODO: Change this into a crossbar
|
||||
uint32_t max = MAX(2, config_.num_inputs);
|
||||
//printf("%s connecting\n", simobject_->name().c_str());
|
||||
//3
|
||||
if (config.B != 0) {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
|
||||
for (uint32_t i = 0; i < max; ++i) {
|
||||
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
|
||||
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
|
||||
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
|
||||
}
|
||||
} else {
|
||||
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
|
||||
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
|
||||
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
|
||||
}
|
||||
// Connect non-cacheable arbiter output to outgoing memory ports
|
||||
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
|
||||
nc_arbs_.at(i)->ReqOut.at(0).bind(&simobject->MemReqPorts.at(i));
|
||||
simobject->MemRspPorts.at(i).bind(&nc_arbs_.at(i)->RspOut.at(0));
|
||||
}
|
||||
|
||||
if (config.B != 0)
|
||||
{
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
|
||||
{
|
||||
//1
|
||||
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
|
||||
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
|
||||
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
//2
|
||||
if (config_.num_inputs > 1) {
|
||||
for (uint32_t i = 0; i < max; ++i) {
|
||||
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
|
||||
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
|
||||
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
|
||||
}
|
||||
} else {
|
||||
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
|
||||
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
|
||||
}
|
||||
// Create bank's memory arbiter
|
||||
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
|
||||
auto bank_mem_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B), config_.mem_ports);
|
||||
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
|
||||
mem_req_ports_.at(i).bind(&bank_mem_arb->ReqIn.at(i));
|
||||
bank_mem_arb->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
|
||||
}
|
||||
|
||||
// Connect bank's memory arbiter to non-cacheable arbiter's input 0
|
||||
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
|
||||
bank_mem_arb->ReqOut.at(i).bind(&nc_arbs_.at(i)->ReqIn.at(0));
|
||||
nc_arbs_.at(i)->RspIn.at(0).bind(&bank_mem_arb->RspOut.at(i));
|
||||
}
|
||||
|
||||
// calculate cache initialization cycles
|
||||
|
@ -434,8 +396,8 @@ public:
|
|||
}
|
||||
|
||||
// handle cache bypass responses
|
||||
{
|
||||
auto& bypass_port = bypass_switch_->RspIn.at(1);
|
||||
for (uint32_t i = 0, n = config_.mem_ports; i < n; ++i) {
|
||||
auto& bypass_port = nc_arbs_.at(i)->RspIn.at(1);
|
||||
if (!bypass_port.empty()) {
|
||||
auto& mem_rsp = bypass_port.front();
|
||||
this->processBypassResponse(mem_rsp);
|
||||
|
@ -468,7 +430,7 @@ public:
|
|||
continue;
|
||||
|
||||
auto& mem_rsp = mem_rsp_port.front();
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " fill-rsp: " << mem_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-fill-rsp: " << mem_rsp);
|
||||
pipeline_req.type = bank_req_t::Fill;
|
||||
pipeline_req.tag = mem_rsp.tag;
|
||||
mem_rsp_port.pop();
|
||||
|
@ -533,7 +495,7 @@ public:
|
|||
bank_req.type = bank_req_t::Core;
|
||||
bank_req.write = core_req.write;
|
||||
pipeline_req = bank_req;
|
||||
DT(3, simobject_->name() << " core-req: " << core_req);
|
||||
DT(3, simobject_->name() << "-core-req: " << core_req);
|
||||
}
|
||||
|
||||
if (core_req.write)
|
||||
|
@ -561,21 +523,22 @@ private:
|
|||
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
|
||||
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
|
||||
}
|
||||
|
||||
void processBypassRequest(const MemReq& core_req, uint32_t req_id) {
|
||||
{
|
||||
MemReq mem_req(core_req);
|
||||
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
|
||||
bypass_switch_->ReqIn.at(1).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << " bypass-dram-req: " << mem_req);
|
||||
uint32_t mem_port = req_id % config_.mem_ports;
|
||||
nc_arbs_.at(mem_port)->ReqIn.at(1).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bypass-dram-req: " << mem_req);
|
||||
}
|
||||
|
||||
if (core_req.write && config_.write_reponse) {
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
|
||||
simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1);
|
||||
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -605,7 +568,7 @@ private:
|
|||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " replay: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-replay: " << core_rsp);
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
@ -649,7 +612,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
|
||||
} else {
|
||||
// mark line as dirty
|
||||
hit_line.dirty = true;
|
||||
|
@ -662,7 +625,7 @@ private:
|
|||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -681,7 +644,7 @@ private:
|
|||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " writeback: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-writeback: " << mem_req);
|
||||
++perf_stats_.evictions;
|
||||
}
|
||||
}
|
||||
|
@ -695,7 +658,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
|
||||
}
|
||||
// send core response
|
||||
if (config_.write_reponse) {
|
||||
|
@ -704,7 +667,7 @@ private:
|
|||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -713,7 +676,7 @@ private:
|
|||
|
||||
// allocate MSHR
|
||||
auto mshr_id = bank.mshr.allocate(pipeline_req, (free_line_id != -1) ? free_line_id : repl_line_id);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " mshr-enqueue: " << pipeline_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-mshr-enqueue: " << pipeline_req);
|
||||
|
||||
// send fill request
|
||||
if (!mshr_pending) {
|
||||
|
@ -724,7 +687,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << " fill: " << mem_req);
|
||||
DT(3, simobject_->name() << "-bank" << bank_id << "-fill: " << mem_req);
|
||||
++pending_fill_reqs_;
|
||||
}
|
||||
}
|
||||
|
@ -743,8 +706,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
|
|||
: SimObject<CacheSim>(ctx, name)
|
||||
, CoreReqPorts(config.num_inputs, this)
|
||||
, CoreRspPorts(config.num_inputs, this)
|
||||
, MemReqPorts(NUM_MEM_PORTS, this)
|
||||
, MemRspPorts(NUM_MEM_PORTS, this)
|
||||
, MemReqPorts(config.mem_ports, this)
|
||||
, MemRspPorts(config.mem_ports, this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ public:
|
|||
uint8_t addr_width; // word address bits
|
||||
uint8_t ports_per_bank; // number of ports per bank
|
||||
uint8_t num_inputs; // number of inputs
|
||||
uint8_t mem_ports; // memory ports
|
||||
bool write_back; // is write-back
|
||||
bool write_reponse; // enable write response
|
||||
uint16_t mshr_size; // MSHR buffer size
|
||||
|
|
|
@ -20,9 +20,9 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
ProcessorImpl* processor,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "cluster")
|
||||
, mem_req_port(this)
|
||||
, mem_rsp_port(this)
|
||||
: SimObject(ctx, StrFormat("cluster%d", cluster_id))
|
||||
, mem_req_ports(L2_MEM_PORTS, this)
|
||||
, mem_rsp_ports(L2_MEM_PORTS, this)
|
||||
, cluster_id_(cluster_id)
|
||||
, processor_(processor)
|
||||
, sockets_(NUM_SOCKETS)
|
||||
|
@ -35,31 +35,14 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
|
||||
// create sockets
|
||||
|
||||
snprintf(sname, 100, "cluster%d-icache-arb", cluster_id);
|
||||
auto icache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
|
||||
|
||||
snprintf(sname, 100, "cluster%d-dcache-arb", cluster_id);
|
||||
auto dcache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
|
||||
|
||||
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
|
||||
uint32_t socket_id = cluster_id * sockets_per_cluster + i;
|
||||
auto socket = Socket::Create(socket_id,
|
||||
this,
|
||||
arch,
|
||||
dcrs);
|
||||
|
||||
socket->icache_mem_req_port.bind(&icache_switch->ReqIn.at(i));
|
||||
icache_switch->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
|
||||
|
||||
socket->dcache_mem_req_port.bind(&dcache_switch->ReqIn.at(i));
|
||||
dcache_switch->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);
|
||||
|
||||
sockets_.at(i) = socket;
|
||||
sockets_.at(i) = Socket::Create(socket_id, this, arch, dcrs);
|
||||
}
|
||||
|
||||
// Create l2cache
|
||||
|
||||
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
|
||||
snprintf(sname, 100, "%s-l2cache", this->name().c_str());
|
||||
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
|
||||
!L2_ENABLED,
|
||||
log2ceil(L2_CACHE_SIZE),// C
|
||||
|
@ -69,21 +52,27 @@ Cluster::Cluster(const SimContext& ctx,
|
|||
log2ceil(L2_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
2, // request size
|
||||
L2_NUM_REQS, // request size
|
||||
L2_MEM_PORTS, // memory ports
|
||||
L2_WRITEBACK, // write-back
|
||||
false, // write response
|
||||
L2_MSHR_SIZE, // mshr size
|
||||
2, // pipeline latency
|
||||
});
|
||||
|
||||
l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
|
||||
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));
|
||||
// connect l2cache core interfaces
|
||||
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
|
||||
for (uint32_t j = 0; j < L1_MEM_PORTS; ++j) {
|
||||
sockets_.at(i)->mem_req_ports.at(j).bind(&l2cache_->CoreReqPorts.at(i * L1_MEM_PORTS + j));
|
||||
l2cache_->CoreRspPorts.at(i * L1_MEM_PORTS + j).bind(&sockets_.at(i)->mem_rsp_ports.at(j));
|
||||
}
|
||||
}
|
||||
|
||||
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
|
||||
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));
|
||||
|
||||
dcache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
|
||||
l2cache_->CoreRspPorts.at(1).bind(&dcache_switch->RspOut.at(0));
|
||||
// connect l2cache memory interfaces
|
||||
for (uint32_t i = 0; i < L2_MEM_PORTS; ++i) {
|
||||
l2cache_->MemReqPorts.at(i).bind(&this->mem_req_ports.at(i));
|
||||
this->mem_rsp_ports.at(i).bind(&l2cache_->MemRspPorts.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
Cluster::~Cluster() {
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -32,13 +32,13 @@ public:
|
|||
CacheSim::PerfStats l2cache;
|
||||
};
|
||||
|
||||
SimPort<MemReq> mem_req_port;
|
||||
SimPort<MemRsp> mem_rsp_port;
|
||||
std::vector<SimPort<MemReq>> mem_req_ports;
|
||||
std::vector<SimPort<MemRsp>> mem_rsp_ports;
|
||||
|
||||
Cluster(const SimContext& ctx,
|
||||
Cluster(const SimContext& ctx,
|
||||
uint32_t cluster_id,
|
||||
ProcessorImpl* processor,
|
||||
const Arch &arch,
|
||||
ProcessorImpl* processor,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs);
|
||||
|
||||
~Cluster();
|
||||
|
@ -63,16 +63,16 @@ public:
|
|||
|
||||
bool running() const;
|
||||
|
||||
int get_exitcode() const;
|
||||
int get_exitcode() const;
|
||||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
|
||||
|
||||
PerfStats perf_stats() const;
|
||||
|
||||
|
||||
private:
|
||||
uint32_t cluster_id_;
|
||||
ProcessorImpl* processor_;
|
||||
std::vector<Socket::Ptr> sockets_;
|
||||
std::vector<Socket::Ptr> sockets_;
|
||||
std::vector<CoreMask> barriers_;
|
||||
CacheSim::Ptr l2cache_;
|
||||
uint32_t cores_per_socket_;
|
||||
|
|
|
@ -27,10 +27,15 @@ inline constexpr int LSU_WORD_SIZE = (XLEN / 8);
|
|||
inline constexpr int LSU_CHANNELS = NUM_LSU_LANES;
|
||||
inline constexpr int LSU_NUM_REQS = (NUM_LSU_BLOCKS * LSU_CHANNELS);
|
||||
|
||||
// The dcache uses coalesced memory blocks
|
||||
inline constexpr int DCACHE_WORD_SIZE = LSU_LINE_SIZE;
|
||||
inline constexpr int DCACHE_CHANNELS = UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE);
|
||||
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
|
||||
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
|
||||
|
||||
inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE);
|
||||
|
||||
inline constexpr int L2_NUM_REQS = NUM_SOCKETS * L1_MEM_PORTS;
|
||||
|
||||
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS * L2_MEM_PORTS;
|
||||
|
||||
inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH;
|
|
@ -30,7 +30,7 @@ Core::Core(const SimContext& ctx,
|
|||
Socket* socket,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "core")
|
||||
: SimObject(ctx, StrFormat("core%d", core_id))
|
||||
, icache_req_ports(1, this)
|
||||
, icache_rsp_ports(1, this)
|
||||
, dcache_req_ports(DCACHE_NUM_REQS, this)
|
||||
|
@ -44,7 +44,7 @@ Core::Core(const SimContext& ctx,
|
|||
, operands_(ISSUE_WIDTH)
|
||||
, dispatchers_((uint32_t)FUType::Count)
|
||||
, func_units_((uint32_t)FUType::Count)
|
||||
, lsu_demux_(NUM_LSU_BLOCKS)
|
||||
, lmem_switch_(NUM_LSU_BLOCKS)
|
||||
, mem_coalescers_(NUM_LSU_BLOCKS)
|
||||
, lsu_dcache_adapter_(NUM_LSU_BLOCKS)
|
||||
, lsu_lmem_adapter_(NUM_LSU_BLOCKS)
|
||||
|
@ -59,12 +59,12 @@ Core::Core(const SimContext& ctx,
|
|||
|
||||
// create the memory coalescer
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-coalescer%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-coalescer%d", this->name().c_str(), i);
|
||||
mem_coalescers_.at(i) = MemCoalescer::Create(sname, LSU_CHANNELS, DCACHE_CHANNELS, DCACHE_WORD_SIZE, LSUQ_OUT_SIZE, 1);
|
||||
}
|
||||
|
||||
// create local memory
|
||||
snprintf(sname, 100, "core%d-local_mem", core_id);
|
||||
snprintf(sname, 100, "%s-local_mem", this->name().c_str());
|
||||
local_mem_ = LocalMem::Create(sname, LocalMem::Config{
|
||||
(1 << LMEM_LOG_SIZE),
|
||||
LSU_WORD_SIZE,
|
||||
|
@ -73,31 +73,31 @@ Core::Core(const SimContext& ctx,
|
|||
false
|
||||
});
|
||||
|
||||
// create lsu demux
|
||||
// create lmem switch
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i);
|
||||
lsu_demux_.at(i) = LocalMemDemux::Create(sname, 1);
|
||||
snprintf(sname, 100, "%s-lmem_switch%d", this->name().c_str(), i);
|
||||
lmem_switch_.at(i) = LocalMemSwitch::Create(sname, 1);
|
||||
}
|
||||
|
||||
// create lsu dcache adapter
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_dcache_adapter%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-lsu_dcache_adapter%d", this->name().c_str(), i);
|
||||
lsu_dcache_adapter_.at(i) = LsuMemAdapter::Create(sname, DCACHE_CHANNELS, 1);
|
||||
}
|
||||
|
||||
// create lsu lmem adapter
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_lmem_adapter%d", core_id, i);
|
||||
snprintf(sname, 100, "%s-lsu_lmem_adapter%d", this->name().c_str(), i);
|
||||
lsu_lmem_adapter_.at(i) = LsuMemAdapter::Create(sname, LSU_CHANNELS, 1);
|
||||
}
|
||||
|
||||
// connect lmem switch
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
lsu_demux_.at(b)->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn);
|
||||
mem_coalescers_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspDC);
|
||||
lmem_switch_.at(b)->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn);
|
||||
mem_coalescers_.at(b)->RspIn.bind(&lmem_switch_.at(b)->RspDC);
|
||||
|
||||
lsu_demux_.at(b)->ReqLmem.bind(&lsu_lmem_adapter_.at(b)->ReqIn);
|
||||
lsu_lmem_adapter_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspLmem);
|
||||
lmem_switch_.at(b)->ReqLmem.bind(&lsu_lmem_adapter_.at(b)->ReqIn);
|
||||
lsu_lmem_adapter_.at(b)->RspIn.bind(&lmem_switch_.at(b)->RspLmem);
|
||||
}
|
||||
|
||||
// connect coalescer-adapter
|
||||
|
@ -130,7 +130,7 @@ Core::Core(const SimContext& ctx,
|
|||
dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_LSU_BLOCKS, NUM_LSU_LANES);
|
||||
dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_SFU_BLOCKS, NUM_SFU_LANES);
|
||||
dispatchers_.at((int)FUType::TCU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_TCU_BLOCKS, NUM_TCU_LANES);
|
||||
|
||||
|
||||
// initialize execute units
|
||||
func_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
|
||||
func_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
|
||||
|
@ -140,8 +140,8 @@ Core::Core(const SimContext& ctx,
|
|||
|
||||
// bind commit arbiters
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
|
||||
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
|
||||
snprintf(sname, 100, "%s-commit-arb%d", this->name().c_str(), i);
|
||||
auto arbiter = TraceArbiter::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
|
||||
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
|
||||
func_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ class Socket;
|
|||
class Arch;
|
||||
class DCRS;
|
||||
|
||||
using TraceSwitch = Mux<instr_trace_t*>;
|
||||
using TraceArbiter = Arbiter<instr_trace_t*>;
|
||||
|
||||
class Core : public SimObject<Core> {
|
||||
public:
|
||||
|
@ -154,7 +154,7 @@ private:
|
|||
std::vector<Dispatcher::Ptr> dispatchers_;
|
||||
std::vector<FuncUnit::Ptr> func_units_;
|
||||
LocalMem::Ptr local_mem_;
|
||||
std::vector<LocalMemDemux::Ptr> lsu_demux_;
|
||||
std::vector<LocalMemSwitch::Ptr> lmem_switch_;
|
||||
std::vector<MemCoalescer::Ptr> mem_coalescers_;
|
||||
std::vector<LsuMemAdapter::Ptr> lsu_dcache_adapter_;
|
||||
std::vector<LsuMemAdapter::Ptr> lsu_lmem_adapter_;
|
||||
|
@ -169,7 +169,7 @@ private:
|
|||
|
||||
PerfStats perf_stats_;
|
||||
|
||||
std::vector<TraceSwitch::Ptr> commit_arbs_;
|
||||
std::vector<TraceArbiter::Ptr> commit_arbs_;
|
||||
|
||||
uint32_t commit_exe_;
|
||||
uint32_t ibuffer_idx_;
|
||||
|
|
|
@ -1421,7 +1421,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
std::abort();
|
||||
}
|
||||
} break;
|
||||
case Opcode::TCU:
|
||||
case Opcode::TCU:
|
||||
{ //TODO - make it data-type flexible
|
||||
uint32_t mem_bytes = 1;
|
||||
DP(3, "mem_bytes=" << mem_bytes << std::endl);
|
||||
|
@ -1443,7 +1443,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
|
||||
//LOAD
|
||||
if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp)
|
||||
{
|
||||
{
|
||||
num_threads_actv = tc_size*tc_size*n_tiles*TC_per_warp;
|
||||
num_data_per_thread = 1;
|
||||
}
|
||||
|
@ -1456,7 +1456,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
|
||||
//STORE
|
||||
if(num_threads > tc_size*tc_size*TC_per_warp)
|
||||
{
|
||||
{
|
||||
num_threads_actv_st = tc_size*tc_size*TC_per_warp;
|
||||
num_data_per_thread_st = 1;
|
||||
}
|
||||
|
@ -1466,30 +1466,30 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
num_data_per_thread_st = (tc_size*tc_size)/num_threads_per_tc;
|
||||
}
|
||||
data_bytes_store = mem_bytes*num_data_per_thread_st;
|
||||
|
||||
|
||||
DP(3, "Num Tiles=" << n_tiles << std::endl);
|
||||
|
||||
|
||||
switch (func3) {
|
||||
case 0:
|
||||
{ //Matrix Load
|
||||
case 0:
|
||||
{ //Matrix Load
|
||||
|
||||
DP (4, "TCU LOAD");
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::TCU_LOAD;
|
||||
|
||||
|
||||
trace->src_regs[0] = {RegType::Integer, rsrc0};
|
||||
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
|
||||
trace->data = trace_data;
|
||||
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
{
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
DP(3, "Thread ID" << t);
|
||||
DP(3, "Thread ID" << t);
|
||||
|
||||
uint32_t base_addr = rsdata[t][0].i ;
|
||||
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_load};
|
||||
|
||||
|
||||
//Load A or B (depends on immsrc)
|
||||
int loop_offset = 0;
|
||||
DP(3, "n_tiles = " << n_tiles << "; num_data_per_thread = " << num_data_per_thread <<std::endl);
|
||||
|
@ -1502,10 +1502,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]);
|
||||
}
|
||||
}
|
||||
rd_write = true;
|
||||
rd_write = true;
|
||||
} break;
|
||||
case 1:
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
DP(4, "TCU STORE");
|
||||
trace->fu_type = FUType::LSU;
|
||||
trace->lsu_type = LsuType::TCU_STORE;
|
||||
|
@ -1513,12 +1513,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
|
||||
trace->data = trace_data;
|
||||
|
||||
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
|
||||
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
|
||||
{
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
|
||||
DP(3, "Thread ID" << t);
|
||||
DP(3, "Thread ID" << t);
|
||||
uint32_t base_addr = rsdata[t][0].i ;
|
||||
|
||||
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_store};
|
||||
|
@ -1529,7 +1529,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0));
|
||||
*temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n];
|
||||
|
||||
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
|
||||
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
|
||||
}
|
||||
}
|
||||
//Clear the scratchpad
|
||||
|
@ -1539,18 +1539,18 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
case 2:
|
||||
{ //Matrix Multiply
|
||||
DP(4, "TCU MULTIPLY MAT");
|
||||
trace->fu_type = FUType::TCU;
|
||||
trace->tcu_type = TCUType::TCU_MUL;
|
||||
uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp);
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
|
||||
{
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
|
||||
DP(3, "Thread ID" << t);
|
||||
|
||||
DP(3, "Thread ID" << t);
|
||||
//TC operation [only 1 thread in 1 warp needs to do this]
|
||||
if (t%threads_per_tc == 0)
|
||||
{
|
||||
|
@ -1563,7 +1563,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size;
|
||||
uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2;
|
||||
for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation?
|
||||
{
|
||||
{
|
||||
for (int i = 0; i < tc_size; i++) { //ROW-1
|
||||
for (int j = 0; j < tc_size; j++) { //COL-2
|
||||
int sum = 0;
|
||||
|
|
|
@ -116,12 +116,12 @@ void LsuUnit::tick() {
|
|||
|
||||
// handle memory responses
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
auto& lsu_rsp_port = core_->lsu_demux_.at(b)->RspIn;
|
||||
auto& lsu_rsp_port = core_->lmem_switch_.at(b)->RspIn;
|
||||
if (lsu_rsp_port.empty())
|
||||
continue;
|
||||
auto& state = states_.at(b);
|
||||
auto& lsu_rsp = lsu_rsp_port.front();
|
||||
DT(3, this->name() << " mem-rsp: " << lsu_rsp);
|
||||
DT(3, this->name() << "-mem-rsp: " << lsu_rsp);
|
||||
auto& entry = state.pending_rd_reqs.at(lsu_rsp.tag);
|
||||
auto trace = entry.trace;
|
||||
assert(!entry.mask.none());
|
||||
|
@ -146,7 +146,7 @@ void LsuUnit::tick() {
|
|||
continue;
|
||||
Outputs.at(iw).push(state.fence_trace, 1);
|
||||
state.fence_lock = false;
|
||||
DT(3, this->name() << " fence-unlock: " << state.fence_trace);
|
||||
DT(3, this->name() << "-fence-unlock: " << state.fence_trace);
|
||||
}
|
||||
|
||||
// check input queue
|
||||
|
@ -160,7 +160,7 @@ void LsuUnit::tick() {
|
|||
// schedule fence lock
|
||||
state.fence_trace = trace;
|
||||
state.fence_lock = true;
|
||||
DT(3, this->name() << " fence-lock: " << *trace);
|
||||
DT(3, this->name() << "-fence-lock: " << *trace);
|
||||
// remove input
|
||||
input.pop();
|
||||
continue;
|
||||
|
@ -171,7 +171,7 @@ void LsuUnit::tick() {
|
|||
// check pending queue capacity
|
||||
if (!is_write && state.pending_rd_reqs.full()) {
|
||||
if (!trace->log_once(true)) {
|
||||
DT(4, "*** " << this->name() << " queue-full: " << *trace);
|
||||
DT(4, "*** " << this->name() << "-queue-full: " << *trace);
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
|
@ -201,8 +201,8 @@ void LsuUnit::tick() {
|
|||
lsu_req.uuid = trace->uuid;
|
||||
|
||||
// send memory request
|
||||
core_->lsu_demux_.at(block_idx)->ReqIn.push(lsu_req);
|
||||
DT(3, this->name() << " mem-req: " << lsu_req);
|
||||
core_->lmem_switch_.at(block_idx)->ReqIn.push(lsu_req);
|
||||
DT(3, this->name() << "-mem-req: " << lsu_req);
|
||||
|
||||
// update stats
|
||||
auto num_addrs = lsu_req.mask.count();
|
||||
|
@ -237,7 +237,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
{
|
||||
req_per_thread= (1>(trace_data->mem_addrs.at(0).size)/4)? 1: ((trace_data->mem_addrs.at(0).size)/4);
|
||||
}
|
||||
|
||||
|
||||
auto t0 = trace->pid * NUM_LSU_LANES;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) {
|
||||
|
@ -246,11 +246,11 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
continue;
|
||||
|
||||
int req_idx = block_idx * LSU_CHANNELS + (i % LSU_CHANNELS);
|
||||
auto& dcache_req_port = core_->lsu_demux_.at(req_idx)->ReqIn;
|
||||
auto& dcache_req_port = core_->lmem_switch_.at(req_idx)->ReqIn;
|
||||
|
||||
auto mem_addr = trace_data->mem_addrs.at(t);
|
||||
auto type = get_addr_type(mem_addr.addr);
|
||||
// DT(3, "addr_type = " << type << ", " << *trace);
|
||||
// DT(3, "addr_type = " << type << ", " << *trace);
|
||||
uint32_t mem_bytes = 1;
|
||||
for (int i = 0; i < req_per_thread; i++)
|
||||
{
|
||||
|
@ -261,7 +261,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
mem_req.tag = tag;
|
||||
mem_req.cid = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
|
||||
dcache_req_port.push(mem_req, 1);
|
||||
DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
|
||||
<< ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace);
|
||||
|
@ -272,7 +272,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
++core_->perf_stats_.loads;
|
||||
++pending_loads_;
|
||||
}
|
||||
|
||||
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
@ -282,7 +282,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
|
||||
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
|
||||
: FuncUnit(ctx, core, "TCU")
|
||||
{}
|
||||
|
||||
|
@ -290,7 +290,7 @@ void TcuUnit::tick() {
|
|||
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& input = Inputs.at(i);
|
||||
if (input.empty())
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
|
@ -307,7 +307,7 @@ void TcuUnit::tick() {
|
|||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->tcu_type << ", " << *trace);
|
||||
input.pop();
|
||||
}
|
||||
|
|
|
@ -24,9 +24,8 @@ protected:
|
|||
LocalMem* simobject_;
|
||||
Config config_;
|
||||
RAM ram_;
|
||||
int32_t bank_sel_addr_start_;
|
||||
int32_t bank_sel_addr_end_;
|
||||
PerfStats perf_stats_;
|
||||
MemCrossBar::Ptr mem_xbar_;
|
||||
mutable PerfStats perf_stats_;
|
||||
|
||||
uint64_t to_local_addr(uint64_t addr) {
|
||||
uint32_t total_lines = config_.capacity / config_.line_size;
|
||||
|
@ -40,9 +39,15 @@ public:
|
|||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, ram_(config.capacity)
|
||||
, bank_sel_addr_start_(0)
|
||||
, bank_sel_addr_end_(config.B-1)
|
||||
{}
|
||||
{
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_reqs, (1 << config.B));
|
||||
for (uint32_t i = 0; i < config.num_reqs; ++i) {
|
||||
simobject->Inputs.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||
mem_xbar_->RspIn.at(i).bind(&simobject->Outputs.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~Impl() {}
|
||||
|
||||
|
@ -63,45 +68,33 @@ public:
|
|||
}
|
||||
|
||||
void tick() {
|
||||
std::vector<bool> in_used_banks(1 << config_.B);
|
||||
for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
|
||||
auto& core_req_port = simobject_->Inputs.at(req_id);
|
||||
if (core_req_port.empty())
|
||||
// process bank requests from xbar
|
||||
uint32_t num_banks = (1 << config_.B);
|
||||
for (uint32_t i = 0; i < num_banks; ++i) {
|
||||
auto& xbar_req_out = mem_xbar_->ReqOut.at(i);
|
||||
if (xbar_req_out.empty())
|
||||
continue;
|
||||
|
||||
auto& core_req = core_req_port.front();
|
||||
auto& bank_req = xbar_req_out.front();
|
||||
DT(4, simobject_->name() << "-bank" << i << "-req : " << bank_req);
|
||||
|
||||
uint32_t bank_id = 0;
|
||||
if (bank_sel_addr_end_ >= bank_sel_addr_start_) {
|
||||
bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
|
||||
}
|
||||
|
||||
// bank conflict check
|
||||
if (in_used_banks.at(bank_id)) {
|
||||
++perf_stats_.bank_stalls;
|
||||
continue;
|
||||
}
|
||||
|
||||
DT(4, simobject_->name() << " mem-req" << req_id << ": "<< core_req);
|
||||
|
||||
in_used_banks.at(bank_id) = true;
|
||||
|
||||
if (!core_req.write || config_.write_reponse) {
|
||||
// send response
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
|
||||
simobject_->Outputs.at(req_id).push(core_rsp, 1);
|
||||
if (!bank_req.write || config_.write_reponse) {
|
||||
// send xbar response
|
||||
MemRsp bank_rsp{bank_req.tag, bank_req.cid, bank_req.uuid};
|
||||
mem_xbar_->RspOut.at(i).push(bank_rsp, 1);
|
||||
}
|
||||
|
||||
// update perf counters
|
||||
perf_stats_.reads += !core_req.write;
|
||||
perf_stats_.writes += core_req.write;
|
||||
perf_stats_.reads += !bank_req.write;
|
||||
perf_stats_.writes += bank_req.write;
|
||||
|
||||
// remove input
|
||||
core_req_port.pop();
|
||||
xbar_req_out.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a reference to this object's performance counters.
// Before returning, bank_stalls is overwritten with the value reported by
// mem_xbar_->collisions(), so this const accessor writes to perf_stats_.
const PerfStats& perf_stats() const {
|
||||
perf_stats_.bank_stalls = mem_xbar_->collisions();
|
||||
return perf_stats_;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -42,10 +42,10 @@ void MemCoalescer::reset() {
|
|||
}
|
||||
|
||||
void MemCoalescer::tick() {
|
||||
// process incoming responses
|
||||
// process outgoing responses
|
||||
if (!RspOut.empty()) {
|
||||
auto& out_rsp = RspOut.front();
|
||||
DT(4, this->name() << " mem-rsp: " << out_rsp);
|
||||
DT(4, this->name() << "-mem-rsp: " << out_rsp);
|
||||
auto& entry = pending_rd_reqs_.at(out_rsp.tag);
|
||||
|
||||
BitVector<> rsp_mask(input_size_);
|
||||
|
@ -89,7 +89,7 @@ void MemCoalescer::tick() {
|
|||
|
||||
// ensure we can allocate a response tag
|
||||
if (pending_rd_reqs_.full()) {
|
||||
DT(4, "*** " << this->name() << " queue-full: " << in_req);
|
||||
DT(4, "*** " << this->name() << "-queue-full: " << in_req);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -145,7 +145,7 @@ void MemCoalescer::tick() {
|
|||
|
||||
// send memory request
|
||||
ReqOut.push(out_req, delay_);
|
||||
DT(4, this->name() << " mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
|
||||
DT(4, this->name() << "-mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
|
||||
|
||||
// update sent mask
|
||||
sent_mask_ |= cur_mask;
|
||||
|
|
|
@ -27,13 +27,14 @@ class MemSim::Impl {
|
|||
private:
|
||||
MemSim* simobject_;
|
||||
Config config_;
|
||||
MemCrossBar::Ptr mem_xbar_;
|
||||
DramSim dram_sim_;
|
||||
PerfStats perf_stats_;
|
||||
|
||||
struct DramCallbackArgs {
|
||||
MemSim* simobject;
|
||||
MemReq request;
|
||||
uint32_t i;
|
||||
MemSim::Impl* memsim;
|
||||
MemReq request;
|
||||
uint32_t bank_id;
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -41,7 +42,15 @@ public:
|
|||
: simobject_(simobject)
|
||||
, config_(config)
|
||||
, dram_sim_(MEM_CLOCK_RATIO)
|
||||
{}
|
||||
{
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
|
||||
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks);
|
||||
for (uint32_t i = 0; i < config.num_ports; ++i) {
|
||||
simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
|
||||
mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
//--
|
||||
|
@ -59,14 +68,14 @@ public:
|
|||
dram_sim_.tick();
|
||||
uint32_t counter = 0;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
if (simobject_->MemReqPorts.at(i).empty())
|
||||
for (uint32_t i = 0; i < config_.num_banks; ++i) {
|
||||
if (mem_xbar_->ReqOut.at(i).empty())
|
||||
continue;
|
||||
|
||||
auto& mem_req = simobject_->MemReqPorts.at(i).front();
|
||||
auto& mem_req = mem_xbar_->ReqOut.at(i).front();
|
||||
|
||||
// try to enqueue the request to the memory system
|
||||
auto req_args = new DramCallbackArgs{simobject_, mem_req, i};
|
||||
auto req_args = new DramCallbackArgs{this, mem_req, i};
|
||||
auto enqueue_success = dram_sim_.send_request(
|
||||
mem_req.write,
|
||||
mem_req.addr,
|
||||
|
@ -76,8 +85,8 @@ public:
|
|||
// only send a response for read requests
|
||||
if (!rsp_args->request.write) {
|
||||
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
|
||||
rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1);
|
||||
DT(3, rsp_args->simobject->name() << " mem-rsp: bank=" << rsp_args->i << ", " << mem_rsp);
|
||||
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
|
||||
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp: bank=" << rsp_args->bank_id << ", " << mem_rsp);
|
||||
}
|
||||
delete rsp_args;
|
||||
},
|
||||
|
@ -90,9 +99,9 @@ public:
|
|||
continue;
|
||||
}
|
||||
|
||||
DT(3, simobject_->name() << " mem-req: bank=" << i << ", " << mem_req);
|
||||
DT(3, simobject_->name() << "-mem-req: bank=" << i << ", " << mem_req);
|
||||
|
||||
simobject_->MemReqPorts.at(i).pop();
|
||||
mem_xbar_->ReqOut.at(i).pop();
|
||||
counter++;
|
||||
}
|
||||
|
||||
|
@ -107,8 +116,8 @@ public:
|
|||
|
||||
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
|
||||
: SimObject<MemSim>(ctx, name)
|
||||
, MemReqPorts(NUM_MEM_PORTS, this)
|
||||
, MemRspPorts(NUM_MEM_PORTS, this)
|
||||
, MemReqPorts(config.num_ports, this)
|
||||
, MemRspPorts(config.num_ports, this)
|
||||
, impl_(new Impl(this, config))
|
||||
{}
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,15 +21,15 @@ namespace vortex {
|
|||
class MemSim : public SimObject<MemSim>{
|
||||
public:
|
||||
struct Config {
|
||||
uint32_t channels;
|
||||
uint32_t num_cores;
|
||||
uint32_t num_banks;
|
||||
uint32_t num_ports;
|
||||
};
|
||||
|
||||
struct PerfStats {
|
||||
uint64_t counter;
|
||||
uint64_t ticks;
|
||||
|
||||
PerfStats()
|
||||
PerfStats()
|
||||
: counter(0)
|
||||
, ticks(0)
|
||||
{}
|
||||
|
@ -52,7 +52,7 @@ public:
|
|||
void tick();
|
||||
|
||||
const PerfStats& perf_stats() const;
|
||||
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
|
|
|
@ -24,10 +24,15 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
|
||||
// create memory simulator
|
||||
memsim_ = MemSim::Create("dram", MemSim::Config{
|
||||
MEMORY_BANKS,
|
||||
uint32_t(arch.num_cores()) * arch.num_clusters()
|
||||
PLATFORM_MEMORY_BANKS,
|
||||
L3_MEM_PORTS
|
||||
});
|
||||
|
||||
// create clusters
|
||||
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
|
||||
clusters_.at(i) = Cluster::Create(i, this, arch, dcrs_);
|
||||
}
|
||||
|
||||
// create L3 cache
|
||||
l3cache_ = CacheSim::Create("l3cache", CacheSim::Config{
|
||||
!L3_ENABLED,
|
||||
|
@ -38,7 +43,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
log2ceil(L3_NUM_BANKS), // B
|
||||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
uint8_t(arch.num_clusters()), // request size
|
||||
L3_NUM_REQS, // request size
|
||||
L3_MEM_PORTS, // memory ports
|
||||
L3_WRITEBACK, // write-back
|
||||
false, // write response
|
||||
L3_MSHR_SIZE, // mshr size
|
||||
|
@ -46,26 +52,26 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
}
|
||||
);
|
||||
|
||||
// connect L3 memory ports
|
||||
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
// connect L3 core interfaces
|
||||
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
|
||||
for (uint32_t j = 0; j < L2_MEM_PORTS; ++j) {
|
||||
clusters_.at(i)->mem_req_ports.at(j).bind(&l3cache_->CoreReqPorts.at(i * L2_MEM_PORTS + j));
|
||||
l3cache_->CoreRspPorts.at(i * L2_MEM_PORTS + j).bind(&clusters_.at(i)->mem_rsp_ports.at(j));
|
||||
}
|
||||
}
|
||||
|
||||
// connect L3 memory interfaces
|
||||
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
|
||||
l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
|
||||
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
|
||||
}
|
||||
|
||||
// create clusters
|
||||
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
|
||||
clusters_.at(i) = Cluster::Create(i, this, arch, dcrs_);
|
||||
// connect L3 core ports
|
||||
clusters_.at(i)->mem_req_port.bind(&l3cache_->CoreReqPorts.at(i));
|
||||
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
|
||||
}
|
||||
|
||||
// set up memory profiling
|
||||
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
|
||||
memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_pending_reads_ += !req.write;
|
||||
});
|
||||
memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){
|
||||
|
|
|
@ -21,11 +21,9 @@ Socket::Socket(const SimContext& ctx,
|
|||
Cluster* cluster,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs)
|
||||
: SimObject(ctx, "socket")
|
||||
, icache_mem_req_port(this)
|
||||
, icache_mem_rsp_port(this)
|
||||
, dcache_mem_req_port(this)
|
||||
, dcache_mem_rsp_port(this)
|
||||
: SimObject(ctx, StrFormat("socket%d", socket_id))
|
||||
, mem_req_ports(L1_MEM_PORTS, this)
|
||||
, mem_rsp_ports(L1_MEM_PORTS, this)
|
||||
, socket_id_(socket_id)
|
||||
, cluster_(cluster)
|
||||
, cores_(arch.socket_size())
|
||||
|
@ -33,8 +31,8 @@ Socket::Socket(const SimContext& ctx,
|
|||
auto cores_per_socket = cores_.size();
|
||||
|
||||
char sname[100];
|
||||
snprintf(sname, 100, "socket%d-icaches", socket_id);
|
||||
icaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_ICACHES, 1, CacheSim::Config{
|
||||
snprintf(sname, 100, "%s-icaches", this->name().c_str());
|
||||
icaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_ICACHES, CacheSim::Config{
|
||||
!ICACHE_ENABLED,
|
||||
log2ceil(ICACHE_SIZE), // C
|
||||
log2ceil(L1_LINE_SIZE), // L
|
||||
|
@ -44,17 +42,15 @@ Socket::Socket(const SimContext& ctx,
|
|||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
1, // number of inputs
|
||||
1, // memory ports
|
||||
false, // write-back
|
||||
false, // write response
|
||||
(uint8_t)arch.num_warps(), // mshr size
|
||||
2, // pipeline latency
|
||||
});
|
||||
|
||||
icaches_->MemReqPort.bind(&icache_mem_req_port);
|
||||
icache_mem_rsp_port.bind(&icaches_->MemRspPort);
|
||||
|
||||
snprintf(sname, 100, "socket%d-dcaches", socket_id);
|
||||
dcaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_DCACHES, DCACHE_NUM_REQS, CacheSim::Config{
|
||||
snprintf(sname, 100, "%s-dcaches", this->name().c_str());
|
||||
dcaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_DCACHES, CacheSim::Config{
|
||||
!DCACHE_ENABLED,
|
||||
log2ceil(DCACHE_SIZE), // C
|
||||
log2ceil(L1_LINE_SIZE), // L
|
||||
|
@ -64,21 +60,41 @@ Socket::Socket(const SimContext& ctx,
|
|||
XLEN, // address bits
|
||||
1, // number of ports
|
||||
DCACHE_NUM_REQS, // number of inputs
|
||||
L1_MEM_PORTS, // memory ports
|
||||
DCACHE_WRITEBACK, // write-back
|
||||
false, // write response
|
||||
DCACHE_MSHR_SIZE, // mshr size
|
||||
2, // pipeline latency
|
||||
});
|
||||
|
||||
dcaches_->MemReqPort.bind(&dcache_mem_req_port);
|
||||
dcache_mem_rsp_port.bind(&dcaches_->MemRspPort);
|
||||
// connect l1 caches to outgoing memory interfaces
|
||||
for (uint32_t i = 0; i < L1_MEM_PORTS; ++i) {
|
||||
if (i == 0) {
|
||||
snprintf(sname, 100, "%s-l1_arb%d", this->name().c_str(), i);
|
||||
auto l1_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, 2, 1);
|
||||
|
||||
icaches_->MemReqPorts.at(0).bind(&l1_arb->ReqIn.at(1));
|
||||
l1_arb->RspIn.at(1).bind(&icaches_->MemRspPorts.at(0));
|
||||
|
||||
dcaches_->MemReqPorts.at(0).bind(&l1_arb->ReqIn.at(0));
|
||||
l1_arb->RspIn.at(0).bind(&dcaches_->MemRspPorts.at(0));
|
||||
|
||||
l1_arb->ReqOut.at(0).bind(&this->mem_req_ports.at(0));
|
||||
this->mem_rsp_ports.at(0).bind(&l1_arb->RspOut.at(0));
|
||||
} else {
|
||||
dcaches_->MemReqPorts.at(i).bind(&this->mem_req_ports.at(i));
|
||||
this->mem_rsp_ports.at(i).bind(&dcaches_->MemRspPorts.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
// create cores
|
||||
|
||||
for (uint32_t i = 0; i < cores_per_socket; ++i) {
|
||||
uint32_t core_id = socket_id * cores_per_socket + i;
|
||||
cores_.at(i) = Core::Create(core_id, this, arch, dcrs);
|
||||
}
|
||||
|
||||
// connect cores to caches
|
||||
for (uint32_t i = 0; i < cores_per_socket; ++i) {
|
||||
cores_.at(i)->icache_req_ports.at(0).bind(&icaches_->CoreReqPorts.at(i).at(0));
|
||||
icaches_->CoreRspPorts.at(i).at(0).bind(&cores_.at(i)->icache_rsp_ports.at(0));
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -32,16 +32,13 @@ public:
|
|||
CacheSim::PerfStats dcache;
|
||||
};
|
||||
|
||||
SimPort<MemReq> icache_mem_req_port;
|
||||
SimPort<MemRsp> icache_mem_rsp_port;
|
||||
std::vector<SimPort<MemReq>> mem_req_ports;
|
||||
std::vector<SimPort<MemRsp>> mem_rsp_ports;
|
||||
|
||||
SimPort<MemReq> dcache_mem_req_port;
|
||||
SimPort<MemRsp> dcache_mem_rsp_port;
|
||||
|
||||
Socket(const SimContext& ctx,
|
||||
Socket(const SimContext& ctx,
|
||||
uint32_t socket_id,
|
||||
Cluster* cluster,
|
||||
const Arch &arch,
|
||||
Cluster* cluster,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs);
|
||||
|
||||
~Socket();
|
||||
|
@ -66,14 +63,14 @@ public:
|
|||
|
||||
bool running() const;
|
||||
|
||||
int get_exitcode() const;
|
||||
int get_exitcode() const;
|
||||
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
|
||||
|
||||
void resume(uint32_t core_id);
|
||||
|
||||
PerfStats perf_stats() const;
|
||||
|
||||
|
||||
private:
|
||||
uint32_t socket_id_;
|
||||
Cluster* cluster_;
|
||||
|
|
|
@ -15,11 +15,11 @@
|
|||
|
||||
using namespace vortex;
|
||||
|
||||
LocalMemDemux::LocalMemDemux(
|
||||
LocalMemSwitch::LocalMemSwitch(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t delay
|
||||
) : SimObject<LocalMemDemux>(ctx, name)
|
||||
) : SimObject<LocalMemSwitch>(ctx, name)
|
||||
, ReqIn(this)
|
||||
, RspIn(this)
|
||||
, ReqLmem(this)
|
||||
|
@ -29,19 +29,19 @@ LocalMemDemux::LocalMemDemux(
|
|||
, delay_(delay)
|
||||
{}
|
||||
|
||||
void LocalMemDemux::reset() {}
|
||||
void LocalMemSwitch::reset() {}
|
||||
|
||||
void LocalMemDemux::tick() {
|
||||
// process incoming responses
|
||||
void LocalMemSwitch::tick() {
|
||||
// process outgoing responses
|
||||
if (!RspLmem.empty()) {
|
||||
auto& out_rsp = RspLmem.front();
|
||||
DT(4, this->name() << " lmem-rsp: " << out_rsp);
|
||||
DT(4, this->name() << "-lmem-rsp: " << out_rsp);
|
||||
RspIn.push(out_rsp, 1);
|
||||
RspLmem.pop();
|
||||
}
|
||||
if (!RspDC.empty()) {
|
||||
auto& out_rsp = RspDC.front();
|
||||
DT(4, this->name() << " dc-rsp: " << out_rsp);
|
||||
DT(4, this->name() << "-dc-rsp: " << out_rsp);
|
||||
RspIn.push(out_rsp, 1);
|
||||
RspDC.pop();
|
||||
}
|
||||
|
@ -73,12 +73,12 @@ void LocalMemDemux::tick() {
|
|||
|
||||
if (!out_dc_req.mask.none()) {
|
||||
ReqDC.push(out_dc_req, delay_);
|
||||
DT(4, this->name() << " dc-req: " << out_dc_req);
|
||||
DT(4, this->name() << "-dc-req: " << out_dc_req);
|
||||
}
|
||||
|
||||
if (!out_lmem_req.mask.none()) {
|
||||
ReqLmem.push(out_lmem_req, delay_);
|
||||
DT(4, this->name() << " lmem-req: " << out_lmem_req);
|
||||
DT(4, this->name() << "-lmem-req: " << out_lmem_req);
|
||||
}
|
||||
ReqIn.pop();
|
||||
}
|
||||
|
@ -104,12 +104,12 @@ void LsuMemAdapter::reset() {}
|
|||
void LsuMemAdapter::tick() {
|
||||
uint32_t input_size = ReqOut.size();
|
||||
|
||||
// process incoming responses
|
||||
// process outgoing responses
|
||||
for (uint32_t i = 0; i < input_size; ++i) {
|
||||
if (RspOut.at(i).empty())
|
||||
continue;
|
||||
auto& out_rsp = RspOut.at(i).front();
|
||||
DT(4, this->name() << " rsp" << i << ": " << out_rsp);
|
||||
DT(4, this->name() << "-rsp" << i << ": " << out_rsp);
|
||||
|
||||
// build memory response
|
||||
LsuRsp in_rsp(input_size);
|
||||
|
@ -141,7 +141,6 @@ void LsuMemAdapter::tick() {
|
|||
if (!ReqIn.empty()) {
|
||||
auto& in_req = ReqIn.front();
|
||||
assert(in_req.mask.size() == input_size);
|
||||
|
||||
for (uint32_t i = 0; i < input_size; ++i) {
|
||||
if (in_req.mask.test(i)) {
|
||||
// build memory request
|
||||
|
@ -152,10 +151,9 @@ void LsuMemAdapter::tick() {
|
|||
out_req.tag = in_req.tag;
|
||||
out_req.cid = in_req.cid;
|
||||
out_req.uuid = in_req.uuid;
|
||||
|
||||
// send memory request
|
||||
ReqOut.at(i).push(out_req, delay_);
|
||||
DT(4, this->name() << " req" << i << ": " << out_req);
|
||||
DT(4, this->name() << "-req" << i << ": " << out_req);
|
||||
}
|
||||
}
|
||||
ReqIn.pop();
|
||||
|
|
376
sim/simx/types.h
376
sim/simx/types.h
|
@ -466,29 +466,29 @@ private:
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Type>
|
||||
class Mux : public SimObject<Mux<Type>> {
|
||||
class Arbiter : public SimObject<Arbiter<Type>> {
|
||||
public:
|
||||
std::vector<SimPort<Type>> Inputs;
|
||||
std::vector<SimPort<Type>> Outputs;
|
||||
|
||||
Mux(
|
||||
Arbiter(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t delay = 1
|
||||
) : SimObject<Mux<Type>>(ctx, name)
|
||||
) : SimObject<Arbiter<Type>>(ctx, name)
|
||||
, Inputs(num_inputs, this)
|
||||
, Outputs(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, cursors_(num_outputs, 0)
|
||||
, num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
, grants_(num_outputs, 0)
|
||||
, lg2_num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
{
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 32);
|
||||
assert(num_outputs <= 32);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(num_inputs >= num_outputs);
|
||||
|
||||
// bypass mode
|
||||
|
@ -500,15 +500,15 @@ public:
|
|||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& cursor : cursors_) {
|
||||
cursor = 0;
|
||||
for (auto& grant : grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
uint32_t I = Inputs.size();
|
||||
uint32_t O = Outputs.size();
|
||||
uint32_t R = 1 << num_reqs_;
|
||||
uint32_t R = 1 << lg2_num_reqs_;
|
||||
|
||||
// skip bypass mode
|
||||
if (I == O)
|
||||
|
@ -517,8 +517,8 @@ public:
|
|||
// process inputs
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (cursors_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + i;
|
||||
uint32_t g = (grants_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + g;
|
||||
if (j >= I)
|
||||
continue;
|
||||
|
||||
|
@ -527,31 +527,134 @@ public:
|
|||
auto& req = req_in.front();
|
||||
Outputs.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
this->update_grant(o, g);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
protected:
|
||||
|
||||
void update_cursor(uint32_t index, uint32_t grant) {
|
||||
void update_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
cursors_.at(index) = grant + 1;
|
||||
grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> cursors_;
|
||||
uint32_t num_reqs_;
|
||||
std::vector<uint32_t> grants_;
|
||||
uint32_t lg2_num_reqs_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Type>
|
||||
class CrossBar : public SimObject<CrossBar<Type>> {
|
||||
public:
|
||||
std::vector<SimPort<Type>> Inputs;
|
||||
std::vector<SimPort<Type>> Outputs;
|
||||
|
||||
CrossBar(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t addr_start = 0,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<CrossBar<Type>>(ctx, name)
|
||||
, Inputs(num_inputs, this)
|
||||
, Outputs(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, grants_(num_outputs, 0)
|
||||
, lg2_inputs_(log2ceil(num_inputs))
|
||||
, lg2_outputs_(log2ceil(num_outputs))
|
||||
, addr_start_(addr_start)
|
||||
, collisions_(0) {
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(ispow2(num_outputs));
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& grant : grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
uint32_t I = Inputs.size();
|
||||
uint32_t O = Outputs.size();
|
||||
uint32_t R = 1 << lg2_inputs_;
|
||||
|
||||
// process incoming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
int32_t input_idx = -1;
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (grants_.at(o) + r) & (R-1);
|
||||
if (i >= I)
|
||||
continue;
|
||||
auto& req_in = Inputs.at(i);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
// skip if input is not going to current output
|
||||
uint32_t output_idx = 0;
|
||||
if (O != 1) {
|
||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, lg2_outputs_-1);
|
||||
}
|
||||
if (output_idx != o)
|
||||
continue;
|
||||
if (input_idx != -1) {
|
||||
++collisions_;
|
||||
continue;
|
||||
}
|
||||
input_idx = i;
|
||||
}
|
||||
}
|
||||
if (input_idx != -1) {
|
||||
auto& req_in = Inputs.at(input_idx);
|
||||
auto& req = req_in.front();
|
||||
if (lg2_inputs_ != 0) {
|
||||
req.tag = (req.tag << lg2_inputs_) | input_idx;
|
||||
}
|
||||
DT(4, this->name() << "-req" << input_idx << ": " << req);
|
||||
Outputs.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_grant(o, input_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t collisions() const {
|
||||
return collisions_;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
void update_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> grants_;
|
||||
uint32_t lg2_inputs_;
|
||||
uint32_t lg2_outputs_;
|
||||
uint32_t addr_start_;
|
||||
uint64_t collisions_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Req, typename Rsp>
|
||||
class Switch : public SimObject<Switch<Req, Rsp>> {
|
||||
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
|
||||
public:
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
@ -559,7 +662,7 @@ public:
|
|||
std::vector<SimPort<Req>> ReqOut;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
|
||||
Switch(
|
||||
TxArbiter(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
|
@ -567,19 +670,19 @@ public:
|
|||
uint32_t num_outputs = 1,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<Switch<Req, Rsp>>(ctx, name)
|
||||
: SimObject<TxArbiter<Req, Rsp>>(ctx, name)
|
||||
, ReqIn(num_inputs, this)
|
||||
, RspIn(num_inputs, this)
|
||||
, ReqOut(num_outputs, this)
|
||||
, RspOut(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, cursors_(num_outputs, 0)
|
||||
, lg_num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
, grants_(num_outputs, 0)
|
||||
, lg2_num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
{
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 32);
|
||||
assert(num_outputs <= 32);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(num_inputs >= num_outputs);
|
||||
|
||||
// bypass mode
|
||||
|
@ -592,76 +695,238 @@ public:
|
|||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& cursor : cursors_) {
|
||||
cursor = 0;
|
||||
for (auto& grant : grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
uint32_t I = ReqIn.size();
|
||||
uint32_t O = ReqOut.size();
|
||||
uint32_t R = 1 << lg_num_reqs_;
|
||||
uint32_t R = 1 << lg2_num_reqs_;
|
||||
|
||||
// skip bypass mode
|
||||
if (I == O)
|
||||
return;
|
||||
|
||||
// process outgoing responses
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
// process incoming responses
|
||||
if (!RspOut.at(o).empty()) {
|
||||
auto& rsp = RspOut.at(o).front();
|
||||
uint32_t i = 0;
|
||||
if (lg_num_reqs_ != 0) {
|
||||
i = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg_num_reqs_;
|
||||
auto& rsp_out = RspOut.at(o);
|
||||
if (!rsp_out.empty()) {
|
||||
auto& rsp = rsp_out.front();
|
||||
uint32_t g = 0;
|
||||
if (lg2_num_reqs_ != 0) {
|
||||
g = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg2_num_reqs_;
|
||||
}
|
||||
DT(4, this->name() << " rsp" << o << ": " << rsp);
|
||||
uint32_t j = o * R + i;
|
||||
DT(4, this->name() << "-rsp" << o << ": " << rsp);
|
||||
uint32_t j = o * R + g;
|
||||
RspIn.at(j).push(rsp, 1);
|
||||
RspOut.at(o).pop();
|
||||
rsp_out.pop();
|
||||
}
|
||||
}
|
||||
|
||||
// process incoming requests
|
||||
// process incoming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (cursors_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + i;
|
||||
uint32_t g = (grants_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + g;
|
||||
if (j >= I)
|
||||
continue;
|
||||
|
||||
auto& req_in = ReqIn.at(j);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
if (lg_num_reqs_ != 0) {
|
||||
req.tag = (req.tag << lg_num_reqs_) | i;
|
||||
if (lg2_num_reqs_ != 0) {
|
||||
req.tag = (req.tag << lg2_num_reqs_) | g;
|
||||
}
|
||||
DT(4, this->name() << " req" << j << ": " << req);
|
||||
DT(4, this->name() << "-req" << j << ": " << req);
|
||||
ReqOut.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
this->update_grant(o, g);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update_cursor(uint32_t index, uint32_t grant) {
|
||||
protected:
|
||||
|
||||
void update_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
cursors_.at(index) = grant + 1;
|
||||
grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> cursors_;
|
||||
uint32_t lg_num_reqs_;
|
||||
std::vector<uint32_t> grants_;
|
||||
uint32_t lg2_num_reqs_;
|
||||
};
|
||||
|
||||
using MemSwitch = Switch<MemReq, MemRsp>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LocalMemDemux : public SimObject<LocalMemDemux> {
|
||||
template <typename Req, typename Rsp>
|
||||
class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
|
||||
public:
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
||||
std::vector<SimPort<Req>> ReqOut;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
|
||||
TxCrossBar(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t addr_start = 0,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<TxCrossBar<Req, Rsp>>(ctx, name)
|
||||
, ReqIn(num_inputs, this)
|
||||
, RspIn(num_inputs, this)
|
||||
, ReqOut(num_outputs, this)
|
||||
, RspOut(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, req_grants_(num_outputs, 0)
|
||||
, rsp_grants_(num_inputs, 0)
|
||||
, lg2_inputs_(log2ceil(num_inputs))
|
||||
, lg2_outputs_(log2ceil(num_outputs))
|
||||
, addr_start_(addr_start)
|
||||
, collisions_(0) {
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 64);
|
||||
assert(num_outputs <= 64);
|
||||
assert(ispow2(num_inputs));
|
||||
assert(ispow2(num_outputs));
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& grant : req_grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
for (auto& grant : rsp_grants_) {
|
||||
grant = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
uint32_t I = ReqIn.size();
|
||||
uint32_t O = ReqOut.size();
|
||||
uint32_t R = 1 << lg2_inputs_;
|
||||
uint32_t T = 1 << lg2_outputs_;
|
||||
|
||||
// process outgoing responses
|
||||
for (uint32_t i = 0; i < I; ++i) {
|
||||
int32_t output_idx = -1;
|
||||
for (uint32_t t = 0; t < T; ++t) {
|
||||
uint32_t o = (rsp_grants_.at(i) + t) & (T-1);
|
||||
if (o >= O)
|
||||
continue;
|
||||
auto& rsp_out = RspOut.at(o);
|
||||
if (!rsp_out.empty()) {
|
||||
auto& rsp = rsp_out.front();
|
||||
// skip if response is not going to current input
|
||||
uint32_t input_idx = 0;
|
||||
if (lg2_inputs_ != 0) {
|
||||
input_idx = rsp.tag & (R-1);
|
||||
}
|
||||
if (input_idx != i)
|
||||
continue;
|
||||
if (output_idx != -1) {
|
||||
++collisions_;
|
||||
continue;
|
||||
}
|
||||
output_idx = o;
|
||||
}
|
||||
}
|
||||
if (output_idx != -1) {
|
||||
auto& rsp_out = RspOut.at(output_idx);
|
||||
auto& rsp = rsp_out.front();
|
||||
uint32_t input_idx = 0;
|
||||
if (lg2_inputs_ != 0) {
|
||||
input_idx = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg2_inputs_;
|
||||
}
|
||||
DT(4, this->name() << "-rsp" << output_idx << ": " << rsp);
|
||||
RspIn.at(input_idx).push(rsp, 1);
|
||||
rsp_out.pop();
|
||||
this->update_rsp_grant(i, output_idx);
|
||||
}
|
||||
}
|
||||
|
||||
// process incoming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
int32_t input_idx = -1;
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (req_grants_.at(o) + r) & (R-1);
|
||||
if (i >= I)
|
||||
continue;
|
||||
auto& req_in = ReqIn.at(i);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
// skip if request is not going to current output
|
||||
uint32_t output_idx = 0;
|
||||
if (O != 1) {
|
||||
output_idx = (uint32_t)bit_getw(req.addr, addr_start_, lg2_outputs_-1);
|
||||
}
|
||||
if (output_idx != o)
|
||||
continue;
|
||||
if (input_idx != -1) {
|
||||
++collisions_;
|
||||
continue;
|
||||
}
|
||||
input_idx = i;
|
||||
}
|
||||
}
|
||||
if (input_idx != -1) {
|
||||
auto& req_in = ReqIn.at(input_idx);
|
||||
auto& req = req_in.front();
|
||||
if (lg2_inputs_ != 0) {
|
||||
req.tag = (req.tag << lg2_inputs_) | input_idx;
|
||||
}
|
||||
DT(4, this->name() << "-req" << input_idx << ": " << req);
|
||||
ReqOut.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_req_grant(o, input_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t collisions() const {
|
||||
return collisions_;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
void update_req_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
req_grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void update_rsp_grant(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
rsp_grants_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> req_grants_;
|
||||
std::vector<uint32_t> rsp_grants_;
|
||||
uint32_t lg2_inputs_;
|
||||
uint32_t lg2_outputs_;
|
||||
uint32_t addr_start_;
|
||||
uint64_t collisions_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LocalMemSwitch : public SimObject<LocalMemSwitch> {
|
||||
public:
|
||||
SimPort<LsuReq> ReqIn;
|
||||
SimPort<LsuRsp> RspIn;
|
||||
|
@ -672,7 +937,7 @@ public:
|
|||
SimPort<LsuReq> ReqDC;
|
||||
SimPort<LsuRsp> RspDC;
|
||||
|
||||
LocalMemDemux(
|
||||
LocalMemSwitch(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t delay
|
||||
|
@ -711,4 +976,7 @@ private:
|
|||
uint32_t delay_;
|
||||
};
|
||||
|
||||
using MemArbiter = TxArbiter<MemReq, MemRsp>;
|
||||
using MemCrossBar = TxCrossBar<MemReq, MemRsp>;
|
||||
|
||||
}
|
||||
|
|
|
@ -142,8 +142,8 @@ public:
|
|||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
delete mem_alloc_[i];
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
delete mem_alloc_[b];
|
||||
}
|
||||
if (ram_) {
|
||||
delete ram_;
|
||||
|
@ -187,8 +187,8 @@ public:
|
|||
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
|
||||
|
||||
// initialize memory allocator
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
mem_alloc_[b] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
|
||||
}
|
||||
|
||||
// reset the device
|
||||
|
@ -257,8 +257,9 @@ public:
|
|||
//printf("%0ld: [sim] register_write: address=0x%x\n", timestamp, offset);
|
||||
device_->s_axi_ctrl_awvalid = 1;
|
||||
device_->s_axi_ctrl_awaddr = offset;
|
||||
while (!device_->s_axi_ctrl_awready)
|
||||
while (!device_->s_axi_ctrl_awready) {
|
||||
this->tick();
|
||||
}
|
||||
this->tick();
|
||||
device_->s_axi_ctrl_awvalid = 0;
|
||||
|
||||
|
@ -267,8 +268,9 @@ public:
|
|||
device_->s_axi_ctrl_wvalid = 1;
|
||||
device_->s_axi_ctrl_wdata = value;
|
||||
device_->s_axi_ctrl_wstrb = 0xf;
|
||||
while (!device_->s_axi_ctrl_wready)
|
||||
while (!device_->s_axi_ctrl_wready) {
|
||||
this->tick();
|
||||
}
|
||||
this->tick();
|
||||
device_->s_axi_ctrl_wvalid = 0;
|
||||
|
||||
|
@ -290,8 +292,9 @@ public:
|
|||
//printf("%0ld: [sim] register_read: address=0x%x\n", timestamp, offset);
|
||||
device_->s_axi_ctrl_arvalid = 1;
|
||||
device_->s_axi_ctrl_araddr = offset;
|
||||
while (!device_->s_axi_ctrl_arready)
|
||||
while (!device_->s_axi_ctrl_arready) {
|
||||
this->tick();
|
||||
}
|
||||
this->tick();
|
||||
device_->s_axi_ctrl_arvalid = 0;
|
||||
|
||||
|
@ -318,9 +321,9 @@ private:
|
|||
reqs.clear();
|
||||
}
|
||||
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queues_[i], empty);
|
||||
std::swap(dram_queues_[b], empty);
|
||||
}
|
||||
|
||||
device_->ap_rst_n = 0;
|
||||
|
@ -335,10 +338,10 @@ private:
|
|||
device_->ap_rst_n = 1;
|
||||
|
||||
// this AXI device is always ready to accept new requests
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
*m_axi_mem_[i].arready = 1;
|
||||
*m_axi_mem_[i].awready = 1;
|
||||
*m_axi_mem_[i].wready = 1;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
*m_axi_mem_[b].arready = 1;
|
||||
*m_axi_mem_[b].awready = 1;
|
||||
*m_axi_mem_[b].wready = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -355,10 +358,10 @@ private:
|
|||
|
||||
dram_sim_.tick();
|
||||
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
if (!dram_queues_[i].empty()) {
|
||||
auto mem_req = dram_queues_[i].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
if (!dram_queues_[b].empty()) {
|
||||
auto mem_req = dram_queues_[b].front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
|
@ -366,7 +369,7 @@ private:
|
|||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queues_[i].pop();
|
||||
dram_queues_[b].pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue