Merge branch 'bug_fixes'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2024-12-04 22:20:52 -08:00
commit 18ae57cc7f
38 changed files with 1359 additions and 831 deletions

View file

@ -105,7 +105,7 @@ regression()
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
# test for matmul
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
echo "regression tests done!"
}
@ -322,6 +322,10 @@ config2()
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
# test memory ports
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo
CONFIGS="-DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=demo --threads=32
echo "configuration-2 tests done!"
}

View file

@ -14,8 +14,6 @@
`ifndef VX_CONFIG_VH
`define VX_CONFIG_VH
`ifndef MIN
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`endif
@ -170,8 +168,8 @@
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
`endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 2
`ifndef PLATFORM_MEMORY_BANKS
`define PLATFORM_MEMORY_BANKS 1
`endif
`ifdef XLEN_64
@ -193,7 +191,7 @@
`endif
`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 64'h0F0000000
`endif
@ -218,7 +216,7 @@
`endif
`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 32'hF0000000
`endif
@ -303,13 +301,13 @@
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV32 //or BARE
`endif
`ifndef PT_LEVEL
`ifndef PT_LEVEL
`define PT_LEVEL (2)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (4)
`endif
`ifndef NUM_PTE_ENTRY
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (1024)
`endif
`ifndef PT_SIZE_LIMIT
@ -319,13 +317,13 @@
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV39 //or BARE
`endif
`ifndef PT_LEVEL
`ifndef PT_LEVEL
`define PT_LEVEL (3)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (8)
`endif
`ifndef NUM_PTE_ENTRY
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (512)
`endif
`ifndef PT_SIZE_LIMIT
@ -604,7 +602,7 @@
// Number of Banks
`ifndef DCACHE_NUM_BANKS
`define DCACHE_NUM_BANKS `MIN(`NUM_LSU_LANES, 4)
`define DCACHE_NUM_BANKS `MIN(DCACHE_NUM_REQS, 16)
`endif
// Core Response Queue Size
@ -647,6 +645,15 @@
`define DCACHE_REPL_POLICY 1
`endif
// Number of Memory Ports
// Default for L1_MEM_PORTS, applied only when the build has not already
// defined it. The value is the smaller of the platform memory bank count
// (`PLATFORM_MEMORY_BANKS) and:
//   - DCACHE_NUM_REQS    when L1_DISABLE is defined,
//   - `DCACHE_NUM_BANKS  otherwise.
// NOTE(review): DCACHE_NUM_REQS is referenced without a backtick, so it is
// presumably a parameter visible wherever this macro is expanded - confirm.
`ifndef L1_MEM_PORTS
`ifdef L1_DISABLE
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`else
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`endif
`endif
// LMEM Configurable Knobs ////////////////////////////////////////////////////
`ifndef LMEM_DISABLE
@ -674,7 +681,7 @@
// Number of Banks
`ifndef L2_NUM_BANKS
`define L2_NUM_BANKS `MIN(4, `NUM_SOCKETS)
`define L2_NUM_BANKS `MIN(L2_NUM_REQS, 16)
`endif
// Core Response Queue Size
@ -717,6 +724,15 @@
`define L2_REPL_POLICY 1
`endif
// Number of Memory Ports
// Default for L2_MEM_PORTS, applied only when the build has not already
// defined it. The value is the smaller of the platform memory bank count
// (`PLATFORM_MEMORY_BANKS) and:
//   - `L2_NUM_BANKS  when L2_ENABLE is defined,
//   - L2_NUM_REQS    otherwise.
// NOTE(review): L2_NUM_REQS is referenced without a backtick, so it is
// presumably a parameter visible wherever this macro is expanded - confirm.
`ifndef L2_MEM_PORTS
`ifdef L2_ENABLE
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`else
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`endif
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Cache Size
@ -726,7 +742,7 @@
// Number of Banks
`ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
`define L3_NUM_BANKS `MIN(L3_NUM_REQS, 16)
`endif
// Core Response Queue Size
@ -769,9 +785,13 @@
`define L3_REPL_POLICY 1
`endif
// Number of Memory Ports from LLC
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
// Number of Memory Ports
`ifndef L3_MEM_PORTS
`ifdef L3_ENABLE
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`else
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`endif
`endif
// ISA Extensions /////////////////////////////////////////////////////////////

View file

@ -163,6 +163,7 @@ endgenerate
`define USE_BLOCK_BRAM (* ramstyle = "block" *)
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
`define RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams on" *)
`define DISABLE_BRAM (* ramstyle = "logic" *)
`define PRESERVE_NET (* preserve *)
`define BLACKBOX_CELL (* black_box *)
@ -173,6 +174,7 @@ endgenerate
`define USE_BLOCK_BRAM (* ram_style = "block" *)
`define USE_FAST_BRAM (* ram_style = "distributed" *)
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
`define RW_RAM_CHECK (* rw_addr_collision = "yes" *)
`define DISABLE_BRAM (* ram_style = "registers" *)
`define PRESERVE_NET (* keep = "true" *)
`define BLACKBOX_CELL (* black_box *)
@ -183,6 +185,7 @@ endgenerate
`define USE_BLOCK_BRAM
`define USE_FAST_BRAM
`define NO_RW_RAM_CHECK
`define RW_RAM_CHECK
`define DISABLE_BRAM
`define PRESERVE_NET
`define BLACKBOX_CELL

View file

@ -47,7 +47,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches
VX_lmem_switch #(
.REQ0_OUT_BUF (3),
.REQ0_OUT_BUF (1),
.REQ1_OUT_BUF (0),
.RSP_OUT_BUF (1),
.ARBITER ("P")
@ -78,7 +78,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
.ARBITER ("P"),
.REQ_OUT_BUF (3),
.RSP_OUT_BUF (0)
.RSP_OUT_BUF (2)
) lmem_adapter (
.clk (clk),
.reset (reset),

View file

@ -13,12 +13,6 @@
`include "VX_platform.vh"
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end
`define RAM_INITIALIZATION \
if (INIT_ENABLE != 0) begin : g_init \
if (INIT_FILE != "") begin : g_file \
@ -32,14 +26,93 @@
end \
end
`define RAM_BYPASS(__d) \
reg [DATAW-1:0] bypass_data_r; \
reg bypass_valid_r; \
`define SYNC_RAM_WF_BLOCK(__d, __re, __we, __ra, __wa) \
`RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \
`RAM_INITIALIZATION \
reg [ADDRW-1:0] raddr_r; \
always @(posedge clk) begin \
bypass_valid_r <= read_s && write && (raddr_s == waddr); \
bypass_data_r <= wdata; \
if (__re || __we) begin \
if (__we) begin \
ram[__wa] <= wdata; \
end \
raddr_r <= __ra; \
end \
end \
assign __d = bypass_valid_r ? bypass_data_r : rdata_r
assign __d = ram[raddr_r]
// SYNC_RAM_WF_WREN_BLOCK: synchronous RAM template with per-slice write enables
// whose output is read through a registered address.
//   __d  : net driven with the read data (ram[raddr_r])
//   __re : read enable; __we : write enable
//   __ra : read address; __wa : write address
// On a clock edge where (__re || __we) is set, the read address is captured in
// raddr_r; when __we is set, every WSELW-bit slice i with wren[i] set is
// written from wdata. Because the output indexes the array with the registered
// address, a location written in the same cycle is observed with its new value.
// The expansion site must provide clk, wdata, wren, DATAW, SIZE, ADDRW, WRENW,
// WSELW and the INIT_* names used by `RAM_INITIALIZATION, and must define
// `RAM_ATTRIBUTES before expanding the macro.
`define SYNC_RAM_WF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \
`RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \
`RAM_INITIALIZATION \
reg [ADDRW-1:0] raddr_r; \
always @(posedge clk) begin \
if (__re || __we) begin \
if (__we) begin \
for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end \
end \
raddr_r <= __ra; \
end \
end \
assign __d = ram[raddr_r]
// SYNC_RAM_RF_BLOCK: synchronous RAM template (full-word write) with a
// registered data output.
//   __d  : net driven with the registered read data (rdata_r)
//   __re : read enable; __we : write enable
//   __ra : read address; __wa : write address
// On a clock edge where (__re || __we) is set, rdata_r is loaded from ram[__ra];
// when __we is set, ram[__wa] is written from wdata in the same edge. Both are
// non-blocking assignments, so a same-address access captures the value stored
// before the write.
// The expansion site must provide clk, wdata, DATAW, SIZE and the INIT_* names
// used by `RAM_INITIALIZATION, and must define `RAM_ATTRIBUTES before
// expanding the macro.
`define SYNC_RAM_RF_BLOCK(__d, __re, __we, __ra, __wa) \
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
`RAM_INITIALIZATION \
reg [DATAW-1:0] rdata_r; \
always @(posedge clk) begin \
if (__re || __we) begin \
if (__we) begin \
ram[__wa] <= wdata; \
end \
rdata_r <= ram[__ra]; \
end \
end \
assign __d = rdata_r
// SYNC_RAM_RF_WREN_BLOCK: synchronous RAM template with per-slice write enables
// and a registered data output.
//   __d  : net driven with the registered read data (rdata_r)
//   __re : read enable; __we : write enable
//   __ra : read address; __wa : write address
// On a clock edge where (__re || __we) is set, rdata_r is loaded from ram[__ra];
// when __we is set, every WSELW-bit slice i with wren[i] set is written from
// wdata. Both are non-blocking assignments, so a same-address access captures
// the value stored before the write.
// The expansion site must provide clk, wdata, wren, DATAW, SIZE, WRENW, WSELW
// and the INIT_* names used by `RAM_INITIALIZATION, and must define
// `RAM_ATTRIBUTES before expanding the macro.
`define SYNC_RAM_RF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
`RAM_INITIALIZATION \
reg [DATAW-1:0] rdata_r; \
always @(posedge clk) begin \
if (__re || __we) begin \
if (__we) begin \
for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end \
end \
rdata_r <= ram[__ra]; \
end \
end \
assign __d = rdata_r
// ASYNC_RAM_BLOCK: RAM template with a clocked full-word write and a
// combinational (unregistered) read.
//   __d  : net driven directly with ram[__ra]
//   __we : write enable
//   __ra : read address; __wa : write address
// ram[__wa] is written from wdata on the clock edge when __we is set.
// The expansion site must provide clk, wdata, DATAW, SIZE and the INIT_* names
// used by `RAM_INITIALIZATION, and must define `RAM_ATTRIBUTES before
// expanding the macro.
`define ASYNC_RAM_BLOCK(__d, __we, __ra, __wa) \
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
`RAM_INITIALIZATION \
always @(posedge clk) begin \
if (__we) begin \
ram[__wa] <= wdata; \
end \
end \
assign __d = ram[__ra]
// ASYNC_RAM_BLOCK_WREN: RAM template with a clocked, per-slice write and a
// combinational (unregistered) read.
//   __d  : net driven directly with ram[__ra]
//   __we : write enable
//   __ra : read address; __wa : write address
// On the clock edge when __we is set, every WSELW-bit slice i with wren[i] set
// is written from wdata.
// The expansion site must provide clk, wdata, wren, DATAW, SIZE, WRENW, WSELW
// and the INIT_* names used by `RAM_INITIALIZATION, and must define
// `RAM_ATTRIBUTES before expanding the macro.
`define ASYNC_RAM_BLOCK_WREN(__d, __we, __ra, __wa) \
`RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \
`RAM_INITIALIZATION \
always @(posedge clk) begin \
if (__we) begin \
for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end \
end \
end \
assign __d = ram[__ra]
`TRACING_OFF
module VX_async_ram_patch #(
@ -47,6 +120,8 @@ module VX_async_ram_patch #(
parameter SIZE = 1,
parameter WRENW = 1,
parameter DUAL_PORT = 0,
parameter FORCE_BRAM = 0,
parameter WRITE_FIRST = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0,
@ -79,77 +154,102 @@ module VX_async_ram_patch #(
.out ({raddr_s, read_s, is_raddr_reg})
);
// synchronous ram
wire [DATAW-1:0] rdata_s, rdata_a;
wire [DATAW-1:0] rdata_s;
if (WRENW != 1) begin : g_wren_sync_ram
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION
always @(posedge clk) begin
if (read_s || write) begin
if (write) begin
`RAM_WRITE_WREN
if (1) begin : g_sync_ram
if (WRENW != 1) begin : g_wren
if (FORCE_BRAM) begin : g_bram
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
`SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
`SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end
end else begin : g_lutram
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES
`SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES
`SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end
end
end else begin : g_no_wren
if (FORCE_BRAM) begin : g_bram
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
`SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES `USE_BLOCK_BRAM
`SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end
end else begin : g_lutram
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES
`SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES
`SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr);
`undef RAM_ATTRIBUTES
end
rdata_r <= ram[raddr_s];
end
end
`RAM_BYPASS(rdata_s);
end else begin : g_no_wren_sync_ram
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION
`UNUSED_VAR (wren)
always @(posedge clk) begin
if (read_s || write) begin
if (write) begin
ram[waddr] <= wdata;
end
rdata_r <= ram[raddr_s];
end
end
`RAM_BYPASS(rdata_s);
end
// asynchronous ram (fallback)
wire [DATAW-1:0] rdata_a;
if (DUAL_PORT != 0) begin : g_dp_async_ram
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
if (WRENW != 1) begin : g_wren
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
if (1) begin : g_async_ram
if (DUAL_PORT != 0) begin : g_dp
if (WRENW != 1) begin : g_wren
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `RW_RAM_CHECK
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr);
`undef RAM_ATTRIBUTES
end
end else begin : g_no_wren
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `RW_RAM_CHECK
`ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
`ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr);
`undef RAM_ATTRIBUTES
end
end
end else begin : g_no_wren
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end else begin : g_sp
if (WRENW != 1) begin : g_wren
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `RW_RAM_CHECK
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
`ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr);
`undef RAM_ATTRIBUTES
end
end else begin : g_no_wren
if (WRITE_FIRST) begin : g_write_first
`define RAM_ATTRIBUTES `RW_RAM_CHECK
`ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr);
`undef RAM_ATTRIBUTES
end else begin : g_read_first
`define RAM_ATTRIBUTES `NO_RW_RAM_CHECK
`ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr);
`undef RAM_ATTRIBUTES
end
end
end
assign rdata_a = ram[raddr];
end else begin : g_sp_async_ram
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
if (WRENW != 1) begin : g_wren
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
end else begin : g_no_wren
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
end
assign rdata_a = ram[waddr];
end
assign rdata = is_raddr_reg ? rdata_s : rdata_a;

View file

@ -80,7 +80,7 @@ module VX_dp_ram #(
if (FORCE_BRAM) begin : g_bram
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
@ -93,7 +93,7 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr_r];
end else begin : g_no_wren
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
@ -166,7 +166,7 @@ module VX_dp_ram #(
end else begin : g_auto
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
(* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN
`RW_RAM_CHECK `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
@ -179,7 +179,7 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr_r];
end else begin : g_no_wren
(* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1];
`RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
@ -220,7 +220,7 @@ module VX_dp_ram #(
end
assign rdata = rdata_r;
end
end else begin
end else begin : g_undefined
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
@ -253,30 +253,32 @@ module VX_dp_ram #(
end else begin : g_async
`UNUSED_VAR (read)
if (FORCE_BRAM) begin : g_bram
`ifdef VIVADO
VX_async_ram_patch #(
.DATAW (DATAW),
.SIZE (SIZE),
.WRENW (WRENW),
.DUAL_PORT (1),
.FORCE_BRAM (FORCE_BRAM),
.WRITE_FIRST(RDW_MODE == "W"),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE)
) async_ram_patch (
.clk (clk),
.reset (reset),
.read (read),
.write (write),
.wren (wren),
.waddr (waddr),
.wdata (wdata),
.raddr (raddr),
.rdata (rdata)
);
`else
if (RDW_MODE == "W") begin : g_write_first
`ifdef VIVADO
VX_async_ram_patch #(
.DATAW (DATAW),
.SIZE (SIZE),
.WRENW (WRENW),
.DUAL_PORT (1),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE)
) async_ram_patch (
.clk (clk),
.reset (reset),
.read (read),
.write (write),
.wren (wren),
.waddr (waddr),
.wdata (wdata),
.raddr (raddr),
.rdata (rdata)
);
`else
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -285,7 +287,7 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr];
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -294,7 +296,6 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr];
end
`endif
end else begin : g_read_first
if (WRENW != 1) begin : g_wren
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
@ -316,10 +317,11 @@ module VX_dp_ram #(
assign rdata = ram[raddr];
end
end
`endif
end else begin : g_auto
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RW_RAM_CHECK `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -328,7 +330,7 @@ module VX_dp_ram #(
end
assign rdata = ram[raddr];
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin

View file

@ -90,9 +90,6 @@ module VX_fifo_queue #(
end
end
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
wire bypass = push && (empty || (going_empty && pop));
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
@ -101,7 +98,7 @@ module VX_fifo_queue #(
) dp_ram (
.clk (clk),
.reset (reset),
.read (~bypass),
.read (1'b1),
.write (push),
.wren (1'b1),
.raddr (rd_ptr_r),
@ -112,6 +109,8 @@ module VX_fifo_queue #(
if (OUT_REG != 0) begin : g_out_reg
reg [DATAW-1:0] data_out_r;
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
wire bypass = push && (empty || (going_empty && pop));
always @(posedge clk) begin
if (bypass) begin
data_out_r <= data_in;

View file

@ -485,7 +485,7 @@ module VX_rr_arbiter #(
.D (NUM_REQS)
) grant_decoder (
.sel_in (grant_index),
.data_in (1'b1),
.data_in (grant_valid),
.data_out (grant_onehot)
);

View file

@ -77,37 +77,9 @@ module VX_sp_ram #(
localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM);
if (OUT_REG) begin : g_sync
if (FORCE_BRAM) begin : g_bram
if (RDW_MODE == "R") begin : g_read_first
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [ADDRW-1:0] addr_r;
always @(posedge clk) begin
@ -135,6 +107,34 @@ module VX_sp_ram #(
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "R") begin : g_read_first
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "N") begin : g_no_change
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
@ -165,7 +165,7 @@ module VX_sp_ram #(
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "U") begin : g_unknown
end else if (RDW_MODE == "U") begin : g_undefined
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
@ -195,35 +195,7 @@ module VX_sp_ram #(
end
end
end else begin : g_auto
if (RDW_MODE == "R") begin : g_read_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "W") begin : g_write_first
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
@ -253,6 +225,34 @@ module VX_sp_ram #(
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "R") begin : g_read_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "N") begin : g_no_change
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
@ -283,7 +283,7 @@ module VX_sp_ram #(
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "U") begin : g_unknown
end else if (RDW_MODE == "U") begin : g_undefined
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
@ -316,30 +316,32 @@ module VX_sp_ram #(
end else begin : g_async
`UNUSED_VAR (read)
if (FORCE_BRAM) begin : g_bram
`ifdef VIVADO
VX_async_ram_patch #(
.DATAW (DATAW),
.SIZE (SIZE),
.WRENW (WRENW),
.DUAL_PORT (0),
.FORCE_BRAM (FORCE_BRAM),
.WRITE_FIRST(RDW_MODE == "W"),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE)
) async_ram_patch (
.clk (clk),
.reset (reset),
.read (read),
.write (write),
.wren (wren),
.waddr (addr),
.wdata (wdata),
.raddr (addr),
.rdata (rdata)
);
`else
if (RDW_MODE == "W") begin : g_write_first
`ifdef VIVADO
VX_async_ram_patch #(
.DATAW (DATAW),
.SIZE (SIZE),
.WRENW (WRENW),
.DUAL_PORT (0),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE)
) async_ram_patch (
.clk (clk),
.reset (reset),
.read (read),
.write (write),
.wren (wren),
.waddr (addr),
.wdata (wdata),
.raddr (addr),
.rdata (rdata)
);
`else
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -348,7 +350,7 @@ module VX_sp_ram #(
end
assign rdata = ram[addr];
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -357,7 +359,6 @@ module VX_sp_ram #(
end
assign rdata = ram[addr];
end
`endif
end else begin : g_read_first
if (WRENW != 1) begin : g_wren
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
@ -379,10 +380,11 @@ module VX_sp_ram #(
assign rdata = ram[addr];
end
end
`endif
end else begin : g_auto
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RW_RAM_CHECK `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -391,7 +393,7 @@ module VX_sp_ram #(
end
assign rdata = ram[addr];
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
@ -443,15 +445,7 @@ module VX_sp_ram #(
end
if (OUT_REG) begin : g_sync
if (RDW_MODE == "R") begin : g_read_first
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else if (RDW_MODE == "W") begin : g_write_first
if (RDW_MODE == "W") begin : g_write_first
reg [ADDRW-1:0] addr_r;
always @(posedge clk) begin
if (read || write) begin
@ -459,6 +453,14 @@ module VX_sp_ram #(
end
end
assign rdata = ram[addr_r];
end else if (RDW_MODE == "R") begin : g_read_first
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else if (RDW_MODE == "N") begin : g_no_change
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously
// A stream elastic buffer operates at full bandwidth: fire_in and fire_out can happen simultaneously
// It has the following benefits:
// + full-bandwidth throughput
// + ready_in and ready_out are decoupled
@ -45,79 +45,66 @@ module VX_stream_buffer #(
assign valid_out = valid_in;
assign data_out = data_in;
end else if (OUT_REG != 0) begin : g_out_reg
end else begin : g_buffer
reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;
reg valid_out_r;
reg no_buffer;
reg [DATAW-1:0] data_out_r, buffer_r;
reg valid_out_r, valid_in_r;
wire fire_in = valid_in && ready_in;
wire flow_out = ready_out || ~valid_out;
always @(posedge clk) begin
if (reset) begin
valid_out_r <= 0;
no_buffer <= 1;
end else begin
if (flow_out) begin
no_buffer <= 1;
end else if (valid_in) begin
no_buffer <= 0;
end
if (flow_out) begin
valid_out_r <= valid_in || ~no_buffer;
end
valid_in_r <= 1'b1;
end else if (valid_in || flow_out) begin
valid_in_r <= flow_out;
end
end
always @(posedge clk) begin
if (fire_in) begin
buffer <= data_in;
end
if (flow_out) begin
data_out_r <= no_buffer ? data_in : buffer;
end
end
assign ready_in = no_buffer;
assign valid_out = valid_out_r;
assign data_out = data_out_r;
end else begin : g_no_out_reg
reg [1:0][DATAW-1:0] shift_reg;
reg [1:0] fifo_state, fifo_state_n;
wire fire_in = valid_in && ready_in;
wire fire_out = valid_out && ready_out;
always @(*) begin
case ({fire_in, fire_out})
2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10
2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00
default: fifo_state_n = fifo_state;
endcase
end
always @(posedge clk) begin
if (reset) begin
fifo_state <= 2'b00;
end else begin
fifo_state <= fifo_state_n;
valid_out_r <= 1'b0;
end else if (flow_out) begin
valid_out_r <= valid_in || ~valid_in_r;
end
end
always @(posedge clk) begin
if (fire_in) begin
shift_reg[1] <= shift_reg[0];
shift_reg[0] <= data_in;
if (OUT_REG != 0) begin : g_out_reg
always @(posedge clk) begin
if (fire_in) begin
buffer_r <= data_in;
end
end
always @(posedge clk) begin
if (flow_out) begin
data_out_r <= valid_in_r ? data_in : buffer_r;
end
end
assign data_out = data_out_r;
end else begin : g_no_out_reg
always @(posedge clk) begin
if (fire_in) begin
data_out_r <= data_in;
end
end
always @(posedge clk) begin
if (fire_in) begin
buffer_r <= data_out_r;
end
end
assign data_out = valid_in_r ? data_out_r : buffer_r;
end
assign ready_in = ~fifo_state[1];
assign valid_out = fifo_state[0];
assign data_out = shift_reg[fifo_state[1]];
assign valid_out = valid_out_r;
assign ready_in = valid_in_r;
end

View file

@ -1,3 +1,16 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
namespace eval vortex {
variable debug 0
@ -17,6 +30,25 @@ proc str_replace {str match repl} {
return $result
}
# Return a copy of 'str' in which every regular-expression metacharacter is
# preceded by a backslash, so the result matches 'str' literally when it is
# embedded in a larger regexp pattern.
# Escaped characters: \ ^ . [ ] $ ( ) | * + ? { }
proc regex_escape {str} {
    # Build the "char -> \char" substitution table from the metacharacter set.
    set escape_map {}
    foreach meta [split {\^.[]$()|*+?{}} ""] {
        lappend escape_map $meta "\\$meta"
    }
    # string map makes a single left-to-right pass, so the backslashes it
    # inserts are never escaped a second time.
    return [string map $escape_map $str]
}
proc unique_cell_name {name} {
if {[get_cells -quiet $name] == {}} { return $name }
set index 0
@ -31,31 +63,60 @@ proc unique_net_name {name} {
return ${name}_${index}
}
proc find_nested_cells {parent name_match {should_exist 1}} {
# Group cells by the value of their PARENT property.
# Returns a dict that maps each non-empty parent to the list of cells from
# 'all_cells' that name it as PARENT, in the order they were encountered.
# Cells whose PARENT property is empty are not recorded as children.
proc build_parent_child_map {all_cells} {
    set children_of [dict create]
    foreach cell $all_cells {
        set owner [get_property PARENT $cell]
        if {$owner eq ""} {
            continue
        }
        # dict lappend starts a fresh list when the key is not present yet
        dict lappend children_of $owner $cell
    }
    return $children_of
}
# Collect every cell below 'parent_cell' using a parent -> children dict.
# The result is in pre-order: each child is followed immediately by its own
# descendants before the next sibling is listed. A cell with no entry in
# 'parent_child_map' yields an empty list.
proc find_cell_descendants_recursive {parent_cell parent_child_map} {
    # Leaf: nothing is registered under this cell
    if {![dict exists $parent_child_map $parent_cell]} {
        return {}
    }
    set collected {}
    foreach kid [dict get $parent_child_map $parent_cell] {
        # the child itself first, then everything beneath it
        lappend collected $kid {*}[find_cell_descendants_recursive $kid $parent_child_map]
    }
    return $collected
}
# Return all cells nested under 'parent_cell'.
# Builds a parent -> children map from every cell reported by
# 'get_cells -hierarchical' and walks it starting at 'parent_cell'.
proc find_cell_descendants {parent_cell} {
    set hierarchy [build_parent_child_map [get_cells -hierarchical]]
    return [find_cell_descendants_recursive $parent_cell $hierarchy]
}
proc find_nested_cells {parent_cell name_match {should_exist 1}} {
set hier_sep [get_hierarchy_separator]
set matching_cells {}
foreach cell [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"] {
set name [get_property NAME $cell]
if {[regexp $name_match $name]} {
foreach cell [find_cell_descendants $parent_cell] {
set parent_name [get_property PARENT $cell]
set cell_name [get_property NAME $cell]
set name_prefix [regex_escape "${parent_name}${hier_sep}"]
set pattern "${name_prefix}${name_match}"
if {[regexp $pattern $cell_name]} {
lappend matching_cells $cell
}
}
if {[llength $matching_cells] == 0} {
print_error "No matching cell found for '$parent' matching '$name_match'." $should_exist
print_error "No matching cell found for '$parent_cell' matching '$name_match'." $should_exist
}
return $matching_cells
}
proc find_nested_cell {parent name_match} {
foreach cell [get_cells -hierarchical -filter "PARENT == $parent"] {
set name [get_property NAME $cell]
if {$name == $name_match} {
return $cell
}
}
puts "ERROR: No matching cell found for '$parent' matching '$name_match'."
exit -1
}
proc find_cell_nets {cell name_match {should_exist 1}} {
set matching_nets {}
foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] {
@ -70,22 +131,23 @@ proc find_cell_nets {cell name_match {should_exist 1}} {
return $matching_nets
}
proc get_cell_net {cell name_match} {
foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] {
set name [get_property NAME $net]
if {$name == $name_match} {
return $net
}
proc get_cell_net {cell name} {
set net [get_nets -hierarchical -filter "PARENT_CELL == $cell && NAME == $name"]
if {[llength $net] == 0} {
puts "ERROR: No matching net found for '$cell' matching '$name'."
exit -1
}
puts "ERROR: No matching net found for '$cell' matching '$name_match'."
exit -1
return $net;
}
proc find_cell_pins {cell name_match {should_exist 1}} {
set hier_sep [get_hierarchy_separator]
set matching_pins {}
foreach pin [get_pins -of_objects $cell] {
set name [get_property NAME $pin]
if {[regexp $name_match $name]} {
set name_prefix [regex_escape "${cell}${hier_sep}"]
set pattern "${name_prefix}${name_match}"
if {[regexp $pattern $name]} {
lappend matching_pins $pin
}
}
@ -95,15 +157,31 @@ proc find_cell_pins {cell name_match {should_exist 1}} {
return $matching_pins
}
proc get_cell_pin {cell name_match} {
foreach pin [get_pins -of_objects $cell] {
set name [get_property NAME $pin]
if {$name == $name_match} {
return $pin
}
proc get_cell_pin {cell name} {
set pin [get_pins -of_objects $cell -filter "NAME == $name"]
if {[llength $pin] == 0} {
puts "ERROR: No matching pin found for '$cell' matching '$name'."
exit -1
}
puts "ERROR: No matching pin found for '$cell' matching '$name_match'."
exit -1
return $pin
}
# Remove `cell` from the netlist and log the operation.
#
# Arguments:
#   cell - the cell handed to remove_cell
#
# Reads the namespace variable `debug`; when it is true a confirmation line is
# printed after the removal.
proc remove_cell_from_netlist {cell} {
    variable debug

    puts "INFO: Removing cell '$cell' from the netlist."

    # Disabled: explicit disconnection of every pin prior to removal. Kept for
    # reference only; the cell is passed to remove_cell as-is.
    #foreach pin [get_pins -quiet -of_objects $cell] {
    #    foreach net [get_nets -quiet -of_objects $pin] {
    #        disconnect_net -net $net -objects $pin
    #        if {$debug} {puts "DEBUG: Disconnected net '$net' from pin '$pin'."}
    #    }
    #}

    remove_cell $cell

    if {$debug} {
        puts "DEBUG: Cell '$cell' was removed successfully."
    }
}
proc replace_pin_source {pin source_pin} {
@ -141,10 +219,42 @@ proc replace_pin_source {pin source_pin} {
if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."}
}
proc create_register_next {reg_cell prefix_name} {
# Find what drives `input_net`.
#
# Leaf pins with DIRECTION == "OUT" are looked up first; only when there are
# none are ports with DIRECTION == "IN" considered.
#
# Arguments:
#   input_net    - net to inspect
#   should_exist - forwarded to print_error when nothing drives the net
#                  (print_error is defined outside this block)
#
# Returns the driver pin or port. When several candidates exist a warning is
# printed and the first one is returned. When there is none, the empty result
# of the port lookup is returned after print_error has been called.
proc find_net_driver {input_net {should_exist 1}} {
    set pins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}]
    set pin_count [llength $pins]
    if {$pin_count == 1} {
        return $pins
    }
    if {$pin_count > 1} {
        puts "WARNING: Multiple driver pins found for '$input_net'."
        return [lindex $pins 0]
    }

    # No leaf pin drives the net: fall back to input ports.
    set ports [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}]
    set port_count [llength $ports]
    if {$port_count > 1} {
        puts "WARNING: Multiple driver ports found for '$input_net'."
        return [lindex $ports 0]
    }
    if {$port_count == 0} {
        print_error "No driver found for '$input_net'." $should_exist
    }
    return $ports
}
# Find the driver of the net attached to `input_pin`.
#
# Arguments:
#   input_pin    - pin whose net is looked up
#   should_exist - forwarded to print_error when the pin has no net
#                  (print_error is defined outside this block)
#
# Returns the result of find_net_driver for the pin's net, or "" when no net
# is connected. More than one connected net is fatal (exit -1).
proc find_pin_driver {input_pin {should_exist 1}} {
    set nets [get_nets -quiet -of_objects $input_pin]
    set net_count [llength $nets]

    if {$net_count > 1} {
        puts "ERROR: Multiple nets connected to pin '$input_pin'."
        exit -1
    }
    if {$net_count == 0} {
        print_error "No net connected to pin '$input_pin'." $should_exist
        return ""
    }

    # NOTE(review): should_exist is not passed on, so find_net_driver runs with
    # its default value of 1 - confirm that this is intended.
    return [find_net_driver $nets]
}
proc create_register_next {parent reg_cell} {
variable debug
set reg_d_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/D"}]
set hier_sep [get_hierarchy_separator]
set reg_d_pin [get_pins "${reg_cell}${hier_sep}D"]
if {[llength $reg_d_pin] == 0} {
puts "ERROR: No D pin found on register cell '$reg_cell'."
exit -1
@ -167,7 +277,7 @@ proc create_register_next {reg_cell prefix_name} {
set register_type [get_property REF_NAME $reg_cell]
if {$register_type == "FDRE"} {
set reg_r_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/R"}]
set reg_r_pin [get_pins "${reg_cell}${hier_sep}R"]
if {[llength $reg_r_pin] == 0} {
puts "ERROR: No R pin found on FDRE cell '$reg_cell'."
exit -1
@ -184,7 +294,7 @@ proc create_register_next {reg_cell prefix_name} {
exit -1
}
} elseif {$register_type == "FDSE"} {
set reg_s_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/S"}]
set reg_s_pin [get_pins "${reg_cell}${hier_sep}S"]
if {[llength $reg_s_pin] == 0} {
puts "ERROR: No S pin found on FDSE cell '$reg_cell'."
exit -1
@ -229,7 +339,7 @@ proc create_register_next {reg_cell prefix_name} {
# Use a 2x1 LUT to describe the logic:
# FDRE: O = I1 ? 0 : I0; where I0=D, I1=R
# FDSE: O = I1 ? 1 : I0; where I0=D, I1=S
set lut_name [unique_cell_name $prefix_name]
set lut_name [unique_cell_name "${parent}${hier_sep}raddr_next"]
set lut_cell [create_cell -reference LUT2 $lut_name]
puts "INFO: Created lut cell: '$lut_cell'"
@ -242,7 +352,7 @@ proc create_register_next {reg_cell prefix_name} {
exit 1
}
set lut_i0_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I0"}]
set lut_i0_pin [get_pins "${lut_cell}${hier_sep}I0"]
if {[llength $lut_i0_pin] == 0} {
puts "ERROR: No I0 pin found on FDSE cell '$lut_cell'."
exit -1
@ -251,7 +361,7 @@ proc create_register_next {reg_cell prefix_name} {
exit -1
}
set lut_i1_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I1"}]
set lut_i1_pin [get_pins "${lut_cell}${hier_sep}I1"]
if {[llength $lut_i1_pin] == 0} {
puts "ERROR: No I1 pin found on FDSE cell '$lut_cell'."
exit -1
@ -260,7 +370,7 @@ proc create_register_next {reg_cell prefix_name} {
exit -1
}
set lut_o_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/O"}]
set lut_o_pin [get_pins "${lut_cell}${hier_sep}O"]
if {[llength $lut_o_pin] == 0} {
puts "ERROR: No O pin found on FDSE cell '$lut_cell'."
exit -1
@ -278,19 +388,22 @@ proc create_register_next {reg_cell prefix_name} {
return $lut_o_pin
}
proc getOrCreateVCCPin {prefix_name} {
proc getOrCreateVCCPin {parent} {
variable debug
set vcc_cell ""
set vcc_cells [get_cells -quiet -filter {REF_NAME == VCC}]
if {[llength $vcc_cells] == 0} {
set cell_name [unique_cell_name $prefix_name]
set hier_sep [get_hierarchy_separator]
set cell_name "${parent}${hier_sep}VCC"
set vcc_cell [get_cells -quiet $cell_name]
if {[llength $vcc_cell] == 0} {
set vcc_cell [create_cell -reference VCC $cell_name]
puts "INFO: Created VCC cell: '$vcc_cell'"
} else {
set vcc_cell [lindex $vcc_cells 0]
} elseif {[llength $vcc_cell] > 1} {
puts "ERROR: Multiple VCC cells found with name '$cell_name'."
exit -1
}
set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}]
set vcc_pin [get_pins "${vcc_cell}${hier_sep}P"]
if {[llength $vcc_pin] == 0} {
puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'."
exit -1
@ -298,22 +411,26 @@ proc getOrCreateVCCPin {prefix_name} {
puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'."
exit -1
}
return $vcc_pin
}
proc getOrCreateGNDPin {prefix_name} {
proc getOrCreateGNDPin {parent} {
variable debug
set gnd_cell ""
set gnd_cells [get_cells -quiet -filter {REF_NAME == GND}]
if {[llength $gnd_cells] == 0} {
set cell_name [unique_cell_name $prefix_name]
set hier_sep [get_hierarchy_separator]
set cell_name "${parent}${hier_sep}GND"
set gnd_cell [get_cells -quiet $cell_name]
if {[llength $gnd_cell] == 0} {
set gnd_cell [create_cell -reference GND $cell_name]
puts "INFO: Created GND cell: '$gnd_cell'"
} else {
set gnd_cell [lindex $gnd_cells 0]
} elseif {[llength $gnd_cell] > 1} {
puts "ERROR: Multiple GND cells found with name '$cell_name'."
exit -1
}
set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}]
set gnd_pin [get_pins "${gnd_cell}${hier_sep}G"]
if {[llength $gnd_pin] == 0} {
puts "ERROR: No GND pin found on GND cell '$gnd_cell'."
exit -1
@ -321,6 +438,7 @@ proc getOrCreateGNDPin {prefix_name} {
puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'."
exit -1
}
return $gnd_pin
}
@ -338,35 +456,6 @@ proc find_net_sinks {input_net {should_exist 1}} {
return $sink_pins
}
# Find what drives `input_net`.
#
# Leaf pins with DIRECTION == "OUT" are looked up first; only when there are
# none are ports with DIRECTION == "IN" considered.
#
# Arguments:
#   input_net    - net to inspect
#   should_exist - forwarded to print_error when nothing drives the net
#                  (print_error is defined outside this block)
#
# Returns the driver pin or port. When several candidates exist a warning is
# printed and the first one is returned. When there is none, the empty result
# of the port lookup is returned after print_error has been called.
proc find_net_driver {input_net {should_exist 1}} {
    set pins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}]
    set pin_count [llength $pins]
    if {$pin_count == 1} {
        return $pins
    }
    if {$pin_count > 1} {
        puts "WARNING: Multiple driver pins found for '$input_net'."
        return [lindex $pins 0]
    }

    # No leaf pin drives the net: fall back to input ports.
    set ports [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}]
    set port_count [llength $ports]
    if {$port_count > 1} {
        puts "WARNING: Multiple driver ports found for '$input_net'."
        return [lindex $ports 0]
    }
    if {$port_count == 0} {
        print_error "No driver found for '$input_net'." $should_exist
    }
    return $ports
}
# Find the driver of the net attached to `input_pin`.
#
# Arguments:
#   input_pin    - pin whose net is looked up
#   should_exist - forwarded to print_error when the pin has no net
#                  (print_error is defined outside this block)
#
# Returns the result of find_net_driver for the pin's net, or "" when no net
# is connected. More than one connected net is fatal (exit -1).
proc find_pin_driver {input_pin {should_exist 1}} {
    set net [get_nets -quiet -of_objects $input_pin]
    if {[llength $net] == 0} {
        print_error "No net connected to pin '$input_pin'." $should_exist
        # If print_error returns (presumably the non-fatal should_exist == 0
        # case - verify against its definition), stop here instead of handing
        # an empty net to find_net_driver.
        return ""
    } elseif {[llength $net] > 1} {
        puts "ERROR: Multiple nets connected to pin '$input_pin'."
        exit -1
    }
    return [find_net_driver $net]
}
proc find_matching_nets {cell nets match repl} {
set matching_nets {}
foreach net $nets {
@ -386,6 +475,25 @@ proc find_matching_nets {cell nets match repl} {
return $matching_nets
}
# Map every entry of `pins` to the corresponding pin on `cell`.
#
# For each entry the target pin name is produced with
# `str_replace $pin $match $repl` and resolved through get_cell_pin.
#
# Arguments:
#   cell  - cell on which the renamed pins are looked up
#   pins  - list of source pins
#   match - text to be replaced in each source pin
#   repl  - replacement text
#
# Returns the list of resolved pins, in the order of `pins`. The script exits
# with -1 when nothing was resolved or when the number of resolved pins differs
# from the number of source pins.
proc find_matching_pins {cell pins match repl} {
    set found {}
    foreach pin $pins {
        set candidate [get_cell_pin $cell [str_replace $pin $match $repl]]
        if {$candidate ne ""} {
            lappend found $candidate
        }
    }

    set found_count [llength $found]
    if {$found_count == 0} {
        puts "ERROR: No matching pins found for '$pins'."
        exit -1
    }
    if {$found_count != [llength $pins]} {
        puts "ERROR: Mismatch in number of matching pins."
        exit -1
    }
    return $found
}
proc replace_net_source {net source_pin} {
foreach pin [find_net_sinks $net 0] {
replace_pin_source $pin $source_pin
@ -397,6 +505,8 @@ proc resolve_async_bram {inst} {
puts "INFO: Resolving asynchronous BRAM patch: '$inst'."
set hier_sep [get_hierarchy_separator]
set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"]
set read_s_net [find_cell_nets $inst "read_s$"]
set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"]
@ -433,7 +543,7 @@ proc resolve_async_bram {inst} {
}
# Create register next cell and return output pin
set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"]
set reg_next_pin [create_register_next $inst $raddr_src_cell]
if {$reg_next_pin == ""} {
puts "ERROR: failed to create register next value for '$raddr_src_cell'."
exit -1
@ -444,7 +554,7 @@ proc resolve_async_bram {inst} {
# Find the CE pin on raddr_src_cell
if {$reg_ce_src_pin == ""} {
set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}]
set reg_ce_pin [get_pins "${raddr_src_cell}${hier_sep}CE"]
if {[llength $reg_ce_pin] == 0} {
puts "ERROR: No CE pin found on register cell '$raddr_src_cell'."
exit -1
@ -466,9 +576,10 @@ proc resolve_async_bram {inst} {
# do we have a fully registered read address?
if {[llength $reg_next_pins] == [llength $raddr_w_nets]} {
puts "INFO: Fully registered read address detected."
# Connect all reg_next_pins to all input pins attached to raddr_s_nets
set addr_width [llength $raddr_w_nets]
for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} {
set raddr_w_net [lindex $raddr_w_nets $addr_idx]
set raddr_s_net [lindex $raddr_s_nets $addr_idx]
set reg_next_pin [lindex $reg_next_pins $addr_idx]
puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins."
@ -481,26 +592,35 @@ proc resolve_async_bram {inst} {
replace_net_source $read_s_net $reg_ce_src_pin
# Create Const<1>'s pin
set vcc_pin [getOrCreateVCCPin "$inst/VCC"]
set vcc_pin [getOrCreateVCCPin $inst]
# Connect vcc_pin to all input pins attached to is_raddr_reg_net
puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins."
replace_net_source $is_raddr_reg_net $vcc_pin
# Remove all async_ram cells
foreach cell [find_nested_cells $inst "g_async_ram.*" 0] {
remove_cell_from_netlist $cell
}
} else {
puts "WARNING: Not all read addresses are registered!"
# Create Const<0>'s pin
set gnd_pin [getOrCreateGNDPin "$inst/GND"]
set gnd_pin [getOrCreateGNDPin $inst]
# Connect gnd_pin to all input pins attached to is_raddr_reg_net
puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins."
replace_net_source $is_raddr_reg_net $gnd_pin
# Remove all sync_ram cells
foreach cell [find_nested_cells $inst "g_sync_ram.*" 0] {
remove_cell_from_netlist $cell
}
}
# Remove all placeholder cells
# Remove placeholder cell
foreach cell [find_nested_cells $inst "placeholder$"] {
remove_cell $cell
if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."}
remove_cell_from_netlist $cell
}
}
@ -519,7 +639,26 @@ proc resolve_async_brams {} {
}
}
# Print every async BRAM patch cell in the design together with its descendants.
#
# Patch cells are those whose REF_NAME matches "*VX_async_ram_patch*". For each
# one, every cell returned by find_cell_descendants is listed with its
# REF_NAME. A single informational line is printed when no patch cell exists.
proc dump_async_bram_cells {} {
    set patch_cells [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}]
    if {[llength $patch_cells] == 0} {
        puts "INFO: No async BRAM patch cells found in the design."
        return
    }

    foreach patch_cell $patch_cells {
        puts "INFO: Found async BRAM patch cell: '$patch_cell'."
        foreach child [find_cell_descendants $patch_cell] {
            puts "INFO: child cell: '$child', type: '[get_property REF_NAME $child]'"
        }
    }
}
}
# Top-level invocation: run the async BRAM resolution pass when this script is sourced.
vortex::resolve_async_brams
# Debug aid (disabled): uncomment to print the async BRAM patch cells and their child cells.
#vortex::dump_async_bram_cells

View file

@ -1,3 +1,16 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Function to export netlist to a Graphviz DOT file
proc export_netlist {dot_file_name} {
# Open the DOT file for writing

View file

@ -47,6 +47,9 @@ TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope
# analyze build report
vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary
# resuming build for routing
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.route_design" make > build.log 2>&1 &
# running test
FPGA_BIN_DIR=<bin_dir> TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo

View file

@ -180,6 +180,7 @@ ifeq ($(TARGET), hw)
cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin
cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin
cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_utilization_placed.rpt $(BUILD_DIR)/bin
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin
endif

View file

@ -78,10 +78,10 @@ public:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
_value = PLATFORM_MEMORY_BANKS;
break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;

View file

@ -65,7 +65,7 @@ public:
~vx_device() {
#ifdef VM_ENABLE
global_mem_.release(PAGE_TABLE_BASE_ADDR);
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
// page_table_mem_->release(i->second << MEM_PAGE_SIZE);
delete virtual_mem_;
delete page_table_mem_;
@ -113,10 +113,10 @@ public:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
_value = PLATFORM_MEMORY_BANKS;
break;
case VX_CAPS_MEM_BANK_SIZE:
_value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS);
_value = 1ull << (MEM_ADDR_WIDTH / PLATFORM_MEMORY_BANKS);
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
@ -164,7 +164,7 @@ public:
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
return 0;
// Now all conditions are not met. Return true because the address needs translation
// Now all conditions are not met. Return true because the address needs translation
return 1;
}
@ -277,7 +277,7 @@ public:
#ifdef VM_ENABLE
uint64_t pAddr = page_table_walk(dest_addr);
// uint64_t pAddr;
// try {
// try {
// pAddr = page_table_walk(dest_addr);
// } catch ( Page_Fault_Exception ) {
// // HW: place holder
@ -466,18 +466,18 @@ public:
CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), {
return err;
});
if (virtual_mem_ == nullptr) {
// virtual_mem_ does not interfere with physical mem, so no need to free space
return 1;
}
if (VM_ADDR_MODE == BARE)
DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)");
else
CHECK_ERR(alloc_page_table(&pt_addr),{return err;});
CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;});
return 0;
}
@ -604,7 +604,7 @@ public:
}
else
{
// Leaf node found.
// Leaf node found.
// Check RWX permissions according to access type.
if (pte.r == 0)
{

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -27,9 +27,9 @@ class SimObjectBase;
///////////////////////////////////////////////////////////////////////////////
class SimPortBase {
public:
public:
virtual ~SimPortBase() {}
SimObjectBase* module() const {
return module_;
}
@ -92,7 +92,7 @@ public:
auto cycles = queue_.front().cycles;
queue_.pop();
return cycles;
}
}
void tx_callback(const TxCallback& callback) {
tx_cb_ = callback;
@ -137,7 +137,7 @@ public:
typedef std::shared_ptr<SimEventBase> Ptr;
virtual ~SimEventBase() {}
virtual void fire() const = 0;
uint64_t cycles() const {
@ -161,7 +161,7 @@ public:
typedef std::function<void (const Pkt&)> Func;
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
: SimEventBase(cycles)
, func_(func)
, pkt_(pkt)
@ -194,8 +194,8 @@ public:
const_cast<SimPort<Pkt>*>(port_)->transfer(pkt_, cycles_);
}
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
: SimEventBase(cycles)
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
: SimEventBase(cycles)
, port_(port)
, pkt_(pkt)
{}
@ -209,7 +209,7 @@ public:
}
protected:
const SimPort<Pkt>* port_;
const SimPort<Pkt>* port_;
Pkt pkt_;
static MemoryPool<SimPortEvent<Pkt>> allocator_;
@ -230,11 +230,11 @@ public:
const std::string& name() const {
return name_;
}
}
protected:
SimObjectBase(const SimContext& ctx, const char* name);
SimObjectBase(const SimContext& ctx, const std::string& name);
private:
@ -259,8 +259,8 @@ public:
protected:
SimObject(const SimContext& ctx, const char* name)
: SimObjectBase(ctx, name)
SimObject(const SimContext& ctx, const std::string& name)
: SimObjectBase(ctx, name)
{}
private:
@ -283,9 +283,9 @@ private:
};
class SimContext {
private:
private:
SimContext() {}
friend class SimPlatform;
};
@ -320,10 +320,10 @@ public:
template <typename Pkt>
void schedule(const typename SimCallEvent<Pkt>::Func& callback,
const Pkt& pkt,
uint64_t delay) {
const Pkt& pkt,
uint64_t delay) {
assert(delay != 0);
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
auto evt = std::make_shared<SimCallEvent<Pkt>>(callback, pkt, cycles_ + delay);
events_.emplace_back(evt);
}
@ -341,10 +341,10 @@ public:
auto evt_it_end = events_.end();
while (evt_it != evt_it_end) {
auto& event = *evt_it;
if (cycles_ >= event->cycles()) {
if (cycles_ >= event->cycles()) {
event->fire();
evt_it = events_.erase(evt_it);
} else {
} else {
++evt_it;
}
}
@ -352,7 +352,7 @@ public:
for (auto& object : objects_) {
object->do_tick();
}
// advance clock
// advance clock
++cycles_;
}
@ -390,8 +390,8 @@ private:
///////////////////////////////////////////////////////////////////////////////
inline SimObjectBase::SimObjectBase(const SimContext&, const char* name)
: name_(name)
inline SimObjectBase::SimObjectBase(const SimContext&, const std::string& name)
: name_(name)
{}
template <typename Impl>
@ -403,8 +403,8 @@ typename SimObject<Impl>::Ptr SimObject<Impl>::Create(Args&&... args) {
template <typename Pkt>
void SimPort<Pkt>::push(const Pkt& pkt, uint64_t delay) const {
if (peer_ && !tx_cb_) {
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
reinterpret_cast<const SimPort<Pkt>*>(peer_)->push(pkt, delay);
} else {
SimPlatform::instance().schedule(this, pkt, delay);
}
}
}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -47,7 +47,7 @@ public:
, indent_(indent, ' ')
, owner_(nullptr)
{}
explicit IndentStream(std::ostream& dest, int indent = 4)
: dest_(dest.rdbuf())
, isBeginLine_(true)
@ -76,3 +76,14 @@ private:
std::string indent_;
std::ostream* owner_;
};
// Builds a std::string from a printf-style format string.
//
// fmt   printf-style format; passed to std::snprintf unchanged.
// args  values consumed by the conversion specifiers in fmt.
//
// Returns the formatted text without the terminating NUL.
// Throws std::runtime_error when std::snprintf reports a formatting error.
template <typename... Args>
std::string StrFormat(const std::string& fmt, Args... args) {
  // First pass: measure only. The raw result is checked before any arithmetic
  // so a negative error code is never mixed with the "+ 1" for the terminator.
  const int length = std::snprintf(nullptr, 0, fmt.c_str(), args...);
  if (length < 0) {
    throw std::runtime_error("Error during formatting.");
  }
  // Second pass: format directly into the result, avoiding a temporary buffer
  // and an extra copy. snprintf's terminating NUL lands on the string's own
  // terminator slot.
  std::string result(static_cast<std::size_t>(length), '\0');
  std::snprintf(&result[0], static_cast<std::size_t>(length) + 1, fmt.c_str(), args...);
  return result;
}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,81 +21,77 @@ class CacheCluster : public SimObject<CacheCluster> {
public:
std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
std::vector<SimPort<MemReq>> MemReqPorts;
std::vector<SimPort<MemRsp>> MemRspPorts;
CacheCluster(const SimContext& ctx,
const char* name,
uint32_t num_inputs,
uint32_t num_caches,
uint32_t num_requests,
const CacheSim::Config& cache_config)
CacheCluster(const SimContext& ctx,
const char* name,
uint32_t num_inputs,
uint32_t num_units,
const CacheSim::Config& cache_config)
: SimObject(ctx, name)
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(num_requests, this))
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(num_requests, this))
, MemReqPort(this)
, MemRspPort(this)
, caches_(MAX(num_caches, 0x1)) {
, CoreReqPorts(num_inputs, std::vector<SimPort<MemReq>>(cache_config.num_inputs, this))
, CoreRspPorts(num_inputs, std::vector<SimPort<MemRsp>>(cache_config.num_inputs, this))
, MemReqPorts(cache_config.mem_ports, this)
, MemRspPorts(cache_config.mem_ports, this)
, caches_(MAX(num_units, 0x1)) {
CacheSim::Config cache_config2(cache_config);
if (0 == num_caches) {
num_caches = 1;
if (0 == num_units) {
num_units = 1;
cache_config2.bypass = true;
}
char sname[100];
std::vector<MemSwitch::Ptr> input_arbs(num_inputs);
for (uint32_t j = 0; j < num_inputs; ++j) {
snprintf(sname, 100, "%s-input-arb%d", name, j);
input_arbs.at(j) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_requests, cache_config.num_inputs);
for (uint32_t i = 0; i < num_requests; ++i) {
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(j)->ReqIn.at(i));
input_arbs.at(j)->RspIn.at(i).bind(&this->CoreRspPorts.at(j).at(i));
}
}
std::vector<MemSwitch::Ptr> mem_arbs(cache_config.num_inputs);
// Arbitrate incoming core interfaces
std::vector<MemArbiter::Ptr> input_arbs(cache_config.num_inputs);
for (uint32_t i = 0; i < cache_config.num_inputs; ++i) {
snprintf(sname, 100, "%s-mem-arb%d", name, i);
mem_arbs.at(i) = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_inputs, num_caches);
snprintf(sname, 100, "%s-input-arb%d", name, i);
input_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_inputs, num_units);
for (uint32_t j = 0; j < num_inputs; ++j) {
input_arbs.at(j)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(j));
mem_arbs.at(i)->RspIn.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
this->CoreReqPorts.at(j).at(i).bind(&input_arbs.at(i)->ReqIn.at(j));
input_arbs.at(i)->RspIn.at(j).bind(&this->CoreRspPorts.at(j).at(i));
}
}
snprintf(sname, 100, "%s-cache-arb", name);
auto cache_arb = MemSwitch::Create(sname, ArbiterType::RoundRobin, num_caches, 1);
// Arbitrate outgoing memory interfaces
std::vector<MemArbiter::Ptr> mem_arbs(cache_config.mem_ports);
for (uint32_t i = 0; i < cache_config.mem_ports; ++i) {
snprintf(sname, 100, "%s-mem-arb%d", name, i);
mem_arbs.at(i) = MemArbiter::Create(sname, ArbiterType::RoundRobin, num_units, 1);
mem_arbs.at(i)->ReqOut.at(0).bind(&this->MemReqPorts.at(i));
this->MemRspPorts.at(i).bind(&mem_arbs.at(i)->RspOut.at(0));
}
for (uint32_t i = 0; i < num_caches; ++i) {
// Connect caches
for (uint32_t i = 0; i < num_units; ++i) {
snprintf(sname, 100, "%s-cache%d", name, i);
caches_.at(i) = CacheSim::Create(sname, cache_config2);
for (uint32_t j = 0; j < cache_config.num_inputs; ++j) {
mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
input_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
caches_.at(i)->CoreRspPorts.at(j).bind(&input_arbs.at(j)->RspOut.at(i));
}
caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
for (uint32_t j = 0; j < cache_config.mem_ports; ++j) {
caches_.at(i)->MemReqPorts.at(j).bind(&mem_arbs.at(j)->ReqIn.at(i));
mem_arbs.at(j)->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(j));
}
}
cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
this->MemRspPort.bind(&cache_arb->RspOut.at(0));
}
~CacheCluster() {}
void reset() {}
void tick() {}
CacheSim::PerfStats perf_stats() const {
CacheSim::PerfStats perf;
for (auto cache : caches_) {
perf += cache->perf_stats();
}
}
return perf;
}

View file

@ -19,7 +19,6 @@
#include <vector>
#include <list>
#include <queue>
#include <string.h>
using namespace vortex;
@ -305,8 +304,8 @@ private:
Config config_;
params_t params_;
std::vector<bank_t> banks_;
MemSwitch::Ptr bank_switch_;
MemSwitch::Ptr bypass_switch_;
MemArbiter::Ptr bank_arb_;
std::vector<MemArbiter::Ptr> nc_arbs_;
std::vector<SimPort<MemReq>> mem_req_ports_;
std::vector<SimPort<MemRsp>> mem_rsp_ports_;
std::vector<bank_req_t> pipeline_reqs_;
@ -322,88 +321,51 @@ public:
, config_(config)
, params_(config)
, banks_((1 << config.B), {config, params_})
, nc_arbs_(config.mem_ports)
, mem_req_ports_((1 << config.B), simobject)
, mem_rsp_ports_((1 << config.B), simobject)
, pipeline_reqs_((1 << config.B), config.ports_per_bank)
{
char sname[100];
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
if (config_.bypass) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
snprintf(sname, 100, "%s-bypass-arb", simobject->name().c_str());
auto bypass_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, config_.num_inputs, config_.mem_ports);
for (uint32_t i = 0; i < config_.num_inputs; ++i) {
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
simobject->CoreReqPorts.at(i).bind(&bypass_arb->ReqIn.at(i));
bypass_arb->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
}
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
bypass_arb->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i));
simobject->MemRspPorts.at(i).bind(&bypass_arb->RspOut.at(i));
}
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
return;
}
if (strcmp(simobject->name().c_str(), "l3cache")) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
// create non-cacheable arbiter
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
snprintf(sname, 100, "%s-nc-arb%d", simobject->name().c_str(), i);
nc_arbs_.at(i) = MemArbiter::Create(sname, ArbiterType::Priority, 2, 1);
}
if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
} else {
// TODO: Change this into a crossbar
uint32_t max = MAX(2, config_.num_inputs);
//printf("%s connecting\n", simobject_->name().c_str());
//3
if (config.B != 0) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
}
} else {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
}
// Connect non-cacheable arbiter output to outgoing memory ports
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
nc_arbs_.at(i)->ReqOut.at(0).bind(&simobject->MemReqPorts.at(i));
simobject->MemRspPorts.at(i).bind(&nc_arbs_.at(i)->RspOut.at(0));
}
if (config.B != 0)
{
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
{
//1
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
//2
if (config_.num_inputs > 1) {
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
}
} else {
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
}
}
else
{
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
// Create bank's memory arbiter
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
auto bank_mem_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, (1 << config.B), config_.mem_ports);
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_mem_arb->ReqIn.at(i));
bank_mem_arb->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
// Connect bank's memory arbiter to non-cacheable arbiter's input 0
for (uint32_t i = 0; i < config_.mem_ports; ++i) {
bank_mem_arb->ReqOut.at(i).bind(&nc_arbs_.at(i)->ReqIn.at(0));
nc_arbs_.at(i)->RspIn.at(0).bind(&bank_mem_arb->RspOut.at(i));
}
// calculate cache initialization cycles
@ -434,8 +396,8 @@ public:
}
// handle cache bypass responses
{
auto& bypass_port = bypass_switch_->RspIn.at(1);
for (uint32_t i = 0, n = config_.mem_ports; i < n; ++i) {
auto& bypass_port = nc_arbs_.at(i)->RspIn.at(1);
if (!bypass_port.empty()) {
auto& mem_rsp = bypass_port.front();
this->processBypassResponse(mem_rsp);
@ -468,7 +430,7 @@ public:
continue;
auto& mem_rsp = mem_rsp_port.front();
DT(3, simobject_->name() << "-bank" << bank_id << " fill-rsp: " << mem_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-fill-rsp: " << mem_rsp);
pipeline_req.type = bank_req_t::Fill;
pipeline_req.tag = mem_rsp.tag;
mem_rsp_port.pop();
@ -533,7 +495,7 @@ public:
bank_req.type = bank_req_t::Core;
bank_req.write = core_req.write;
pipeline_req = bank_req;
DT(3, simobject_->name() << " core-req: " << core_req);
DT(3, simobject_->name() << "-core-req: " << core_req);
}
if (core_req.write)
@ -561,21 +523,22 @@ private:
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
}
void processBypassRequest(const MemReq& core_req, uint32_t req_id) {
{
MemReq mem_req(core_req);
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
bypass_switch_->ReqIn.at(1).push(mem_req, 1);
DT(3, simobject_->name() << " bypass-dram-req: " << mem_req);
uint32_t mem_port = req_id % config_.mem_ports;
nc_arbs_.at(mem_port)->ReqIn.at(1).push(mem_req, 1);
DT(3, simobject_->name() << "-bypass-dram-req: " << mem_req);
}
if (core_req.write && config_.write_reponse) {
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1);
DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bypass-core-rsp: " << core_rsp);
}
}
@ -605,7 +568,7 @@ private:
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-bank" << bank_id << " replay: " << core_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-replay: " << core_rsp);
}
}
} break;
@ -649,7 +612,7 @@ private:
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
} else {
// mark line as dirty
hit_line.dirty = true;
@ -662,7 +625,7 @@ private:
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
}
}
} else {
@ -681,7 +644,7 @@ private:
mem_req.write = true;
mem_req.cid = pipeline_req.cid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " writeback: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-writeback: " << mem_req);
++perf_stats_.evictions;
}
}
@ -695,7 +658,7 @@ private:
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-writethrough: " << mem_req);
}
// send core response
if (config_.write_reponse) {
@ -704,7 +667,7 @@ private:
continue;
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp);
DT(3, simobject_->name() << "-bank" << bank_id << "-core-rsp: " << core_rsp);
}
}
} else {
@ -713,7 +676,7 @@ private:
// allocate MSHR
auto mshr_id = bank.mshr.allocate(pipeline_req, (free_line_id != -1) ? free_line_id : repl_line_id);
DT(3, simobject_->name() << "-bank" << bank_id << " mshr-enqueue: " << pipeline_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-mshr-enqueue: " << pipeline_req);
// send fill request
if (!mshr_pending) {
@ -724,7 +687,7 @@ private:
mem_req.cid = pipeline_req.cid;
mem_req.uuid = pipeline_req.uuid;
mem_req_ports_.at(bank_id).push(mem_req, 1);
DT(3, simobject_->name() << "-bank" << bank_id << " fill: " << mem_req);
DT(3, simobject_->name() << "-bank" << bank_id << "-fill: " << mem_req);
++pending_fill_reqs_;
}
}
@ -743,8 +706,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
: SimObject<CacheSim>(ctx, name)
, CoreReqPorts(config.num_inputs, this)
, CoreRspPorts(config.num_inputs, this)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, MemReqPorts(config.mem_ports, this)
, MemRspPorts(config.mem_ports, this)
, impl_(new Impl(this, config))
{}

View file

@ -30,6 +30,7 @@ public:
uint8_t addr_width; // word address bits
uint8_t ports_per_bank; // number of ports per bank
uint8_t num_inputs; // number of inputs
uint8_t mem_ports; // memory ports
bool write_back; // is write-back
bool write_reponse; // enable write response
uint16_t mshr_size; // MSHR buffer size

View file

@ -20,9 +20,9 @@ Cluster::Cluster(const SimContext& ctx,
ProcessorImpl* processor,
const Arch &arch,
const DCRS &dcrs)
: SimObject(ctx, "cluster")
, mem_req_port(this)
, mem_rsp_port(this)
: SimObject(ctx, StrFormat("cluster%d", cluster_id))
, mem_req_ports(L2_MEM_PORTS, this)
, mem_rsp_ports(L2_MEM_PORTS, this)
, cluster_id_(cluster_id)
, processor_(processor)
, sockets_(NUM_SOCKETS)
@ -35,31 +35,14 @@ Cluster::Cluster(const SimContext& ctx,
// create sockets
snprintf(sname, 100, "cluster%d-icache-arb", cluster_id);
auto icache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
snprintf(sname, 100, "cluster%d-dcache-arb", cluster_id);
auto dcache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
uint32_t socket_id = cluster_id * sockets_per_cluster + i;
auto socket = Socket::Create(socket_id,
this,
arch,
dcrs);
socket->icache_mem_req_port.bind(&icache_switch->ReqIn.at(i));
icache_switch->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
socket->dcache_mem_req_port.bind(&dcache_switch->ReqIn.at(i));
dcache_switch->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);
sockets_.at(i) = socket;
sockets_.at(i) = Socket::Create(socket_id, this, arch, dcrs);
}
// Create l2cache
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
snprintf(sname, 100, "%s-l2cache", this->name().c_str());
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
!L2_ENABLED,
log2ceil(L2_CACHE_SIZE),// C
@ -69,21 +52,27 @@ Cluster::Cluster(const SimContext& ctx,
log2ceil(L2_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports
2, // request size
L2_NUM_REQS, // request size
L2_MEM_PORTS, // memory ports
L2_WRITEBACK, // write-back
false, // write response
L2_MSHR_SIZE, // mshr size
2, // pipeline latency
});
l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));
// connect l2cache core interfaces
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
for (uint32_t j = 0; j < L1_MEM_PORTS; ++j) {
sockets_.at(i)->mem_req_ports.at(j).bind(&l2cache_->CoreReqPorts.at(i * L1_MEM_PORTS + j));
l2cache_->CoreRspPorts.at(i * L1_MEM_PORTS + j).bind(&sockets_.at(i)->mem_rsp_ports.at(j));
}
}
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));
dcache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
l2cache_->CoreRspPorts.at(1).bind(&dcache_switch->RspOut.at(0));
// connect l2cache memory interfaces
for (uint32_t i = 0; i < L2_MEM_PORTS; ++i) {
l2cache_->MemReqPorts.at(i).bind(&this->mem_req_ports.at(i));
this->mem_rsp_ports.at(i).bind(&l2cache_->MemRspPorts.at(i));
}
}
Cluster::~Cluster() {

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -32,13 +32,13 @@ public:
CacheSim::PerfStats l2cache;
};
SimPort<MemReq> mem_req_port;
SimPort<MemRsp> mem_rsp_port;
std::vector<SimPort<MemReq>> mem_req_ports;
std::vector<SimPort<MemRsp>> mem_rsp_ports;
Cluster(const SimContext& ctx,
Cluster(const SimContext& ctx,
uint32_t cluster_id,
ProcessorImpl* processor,
const Arch &arch,
ProcessorImpl* processor,
const Arch &arch,
const DCRS &dcrs);
~Cluster();
@ -63,16 +63,16 @@ public:
bool running() const;
int get_exitcode() const;
int get_exitcode() const;
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
PerfStats perf_stats() const;
private:
uint32_t cluster_id_;
ProcessorImpl* processor_;
std::vector<Socket::Ptr> sockets_;
std::vector<Socket::Ptr> sockets_;
std::vector<CoreMask> barriers_;
CacheSim::Ptr l2cache_;
uint32_t cores_per_socket_;

View file

@ -27,10 +27,15 @@ inline constexpr int LSU_WORD_SIZE = (XLEN / 8);
inline constexpr int LSU_CHANNELS = NUM_LSU_LANES;
inline constexpr int LSU_NUM_REQS = (NUM_LSU_BLOCKS * LSU_CHANNELS);
// The dcache uses coalesced memory blocks
inline constexpr int DCACHE_WORD_SIZE = LSU_LINE_SIZE;
inline constexpr int DCACHE_CHANNELS = UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE);
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS);
inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE);
inline constexpr int L2_NUM_REQS = NUM_SOCKETS * L1_MEM_PORTS;
inline constexpr int L3_NUM_REQS = NUM_CLUSTERS * L2_MEM_PORTS;
inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH;

View file

@ -30,7 +30,7 @@ Core::Core(const SimContext& ctx,
Socket* socket,
const Arch &arch,
const DCRS &dcrs)
: SimObject(ctx, "core")
: SimObject(ctx, StrFormat("core%d", core_id))
, icache_req_ports(1, this)
, icache_rsp_ports(1, this)
, dcache_req_ports(DCACHE_NUM_REQS, this)
@ -44,7 +44,7 @@ Core::Core(const SimContext& ctx,
, operands_(ISSUE_WIDTH)
, dispatchers_((uint32_t)FUType::Count)
, func_units_((uint32_t)FUType::Count)
, lsu_demux_(NUM_LSU_BLOCKS)
, lmem_switch_(NUM_LSU_BLOCKS)
, mem_coalescers_(NUM_LSU_BLOCKS)
, lsu_dcache_adapter_(NUM_LSU_BLOCKS)
, lsu_lmem_adapter_(NUM_LSU_BLOCKS)
@ -59,12 +59,12 @@ Core::Core(const SimContext& ctx,
// create the memory coalescer
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-coalescer%d", core_id, i);
snprintf(sname, 100, "%s-coalescer%d", this->name().c_str(), i);
mem_coalescers_.at(i) = MemCoalescer::Create(sname, LSU_CHANNELS, DCACHE_CHANNELS, DCACHE_WORD_SIZE, LSUQ_OUT_SIZE, 1);
}
// create local memory
snprintf(sname, 100, "core%d-local_mem", core_id);
snprintf(sname, 100, "%s-local_mem", this->name().c_str());
local_mem_ = LocalMem::Create(sname, LocalMem::Config{
(1 << LMEM_LOG_SIZE),
LSU_WORD_SIZE,
@ -73,31 +73,31 @@ Core::Core(const SimContext& ctx,
false
});
// create lsu demux
// create lmem switch
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i);
lsu_demux_.at(i) = LocalMemDemux::Create(sname, 1);
snprintf(sname, 100, "%s-lmem_switch%d", this->name().c_str(), i);
lmem_switch_.at(i) = LocalMemSwitch::Create(sname, 1);
}
// create lsu dcache adapter
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-lsu_dcache_adapter%d", core_id, i);
snprintf(sname, 100, "%s-lsu_dcache_adapter%d", this->name().c_str(), i);
lsu_dcache_adapter_.at(i) = LsuMemAdapter::Create(sname, DCACHE_CHANNELS, 1);
}
// create lsu lmem adapter
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
snprintf(sname, 100, "core%d-lsu_lmem_adapter%d", core_id, i);
snprintf(sname, 100, "%s-lsu_lmem_adapter%d", this->name().c_str(), i);
lsu_lmem_adapter_.at(i) = LsuMemAdapter::Create(sname, LSU_CHANNELS, 1);
}
// connect lmem switch
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
lsu_demux_.at(b)->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn);
mem_coalescers_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspDC);
lmem_switch_.at(b)->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn);
mem_coalescers_.at(b)->RspIn.bind(&lmem_switch_.at(b)->RspDC);
lsu_demux_.at(b)->ReqLmem.bind(&lsu_lmem_adapter_.at(b)->ReqIn);
lsu_lmem_adapter_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspLmem);
lmem_switch_.at(b)->ReqLmem.bind(&lsu_lmem_adapter_.at(b)->ReqIn);
lsu_lmem_adapter_.at(b)->RspIn.bind(&lmem_switch_.at(b)->RspLmem);
}
// connect coalescer-adapter
@ -130,7 +130,7 @@ Core::Core(const SimContext& ctx,
dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_LSU_BLOCKS, NUM_LSU_LANES);
dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_SFU_BLOCKS, NUM_SFU_LANES);
dispatchers_.at((int)FUType::TCU) = SimPlatform::instance().create_object<Dispatcher>(arch, 2, NUM_TCU_BLOCKS, NUM_TCU_LANES);
// initialize execute units
func_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object<AluUnit>(this);
func_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object<FpuUnit>(this);
@ -140,8 +140,8 @@ Core::Core(const SimContext& ctx,
// bind commit arbiters
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
snprintf(sname, 100, "core%d-commit-arb%d", core_id, i);
auto arbiter = TraceSwitch::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
snprintf(sname, 100, "%s-commit-arb%d", this->name().c_str(), i);
auto arbiter = TraceArbiter::Create(sname, ArbiterType::RoundRobin, (uint32_t)FUType::Count, 1);
for (uint32_t j = 0; j < (uint32_t)FUType::Count; ++j) {
func_units_.at(j)->Outputs.at(i).bind(&arbiter->Inputs.at(j));
}

View file

@ -34,7 +34,7 @@ class Socket;
class Arch;
class DCRS;
using TraceSwitch = Mux<instr_trace_t*>;
using TraceArbiter = Arbiter<instr_trace_t*>;
class Core : public SimObject<Core> {
public:
@ -154,7 +154,7 @@ private:
std::vector<Dispatcher::Ptr> dispatchers_;
std::vector<FuncUnit::Ptr> func_units_;
LocalMem::Ptr local_mem_;
std::vector<LocalMemDemux::Ptr> lsu_demux_;
std::vector<LocalMemSwitch::Ptr> lmem_switch_;
std::vector<MemCoalescer::Ptr> mem_coalescers_;
std::vector<LsuMemAdapter::Ptr> lsu_dcache_adapter_;
std::vector<LsuMemAdapter::Ptr> lsu_lmem_adapter_;
@ -169,7 +169,7 @@ private:
PerfStats perf_stats_;
std::vector<TraceSwitch::Ptr> commit_arbs_;
std::vector<TraceArbiter::Ptr> commit_arbs_;
uint32_t commit_exe_;
uint32_t ibuffer_idx_;

View file

@ -1421,7 +1421,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
std::abort();
}
} break;
case Opcode::TCU:
case Opcode::TCU:
{ //TODO - make it data-type flexible
uint32_t mem_bytes = 1;
DP(3, "mem_bytes=" << mem_bytes << std::endl);
@ -1443,7 +1443,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
//LOAD
if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp)
{
{
num_threads_actv = tc_size*tc_size*n_tiles*TC_per_warp;
num_data_per_thread = 1;
}
@ -1456,7 +1456,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
//STORE
if(num_threads > tc_size*tc_size*TC_per_warp)
{
{
num_threads_actv_st = tc_size*tc_size*TC_per_warp;
num_data_per_thread_st = 1;
}
@ -1466,30 +1466,30 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
num_data_per_thread_st = (tc_size*tc_size)/num_threads_per_tc;
}
data_bytes_store = mem_bytes*num_data_per_thread_st;
DP(3, "Num Tiles=" << n_tiles << std::endl);
switch (func3) {
case 0:
{ //Matrix Load
case 0:
{ //Matrix Load
DP (4, "TCU LOAD");
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::TCU_LOAD;
trace->src_regs[0] = {RegType::Integer, rsrc0};
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
trace->data = trace_data;
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
{
if (!warp.tmask.test(t))
continue;
DP(3, "Thread ID" << t);
DP(3, "Thread ID" << t);
uint32_t base_addr = rsdata[t][0].i ;
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_load};
//Load A or B (depends on immsrc)
int loop_offset = 0;
DP(3, "n_tiles = " << n_tiles << "; num_data_per_thread = " << num_data_per_thread <<std::endl);
@ -1502,10 +1502,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]);
}
}
rd_write = true;
rd_write = true;
} break;
case 1:
{
case 1:
{
DP(4, "TCU STORE");
trace->fu_type = FUType::LSU;
trace->lsu_type = LsuType::TCU_STORE;
@ -1513,12 +1513,12 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
trace->data = trace_data;
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
for (uint32_t t = thread_start; t < num_threads_actv_st; ++t)
{
if (!warp.tmask.test(t))
continue;
DP(3, "Thread ID" << t);
DP(3, "Thread ID" << t);
uint32_t base_addr = rsdata[t][0].i ;
trace_data->mem_addrs.at(t) = {base_addr, data_bytes_store};
@ -1529,7 +1529,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0));
*temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n];
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes);
}
}
//Clear the scratchpad
@ -1539,18 +1539,18 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
}
}
break;
case 2:
case 2:
{ //Matrix Multiply
DP(4, "TCU MULTIPLY MAT");
trace->fu_type = FUType::TCU;
trace->tcu_type = TCUType::TCU_MUL;
uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp);
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
for (uint32_t t = thread_start; t < num_threads_actv; ++t)
{
if (!warp.tmask.test(t))
continue;
DP(3, "Thread ID" << t);
DP(3, "Thread ID" << t);
//TC operation [only 1 thread in 1 warp needs to do this]
if (t%threads_per_tc == 0)
{
@ -1563,7 +1563,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size;
uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2;
for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation?
{
{
for (int i = 0; i < tc_size; i++) { //ROW-1
for (int j = 0; j < tc_size; j++) { //COL-2
int sum = 0;

View file

@ -116,12 +116,12 @@ void LsuUnit::tick() {
// handle memory responses
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
auto& lsu_rsp_port = core_->lsu_demux_.at(b)->RspIn;
auto& lsu_rsp_port = core_->lmem_switch_.at(b)->RspIn;
if (lsu_rsp_port.empty())
continue;
auto& state = states_.at(b);
auto& lsu_rsp = lsu_rsp_port.front();
DT(3, this->name() << " mem-rsp: " << lsu_rsp);
DT(3, this->name() << "-mem-rsp: " << lsu_rsp);
auto& entry = state.pending_rd_reqs.at(lsu_rsp.tag);
auto trace = entry.trace;
assert(!entry.mask.none());
@ -146,7 +146,7 @@ void LsuUnit::tick() {
continue;
Outputs.at(iw).push(state.fence_trace, 1);
state.fence_lock = false;
DT(3, this->name() << " fence-unlock: " << state.fence_trace);
DT(3, this->name() << "-fence-unlock: " << state.fence_trace);
}
// check input queue
@ -160,7 +160,7 @@ void LsuUnit::tick() {
// schedule fence lock
state.fence_trace = trace;
state.fence_lock = true;
DT(3, this->name() << " fence-lock: " << *trace);
DT(3, this->name() << "-fence-lock: " << *trace);
// remove input
input.pop();
continue;
@ -171,7 +171,7 @@ void LsuUnit::tick() {
// check pending queue capacity
if (!is_write && state.pending_rd_reqs.full()) {
if (!trace->log_once(true)) {
DT(4, "*** " << this->name() << " queue-full: " << *trace);
DT(4, "*** " << this->name() << "-queue-full: " << *trace);
}
continue;
} else {
@ -201,8 +201,8 @@ void LsuUnit::tick() {
lsu_req.uuid = trace->uuid;
// send memory request
core_->lsu_demux_.at(block_idx)->ReqIn.push(lsu_req);
DT(3, this->name() << " mem-req: " << lsu_req);
core_->lmem_switch_.at(block_idx)->ReqIn.push(lsu_req);
DT(3, this->name() << "-mem-req: " << lsu_req);
// update stats
auto num_addrs = lsu_req.mask.count();
@ -237,7 +237,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
{
req_per_thread= (1>(trace_data->mem_addrs.at(0).size)/4)? 1: ((trace_data->mem_addrs.at(0).size)/4);
}
auto t0 = trace->pid * NUM_LSU_LANES;
for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) {
@ -246,11 +246,11 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
continue;
int req_idx = block_idx * LSU_CHANNELS + (i % LSU_CHANNELS);
auto& dcache_req_port = core_->lsu_demux_.at(req_idx)->ReqIn;
auto& dcache_req_port = core_->lmem_switch_.at(req_idx)->ReqIn;
auto mem_addr = trace_data->mem_addrs.at(t);
auto type = get_addr_type(mem_addr.addr);
// DT(3, "addr_type = " << type << ", " << *trace);
// DT(3, "addr_type = " << type << ", " << *trace);
uint32_t mem_bytes = 1;
for (int i = 0; i < req_per_thread; i++)
{
@ -261,7 +261,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
mem_req.tag = tag;
mem_req.cid = trace->cid;
mem_req.uuid = trace->uuid;
dcache_req_port.push(mem_req, 1);
DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
<< ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace);
@ -272,7 +272,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
++core_->perf_stats_.loads;
++pending_loads_;
}
++count;
}
}
@ -282,7 +282,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
///////////////////////////////////////////////////////////////////////////////
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
TcuUnit::TcuUnit(const SimContext& ctx, Core* core)
: FuncUnit(ctx, core, "TCU")
{}
@ -290,7 +290,7 @@ void TcuUnit::tick() {
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
auto& input = Inputs.at(i);
if (input.empty())
if (input.empty())
continue;
auto& output = Outputs.at(i);
auto trace = input.front();
@ -307,7 +307,7 @@ void TcuUnit::tick() {
}
default:
std::abort();
}
}
DT(3, "pipeline-execute: op=" << trace->tcu_type << ", " << *trace);
input.pop();
}

View file

@ -24,9 +24,8 @@ protected:
LocalMem* simobject_;
Config config_;
RAM ram_;
int32_t bank_sel_addr_start_;
int32_t bank_sel_addr_end_;
PerfStats perf_stats_;
MemCrossBar::Ptr mem_xbar_;
mutable PerfStats perf_stats_;
uint64_t to_local_addr(uint64_t addr) {
uint32_t total_lines = config_.capacity / config_.line_size;
@ -40,9 +39,15 @@ public:
: simobject_(simobject)
, config_(config)
, ram_(config.capacity)
, bank_sel_addr_start_(0)
, bank_sel_addr_end_(config.B-1)
{}
{
char sname[100];
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_reqs, (1 << config.B));
for (uint32_t i = 0; i < config.num_reqs; ++i) {
simobject->Inputs.at(i).bind(&mem_xbar_->ReqIn.at(i));
mem_xbar_->RspIn.at(i).bind(&simobject->Outputs.at(i));
}
}
virtual ~Impl() {}
@ -63,45 +68,33 @@ public:
}
void tick() {
std::vector<bool> in_used_banks(1 << config_.B);
for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
auto& core_req_port = simobject_->Inputs.at(req_id);
if (core_req_port.empty())
// process bank requests from xbar
uint32_t num_banks = (1 << config_.B);
for (uint32_t i = 0; i < num_banks; ++i) {
auto& xbar_req_out = mem_xbar_->ReqOut.at(i);
if (xbar_req_out.empty())
continue;
auto& core_req = core_req_port.front();
auto& bank_req = xbar_req_out.front();
DT(4, simobject_->name() << "-bank" << i << "-req : " << bank_req);
uint32_t bank_id = 0;
if (bank_sel_addr_end_ >= bank_sel_addr_start_) {
bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
}
// bank conflict check
if (in_used_banks.at(bank_id)) {
++perf_stats_.bank_stalls;
continue;
}
DT(4, simobject_->name() << " mem-req" << req_id << ": "<< core_req);
in_used_banks.at(bank_id) = true;
if (!core_req.write || config_.write_reponse) {
// send response
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
simobject_->Outputs.at(req_id).push(core_rsp, 1);
if (!bank_req.write || config_.write_reponse) {
// send xbar response
MemRsp bank_rsp{bank_req.tag, bank_req.cid, bank_req.uuid};
mem_xbar_->RspOut.at(i).push(bank_rsp, 1);
}
// update perf counters
perf_stats_.reads += !core_req.write;
perf_stats_.writes += core_req.write;
perf_stats_.reads += !bank_req.write;
perf_stats_.writes += bank_req.write;
// remove input
core_req_port.pop();
xbar_req_out.pop();
}
}
// Returns the performance counters for this local memory.
// bank_stalls is refreshed from the crossbar's collision count on every call;
// perf_stats_ is declared mutable so this const accessor can update it.
const PerfStats& perf_stats() const {
perf_stats_.bank_stalls = mem_xbar_->collisions();
return perf_stats_;
}
};

View file

@ -42,10 +42,10 @@ void MemCoalescer::reset() {
}
void MemCoalescer::tick() {
// process incoming responses
// process outgoing responses
if (!RspOut.empty()) {
auto& out_rsp = RspOut.front();
DT(4, this->name() << " mem-rsp: " << out_rsp);
DT(4, this->name() << "-mem-rsp: " << out_rsp);
auto& entry = pending_rd_reqs_.at(out_rsp.tag);
BitVector<> rsp_mask(input_size_);
@ -89,7 +89,7 @@ void MemCoalescer::tick() {
// ensure we can allocate a response tag
if (pending_rd_reqs_.full()) {
DT(4, "*** " << this->name() << " queue-full: " << in_req);
DT(4, "*** " << this->name() << "-queue-full: " << in_req);
return;
}
@ -145,7 +145,7 @@ void MemCoalescer::tick() {
// send memory request
ReqOut.push(out_req, delay_);
DT(4, this->name() << " mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
DT(4, this->name() << "-mem-req: coalesced=" << cur_mask.count() << ", " << out_req);
// update sent mask
sent_mask_ |= cur_mask;

View file

@ -27,13 +27,14 @@ class MemSim::Impl {
private:
MemSim* simobject_;
Config config_;
MemCrossBar::Ptr mem_xbar_;
DramSim dram_sim_;
PerfStats perf_stats_;
struct DramCallbackArgs {
MemSim* simobject;
MemReq request;
uint32_t i;
MemSim::Impl* memsim;
MemReq request;
uint32_t bank_id;
};
public:
@ -41,7 +42,15 @@ public:
: simobject_(simobject)
, config_(config)
, dram_sim_(MEM_CLOCK_RATIO)
{}
{
char sname[100];
snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks);
for (uint32_t i = 0; i < config.num_ports; ++i) {
simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
}
}
~Impl() {
//--
@ -59,14 +68,14 @@ public:
dram_sim_.tick();
uint32_t counter = 0;
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
if (simobject_->MemReqPorts.at(i).empty())
for (uint32_t i = 0; i < config_.num_banks; ++i) {
if (mem_xbar_->ReqOut.at(i).empty())
continue;
auto& mem_req = simobject_->MemReqPorts.at(i).front();
auto& mem_req = mem_xbar_->ReqOut.at(i).front();
// try to enqueue the request to the memory system
auto req_args = new DramCallbackArgs{simobject_, mem_req, i};
auto req_args = new DramCallbackArgs{this, mem_req, i};
auto enqueue_success = dram_sim_.send_request(
mem_req.write,
mem_req.addr,
@ -76,8 +85,8 @@ public:
// only send a response for read requests
if (!rsp_args->request.write) {
MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1);
DT(3, rsp_args->simobject->name() << " mem-rsp: bank=" << rsp_args->i << ", " << mem_rsp);
rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp: bank=" << rsp_args->bank_id << ", " << mem_rsp);
}
delete rsp_args;
},
@ -90,9 +99,9 @@ public:
continue;
}
DT(3, simobject_->name() << " mem-req: bank=" << i << ", " << mem_req);
DT(3, simobject_->name() << "-mem-req: bank=" << i << ", " << mem_req);
simobject_->MemReqPorts.at(i).pop();
mem_xbar_->ReqOut.at(i).pop();
counter++;
}
@ -107,8 +116,8 @@ public:
MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
: SimObject<MemSim>(ctx, name)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, MemReqPorts(config.num_ports, this)
, MemRspPorts(config.num_ports, this)
, impl_(new Impl(this, config))
{}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,15 +21,15 @@ namespace vortex {
class MemSim : public SimObject<MemSim>{
public:
struct Config {
uint32_t channels;
uint32_t num_cores;
uint32_t num_banks;
uint32_t num_ports;
};
struct PerfStats {
uint64_t counter;
uint64_t ticks;
PerfStats()
PerfStats()
: counter(0)
, ticks(0)
{}
@ -52,7 +52,7 @@ public:
void tick();
const PerfStats& perf_stats() const;
private:
class Impl;
Impl* impl_;

View file

@ -24,10 +24,15 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
// create memory simulator
memsim_ = MemSim::Create("dram", MemSim::Config{
MEMORY_BANKS,
uint32_t(arch.num_cores()) * arch.num_clusters()
PLATFORM_MEMORY_BANKS,
L3_MEM_PORTS
});
// create clusters
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
clusters_.at(i) = Cluster::Create(i, this, arch, dcrs_);
}
// create L3 cache
l3cache_ = CacheSim::Create("l3cache", CacheSim::Config{
!L3_ENABLED,
@ -38,7 +43,8 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
log2ceil(L3_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports
uint8_t(arch.num_clusters()), // request size
L3_NUM_REQS, // request size
L3_MEM_PORTS, // memory ports
L3_WRITEBACK, // write-back
false, // write response
L3_MSHR_SIZE, // mshr size
@ -46,26 +52,26 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
}
);
// connect L3 memory ports
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
// connect L3 core interfaces
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
for (uint32_t j = 0; j < L2_MEM_PORTS; ++j) {
clusters_.at(i)->mem_req_ports.at(j).bind(&l3cache_->CoreReqPorts.at(i * L2_MEM_PORTS + j));
l3cache_->CoreRspPorts.at(i * L2_MEM_PORTS + j).bind(&clusters_.at(i)->mem_rsp_ports.at(j));
}
}
// connect L3 memory interfaces
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i));
memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i));
}
// create clusters
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
clusters_.at(i) = Cluster::Create(i, this, arch, dcrs_);
// connect L3 core ports
clusters_.at(i)->mem_req_port.bind(&l3cache_->CoreReqPorts.at(i));
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
}
// set up memory profiling
for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) {
for (uint32_t i = 0; i < L3_MEM_PORTS; ++i) {
memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){
__unused (cycle);
perf_mem_reads_ += !req.write;
perf_mem_writes_ += req.write;
perf_mem_reads_ += !req.write;
perf_mem_writes_ += req.write;
perf_mem_pending_reads_ += !req.write;
});
memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){

View file

@ -21,11 +21,9 @@ Socket::Socket(const SimContext& ctx,
Cluster* cluster,
const Arch &arch,
const DCRS &dcrs)
: SimObject(ctx, "socket")
, icache_mem_req_port(this)
, icache_mem_rsp_port(this)
, dcache_mem_req_port(this)
, dcache_mem_rsp_port(this)
: SimObject(ctx, StrFormat("socket%d", socket_id))
, mem_req_ports(L1_MEM_PORTS, this)
, mem_rsp_ports(L1_MEM_PORTS, this)
, socket_id_(socket_id)
, cluster_(cluster)
, cores_(arch.socket_size())
@ -33,8 +31,8 @@ Socket::Socket(const SimContext& ctx,
auto cores_per_socket = cores_.size();
char sname[100];
snprintf(sname, 100, "socket%d-icaches", socket_id);
icaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_ICACHES, 1, CacheSim::Config{
snprintf(sname, 100, "%s-icaches", this->name().c_str());
icaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_ICACHES, CacheSim::Config{
!ICACHE_ENABLED,
log2ceil(ICACHE_SIZE), // C
log2ceil(L1_LINE_SIZE), // L
@ -44,17 +42,15 @@ Socket::Socket(const SimContext& ctx,
XLEN, // address bits
1, // number of ports
1, // number of inputs
1, // memory ports
false, // write-back
false, // write response
(uint8_t)arch.num_warps(), // mshr size
2, // pipeline latency
});
icaches_->MemReqPort.bind(&icache_mem_req_port);
icache_mem_rsp_port.bind(&icaches_->MemRspPort);
snprintf(sname, 100, "socket%d-dcaches", socket_id);
dcaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_DCACHES, DCACHE_NUM_REQS, CacheSim::Config{
snprintf(sname, 100, "%s-dcaches", this->name().c_str());
dcaches_ = CacheCluster::Create(sname, cores_per_socket, NUM_DCACHES, CacheSim::Config{
!DCACHE_ENABLED,
log2ceil(DCACHE_SIZE), // C
log2ceil(L1_LINE_SIZE), // L
@ -64,21 +60,41 @@ Socket::Socket(const SimContext& ctx,
XLEN, // address bits
1, // number of ports
DCACHE_NUM_REQS, // number of inputs
L1_MEM_PORTS, // memory ports
DCACHE_WRITEBACK, // write-back
false, // write response
DCACHE_MSHR_SIZE, // mshr size
2, // pipeline latency
});
dcaches_->MemReqPort.bind(&dcache_mem_req_port);
dcache_mem_rsp_port.bind(&dcaches_->MemRspPort);
// connect l1 caches to outgoing memory interfaces
for (uint32_t i = 0; i < L1_MEM_PORTS; ++i) {
if (i == 0) {
snprintf(sname, 100, "%s-l1_arb%d", this->name().c_str(), i);
auto l1_arb = MemArbiter::Create(sname, ArbiterType::RoundRobin, 2, 1);
icaches_->MemReqPorts.at(0).bind(&l1_arb->ReqIn.at(1));
l1_arb->RspIn.at(1).bind(&icaches_->MemRspPorts.at(0));
dcaches_->MemReqPorts.at(0).bind(&l1_arb->ReqIn.at(0));
l1_arb->RspIn.at(0).bind(&dcaches_->MemRspPorts.at(0));
l1_arb->ReqOut.at(0).bind(&this->mem_req_ports.at(0));
this->mem_rsp_ports.at(0).bind(&l1_arb->RspOut.at(0));
} else {
dcaches_->MemReqPorts.at(i).bind(&this->mem_req_ports.at(i));
this->mem_rsp_ports.at(i).bind(&dcaches_->MemRspPorts.at(i));
}
}
// create cores
for (uint32_t i = 0; i < cores_per_socket; ++i) {
uint32_t core_id = socket_id * cores_per_socket + i;
cores_.at(i) = Core::Create(core_id, this, arch, dcrs);
}
// connect cores to caches
for (uint32_t i = 0; i < cores_per_socket; ++i) {
cores_.at(i)->icache_req_ports.at(0).bind(&icaches_->CoreReqPorts.at(i).at(0));
icaches_->CoreRspPorts.at(i).at(0).bind(&cores_.at(i)->icache_rsp_ports.at(0));

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -32,16 +32,13 @@ public:
CacheSim::PerfStats dcache;
};
SimPort<MemReq> icache_mem_req_port;
SimPort<MemRsp> icache_mem_rsp_port;
std::vector<SimPort<MemReq>> mem_req_ports;
std::vector<SimPort<MemRsp>> mem_rsp_ports;
SimPort<MemReq> dcache_mem_req_port;
SimPort<MemRsp> dcache_mem_rsp_port;
Socket(const SimContext& ctx,
Socket(const SimContext& ctx,
uint32_t socket_id,
Cluster* cluster,
const Arch &arch,
Cluster* cluster,
const Arch &arch,
const DCRS &dcrs);
~Socket();
@ -66,14 +63,14 @@ public:
bool running() const;
int get_exitcode() const;
int get_exitcode() const;
void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);
void resume(uint32_t core_id);
PerfStats perf_stats() const;
private:
uint32_t socket_id_;
Cluster* cluster_;

View file

@ -15,11 +15,11 @@
using namespace vortex;
LocalMemDemux::LocalMemDemux(
LocalMemSwitch::LocalMemSwitch(
const SimContext& ctx,
const char* name,
uint32_t delay
) : SimObject<LocalMemDemux>(ctx, name)
) : SimObject<LocalMemSwitch>(ctx, name)
, ReqIn(this)
, RspIn(this)
, ReqLmem(this)
@ -29,19 +29,19 @@ LocalMemDemux::LocalMemDemux(
, delay_(delay)
{}
void LocalMemDemux::reset() {}
void LocalMemSwitch::reset() {}
void LocalMemDemux::tick() {
// process incoming responses
void LocalMemSwitch::tick() {
// process outgoing responses
if (!RspLmem.empty()) {
auto& out_rsp = RspLmem.front();
DT(4, this->name() << " lmem-rsp: " << out_rsp);
DT(4, this->name() << "-lmem-rsp: " << out_rsp);
RspIn.push(out_rsp, 1);
RspLmem.pop();
}
if (!RspDC.empty()) {
auto& out_rsp = RspDC.front();
DT(4, this->name() << " dc-rsp: " << out_rsp);
DT(4, this->name() << "-dc-rsp: " << out_rsp);
RspIn.push(out_rsp, 1);
RspDC.pop();
}
@ -73,12 +73,12 @@ void LocalMemDemux::tick() {
if (!out_dc_req.mask.none()) {
ReqDC.push(out_dc_req, delay_);
DT(4, this->name() << " dc-req: " << out_dc_req);
DT(4, this->name() << "-dc-req: " << out_dc_req);
}
if (!out_lmem_req.mask.none()) {
ReqLmem.push(out_lmem_req, delay_);
DT(4, this->name() << " lmem-req: " << out_lmem_req);
DT(4, this->name() << "-lmem-req: " << out_lmem_req);
}
ReqIn.pop();
}
@ -104,12 +104,12 @@ void LsuMemAdapter::reset() {}
void LsuMemAdapter::tick() {
uint32_t input_size = ReqOut.size();
// process incoming responses
// process outgoing responses
for (uint32_t i = 0; i < input_size; ++i) {
if (RspOut.at(i).empty())
continue;
auto& out_rsp = RspOut.at(i).front();
DT(4, this->name() << " rsp" << i << ": " << out_rsp);
DT(4, this->name() << "-rsp" << i << ": " << out_rsp);
// build memory response
LsuRsp in_rsp(input_size);
@ -141,7 +141,6 @@ void LsuMemAdapter::tick() {
if (!ReqIn.empty()) {
auto& in_req = ReqIn.front();
assert(in_req.mask.size() == input_size);
for (uint32_t i = 0; i < input_size; ++i) {
if (in_req.mask.test(i)) {
// build memory request
@ -152,10 +151,9 @@ void LsuMemAdapter::tick() {
out_req.tag = in_req.tag;
out_req.cid = in_req.cid;
out_req.uuid = in_req.uuid;
// send memory request
ReqOut.at(i).push(out_req, delay_);
DT(4, this->name() << " req" << i << ": " << out_req);
DT(4, this->name() << "-req" << i << ": " << out_req);
}
}
ReqIn.pop();

View file

@ -466,29 +466,29 @@ private:
///////////////////////////////////////////////////////////////////////////////
template <typename Type>
class Mux : public SimObject<Mux<Type>> {
class Arbiter : public SimObject<Arbiter<Type>> {
public:
std::vector<SimPort<Type>> Inputs;
std::vector<SimPort<Type>> Outputs;
Mux(
Arbiter(
const SimContext& ctx,
const char* name,
ArbiterType type,
uint32_t num_inputs,
uint32_t num_outputs = 1,
uint32_t delay = 1
) : SimObject<Mux<Type>>(ctx, name)
) : SimObject<Arbiter<Type>>(ctx, name)
, Inputs(num_inputs, this)
, Outputs(num_outputs, this)
, type_(type)
, delay_(delay)
, cursors_(num_outputs, 0)
, num_reqs_(log2ceil(num_inputs / num_outputs))
, grants_(num_outputs, 0)
, lg2_num_reqs_(log2ceil(num_inputs / num_outputs))
{
assert(delay != 0);
assert(num_inputs <= 32);
assert(num_outputs <= 32);
assert(num_inputs <= 64);
assert(num_outputs <= 64);
assert(num_inputs >= num_outputs);
// bypass mode
@ -500,15 +500,15 @@ public:
}
void reset() {
for (auto& cursor : cursors_) {
cursor = 0;
for (auto& grant : grants_) {
grant = 0;
}
}
void tick() {
uint32_t I = Inputs.size();
uint32_t O = Outputs.size();
uint32_t R = 1 << num_reqs_;
uint32_t R = 1 << lg2_num_reqs_;
// skip bypass mode
if (I == O)
@ -517,8 +517,8 @@ public:
// process inputs
for (uint32_t o = 0; o < O; ++o) {
for (uint32_t r = 0; r < R; ++r) {
uint32_t i = (cursors_.at(o) + r) & (R-1);
uint32_t j = o * R + i;
uint32_t g = (grants_.at(o) + r) & (R-1);
uint32_t j = o * R + g;
if (j >= I)
continue;
@ -527,31 +527,134 @@ public:
auto& req = req_in.front();
Outputs.at(o).push(req, delay_);
req_in.pop();
this->update_cursor(o, i);
this->update_grant(o, g);
break;
}
}
}
}
private:
protected:
void update_cursor(uint32_t index, uint32_t grant) {
void update_grant(uint32_t index, uint32_t grant) {
if (type_ == ArbiterType::RoundRobin) {
cursors_.at(index) = grant + 1;
grants_.at(index) = grant + 1;
}
}
ArbiterType type_;
uint32_t delay_;
std::vector<uint32_t> cursors_;
uint32_t num_reqs_;
std::vector<uint32_t> grants_;
uint32_t lg2_num_reqs_;
};
///////////////////////////////////////////////////////////////////////////////
// Address-routed crossbar: forwards packets from `Inputs` to `Outputs`.
//
// The destination output of a packet is taken from `lg2(num_outputs)` bits of
// `req.addr`, starting at bit `addr_start`. Each tick, every output forwards
// at most one pending input packet; any further input whose head packet
// targets the same output in that tick is counted as a collision and left
// queued. Before forwarding, the winning input index is packed into the low
// `lg2(num_inputs)` bits of `req.tag`.
//
// Type must expose `addr` and `tag` members and be streamable (used by DT).
template <typename Type>
class CrossBar : public SimObject<CrossBar<Type>> {
public:
  std::vector<SimPort<Type>> Inputs;
  std::vector<SimPort<Type>> Outputs;

  // ctx         : simulation context forwarded to SimObject
  // name        : instance name (used as trace prefix)
  // type        : arbitration policy; the grant pointer only advances for
  //               ArbiterType::RoundRobin, otherwise scanning always starts at 0
  // num_inputs  : number of input ports (<= 64)
  // num_outputs : number of output ports (<= 64, power of two)
  // addr_start  : lowest address bit used to select the output
  // delay       : cycles applied when pushing to an output (must be non-zero)
  CrossBar(
    const SimContext& ctx,
    const char* name,
    ArbiterType type,
    uint32_t num_inputs,
    uint32_t num_outputs = 1,
    uint32_t addr_start = 0,
    uint32_t delay = 1
  )
    : SimObject<CrossBar<Type>>(ctx, name)
    , Inputs(num_inputs, this)
    , Outputs(num_outputs, this)
    , type_(type)
    , delay_(delay)
    , grants_(num_outputs, 0)
    , lg2_inputs_(log2ceil(num_inputs))
    , lg2_outputs_(log2ceil(num_outputs))
    , addr_start_(addr_start)
    , collisions_(0) {
    assert(delay != 0);
    assert(num_inputs <= 64);
    assert(num_outputs <= 64);
    assert(ispow2(num_outputs));
  }

  // Restart every output's scan position at input 0.
  void reset() {
    for (auto& grant : grants_) {
      grant = 0;
    }
  }

  // Advance one cycle: route at most one packet to each output.
  void tick() {
    const uint32_t I = Inputs.size();
    const uint32_t O = Outputs.size();
    // scan range rounded up to a power of two so the wrap-around is a mask
    const uint32_t R = 1 << lg2_inputs_;

    // process incoming requests
    for (uint32_t o = 0; o < O; ++o) {
      int32_t input_idx = -1;
      for (uint32_t r = 0; r < R; ++r) {
        const uint32_t i = (grants_.at(o) + r) & (R-1);
        if (i >= I)
          continue; // padding slot when num_inputs is not a power of two
        auto& req_in = Inputs.at(i);
        if (req_in.empty())
          continue;
        auto& req = req_in.front();
        // skip if input is not going to current output
        uint32_t output_idx = 0;
        if (O != 1) {
          // The selector field spans bits [addr_start_, addr_start_ + lg2_outputs_ - 1].
          // NOTE(review): assumes bit_getw(value, first, last) takes inclusive bit
          // positions, which is the only reading under which the previous call
          // (last = lg2_outputs_ - 1) was right for addr_start_ == 0 - confirm.
          output_idx = static_cast<uint32_t>(
            bit_getw(req.addr, addr_start_, addr_start_ + lg2_outputs_ - 1));
        }
        if (output_idx != o)
          continue;
        if (input_idx != -1) {
          // another input already won this output in this cycle
          ++collisions_;
          continue;
        }
        input_idx = i;
      }
      if (input_idx != -1) {
        auto& req_in = Inputs.at(input_idx);
        auto& req = req_in.front();
        if (lg2_inputs_ != 0) {
          // record the source input in the low tag bits
          req.tag = (req.tag << lg2_inputs_) | input_idx;
        }
        DT(4, this->name() << "-req" << input_idx << ": " << req);
        Outputs.at(o).push(req, delay_);
        req_in.pop();
        this->update_grant(o, input_idx);
      }
    }
  }

  // Total number of collisions counted by tick() since construction.
  uint64_t collisions() const {
    return collisions_;
  }

protected:

  // Under round-robin, the next scan for output `index` starts just after
  // the input that was granted.
  void update_grant(uint32_t index, uint32_t grant) {
    if (type_ == ArbiterType::RoundRobin) {
      grants_.at(index) = grant + 1;
    }
  }

  ArbiterType type_;
  uint32_t delay_;
  std::vector<uint32_t> grants_;  // per-output scan start position
  uint32_t lg2_inputs_;
  uint32_t lg2_outputs_;
  uint32_t addr_start_;
  uint64_t collisions_;
};
///////////////////////////////////////////////////////////////////////////////
template <typename Req, typename Rsp>
class Switch : public SimObject<Switch<Req, Rsp>> {
class TxArbiter : public SimObject<TxArbiter<Req, Rsp>> {
public:
std::vector<SimPort<Req>> ReqIn;
std::vector<SimPort<Rsp>> RspIn;
@ -559,7 +662,7 @@ public:
std::vector<SimPort<Req>> ReqOut;
std::vector<SimPort<Rsp>> RspOut;
Switch(
TxArbiter(
const SimContext& ctx,
const char* name,
ArbiterType type,
@ -567,19 +670,19 @@ public:
uint32_t num_outputs = 1,
uint32_t delay = 1
)
: SimObject<Switch<Req, Rsp>>(ctx, name)
: SimObject<TxArbiter<Req, Rsp>>(ctx, name)
, ReqIn(num_inputs, this)
, RspIn(num_inputs, this)
, ReqOut(num_outputs, this)
, RspOut(num_outputs, this)
, type_(type)
, delay_(delay)
, cursors_(num_outputs, 0)
, lg_num_reqs_(log2ceil(num_inputs / num_outputs))
, grants_(num_outputs, 0)
, lg2_num_reqs_(log2ceil(num_inputs / num_outputs))
{
assert(delay != 0);
assert(num_inputs <= 32);
assert(num_outputs <= 32);
assert(num_inputs <= 64);
assert(num_outputs <= 64);
assert(num_inputs >= num_outputs);
// bypass mode
@ -592,76 +695,238 @@ public:
}
void reset() {
for (auto& cursor : cursors_) {
cursor = 0;
for (auto& grant : grants_) {
grant = 0;
}
}
void tick() {
uint32_t I = ReqIn.size();
uint32_t O = ReqOut.size();
uint32_t R = 1 << lg_num_reqs_;
uint32_t R = 1 << lg2_num_reqs_;
// skip bypass mode
if (I == O)
return;
// process outgoing responses
for (uint32_t o = 0; o < O; ++o) {
// process incoming responses
if (!RspOut.at(o).empty()) {
auto& rsp = RspOut.at(o).front();
uint32_t i = 0;
if (lg_num_reqs_ != 0) {
i = rsp.tag & (R-1);
rsp.tag >>= lg_num_reqs_;
auto& rsp_out = RspOut.at(o);
if (!rsp_out.empty()) {
auto& rsp = rsp_out.front();
uint32_t g = 0;
if (lg2_num_reqs_ != 0) {
g = rsp.tag & (R-1);
rsp.tag >>= lg2_num_reqs_;
}
DT(4, this->name() << " rsp" << o << ": " << rsp);
uint32_t j = o * R + i;
DT(4, this->name() << "-rsp" << o << ": " << rsp);
uint32_t j = o * R + g;
RspIn.at(j).push(rsp, 1);
RspOut.at(o).pop();
rsp_out.pop();
}
}
// process incoming requests
// process incoming requests
for (uint32_t o = 0; o < O; ++o) {
for (uint32_t r = 0; r < R; ++r) {
uint32_t i = (cursors_.at(o) + r) & (R-1);
uint32_t j = o * R + i;
uint32_t g = (grants_.at(o) + r) & (R-1);
uint32_t j = o * R + g;
if (j >= I)
continue;
auto& req_in = ReqIn.at(j);
if (!req_in.empty()) {
auto& req = req_in.front();
if (lg_num_reqs_ != 0) {
req.tag = (req.tag << lg_num_reqs_) | i;
if (lg2_num_reqs_ != 0) {
req.tag = (req.tag << lg2_num_reqs_) | g;
}
DT(4, this->name() << " req" << j << ": " << req);
DT(4, this->name() << "-req" << j << ": " << req);
ReqOut.at(o).push(req, delay_);
req_in.pop();
this->update_cursor(o, i);
this->update_grant(o, g);
break;
}
}
}
}
void update_cursor(uint32_t index, uint32_t grant) {
protected:
void update_grant(uint32_t index, uint32_t grant) {
if (type_ == ArbiterType::RoundRobin) {
cursors_.at(index) = grant + 1;
grants_.at(index) = grant + 1;
}
}
private:
ArbiterType type_;
uint32_t delay_;
std::vector<uint32_t> cursors_;
uint32_t lg_num_reqs_;
std::vector<uint32_t> grants_;
uint32_t lg2_num_reqs_;
};
using MemSwitch = Switch<MemReq, MemRsp>;
///////////////////////////////////////////////////////////////////////////////
class LocalMemDemux : public SimObject<LocalMemDemux> {
// Transactional (request/response) crossbar.
//
// Requests entering on `ReqIn` are routed to `ReqOut` by `lg2(num_outputs)`
// bits of `req.addr`, starting at bit `addr_start`; the source input index is
// packed into the low `lg2(num_inputs)` bits of `req.tag`. Responses arriving
// on `RspOut` are routed back to `RspIn` using those low tag bits, which are
// shifted out again before the response is forwarded.
//
// Each tick, every output accepts at most one request and every input
// receives at most one response; additional contenders seen in the same tick
// are counted as collisions and stay queued.
//
// Req must expose `addr` and `tag`; Rsp must expose `tag`; both must be
// streamable (used by DT).
template <typename Req, typename Rsp>
class TxCrossBar : public SimObject<TxCrossBar<Req, Rsp>> {
public:
  std::vector<SimPort<Req>> ReqIn;
  std::vector<SimPort<Rsp>> RspIn;

  std::vector<SimPort<Req>> ReqOut;
  std::vector<SimPort<Rsp>> RspOut;

  // ctx         : simulation context forwarded to SimObject
  // name        : instance name (used as trace prefix)
  // type        : arbitration policy; grant pointers only advance for
  //               ArbiterType::RoundRobin, otherwise scanning always starts at 0
  // num_inputs  : number of requester-side ports (<= 64, power of two)
  // num_outputs : number of target-side ports (<= 64, power of two)
  // addr_start  : lowest address bit used to select the output
  // delay       : cycles applied when pushing a request (must be non-zero)
  TxCrossBar(
    const SimContext& ctx,
    const char* name,
    ArbiterType type,
    uint32_t num_inputs,
    uint32_t num_outputs = 1,
    uint32_t addr_start = 0,
    uint32_t delay = 1
  )
    : SimObject<TxCrossBar<Req, Rsp>>(ctx, name)
    , ReqIn(num_inputs, this)
    , RspIn(num_inputs, this)
    , ReqOut(num_outputs, this)
    , RspOut(num_outputs, this)
    , type_(type)
    , delay_(delay)
    , req_grants_(num_outputs, 0)
    , rsp_grants_(num_inputs, 0)
    , lg2_inputs_(log2ceil(num_inputs))
    , lg2_outputs_(log2ceil(num_outputs))
    , addr_start_(addr_start)
    , collisions_(0) {
    assert(delay != 0);
    assert(num_inputs <= 64);
    assert(num_outputs <= 64);
    assert(ispow2(num_inputs));
    assert(ispow2(num_outputs));
  }

  // Restart all request and response scan positions at 0.
  void reset() {
    for (auto& grant : req_grants_) {
      grant = 0;
    }
    for (auto& grant : rsp_grants_) {
      grant = 0;
    }
  }

  // Advance one cycle: responses are routed first, then requests.
  void tick() {
    const uint32_t I = ReqIn.size();
    const uint32_t O = ReqOut.size();
    const uint32_t R = 1 << lg2_inputs_;
    const uint32_t T = 1 << lg2_outputs_;

    // process outgoing responses
    for (uint32_t i = 0; i < I; ++i) {
      int32_t output_idx = -1;
      for (uint32_t t = 0; t < T; ++t) {
        const uint32_t o = (rsp_grants_.at(i) + t) & (T-1);
        if (o >= O)
          continue;
        auto& rsp_out = RspOut.at(o);
        if (rsp_out.empty())
          continue;
        auto& rsp = rsp_out.front();
        // skip if response is not going to current input
        uint32_t input_idx = 0;
        if (lg2_inputs_ != 0) {
          input_idx = rsp.tag & (R-1);
        }
        if (input_idx != i)
          continue;
        if (output_idx != -1) {
          // another output already won this input in this cycle
          ++collisions_;
          continue;
        }
        output_idx = o;
      }
      if (output_idx != -1) {
        auto& rsp_out = RspOut.at(output_idx);
        auto& rsp = rsp_out.front();
        if (lg2_inputs_ != 0) {
          // strip the routing bits added on the request path
          rsp.tag >>= lg2_inputs_;
        }
        DT(4, this->name() << "-rsp" << output_idx << ": " << rsp);
        // the selected response was matched against input i above
        RspIn.at(i).push(rsp, 1);
        rsp_out.pop();
        this->update_rsp_grant(i, output_idx);
      }
    }

    // process incoming requests
    for (uint32_t o = 0; o < O; ++o) {
      int32_t input_idx = -1;
      for (uint32_t r = 0; r < R; ++r) {
        const uint32_t i = (req_grants_.at(o) + r) & (R-1);
        if (i >= I)
          continue;
        auto& req_in = ReqIn.at(i);
        if (req_in.empty())
          continue;
        auto& req = req_in.front();
        // skip if request is not going to current output
        uint32_t output_idx = 0;
        if (O != 1) {
          // The selector field spans bits [addr_start_, addr_start_ + lg2_outputs_ - 1].
          // NOTE(review): assumes bit_getw(value, first, last) takes inclusive bit
          // positions, which is the only reading under which the previous call
          // (last = lg2_outputs_ - 1) was right for addr_start_ == 0 - confirm.
          output_idx = static_cast<uint32_t>(
            bit_getw(req.addr, addr_start_, addr_start_ + lg2_outputs_ - 1));
        }
        if (output_idx != o)
          continue;
        if (input_idx != -1) {
          // another input already won this output in this cycle
          ++collisions_;
          continue;
        }
        input_idx = i;
      }
      if (input_idx != -1) {
        auto& req_in = ReqIn.at(input_idx);
        auto& req = req_in.front();
        if (lg2_inputs_ != 0) {
          // record the source input in the low tag bits for the return trip
          req.tag = (req.tag << lg2_inputs_) | input_idx;
        }
        DT(4, this->name() << "-req" << input_idx << ": " << req);
        ReqOut.at(o).push(req, delay_);
        req_in.pop();
        this->update_req_grant(o, input_idx);
      }
    }
  }

  // Total number of request and response collisions counted by tick().
  uint64_t collisions() const {
    return collisions_;
  }

protected:

  // Under round-robin, the next request scan for output `index` starts just
  // after the input that was granted.
  void update_req_grant(uint32_t index, uint32_t grant) {
    if (type_ == ArbiterType::RoundRobin) {
      req_grants_.at(index) = grant + 1;
    }
  }

  // Under round-robin, the next response scan for input `index` starts just
  // after the output that was granted.
  void update_rsp_grant(uint32_t index, uint32_t grant) {
    if (type_ == ArbiterType::RoundRobin) {
      rsp_grants_.at(index) = grant + 1;
    }
  }

  ArbiterType type_;
  uint32_t delay_;
  std::vector<uint32_t> req_grants_;  // per-output request scan start
  std::vector<uint32_t> rsp_grants_;  // per-input response scan start
  uint32_t lg2_inputs_;
  uint32_t lg2_outputs_;
  uint32_t addr_start_;
  uint64_t collisions_;
};
///////////////////////////////////////////////////////////////////////////////
class LocalMemSwitch : public SimObject<LocalMemSwitch> {
public:
SimPort<LsuReq> ReqIn;
SimPort<LsuRsp> RspIn;
@ -672,7 +937,7 @@ public:
SimPort<LsuReq> ReqDC;
SimPort<LsuRsp> RspDC;
LocalMemDemux(
LocalMemSwitch(
const SimContext& ctx,
const char* name,
uint32_t delay
@ -711,4 +976,7 @@ private:
uint32_t delay_;
};
using MemArbiter = TxArbiter<MemReq, MemRsp>;
using MemCrossBar = TxCrossBar<MemReq, MemRsp>;
}

View file

@ -142,8 +142,8 @@ public:
if (future_.valid()) {
future_.wait();
}
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
delete mem_alloc_[i];
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
delete mem_alloc_[b];
}
if (ram_) {
delete ram_;
@ -187,8 +187,8 @@ public:
MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS);
// initialize memory allocator
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
mem_alloc_[b] = new MemoryAllocator(0, mem_bank_size_, 4096, 64);
}
// reset the device
@ -257,8 +257,9 @@ public:
//printf("%0ld: [sim] register_write: address=0x%x\n", timestamp, offset);
device_->s_axi_ctrl_awvalid = 1;
device_->s_axi_ctrl_awaddr = offset;
while (!device_->s_axi_ctrl_awready)
while (!device_->s_axi_ctrl_awready) {
this->tick();
}
this->tick();
device_->s_axi_ctrl_awvalid = 0;
@ -267,8 +268,9 @@ public:
device_->s_axi_ctrl_wvalid = 1;
device_->s_axi_ctrl_wdata = value;
device_->s_axi_ctrl_wstrb = 0xf;
while (!device_->s_axi_ctrl_wready)
while (!device_->s_axi_ctrl_wready) {
this->tick();
}
this->tick();
device_->s_axi_ctrl_wvalid = 0;
@ -290,8 +292,9 @@ public:
//printf("%0ld: [sim] register_read: address=0x%x\n", timestamp, offset);
device_->s_axi_ctrl_arvalid = 1;
device_->s_axi_ctrl_araddr = offset;
while (!device_->s_axi_ctrl_arready)
while (!device_->s_axi_ctrl_arready) {
this->tick();
}
this->tick();
device_->s_axi_ctrl_arvalid = 0;
@ -318,9 +321,9 @@ private:
reqs.clear();
}
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
std::queue<mem_req_t*> empty;
std::swap(dram_queues_[i], empty);
std::swap(dram_queues_[b], empty);
}
device_->ap_rst_n = 0;
@ -335,10 +338,10 @@ private:
device_->ap_rst_n = 1;
// this AXI device is always ready to accept new requests
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
*m_axi_mem_[i].arready = 1;
*m_axi_mem_[i].awready = 1;
*m_axi_mem_[i].wready = 1;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
*m_axi_mem_[b].arready = 1;
*m_axi_mem_[b].awready = 1;
*m_axi_mem_[b].wready = 1;
}
}
@ -355,10 +358,10 @@ private:
dram_sim_.tick();
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
if (!dram_queues_[i].empty()) {
auto mem_req = dram_queues_[i].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) {
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
if (!dram_queues_[b].empty()) {
auto mem_req = dram_queues_[b].front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, b, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
@ -366,7 +369,7 @@ private:
orig_req->ready = true;
}
}, mem_req)) {
dram_queues_[i].pop();
dram_queues_[b].pop();
}
}
}