mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'rtl_cache'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
commit
5844de8c4d
81 changed files with 3085 additions and 2381 deletions
|
@ -163,8 +163,9 @@ cache()
|
|||
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=1 -DDCACHE_NUM_WAYS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
@ -174,11 +175,17 @@ cache()
|
|||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# replacement policy
|
||||
CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_REPL_POLICY=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DDCACHE_REPL_POLICY=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
||||
# test writeback
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
|
||||
# cache clustering
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
|
||||
|
|
8
configure
vendored
8
configure
vendored
|
@ -65,7 +65,7 @@ copy_files() {
|
|||
filename_no_ext="${filename%.in}"
|
||||
dest_file="$dest_dir/$filename_no_ext"
|
||||
mkdir -p "$dest_dir"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
|
||||
sed "s|@VORTEX_HOME@|$SOURCE_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@BUILDDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
|
||||
# apply permissions to bash scripts
|
||||
read -r firstline < "$dest_file"
|
||||
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
||||
|
@ -169,8 +169,8 @@ fi
|
|||
SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*")
|
||||
|
||||
# Get the directory of the script
|
||||
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
SOURCE_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
|
||||
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
|
||||
THIRD_PARTY_DIR=$SOURCE_DIR/third_party
|
||||
|
||||
copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
|
||||
copy_files "$SOURCE_DIR" "$CURRENT_DIR"
|
||||
|
|
|
@ -67,7 +67,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_gbar_unit #(
|
||||
.INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("gbar%0d", CLUSTER_ID)))
|
||||
) gbar_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -84,7 +84,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (l2_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-l2cache", INSTANCE_ID))),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.LINE_SIZE (`L2_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
|
@ -98,8 +98,10 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_DIRTYBYTES),
|
||||
.REPL_POLICY (`L2_REPL_POLICY),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
|
@ -129,7 +131,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
|
||||
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-socket%0d", INSTANCE_ID, socket_id)))
|
||||
) socket (
|
||||
`SCOPE_IO_BIND (scope_socket+socket_id)
|
||||
|
||||
|
@ -152,6 +154,6 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
);
|
||||
end
|
||||
|
||||
`BUFFER_EX(busy, (| per_socket_busy), 1'b1, (`NUM_SOCKETS > 1));
|
||||
`BUFFER_EX(busy, (| per_socket_busy), 1'b1, 1, (`NUM_SOCKETS > 1));
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -170,6 +170,10 @@
|
|||
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef MEMORY_BANKS
|
||||
`define MEMORY_BANKS 2
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
||||
`ifndef STACK_BASE_ADDR
|
||||
|
@ -566,7 +570,12 @@
|
|||
|
||||
// Number of Associative Ways
|
||||
`ifndef ICACHE_NUM_WAYS
|
||||
`define ICACHE_NUM_WAYS 1
|
||||
`define ICACHE_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
// Replacement Policy
|
||||
`ifndef ICACHE_REPL_POLICY
|
||||
`define ICACHE_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
@ -615,12 +624,12 @@
|
|||
|
||||
// Memory Response Queue Size
|
||||
`ifndef DCACHE_MRSQ_SIZE
|
||||
`define DCACHE_MRSQ_SIZE 0
|
||||
`define DCACHE_MRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Number of Associative Ways
|
||||
`ifndef DCACHE_NUM_WAYS
|
||||
`define DCACHE_NUM_WAYS 1
|
||||
`define DCACHE_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
|
@ -628,6 +637,16 @@
|
|||
`define DCACHE_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// Enable Cache Dirty bytes
|
||||
`ifndef DCACHE_DIRTYBYTES
|
||||
`define DCACHE_DIRTYBYTES `DCACHE_WRITEBACK
|
||||
`endif
|
||||
|
||||
// Replacement Policy
|
||||
`ifndef DCACHE_REPL_POLICY
|
||||
`define DCACHE_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LMEM_DISABLE
|
||||
|
@ -650,12 +669,8 @@
|
|||
|
||||
// Cache Size
|
||||
`ifndef L2_CACHE_SIZE
|
||||
`ifdef ALTERA_S10
|
||||
`define L2_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L2_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of Banks
|
||||
`ifndef L2_NUM_BANKS
|
||||
|
@ -679,12 +694,12 @@
|
|||
|
||||
// Memory Response Queue Size
|
||||
`ifndef L2_MRSQ_SIZE
|
||||
`define L2_MRSQ_SIZE 0
|
||||
`define L2_MRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Number of Associative Ways
|
||||
`ifndef L2_NUM_WAYS
|
||||
`define L2_NUM_WAYS 2
|
||||
`define L2_NUM_WAYS 8
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
|
@ -692,15 +707,21 @@
|
|||
`define L2_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
// Enable Cache Dirty bytes
|
||||
`ifndef L2_DIRTYBYTES
|
||||
`define L2_DIRTYBYTES `L2_WRITEBACK
|
||||
`endif
|
||||
|
||||
// Replacement Policy
|
||||
`ifndef L2_REPL_POLICY
|
||||
`define L2_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Cache Size
|
||||
`ifndef L3_CACHE_SIZE
|
||||
`ifdef ALTERA_S10
|
||||
`define L3_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L3_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of Banks
|
||||
|
@ -725,12 +746,12 @@
|
|||
|
||||
// Memory Response Queue Size
|
||||
`ifndef L3_MRSQ_SIZE
|
||||
`define L3_MRSQ_SIZE 0
|
||||
`define L3_MRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Number of Associative Ways
|
||||
`ifndef L3_NUM_WAYS
|
||||
`define L3_NUM_WAYS 4
|
||||
`define L3_NUM_WAYS 8
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
|
@ -738,8 +759,14 @@
|
|||
`define L3_WRITEBACK 0
|
||||
`endif
|
||||
|
||||
`ifndef MEMORY_BANKS
|
||||
`define MEMORY_BANKS 2
|
||||
// Enable Cache Dirty bytes
|
||||
`ifndef L3_DIRTYBYTES
|
||||
`define L3_DIRTYBYTES `L3_WRITEBACK
|
||||
`endif
|
||||
|
||||
// Replacement Policy
|
||||
`ifndef L3_REPL_POLICY
|
||||
`define L3_REPL_POLICY 1
|
||||
`endif
|
||||
|
||||
// Number of Memory Ports from LLC
|
||||
|
|
|
@ -335,10 +335,10 @@
|
|||
.data_out (dst) \
|
||||
)
|
||||
|
||||
`define BUFFER_EX(dst, src, ena, latency) \
|
||||
`define BUFFER_EX(dst, src, ena, RSTW, latency) \
|
||||
VX_pipe_register #( \
|
||||
.DATAW ($bits(dst)), \
|
||||
.RESETW ($bits(dst)), \
|
||||
.RESETW (RSTW), \
|
||||
.DEPTH (latency) \
|
||||
) __``dst``__ ( \
|
||||
.clk (clk), \
|
||||
|
@ -348,7 +348,7 @@
|
|||
.data_out (dst) \
|
||||
)
|
||||
|
||||
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, 1)
|
||||
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, 0, 1)
|
||||
|
||||
`define POP_COUNT_EX(out, in, model) \
|
||||
VX_popcount #( \
|
||||
|
|
|
@ -37,16 +37,13 @@ endgenerate
|
|||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) begin \
|
||||
if (!reset) begin \
|
||||
`ASSERT(cond, msg); \
|
||||
end \
|
||||
end
|
||||
|
||||
`define __SCOPE
|
||||
`define __SCOPE_X
|
||||
`define __SCOPE_ON
|
||||
`define __SCOPE_OFF
|
||||
|
||||
`ifndef TRACING_ALL
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
|
@ -128,6 +125,8 @@ endgenerate
|
|||
end
|
||||
`endif
|
||||
|
||||
`define SFORMATF(x) $sformatf x
|
||||
|
||||
`else // SYNTHESIS
|
||||
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
|
@ -137,6 +136,7 @@ endgenerate
|
|||
|
||||
`define DEBUG_BLOCK(x)
|
||||
`define TRACE(level, args)
|
||||
`define SFORMATF(x) ""
|
||||
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
|
@ -153,45 +153,39 @@ endgenerate
|
|||
`define UNUSED_PIN(x) . x ()
|
||||
`define UNUSED_ARG(x) x
|
||||
|
||||
`define __SCOPE (* mark_debug="true" *)
|
||||
|
||||
`define __SCOPE_X
|
||||
|
||||
`define __SCOPE_ON \
|
||||
`undef __SCOPE_X \
|
||||
`define __SCOPE_X `__SCOPE
|
||||
|
||||
`define __SCOPE_OFF \
|
||||
`undef __SCOPE_X \
|
||||
`define __SCOPE_X
|
||||
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef QUARTUS
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define MAX_LUTRAM 1024
|
||||
`define USE_BLOCK_BRAM (* ramstyle = "block" *)
|
||||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_NET (* preserve *)
|
||||
`define BLACKBOX_CELL (* black_box *)
|
||||
`define STRING string
|
||||
`elsif VIVADO
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define MAX_LUTRAM 1024
|
||||
`define USE_BLOCK_BRAM (* ram_style = "block" *)
|
||||
`define USE_FAST_BRAM (* ram_style = "distributed" *)
|
||||
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
|
||||
`define DISABLE_BRAM (* ram_style = "registers" *)
|
||||
`define PRESERVE_NET (* keep = "true" *)
|
||||
`define BLACKBOX_CELL (* black_box *)
|
||||
`define STRING
|
||||
`else
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) x.DATA_WIDTH
|
||||
`define MAX_LUTRAM 1024
|
||||
`define USE_BLOCK_BRAM
|
||||
`define USE_FAST_BRAM
|
||||
`define NO_RW_RAM_CHECK
|
||||
`define DISABLE_BRAM
|
||||
`define PRESERVE_NET
|
||||
`define BLACKBOX_CELL
|
||||
`define STRING string
|
||||
`endif
|
||||
|
||||
|
@ -217,7 +211,7 @@ endgenerate
|
|||
|
||||
`define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x)))
|
||||
|
||||
`define UP(x) (((x) != 0) ? (x) : 1)
|
||||
`define UP(x) (((x) > 0) ? (x) : 1)
|
||||
|
||||
`define CDIV(n,d) ((n + d - 1) / (d))
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (icache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-icache", INSTANCE_ID))),
|
||||
.NUM_UNITS (`NUM_ICACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -100,8 +100,10 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (ICACHE_TAG_WIDTH),
|
||||
.FLAGS_WIDTH (0),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (0),
|
||||
.REPL_POLICY (`ICACHE_REPL_POLICY),
|
||||
.NC_ENABLE (0),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (2)
|
||||
|
@ -130,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (dcache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-dcache", INSTANCE_ID))),
|
||||
.NUM_UNITS (`NUM_DCACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -146,9 +148,11 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.DIRTY_BYTES (`DCACHE_WRITEBACK),
|
||||
.DIRTY_BYTES (`DCACHE_DIRTYBYTES),
|
||||
.REPL_POLICY (`DCACHE_REPL_POLICY),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (2)
|
||||
|
@ -208,7 +212,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_core #(
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
|
||||
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-core%0d", INSTANCE_ID, core_id)))
|
||||
) core (
|
||||
`SCOPE_IO_BIND (scope_core + core_id)
|
||||
|
||||
|
@ -233,6 +237,6 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
);
|
||||
end
|
||||
|
||||
`BUFFER_EX(busy, (| per_core_busy), 1'b1, (`SOCKET_SIZE > 1));
|
||||
`BUFFER_EX(busy, (| per_core_busy), 1'b1, 1, (`SOCKET_SIZE > 1));
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -84,8 +84,10 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_DIRTYBYTES),
|
||||
.REPL_POLICY (`L3_REPL_POLICY),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
|
@ -138,7 +140,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (cluster_id),
|
||||
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
|
||||
.INSTANCE_ID (`SFORMATF(("cluster%0d", cluster_id)))
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (scope_cluster + cluster_id)
|
||||
|
||||
|
@ -157,7 +159,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
);
|
||||
end
|
||||
|
||||
`BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1));
|
||||
`BUFFER_EX(busy, (| per_cluster_busy), 1'b1, 1, (`NUM_CLUSTERS > 1));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
|
@ -202,13 +204,13 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid))
|
||||
`TRACE(2, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid))
|
||||
`TRACE(2, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid))
|
||||
`TRACE(2, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -968,7 +968,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
|
||||
wire [COUT_TID_WIDTH-1:0] cout_tid;
|
||||
|
||||
VX_encoder #(
|
||||
VX_onehot_encoder #(
|
||||
.N (`VX_MEM_BYTEEN_WIDTH)
|
||||
) cout_tid_enc (
|
||||
.data_in (vx_mem_req_byteen),
|
||||
|
|
|
@ -373,7 +373,9 @@ module VX_afu_wrap #(
|
|||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef CHIPSCOPE
|
||||
`ifdef DBG_SCOPE_AFU
|
||||
ila_afu ila_afu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({
|
||||
|
@ -394,6 +396,7 @@ module VX_afu_wrap #(
|
|||
})
|
||||
);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
`ifndef VERILATOR
|
||||
|
|
54
hw/rtl/cache/VX_bank_flush.sv
vendored
54
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -33,7 +33,7 @@ module VX_bank_flush #(
|
|||
output wire flush_init,
|
||||
output wire flush_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||
output wire [NUM_WAYS-1:0] flush_way,
|
||||
output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way,
|
||||
input wire flush_ready,
|
||||
input wire mshr_empty,
|
||||
input wire bank_empty
|
||||
|
@ -48,20 +48,21 @@ module VX_bank_flush #(
|
|||
localparam STATE_WAIT2 = 4;
|
||||
localparam STATE_DONE = 5;
|
||||
|
||||
reg [2:0] state_r, state_n;
|
||||
reg [2:0] state, state_n;
|
||||
|
||||
reg [CTR_WIDTH-1:0] counter_r;
|
||||
reg [CTR_WIDTH-1:0] counter;
|
||||
|
||||
always @(*) begin
|
||||
state_n = state_r;
|
||||
case (state_r)
|
||||
STATE_IDLE: begin
|
||||
state_n = state;
|
||||
case (state)
|
||||
//STATE_IDLE:
|
||||
default : begin
|
||||
if (flush_begin) begin
|
||||
state_n = STATE_WAIT1;
|
||||
end
|
||||
end
|
||||
STATE_INIT: begin
|
||||
if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
||||
if (counter == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
|
@ -72,7 +73,7 @@ module VX_bank_flush #(
|
|||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||
if (counter == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
|
||||
end
|
||||
end
|
||||
|
@ -93,37 +94,30 @@ module VX_bank_flush #(
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state_r <= STATE_INIT;
|
||||
counter_r <= '0;
|
||||
state <= STATE_INIT;
|
||||
counter <= '0;
|
||||
end else begin
|
||||
state_r <= state_n;
|
||||
if (state_r != STATE_IDLE) begin
|
||||
if ((state_r == STATE_INIT)
|
||||
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
|
||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||
state <= state_n;
|
||||
if (state != STATE_IDLE) begin
|
||||
if ((state == STATE_INIT)
|
||||
|| ((state == STATE_FLUSH) && flush_ready)) begin
|
||||
counter <= counter + CTR_WIDTH'(1);
|
||||
end
|
||||
end else begin
|
||||
counter_r <= '0;
|
||||
counter <= '0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign flush_end = (state_r == STATE_DONE);
|
||||
assign flush_init = (state_r == STATE_INIT);
|
||||
assign flush_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
assign flush_end = (state == STATE_DONE);
|
||||
assign flush_init = (state == STATE_INIT);
|
||||
assign flush_valid = (state == STATE_FLUSH);
|
||||
assign flush_line = counter[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
|
||||
VX_decoder #(
|
||||
.N (`CS_WAY_SEL_BITS),
|
||||
.D (NUM_WAYS)
|
||||
) ctr_decoder (
|
||||
.data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
|
||||
.valid_in (1'b1),
|
||||
.data_out (flush_way)
|
||||
);
|
||||
if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way
|
||||
assign flush_way = counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS];
|
||||
end else begin : g_flush_way_all
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
assign flush_way = '0;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
83
hw/rtl/cache/VX_cache.sv
vendored
83
hw/rtl/cache/VX_cache.sv
vendored
|
@ -20,22 +20,22 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 32768,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 4,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = `XLEN/8,
|
||||
parameter WORD_SIZE = 16,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
parameter CRSQ_SIZE = 4,
|
||||
// Miss Reservation Queue (MSHR) Size
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 16,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
parameter MRSQ_SIZE = 4,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
|
@ -48,17 +48,23 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Replacement policy
|
||||
parameter REPL_POLICY = `CS_REPL_CYCLIC,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
parameter CORE_OUT_BUF = 3,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_BUF = 0
|
||||
parameter MEM_OUT_BUF = 3
|
||||
) (
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -76,10 +82,6 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
|
||||
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
|
||||
|
||||
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
|
||||
// We need to ensure that the memory request queue never fills up to avoid deadlock.
|
||||
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
|
||||
|
||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
|
@ -90,7 +92,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + `UP(FLAGS_WIDTH);
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH;
|
||||
|
||||
|
@ -206,13 +208,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [LINE_SIZE-1:0] mem_req_byteen;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_flush;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_req_flush_b;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flush_b;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
|
||||
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -220,13 +222,18 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (mem_req_valid),
|
||||
.ready_in (mem_req_ready),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flags}),
|
||||
.data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}),
|
||||
.valid_out (mem_bus_tmp_if.req_valid),
|
||||
.ready_out (mem_bus_tmp_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0;
|
||||
if (FLAGS_WIDTH != 0) begin : g_mem_req_flags
|
||||
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b;
|
||||
end else begin : g_no_mem_req_flags
|
||||
assign mem_bus_tmp_if.req_data.flags = '0;
|
||||
`UNUSED_VAR (mem_req_flush_b)
|
||||
end
|
||||
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
|
@ -244,7 +251,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
|
||||
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_core_req_flags;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
|
@ -259,7 +266,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_mem_req_flags;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
@ -276,7 +283,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_flush;
|
||||
wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
|
@ -293,7 +300,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
|
||||
assign core_req_flags[i] = `UP(FLAGS_WIDTH)'(core_bus2_if[i].req_data.flags);
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
|
@ -325,7 +332,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
core_req_byteen[i],
|
||||
core_req_data[i],
|
||||
core_req_tag[i],
|
||||
core_req_flush[i]
|
||||
core_req_flags[i]
|
||||
};
|
||||
end
|
||||
|
||||
|
@ -366,7 +373,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_data[i],
|
||||
per_bank_core_req_tag[i],
|
||||
per_bank_core_req_flush[i]
|
||||
per_bank_core_req_flags[i]
|
||||
} = core_req_data_out[i];
|
||||
end
|
||||
|
||||
|
@ -378,23 +385,25 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (bank_id),
|
||||
.INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-bank%0d", INSTANCE_ID, bank_id))),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.REPL_POLICY (REPL_POLICY),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
|
||||
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
.FLAGS_WIDTH (FLAGS_WIDTH),
|
||||
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : 1),
|
||||
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : 1)
|
||||
) bank (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -414,7 +423,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.core_req_data (per_bank_core_req_data[bank_id]),
|
||||
.core_req_tag (per_bank_core_req_tag[bank_id]),
|
||||
.core_req_idx (per_bank_core_req_idx[bank_id]),
|
||||
.core_req_flush (per_bank_core_req_flush[bank_id]),
|
||||
.core_req_flags (per_bank_core_req_flags[bank_id]),
|
||||
.core_req_ready (per_bank_core_req_ready[bank_id]),
|
||||
|
||||
// Core response
|
||||
|
@ -431,7 +440,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_tag (per_bank_mem_req_tag[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_flags (per_bank_mem_req_flags[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
|
@ -487,7 +496,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH))-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in
|
||||
assign data_in[i] = {
|
||||
|
@ -496,7 +505,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_tag[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
per_bank_mem_req_flags[i]
|
||||
};
|
||||
end
|
||||
|
||||
|
@ -504,7 +513,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
|
||||
.ARBITER ("R")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
|
@ -512,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flags}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
|
|
533
hw/rtl/cache/VX_cache_bank.sv
vendored
533
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -47,12 +47,18 @@ module VX_cache_bank #(
|
|||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Replacement policy
|
||||
parameter REPL_POLICY = `CS_REPL_CYCLIC,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
|
||||
|
@ -82,7 +88,7 @@ module VX_cache_bank #(
|
|||
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
|
||||
input wire core_req_flush, // flush enable
|
||||
input wire [`UP(FLAGS_WIDTH)-1:0] core_req_flags,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
|
@ -99,7 +105,7 @@ module VX_cache_bank #(
|
|||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_flush,
|
||||
output wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
|
@ -138,43 +144,45 @@ module VX_cache_bank #(
|
|||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||
wire replay_ready;
|
||||
|
||||
wire is_init_st0, is_init_st1;
|
||||
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_init_st0;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_flush_st0, is_flush_st1;
|
||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st0, way_idx_st1;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0;
|
||||
wire is_dirty_st0, is_dirty_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
|
||||
wire evict_dirty_st0, evict_dirty_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
|
||||
wire is_hit_st0, is_hit_st1;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire flush_valid;
|
||||
wire init_valid;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||
wire [NUM_WAYS-1:0] flush_way;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way;
|
||||
wire flush_ready;
|
||||
|
||||
// ensure we have no pending memory request in the bank
|
||||
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -196,11 +204,7 @@ module VX_cache_bank #(
|
|||
.bank_empty (no_pending_req)
|
||||
);
|
||||
|
||||
wire rdw_hazard1_sel;
|
||||
wire rdw_hazard2_sel;
|
||||
reg rdw_hazard3_st1;
|
||||
|
||||
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
|
||||
wire pipe_stall = crsp_queue_stall;
|
||||
|
||||
// inputs arbitration:
|
||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
|
@ -219,28 +223,26 @@ module VX_cache_bank #(
|
|||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
&& ~rdw_hazard1_sel
|
||||
&& ~(!WRITEBACK && replay_rw && mreq_queue_alm_full) // needed for writethrough
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign mem_rsp_ready = fill_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~mreq_queue_alm_full // needed for fill requests
|
||||
&& ~mshr_alm_full // needed for mshr allocation
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = init_valid;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0];
|
||||
|
@ -264,14 +266,13 @@ module VX_cache_bank #(
|
|||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) :
|
||||
(replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag));
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
assign flags_sel = core_req_valid ? core_req_flags : '0;
|
||||
|
||||
if (WRITE_ENABLE) begin : g_data_sel
|
||||
for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i
|
||||
|
@ -293,15 +294,21 @@ module VX_cache_bank #(
|
|||
assign req_uuid_sel = '0;
|
||||
end
|
||||
|
||||
wire is_init_sel = init_valid;
|
||||
wire is_creq_sel = creq_enable || replay_enable;
|
||||
wire is_fill_sel = fill_enable;
|
||||
wire is_flush_sel = flush_enable;
|
||||
wire is_replay_sel = replay_enable;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, is_init_sel, is_fill_sel, is_flush_sel, is_creq_sel, is_replay_sel, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st0
|
||||
|
@ -310,147 +317,121 @@ module VX_cache_bank #(
|
|||
assign req_uuid_st0 = '0;
|
||||
end
|
||||
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
|
||||
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
||||
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
|
||||
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
|
||||
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
|
||||
wire is_read_st0 = is_creq_st0 && ~rw_st0;
|
||||
wire is_write_st0 = is_creq_st0 && rw_st0;
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_read_st0 = valid_st0 && is_read_st0;
|
||||
wire do_write_st0 = valid_st0 && is_write_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
|
||||
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||
|
||||
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||
wire do_read_st1 = valid_st1 && is_read_st1;
|
||||
wire do_write_st1 = valid_st1 && is_write_st1;
|
||||
|
||||
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0);
|
||||
|
||||
assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
wire do_lookup_st0 = do_read_st0 || do_write_st0;
|
||||
wire do_lookup_st1 = do_read_st1 || do_write_st1;
|
||||
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] victim_way_st0;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
|
||||
VX_cache_repl #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.REPL_POLICY (REPL_POLICY)
|
||||
) cache_repl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipe_stall),
|
||||
.hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall),
|
||||
.hit_line (line_idx_st1),
|
||||
.hit_way (way_idx_st1),
|
||||
.repl_valid (do_fill_st0 && ~pipe_stall),
|
||||
.repl_line (line_idx_st0),
|
||||
.repl_way (victim_way_st0)
|
||||
);
|
||||
|
||||
assign evict_way_st0 = is_fill_st0 ? victim_way_st0 : flush_way_st0;
|
||||
|
||||
VX_cache_tags #(
|
||||
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) cache_tags (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st0),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
// init/flush/fill/write/lookup
|
||||
// inputs
|
||||
.init (do_init_st0),
|
||||
.flush (do_flush_st0),
|
||||
.fill (do_fill_st0),
|
||||
.write (do_cache_wr_st0),
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.way_sel (flush_way_st0),
|
||||
.tag_matches(tag_matches_st0),
|
||||
|
||||
// replacement
|
||||
.evict_dirty(evict_dirty_st0),
|
||||
.flush (do_flush_st0 && ~pipe_stall),
|
||||
.fill (do_fill_st0 && ~pipe_stall),
|
||||
.read (do_read_st0 && ~pipe_stall),
|
||||
.write (do_write_st0 && ~pipe_stall),
|
||||
.line_idx (line_idx_st0),
|
||||
.line_tag (line_tag_st0),
|
||||
.evict_way (evict_way_st0),
|
||||
// outputs
|
||||
.tag_matches(tag_matches_st0),
|
||||
.evict_dirty(is_dirty_st0),
|
||||
.evict_tag (evict_tag_st0)
|
||||
);
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx_st0;
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) way_idx_enc (
|
||||
.data_in (tag_matches_st0),
|
||||
.data_out (hit_idx_st0),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
|
||||
assign way_idx_st0 = is_creq_st0 ? hit_idx_st0 : evict_way_st0;
|
||||
assign is_hit_st0 = (| tag_matches_st0);
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_dirty_st0, is_hit_st0, rw_st0, flags_st0, way_idx_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_dirty_st1, is_hit_st1, rw_st1, flags_st1, way_idx_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
wire is_hit_st1 = (| way_sel_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
|
||||
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin : g_req_uuid_st1_0
|
||||
assign req_uuid_st1 = '0;
|
||||
end
|
||||
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||
|
||||
wire do_init_st1 = valid_st1 && is_init_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||
|
||||
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
|
||||
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
assign addr_st1 = {line_tag_st1, line_idx_st1};
|
||||
|
||||
// ensure mshr replay always get a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time))
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time))
|
||||
|
||||
// both tag and data stores use BRAM with no read-during-write protection.
|
||||
// we need to stall the pipeline to prevent read-after-write hazards.
|
||||
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
|
||||
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
|
||||
always @(posedge clk) begin
|
||||
// stall reads following writes to same line address
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
|
||||
&& ~rdw_hazard3_st1; // release pipeline stall
|
||||
end
|
||||
assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
`UNUSED_VAR (data_st1)
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
|
||||
wire [LINE_SIZE-1:0] write_byteen_st1;
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w;
|
||||
always @(*) begin
|
||||
write_byteen_w = '0;
|
||||
write_byteen_w[wsel_st1] = byteen_st1;
|
||||
end
|
||||
assign write_byteen_st1 = write_byteen_w;
|
||||
end else begin : g_write_byteen_st1
|
||||
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [LINE_SIZE-1:0] evict_byteen_st1;
|
||||
|
||||
VX_cache_data #(
|
||||
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
|
@ -458,56 +439,58 @@ module VX_cache_bank #(
|
|||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
.DIRTY_BYTES (DIRTY_BYTES)
|
||||
) cache_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.req_uuid (req_uuid_st1),
|
||||
|
||||
.stall (pipe_stall),
|
||||
|
||||
.init (do_init_st1),
|
||||
.read (do_cache_rd_st1),
|
||||
.fill (do_fill_st1),
|
||||
.flush (do_flush_st1),
|
||||
.write (do_cache_wr_st1),
|
||||
.way_sel (way_sel_st1),
|
||||
.line_addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.write_data (write_data_st1),
|
||||
.write_byteen(write_byteen_st1),
|
||||
// inputs
|
||||
.init (do_init_st0),
|
||||
.fill (do_fill_st0 && ~pipe_stall),
|
||||
.flush (do_flush_st0 && ~pipe_stall),
|
||||
.read (do_read_st0 && ~pipe_stall),
|
||||
.write (do_write_st0 && ~pipe_stall),
|
||||
.evict_way (evict_way_st0),
|
||||
.tag_matches(tag_matches_st0),
|
||||
.line_idx (line_idx_st0),
|
||||
.fill_data (data_st0),
|
||||
.write_word (write_word_st0),
|
||||
.word_idx (word_idx_st0),
|
||||
.write_byteen(byteen_st0),
|
||||
.way_idx_r (way_idx_st1),
|
||||
// outputs
|
||||
.read_data (read_data_st1),
|
||||
.dirty_data (dirty_data_st1),
|
||||
.dirty_byteen(dirty_byteen_st1)
|
||||
.evict_byteen(evict_byteen_st1)
|
||||
);
|
||||
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0;
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
|
||||
// only allocate MSHR entries for non-replay core requests
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~is_replay_st0;
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~is_replay_st1;
|
||||
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin : g_mshr_release_st1
|
||||
if (WRITEBACK) begin : g_mshr_release
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
end else begin : g_mshr_release_st1_ro
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
end else begin : g_mshr_release_ro
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address.
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content.
|
||||
// this can happen when writes are sent to memory late, when a related fill was already in flight.
|
||||
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
end
|
||||
|
||||
wire mshr_release_fire = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall;
|
||||
|
||||
wire [1:0] mshr_dequeue;
|
||||
`POP_COUNT(mshr_dequeue, {replay_fire, mshr_release_fire});
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
.SIZE (MSHR_SIZE),
|
||||
.DECRW (2)
|
||||
) mshr_pending_size (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (core_req_fire),
|
||||
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
|
||||
.decr (mshr_dequeue),
|
||||
.empty (mshr_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (mshr_alm_full),
|
||||
|
@ -516,11 +499,12 @@ module VX_cache_bank #(
|
|||
);
|
||||
|
||||
VX_cache_mshr #(
|
||||
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-mshr", INSTANCE_ID))),
|
||||
.BANK_ID (BANK_ID),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
||||
) cache_mshr (
|
||||
|
@ -528,7 +512,7 @@ module VX_cache_bank #(
|
|||
.reset (reset),
|
||||
|
||||
.deq_req_uuid (req_uuid_sel),
|
||||
.lkp_req_uuid (req_uuid_st0),
|
||||
.alc_req_uuid (req_uuid_st0),
|
||||
.fin_req_uuid (req_uuid_st1),
|
||||
|
||||
// memory fill
|
||||
|
@ -545,37 +529,23 @@ module VX_cache_bank #(
|
|||
.dequeue_ready (replay_ready),
|
||||
|
||||
// allocate
|
||||
.allocate_valid (mshr_allocate_st0),
|
||||
.allocate_valid (mshr_allocate_st0 && ~pipe_stall),
|
||||
.allocate_addr (addr_st0),
|
||||
.allocate_rw (rw_st0),
|
||||
.allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
|
||||
.allocate_data ({word_idx_st0, byteen_st0, write_word_st0, tag_st0, req_idx_st0}),
|
||||
.allocate_id (mshr_alloc_id_st0),
|
||||
.allocate_prev (mshr_prev_st0),
|
||||
.allocate_pending(mshr_pending_st0),
|
||||
.allocate_previd(mshr_previd_st0),
|
||||
`UNUSED_PIN (allocate_ready),
|
||||
|
||||
// lookup
|
||||
.lookup_valid (mshr_lookup_st0),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_pending (mshr_lookup_pending_st0),
|
||||
.lookup_rw (mshr_lookup_rw_st0),
|
||||
|
||||
// finalize
|
||||
.finalize_valid (mshr_finalize_st1),
|
||||
.finalize_release(mshr_release_st1),
|
||||
.finalize_pending(mshr_pending_st1),
|
||||
.finalize_valid (mshr_finalize_st1 && ~pipe_stall),
|
||||
.finalize_is_release(mshr_release_st1),
|
||||
.finalize_is_pending(mshr_pending_st1),
|
||||
.finalize_id (mshr_id_st1),
|
||||
.finalize_prev (mshr_prev_st1)
|
||||
.finalize_previd(mshr_previd_st1)
|
||||
);
|
||||
|
||||
// check if there are pending requests to same line in the MSHR
|
||||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_alloc_id_st0) // exclude current mshr id
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches);
|
||||
|
||||
// schedule core response
|
||||
|
||||
wire crsp_queue_valid, crsp_queue_ready;
|
||||
|
@ -583,9 +553,9 @@ module VX_cache_bank #(
|
|||
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
|
||||
wire [TAG_WIDTH-1:0] crsp_queue_tag;
|
||||
|
||||
assign crsp_queue_valid = do_cache_rd_st1;
|
||||
assign crsp_queue_valid = do_read_st1 && is_hit_st1;
|
||||
assign crsp_queue_idx = req_idx_st1;
|
||||
assign crsp_queue_data = read_data_st1;
|
||||
assign crsp_queue_data = read_data_st1[word_idx_st1];
|
||||
assign crsp_queue_tag = tag_st1;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
|
@ -595,7 +565,7 @@ module VX_cache_bank #(
|
|||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
|
||||
.valid_in (crsp_queue_valid),
|
||||
.ready_in (crsp_queue_ready),
|
||||
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
|
||||
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
||||
|
@ -613,51 +583,68 @@ module VX_cache_bank #(
|
|||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags;
|
||||
|
||||
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
|
||||
wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK);
|
||||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||
|
||||
if (WRITEBACK) begin : g_mreq_queue_push
|
||||
if (DIRTY_BYTES) begin : g_dirty_bytes
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| dirty_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
|
||||
end
|
||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end else begin : g_mreq_queue_push_ro
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end
|
||||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && is_dirty_st1;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1};
|
||||
|
||||
if (WRITE_ENABLE) begin : g_mreq_queue
|
||||
if (WRITEBACK) begin : g_writeback
|
||||
if (WRITEBACK) begin : g_wb
|
||||
if (DIRTY_BYTES) begin : g_dirty_bytes
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| evict_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (is_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, is_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
|
||||
end
|
||||
// issue a fill request on a read/write miss
|
||||
// issue a writeback on a dirty line eviction
|
||||
assign mreq_queue_push = ((do_lookup_st1 && ~is_hit_st1 && ~mshr_pending_st1)
|
||||
|| do_writeback_st1)
|
||||
&& ~pipe_stall;
|
||||
assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1;
|
||||
assign mreq_queue_rw = is_fill_or_flush_st1;
|
||||
assign mreq_queue_data = dirty_data_st1;
|
||||
assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1;
|
||||
end else begin : g_writethrough
|
||||
assign mreq_queue_data = read_data_st1;
|
||||
assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1;
|
||||
`UNUSED_VAR (write_word_st1)
|
||||
`UNUSED_VAR (byteen_st1)
|
||||
end else begin : g_wt
|
||||
wire [LINE_SIZE-1:0] line_byteen;
|
||||
VX_demux #(
|
||||
.N (`CS_WORD_SEL_BITS),
|
||||
.M (WORD_SIZE)
|
||||
) byteen_demux (
|
||||
.sel_in (word_idx_st1),
|
||||
.data_in (byteen_st1),
|
||||
.data_out (line_byteen)
|
||||
);
|
||||
// issue a fill request on a read miss
|
||||
// issue a memory write on a write request
|
||||
assign mreq_queue_push = ((do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1)
|
||||
|| do_write_st1)
|
||||
&& ~pipe_stall;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_rw = rw_st1;
|
||||
assign mreq_queue_data = write_data_st1;
|
||||
assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1;
|
||||
assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_word_st1}};
|
||||
assign mreq_queue_byteen = rw_st1 ? line_byteen : '1;
|
||||
`UNUSED_VAR (is_fill_or_flush_st1)
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
`UNUSED_VAR (evict_addr_st1)
|
||||
`UNUSED_VAR (evict_byteen_st1)
|
||||
end
|
||||
end else begin : g_mreq_queue_ro
|
||||
// issue a fill request on a read miss
|
||||
assign mreq_queue_push = (do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1)
|
||||
&& ~pipe_stall;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_rw = 0;
|
||||
assign mreq_queue_data = '0;
|
||||
assign mreq_queue_byteen = '1;
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
`UNUSED_VAR (evict_addr_st1)
|
||||
`UNUSED_VAR (evict_byteen_st1)
|
||||
`UNUSED_VAR (write_word_st1)
|
||||
`UNUSED_VAR (byteen_st1)
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid
|
||||
|
@ -666,18 +653,21 @@ module VX_cache_bank #(
|
|||
assign mreq_queue_tag = mshr_id_st1;
|
||||
end
|
||||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_flags = flags_st1;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.ALM_FULL (MREQ_SIZE - PIPELINE_STAGES),
|
||||
.OUT_REG (MEM_OUT_REG)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flags}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flags}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -687,11 +677,13 @@ module VX_cache_bank #(
|
|||
|
||||
assign mem_req_valid = ~mreq_queue_empty;
|
||||
|
||||
`UNUSED_VAR (do_lookup_st0)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = do_read_miss_st1;
|
||||
assign perf_write_misses = do_write_miss_st1;
|
||||
assign perf_read_misses = do_read_st1 && ~is_hit_st1;
|
||||
assign perf_write_misses = do_write_st1 && ~is_hit_st1;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
|
@ -701,31 +693,76 @@ module VX_cache_bank #(
|
|||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1))
|
||||
`TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID,
|
||||
crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full))
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
|
||||
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
|
||||
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
|
||||
end
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw) begin
|
||||
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
|
||||
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
|
||||
end else begin
|
||||
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
|
||||
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
|
||||
end
|
||||
end
|
||||
if (do_init_st0) begin
|
||||
`TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0))
|
||||
end
|
||||
if (do_fill_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_flush_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_lookup_st0 && ~pipe_stall) begin
|
||||
if (is_hit_st0) begin
|
||||
`TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
|
||||
end
|
||||
end
|
||||
if (do_fill_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, data_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_flush_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1))
|
||||
end
|
||||
if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
|
||||
end
|
||||
if (crsp_queue_fire) begin
|
||||
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
|
||||
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
|
||||
end
|
||||
if (mreq_queue_push) begin
|
||||
if (do_creq_wr_st1 && !WRITEBACK) begin
|
||||
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else if (do_writeback_st1) begin
|
||||
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
if (!WRITEBACK && do_write_st1) begin
|
||||
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else if (WRITEBACK && do_writeback_st1) begin
|
||||
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else begin
|
||||
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
|
||||
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
2
hw/rtl/cache/VX_cache_bypass.sv
vendored
2
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -268,7 +268,7 @@ module VX_cache_bypass #(
|
|||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i));
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
end
|
||||
|
|
32
hw/rtl/cache/VX_cache_cluster.sv
vendored
32
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -24,22 +24,22 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 32768,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 4,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 16,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
parameter CRSQ_SIZE = 4,
|
||||
// Miss Reservation Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 16,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
parameter MRSQ_SIZE = 4,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
|
@ -52,20 +52,26 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Replacement policy
|
||||
parameter REPL_POLICY = `CS_REPL_CYCLIC,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
parameter CORE_OUT_BUF = 3,
|
||||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
parameter MEM_OUT_BUF = 3
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -140,22 +146,24 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < NUM_CACHES; ++i) begin : g_cache_wrap
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, i))),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.REPL_POLICY (REPL_POLICY),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
.FLAGS_WIDTH (FLAGS_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.CORE_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF ((NUM_CACHES > 1) ? 2 : MEM_OUT_BUF),
|
||||
|
|
240
hw/rtl/cache/VX_cache_data.sv
vendored
240
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -14,8 +14,6 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_data #(
|
||||
parameter `STRING INSTANCE_ID= "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
|
@ -31,171 +29,147 @@ module VX_cache_data #(
|
|||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
parameter DIRTY_BYTES = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[`UP(UUID_WIDTH)-1:0] req_uuid,
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
input wire stall,
|
||||
|
||||
// inputs
|
||||
input wire init,
|
||||
input wire read,
|
||||
input wire fill,
|
||||
input wire flush,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
|
||||
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
|
||||
input wire [NUM_WAYS-1:0] tag_matches,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||
input wire [`CS_WORD_WIDTH-1:0] write_word,
|
||||
input wire [WORD_SIZE-1:0] write_byteen,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
|
||||
input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_r,
|
||||
// outputs
|
||||
output wire [`CS_LINE_WIDTH-1:0] read_data,
|
||||
output wire [LINE_SIZE-1:0] evict_byteen
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (line_addr)
|
||||
`UNUSED_VAR (init)
|
||||
`UNUSED_VAR (read)
|
||||
`UNUSED_VAR (flush)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
if (DIRTY_BYTES != 0) begin : g_dirty_bytes
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wren;
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
if (WRITEBACK) begin : g_dirty_data
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
|
||||
VX_transpose #(
|
||||
.DATAW (`CS_WORD_WIDTH),
|
||||
.N (`CS_WORDS_PER_LINE),
|
||||
.M (NUM_WAYS)
|
||||
) transpose (
|
||||
.data_in (line_rdata),
|
||||
.data_out (transposed_rdata)
|
||||
);
|
||||
assign dirty_data = transposed_rdata[way_idx];
|
||||
end else begin : g_dirty_data_0
|
||||
assign dirty_data = '0;
|
||||
end
|
||||
|
||||
if (DIRTY_BYTES) begin : g_dirty_byteen
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
|
||||
wire evict = fill || flush;
|
||||
wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
|
||||
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
|
||||
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
|
||||
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
|
||||
end
|
||||
assign byteen_wdata[i] = {LINE_SIZE{write}}; // only asserted on writes
|
||||
assign byteen_wren[i] = {LINE_SIZE{init}}
|
||||
| {LINE_SIZE{evict && evict_way_en}}
|
||||
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
|
||||
end
|
||||
|
||||
wire byteen_read = fill || flush;
|
||||
wire byteen_write = init || write || fill || flush;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK)
|
||||
.WRENW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.OUT_REG (1),
|
||||
.RDW_MODE ("R")
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
.read (byteen_read),
|
||||
.write (byteen_write),
|
||||
.wren (byteen_wren),
|
||||
.addr (line_idx),
|
||||
.wdata (byteen_wdata),
|
||||
.rdata (byteen_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
end else begin : g_dirty_byteen_0
|
||||
assign dirty_byteen = '1;
|
||||
assign evict_byteen = byteen_rdata[way_idx_r];
|
||||
end else begin : g_no_dirty_bytes
|
||||
`UNUSED_VAR (init)
|
||||
`UNUSED_VAR (flush)
|
||||
assign evict_byteen = '1; // update whole line
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting the way index to binary in parallel with BRAM read access and way selection.
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
if (WRITE_ENABLE) begin : g_data_store
|
||||
// create a single write-enable block ram to reduce area overhead
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren;
|
||||
wire line_write;
|
||||
wire line_read;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j
|
||||
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata
|
||||
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
|
||||
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
|
||||
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
|
||||
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
|
||||
end
|
||||
assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
|
||||
assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}}
|
||||
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
|
||||
end
|
||||
assign line_wren = wren_w;
|
||||
end else begin : g_line_wdata_ro
|
||||
|
||||
assign line_read = read || ((fill || flush) && WRITEBACK);
|
||||
assign line_write = fill || (write && WRITE_ENABLE);
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (NUM_WAYS * `CS_LINE_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (NUM_WAYS * LINE_SIZE),
|
||||
.OUT_REG (1),
|
||||
.RDW_MODE ("R")
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
.addr (line_idx),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
end else begin : g_data_store
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = fill;
|
||||
end
|
||||
`UNUSED_VAR (write_word)
|
||||
`UNUSED_VAR (word_idx)
|
||||
`UNUSED_VAR (tag_matches)
|
||||
|
||||
VX_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) way_enc (
|
||||
.data_in (way_sel),
|
||||
.data_out (way_idx),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire line_read = (read && ~stall)
|
||||
|| (WRITEBACK && (fill || flush));
|
||||
|
||||
wire line_write = write || fill;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
.addr (line_sel),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel
|
||||
assign per_way_rdata = line_rdata[wsel];
|
||||
end else begin : g_per_way_rdata
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = line_rdata;
|
||||
end
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data))
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid))
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid))
|
||||
// we don't merge the ways into a single block ram due to WREN overhead
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways
|
||||
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.OUT_REG (1),
|
||||
.RDW_MODE ("R")
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (fill && fill_way_en),
|
||||
.wren (1'b1),
|
||||
.addr (line_idx),
|
||||
.wdata (fill_data),
|
||||
.rdata (line_rdata[i])
|
||||
);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
assign read_data = line_rdata[way_idx_r];
|
||||
|
||||
endmodule
|
||||
|
|
7
hw/rtl/cache/VX_cache_define.vh
vendored
7
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -22,6 +22,7 @@
|
|||
`define CS_LINE_WIDTH (8 * LINE_SIZE)
|
||||
`define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS)
|
||||
`define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS)
|
||||
`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS)
|
||||
|
||||
`define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS))
|
||||
`define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)
|
||||
|
@ -73,4 +74,10 @@
|
|||
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \
|
||||
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1))
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CS_REPL_RANDOM 0
|
||||
`define CS_REPL_CYCLIC 1
|
||||
`define CS_REPL_PLRU 2
|
||||
|
||||
`endif // VX_CACHE_DEFINE_VH
|
||||
|
|
3
hw/rtl/cache/VX_cache_flush.sv
vendored
3
hw/rtl/cache/VX_cache_flush.sv
vendored
|
@ -128,7 +128,8 @@ module VX_cache_flush #(
|
|||
lock_released_n = lock_released;
|
||||
flush_uuid_n = flush_uuid_r;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
//STATE_IDLE:
|
||||
default: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
|
|
135
hw/rtl/cache/VX_cache_mshr.sv
vendored
135
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -24,36 +24,23 @@
|
|||
// arrival and are dequeued in the same order.
|
||||
// Each entry has a next pointer to the next entry pending for the same cache line.
|
||||
//
|
||||
// During the fill operation, the MSHR will release the MSHR entry at fill_id
|
||||
// During the fill request, the MSHR will dequeue the MSHR entry at the fill_id location
|
||||
// which represents the first request in the pending list that initiated the memory fill.
|
||||
//
|
||||
// The dequeue operation directly follows the fill operation and will release
|
||||
// The dequeue response directly follows the fill request and will release
|
||||
// all the subsequent entries linked to fill_id (pending the same cache line).
|
||||
//
|
||||
// During the allocation operation, the MSHR will allocate the next free slot
|
||||
// During the allocation request, the MSHR will allocate the next free slot
|
||||
// for the incoming core request. We return the allocated slot id as well as
|
||||
// the slot id of the previous entry for the same cache line. This is used to
|
||||
// link the new entry to the pending list during finalization.
|
||||
// link the new entry to the pending list.
|
||||
//
|
||||
// The lookup operation is used to find all pending entries for a given cache line.
|
||||
// This is used by the cache bank to determine if a cache miss is already pending
|
||||
// and therefore avoid issuing a memory fill request.
|
||||
//
|
||||
// The finalize operation is used to release the allocated MSHR entry if we had a hit.
|
||||
// If we had a miss and finalize_pending is true, we link the allocated entry to
|
||||
// its corresponding pending list (via finalize_prev).
|
||||
// The finalize request is used to persist or release the currently allocated MSHR entry
|
||||
// if we had a cache miss or a hit, respectively.
|
||||
//
|
||||
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
|
||||
// and as such changes to either module requires careful evaluation.
|
||||
//
|
||||
// This architecture implements three pipeline stages:
|
||||
// - Arbitration: cache bank arbitration before entering pipeline.
|
||||
// fill and dequeue operations are executed at this stage.
|
||||
// - stage 0: cache bank tag access stage.
|
||||
// allocate and lookup operations are executed at this stage.
|
||||
// - stage 1: cache bank data access stage.
|
||||
// finalize operation is executed at this stage.
|
||||
//
|
||||
|
||||
module VX_cache_mshr #(
|
||||
parameter `STRING INSTANCE_ID= "",
|
||||
|
@ -68,6 +55,9 @@ module VX_cache_mshr #(
|
|||
parameter UUID_WIDTH = 0,
|
||||
// MSHR parameters
|
||||
parameter DATA_WIDTH = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -75,7 +65,7 @@ module VX_cache_mshr #(
|
|||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid,
|
||||
input wire[`UP(UUID_WIDTH)-1:0] lkp_req_uuid,
|
||||
input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid,
|
||||
input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid,
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
|
@ -98,26 +88,21 @@ module VX_cache_mshr #(
|
|||
input wire allocate_rw,
|
||||
input wire [DATA_WIDTH-1:0] allocate_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_prev,
|
||||
output wire allocate_pending,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_previd,
|
||||
output wire allocate_ready,
|
||||
|
||||
// lookup
|
||||
input wire lookup_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire [MSHR_SIZE-1:0] lookup_pending,
|
||||
output wire [MSHR_SIZE-1:0] lookup_rw,
|
||||
|
||||
// finalize
|
||||
input wire finalize_valid,
|
||||
input wire finalize_release,
|
||||
input wire finalize_pending,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_prev
|
||||
input wire finalize_is_release,
|
||||
input wire finalize_is_pending,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_previd,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id
|
||||
);
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
|
||||
reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
|
||||
reg [MSHR_ADDR_WIDTH-1:0] next_index [MSHR_SIZE-1:0];
|
||||
reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [0:MSHR_SIZE-1];
|
||||
reg [MSHR_ADDR_WIDTH-1:0] next_index [0:MSHR_SIZE-1];
|
||||
|
||||
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
|
||||
reg [MSHR_SIZE-1:0] next_table, next_table_x, next_table_n;
|
||||
|
@ -136,7 +121,7 @@ module VX_cache_mshr #(
|
|||
|
||||
wire [MSHR_SIZE-1:0] addr_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches
|
||||
assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
|
||||
assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr);
|
||||
end
|
||||
|
||||
VX_lzc #(
|
||||
|
@ -148,11 +133,13 @@ module VX_cache_mshr #(
|
|||
.valid_out (allocate_rdy_n)
|
||||
);
|
||||
|
||||
VX_encoder #(
|
||||
// find matching tail-entry
|
||||
VX_priority_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) prev_sel (
|
||||
.data_in (addr_matches & ~next_table_x),
|
||||
.data_out (prev_idx),
|
||||
.index_out (prev_idx),
|
||||
`UNUSED_PIN (onehot_out),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
|
@ -171,17 +158,22 @@ module VX_cache_mshr #(
|
|||
valid_table_n[dequeue_id] = 0;
|
||||
if (next_table[dequeue_id]) begin
|
||||
dequeue_id_n = next_index[dequeue_id];
|
||||
end else if (finalize_valid && finalize_is_pending && (finalize_previd == dequeue_id)) begin
|
||||
dequeue_id_n = finalize_id;
|
||||
end else begin
|
||||
dequeue_val_n = 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (finalize_valid) begin
|
||||
if (finalize_release) begin
|
||||
if (finalize_is_release) begin
|
||||
valid_table_n[finalize_id] = 0;
|
||||
end
|
||||
if (finalize_pending) begin
|
||||
next_table_x[finalize_prev] = 1;
|
||||
// warning: This code allows 'finalize_is_pending' to be asserted regardless of hit/miss
|
||||
// to reduce its propagation delay into the MSHR. This is safe because wrong updates
|
||||
// to 'next_table_n' will be cleared during 'allocate_fire' below.
|
||||
if (finalize_is_pending) begin
|
||||
next_table_x[finalize_previd] = 1;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -204,12 +196,12 @@ module VX_cache_mshr #(
|
|||
end
|
||||
|
||||
if (allocate_fire) begin
|
||||
addr_table[allocate_id] <= allocate_addr;
|
||||
addr_table[allocate_id] <= allocate_addr;
|
||||
write_table[allocate_id] <= allocate_rw;
|
||||
end
|
||||
|
||||
if (finalize_valid && finalize_pending) begin
|
||||
next_index[finalize_prev] <= finalize_id;
|
||||
if (finalize_valid && finalize_is_pending) begin
|
||||
next_index[finalize_previd] <= finalize_id;
|
||||
end
|
||||
|
||||
dequeue_id_r <= dequeue_id_n;
|
||||
|
@ -217,20 +209,20 @@ module VX_cache_mshr #(
|
|||
next_table <= next_table_n;
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
|
||||
`RUNTIME_ASSERT(~(allocate_fire && valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
|
||||
`RUNTIME_ASSERT(~(fill_valid && ~valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.LUTRAM (1)
|
||||
) entries (
|
||||
.DATAW (DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.RDW_MODE ("R")
|
||||
) mshr_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
|
@ -245,19 +237,20 @@ module VX_cache_mshr #(
|
|||
assign fill_addr = addr_table[fill_id];
|
||||
|
||||
assign allocate_ready = allocate_rdy;
|
||||
assign allocate_id = allocate_id_r;
|
||||
assign allocate_prev = prev_idx;
|
||||
assign allocate_id = allocate_id_r;
|
||||
assign allocate_previd = prev_idx;
|
||||
|
||||
assign dequeue_valid = dequeue_val;
|
||||
assign dequeue_addr = addr_table[dequeue_id_r];
|
||||
assign dequeue_rw = write_table[dequeue_id_r];
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
if (WRITEBACK) begin : g_pending_wb
|
||||
assign allocate_pending = |addr_matches;
|
||||
end else begin : g_pending_wt
|
||||
// exclude write requests if writethrough
|
||||
assign allocate_pending = |(addr_matches & ~write_table);
|
||||
end
|
||||
|
||||
// return pending entries for the given cache line
|
||||
assign lookup_pending = addr_matches;
|
||||
assign lookup_rw = write_table;
|
||||
|
||||
`UNUSED_VAR (lookup_valid)
|
||||
assign dequeue_valid = dequeue_val;
|
||||
assign dequeue_addr = addr_table[dequeue_id_r];
|
||||
assign dequeue_rw = write_table[dequeue_id_r];
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
reg show_table;
|
||||
|
@ -265,23 +258,21 @@ module VX_cache_mshr #(
|
|||
if (reset) begin
|
||||
show_table <= 0;
|
||||
end else begin
|
||||
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
|
||||
show_table <= allocate_fire || finalize_valid || fill_valid || dequeue_fire;
|
||||
end
|
||||
if (allocate_fire) begin
|
||||
`TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid))
|
||||
`TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid))
|
||||
end
|
||||
if (lookup_valid) begin
|
||||
`TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid))
|
||||
if (finalize_valid && finalize_is_release) begin
|
||||
`TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid))
|
||||
end
|
||||
if (finalize_valid) begin
|
||||
`TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid))
|
||||
if (finalize_valid && finalize_is_pending) begin
|
||||
`TRACE(3, ("%t: %s finalize: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid))
|
||||
end
|
||||
if (fill_valid) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
|
||||
end
|
||||
if (dequeue_fire) begin
|
||||
`TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
|
|
202
hw/rtl/cache/VX_cache_repl.sv
vendored
Normal file
202
hw/rtl/cache/VX_cache_repl.sv
vendored
Normal file
|
@ -0,0 +1,202 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Fast PLRU encoder and decoder utility
|
||||
// Adapted from BaseJump STL: http://bjump.org/data_out.html
|
||||
|
||||
// plru_decoder: turns an accessed way index into a tree-PLRU state update.
// The NUM_WAYS-1 state bits are indexed like a binary heap: node i has its
// parent at (i-1)/2 (odd i) or (i-2)/2 (even i), as used in the mask logic below.
// Outputs:
//   lru_mask - set for node 0 and for each node lying on the tree path picked by way_idx
//   lru_data - per node, the complement of the way_idx bit belonging to that node's tree level
// In VX_cache_repl these two outputs drive the wdata/wren ports of the PLRU state RAM.
module plru_decoder #(
|
||||
// Number of ways covered by the tree
parameter NUM_WAYS = 1,
|
||||
// Bits needed to index a way (0 when NUM_WAYS == 1)
parameter WAY_IDX_BITS = $clog2(NUM_WAYS),
|
||||
// Port width for the way index; NOTE(review): `UP presumably clamps to at least 1 - confirm in the macro definition
parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS)
|
||||
) (
|
||||
// index of the way being touched
input wire [WAY_IDX_WIDTH-1:0] way_idx,
|
||||
// new value for each tree node bit
output wire [`UP(NUM_WAYS-1)-1:0] lru_data,
|
||||
// which tree node bits are affected by this access
output wire [`UP(NUM_WAYS-1)-1:0] lru_mask
|
||||
);
|
||||
if (NUM_WAYS > 1) begin : g_dec
|
||||
wire [`UP(NUM_WAYS-1)-1:0] data;
|
||||
// mask[i] is derived from mask[parent]; NOTE(review): the IGNORE_UNOPTFLAT pair
// presumably silences the lint warning for this bit-to-bit dependency - confirm.
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [`UP(NUM_WAYS-1)-1:0] mask;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
for (genvar i = 0; i < NUM_WAYS-1; ++i) begin : g_i
|
||||
if (i == 0) begin : g_i_0
|
||||
// the root node is always on the path
assign mask[i] = 1'b1;
|
||||
end else if (i % 2 == 1) begin : g_i_odd
|
||||
// odd node: on the path when its parent is and the parent-level index bit is 0
assign mask[i] = mask[(i-1)/2] & ~way_idx[WAY_IDX_BITS-$clog2(i+2)+1];
|
||||
end else begin : g_i_even
|
||||
// even node: on the path when its parent is and the parent-level index bit is 1
assign mask[i] = mask[(i-2)/2] & way_idx[WAY_IDX_BITS-$clog2(i+2)+1];
|
||||
end
|
||||
// node value is the inverted index bit of the node's own level (MSB for node 0)
assign data[i] = ~way_idx[WAY_IDX_BITS-$clog2(i+2)];
|
||||
end
|
||||
assign lru_data = data;
|
||||
assign lru_mask = mask;
|
||||
end else begin : g_no_dec
|
||||
// single way: there is no tree state, outputs are tied to zero
`UNUSED_VAR (way_idx)
|
||||
assign lru_data = '0;
|
||||
assign lru_mask = '0;
|
||||
end
|
||||
|
|
||||
endmodule
|
||||
|
||||
// plru_encoder: derives a way index from the NUM_WAYS-1 tree-PLRU state bits.
// The index is built MSB-first: the top bit comes from node 0, and every following
// bit is picked out of that level's slice of lru_in using the bits already resolved.
module plru_encoder #(
|
||||
// Number of ways covered by the tree
parameter NUM_WAYS = 1,
|
||||
// Bits needed to index a way (0 when NUM_WAYS == 1)
parameter WAY_IDX_BITS = $clog2(NUM_WAYS),
|
||||
// Port width for the way index; NOTE(review): `UP presumably clamps to at least 1 - confirm in the macro definition
parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS)
|
||||
) (
|
||||
// stored tree node bits
input wire [`UP(NUM_WAYS-1)-1:0] lru_in,
|
||||
// way index selected by following the tree bits
output wire [WAY_IDX_WIDTH-1:0] way_idx
|
||||
);
|
||||
if (NUM_WAYS > 1) begin : g_enc
|
||||
wire [WAY_IDX_BITS-1:0] tmp;
|
||||
for (genvar i = 0; i < WAY_IDX_BITS; ++i) begin : g_i
|
||||
if (i == 0) begin : g_i_0
|
||||
// top index bit is the root node bit (tmp is indexed with WAY_IDX_WIDTH-1 here
// and with WAY_IDX_BITS-1 below; the declaration of tmp uses WAY_IDX_BITS)
assign tmp[WAY_IDX_WIDTH-1] = lru_in[0];
|
||||
end else begin : g_i_n
|
||||
// level i owns the 2**i node bits starting at offset 2**i - 1; the i upper
// bits of tmp choose among them.
// NOTE(review): assumes VX_mux outputs data_in[sel_in] - confirm against VX_mux.
VX_mux #(
|
||||
.N (2**i)
|
||||
) mux (
|
||||
.data_in (lru_in[((2**i)-1)+:(2**i)]),
|
||||
.sel_in (tmp[WAY_IDX_BITS-1-:i]),
|
||||
.data_out (tmp[WAY_IDX_BITS-1-i])
|
||||
);
|
||||
end
|
||||
end
|
||||
assign way_idx = tmp;
|
||||
end else begin : g_no_enc
|
||||
// single way: the only possible index is zero
`UNUSED_VAR (lru_in)
|
||||
assign way_idx = '0;
|
||||
end
|
||||
|
|
||||
endmodule
|
||||
|
||||
// VX_cache_repl: produces the way to replace (repl_way) for a cache line.
// The implementation is chosen at elaboration time from NUM_WAYS and REPL_POLICY:
//   - NUM_WAYS == 1           : repl_way is tied to zero
//   - REPL_POLICY == PLRU     : per-line tree-PLRU bits, updated on hits
//   - REPL_POLICY == CYCLIC   : per-line counter, advanced on each replacement request
//   - any other REPL_POLICY   : one shared counter that advances every non-stalled cycle
module VX_cache_repl #(
|
||||
// Size of cache in bytes
// NOTE(review): CACHE_SIZE, LINE_SIZE and NUM_BANKS are not referenced by name in this
// body; presumably the `CS_* macros expand to expressions that use them - confirm.
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Replacement policy selector (compared against `CS_REPL_PLRU and `CS_REPL_CYCLIC below)
|
||||
parameter REPL_POLICY = `CS_REPL_CYCLIC
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// pipeline stall; only read by the shared-counter policy
input wire stall,
|
||||
// hit notification: line and way that were hit (consumed by the PLRU policy only)
input wire hit_valid,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] hit_line,
|
||||
input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way,
|
||||
// replacement request: line for which a victim way is wanted
input wire repl_valid,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] repl_line,
|
||||
// selected victim way
output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way
|
||||
);
|
||||
localparam WAY_SEL_WIDTH = `CS_WAY_SEL_WIDTH;
|
||||
// stall is flagged unused for all configurations, although g_random below does read it
`UNUSED_VAR (stall)
|
||||
|
|
||||
if (NUM_WAYS > 1) begin : g_enable
|
||||
if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru
|
||||
// Pseudo Least Recently Used replacement policy
|
||||
// one tree bit per internal node: NUM_WAYS-1 bits per line
localparam LRU_WIDTH = `UP(NUM_WAYS-1);
|
||||
|
|
||||
wire [LRU_WIDTH-1:0] plru_rdata;
|
||||
wire [LRU_WIDTH-1:0] plru_wdata;
|
||||
wire [LRU_WIDTH-1:0] plru_wmask;
|
||||
|
|
||||
// per-line PLRU state: written at hit_line on a hit, read at repl_line on a request.
// wren is LRU_WIDTH bits wide (WRENW); NOTE(review): presumably a per-bit write
// enable so only the masked tree bits change - confirm against VX_dp_ram.
VX_dp_ram #(
|
||||
.DATAW (LRU_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (LRU_WIDTH),
|
||||
.RDW_MODE ("R")
|
||||
) plru_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (repl_valid),
|
||||
.write (hit_valid),
|
||||
.wren (plru_wmask),
|
||||
.waddr (hit_line),
|
||||
.raddr (repl_line),
|
||||
.wdata (plru_wdata),
|
||||
.rdata (plru_rdata)
|
||||
);
|
||||
|
|
||||
// hit way -> tree bits to write and their write mask
plru_decoder #(
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) plru_dec (
|
||||
.way_idx (hit_way),
|
||||
.lru_data (plru_wdata),
|
||||
.lru_mask (plru_wmask)
|
||||
);
|
||||
|
|
||||
// stored tree bits -> victim way
plru_encoder #(
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) plru_enc (
|
||||
.lru_in (plru_rdata),
|
||||
.way_idx (repl_way)
|
||||
);
|
||||
|
|
||||
end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic
|
||||
// Cyclic replacement policy
|
||||
// hits do not influence this policy
`UNUSED_VAR (hit_valid)
|
||||
`UNUSED_VAR (hit_line)
|
||||
`UNUSED_VAR (hit_way)
|
||||
|
|
||||
wire [WAY_SEL_WIDTH-1:0] ctr_rdata;
|
||||
// next counter value; wraps at 2**WAY_SEL_WIDTH
wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1;
|
||||
|
|
||||
// per-line way counter: a replacement request both reads the entry for repl_line
// and writes back the incremented value to the same address
VX_sp_ram #(
|
||||
.DATAW (WAY_SEL_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.RDW_MODE ("R")
|
||||
) ctr_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (repl_valid),
|
||||
.write (repl_valid),
|
||||
.wren (1'b1),
|
||||
.addr (repl_line),
|
||||
.wdata (ctr_wdata),
|
||||
.rdata (ctr_rdata)
|
||||
);
|
||||
|
|
||||
assign repl_way = ctr_rdata;
|
||||
end else begin : g_random
|
||||
// Random replacement policy
|
||||
// implemented as a single counter shared by all lines (no random number source here)
`UNUSED_VAR (hit_valid)
|
||||
`UNUSED_VAR (hit_line)
|
||||
`UNUSED_VAR (hit_way)
|
||||
`UNUSED_VAR (repl_valid)
|
||||
`UNUSED_VAR (repl_line)
|
||||
reg [WAY_SEL_WIDTH-1:0] victim_idx;
|
||||
// cleared on reset, held while stalled, otherwise incremented every clock
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
victim_idx <= 0;
|
||||
end else if (~stall) begin
|
||||
victim_idx <= victim_idx + 1;
|
||||
end
|
||||
end
|
||||
assign repl_way = victim_idx;
|
||||
end
|
||||
end else begin : g_disable
|
||||
// single way: nothing to choose, every input is unused
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (hit_valid)
|
||||
`UNUSED_VAR (hit_line)
|
||||
`UNUSED_VAR (hit_way)
|
||||
`UNUSED_VAR (repl_valid)
|
||||
`UNUSED_VAR (repl_line)
|
||||
assign repl_way = 1'b0;
|
||||
end
|
||||
|
|
||||
endmodule
|
126
hw/rtl/cache/VX_cache_tags.sv
vendored
126
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -14,8 +14,6 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_tags #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BANK_ID = 0,
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
// Size of line inside a bank in bytes
|
||||
|
@ -27,96 +25,61 @@ module VX_cache_tags #(
|
|||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
parameter WRITEBACK = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire [`UP(UUID_WIDTH)-1:0] req_uuid,
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
input wire stall,
|
||||
|
||||
// init/fill/lookup
|
||||
// inputs
|
||||
input wire init,
|
||||
input wire flush,
|
||||
input wire fill,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
|
||||
input wire [`CS_TAG_SEL_BITS-1:0] line_tag,
|
||||
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
|
||||
|
||||
// eviction
|
||||
// outputs
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
output wire evict_dirty,
|
||||
output wire [NUM_WAYS-1:0] evict_way,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
// valid, dirty, tag
|
||||
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire [NUM_WAYS-1:0] read_valid;
|
||||
wire [NUM_WAYS-1:0] read_dirty;
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
if (NUM_WAYS > 1) begin : g_evict_way
|
||||
reg [NUM_WAYS-1:0] evict_way_r;
|
||||
// cyclic assignment of replacement way
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
evict_way_r <= 1;
|
||||
end else if (~stall) begin // holding the value on stalls prevents filling different slots twice
|
||||
evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
|
||||
end
|
||||
end
|
||||
|
||||
assign evict_way = fill ? evict_way_r : way_sel;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
.N (NUM_WAYS)
|
||||
) evict_tag_sel (
|
||||
.data_in (read_tag),
|
||||
.sel_in (evict_way),
|
||||
.data_out (evict_tag)
|
||||
);
|
||||
end else begin : g_evict_way_0
|
||||
`UNUSED_VAR (stall)
|
||||
assign evict_way = 1'b1;
|
||||
assign evict_tag = read_tag;
|
||||
if (WRITEBACK) begin : g_evict_tag_wb
|
||||
assign evict_dirty = read_dirty[evict_way];
|
||||
assign evict_tag = read_tag[evict_way];
|
||||
end else begin : g_evict_tag_wt
|
||||
`UNUSED_VAR (read_dirty)
|
||||
assign evict_dirty = 1'b0;
|
||||
assign evict_tag = '0;
|
||||
end
|
||||
|
||||
// fill and flush need to also read in writeback mode
|
||||
wire fill_s = fill && (!WRITEBACK || ~stall);
|
||||
wire flush_s = flush && (!WRITEBACK || ~stall);
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
|
||||
wire way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
wire do_init = init; // init all ways
|
||||
wire do_fill = fill && way_en;
|
||||
wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode
|
||||
wire do_write = WRITEBACK && write && tag_matches[i]; // only write on tag hit
|
||||
|
||||
wire do_fill = fill_s && evict_way[i];
|
||||
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
|
||||
wire do_write = WRITEBACK && write && tag_matches[i];
|
||||
|
||||
wire line_read = (WRITEBACK && (fill_s || flush_s));
|
||||
wire line_write = init || do_fill || do_flush || do_write;
|
||||
wire line_valid = ~(init || flush);
|
||||
wire line_read = read || write || (WRITEBACK && (fill || flush));
|
||||
wire line_write = do_init || do_fill || do_flush || do_write;
|
||||
wire line_valid = fill || write;
|
||||
|
||||
wire [TAG_WIDTH-1:0] line_wdata;
|
||||
wire [TAG_WIDTH-1:0] line_rdata;
|
||||
|
||||
if (WRITEBACK) begin : g_writeback
|
||||
if (WRITEBACK) begin : g_wdata
|
||||
assign line_wdata = {line_valid, write, line_tag};
|
||||
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
|
||||
end else begin : g_writethrough
|
||||
end else begin : g_wdata
|
||||
assign line_wdata = {line_valid, line_tag};
|
||||
assign {read_valid[i], read_tag[i]} = line_rdata;
|
||||
assign read_dirty[i] = 1'b0;
|
||||
|
@ -125,15 +88,14 @@ module VX_cache_tags #(
|
|||
VX_sp_ram #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
.RDW_MODE ("W")
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.addr (line_idx),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
@ -143,36 +105,4 @@ module VX_cache_tags #(
|
|||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
assign evict_dirty = | (read_dirty & evict_way);
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty))
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
end
|
||||
end else begin
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
20
hw/rtl/cache/VX_cache_top.sv
vendored
20
hw/rtl/cache/VX_cache_top.sv
vendored
|
@ -20,7 +20,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 16384,
|
||||
parameter CACHE_SIZE = 65536,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
|
@ -28,37 +28,37 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
// Number of associative ways
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 16,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
parameter CRSQ_SIZE = 8,
|
||||
// Miss Reservation Queue Knob
|
||||
parameter MSHR_SIZE = 16,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
parameter MRSQ_SIZE = 8,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
parameter MREQ_SIZE = 8,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
parameter WRITEBACK = 1,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
parameter DIRTY_BYTES = 1,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = 16,
|
||||
parameter TAG_WIDTH = 32,
|
||||
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 2,
|
||||
parameter CORE_OUT_BUF = 3,
|
||||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 2,
|
||||
parameter MEM_OUT_BUF = 3,
|
||||
|
||||
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
|
||||
) (
|
||||
|
|
42
hw/rtl/cache/VX_cache_wrap.sv
vendored
42
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -27,18 +27,18 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
parameter NUM_BANKS = 4,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 16,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
parameter CRSQ_SIZE = 4,
|
||||
// Miss Reservation Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 16,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
parameter MRSQ_SIZE = 4,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
|
@ -51,12 +51,18 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Replacement policy
|
||||
parameter REPL_POLICY = `CS_REPL_CYCLIC,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
|
@ -64,10 +70,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
parameter PASSTHRU = 0,
|
||||
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
parameter CORE_OUT_BUF = 3,
|
||||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
parameter MEM_OUT_BUF = 3
|
||||
) (
|
||||
|
||||
input wire clk,
|
||||
|
@ -166,15 +172,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.REPL_POLICY (REPL_POLICY),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.FLAGS_WIDTH (FLAGS_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
|
||||
) cache (
|
||||
|
@ -232,13 +240,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_bus_if[i].req_data.rw) begin
|
||||
`TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid))
|
||||
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid))
|
||||
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid))
|
||||
end
|
||||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid))
|
||||
`TRACE(2, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -260,15 +268,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw) begin
|
||||
`TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
|
||||
`TRACE(2, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
`TRACE(2, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid))
|
||||
end
|
||||
end
|
||||
|
|
|
@ -194,7 +194,7 @@ module VX_alu_int #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (br_enable) begin
|
||||
`TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid))
|
||||
end
|
||||
end
|
||||
|
|
|
@ -89,7 +89,7 @@ module VX_alu_unit #(
|
|||
);
|
||||
|
||||
VX_alu_int #(
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-int%0d", INSTANCE_ID, block_idx))),
|
||||
.BLOCK_IDX (block_idx),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) alu_int (
|
||||
|
@ -102,7 +102,7 @@ module VX_alu_unit #(
|
|||
|
||||
`ifdef EXT_M_ENABLE
|
||||
VX_alu_muldiv #(
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-muldiv%0d", INSTANCE_ID, block_idx))),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
|
|
|
@ -87,7 +87,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_SWITCH (3);
|
||||
|
||||
VX_schedule #(
|
||||
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-schedule", INSTANCE_ID))),
|
||||
.CORE_ID (CORE_ID)
|
||||
) schedule (
|
||||
.clk (clk),
|
||||
|
@ -115,7 +115,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_fetch #(
|
||||
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-fetch", INSTANCE_ID)))
|
||||
) fetch (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
|
@ -126,7 +126,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_decode #(
|
||||
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-decode", INSTANCE_ID)))
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -136,7 +136,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_issue #(
|
||||
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-issue", INSTANCE_ID)))
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
|
@ -153,7 +153,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_execute #(
|
||||
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-execute", INSTANCE_ID))),
|
||||
.CORE_ID (CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_IO_BIND (2)
|
||||
|
@ -181,7 +181,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_commit #(
|
||||
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-commit", INSTANCE_ID)))
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -144,7 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_core #(
|
||||
.INSTANCE_ID ($sformatf("core")),
|
||||
.INSTANCE_ID (`SFORMATF(("core"))),
|
||||
.CORE_ID (CORE_ID)
|
||||
) core (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
|
|
@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*; (
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (dcr_bus_if.write_valid) begin
|
||||
`TRACE(1, ("%t: base-dcr: state=", $time))
|
||||
`TRACE(2, ("%t: base-dcr: state=", $time))
|
||||
trace_base_dcr(1, dcr_bus_if.write_addr);
|
||||
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data))
|
||||
`TRACE(2, (", data=0x%h\n", dcr_bus_if.write_data))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -52,7 +52,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_alu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-alu", INSTANCE_ID)))
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -64,7 +64,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_SWITCH (1);
|
||||
|
||||
VX_lsu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-lsu", INSTANCE_ID)))
|
||||
) lsu_unit (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
|
@ -76,7 +76,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-fpu", INSTANCE_ID)))
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -87,7 +87,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_sfu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-sfu", INSTANCE_ID))),
|
||||
.CORE_ID (CORE_ID)
|
||||
) sfu_unit (
|
||||
.clk (clk),
|
||||
|
|
|
@ -51,9 +51,9 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`PC_BITS + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
.DATAW (`PC_BITS + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.RDW_MODE ("R")
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -166,7 +166,9 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef CHIPSCOPE
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}),
|
||||
|
@ -174,6 +176,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready})
|
||||
);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -39,7 +39,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
.OUT_REG (2) // 2-cycle EB for area reduction
|
||||
.OUT_REG (1)
|
||||
) instr_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
module VX_ipdom_stack #(
|
||||
parameter WIDTH = 1,
|
||||
parameter DEPTH = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter ADDRW = `LOG2UP(DEPTH)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -31,76 +30,63 @@ module VX_ipdom_stack #(
|
|||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
reg slot_set [DEPTH-1:0];
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr, wr_ptr;
|
||||
reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr;
|
||||
|
||||
reg empty_r, full_r;
|
||||
|
||||
wire [WIDTH-1:0] d0, d1;
|
||||
|
||||
wire d_set_n = slot_set[rd_ptr];
|
||||
wire d_set_r;
|
||||
|
||||
always @(*) begin
|
||||
rd_ptr_n = rd_ptr;
|
||||
if (push) begin
|
||||
rd_ptr_n = wr_ptr;
|
||||
end else if (pop) begin
|
||||
rd_ptr_n = rd_ptr - ADDRW'(d_set_r);
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr <= '0;
|
||||
wr_ptr <= '0;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
rd_ptr <= '0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time));
|
||||
`ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time));
|
||||
`ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time));
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
empty_r <= 0;
|
||||
full_r <= (ADDRW'(DEPTH-1) == wr_ptr);
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - ADDRW'(d_set_n);
|
||||
rd_ptr <= rd_ptr - ADDRW'(d_set_n);
|
||||
empty_r <= (rd_ptr == 0) && (d_set_n == 1);
|
||||
wr_ptr <= wr_ptr - ADDRW'(d_set_r);
|
||||
empty_r <= (rd_ptr == 0) && d_set_r;
|
||||
full_r <= 0;
|
||||
end
|
||||
rd_ptr <= rd_ptr_n;
|
||||
end
|
||||
end
|
||||
|
||||
wire [WIDTH * 2:0] qout = push ? {1'b0, q1, q0} : {1'b1, d1, d0};
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (WIDTH * 2),
|
||||
.SIZE (DEPTH),
|
||||
.OUT_REG (OUT_REG ? 1 : 0),
|
||||
.LUTRAM (OUT_REG ? 0 : 1)
|
||||
) store (
|
||||
.DATAW (1 + WIDTH * 2),
|
||||
.SIZE (DEPTH),
|
||||
.OUT_REG (1),
|
||||
.RDW_MODE ("R")
|
||||
) ipdom_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
.write (push || pop),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
.rdata ({d1, d0})
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
slot_set[wr_ptr] <= 0;
|
||||
end else if (pop) begin
|
||||
slot_set[rd_ptr] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire d_set_r;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in (d_set_n),
|
||||
.data_out (d_set_r)
|
||||
.waddr (push ? wr_ptr : rd_ptr),
|
||||
.wdata (qout),
|
||||
.raddr (rd_ptr_n),
|
||||
.rdata ({d_set_r, d1, d0})
|
||||
);
|
||||
|
||||
assign d = d_set_r ? d0 : d1;
|
||||
|
|
|
@ -52,7 +52,7 @@ module VX_issue import VX_gpu_pkg::*; #(
|
|||
|
||||
`SCOPE_IO_SWITCH (`ISSUE_WIDTH);
|
||||
|
||||
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices
|
||||
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_slices
|
||||
VX_decode_if #(
|
||||
.NUM_WARPS (PER_ISSUE_WARPS)
|
||||
) per_issue_decode_if();
|
||||
|
@ -78,7 +78,7 @@ module VX_issue import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_issue_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, issue_id))),
|
||||
.ISSUE_ID (issue_id)
|
||||
) issue_slice (
|
||||
`SCOPE_IO_BIND(issue_id)
|
||||
|
|
|
@ -37,7 +37,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
VX_operands_if operands_if();
|
||||
|
||||
VX_ibuffer #(
|
||||
.INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID)))
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -49,7 +49,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID)))
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -64,7 +64,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_operands #(
|
||||
.INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID)))
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -77,7 +77,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID)))
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -143,7 +143,9 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef CHIPSCOPE
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({decode_if.valid, decode_if.data, decode_if.ready}),
|
||||
|
@ -152,6 +154,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
.probe3 ({writeback_if.valid, writeback_if.data})
|
||||
);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire lsu_mem_rsp_ready;
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-memsched", INSTANCE_ID))),
|
||||
.CORE_REQS (NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
|
@ -504,30 +504,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid && fence_lock) begin
|
||||
`TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID))
|
||||
`TRACE(2, ("%t: *** %s fence wait\n", $time, INSTANCE_ID))
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES)
|
||||
`TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
`TRACE(2, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(2, (", flags="))
|
||||
`TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(2, (", byteen=0x%0h, data=", mem_req_byteen))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", mem_req_data, NUM_LANES)
|
||||
`TRACE(2, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
`TRACE(2, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
|
||||
`TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES)
|
||||
`TRACE(2, (", flags="))
|
||||
`TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES)
|
||||
`TRACE(2, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
`TRACE(2, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES)
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data, NUM_LANES)
|
||||
`TRACE(2, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
@ -561,7 +561,9 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_UNUSED(0)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef CHIPSCOPE
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({execute_if.valid, execute_if.data, execute_if.ready}),
|
||||
|
@ -569,5 +571,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready})
|
||||
);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -52,9 +52,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_slices
|
||||
VX_lsu_slice #(
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
|
||||
.INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, block_idx)))
|
||||
) lsu_slice(
|
||||
`SCOPE_IO_BIND (block_idx)
|
||||
.clk (clk),
|
||||
|
|
|
@ -92,7 +92,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
VX_local_mem #(
|
||||
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
|
||||
.INSTANCE_ID(`SFORMATF(("%s-lmem", INSTANCE_ID))),
|
||||
.SIZE (1 << `LMEM_LOG_SIZE),
|
||||
.NUM_REQS (LSU_NUM_REQS),
|
||||
.NUM_BANKS (`LMEM_NUM_BANKS),
|
||||
|
@ -127,11 +127,11 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (DCACHE_TAG_WIDTH)
|
||||
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
|
||||
|
||||
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled
|
||||
if ((`NUM_LSU_LANES > 1) && (LSU_WORD_SIZE != DCACHE_WORD_SIZE)) begin : g_enabled
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-coalescer%0d", INSTANCE_ID, i))),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
|
|
|
@ -178,14 +178,14 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
|
||||
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH)
|
||||
.DATAW (NUM_BANKS * (1 + REQ_SEL_WIDTH) + META_DATAW)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (pipe_valid2_st1),
|
||||
.ready_in (pipe_ready_st1),
|
||||
.data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
|
||||
.data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}),
|
||||
.data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, pipe_data_st1}),
|
||||
.data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, pipe_data_st2}),
|
||||
.valid_out(pipe_valid_st2),
|
||||
.ready_out(pipe_ready_st2)
|
||||
);
|
||||
|
@ -266,13 +266,12 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
VX_dp_ram #(
|
||||
.DATAW (REGS_DATAW),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.OUT_REG (1),
|
||||
.READ_ENABLE (1),
|
||||
.WRENW (BYTEENW),
|
||||
`ifdef GPR_RESET
|
||||
.RESET_RAM (1),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
.OUT_REG (1),
|
||||
.RDW_MODE ("U")
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -290,7 +290,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
// split/join handling
|
||||
|
||||
VX_split_join #(
|
||||
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-splitjoin", INSTANCE_ID)))
|
||||
) split_join (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -388,7 +388,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
wire no_pending_instr = (& pending_warp_empty);
|
||||
|
||||
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1);
|
||||
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1, 1);
|
||||
|
||||
// export CSRs
|
||||
assign sched_csr_if.cycles = cycles;
|
||||
|
|
|
@ -62,8 +62,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
.data_out (perf_sfu_per_cycle)
|
||||
);
|
||||
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
|
||||
|
||||
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
|
||||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in
|
||||
|
@ -206,7 +206,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end else begin
|
||||
if (staging_if[w].valid && ~staging_if[w].ready) begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
`TRACE(4, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
|
||||
operands_busy, staging_if[w].data.uuid))
|
||||
`endif
|
||||
|
|
|
@ -99,7 +99,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_wctl_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-wctl", INSTANCE_ID))),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
|
@ -110,7 +110,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_csr_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-csr", INSTANCE_ID))),
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_unit (
|
||||
|
|
|
@ -48,8 +48,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`NUM_THREADS+`PC_BITS),
|
||||
.DEPTH (`DV_STACK_SIZE),
|
||||
.OUT_REG (0)
|
||||
.DEPTH (`DV_STACK_SIZE)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
158
hw/rtl/libs/VX_async_ram_patch.sv
Normal file
158
hw/rtl/libs/VX_async_ram_patch.sv
Normal file
|
@ -0,0 +1,158 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end
|
||||
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin : g_init \
|
||||
if (INIT_FILE != "") begin : g_file \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin : g_value \
|
||||
initial begin \
|
||||
for (integer i = 0; i < SIZE; ++i) begin : g_i \
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end \
|
||||
end \
|
||||
end
|
||||
|
||||
`define RAM_BYPASS(__d) \
|
||||
reg [DATAW-1:0] bypass_data_r; \
|
||||
reg bypass_valid_r; \
|
||||
always @(posedge clk) begin \
|
||||
bypass_valid_r <= read_s && write && (raddr_s == waddr); \
|
||||
bypass_data_r <= wdata; \
|
||||
end \
|
||||
assign __d = bypass_valid_r ? bypass_data_r : rdata_r
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_async_ram_patch #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter WRENW = 1,
|
||||
parameter DUAL_PORT = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire read,
|
||||
input wire write,
|
||||
input wire [WRENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
localparam WSELW = DATAW / WRENW;
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
(* keep = "true" *) wire [ADDRW-1:0] raddr_w, raddr_s;
|
||||
(* keep = "true" *) wire read_s, is_raddr_reg;
|
||||
|
||||
assign raddr_w = raddr;
|
||||
|
||||
VX_placeholder #(
|
||||
.I (ADDRW),
|
||||
.O (ADDRW + 1 + 1)
|
||||
) placeholder (
|
||||
.in (raddr_w),
|
||||
.out ({raddr_s, read_s, is_raddr_reg})
|
||||
);
|
||||
|
||||
// synchronous ram
|
||||
|
||||
wire [DATAW-1:0] rdata_s;
|
||||
|
||||
if (WRENW != 1) begin : g_wren_sync_ram
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (read_s || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[raddr_s];
|
||||
end
|
||||
end
|
||||
`RAM_BYPASS(rdata_s);
|
||||
end else begin : g_no_wren_sync_ram
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
`RAM_INITIALIZATION
|
||||
`UNUSED_VAR (wren)
|
||||
always @(posedge clk) begin
|
||||
if (read_s || write) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[raddr_s];
|
||||
end
|
||||
end
|
||||
`RAM_BYPASS(rdata_s);
|
||||
end
|
||||
|
||||
// asynchronous ram (fallback)
|
||||
|
||||
wire [DATAW-1:0] rdata_a;
|
||||
|
||||
if (DUAL_PORT != 0) begin : g_dp_async_ram
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (WRENW != 1) begin : g_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
end else begin : g_no_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_a = ram[raddr];
|
||||
end else begin : g_sp_async_ram
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
if (WRENW != 1) begin : g_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
end else begin : g_no_wren
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_a = ram[waddr];
|
||||
end
|
||||
|
||||
assign rdata = is_raddr_reg ? rdata_s : rdata_a;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -135,7 +135,7 @@ module VX_axi_adapter #(
|
|||
);
|
||||
end
|
||||
|
||||
wire tbuf_full;
|
||||
wire mem_req_tag_ready;
|
||||
wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out;
|
||||
wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out;
|
||||
|
||||
|
@ -143,13 +143,14 @@ module VX_axi_adapter #(
|
|||
if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf
|
||||
localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE);
|
||||
wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr;
|
||||
wire tbuf_full;
|
||||
VX_index_buffer #(
|
||||
.DATAW (TAG_WIDTH_IN),
|
||||
.SIZE (TAG_BUFFER_SIZE)
|
||||
) tag_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready),
|
||||
.acquire_en (mem_req_valid && ~mem_req_rw && mem_req_ready),
|
||||
.write_addr (tbuf_waddr),
|
||||
.write_data (mem_req_tag),
|
||||
.read_data (mem_rsp_tag),
|
||||
|
@ -158,22 +159,24 @@ module VX_axi_adapter #(
|
|||
.full (tbuf_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
assign mem_req_tag_ready = mem_req_rw || ~tbuf_full;
|
||||
assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr);
|
||||
assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0];
|
||||
`UNUSED_VAR (mem_rsp_tag_out)
|
||||
end else begin : g_no_tag_buf
|
||||
assign tbuf_full = 0;
|
||||
assign mem_req_tag_ready = 1;
|
||||
assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag);
|
||||
assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0];
|
||||
`UNUSED_VAR (mem_rsp_tag_out)
|
||||
end
|
||||
|
||||
// request ack
|
||||
assign mem_req_ready = (mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full;
|
||||
assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] :
|
||||
(m_axi_arready[req_bank_sel] && mem_req_tag_ready);
|
||||
|
||||
// AXI write request address channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
|
||||
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i];
|
||||
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
|
||||
assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
|
||||
assign m_axi_awid[i] = mem_req_tag_out;
|
||||
assign m_axi_awlen[i] = 8'b00000000;
|
||||
|
@ -188,7 +191,7 @@ module VX_axi_adapter #(
|
|||
|
||||
// AXI write request data channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data
|
||||
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i];
|
||||
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i];
|
||||
assign m_axi_wdata[i] = mem_req_data;
|
||||
assign m_axi_wstrb[i] = mem_req_byteen;
|
||||
assign m_axi_wlast[i] = 1'b1;
|
||||
|
@ -205,7 +208,7 @@ module VX_axi_adapter #(
|
|||
|
||||
// AXI read request channel
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full;
|
||||
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && mem_req_tag_ready;
|
||||
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
|
||||
assign m_axi_arid[i] = mem_req_tag_out;
|
||||
assign m_axi_arlen[i] = 8'b00000000;
|
||||
|
@ -228,9 +231,8 @@ module VX_axi_adapter #(
|
|||
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
|
||||
assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]};
|
||||
assign m_axi_rready[i] = rsp_arb_ready_in[i];
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time))
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time))
|
||||
`UNUSED_VAR (m_axi_rlast[i])
|
||||
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rlast[i] == 0), ("%t: *** AXI response error", $time))
|
||||
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rresp[i] != 0), ("%t: *** AXI response error", $time))
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
|
|
|
@ -65,12 +65,12 @@ module VX_cyclic_arbiter #(
|
|||
.valid_out (grant_valid)
|
||||
);
|
||||
|
||||
VX_decoder #(
|
||||
VX_demux #(
|
||||
.N (LOG_NUM_REQS),
|
||||
.D (NUM_REQS)
|
||||
) grant_decoder (
|
||||
.data_in (grant_index),
|
||||
.valid_in (1'b1),
|
||||
.sel_in (grant_index),
|
||||
.data_in (1'b1),
|
||||
.data_out (grant_onehot_w)
|
||||
);
|
||||
|
||||
|
|
|
@ -17,26 +17,31 @@
|
|||
// Adapted from BaseJump STL: http://bjump.org/data_out.html
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_decoder #(
|
||||
parameter N = 1,
|
||||
module VX_demux #(
|
||||
parameter N = 0,
|
||||
parameter M = 1,
|
||||
parameter MODEL = 0,
|
||||
parameter D = 1 << N
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [M-1:0] valid_in,
|
||||
input wire [`UP(N)-1:0] sel_in,
|
||||
input wire [M-1:0] data_in,
|
||||
output wire [D-1:0][M-1:0] data_out
|
||||
);
|
||||
logic [D-1:0][M-1:0] shift;
|
||||
if (MODEL == 1) begin : g_model1
|
||||
always @(*) begin
|
||||
shift = '0;
|
||||
shift[data_in] = {M{1'b1}};
|
||||
if (N != 0) begin : g_decoder
|
||||
logic [D-1:0][M-1:0] shift;
|
||||
if (MODEL == 1) begin : g_model1
|
||||
always @(*) begin
|
||||
shift = '0;
|
||||
shift[sel_in] = {M{1'b1}};
|
||||
end
|
||||
end else begin : g_model0
|
||||
assign shift = ((D*M)'({M{1'b1}})) << (sel_in * M);
|
||||
end
|
||||
end else begin : g_model0
|
||||
assign shift = ((D*M)'({M{1'b1}})) << (data_in * M);
|
||||
assign data_out = {D{data_in}} & shift;
|
||||
end else begin : g_passthru
|
||||
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end
|
||||
assign data_out = {D{valid_in}} & shift;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -13,6 +13,35 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin : g_init \
|
||||
if (INIT_FILE != "") begin : g_file \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin : g_value \
|
||||
initial begin \
|
||||
for (integer i = 0; i < SIZE; ++i) begin : g_i \
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end \
|
||||
end \
|
||||
end
|
||||
|
||||
`ifdef QUARTUS
|
||||
`define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end
|
||||
`else
|
||||
`define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end
|
||||
`endif
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_dp_ram #(
|
||||
parameter DATAW = 1,
|
||||
|
@ -20,11 +49,9 @@ module VX_dp_ram #(
|
|||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter RW_ASSERT = 0,
|
||||
parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, U: undefined
|
||||
parameter RDW_ASSERT = 0,
|
||||
parameter RESET_RAM = 0,
|
||||
parameter RESET_OUT = 0,
|
||||
parameter READ_ENABLE = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
|
@ -41,284 +68,348 @@ module VX_dp_ram #(
|
|||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
localparam WSELW = DATAW / WRENW;
|
||||
`STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))
|
||||
`UNUSED_PARAM (LUTRAM)
|
||||
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin : g_init \
|
||||
if (INIT_FILE != "") begin : g_file \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin : g_value \
|
||||
initial begin \
|
||||
for (integer i = 0; i < SIZE; ++i) \
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end \
|
||||
end
|
||||
`STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter"))
|
||||
`STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "U"), ("invalid parameter"))
|
||||
`UNUSED_PARAM (RDW_ASSERT)
|
||||
|
||||
`UNUSED_PARAM (RW_ASSERT)
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
`RUNTIME_ASSERT((((WRENW == 1) ) || ~write) || (| wren), ("%t: invalid write enable mask", $time))
|
||||
|
||||
if (OUT_REG && !READ_ENABLE) begin : g_out_reg
|
||||
`UNUSED_PARAM (NO_RWCHECK)
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
wire cs = read || write;
|
||||
if (WRENW != 1) begin : g_writeen
|
||||
`ifdef QUARTUS
|
||||
if (LUTRAM != 0) begin : g_lutram
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (cs) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
`ifdef SYNTHESIS
|
||||
localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM);
|
||||
if (OUT_REG) begin : g_sync
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
raddr_r <= raddr;
|
||||
end
|
||||
if (RESET_OUT && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
end
|
||||
assign rdata = ram[raddr_r];
|
||||
end else begin : g_no_wren
|
||||
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
raddr_r <= raddr;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr_r];
|
||||
end
|
||||
end else if (RDW_MODE == "R") begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin : g_no_lutram
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (cs) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
if (RESET_OUT && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end
|
||||
`else
|
||||
// default synthesis
|
||||
if (LUTRAM != 0) begin : g_lutram
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (cs) begin
|
||||
end else begin : g_undefined
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
if (RESET_OUT && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin : g_no_lutram
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (cs) begin
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
if (RESET_OUT && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end else begin : g_no_writeen
|
||||
if (LUTRAM != 0) begin : g_lutram
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (cs) begin
|
||||
if (write)
|
||||
ram[waddr] <= wdata;
|
||||
if (RESET_OUT && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
|
||||
end else begin : g_no_lutram
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (cs) begin
|
||||
if (write)
|
||||
ram[waddr] <= wdata;
|
||||
if (RESET_OUT && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else begin
|
||||
end
|
||||
end else begin : g_auto
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
(* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
raddr_r <= raddr;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr_r];
|
||||
end else begin : g_no_wren
|
||||
(* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
raddr_r <= raddr;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr_r];
|
||||
end
|
||||
end else if (RDW_MODE == "R") begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_out_reg
|
||||
// OUT_REG==0 || READ_ENABLE=1
|
||||
wire [DATAW-1:0] rdata_w;
|
||||
`ifdef SYNTHESIS
|
||||
if (WRENW > 1) begin : g_writeen
|
||||
`ifdef QUARTUS
|
||||
if (LUTRAM != 0) begin : g_lutram
|
||||
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin : g_no_lutram
|
||||
if (NO_RWCHECK != 0) begin : g_no_rwcheck
|
||||
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin : g_rwcheck
|
||||
reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
`else
|
||||
// default synthesis
|
||||
if (LUTRAM != 0) begin : g_lutram
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin : g_no_lutram
|
||||
if (NO_RWCHECK != 0) begin : g_no_rwcheck
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin : g_rwcheck
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end else begin : g_no_writeen
|
||||
// (WRENW == 1)
|
||||
if (LUTRAM != 0) begin : g_lutram
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin : g_no_lutram
|
||||
if (NO_RWCHECK != 0) begin : g_no_rwcheck
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end else begin : g_rwcheck
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
`else
|
||||
// simulation
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
wire [DATAW-1:0] ram_n;
|
||||
for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n
|
||||
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (RESET_RAM && reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else begin
|
||||
if (write) begin
|
||||
ram[waddr] <= ram_n;
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin : g_async
|
||||
`UNUSED_VAR (read)
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
`ifdef VIVADO
|
||||
VX_async_ram_patch #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.WRENW (WRENW),
|
||||
.DUAL_PORT (1),
|
||||
.INIT_ENABLE(INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE)
|
||||
) async_ram_patch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (wdata),
|
||||
.raddr (raddr),
|
||||
.rdata (rdata)
|
||||
);
|
||||
`else
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
`endif
|
||||
end else begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin : g_no_wren
|
||||
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end
|
||||
end else begin : g_auto
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end else begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`NO_RW_RAM_CHECK `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin : g_no_wren
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`else
|
||||
// simulation
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass
|
||||
always @(posedge clk) begin
|
||||
if (RESET_RAM && reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
end else if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i]) begin
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (OUT_REG) begin : g_sync
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
reg [ADDRW-1:0] raddr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
raddr_r <= raddr;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[raddr_r];
|
||||
end else if (RDW_MODE == "R") begin : g_read_first
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_undefined
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read) begin
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else begin : g_async
|
||||
`UNUSED_VAR (read)
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
assign rdata = ram[raddr];
|
||||
end else begin : g_read_first
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
|
@ -335,30 +426,13 @@ module VX_dp_ram #(
|
|||
end
|
||||
end
|
||||
|
||||
assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
if (RW_ASSERT) begin : g_rw_assert
|
||||
`RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time))
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
if (RDW_ASSERT) begin : g_rw_asert
|
||||
`RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time))
|
||||
end
|
||||
end else begin : g_rdata_with_bypass
|
||||
assign rdata_w = ram[raddr];
|
||||
end
|
||||
`endif
|
||||
|
||||
if (OUT_REG != 0) begin : g_rdata_req
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (READ_ENABLE && reset) begin
|
||||
rdata_r <= '0;
|
||||
end else if (!READ_ENABLE || read) begin
|
||||
rdata_r <= rdata_w;
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_rdata_comb
|
||||
assign rdata = rdata_w;
|
||||
end
|
||||
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -15,12 +15,12 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_fifo_queue #(
|
||||
parameter DATAW = 1,
|
||||
parameter DEPTH = 2,
|
||||
parameter DATAW = 32,
|
||||
parameter DEPTH = 32,
|
||||
parameter ALM_FULL = (DEPTH - 1),
|
||||
parameter ALM_EMPTY = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = 1,
|
||||
parameter LUTRAM = 0,
|
||||
parameter SIZEW = `CLOG2(DEPTH+1)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -59,6 +59,8 @@ module VX_fifo_queue #(
|
|||
);
|
||||
|
||||
if (DEPTH == 1) begin : g_depth_1
|
||||
`UNUSED_PARAM (OUT_REG)
|
||||
`UNUSED_PARAM (LUTRAM)
|
||||
|
||||
reg [DATAW-1:0] head_r;
|
||||
|
||||
|
@ -74,91 +76,52 @@ module VX_fifo_queue #(
|
|||
|
||||
localparam ADDRW = `CLOG2(DEPTH);
|
||||
|
||||
wire [DATAW-1:0] data_out_w;
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_ptr_r <= '0;
|
||||
rd_ptr_r <= (OUT_REG != 0) ? 1 : 0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
end
|
||||
|
||||
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
|
||||
wire bypass = push && (empty || (going_empty && pop));
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM),
|
||||
.RDW_MODE ("W")
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (~bypass),
|
||||
.write (push),
|
||||
.wren (1'b1),
|
||||
.raddr (rd_ptr_r),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.rdata (data_out_w)
|
||||
);
|
||||
|
||||
if (OUT_REG != 0) begin : g_out_reg
|
||||
|
||||
wire [DATAW-1:0] dout;
|
||||
reg [DATAW-1:0] dout_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_n_r;
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_ptr_r <= '0;
|
||||
rd_ptr_r <= '0;
|
||||
rd_ptr_n_r <= 1;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
if (pop) begin
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
if (DEPTH > 2) begin
|
||||
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
|
||||
end else begin // (DEPTH == 2);
|
||||
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_n_r),
|
||||
.rdata (dout)
|
||||
);
|
||||
|
||||
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push && (empty || (going_empty && pop))) begin
|
||||
dout_r <= data_in;
|
||||
if (bypass) begin
|
||||
data_out_r <= data_in;
|
||||
end else if (pop) begin
|
||||
dout_r <= dout;
|
||||
data_out_r <= data_out_w;
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = dout_r;
|
||||
|
||||
assign data_out = data_out_r;
|
||||
end else begin : g_no_out_reg
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= '0;
|
||||
wr_ptr_r <= '0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_r),
|
||||
.rdata (data_out)
|
||||
);
|
||||
|
||||
assign data_out = data_out_w;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
`TRACING_OFF
|
||||
module VX_generic_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter `STRING TYPE = "P",
|
||||
parameter `STRING TYPE = "P", // P: priority, R: round-robin, M: matrix, C: cyclic
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -27,6 +27,8 @@ module VX_generic_arbiter #(
|
|||
output wire grant_valid,
|
||||
input wire grant_ready
|
||||
);
|
||||
`STATIC_ASSERT((TYPE == "P" || TYPE == "R" || TYPE == "M" || TYPE == "C"), ("invalid parameter"))
|
||||
|
||||
if (TYPE == "P") begin : g_priority
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
|
@ -84,10 +86,6 @@ module VX_generic_arbiter #(
|
|||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
end else begin : g_invalid
|
||||
|
||||
`ERROR(("invalid parameter"));
|
||||
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT (((~(| requests) != 1) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time))
|
||||
|
|
|
@ -15,10 +15,10 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_index_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter LUTRAM = 1,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter LUTRAM = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -49,9 +49,10 @@ module VX_index_buffer #(
|
|||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.LUTRAM (LUTRAM)
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.LUTRAM (LUTRAM),
|
||||
.RDW_MODE ("W")
|
||||
) data_table (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -72,7 +72,7 @@ module VX_matrix_arbiter #(
|
|||
|
||||
assign grant_onehot = grant;
|
||||
|
||||
VX_encoder #(
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) encoder (
|
||||
.data_in (grant_onehot),
|
||||
|
|
|
@ -100,21 +100,21 @@ module VX_mem_adapter #(
|
|||
assign mem_req_addr_out_w = mem_req_addr_in_qual;
|
||||
end
|
||||
|
||||
VX_decoder #(
|
||||
VX_demux #(
|
||||
.N (D),
|
||||
.M (SRC_DATA_WIDTH/8)
|
||||
) req_be_dec (
|
||||
.data_in (req_idx),
|
||||
.valid_in (mem_req_byteen_in),
|
||||
) req_be_demux (
|
||||
.sel_in (req_idx),
|
||||
.data_in (mem_req_byteen_in),
|
||||
.data_out (mem_req_byteen_out_w)
|
||||
);
|
||||
|
||||
VX_decoder #(
|
||||
VX_demux #(
|
||||
.N (D),
|
||||
.M (SRC_DATA_WIDTH)
|
||||
) req_data_dec (
|
||||
.data_in (req_idx),
|
||||
.valid_in (mem_req_data_in),
|
||||
) req_data_demux (
|
||||
.sel_in (req_idx),
|
||||
.data_in (mem_req_data_in),
|
||||
.data_out (mem_req_data_out_w)
|
||||
);
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ module VX_mem_coalescer #(
|
|||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_REQS = 1,
|
||||
parameter ADDR_WIDTH = 32,
|
||||
parameter FLAGS_WIDTH = 1,
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
parameter DATA_IN_SIZE = 4,
|
||||
parameter DATA_OUT_SIZE = 64,
|
||||
parameter TAG_WIDTH = 8,
|
||||
|
@ -43,7 +43,7 @@ module VX_mem_coalescer #(
|
|||
input wire [NUM_REQS-1:0] in_req_mask,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen,
|
||||
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
|
||||
input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags,
|
||||
input wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] in_req_flags,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
|
||||
input wire [TAG_WIDTH-1:0] in_req_tag,
|
||||
output wire in_req_ready,
|
||||
|
@ -61,7 +61,7 @@ module VX_mem_coalescer #(
|
|||
output wire [OUT_REQS-1:0] out_req_mask,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
|
||||
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
|
||||
output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags,
|
||||
output wire [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data,
|
||||
output wire [OUT_TAG_WIDTH-1:0] out_req_tag,
|
||||
input wire out_req_ready,
|
||||
|
@ -74,6 +74,7 @@ module VX_mem_coalescer #(
|
|||
output wire out_rsp_ready
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`STATIC_ASSERT ((NUM_REQS > 1), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter"))
|
||||
`STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter"))
|
||||
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time))
|
||||
|
@ -92,7 +93,7 @@ module VX_mem_coalescer #(
|
|||
logic out_req_rw_r, out_req_rw_n;
|
||||
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n;
|
||||
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags_r, out_req_flags_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
@ -110,7 +111,7 @@ module VX_mem_coalescer #(
|
|||
|
||||
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n;
|
||||
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] seed_flags_r, seed_flags_n;
|
||||
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
|
||||
logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n;
|
||||
|
||||
|
@ -139,7 +140,7 @@ module VX_mem_coalescer #(
|
|||
assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W];
|
||||
end
|
||||
|
||||
wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags;
|
||||
wire [DATA_RATIO-1:0][`UP(FLAGS_WIDTH)-1:0] req_flags;
|
||||
for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags
|
||||
assign req_flags[j] = in_req_flags[DATA_RATIO * i + j];
|
||||
end
|
||||
|
@ -221,7 +222,7 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
|
||||
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
|
||||
.RESETW (1 + NUM_REQS + 1),
|
||||
.INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0})
|
||||
) pipe_reg (
|
||||
|
@ -270,7 +271,12 @@ module VX_mem_coalescer #(
|
|||
assign out_req_mask = out_req_mask_r;
|
||||
assign out_req_byteen = out_req_byteen_r;
|
||||
assign out_req_addr = out_req_addr_r;
|
||||
assign out_req_flags = out_req_flags_r;
|
||||
if (FLAGS_WIDTH != 0) begin : g_out_req_flags
|
||||
assign out_req_flags = out_req_flags_r;
|
||||
end else begin : g_out_req_flags_0
|
||||
`UNUSED_VAR (out_req_flags_r)
|
||||
assign out_req_flags = '0;
|
||||
end
|
||||
assign out_req_data = out_req_data_r;
|
||||
assign out_req_tag = out_req_tag_r;
|
||||
|
||||
|
@ -346,30 +352,30 @@ module VX_mem_coalescer #(
|
|||
always @(posedge clk) begin
|
||||
if (out_req_fire) begin
|
||||
if (out_req_rw) begin
|
||||
`TRACE(1, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS)
|
||||
`TRACE(1, (", byteen="))
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS)
|
||||
`TRACE(1, (", data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS)
|
||||
`TRACE(2, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
|
||||
`TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS)
|
||||
`TRACE(2, (", flags="))
|
||||
`TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS)
|
||||
`TRACE(2, (", byteen="))
|
||||
`TRACE_ARRAY1D(2, "0x%h", out_req_byteen, OUT_REQS)
|
||||
`TRACE(2, (", data="))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", out_req_data, OUT_REQS)
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS)
|
||||
`TRACE(1, (", flags="))
|
||||
`TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS)
|
||||
`TRACE(2, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
|
||||
`TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS)
|
||||
`TRACE(2, (", flags="))
|
||||
`TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS)
|
||||
end
|
||||
`TRACE(1, (", offset="))
|
||||
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS)
|
||||
`TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid))
|
||||
`TRACE(2, (", offset="))
|
||||
`TRACE_ARRAY1D(2, "%0d", out_req_offset, NUM_REQS)
|
||||
`TRACE(2, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid))
|
||||
end
|
||||
if (out_rsp_fire) begin
|
||||
`TRACE(1, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS)
|
||||
`TRACE(1, (", offset="))
|
||||
`TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS)
|
||||
`TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid))
|
||||
`TRACE(2, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", out_rsp_data, OUT_REQS)
|
||||
`TRACE(2, (", offset="))
|
||||
`TRACE_ARRAY1D(2, "%0d", ibuf_dout_offset, NUM_REQS)
|
||||
`TRACE(2, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -21,7 +21,7 @@ module VX_mem_scheduler #(
|
|||
parameter WORD_SIZE = 4,
|
||||
parameter LINE_SIZE = WORD_SIZE,
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
|
||||
parameter FLAGS_WIDTH = 1,
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
|
||||
parameter CORE_QUEUE_SIZE= 8,
|
||||
|
@ -32,7 +32,7 @@ module VX_mem_scheduler #(
|
|||
|
||||
parameter WORD_WIDTH = WORD_SIZE * 8,
|
||||
parameter LINE_WIDTH = LINE_SIZE * 8,
|
||||
parameter COALESCE_ENABLE = (LINE_SIZE != WORD_SIZE),
|
||||
parameter COALESCE_ENABLE = (CORE_REQS > 1) && (LINE_SIZE != WORD_SIZE),
|
||||
parameter PER_LINE_REQS = LINE_SIZE / WORD_SIZE,
|
||||
parameter MERGED_REQS = CORE_REQS / PER_LINE_REQS,
|
||||
parameter MEM_BATCHES = `CDIV(MERGED_REQS, MEM_CHANNELS),
|
||||
|
@ -50,7 +50,7 @@ module VX_mem_scheduler #(
|
|||
input wire [CORE_REQS-1:0] core_req_mask,
|
||||
input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags,
|
||||
input wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags,
|
||||
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire core_req_ready,
|
||||
|
@ -72,7 +72,7 @@ module VX_mem_scheduler #(
|
|||
output wire [MEM_CHANNELS-1:0] mem_req_mask,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags,
|
||||
output wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
@ -94,6 +94,7 @@ module VX_mem_scheduler #(
|
|||
localparam CORE_BATCHES = COALESCE_ENABLE ? 1 : MEM_BATCHES;
|
||||
localparam CORE_BATCH_BITS = `CLOG2(CORE_BATCHES);
|
||||
|
||||
`STATIC_ASSERT ((MEM_CHANNELS <= CORE_REQS), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter"))
|
||||
`RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time))
|
||||
|
@ -112,7 +113,7 @@ module VX_mem_scheduler #(
|
|||
wire reqq_rw;
|
||||
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
|
||||
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
|
||||
wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags;
|
||||
wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags;
|
||||
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data;
|
||||
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
|
||||
wire reqq_ready;
|
||||
|
@ -122,7 +123,7 @@ module VX_mem_scheduler #(
|
|||
wire reqq_rw_s;
|
||||
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
|
||||
wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s;
|
||||
wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s;
|
||||
wire [MERGED_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags_s;
|
||||
wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s;
|
||||
wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s;
|
||||
wire reqq_ready_s;
|
||||
|
@ -132,7 +133,7 @@ module VX_mem_scheduler #(
|
|||
wire mem_req_rw_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s;
|
||||
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_ready_s;
|
||||
|
@ -167,7 +168,7 @@ module VX_mem_scheduler #(
|
|||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + `UP(FLAGS_WIDTH) + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.SIZE (CORE_QUEUE_SIZE),
|
||||
.OUT_REG (1)
|
||||
) req_queue (
|
||||
|
@ -223,7 +224,7 @@ module VX_mem_scheduler #(
|
|||
if (COALESCE_ENABLE) begin : g_coalescer
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-coalescer", INSTANCE_ID))),
|
||||
.NUM_REQS (CORE_REQS),
|
||||
.DATA_IN_SIZE (WORD_SIZE),
|
||||
.DATA_OUT_SIZE (LINE_SIZE),
|
||||
|
@ -297,7 +298,7 @@ module VX_mem_scheduler #(
|
|||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
|
||||
|
||||
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
|
||||
|
@ -385,8 +386,10 @@ module VX_mem_scheduler #(
|
|||
|
||||
assign reqq_ready_s = req_sent_all;
|
||||
|
||||
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_u;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
|
||||
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + `UP(FLAGS_WIDTH) + LINE_WIDTH) + MEM_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -395,106 +398,128 @@ module VX_mem_scheduler #(
|
|||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}),
|
||||
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags_u, mem_req_data, mem_req_tag}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready)
|
||||
);
|
||||
|
||||
if (FLAGS_WIDTH != 0) begin : g_mem_req_flags
|
||||
assign mem_req_flags = mem_req_flags_u;
|
||||
end else begin : g_mem_req_flags_0
|
||||
`UNUSED_VAR (mem_req_flags_u)
|
||||
assign mem_req_flags = '0;
|
||||
end
|
||||
|
||||
// Handle memory responses ////////////////////////////////////////////////
|
||||
|
||||
reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask;
|
||||
wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask;
|
||||
wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx;
|
||||
|
||||
if (CORE_BATCHES > 1) begin : g_rsp_batch_idx
|
||||
assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0];
|
||||
end else begin : g_rsp_batch_idx_0
|
||||
assign rsp_batch_idx = '0;
|
||||
end
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask
|
||||
localparam i = r / CORE_CHANNELS;
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
|
||||
end
|
||||
|
||||
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
|
||||
|
||||
wire rsp_complete = ~(| rsp_rem_mask_n);
|
||||
|
||||
wire mem_rsp_fire_s = mem_rsp_valid_s && mem_rsp_ready_s;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
rsp_rem_mask[ibuf_waddr] <= core_req_mask;
|
||||
end
|
||||
if (mem_rsp_fire_s) begin
|
||||
rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n;
|
||||
end
|
||||
end
|
||||
|
||||
if (RSP_PARTIAL != 0 || CORE_REQS == 1) begin : g_rsp_partial
|
||||
|
||||
reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
rsp_sop_r[ibuf_waddr] <= 1;
|
||||
end
|
||||
if (mem_rsp_fire_s) begin
|
||||
rsp_sop_r[ibuf_raddr] <= 0;
|
||||
end
|
||||
end
|
||||
if (CORE_REQS == 1) begin : g_rsp_1
|
||||
`UNUSED_VAR (rsp_batch_idx)
|
||||
|
||||
assign crsp_valid = mem_rsp_valid_s;
|
||||
assign crsp_mask = curr_mask;
|
||||
assign crsp_sop = rsp_sop_r[ibuf_raddr];
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign crsp_data[r] = mem_rsp_data_s[j];
|
||||
end
|
||||
assign crsp_mask = mem_rsp_mask_s;
|
||||
assign crsp_sop = 1'b1;
|
||||
assign crsp_eop = 1'b1;
|
||||
assign crsp_data = mem_rsp_data_s;
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready;
|
||||
|
||||
end else begin : g_rsp_full
|
||||
end else begin : g_rsp_N
|
||||
|
||||
wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n;
|
||||
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
|
||||
reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask;
|
||||
wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask;
|
||||
|
||||
for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store
|
||||
for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j
|
||||
reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
|
||||
wire rsp_wren = mem_rsp_fire_s
|
||||
&& (BATCH_SEL_WIDTH'(j) == rsp_batch_idx)
|
||||
&& ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]);
|
||||
always @(posedge clk) begin
|
||||
if (rsp_wren) begin
|
||||
rsp_store[ibuf_raddr] <= mem_rsp_data_s[i];
|
||||
end
|
||||
end
|
||||
assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr];
|
||||
end
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask
|
||||
localparam i = r / CORE_CHANNELS;
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
|
||||
end
|
||||
|
||||
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
|
||||
|
||||
wire mem_rsp_fire_s = mem_rsp_valid_s && mem_rsp_ready_s;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
|
||||
rsp_rem_mask[ibuf_waddr] <= core_req_mask;
|
||||
end
|
||||
if (mem_rsp_fire_s) begin
|
||||
rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n;
|
||||
end
|
||||
end
|
||||
|
||||
assign crsp_valid = mem_rsp_valid_s && rsp_complete;
|
||||
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
|
||||
assign crsp_sop = 1'b1;
|
||||
wire rsp_complete = ~(| rsp_rem_mask_n) || (CORE_REQS == 1);
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
|
||||
localparam i = r / CORE_CHANNELS;
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign crsp_data[r] = rsp_store_n[j][i];
|
||||
if (RSP_PARTIAL != 0) begin : g_rsp_partial
|
||||
|
||||
reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
rsp_sop_r[ibuf_waddr] <= 1;
|
||||
end
|
||||
if (mem_rsp_fire_s) begin
|
||||
rsp_sop_r[ibuf_raddr] <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
assign crsp_valid = mem_rsp_valid_s;
|
||||
assign crsp_mask = curr_mask;
|
||||
assign crsp_sop = rsp_sop_r[ibuf_raddr];
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign crsp_data[r] = mem_rsp_data_s[j];
|
||||
end
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready;
|
||||
|
||||
end else begin : g_rsp_full
|
||||
|
||||
wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n;
|
||||
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
|
||||
|
||||
for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store
|
||||
for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j
|
||||
reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1];
|
||||
wire rsp_wren = mem_rsp_fire_s
|
||||
&& (BATCH_SEL_WIDTH'(j) == rsp_batch_idx)
|
||||
&& ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]);
|
||||
always @(posedge clk) begin
|
||||
if (rsp_wren) begin
|
||||
rsp_store[ibuf_raddr] <= mem_rsp_data_s[i];
|
||||
end
|
||||
end
|
||||
assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr];
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
|
||||
end
|
||||
end
|
||||
|
||||
assign crsp_valid = mem_rsp_valid_s && rsp_complete;
|
||||
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
|
||||
assign crsp_sop = 1'b1;
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
|
||||
localparam i = r / CORE_CHANNELS;
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign crsp_data[r] = rsp_store_n[j][i];
|
||||
end
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
|
||||
end
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
|
||||
|
||||
assign crsp_eop = rsp_complete;
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_crsp_tag
|
||||
|
@ -503,8 +528,6 @@ module VX_mem_scheduler #(
|
|||
assign crsp_tag = ibuf_dout;
|
||||
end
|
||||
|
||||
assign crsp_eop = rsp_complete;
|
||||
|
||||
// Send response to caller
|
||||
|
||||
VX_elastic_buffer #(
|
||||
|
@ -516,7 +539,7 @@ module VX_mem_scheduler #(
|
|||
.reset (reset),
|
||||
.valid_in (crsp_valid),
|
||||
.ready_in (crsp_ready),
|
||||
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
|
||||
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
|
||||
.data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}),
|
||||
.valid_out (core_rsp_valid),
|
||||
.ready_out (core_rsp_ready)
|
||||
|
@ -584,41 +607,41 @@ module VX_mem_scheduler #(
|
|||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_req_rw) begin
|
||||
`TRACE(1, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS)
|
||||
`TRACE(1, (", byteen="))
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS)
|
||||
`TRACE(1, (", data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS)
|
||||
`TRACE(2, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
|
||||
`TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS)
|
||||
`TRACE(2, (", byteen="))
|
||||
`TRACE_ARRAY1D(2, "0x%h", core_req_byteen, CORE_REQS)
|
||||
`TRACE(2, (", data="))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", core_req_data, CORE_REQS)
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
|
||||
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS)
|
||||
`TRACE(2, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
|
||||
`TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS)
|
||||
end
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid))
|
||||
`TRACE(2, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid))
|
||||
end
|
||||
if (core_rsp_valid && core_rsp_ready) begin
|
||||
`TRACE(1, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS)
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid))
|
||||
`TRACE(2, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", core_rsp_data, CORE_REQS)
|
||||
`TRACE(2, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid))
|
||||
end
|
||||
if (| mem_req_fire_s) begin
|
||||
if (| mem_req_rw_s) begin
|
||||
`TRACE(1, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS)
|
||||
`TRACE(1, (", byteen="))
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS)
|
||||
`TRACE(1, (", data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS)
|
||||
`TRACE(2, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
|
||||
`TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS)
|
||||
`TRACE(2, (", byteen="))
|
||||
`TRACE_ARRAY1D(2, "0x%h", mem_req_byteen_s, CORE_CHANNELS)
|
||||
`TRACE(2, (", data="))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", mem_req_data_s, CORE_CHANNELS)
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS)
|
||||
`TRACE(2, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
|
||||
`TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS)
|
||||
end
|
||||
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid))
|
||||
`TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid))
|
||||
end
|
||||
if (mem_rsp_fire_s) begin
|
||||
`TRACE(1, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS)
|
||||
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid))
|
||||
if (mem_rsp_valid_s && mem_rsp_ready_s) begin
|
||||
`TRACE(2, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data_s, CORE_CHANNELS)
|
||||
`TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -13,11 +13,11 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Fast encoder using parallel prefix computation
|
||||
// Fast one-hot encoder using parallel prefix computation
|
||||
// Adapted from BaseJump STL: http://bjump.org/data_out.html
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_encoder #(
|
||||
module VX_onehot_encoder #(
|
||||
parameter N = 1,
|
||||
parameter REVERSE = 0,
|
||||
parameter MODEL = 1,
|
|
@ -66,11 +66,13 @@ module VX_pending_size #(
|
|||
|
||||
if (INCRW != 1 || DECRW != 1) begin : g_wide_step
|
||||
|
||||
localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1);
|
||||
localparam DELTAW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1);
|
||||
|
||||
logic [SIZEW-1:0] size_n, size_r;
|
||||
|
||||
assign size_n = $signed(size_r) + SIZEW'($signed(SUBW'(incr) - SUBW'(decr)));
|
||||
wire [DELTAW-1:0] delta = DELTAW'(incr) - DELTAW'(decr);
|
||||
|
||||
assign size_n = $signed(size_r) + SIZEW'($signed(delta));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -80,8 +82,8 @@ module VX_pending_size #(
|
|||
alm_full_r <= 0;
|
||||
size_r <= '0;
|
||||
end else begin
|
||||
`ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
|
||||
`ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
|
||||
`ASSERT((DELTAW'(incr) <= DELTAW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
|
||||
`ASSERT((DELTAW'(incr) >= DELTAW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
|
||||
empty_r <= (size_n == SIZEW'(0));
|
||||
full_r <= (size_n == SIZEW'(SIZE));
|
||||
alm_empty_r <= (size_n <= SIZEW'(ALM_EMPTY));
|
||||
|
@ -129,7 +131,7 @@ module VX_pending_size #(
|
|||
wire is_empty_n = (used_r == ADDRW'(1));
|
||||
wire is_full_n = (used_r == ADDRW'(SIZE-1));
|
||||
|
||||
wire [1:0] push_minus_pop = {~incr & decr, incr ^ decr};
|
||||
wire [1:0] delta = {~incr & decr, incr ^ decr};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -148,7 +150,7 @@ module VX_pending_size #(
|
|||
if (is_empty_n)
|
||||
empty_r <= 1;
|
||||
end
|
||||
used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop));
|
||||
used_r <= $signed(used_r) + ADDRW'($signed(delta));
|
||||
end
|
||||
end
|
||||
|
||||
|
|
27
hw/rtl/libs/VX_placeholder.sv
Normal file
27
hw/rtl/libs/VX_placeholder.sv
Normal file
|
@ -0,0 +1,27 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
// VX_placeholder: a module with one input bus and one output bus and no
// internal logic. The bus widths come from parameters I and O, each passed
// through `UP(...) in the port declarations below.
// The body is intentionally empty, so `out` is not driven inside this module.
// NOTE(review): `BLACKBOX_CELL and `UP are not defined in this file (they
// presumably come from VX_platform.vh); presumably `BLACKBOX_CELL marks the
// module as a black box for the toolchain and `UP keeps a zero width from
// producing an illegal range -- confirm against the macro definitions.
`BLACKBOX_CELL module VX_placeholder #(
|
||||
// I: width parameter for the `in` port (default 0)
parameter I = 0,
|
||||
// O: width parameter for the `out` port (default 0)
parameter O = 0
|
||||
) (
|
||||
input wire [`UP(I)-1:0] in,
|
||||
output wire [`UP(O)-1:0] out
|
||||
);
|
||||
// Empty on purpose: this module declares an interface only and contains
// no assignments, instances, or processes.
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -448,7 +448,7 @@ module VX_rr_arbiter #(
|
|||
end
|
||||
end
|
||||
|
||||
VX_encoder #(
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) onehot_encoder (
|
||||
.data_in (grant_onehot),
|
||||
|
@ -480,12 +480,12 @@ module VX_rr_arbiter #(
|
|||
end
|
||||
end
|
||||
|
||||
VX_decoder #(
|
||||
VX_demux #(
|
||||
.N (LOG_NUM_REQS),
|
||||
.D (NUM_REQS)
|
||||
) grant_decoder (
|
||||
.data_in (grant_index),
|
||||
.valid_in (grant_valid),
|
||||
.sel_in (grant_index),
|
||||
.data_in (1'b1),
|
||||
.data_out (grant_onehot)
|
||||
);
|
||||
|
||||
|
|
|
@ -113,8 +113,7 @@ module VX_scope_tap #(
|
|||
.DATAW (IDLE_CTRW),
|
||||
.SIZE (DEPTH),
|
||||
.OUT_REG (1),
|
||||
.READ_ENABLE (0),
|
||||
.NO_RWCHECK (1)
|
||||
.RDW_MODE ("R")
|
||||
) delta_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -136,8 +135,7 @@ module VX_scope_tap #(
|
|||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
.OUT_REG (1),
|
||||
.READ_ENABLE (0),
|
||||
.NO_RWCHECK (1)
|
||||
.RDW_MODE ("R")
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -13,6 +13,35 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// RAM_INITIALIZATION: generate-level snippet that optionally preloads a memory
// array named `ram`. It expands in place, so the expansion site must already
// have `ram`, INIT_ENABLE, INIT_FILE, INIT_VALUE and SIZE in scope.
//   - INIT_ENABLE == 0 : expands to nothing effective (no initial block).
//   - INIT_FILE != ""  : loads `ram` from the file with $readmemh.
//   - otherwise        : an initial block writes INIT_VALUE to entries
//                        0 .. SIZE-1 of `ram`.
// Comments are kept outside the macro body so they cannot interfere with the
// backslash line continuations.
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE != 0) begin : g_init \
|
||||
if (INIT_FILE != "") begin : g_file \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin : g_value \
|
||||
initial begin \
|
||||
for (integer i = 0; i < SIZE; ++i) begin : g_i \
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end \
|
||||
end \
|
||||
end
|
||||
|
||||
// RAM_ARRAY_WREN / RAM_WRITE_WREN: paired macros for memories with per-lane
// write enables. RAM_ARRAY_WREN declares the storage array `ram`;
// RAM_WRITE_WREN is the matching write statement, meant to be expanded inside
// a clocked process. Both variants loop over WRENW lanes and update a lane
// only when its wren[i] bit is set; each lane is WSELW bits of wdata.
// The expansion site must provide WRENW, WSELW, DATAW, SIZE, wren, addr and
// wdata.
`ifdef QUARTUS
// QUARTUS variant: each word is a packed 2-D vector [WRENW][WSELW], and a lane
// is written by indexing the lane dimension directly (ram[addr][i]).
// NOTE(review): presumably this shape is what Quartus needs to infer
// byte-enable RAM -- confirm against the Intel coding guidelines.
|
||||
`define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[addr][i] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end
|
||||
`else
// Default variant: each word is a flat DATAW-bit vector, and a lane is written
// through an indexed part-select (i * WSELW +: WSELW).
|
||||
`define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end
|
||||
`endif
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_sp_ram #(
|
||||
parameter DATAW = 1,
|
||||
|
@ -20,11 +49,9 @@ module VX_sp_ram #(
|
|||
parameter WRENW = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter RW_ASSERT = 0,
|
||||
parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, N: no-change, U: undefined
|
||||
parameter RDW_ASSERT = 0,
|
||||
parameter RESET_RAM = 0,
|
||||
parameter RESET_OUT = 0,
|
||||
parameter READ_ENABLE = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0,
|
||||
|
@ -39,32 +66,442 @@ module VX_sp_ram #(
|
|||
input wire [DATAW-1:0] wdata,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.WRENW (WRENW),
|
||||
.OUT_REG (OUT_REG),
|
||||
.LUTRAM (LUTRAM),
|
||||
.NO_RWCHECK (NO_RWCHECK),
|
||||
.RW_ASSERT (RW_ASSERT),
|
||||
.RESET_RAM (RESET_RAM),
|
||||
.RESET_OUT (RESET_OUT),
|
||||
.READ_ENABLE(READ_ENABLE),
|
||||
.INIT_ENABLE(INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE),
|
||||
.ADDRW (ADDRW)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
.waddr (addr),
|
||||
.wdata (wdata),
|
||||
.raddr (addr),
|
||||
.rdata (rdata)
|
||||
);
|
||||
localparam WSELW = DATAW / WRENW;
|
||||
`UNUSED_PARAM (LUTRAM)
|
||||
|
||||
`STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter"))
|
||||
`STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter"))
|
||||
`UNUSED_PARAM (RDW_ASSERT)
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM);
|
||||
if (OUT_REG) begin : g_sync
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
if (RDW_MODE == "R") begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] addr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
addr_r <= addr;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr_r];
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
rdata_r <= wdata;
|
||||
end else begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "N") begin : g_no_change
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end else begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end else begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "U") begin : g_unknown
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end
|
||||
end else begin : g_auto
|
||||
if (RDW_MODE == "R") begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [ADDRW-1:0] addr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
addr_r <= addr;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr_r];
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
rdata_r <= wdata;
|
||||
end else begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "N") begin : g_no_change
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end else begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end else begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else if (RDW_MODE == "U") begin : g_unknown
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
if (read) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin : g_async
|
||||
`UNUSED_VAR (read)
|
||||
if (FORCE_BRAM) begin : g_bram
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
`ifdef VIVADO
|
||||
VX_async_ram_patch #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.WRENW (WRENW),
|
||||
.DUAL_PORT (0),
|
||||
.INIT_ENABLE(INIT_ENABLE),
|
||||
.INIT_FILE (INIT_FILE),
|
||||
.INIT_VALUE (INIT_VALUE)
|
||||
) async_ram_patch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (write),
|
||||
.wren (wren),
|
||||
.waddr (addr),
|
||||
.wdata (wdata),
|
||||
.raddr (addr),
|
||||
.rdata (rdata)
|
||||
);
|
||||
`else
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end else begin : g_no_wren
|
||||
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
`endif
|
||||
end else begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end else begin : g_no_wren
|
||||
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
end
|
||||
end else begin : g_auto
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end else begin : g_no_wren
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
end else begin : g_read_first
|
||||
if (WRENW != 1) begin : g_wren
|
||||
`NO_RW_RAM_CHECK `RAM_ARRAY_WREN
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
`RAM_WRITE_WREN
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end else begin : g_no_wren
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
if (write) begin
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`else
|
||||
// simulation
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (RESET_RAM && reset) begin
|
||||
for (integer i = 0; i < SIZE; ++i) begin
|
||||
ram[i] <= DATAW'(INIT_VALUE);
|
||||
end
|
||||
end else if (write) begin
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i]) begin
|
||||
ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (OUT_REG) begin : g_sync
|
||||
if (RDW_MODE == "R") begin : g_read_first
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else if (RDW_MODE == "W") begin : g_write_first
|
||||
reg [ADDRW-1:0] addr_r;
|
||||
always @(posedge clk) begin
|
||||
if (read || write) begin
|
||||
addr_r <= addr;
|
||||
end
|
||||
end
|
||||
assign rdata = ram[addr_r];
|
||||
end else if (RDW_MODE == "N") begin : g_no_change
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read && ~write) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else if (RDW_MODE == "U") begin : g_unknown
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
always @(posedge clk) begin
|
||||
if (read) begin
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end
|
||||
end else begin : g_async
|
||||
`UNUSED_VAR (read)
|
||||
if (RDW_MODE == "W") begin : g_write_first
|
||||
assign rdata = ram[addr];
|
||||
end else begin : g_read_first
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
prev_write <= 0;
|
||||
prev_data <= '0;
|
||||
prev_waddr <= '0;
|
||||
end else begin
|
||||
prev_write <= write;
|
||||
prev_data <= ram[addr];
|
||||
prev_waddr <= addr;
|
||||
end
|
||||
end
|
||||
assign rdata = (prev_write && (prev_waddr == addr)) ? prev_data : ram[addr];
|
||||
if (RDW_ASSERT) begin : g_rw_asert
|
||||
`RUNTIME_ASSERT(~read || (rdata == ram[addr]), ("%t: read after write hazard", $time))
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -64,12 +64,12 @@ module VX_stream_xbar #(
|
|||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders
|
||||
VX_decoder #(
|
||||
VX_demux #(
|
||||
.N (OUT_WIDTH),
|
||||
.D (NUM_OUTPUTS)
|
||||
) sel_in_decoder (
|
||||
.data_in (sel_in[i]),
|
||||
.valid_in (valid_in[i]),
|
||||
) sel_in_demux (
|
||||
.sel_in (sel_in[i]),
|
||||
.data_in (valid_in[i]),
|
||||
.data_out (per_output_valid_in[i])
|
||||
);
|
||||
assign ready_in[i] = | per_output_ready_in_w[i];
|
||||
|
@ -137,12 +137,12 @@ module VX_stream_xbar #(
|
|||
wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w;
|
||||
wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w;
|
||||
|
||||
VX_decoder #(
|
||||
VX_demux #(
|
||||
.N (OUT_WIDTH),
|
||||
.D (NUM_OUTPUTS)
|
||||
) sel_in_decoder (
|
||||
.data_in (sel_in[0]),
|
||||
.valid_in (valid_in[0]),
|
||||
) sel_in_demux (
|
||||
.sel_in (sel_in[0]),
|
||||
.data_in (valid_in[0]),
|
||||
.data_out (valid_out_w)
|
||||
);
|
||||
|
||||
|
|
|
@ -60,11 +60,11 @@ module VX_gbar_unit #(
|
|||
`ifdef DBG_TRACE_GBAR
|
||||
always @(posedge clk) begin
|
||||
if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin
|
||||
`TRACE(1, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n",
|
||||
`TRACE(2, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n",
|
||||
$time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id))
|
||||
end
|
||||
if (gbar_bus_if.rsp_valid) begin
|
||||
`TRACE(1, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id))
|
||||
`TRACE(2, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -167,9 +167,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.SIZE (WORDS_PER_BANK),
|
||||
.WRENW (WORD_SIZE),
|
||||
.OUT_REG (1),
|
||||
.READ_ENABLE (0),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.RDW_MODE ("R")
|
||||
) lmem_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
|
||||
|
@ -330,15 +329,15 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
|
||||
if (mem_bus_if[i].req_data.rw) begin
|
||||
`TRACE(1, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
|
||||
`TRACE(2, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
`TRACE(2, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i]))
|
||||
end
|
||||
end
|
||||
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
|
||||
`TRACE(1, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
`TRACE(2, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n",
|
||||
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]))
|
||||
end
|
||||
end
|
||||
|
|
525
hw/scripts/xilinx_async_bram_patch.tcl
Normal file
525
hw/scripts/xilinx_async_bram_patch.tcl
Normal file
|
@ -0,0 +1,525 @@
|
|||
namespace eval vortex {
|
||||
|
||||
variable debug 0
|
||||
|
||||
# Print a diagnostic on stdout and, unless told otherwise, terminate the run.
#
# msg     - text printed after the severity prefix.
# do_exit - when true (default) the line is prefixed with "ERROR:" and the
#           interpreter exits with status -1; when false the line is prefixed
#           with "WARNING:" and control returns to the caller.
proc print_error {msg {do_exit 1}} {
    if {!$do_exit} {
        puts "WARNING: $msg"
        return
    }
    puts "ERROR: $msg"
    exit -1
}
|
||||
|
||||
# Substitute the first match of regular expression <match> in <str> with
# <repl> (regsub substitution syntax, so \1 etc. refer to capture groups)
# and return the resulting string. When nothing matches, <str> is returned
# unchanged.
proc str_replace {str match repl} {
    return [regsub $match $str $repl]
}
|
||||
|
||||
# Return a cell name that get_cells does not currently resolve.
# <name> itself is returned when free; otherwise the first free name of the
# form <name>_0, <name>_1, ... is returned.
proc unique_cell_name {name} {
    set candidate $name
    set suffix 0
    while {[get_cells -quiet $candidate] != {}} {
        set candidate ${name}_${suffix}
        incr suffix
    }
    return $candidate
}
|
||||
|
||||
# Return a net name that get_nets does not currently resolve.
# <name> itself is returned when free; otherwise the first free name of the
# form <name>_0, <name>_1, ... is returned.
proc unique_net_name {name} {
    set candidate $name
    set suffix 0
    while {[get_nets -quiet $candidate] != {}} {
        set candidate ${name}_${suffix}
        incr suffix
    }
    return $candidate
}
|
||||
|
||||
# Collect the cells whose PARENT property equals <parent> and whose NAME
# matches the regular expression <name_match>.
#
# should_exist - forwarded to print_error when nothing matches: true (default)
#                aborts the run, false only prints a warning.
# Returns the (possibly empty) list of matching cells.
proc find_nested_cells {parent name_match {should_exist 1}} {
    set hits {}
    set children [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"]
    foreach child $children {
        if {![regexp $name_match [get_property NAME $child]]} {
            continue
        }
        lappend hits $child
    }
    if {[llength $hits] == 0} {
        print_error "No matching cell found for '$parent' matching '$name_match'." $should_exist
    }
    return $hits
}
|
||||
|
||||
# Return the first cell whose PARENT property equals <parent> and whose NAME
# equals <name_match>. Aborts the run (exit status -1) when no cell matches.
proc find_nested_cell {parent name_match} {
    set children [get_cells -hierarchical -filter "PARENT == $parent"]
    foreach child $children {
        if {[get_property NAME $child] == $name_match} {
            return $child
        }
    }
    # print_error's default mode prints "ERROR: ..." and exits with -1.
    print_error "No matching cell found for '$parent' matching '$name_match'."
}
|
||||
|
||||
# Collect the nets whose PARENT_CELL property equals <cell> and whose NAME
# matches the regular expression <name_match>.
#
# should_exist - forwarded to print_error when nothing matches: true (default)
#                aborts the run, false only prints a warning.
# Returns the (possibly empty) list of matching nets.
proc find_cell_nets {cell name_match {should_exist 1}} {
    set hits {}
    set owned_nets [get_nets -hierarchical -filter "PARENT_CELL == $cell"]
    foreach candidate $owned_nets {
        if {![regexp $name_match [get_property NAME $candidate]]} {
            continue
        }
        lappend hits $candidate
    }
    if {[llength $hits] == 0} {
        print_error "No matching net found for '$cell' matching '$name_match'." $should_exist
    }
    return $hits
}
|
||||
|
||||
# Return the first net whose PARENT_CELL property equals <cell> and whose
# NAME equals <name_match>. Aborts the run (exit status -1) when none matches.
proc get_cell_net {cell name_match} {
    set owned_nets [get_nets -hierarchical -filter "PARENT_CELL == $cell"]
    foreach candidate $owned_nets {
        if {[get_property NAME $candidate] == $name_match} {
            return $candidate
        }
    }
    # print_error's default mode prints "ERROR: ..." and exits with -1.
    print_error "No matching net found for '$cell' matching '$name_match'."
}
|
||||
|
||||
# Collect the pins of <cell> whose NAME matches the regular expression
# <name_match>.
#
# should_exist - forwarded to print_error when nothing matches: true (default)
#                aborts the run, false only prints a warning.
# Returns the (possibly empty) list of matching pins.
proc find_cell_pins {cell name_match {should_exist 1}} {
    set hits {}
    set cell_pins [get_pins -of_objects $cell]
    foreach candidate $cell_pins {
        if {![regexp $name_match [get_property NAME $candidate]]} {
            continue
        }
        lappend hits $candidate
    }
    if {[llength $hits] == 0} {
        print_error "No matching pin found for '$cell' matching '$name_match'." $should_exist
    }
    return $hits
}
|
||||
|
||||
# Return the first pin of <cell> whose NAME equals <name_match>.
# Aborts the run (exit status -1) when no pin matches.
proc get_cell_pin {cell name_match} {
    set cell_pins [get_pins -of_objects $cell]
    foreach candidate $cell_pins {
        if {[get_property NAME $candidate] == $name_match} {
            return $candidate
        }
    }
    # print_error's default mode prints "ERROR: ..." and exits with -1.
    print_error "No matching pin found for '$cell' matching '$name_match'."
}
|
||||
|
||||
# Re-drive <pin> from <source_pin>.
#
# The net currently attached to <pin> (if any) is disconnected from it, then
# <pin> is connected to the net attached to <source_pin>. When <source_pin>
# has no net yet, a fresh uniquely named net is created and attached to it
# first. More than one net on either pin aborts the run.
proc replace_pin_source {pin source_pin} {
    variable debug

    # Step 1: detach the pin from its present net.
    set old_net [get_nets -of_objects $pin]
    set old_count [llength $old_net]
    if {$old_count > 1} {
        puts "ERROR: Multiple nets connected to pin '$pin'."
        exit -1
    }
    if {$old_count == 1} {
        disconnect_net -net $old_net -objects $pin
        if {$debug} {puts "DEBUG: Disconnected net '$old_net' from pin '$pin'."}
    } else {
        puts "WARNING: No net connected to pin '$pin'."
    }

    # Step 2: locate (or build) the net driven by the new source.
    set source_net [get_nets -quiet -of_objects $source_pin]
    set src_count [llength $source_net]
    if {$src_count > 1} {
        puts "ERROR: Multiple nets connected to pin '$source_pin'."
        exit -1
    }
    if {$src_count == 0} {
        # The source pin is floating: give it a net named after its cell.
        set source_cell [get_cells -of_objects $source_pin]
        set source_net [create_net [unique_net_name "${source_cell}_net"]]
        if {$debug} {puts "DEBUG: Created source_net: '$source_net'"}
        connect_net -net $source_net -objects $source_pin -hierarchical
        if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$source_pin'."}
    }

    # Step 3: hook the pin onto the source net.
    connect_net -net $source_net -objects $pin -hierarchical
    if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."}
}
|
||||
|
||||
# Helper: return the one pin of <cell> whose name ends in "/<pin_name>".
# Aborts the run when zero or several pins match. <cell_desc> is only used
# in the error text (e.g. "register", "FDRE", "LUT2").
proc _single_cell_pin {cell pin_name cell_desc} {
    set pins [get_pins -of_objects $cell -filter "NAME =~ \"*/$pin_name\""]
    if {[llength $pins] == 0} {
        print_error "No $pin_name pin found on $cell_desc cell '$cell'."
    } elseif {[llength $pins] > 1} {
        print_error "Multiple $pin_name pins found on $cell_desc cell '$cell'."
    }
    return $pins
}

# Helper: return the driver of <pin> as reported by find_pin_driver, aborting
# the run when no driver is reported.
proc _require_pin_driver {pin} {
    set src_pin [find_pin_driver $pin]
    if {$src_pin == ""} {
        print_error "No source pin found connected to '$pin'."
    }
    return $src_pin
}

# Helper: return the one net attached to <src_pin>, aborting the run when
# there is none or more than one.
proc _single_source_net {src_pin} {
    set nets [get_nets -of_objects $src_pin]
    if {[llength $nets] == 0} {
        print_error "Unable to get source nets for pins."
    } elseif {[llength $nets] > 1} {
        print_error "Multiple source nets found for pins."
    }
    return $nets
}

# Build a LUT2 that reproduces the next-state value of a register and return
# the LUT's output pin.
#
# reg_cell    - an FDRE or FDSE cell; any other REF_NAME aborts the run.
# prefix_name - base name for the new LUT2 cell (made unique if taken).
#
# The LUT inputs are I0 = net driving the register's D pin and
# I1 = net driving the register's R (FDRE) or S (FDSE) pin:
#   FDRE: O = I1 ? 0 : I0   (INIT 4'b0010)
#   FDSE: O = I1 ? 1 : I0   (INIT 4'b1110)
#
# Every failure is reported through print_error, which exits with status -1
# (the unsupported-type path previously exited with status 1).
proc create_register_next {reg_cell prefix_name} {
    variable debug

    set reg_d_pin [_single_cell_pin $reg_cell "D" "register"]
    if {$debug} {puts "DEBUG: reg_d_pin: '$reg_d_pin'"}

    set reg_d_src_pin [_require_pin_driver $reg_d_pin]
    if {$debug} {puts "DEBUG: reg_d_src_pin: '$reg_d_src_pin'"}

    # Pick the synchronous control pin and the matching LUT truth table.
    set register_type [get_property REF_NAME $reg_cell]
    switch -- $register_type {
        FDRE {
            set ctl_name "R"
            set lut_init "4'b0010"
        }
        FDSE {
            set ctl_name "S"
            set lut_init "4'b1110"
        }
        default {
            print_error "Unsupported register type: '$register_type'."
        }
    }

    set reg_ctl_pin [_single_cell_pin $reg_cell $ctl_name $register_type]
    if {$debug} {puts "DEBUG: reg_[string tolower $ctl_name]_pin: '$reg_ctl_pin'"}

    set reg_r_src_pin [_require_pin_driver $reg_ctl_pin]
    if {$debug} {puts "DEBUG: reg_r_src_pin: '$reg_r_src_pin'"}

    set reg_d_src_net [_single_source_net $reg_d_src_pin]
    set reg_r_src_net [_single_source_net $reg_r_src_pin]

    # Create the LUT2 acting as the register's next-value mux.
    set lut_name [unique_cell_name $prefix_name]
    set lut_cell [create_cell -reference LUT2 $lut_name]
    puts "INFO: Created lut cell: '$lut_cell'"
    set_property INIT $lut_init $lut_cell

    # Error texts now name the LUT2 cell (they used to say "FDSE cell").
    set lut_i0_pin [_single_cell_pin $lut_cell "I0" "LUT2"]
    set lut_i1_pin [_single_cell_pin $lut_cell "I1" "LUT2"]
    set lut_o_pin  [_single_cell_pin $lut_cell "O" "LUT2"]

    connect_net -net $reg_d_src_net -objects $lut_i0_pin -hierarchical
    if {$debug} {puts "DEBUG: Connected net '$reg_d_src_net' to pin '$lut_i0_pin'."}

    connect_net -net $reg_r_src_net -objects $lut_i1_pin -hierarchical
    if {$debug} {puts "DEBUG: Connected net '$reg_r_src_net' to pin '$lut_i1_pin'."}

    return $lut_o_pin
}
|
||||
|
||||
# Return the P pin of a VCC cell.
#
# The first cell reported by get_cells with REF_NAME == VCC is reused; when
# there is none, a VCC cell is created under a unique name derived from
# <prefix_name>. Aborts the run unless exactly one pin matching "*/P" exists
# on the chosen cell.
proc getOrCreateVCCPin {prefix_name} {
    variable debug

    set existing [get_cells -quiet -filter {REF_NAME == VCC}]
    if {[llength $existing] != 0} {
        set vcc_cell [lindex $existing 0]
    } else {
        set vcc_cell [create_cell -reference VCC [unique_cell_name $prefix_name]]
        puts "INFO: Created VCC cell: '$vcc_cell'"
    }

    set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}]
    set pin_count [llength $vcc_pin]
    if {$pin_count == 0} {
        puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'."
        exit -1
    }
    if {$pin_count > 1} {
        puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'."
        exit -1
    }
    return $vcc_pin
}
|
||||
|
||||
# Return the G pin of a GND cell.
#
# The first cell reported by get_cells with REF_NAME == GND is reused; when
# there is none, a GND cell is created under a unique name derived from
# <prefix_name>. Aborts the run unless exactly one pin matching "*/G" exists
# on the chosen cell.
proc getOrCreateGNDPin {prefix_name} {
    variable debug

    set existing [get_cells -quiet -filter {REF_NAME == GND}]
    if {[llength $existing] != 0} {
        set gnd_cell [lindex $existing 0]
    } else {
        set gnd_cell [create_cell -reference GND [unique_cell_name $prefix_name]]
        puts "INFO: Created GND cell: '$gnd_cell'"
    }

    set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}]
    set pin_count [llength $gnd_pin]
    if {$pin_count == 0} {
        puts "ERROR: No GND pin found on GND cell '$gnd_cell'."
        exit -1
    }
    if {$pin_count > 1} {
        puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'."
        exit -1
    }
    return $gnd_pin
}
|
||||
|
||||
# List everything that <input_net> feeds: leaf pins with DIRECTION "IN"
# first, followed by ports with DIRECTION "OUT".
#
# should_exist - forwarded to print_error when the list is empty: true
#                (default) aborts the run, false only prints a warning.
proc find_net_sinks {input_net {should_exist 1}} {
    set sinks {}

    set leaf_inputs [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "IN"}]
    foreach load_pin $leaf_inputs {
        lappend sinks $load_pin
    }

    set out_ports [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "OUT"}]
    foreach out_port $out_ports {
        lappend sinks $out_port
    }

    if {[llength $sinks] == 0} {
        print_error "No sink found for '$input_net'." $should_exist
    }
    return $sinks
}
|
||||
|
||||
# Return the driver of <input_net>.
#
# Leaf pins with DIRECTION "OUT" are preferred; when there are none, ports
# with DIRECTION "IN" are considered. If several candidates exist a warning
# is printed and the first one is returned. If there is no candidate at all,
# print_error is invoked with <should_exist> (true aborts the run, false
# warns) and the empty port query result is returned.
proc find_net_driver {input_net {should_exist 1}} {
    set out_pins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}]
    set pin_count [llength $out_pins]
    if {$pin_count == 1} {
        return $out_pins
    }
    if {$pin_count > 1} {
        puts "WARNING: Multiple driver pins found for '$input_net'."
        return [lindex $out_pins 0]
    }

    # No cell pin drives the net: fall back to input ports.
    set in_ports [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}]
    set port_count [llength $in_ports]
    if {$port_count > 1} {
        puts "WARNING: Multiple driver ports found for '$input_net'."
        return [lindex $in_ports 0]
    }
    if {$port_count == 0} {
        print_error "No driver found for '$input_net'." $should_exist
    }
    return $in_ports
}
|
||||
|
||||
# Return the driver of the net attached to <input_pin>.
#
# should_exist - true (default): a missing net or missing driver aborts the
#                run. false: a warning is printed and an empty string is
#                returned instead.
#
# More than one net on the pin always aborts the run.
#
# Previously the should_exist=0 case fell through after the warning and
# called find_net_driver on an empty net with that proc's default
# should_exist=1, so the caller's tolerance was not honoured. The flag is
# now propagated and the missing-net case returns "" directly.
proc find_pin_driver {input_pin {should_exist 1}} {
    set net [get_nets -quiet -of_objects $input_pin]
    set net_count [llength $net]
    if {$net_count > 1} {
        puts "ERROR: Multiple nets connected to pin '$input_pin'."
        exit -1
    }
    if {$net_count == 0} {
        # Exits when should_exist is true; otherwise only warns.
        print_error "No net connected to pin '$input_pin'." $should_exist
        return ""
    }
    return [find_net_driver $net $should_exist]
}
|
||||
|
||||
# For every net in <nets>, derive a sibling name by applying the regsub
# substitution <match> -> <repl> to the net's name, and look that sibling up
# inside <cell> with get_cell_net.
#
# Returns the siblings in the same order as <nets>. Aborts the run when no
# sibling was collected or when the number of siblings differs from the
# number of input nets.
proc find_matching_nets {cell nets match repl} {
    set siblings {}
    foreach src_net $nets {
        set sibling [get_cell_net $cell [str_replace $src_net $match $repl]]
        if {$sibling != ""} {
            lappend siblings $sibling
        }
    }

    set found [llength $siblings]
    if {$found == 0} {
        puts "ERROR: No matching nets found for '$nets'."
        exit -1
    }
    if {$found != [llength $nets]} {
        puts "ERROR: Mismatch in number of matching nets."
        exit -1
    }
    return $siblings
}
|
||||
|
||||
# Re-drive every sink of <net> from <source_pin>.
# The sink list comes from find_net_sinks in non-fatal mode, so a net without
# sinks only produces a warning and nothing is rewired.
proc replace_net_source {net source_pin} {
    set sinks [find_net_sinks $net 0]
    foreach sink $sinks {
        replace_pin_source $sink $source_pin
    }
}
|
||||
|
||||
# Patch one async-BRAM placeholder instance <inst>.
#
# The instance is expected to contain nets named raddr_w[...], raddr_s[...],
# read_s and is_raddr_reg, plus cells whose names end in "placeholder".
#
# If every raddr_w bit is driven by an FDRE/FDSE register:
#   * each raddr_s bit's sinks are re-driven from a LUT2 that computes the
#     corresponding register's next value (see create_register_next),
#   * read_s sinks are re-driven from the driver of the first register's CE,
#   * is_raddr_reg sinks are tied to VCC.
# Otherwise is_raddr_reg sinks are tied to GND.
# In both cases the placeholder cells are removed at the end.
#
# Drivers are classified in a first pass and LUT2 cells are only created once
# the address is known to be fully registered, so a partially registered
# address no longer leaves orphan LUT2 cells in the netlist.
proc resolve_async_bram {inst} {
    variable debug

    puts "INFO: Resolving asynchronous BRAM patch: '$inst'."

    set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"]
    set read_s_net [find_cell_nets $inst "read_s$"]
    set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"]

    set raddr_s_nets [find_matching_nets $inst $raddr_w_nets "raddr_w(\\\[\\d+\\\])?$" "raddr_s\\1"]

    # Pass 1: find the driving cell of every raddr_w bit and stop at the
    # first one that is not an FDRE/FDSE register.
    set raddr_src_cells {}
    foreach raddr_w_net $raddr_w_nets {
        if {$debug} {puts "DEBUG: Processing raddr_w net: '$raddr_w_net'"}

        set raddr_src_pin [find_net_driver $raddr_w_net]
        if {$debug} {puts "DEBUG: raddr_src_pin: '$raddr_src_pin'"}

        set raddr_src_cell [get_cells -of_objects $raddr_src_pin]
        if {[llength $raddr_src_cell] == 0} {
            puts "ERROR: No source cell found connected to pin '$raddr_src_pin'."
            exit -1
        } elseif {[llength $raddr_src_cell] > 1} {
            puts "ERROR: Multiple source cells found connected to pin '$raddr_src_pin'."
            exit -1
        }

        set driver_type [get_property REF_NAME $raddr_src_cell]
        if {$driver_type != "FDRE" && $driver_type != "FDSE"} {
            puts "WARNING: Net '$raddr_w_net' is not registered, driver_type='$driver_type'"
            break
        }
        if {$debug} {puts "DEBUG: Net '$raddr_w_net' is registered, driver_type='$driver_type'"}

        lappend raddr_src_cells $raddr_src_cell
    }

    # do we have a fully registered read address?
    if {[llength $raddr_src_cells] == [llength $raddr_w_nets]} {
        puts "INFO: Fully registered read address detected."

        # Pass 2: build one next-value LUT per address register.
        set reg_next_pins {}
        set reg_ce_src_pin ""
        foreach raddr_src_cell $raddr_src_cells {
            set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"]
            if {$reg_next_pin == ""} {
                puts "ERROR: failed to create register next value for '$raddr_src_cell'."
                exit -1
            }
            if {$debug} {puts "DEBUG: reg_next_pin: '$reg_next_pin'"}
            lappend reg_next_pins $reg_next_pin

            # The clock-enable source is taken from the first register only.
            # NOTE(review): this assumes all address bits share one CE - confirm.
            if {$reg_ce_src_pin == ""} {
                set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}]
                if {[llength $reg_ce_pin] == 0} {
                    puts "ERROR: No CE pin found on register cell '$raddr_src_cell'."
                    exit -1
                } elseif {[llength $reg_ce_pin] > 1} {
                    puts "ERROR: Multiple CE pins found on register cell '$raddr_src_cell'."
                    exit -1
                }
                if {$debug} {puts "DEBUG: reg_ce_pin: '$reg_ce_pin'"}

                set reg_ce_src_pin [find_pin_driver $reg_ce_pin]
                if {$reg_ce_src_pin == ""} {
                    puts "ERROR: No source pin found connected to '$reg_ce_pin'."
                    exit -1
                }
                if {$debug} {puts "DEBUG: reg_ce_src_pin: '$reg_ce_src_pin'"}
            }
        }

        # Connect each next-value pin to all input pins attached to the
        # matching raddr_s bit (both lists follow the raddr_w order).
        foreach raddr_s_net $raddr_s_nets reg_next_pin $reg_next_pins {
            puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins."
            replace_net_source $raddr_s_net $reg_next_pin
        }

        # Connect reg_ce_src_pin to all input pins attached to read_s_net
        puts "INFO: Connecting pin '$reg_ce_src_pin' to '$read_s_net's pins."
        replace_net_source $read_s_net $reg_ce_src_pin

        # Constant 1 tells the patched logic that the address is registered.
        set vcc_pin [getOrCreateVCCPin "$inst/VCC"]
        puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins."
        replace_net_source $is_raddr_reg_net $vcc_pin
    } else {
        puts "WARNING: Not all read addresses are registered!"

        # Constant 0 tells the patched logic that the address is not registered.
        set gnd_pin [getOrCreateGNDPin "$inst/GND"]
        puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins."
        replace_net_source $is_raddr_reg_net $gnd_pin
    }

    # Remove all placeholder cells
    foreach cell [find_nested_cells $inst "placeholder$"] {
        remove_cell $cell
        if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."}
    }
}
|
||||
|
||||
# Locate every cell whose REF_NAME contains "VX_async_ram_patch" and run
# resolve_async_bram on each of them. All matches are listed before the
# first one is patched; when there is none, an informational line is printed.
proc resolve_async_brams {} {
    set patch_cells {}
    foreach candidate [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}] {
        puts "INFO: Found async BRAM patch cell: '$candidate'."
        lappend patch_cells $candidate
    }

    if {[llength $patch_cells] == 0} {
        puts "INFO: No async BRAM patch cells found in the design."
        return
    }

    foreach patch_cell $patch_cells {
        resolve_async_bram $patch_cell
    }
}
|
||||
|
||||
}
|
||||
|
||||
# Invoke the procedure to resolve async BRAM
|
||||
vortex::resolve_async_brams
|
71
hw/scripts/xilinx_export_netlist.tcl
Normal file
71
hw/scripts/xilinx_export_netlist.tcl
Normal file
|
@ -0,0 +1,71 @@
|
|||
# Function to export netlist to a Graphviz DOT file
|
||||
# Write the current design's netlist to <dot_file_name> as a Graphviz DOT
# digraph.
#
# Emitted content:
#   * one box node per cell (label: name and REF_NAME),
#   * one filled ellipse node per port (lightblue for DIRECTION "IN",
#     lightgreen otherwise),
#   * for every net that has a pin with DIRECTION "OUT", one edge from that
#     pin to every other pin on the net, labelled with the net name.
#
# The file is closed even when a netlist query fails; the original error is
# then re-raised to the caller. The literal braces written to the file are
# backslash-escaped so the proc body no longer depends on them happening to
# balance each other.
proc export_netlist {dot_file_name} {
    set dot_file [open $dot_file_name "w"]

    set status [catch {
        # Start the DOT graph definition
        puts $dot_file "digraph Netlist \{"
        puts $dot_file "rankdir=LR;" ;# Set the graph direction from left to right

        # Cells become box-shaped nodes.
        foreach cell [get_cells -hierarchical] {
            set cell_name [get_property NAME $cell]
            set cell_type [get_property REF_NAME $cell]
            puts $dot_file "\"$cell_name\" \[label=\"$cell_name\\n($cell_type)\", shape=box\];"
        }

        # Ports become colour-coded ellipses.
        foreach port [get_ports] {
            set port_name [get_property NAME $port]
            set direction [get_property DIRECTION $port]
            set shape "ellipse"

            if {$direction == "IN"} {
                set color "lightblue"
            } else {
                set color "lightgreen"
            }
            puts $dot_file "\"$port_name\" \[label=\"$port_name\", shape=$shape, style=filled, fillcolor=$color\];"
        }

        # Each net contributes edges from its OUT pin to all its other pins.
        foreach net [get_nets -hierarchical] {
            set net_name [get_property NAME $net]

            set source_pin ""
            set sink_pins {}

            foreach pin [get_pins -of_objects $net] {
                set direction [get_property DIRECTION $pin]
                set cell [get_cells -of_objects $pin]
                set pin_name [get_property NAME $pin]

                # NOTE(review): if a pin's NAME is already the full
                # hierarchical path, "$cell/$pin_name" repeats the cell path
                # and the edge endpoints will not match the cell nodes above -
                # confirm against real tool output.
                if {$direction == "OUT"} {
                    # Last OUT pin seen wins as the edge source.
                    set source_pin "$cell/$pin_name"
                } else {
                    lappend sink_pins "$cell/$pin_name"
                }
            }

            # Nets without an OUT pin produce no edges.
            if {$source_pin != ""} {
                foreach sink_pin $sink_pins {
                    puts $dot_file "\"$source_pin\" -> \"$sink_pin\" \[label=\"$net_name\"\];"
                }
            }
        }

        # End the DOT graph definition
        puts $dot_file "\}"
    } err_msg err_opts]

    # Always release the file handle, then propagate any failure unchanged.
    close $dot_file
    if {$status != 0} {
        return -options $err_opts $err_msg
    }

    puts "Netlist exported to DOT file: $dot_file_name"
}
|
||||
|
||||
# Run the export function
|
||||
export_netlist "netlist.dot"
|
|
@ -31,9 +31,9 @@ project_1/sources.txt:
|
|||
build: $(PROJECT).xpr
|
||||
$(PROJECT).xpr: project_1/sources.txt
|
||||
ifdef FPU_IP
|
||||
MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR)
|
||||
MAX_JOBS=$(JOBS) FPU_IP=project_1/ip SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc
|
||||
else
|
||||
MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR)
|
||||
MAX_JOBS=$(JOBS) SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc
|
||||
endif
|
||||
|
||||
clean:
|
||||
|
|
|
@ -14,9 +14,9 @@
|
|||
# Start time
|
||||
set start_time [clock seconds]
|
||||
|
||||
if { $::argc != 5 } {
|
||||
puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n"
|
||||
puts "Usage: $::argv0 <top_module> <device_part> <vcs_file> <xdc_file> <tool_dir>\n"
|
||||
if { $::argc != 4 } {
|
||||
puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n"
|
||||
puts "Usage: $::argv0 <top_module> <device_part> <vcs_file> <xdc_file>\n"
|
||||
exit
|
||||
}
|
||||
|
||||
|
@ -27,13 +27,16 @@ set top_module [lindex $::argv 0]
|
|||
set device_part [lindex $::argv 1]
|
||||
set vcs_file [lindex $::argv 2]
|
||||
set xdc_file [lindex $::argv 3]
|
||||
set tool_dir [lindex $::argv 4]
|
||||
|
||||
set script_dir $::env(SCRIPT_DIR)
|
||||
set source_dir [file dirname [info script]]
|
||||
|
||||
puts "Using top_module=$top_module"
|
||||
puts "Using device_part=$device_part"
|
||||
puts "Using vcs_file=$vcs_file"
|
||||
puts "Using xdc_file=$xdc_file"
|
||||
puts "Using tool_dir=$tool_dir"
|
||||
puts "Using script_dir=$script_dir"
|
||||
puts "Using source_dir=$source_dir"
|
||||
|
||||
# Set the number of jobs based on MAX_JOBS environment variable
|
||||
if {[info exists ::env(MAX_JOBS)]} {
|
||||
|
@ -48,10 +51,10 @@ if {[info exists ::env(FPU_IP)]} {
|
|||
set ip_dir $::env(FPU_IP)
|
||||
set argv [list $ip_dir $device_part]
|
||||
set argc 2
|
||||
source ${tool_dir}/xilinx_ip_gen.tcl
|
||||
source ${script_dir}/xilinx_ip_gen.tcl
|
||||
}
|
||||
|
||||
source "${tool_dir}/parse_vcs_list.tcl"
|
||||
source "${script_dir}/parse_vcs_list.tcl"
|
||||
set vlist [parse_vcs_list "${vcs_file}"]
|
||||
|
||||
set vsources_list [lindex $vlist 0]
|
||||
|
@ -84,37 +87,52 @@ if {[info exists ::env(FPU_IP)]} {
|
|||
|
||||
update_compile_order -fileset sources_1
|
||||
|
||||
# Synthesis
|
||||
set_property top $top_module [current_fileset]
|
||||
|
||||
set_property \
|
||||
-name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \
|
||||
-value {-mode out_of_context -flatten_hierarchy "rebuilt"} \
|
||||
-objects [get_runs synth_1]
|
||||
|
||||
# Synthesis
|
||||
# register compilation hooks
|
||||
#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1]
|
||||
#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1]
|
||||
set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1]
|
||||
#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1]
|
||||
#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1]
|
||||
#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1]
|
||||
|
||||
if {$num_jobs != 0} {
|
||||
launch_runs synth_1 -jobs $num_jobs
|
||||
launch_runs synth_1 -verbose -jobs $num_jobs
|
||||
} else {
|
||||
launch_runs synth_1
|
||||
launch_runs synth_1 -verbose
|
||||
}
|
||||
wait_on_run synth_1
|
||||
open_run synth_1
|
||||
write_checkpoint -force post_synth.dcp
|
||||
report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages
|
||||
report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages
|
||||
|
||||
# Implementation
|
||||
if {$num_jobs != 0} {
|
||||
launch_runs impl_1 -jobs $num_jobs
|
||||
launch_runs impl_1 -verbose -jobs $num_jobs
|
||||
} else {
|
||||
launch_runs impl_1
|
||||
launch_runs impl_1 -verbose
|
||||
}
|
||||
wait_on_run impl_1
|
||||
open_run impl_1
|
||||
write_checkpoint -force post_impl.dcp
|
||||
report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages
|
||||
|
||||
# Generate the synthesis report
|
||||
report_place_status -file place.rpt
|
||||
report_route_status -file route.rpt
|
||||
report_timing_summary -file timing.rpt
|
||||
|
||||
# Generate timing report
|
||||
report_timing -nworst 10 -delay_type max -sort_by group -file timing.rpt
|
||||
|
||||
# Generate power and drc reports
|
||||
report_power -file power.rpt
|
||||
report_drc -file drc.rpt
|
||||
|
||||
|
@ -125,4 +143,4 @@ set elapsed_time [expr {[clock seconds] - $start_time}]
|
|||
set hours [format "%02d" [expr {$elapsed_time / 3600}]]
|
||||
set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]]
|
||||
set seconds [format "%02d" [expr {$elapsed_time % 60}]]
|
||||
puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s"
|
||||
puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
PROJECT = Unittest
|
||||
PROJECT = VX_fifo_queue
|
||||
TOP_LEVEL_ENTITY = $(PROJECT)
|
||||
SRC_FILE = $(PROJECT).sv
|
||||
|
||||
|
|
|
@ -24,11 +24,8 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu
|
|||
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
||||
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
|
||||
endif
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex
|
||||
RASTER_INCLUDE = -I$(RTL_DIR)/raster
|
||||
OM_INCLUDE = -I$(RTL_DIR)/om
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||
RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE)
|
||||
RTL_INCLUDE += $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(SRC_DIR)
|
||||
|
||||
# compilation flags
|
||||
|
|
|
@ -121,8 +121,8 @@ proc run_setup {} {
|
|||
# None
|
||||
|
||||
# Set 'sim_1' fileset file properties for local files
|
||||
set file "testbench.v"
|
||||
set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]]
|
||||
set file "testbench.v"
|
||||
set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]]
|
||||
set_property -name "file_type" -value "Verilog" -objects $file_obj
|
||||
set_property -name "is_enabled" -value "1" -objects $file_obj
|
||||
set_property -name "is_global_include" -value "0" -objects $file_obj
|
||||
|
@ -300,7 +300,7 @@ set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]]
|
|||
CONFIG.Assume_Synchronous_Clk {true} \
|
||||
CONFIG.Byte_Size {8} \
|
||||
CONFIG.Load_Init_File {true} \
|
||||
CONFIG.Coe_File {@CURRENTDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \
|
||||
CONFIG.Coe_File {@BUILDDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \
|
||||
CONFIG.EN_SAFETY_CKT {true} \
|
||||
CONFIG.Enable_32bit_Address {true} \
|
||||
CONFIG.Fill_Remaining_Memory_Locations {false} \
|
||||
|
|
|
@ -76,22 +76,21 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c)
|
|||
|
||||
# include sources
|
||||
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
RTL_PKGS += $(RTL_DIR)/tex/VX_tex_pkg.sv $(RTL_DIR)/raster/VX_raster_pkg.sv $(RTL_DIR)/om/VX_om_pkg.sv
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu
|
||||
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
||||
RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
|
||||
endif
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex
|
||||
RASTER_INCLUDE = -I$(RTL_DIR)/raster
|
||||
OM_INCLUDE = -I$(RTL_DIR)/om
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR)
|
||||
RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE)
|
||||
RTL_INCLUDE += $(FPU_INCLUDE)
|
||||
|
||||
# Kernel compiler global settings
|
||||
VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache
|
||||
VPP_FLAGS += --vivado.synth.jobs $(JOBS) --vivado.impl.jobs $(JOBS)
|
||||
|
||||
# register compilation hooks
|
||||
VPP_FLAGS += --xp "vivado_prop:run.impl_1.STEPS.OPT_DESIGN.TCL.PRE={$(SCRIPT_DIR)/xilinx_async_bram_patch.tcl}"
|
||||
|
||||
# load platform settings
|
||||
include $(SRC_DIR)/platforms.mk
|
||||
|
||||
|
@ -178,6 +177,7 @@ $(BIN_DIR)/emconfig.json:
|
|||
|
||||
report: $(XCLBIN_CONTAINER)
|
||||
ifeq ($(TARGET), hw)
|
||||
cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin
|
||||
cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin
|
||||
cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin
|
||||
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
all:
|
||||
$(MAKE) -C cache
|
||||
$(MAKE) -C generic_queue
|
||||
$(MAKE) -C mem_streamer
|
||||
$(MAKE) -C cache_top
|
||||
|
@ -9,7 +8,6 @@ all:
|
|||
$(MAKE) -C mem_unit_top
|
||||
|
||||
run:
|
||||
$(MAKE) -C cache run
|
||||
$(MAKE) -C generic_queue run
|
||||
$(MAKE) -C mem_streamer run
|
||||
$(MAKE) -C cache_top run
|
||||
|
@ -19,7 +17,6 @@ run:
|
|||
$(MAKE) -C mem_unit_top run
|
||||
|
||||
clean:
|
||||
$(MAKE) -C cache clean
|
||||
$(MAKE) -C generic_queue clean
|
||||
$(MAKE) -C mem_streamer clean
|
||||
$(MAKE) -C cache_top clean
|
||||
|
|
26
hw/unittest/cache/Makefile
vendored
26
hw/unittest/cache/Makefile
vendored
|
@ -1,26 +0,0 @@
|
|||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
PROJECT := cache
|
||||
|
||||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
DPI_DIR := $(VORTEX_HOME)/hw/dpi
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
|
||||
|
||||
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
|
||||
CXXFLAGS += -I$(ROOT_DIR)/hw
|
||||
|
||||
SRCS := $(DPI_DIR)/util_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/cachesim.cpp $(SRC_DIR)/testbench.cpp
|
||||
|
||||
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
|
||||
|
||||
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv
|
||||
|
||||
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||
|
||||
TOP := VX_cache_top
|
||||
|
||||
include ../common.mk
|
354
hw/unittest/cache/cachesim.cpp
vendored
354
hw/unittest/cache/cachesim.cpp
vendored
|
@ -1,354 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cachesim.h"
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <bitset>
|
||||
|
||||
// Tick window [TRACE_START_TIME, TRACE_STOP_TIME) inside which tracing is
// reported as enabled regardless of the manual switch. Both bounds can be
// overridden from the compiler command line.
#if !defined(TRACE_START_TIME)
#define TRACE_START_TIME 0ull
#endif

#if !defined(TRACE_STOP_TIME)
#define TRACE_STOP_TIME -1ull
#endif

// Simulation tick counter, advanced by the simulator as it evaluates the model.
static uint64_t timestamp{0};
// Manual tracing switch, set through sim_trace_enable().
static bool trace_enabled{false};
// Run-time copies of the compile-time trace window bounds.
static uint64_t trace_start_time{TRACE_START_TIME};
static uint64_t trace_stop_time{TRACE_STOP_TIME};
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
// Sets the manual tracing switch consulted by sim_trace_enabled().
void sim_trace_enable(bool enable) {
  trace_enabled = enable;
}
|
||||
|
||||
// Builds the Verilated cache model, optionally opens the VCD trace file, and
// puts every bookkeeping member into a defined state.
CacheSim::CacheSim() {
  // force random values for uninitialized signals
  Verilated::randReset(2);

  // create RTL module instance
  cache_ = new VVX_cache_top();

#ifdef VCD_OUTPUT
  // waveform dump of the whole design (up to 99 hierarchy levels) into trace.vcd
  Verilated::traceEverOn(true);
  tfp_ = new VerilatedVcdC;
  cache_->trace(tfp_, 99);
  tfp_->open("trace.vcd");
#endif

  // no backing memory until attach_ram() is called
  ram_ = nullptr;
  mem_rsp_active_ = 0;

  // snoop bookkeeping: previously only snp_req_active_ was initialized,
  // leaving the other two counters with indeterminate values
  snp_req_active_ = 0;
  snp_req_size_ = 0;
  pending_snp_reqs_ = 0;
}
|
||||
|
||||
// Releases everything the simulator allocated: the trace writer, the RTL
// model, memory responses still in flight, and the per-tag response buffers.
CacheSim::~CacheSim() {
#ifdef VCD_OUTPUT
  tfp_->close();
  delete tfp_;  // allocated with new in the constructor
#endif
  delete cache_;

  // payloads of memory responses that were never delivered (malloc'd in
  // eval_mem_bus)
  for (auto& pending : mem_rsp_vec_) {
    free(pending.data);
  }
  mem_rsp_vec_.clear();

  // per-tag response buffers (new[]'d in send_req)
  for (auto& entry : core_rsp_vec_) {
    delete [] entry.second;
  }
  core_rsp_vec_.clear();

  // Queued core requests are NOT deleted here: they are allocated by the
  // caller and the same pointer may have been queued more than once.
}
|
||||
|
||||
// Points the simulator at a backing RAM and discards memory responses that
// were scheduled earlier.
void CacheSim::attach_ram(RAM* ram) {
  ram_ = ram;

  // each pending response owns a malloc'd payload; release it before the
  // entries are dropped, otherwise clear() leaks them
  for (auto& pending : mem_rsp_vec_) {
    free(pending.data);
  }
  mem_rsp_vec_.clear();
}
|
||||
|
||||
// Pulses the model's reset input for one step, runs one more step with reset
// released, and drops memory responses that were still pending.
void CacheSim::reset() {
#ifndef NDEBUG
  std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif

  cache_->reset = 1;
  this->step();
  cache_->reset = 0;
  this->step();

  // pending responses own malloc'd payloads; free them before clearing so
  // repeated resets do not leak
  for (auto& pending : mem_rsp_vec_) {
    free(pending.data);
  }
  mem_rsp_vec_.clear();

  // NOTE(review): the core request queue and per-tag response buffers are
  // left untouched here, exactly as before - confirm whether they should be
  // reset as well.
}
|
||||
|
||||
void CacheSim::step() {
|
||||
//std::cout << timestamp << ": [sim] step()" << std::endl;
|
||||
//toggle clock
|
||||
cache_->clk = 0;
|
||||
this->eval();
|
||||
|
||||
cache_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
//handle core and memory reqs and rsps
|
||||
this->eval_reqs();
|
||||
this->eval_rsps();
|
||||
this->eval_mem_bus();
|
||||
timestamp++;
|
||||
}
|
||||
|
||||
// Evaluates the RTL model once, records the waveform sample when VCD output
// is compiled in, and moves the tick counter forward by one.
void CacheSim::eval() {
  cache_->eval();
#ifdef VCD_OUTPUT
  tfp_->dump(timestamp);
#endif
  timestamp += 1;
}
|
||||
|
||||
void CacheSim::run(){
|
||||
//#ifndef NDEBUG
|
||||
|
||||
//#endif
|
||||
this->step();
|
||||
|
||||
int valid = 300;
|
||||
int stalls = 20 + 10;
|
||||
|
||||
while (valid > -1) {
|
||||
|
||||
this->step();
|
||||
display_miss();
|
||||
if(cache_->core_rsp_valid){
|
||||
get_core_rsp();
|
||||
}
|
||||
|
||||
if(!cache_->core_req_valid && !cache_->core_rsp_valid){
|
||||
valid--;
|
||||
|
||||
}
|
||||
stalls--;
|
||||
if (stalls == 20){
|
||||
//stall_mem();
|
||||
//send_snoop_req();
|
||||
stalls--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deasserts the core request valid input of the model.
void CacheSim::clear_req(){
  cache_->core_req_valid = 0;
}
|
||||
|
||||
// Queues a core request and makes sure a four-word response buffer exists
// for its tag. The request object stays owned by the caller and must remain
// alive until it has been issued.
void CacheSim::send_req(core_req_t *req){
  core_req_vec_.push(req);

  // Allocate only when the tag is new. The previous code allocated first and
  // then called insert(), which does nothing for an existing key, so the
  // fresh buffer leaked whenever a tag was reused.
  if (core_rsp_vec_.find(req->tag) == core_rsp_vec_.end()) {
    // value-initialized so a tag that never gets a response reads as zeros
    // instead of indeterminate memory
    unsigned int *data = new unsigned int[4]();
    core_rsp_vec_.insert(std::pair<unsigned int, unsigned int*>(req->tag, data));
  }
}
|
||||
|
||||
bool CacheSim::get_core_req_ready(){
|
||||
return cache_->core_req_ready;
|
||||
}
|
||||
|
||||
bool CacheSim::get_core_rsp_ready(){
|
||||
return cache_->core_rsp_ready;
|
||||
}
|
||||
|
||||
void CacheSim::eval_reqs(){
|
||||
//check to see if cache is accepting reqs
|
||||
if(!core_req_vec_.empty() && cache_->core_req_ready){
|
||||
core_req_t *req = core_req_vec_.front();
|
||||
|
||||
cache_->core_req_valid = req->valid;
|
||||
cache_->core_req_rw = req->rw;
|
||||
cache_->core_req_byteen = req->byteen;
|
||||
|
||||
cache_->core_req_addr[0] = req->addr[0];
|
||||
cache_->core_req_addr[1] = req->addr[1];
|
||||
cache_->core_req_addr[2] = req->addr[2];
|
||||
cache_->core_req_addr[3] = req->addr[3];
|
||||
|
||||
cache_->core_req_data[0] = req->data[0];
|
||||
cache_->core_req_data[1] = req->data[1];
|
||||
cache_->core_req_data[2] = req->data[2];
|
||||
cache_->core_req_data[3] = req->data[3];
|
||||
|
||||
cache_->core_req_tag = req->tag;
|
||||
|
||||
core_req_vec_.pop();
|
||||
|
||||
} else {
|
||||
clear_req();
|
||||
}
|
||||
}
|
||||
|
||||
void CacheSim::eval_rsps(){
|
||||
//check to see if a request has been responded to
|
||||
if (cache_->core_rsp_valid){
|
||||
core_rsp_vec_.at(cache_->core_rsp_tag)[0] = cache_->core_rsp_data[0];
|
||||
core_rsp_vec_.at(cache_->core_rsp_tag)[1] = cache_->core_rsp_data[1];
|
||||
core_rsp_vec_.at(cache_->core_rsp_tag)[2] = cache_->core_rsp_data[2];
|
||||
core_rsp_vec_.at(cache_->core_rsp_tag)[3] = cache_->core_rsp_data[3];
|
||||
}
|
||||
}
|
||||
|
||||
// Deasserts mem_req_ready so the cache sees the memory port as busy.
void CacheSim::stall_mem(){
  cache_->mem_req_ready = 0;
}
|
||||
|
||||
// Currently a no-op: the snoop request stimulus below is disabled.
void CacheSim::send_snoop_req(){
  // cache_->snp_req_valid = 1;
  // cache_->snp_req_addr = 0x12222222;
  // cache_->snp_req_invalidate = 1;
  // cache_->snp_req_tag = 0xff;
}
|
||||
|
||||
void CacheSim::eval_mem_bus() {
|
||||
if (ram_ == nullptr) {
|
||||
cache_->mem_req_ready = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// schedule memory responses
|
||||
int dequeue_index = -1;
|
||||
for (int i = 0; i < mem_rsp_vec_.size(); i++) {
|
||||
if (mem_rsp_vec_[i].cycles_left > 0) {
|
||||
mem_rsp_vec_[i].cycles_left -= 1;
|
||||
}
|
||||
if ((dequeue_index == -1)
|
||||
&& (mem_rsp_vec_[i].cycles_left == 0)) {
|
||||
dequeue_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
// send memory response
|
||||
if (mem_rsp_active_
|
||||
&& cache_->mem_rsp_valid
|
||||
&& cache_->mem_rsp_ready) {
|
||||
mem_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rsp_active_) {
|
||||
if (dequeue_index != -1) { //time to respond to the request
|
||||
cache_->mem_rsp_valid = 1;
|
||||
|
||||
//copy data from the rsp queue to the cache module
|
||||
memcpy(cache_->mem_rsp_data.data(), mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE);
|
||||
|
||||
cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag;
|
||||
free(mem_rsp_vec_[dequeue_index].data); //take data out of the queue
|
||||
mem_rsp_vec_.erase(mem_rsp_vec_.begin() + dequeue_index);
|
||||
mem_rsp_active_ = true;
|
||||
} else {
|
||||
cache_->mem_rsp_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// handle memory stalls
|
||||
bool mem_stalled = false;
|
||||
#ifdef ENABLE_MEM_STALLS
|
||||
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
|
||||
mem_stalled = true;
|
||||
} else
|
||||
if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) {
|
||||
mem_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
if (cache_->mem_req_valid) {
|
||||
if (cache_->mem_req_rw) { //write = 1
|
||||
uint64_t byteen = cache_->mem_req_byteen;
|
||||
uint64_t base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
uint8_t* data = reinterpret_cast<uint8_t*>(cache_->mem_req_data.data());
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
mem_req_t mem_req;
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
mem_req.data = (uint8_t*)malloc(MEM_BLOCK_SIZE);
|
||||
mem_req.tag = cache_->mem_req_tag;
|
||||
ram_->read(cache_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data);
|
||||
mem_rsp_vec_.push_back(mem_req);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cache_->mem_req_ready = ~mem_stalled;
|
||||
}
|
||||
|
||||
bool CacheSim::assert_equal(unsigned int* data, unsigned int tag){
|
||||
int check = 0;
|
||||
unsigned int *rsp = core_rsp_vec_.at(tag);
|
||||
for (int i = 0; i < 4; ++i){
|
||||
for (int j = 0; j < 4; ++j){
|
||||
if (data[i] == rsp[j]){
|
||||
check++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return check;
|
||||
|
||||
}
|
||||
|
||||
//DEBUG
|
||||
|
||||
// Debug hook called once per step from run(); every statement is disabled,
// so it currently does nothing.
void CacheSim::display_miss(){
  // int i = (unsigned int)cache_->miss_vec;
  // std::bitset<8> x(i);
  // if (i) std::cout << "Miss Vec " << x << std::endl;
  // std::cout << "Miss Vec 0" << cache_->miss_vec[0] << std::endl;
}
|
||||
|
||||
// Copies the four data words currently on the model's core response port
// into the caller's array (despite the name, this reads the response data).
void CacheSim::get_core_req(unsigned int (&rsp)[4]){
  for (int lane = 0; lane < 4; ++lane) {
    rsp[lane] = cache_->core_rsp_data[lane];
  }

  // disabled debug output:
  // std::cout << std::hex << "core_rsp_valid: " << cache_->core_rsp_valid << std::endl;
  // std::cout << std::hex << "core_rsp_data: " << cache_->core_rsp_data << std::endl;
  // std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl;
}
|
||||
|
||||
// Prints the core response port (valid, four data words, tag) to stdout in
// hexadecimal.
void CacheSim::get_core_rsp(){
  //std::cout << cache_->genblk5_BRA_0_KET_->bank->is_fill_in_pipe<< std::endl;

  // valid is narrowed through char before widening, so it prints as a number
  const char valid_bits = cache_->core_rsp_valid;
  std::cout << std::hex << "core_rsp_valid: " << static_cast<unsigned int>(valid_bits) << std::endl;

  static const char* const kDataLabels[4] = {
    "core_rsp_data[0]: ",
    "core_rsp_data[1]: ",
    "core_rsp_data[2]: ",
    "core_rsp_data[3]: ",
  };
  for (int lane = 0; lane < 4; ++lane) {
    std::cout << std::hex << kDataLabels[lane] << cache_->core_rsp_data[lane] << std::endl;
  }

  std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl;
}
|
||||
|
||||
// Prints the memory request port of the model to stdout, one field per line.
void CacheSim::get_mem_req(){
  std::ostream& out = std::cout;
  out << std::hex;  // the hex flag is sticky, so one switch covers every line
  out << "mem_req_valid: " << cache_->mem_req_valid << std::endl;
  out << "mem_req_rw: " << cache_->mem_req_rw << std::endl;
  out << "mem_req_byteen: " << cache_->mem_req_byteen << std::endl;
  out << "mem_req_addr: " << cache_->mem_req_addr << std::endl;
  out << "mem_req_data: " << cache_->mem_req_data << std::endl;
  out << "mem_req_tag: " << cache_->mem_req_tag << std::endl;
}
|
||||
|
||||
// Prints the memory response port of the model to stdout, one field per line.
void CacheSim::get_mem_rsp(){
  std::ostream& out = std::cout;
  out << std::hex;  // the hex flag is sticky, so one switch covers every line
  out << "mem_rsp_valid: " << cache_->mem_rsp_valid << std::endl;
  out << "mem_rsp_data: " << cache_->mem_rsp_data << std::endl;
  out << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl;
  out << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl;
}
|
104
hw/unittest/cache/cachesim.h
vendored
104
hw/unittest/cache/cachesim.h
vendored
|
@ -1,104 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "VVX_cache_top.h"
|
||||
#include "VVX_cache_top__Syms.h"
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "ram.h"
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
|
||||
#define ENABLE_MEM_STALLS
|
||||
#define MEM_LATENCY 100
|
||||
#define MEM_RQ_SIZE 16
|
||||
#define MEM_STALLS_MODULO 16
|
||||
|
||||
// A memory read waiting to be returned to the cache.
struct mem_req_t {
  int cycles_left;  // steps remaining before the response may be sent
  uint8_t *data;    // block payload read from RAM
  unsigned tag;     // tag copied from the memory request
};
|
||||
|
||||
// A core-side request as driven onto the cache's core request inputs.
struct core_req_t {
  char valid;        // value written to core_req_valid
  char rw;           // value written to core_req_rw
  unsigned byteen;   // value written to core_req_byteen
  unsigned *addr;    // four address words (indices 0..3 are read)
  unsigned *data;    // four data words (indices 0..3 are read)
  unsigned int tag;  // request tag; also keys the response buffer
};
|
||||
|
||||
class CacheSim {
|
||||
public:
|
||||
|
||||
CacheSim();
|
||||
virtual ~CacheSim();
|
||||
|
||||
bool busy();
|
||||
|
||||
void reset();
|
||||
void step();
|
||||
void wait(uint32_t cycles);
|
||||
void attach_ram(RAM* ram);
|
||||
void run(); //run until all reqs are empty
|
||||
|
||||
//req/rsp
|
||||
void send_req(core_req_t *req);
|
||||
void clear_req();
|
||||
void stall_mem();
|
||||
void send_snoop_req();
|
||||
void send_snp_fwd_in();
|
||||
|
||||
//assert funcs
|
||||
bool assert_equal(unsigned int* data, unsigned int tag);
|
||||
|
||||
//debug funcs
|
||||
void get_mem_req();
|
||||
void get_core_req(unsigned int (&rsp)[4]);
|
||||
void get_core_rsp();
|
||||
bool get_core_req_ready();
|
||||
bool get_core_rsp_ready();
|
||||
void get_mem_rsp();
|
||||
void display_miss();
|
||||
|
||||
private:
|
||||
|
||||
void eval();
|
||||
void eval_reqs();
|
||||
void eval_rsps();
|
||||
void eval_mem_bus();
|
||||
|
||||
std::queue<core_req_t*> core_req_vec_;
|
||||
std::vector<mem_req_t> mem_rsp_vec_;
|
||||
std::map<unsigned int, unsigned int*> core_rsp_vec_;
|
||||
int mem_rsp_active_;
|
||||
|
||||
uint32_t snp_req_active_;
|
||||
uint32_t snp_req_size_;
|
||||
uint32_t pending_snp_reqs_;
|
||||
|
||||
VVX_cache_top* cache_;
|
||||
RAM* ram_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC* tfp_;
|
||||
#endif
|
||||
};
|
77
hw/unittest/cache/ram.h
vendored
77
hw/unittest/cache/ram.h
vendored
|
@ -1,77 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Sparse byte-addressable model of a 32-bit address space. Storage is split
// into 4096 pages of 1 MiB each; a page is allocated (zero-filled) the first
// time any address inside it is touched, whether by a read or a write.
class RAM {
private:

  static constexpr uint32_t kPageBits = 20;                     // log2 of the page size
  static constexpr uint32_t kPageSize = 1u << kPageBits;        // bytes per page
  static constexpr uint32_t kNumPages = 1u << (32 - kPageBits); // pages in the address space

  // mutable because const accessors may need to allocate a page on demand
  mutable uint8_t *mem_[kNumPages];

  // Returns a pointer to the byte at `address`, allocating its page if needed.
  uint8_t *get(uint32_t address) const {
    const uint32_t page = address >> kPageBits;
    const uint32_t offset = address & (kPageSize - 1);
    if (mem_[page] == nullptr) {
      // value-initialized: untouched memory reads as zero instead of
      // whatever the allocator happened to return
      mem_[page] = new uint8_t[kPageSize]();
    }
    return mem_[page] + offset;
  }

public:

  RAM() {
    for (uint32_t i = 0; i < kNumPages; i++) {
      mem_[i] = nullptr;
    }
  }

  ~RAM() {
    this->clear();
  }

  // Pages are owned through raw pointers; a copy would free them twice.
  RAM(const RAM&) = delete;
  RAM& operator=(const RAM&) = delete;

  // Size of the modelled address space in bytes.
  size_t size() const {
    return (1ull << 32);
  }

  // Releases every allocated page; subsequent accesses see zeroed memory.
  void clear() {
    for (uint32_t i = 0; i < kNumPages; i++) {
      if (mem_[i]) {
        delete [] mem_[i];
        mem_[i] = nullptr;
      }
    }
  }

  // Copies `length` bytes starting at `address` into `data`. The address is
  // computed in 32-bit arithmetic, so it wraps past the top of the space.
  void read(uint32_t address, uint32_t length, uint8_t *data) const {
    for (uint32_t i = 0; i < length; i++) {
      data[i] = *this->get(address + i);
    }
  }

  // Copies `length` bytes from `data` into memory starting at `address`.
  void write(uint32_t address, uint32_t length, const uint8_t *data) {
    for (uint32_t i = 0; i < length; i++) {
      *this->get(address + i) = data[i];
    }
  }

  uint8_t& operator[](uint32_t address) {
    return *get(address);
  }

  const uint8_t& operator[](uint32_t address) const {
    return *get(address);
  }
};
|
248
hw/unittest/cache/testbench.cpp
vendored
248
hw/unittest/cache/testbench.cpp
vendored
|
@ -1,248 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cachesim.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define VCD_OUTPUT 1
|
||||
|
||||
|
||||
// Basic write-then-read scenario: writes four words, reads the same
// addresses back, and checks the response buffer stored under the shared tag.
// Returns 1 when the check succeeds, 0 otherwise.
int REQ_RSP(CacheSim *sim){ //verified
  unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // Requests stay heap-allocated on purpose: the simulator keeps the raw
  // pointers in its queue and never takes ownership.
  //write req
  core_req_t* write = new core_req_t;
  write->valid = 0xf;
  write->rw = 0xf;
  write->byteen = 0xffff;
  write->addr = addr;
  write->data = data;
  write->tag = 0xff;

  //read req
  core_req_t* read = new core_req_t;
  read->valid = 0xf;
  read->rw = 0;
  read->byteen = 0xffff;
  read->addr = addr;
  read->data = addr;
  read->tag = 0xff;

  // reset the device
  sim->reset();

  //queue reqs
  sim->send_req(write);
  sim->send_req(read);

  sim->run();

  // assert_equal() returns a bool, so the old `check == 4` comparison could
  // never be true and this test always reported failure.
  const bool matched = sim->assert_equal(data, write->tag);
  return matched ? 1 : 0;
}
|
||||
|
||||
// Write followed by a read of the same addresses, using distinct tags for
// the two requests. Returns the result of the response comparison (0 or 1).
int HIT_1(CacheSim *sim){
  unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // Requests stay heap-allocated on purpose: the simulator keeps the raw
  // pointers in its queue and never takes ownership.
  //write req
  core_req_t* write = new core_req_t;
  write->valid = 0xf;
  write->rw = 0xf;
  write->byteen = 0xffff;
  write->addr = addr;
  write->data = data;
  write->tag = 0x11;

  //read req
  core_req_t* read = new core_req_t;
  read->valid = 0xf;
  read->rw = 0;
  read->byteen = 0xffff;
  read->addr = addr;
  read->data = addr;
  read->tag = 0x22;

  // reset the device
  sim->reset();

  //queue reqs
  sim->send_req(write);
  sim->send_req(read);

  sim->run();

  // NOTE(review): the comparison uses the write's tag (0x11) although the
  // data is presumably returned under the read's tag (0x22) - confirm.
  bool check = sim->assert_equal(data, write->tag);

  return check;
}
|
||||
|
||||
// One write followed by three reads to three different address sets, all
// sharing tag 0xff. Returns the result of the response comparison (0 or 1).
int MISS_1(CacheSim *sim){
  unsigned int addr1[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int addr2[4] = {0x12229222, 0xabbbb4bb, 0xcddd47dd, 0xe4423544};
  unsigned int addr3[4] = {0x12223332, 0xabb454bb, 0xcdddeefd, 0xe4447744};
  unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // Requests stay heap-allocated on purpose: the simulator keeps the raw
  // pointers in its queue and never takes ownership.
  //write req
  core_req_t* write = new core_req_t;
  write->valid = 0xf;
  write->rw = 0xf;
  write->byteen = 0xffff;
  write->addr = addr1;
  write->data = data;
  write->tag = 0xff;

  //read reqs
  core_req_t* read1 = new core_req_t;
  read1->valid = 0xf;
  read1->rw = 0;
  read1->byteen = 0xffff;
  read1->addr = addr1;
  read1->data = data;
  read1->tag = 0xff;

  core_req_t* read2 = new core_req_t;
  read2->valid = 0xf;
  read2->rw = 0;
  read2->byteen = 0xffff;
  read2->addr = addr2;
  read2->data = data;
  read2->tag = 0xff;

  core_req_t* read3 = new core_req_t;
  read3->valid = 0xf;
  read3->rw = 0;
  read3->byteen = 0xffff;
  read3->addr = addr3;
  read3->data = data;
  read3->tag = 0xff;

  // reset the device
  sim->reset();

  //queue reqs
  sim->send_req(write);
  sim->send_req(read1);
  sim->send_req(read2);
  sim->send_req(read3);

  sim->run();

  // every request shares one tag, so the buffer holds whichever response
  // was written last
  bool check = sim->assert_equal(data, write->tag);

  return check;
}
|
||||
// Write followed by a read of the same addresses under tag 0xff. Returns the
// result of the response comparison (0 or 1).
// NOTE(review): no flush-specific stimulus is issued here - the sequence is
// the same as the basic request/response test; confirm the intent.
int FLUSH(CacheSim *sim){
  unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // Requests stay heap-allocated on purpose: the simulator keeps the raw
  // pointers in its queue and never takes ownership.
  //write req
  core_req_t* write = new core_req_t;
  write->valid = 0xf;
  write->rw = 0xf;
  write->byteen = 0xffff;
  write->addr = addr;
  write->data = data;
  write->tag = 0xff;

  //read req
  core_req_t* read = new core_req_t;
  read->valid = 0xf;
  read->rw = 0;
  read->byteen = 0xffff;
  read->addr = addr;
  read->data = addr;
  read->tag = 0xff;

  // reset the device
  sim->reset();

  //queue reqs
  sim->send_req(write);
  sim->send_req(read);

  sim->run();

  bool check = sim->assert_equal(data, write->tag);

  return check;
}
|
||||
|
||||
|
||||
// Back-pressure scenario: queues the same write request ten times, then one
// read, so requests pile up whenever the core or memory side stalls.
// Returns the result of the response comparison (0 or 1).
int BACK_PRESSURE(CacheSim *sim){
  //happens whenever the core is stalled or memory is stalled
  unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // Requests stay heap-allocated on purpose: the simulator keeps the raw
  // pointers in its queue (the write pointer ten times) and never takes
  // ownership.
  //write req
  core_req_t* write = new core_req_t;
  write->valid = 0xf;
  write->rw = 0xf;
  write->byteen = 0xffff;
  write->addr = addr;
  write->data = data;
  write->tag = 0xff;

  //read req
  core_req_t* read = new core_req_t;
  read->valid = 0xf;
  read->rw = 0;
  read->byteen = 0xffff;
  read->addr = addr;
  read->data = addr;
  read->tag = 0xff;

  // reset the device
  sim->reset();

  //queue reqs
  for (int i = 0; i < 10; i++){
    sim->send_req(write);
  }
  sim->send_req(read);

  sim->run();

  bool check = sim->assert_equal(data, write->tag);

  return check;
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
//init
|
||||
RAM ram;
|
||||
CacheSim cachesim;
|
||||
cachesim.attach_ram(&ram);
|
||||
int check = REQ_RSP(&cachesim);
|
||||
if(check){
|
||||
std::cout << "PASSED" << std::endl;
|
||||
} else {
|
||||
std::cout << "FAILED" << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -21,4 +21,6 @@ RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
|||
|
||||
TOP := VX_fifo_queue
|
||||
|
||||
PARAMS := -GDATAW=32 -GDEPTH=8
|
||||
|
||||
include ../common.mk
|
|
@ -241,8 +241,6 @@ private:
|
|||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
tfp_->dump(timestamp);
|
||||
} else {
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
++timestamp;
|
||||
|
|
|
@ -333,14 +333,27 @@ private:
|
|||
}
|
||||
|
||||
device_->ap_rst_n = 1;
|
||||
|
||||
// this AXI device is always ready to accept new requests
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
*m_axi_mem_[i].arready = 1;
|
||||
*m_axi_mem_[i].awready = 1;
|
||||
*m_axi_mem_[i].wready = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
this->axi_mem_bus_eval();
|
||||
device_->ap_clk = 0;
|
||||
this->eval();
|
||||
|
||||
this->axi_mem_bus_eval(0);
|
||||
|
||||
device_->ap_clk = 1;
|
||||
this->eval();
|
||||
|
||||
this->axi_mem_bus_eval(1);
|
||||
|
||||
dram_sim_.tick();
|
||||
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
if (!dram_queues_[i].empty()) {
|
||||
|
@ -358,13 +371,6 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
dram_sim_.tick();
|
||||
|
||||
device_->ap_clk = 0;
|
||||
this->eval();
|
||||
device_->ap_clk = 1;
|
||||
this->eval();
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
@ -381,162 +387,175 @@ private:
|
|||
}
|
||||
|
||||
void axi_ctrl_bus_reset() {
|
||||
// address read request
|
||||
// read request address
|
||||
device_->s_axi_ctrl_arvalid = 0;
|
||||
device_->s_axi_ctrl_araddr = 0;
|
||||
|
||||
// data read response
|
||||
// read response
|
||||
device_->s_axi_ctrl_rready = 0;
|
||||
|
||||
// address write request
|
||||
// write request address
|
||||
device_->s_axi_ctrl_awvalid = 0;
|
||||
device_->s_axi_ctrl_awaddr = 0;
|
||||
|
||||
// data write request
|
||||
// write request data
|
||||
device_->s_axi_ctrl_wvalid = 0;
|
||||
device_->s_axi_ctrl_wdata = 0;
|
||||
device_->s_axi_ctrl_wstrb = 0;
|
||||
|
||||
// data write response
|
||||
// write response
|
||||
device_->s_axi_ctrl_bready = 0;
|
||||
}
|
||||
|
||||
void axi_mem_bus_reset() {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
// address read request
|
||||
*m_axi_mem_[i].arready = 0;
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
// read request address
|
||||
*m_axi_mem_[b].arready = 0;
|
||||
|
||||
// address write request
|
||||
*m_axi_mem_[i].awready = 0;
|
||||
// write request address
|
||||
*m_axi_mem_[b].awready = 0;
|
||||
|
||||
// data write request
|
||||
*m_axi_mem_[i].wready = 0;
|
||||
// write request data
|
||||
*m_axi_mem_[b].wready = 0;
|
||||
|
||||
// data read response
|
||||
*m_axi_mem_[i].rvalid = 0;
|
||||
// read response
|
||||
*m_axi_mem_[b].rvalid = 0;
|
||||
|
||||
// data write response
|
||||
*m_axi_mem_[i].bvalid = 0;
|
||||
// write response
|
||||
*m_axi_mem_[b].bvalid = 0;
|
||||
|
||||
// states
|
||||
m_axi_states_[i].write_req_pending = false;
|
||||
m_axi_states_[b].write_req_addr_ack = false;
|
||||
m_axi_states_[b].write_req_data_ack = false;
|
||||
}
|
||||
}
|
||||
|
||||
void axi_mem_bus_eval() {
|
||||
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
|
||||
// handle read responses
|
||||
if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) {
|
||||
*m_axi_mem_[i].rvalid = 0;
|
||||
void axi_mem_bus_eval(bool clk) {
|
||||
if (!clk) {
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
m_axi_states_[b].read_rsp_ready = *m_axi_mem_[b].rready;
|
||||
m_axi_states_[b].write_rsp_ready = *m_axi_mem_[b].bready;
|
||||
}
|
||||
if (!*m_axi_mem_[i].rvalid) {
|
||||
if (!pending_mem_reqs_[i].empty()
|
||||
&& (*pending_mem_reqs_[i].begin())->ready
|
||||
&& !(*pending_mem_reqs_[i].begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_[i].begin();
|
||||
return;
|
||||
}
|
||||
|
||||
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
|
||||
// handle read responses
|
||||
if (*m_axi_mem_[b].rvalid && m_axi_states_[b].read_rsp_ready) {
|
||||
*m_axi_mem_[b].rvalid = 0;
|
||||
}
|
||||
if (!*m_axi_mem_[b].rvalid) {
|
||||
if (!pending_mem_reqs_[b].empty()
|
||||
&& (*pending_mem_reqs_[b].begin())->ready
|
||||
&& !(*pending_mem_reqs_[b].begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_[b].begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
*m_axi_mem_[i].rvalid = 1;
|
||||
*m_axi_mem_[i].rid = mem_rsp->tag;
|
||||
*m_axi_mem_[i].rresp = 0;
|
||||
*m_axi_mem_[i].rlast = 1;
|
||||
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
|
||||
pending_mem_reqs_[i].erase(mem_rsp_it);
|
||||
*m_axi_mem_[b].rvalid = 1;
|
||||
*m_axi_mem_[b].rid = mem_rsp->tag;
|
||||
*m_axi_mem_[b].rresp = 0;
|
||||
*m_axi_mem_[b].rlast = 1;
|
||||
memcpy(m_axi_mem_[b].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
|
||||
pending_mem_reqs_[b].erase(mem_rsp_it);
|
||||
delete mem_rsp;
|
||||
}
|
||||
}
|
||||
|
||||
// handle write responses
|
||||
if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) {
|
||||
*m_axi_mem_[i].bvalid = 0;
|
||||
if (*m_axi_mem_[b].bvalid && m_axi_states_[b].write_rsp_ready) {
|
||||
*m_axi_mem_[b].bvalid = 0;
|
||||
}
|
||||
if (!*m_axi_mem_[i].bvalid) {
|
||||
if (!pending_mem_reqs_[i].empty()
|
||||
&& (*pending_mem_reqs_[i].begin())->ready
|
||||
&& (*pending_mem_reqs_[i].begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_[i].begin();
|
||||
if (!*m_axi_mem_[b].bvalid) {
|
||||
if (!pending_mem_reqs_[b].empty()
|
||||
&& (*pending_mem_reqs_[b].begin())->ready
|
||||
&& (*pending_mem_reqs_[b].begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_[b].begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
*m_axi_mem_[i].bvalid = 1;
|
||||
*m_axi_mem_[i].bid = mem_rsp->tag;
|
||||
*m_axi_mem_[i].bresp = 0;
|
||||
pending_mem_reqs_[i].erase(mem_rsp_it);
|
||||
*m_axi_mem_[b].bvalid = 1;
|
||||
*m_axi_mem_[b].bid = mem_rsp->tag;
|
||||
*m_axi_mem_[b].bresp = 0;
|
||||
pending_mem_reqs_[b].erase(mem_rsp_it);
|
||||
delete mem_rsp;
|
||||
}
|
||||
}
|
||||
|
||||
// handle read requests
|
||||
if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) {
|
||||
if (*m_axi_mem_[b].arvalid && *m_axi_mem_[b].arready) {
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = *m_axi_mem_[i].arid;
|
||||
mem_req->addr = uint64_t(*m_axi_mem_[i].araddr);
|
||||
mem_req->tag = *m_axi_mem_[b].arid;
|
||||
mem_req->addr = uint64_t(*m_axi_mem_[b].araddr);
|
||||
ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE);
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_[i].emplace_back(mem_req);
|
||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
|
||||
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag);
|
||||
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, mem_req->tag);
|
||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_req->data[i]);
|
||||
printf("%02x", mem_req->data[b]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
// send dram request
|
||||
dram_queues_[i].push(mem_req);
|
||||
dram_queues_[b].push(mem_req);
|
||||
}
|
||||
|
||||
if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) {
|
||||
*m_axi_mem_[i].wready = 0;
|
||||
// handle write address requests
|
||||
if (*m_axi_mem_[b].awvalid && *m_axi_mem_[b].awready && !m_axi_states_[b].write_req_addr_ack) {
|
||||
m_axi_states_[b].write_req_addr = *m_axi_mem_[b].awaddr;
|
||||
m_axi_states_[b].write_req_tag = *m_axi_mem_[b].awid;
|
||||
m_axi_states_[b].write_req_addr_ack = true;
|
||||
}
|
||||
|
||||
// handle address write requestsls
|
||||
if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) {
|
||||
m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr;
|
||||
m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid;
|
||||
// activate data channel
|
||||
*m_axi_mem_[i].wready = 1;
|
||||
m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid;
|
||||
// handle write data requests
|
||||
if (*m_axi_mem_[b].wvalid && *m_axi_mem_[b].wready && !m_axi_states_[b].write_req_data_ack) {
|
||||
m_axi_states_[b].write_req_byteen = *m_axi_mem_[b].wstrb;
|
||||
auto data = (const uint8_t*)m_axi_mem_[b].wdata->data();
|
||||
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) {
|
||||
m_axi_states_[b].write_req_data[i] = data[i];
|
||||
}
|
||||
m_axi_states_[b].write_req_data_ack = true;
|
||||
}
|
||||
|
||||
// handle data write requests
|
||||
if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) {
|
||||
auto byteen = *m_axi_mem_[i].wstrb;
|
||||
auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
|
||||
auto byte_addr = m_axi_states_[i].write_req_addr;
|
||||
|
||||
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
|
||||
// handle write requests
|
||||
if (m_axi_states_[b].write_req_addr_ack && m_axi_states_[b].write_req_data_ack) {
|
||||
auto byteen = m_axi_states_[b].write_req_byteen;
|
||||
auto byte_addr = m_axi_states_[b].write_req_addr;
|
||||
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
(*ram_)[byte_addr + i] = m_axi_states_[b].write_req_data[i];
|
||||
}
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = m_axi_states_[i].write_req_tag;
|
||||
mem_req->tag = m_axi_states_[b].write_req_tag;
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = true;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_[i].emplace_back(mem_req);
|
||||
pending_mem_reqs_[b].emplace_back(mem_req);
|
||||
|
||||
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag);
|
||||
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, byteen, mem_req->tag);
|
||||
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", data[i]);
|
||||
printf("%02x", m_axi_states_[b].write_req_data[i]]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
// send dram request
|
||||
dram_queues_[i].push(mem_req);
|
||||
dram_queues_[b].push(mem_req);
|
||||
|
||||
// deactivate data channel
|
||||
if (m_axi_states_[i].write_req_pending) {
|
||||
*m_axi_mem_[i].wready = 0;
|
||||
m_axi_states_[i].write_req_pending = false;
|
||||
}
|
||||
// clear acks
|
||||
m_axi_states_[b].write_req_addr_ack = false;
|
||||
m_axi_states_[b].write_req_data_ack = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> write_req_data;
|
||||
uint64_t write_req_byteen;
|
||||
uint64_t write_req_addr;
|
||||
uint32_t write_req_tag;
|
||||
bool write_req_pending;
|
||||
bool read_rsp_ready;
|
||||
bool write_rsp_ready;
|
||||
bool write_req_addr_ack;
|
||||
bool write_req_data_ack;
|
||||
} m_axi_state_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -141,7 +141,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] + b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -201,7 +201,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] / b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -233,7 +233,7 @@ public:
|
|||
auto y = a[i] * b[i];
|
||||
auto ref = x + y;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -263,7 +263,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] + b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -293,7 +293,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] - b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -323,7 +323,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -353,7 +353,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i] + b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -383,7 +383,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i] - b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -413,7 +413,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = -a[i] * b[i] - b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -443,7 +443,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = -a[i] * b[i] + b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -475,7 +475,7 @@ public:
|
|||
auto y = a[i] * b[i] + b[i];
|
||||
auto ref = x + y;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -505,7 +505,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = a[i] / b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -537,7 +537,7 @@ public:
|
|||
auto y = b[i] / a[i];
|
||||
auto ref = x + y;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -568,7 +568,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = sqrt(a[i] * b[i]);
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -600,7 +600,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (int32_t)x;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -632,7 +632,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (uint32_t)x;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -663,7 +663,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (float)x;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -694,7 +694,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (float)x;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -724,7 +724,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = fmin(fmax(1.0f, a[i]), b[i]);
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -754,7 +754,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = std::min(std::max(1, a[i]), b[i]);
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -787,7 +787,7 @@ public:
|
|||
ref = sinf(ref);
|
||||
}
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -820,7 +820,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
uint32_t ref = a[i] + 1;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -857,7 +857,7 @@ public:
|
|||
for (uint32_t i = 0; i < n; ++i) {
|
||||
uint32_t ref = a[i] + 1;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
|
||||
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue