Merge branch 'rtl_cache'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2024-11-13 22:27:11 -08:00
commit 5844de8c4d
81 changed files with 3085 additions and 2381 deletions

View file

@ -163,8 +163,9 @@ cache()
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache ways
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=1 -DDCACHE_NUM_WAYS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache banking
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
@ -174,11 +175,17 @@ cache()
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
# replacement policy
CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_REPL_POLICY=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_REPL_POLICY=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
# test writeback
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
# cache clustering
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2

8
configure vendored
View file

@ -65,7 +65,7 @@ copy_files() {
filename_no_ext="${filename%.in}"
dest_file="$dest_dir/$filename_no_ext"
mkdir -p "$dest_dir"
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
sed "s|@VORTEX_HOME@|$SOURCE_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@BUILDDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
# apply permissions to bash scripts
read -r firstline < "$dest_file"
if [[ "$firstline" =~ ^#!.*bash ]]; then
@ -169,8 +169,8 @@ fi
SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*")
# Get the directory of the script
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
SOURCE_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
THIRD_PARTY_DIR=$SOURCE_DIR/third_party
copy_files "$SCRIPT_DIR" "$CURRENT_DIR"
copy_files "$SOURCE_DIR" "$CURRENT_DIR"

View file

@ -67,7 +67,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
);
VX_gbar_unit #(
.INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID))
.INSTANCE_ID (`SFORMATF(("gbar%0d", CLUSTER_ID)))
) gbar_unit (
.clk (clk),
.reset (reset),
@ -84,7 +84,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
`RESET_RELAY (l2_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-l2cache", INSTANCE_ID))),
.CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
@ -98,8 +98,10 @@ module VX_cluster import VX_gpu_pkg::*; #(
.TAG_WIDTH (L2_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_DIRTYBYTES),
.REPL_POLICY (`L2_REPL_POLICY),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (3),
.NC_ENABLE (1),
@ -129,7 +131,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
.INSTANCE_ID (`SFORMATF(("%s-socket%0d", INSTANCE_ID, socket_id)))
) socket (
`SCOPE_IO_BIND (scope_socket+socket_id)
@ -152,6 +154,6 @@ module VX_cluster import VX_gpu_pkg::*; #(
);
end
`BUFFER_EX(busy, (| per_socket_busy), 1'b1, (`NUM_SOCKETS > 1));
`BUFFER_EX(busy, (| per_socket_busy), 1'b1, 1, (`NUM_SOCKETS > 1));
endmodule

View file

@ -170,6 +170,10 @@
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
`endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 2
`endif
`ifdef XLEN_64
`ifndef STACK_BASE_ADDR
@ -566,7 +570,12 @@
// Number of Associative Ways
`ifndef ICACHE_NUM_WAYS
`define ICACHE_NUM_WAYS 1
`define ICACHE_NUM_WAYS 4
`endif
// Replacement Policy
`ifndef ICACHE_REPL_POLICY
`define ICACHE_REPL_POLICY 1
`endif
// Dcache Configurable Knobs //////////////////////////////////////////////////
@ -615,12 +624,12 @@
// Memory Response Queue Size
`ifndef DCACHE_MRSQ_SIZE
`define DCACHE_MRSQ_SIZE 0
`define DCACHE_MRSQ_SIZE 4
`endif
// Number of Associative Ways
`ifndef DCACHE_NUM_WAYS
`define DCACHE_NUM_WAYS 1
`define DCACHE_NUM_WAYS 4
`endif
// Enable Cache Writeback
@ -628,6 +637,16 @@
`define DCACHE_WRITEBACK 0
`endif
// Enable Cache Dirty Bytes (defaults to the DCACHE_WRITEBACK setting)
`ifndef DCACHE_DIRTYBYTES
`define DCACHE_DIRTYBYTES `DCACHE_WRITEBACK
`endif
// Replacement Policy
`ifndef DCACHE_REPL_POLICY
`define DCACHE_REPL_POLICY 1
`endif
// LMEM Configurable Knobs ////////////////////////////////////////////////////
`ifndef LMEM_DISABLE
@ -650,12 +669,8 @@
// Cache Size
`ifndef L2_CACHE_SIZE
`ifdef ALTERA_S10
`define L2_CACHE_SIZE 2097152
`else
`define L2_CACHE_SIZE 1048576
`endif
`endif
// Number of Banks
`ifndef L2_NUM_BANKS
@ -679,12 +694,12 @@
// Memory Response Queue Size
`ifndef L2_MRSQ_SIZE
`define L2_MRSQ_SIZE 0
`define L2_MRSQ_SIZE 4
`endif
// Number of Associative Ways
`ifndef L2_NUM_WAYS
`define L2_NUM_WAYS 2
`define L2_NUM_WAYS 8
`endif
// Enable Cache Writeback
@ -692,15 +707,21 @@
`define L2_WRITEBACK 0
`endif
// Enable Cache Dirty Bytes (defaults to the L2_WRITEBACK setting)
`ifndef L2_DIRTYBYTES
`define L2_DIRTYBYTES `L2_WRITEBACK
`endif
// Replacement Policy
`ifndef L2_REPL_POLICY
`define L2_REPL_POLICY 1
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Cache Size
`ifndef L3_CACHE_SIZE
`ifdef ALTERA_S10
`define L3_CACHE_SIZE 2097152
`else
`define L3_CACHE_SIZE 1048576
`endif
`endif
// Number of Banks
@ -725,12 +746,12 @@
// Memory Response Queue Size
`ifndef L3_MRSQ_SIZE
`define L3_MRSQ_SIZE 0
`define L3_MRSQ_SIZE 4
`endif
// Number of Associative Ways
`ifndef L3_NUM_WAYS
`define L3_NUM_WAYS 4
`define L3_NUM_WAYS 8
`endif
// Enable Cache Writeback
@ -738,8 +759,14 @@
`define L3_WRITEBACK 0
`endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 2
// Enable Cache Dirty Bytes (defaults to the L3_WRITEBACK setting)
`ifndef L3_DIRTYBYTES
`define L3_DIRTYBYTES `L3_WRITEBACK
`endif
// Replacement Policy
`ifndef L3_REPL_POLICY
`define L3_REPL_POLICY 1
`endif
// Number of Memory Ports from LLC

View file

@ -335,10 +335,10 @@
.data_out (dst) \
)
`define BUFFER_EX(dst, src, ena, latency) \
`define BUFFER_EX(dst, src, ena, RSTW, latency) \
VX_pipe_register #( \
.DATAW ($bits(dst)), \
.RESETW ($bits(dst)), \
.RESETW (RSTW), \
.DEPTH (latency) \
) __``dst``__ ( \
.clk (clk), \
@ -348,7 +348,7 @@
.data_out (dst) \
)
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, 1)
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, 0, 1)
`define POP_COUNT_EX(out, in, model) \
VX_popcount #( \

View file

@ -37,16 +37,13 @@ endgenerate
`define ASSERT(cond, msg) \
assert(cond) else $error msg
`define RUNTIME_ASSERT(cond, msg) \
always @(posedge clk) begin \
assert(cond) else $error msg; \
`define RUNTIME_ASSERT(cond, msg) \
always @(posedge clk) begin \
if (!reset) begin \
`ASSERT(cond, msg); \
end \
end
`define __SCOPE
`define __SCOPE_X
`define __SCOPE_ON
`define __SCOPE_OFF
`ifndef TRACING_ALL
`define TRACING_ON /* verilator tracing_on */
`define TRACING_OFF /* verilator tracing_off */
@ -128,6 +125,8 @@ endgenerate
end
`endif
`define SFORMATF(x) $sformatf x
`else // SYNTHESIS
`define STATIC_ASSERT(cond, msg)
@ -137,6 +136,7 @@ endgenerate
`define DEBUG_BLOCK(x)
`define TRACE(level, args)
`define SFORMATF(x) ""
`define TRACING_ON
`define TRACING_OFF
@ -153,45 +153,39 @@ endgenerate
`define UNUSED_PIN(x) . x ()
`define UNUSED_ARG(x) x
`define __SCOPE (* mark_debug="true" *)
`define __SCOPE_X
`define __SCOPE_ON \
`undef __SCOPE_X \
`define __SCOPE_X `__SCOPE
`define __SCOPE_OFF \
`undef __SCOPE_X \
`define __SCOPE_X
`endif
///////////////////////////////////////////////////////////////////////////////
`ifdef QUARTUS
`define MAX_FANOUT 8
`define IF_DATA_SIZE(x) $bits(x.data)
`define MAX_LUTRAM 1024
`define USE_BLOCK_BRAM (* ramstyle = "block" *)
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
`define DISABLE_BRAM (* ramstyle = "logic" *)
`define PRESERVE_NET (* preserve *)
`define BLACKBOX_CELL (* black_box *)
`define STRING string
`elsif VIVADO
`define MAX_FANOUT 8
`define IF_DATA_SIZE(x) $bits(x.data)
`define MAX_LUTRAM 1024
`define USE_BLOCK_BRAM (* ram_style = "block" *)
`define USE_FAST_BRAM (* ram_style = "distributed" *)
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
`define DISABLE_BRAM (* ram_style = "registers" *)
`define PRESERVE_NET (* keep = "true" *)
`define BLACKBOX_CELL (* black_box *)
`define STRING
`else
`define MAX_FANOUT 8
`define IF_DATA_SIZE(x) x.DATA_WIDTH
`define MAX_LUTRAM 1024
`define USE_BLOCK_BRAM
`define USE_FAST_BRAM
`define NO_RW_RAM_CHECK
`define DISABLE_BRAM
`define PRESERVE_NET
`define BLACKBOX_CELL
`define STRING string
`endif
@ -217,7 +211,7 @@ endgenerate
`define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x)))
`define UP(x) (((x) != 0) ? (x) : 1)
`define UP(x) (((x) > 0) ? (x) : 1)
// Ceiling division: rounds n / d up to the next integer (for n >= 0 and d > 0).
// Every use of an argument is parenthesized so that expression arguments
// (e.g. shifts or conditionals) keep their intended precedence after expansion.
`define CDIV(n,d) (((n) + (d) - 1) / (d))

View file

@ -85,7 +85,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (icache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-icache", INSTANCE_ID))),
.NUM_UNITS (`NUM_ICACHES),
.NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0),
@ -100,8 +100,10 @@ module VX_socket import VX_gpu_pkg::*; #(
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
.TAG_WIDTH (ICACHE_TAG_WIDTH),
.FLAGS_WIDTH (0),
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (0),
.REPL_POLICY (`ICACHE_REPL_POLICY),
.NC_ENABLE (0),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (2)
@ -130,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (dcache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-dcache", INSTANCE_ID))),
.NUM_UNITS (`NUM_DCACHES),
.NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0),
@ -146,9 +148,11 @@ module VX_socket import VX_gpu_pkg::*; #(
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`DCACHE_WRITEBACK),
.DIRTY_BYTES (`DCACHE_WRITEBACK),
.DIRTY_BYTES (`DCACHE_DIRTYBYTES),
.REPL_POLICY (`DCACHE_REPL_POLICY),
.NC_ENABLE (1),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (2)
@ -208,7 +212,7 @@ module VX_socket import VX_gpu_pkg::*; #(
VX_core #(
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
.INSTANCE_ID (`SFORMATF(("%s-core%0d", INSTANCE_ID, core_id)))
) core (
`SCOPE_IO_BIND (scope_core + core_id)
@ -233,6 +237,6 @@ module VX_socket import VX_gpu_pkg::*; #(
);
end
`BUFFER_EX(busy, (| per_core_busy), 1'b1, (`SOCKET_SIZE > 1));
`BUFFER_EX(busy, (| per_core_busy), 1'b1, 1, (`SOCKET_SIZE > 1));
endmodule

View file

@ -84,8 +84,10 @@ module Vortex import VX_gpu_pkg::*; (
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L3_WRITEBACK),
.DIRTY_BYTES (`L3_WRITEBACK),
.DIRTY_BYTES (`L3_DIRTYBYTES),
.REPL_POLICY (`L3_REPL_POLICY),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (3),
.NC_ENABLE (1),
@ -138,7 +140,7 @@ module Vortex import VX_gpu_pkg::*; (
VX_cluster #(
.CLUSTER_ID (cluster_id),
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
.INSTANCE_ID (`SFORMATF(("cluster%0d", cluster_id)))
) cluster (
`SCOPE_IO_BIND (scope_cluster + cluster_id)
@ -157,7 +159,7 @@ module Vortex import VX_gpu_pkg::*; (
);
end
`BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1));
`BUFFER_EX(busy, (| per_cluster_busy), 1'b1, 1, (`NUM_CLUSTERS > 1));
`ifdef PERF_ENABLE
@ -202,13 +204,13 @@ module Vortex import VX_gpu_pkg::*; (
always @(posedge clk) begin
if (mem_req_fire) begin
if (mem_req_rw) begin
`TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid))
`TRACE(2, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid))
end else begin
`TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid))
`TRACE(2, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid))
end
end
if (mem_rsp_fire) begin
`TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid))
`TRACE(2, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid))
end
end
`endif

View file

@ -968,7 +968,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
wire [COUT_TID_WIDTH-1:0] cout_tid;
VX_encoder #(
VX_onehot_encoder #(
.N (`VX_MEM_BYTEEN_WIDTH)
) cout_tid_enc (
.data_in (vx_mem_req_byteen),

View file

@ -373,7 +373,9 @@ module VX_afu_wrap #(
`SCOPE_IO_UNUSED(0)
`endif
`endif
`ifdef CHIPSCOPE
`ifdef DBG_SCOPE_AFU
ila_afu ila_afu_inst (
.clk (clk),
.probe0 ({
@ -394,6 +396,7 @@ module VX_afu_wrap #(
})
);
`endif
`endif
`ifdef SIMULATION
`ifndef VERILATOR

View file

@ -33,7 +33,7 @@ module VX_bank_flush #(
output wire flush_init,
output wire flush_valid,
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
output wire [NUM_WAYS-1:0] flush_way,
output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way,
input wire flush_ready,
input wire mshr_empty,
input wire bank_empty
@ -48,20 +48,21 @@ module VX_bank_flush #(
localparam STATE_WAIT2 = 4;
localparam STATE_DONE = 5;
reg [2:0] state_r, state_n;
reg [2:0] state, state_n;
reg [CTR_WIDTH-1:0] counter_r;
reg [CTR_WIDTH-1:0] counter;
always @(*) begin
state_n = state_r;
case (state_r)
STATE_IDLE: begin
state_n = state;
case (state)
// STATE_IDLE (handled by the default branch so the case covers every state encoding)
default : begin
if (flush_begin) begin
state_n = STATE_WAIT1;
end
end
STATE_INIT: begin
if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
if (counter == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
state_n = STATE_IDLE;
end
end
@ -72,7 +73,7 @@ module VX_bank_flush #(
end
end
STATE_FLUSH: begin
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
if (counter == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
end
end
@ -93,37 +94,30 @@ module VX_bank_flush #(
always @(posedge clk) begin
if (reset) begin
state_r <= STATE_INIT;
counter_r <= '0;
state <= STATE_INIT;
counter <= '0;
end else begin
state_r <= state_n;
if (state_r != STATE_IDLE) begin
if ((state_r == STATE_INIT)
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
counter_r <= counter_r + CTR_WIDTH'(1);
state <= state_n;
if (state != STATE_IDLE) begin
if ((state == STATE_INIT)
|| ((state == STATE_FLUSH) && flush_ready)) begin
counter <= counter + CTR_WIDTH'(1);
end
end else begin
counter_r <= '0;
counter <= '0;
end
end
end
assign flush_end = (state_r == STATE_DONE);
assign flush_init = (state_r == STATE_INIT);
assign flush_valid = (state_r == STATE_FLUSH);
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
assign flush_end = (state == STATE_DONE);
assign flush_init = (state == STATE_INIT);
assign flush_valid = (state == STATE_FLUSH);
assign flush_line = counter[`CS_LINE_SEL_BITS-1:0];
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
VX_decoder #(
.N (`CS_WAY_SEL_BITS),
.D (NUM_WAYS)
) ctr_decoder (
.data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
.valid_in (1'b1),
.data_out (flush_way)
);
if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way
assign flush_way = counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS];
end else begin : g_flush_way_all
assign flush_way = {NUM_WAYS{1'b1}};
assign flush_way = '0;
end
endmodule

View file

@ -20,22 +20,22 @@ module VX_cache import VX_gpu_pkg::*; #(
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 4096,
parameter CACHE_SIZE = 32768,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 1,
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = `XLEN/8,
parameter WORD_SIZE = 16,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
parameter CRSQ_SIZE = 4,
// Miss Reservation Queue Size
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
parameter MRSQ_SIZE = 4,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
@ -48,17 +48,23 @@ module VX_cache import VX_gpu_pkg::*; #(
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Replacement policy
parameter REPL_POLICY = `CS_REPL_CYCLIC,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// Core response output register
parameter CORE_OUT_BUF = 0,
parameter CORE_OUT_BUF = 3,
// Memory request output register
parameter MEM_OUT_BUF = 0
parameter MEM_OUT_BUF = 3
) (
// PERF
`ifdef PERF_ENABLE
@ -76,10 +82,6 @@ module VX_cache import VX_gpu_pkg::*; #(
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
// We need to ensure that the memory request queue never fills up to avoid deadlock.
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
@ -90,7 +92,7 @@ module VX_cache import VX_gpu_pkg::*; #(
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + `UP(FLAGS_WIDTH);
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH;
@ -206,13 +208,13 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [LINE_SIZE-1:0] mem_req_byteen;
wire [`CS_LINE_WIDTH-1:0] mem_req_data;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_flush;
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags;
wire mem_req_ready;
wire mem_req_flush_b;
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flush_b;
VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
@ -220,13 +222,18 @@ module VX_cache import VX_gpu_pkg::*; #(
.reset (reset),
.valid_in (mem_req_valid),
.ready_in (mem_req_ready),
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}),
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flags}),
.data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}),
.valid_out (mem_bus_tmp_if.req_valid),
.ready_out (mem_bus_tmp_if.req_ready)
);
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0;
if (FLAGS_WIDTH != 0) begin : g_mem_req_flags
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b;
end else begin : g_no_mem_req_flags
assign mem_bus_tmp_if.req_data.flags = '0;
`UNUSED_VAR (mem_req_flush_b)
end
if (WRITE_ENABLE) begin : g_mem_bus_if
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
@ -244,7 +251,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_core_req_flags;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
@ -259,7 +266,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag;
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_mem_req_flags;
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
@ -276,7 +283,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_flush;
wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags;
wire [NUM_REQS-1:0] core_req_ready;
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
@ -293,7 +300,7 @@ module VX_cache import VX_gpu_pkg::*; #(
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
assign core_req_data[i] = core_bus2_if[i].req_data.data;
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
assign core_req_flags[i] = `UP(FLAGS_WIDTH)'(core_bus2_if[i].req_data.flags);
assign core_bus2_if[i].req_ready = core_req_ready[i];
end
@ -325,7 +332,7 @@ module VX_cache import VX_gpu_pkg::*; #(
core_req_byteen[i],
core_req_data[i],
core_req_tag[i],
core_req_flush[i]
core_req_flags[i]
};
end
@ -366,7 +373,7 @@ module VX_cache import VX_gpu_pkg::*; #(
per_bank_core_req_byteen[i],
per_bank_core_req_data[i],
per_bank_core_req_tag[i],
per_bank_core_req_flush[i]
per_bank_core_req_flags[i]
} = core_req_data_out[i];
end
@ -378,23 +385,25 @@ module VX_cache import VX_gpu_pkg::*; #(
VX_cache_bank #(
.BANK_ID (bank_id),
.INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
.INSTANCE_ID (`SFORMATF(("%s-bank%0d", INSTANCE_ID, bank_id))),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.REPL_POLICY (REPL_POLICY),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.DIRTY_BYTES (DIRTY_BYTES),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
.FLAGS_WIDTH (FLAGS_WIDTH),
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : 1),
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : 1)
) bank (
.clk (clk),
.reset (reset),
@ -414,7 +423,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.core_req_data (per_bank_core_req_data[bank_id]),
.core_req_tag (per_bank_core_req_tag[bank_id]),
.core_req_idx (per_bank_core_req_idx[bank_id]),
.core_req_flush (per_bank_core_req_flush[bank_id]),
.core_req_flags (per_bank_core_req_flags[bank_id]),
.core_req_ready (per_bank_core_req_ready[bank_id]),
// Core response
@ -431,7 +440,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
.mem_req_data (per_bank_mem_req_data[bank_id]),
.mem_req_tag (per_bank_mem_req_tag[bank_id]),
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
.mem_req_flags (per_bank_mem_req_flags[bank_id]),
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
// Memory response
@ -487,7 +496,7 @@ module VX_cache import VX_gpu_pkg::*; #(
// Memory request arbitration
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in;
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH))-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in
assign data_in[i] = {
@ -496,7 +505,7 @@ module VX_cache import VX_gpu_pkg::*; #(
per_bank_mem_req_byteen[i],
per_bank_mem_req_data[i],
per_bank_mem_req_tag[i],
per_bank_mem_req_flush[i]
per_bank_mem_req_flags[i]
};
end
@ -504,7 +513,7 @@ module VX_cache import VX_gpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1),
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.ARBITER ("R")
) mem_req_arb (
.clk (clk),
@ -512,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.valid_in (per_bank_mem_req_valid),
.ready_in (per_bank_mem_req_ready),
.data_in (data_in),
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}),
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flags}),
.valid_out (mem_req_valid),
.ready_out (mem_req_ready),
`UNUSED_PIN (sel_out)

View file

@ -47,12 +47,18 @@ module VX_cache_bank #(
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Replacement policy
parameter REPL_POLICY = `CS_REPL_CYCLIC,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// Core response output register
parameter CORE_OUT_REG = 0,
@ -82,7 +88,7 @@ module VX_cache_bank #(
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
input wire core_req_flush, // flush enable
input wire [`UP(FLAGS_WIDTH)-1:0] core_req_flags,
output wire core_req_ready,
// Core Response
@ -99,7 +105,7 @@ module VX_cache_bank #(
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_flush,
output wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
input wire mem_req_ready,
// Memory response
@ -138,43 +144,45 @@ module VX_cache_bank #(
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
wire replay_ready;
wire is_init_st0, is_init_st1;
wire valid_sel, valid_st0, valid_st1;
wire is_init_st0;
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire is_flush_st0, is_flush_st1;
wire [NUM_WAYS-1:0] flush_way_st0;
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0;
wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st0, way_idx_st1;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1;
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1;
wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1;
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1;
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
wire valid_sel, valid_st0, valid_st1;
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0;
wire is_dirty_st0, is_dirty_st1;
wire is_replay_st0, is_replay_st1;
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
wire evict_dirty_st0, evict_dirty_st1;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
wire [NUM_WAYS-1:0] tag_matches_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
wire is_hit_st0, is_hit_st1;
wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1;
wire mshr_pending_st0, mshr_pending_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1;
wire mshr_empty;
wire flush_valid;
wire init_valid;
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
wire [NUM_WAYS-1:0] flush_way;
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way;
wire flush_ready;
// ensure we have no pending memory request in the bank
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
// flush unit
VX_bank_flush #(
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
@ -196,11 +204,7 @@ module VX_cache_bank #(
.bank_empty (no_pending_req)
);
wire rdw_hazard1_sel;
wire rdw_hazard2_sel;
reg rdw_hazard3_st1;
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
wire pipe_stall = crsp_queue_stall;
// inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss.
@ -219,28 +223,26 @@ module VX_cache_bank #(
wire creq_enable = creq_grant && core_req_valid;
assign replay_ready = replay_grant
&& ~rdw_hazard1_sel
&& ~(!WRITEBACK && replay_rw && mreq_queue_alm_full) // needed for writethrough
&& ~pipe_stall;
assign mem_rsp_ready = fill_grant
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~rdw_hazard2_sel
&& ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback
&& ~pipe_stall;
assign flush_ready = flush_grant
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~rdw_hazard2_sel
&& ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback
&& ~pipe_stall;
assign core_req_ready = creq_grant
&& ~mreq_queue_alm_full
&& ~mshr_alm_full
&& ~mreq_queue_alm_full // needed for fill requests
&& ~mshr_alm_full // needed for mshr allocation
&& ~pipe_stall;
wire init_fire = init_valid;
wire replay_fire = replay_valid && replay_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire flush_fire = flush_valid && flush_ready;
wire flush_fire = flush_valid && flush_ready;
wire core_req_fire = core_req_valid && core_req_ready;
wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0];
@ -264,14 +266,13 @@ module VX_cache_bank #(
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel;
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) :
(replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag));
assign creq_flush_sel = core_req_valid && core_req_flush;
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
assign flags_sel = core_req_valid ? core_req_flags : '0;
if (WRITE_ENABLE) begin : g_data_sel
for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i
@ -293,15 +294,21 @@ module VX_cache_bank #(
assign req_uuid_sel = '0;
end
wire is_init_sel = init_valid;
wire is_creq_sel = creq_enable || replay_enable;
wire is_fill_sel = fill_enable;
wire is_flush_sel = flush_enable;
wire is_replay_sel = replay_enable;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
.data_in ({valid_sel, is_init_sel, is_fill_sel, is_flush_sel, is_creq_sel, is_replay_sel, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0})
);
if (UUID_WIDTH != 0) begin : g_req_uuid_st0
@ -310,147 +317,121 @@ module VX_cache_bank #(
assign req_uuid_st0 = '0;
end
wire do_init_st0 = valid_st0 && is_init_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
wire is_read_st0 = is_creq_st0 && ~rw_st0;
wire is_write_st0 = is_creq_st0 && rw_st0;
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
wire do_init_st0 = valid_st0 && is_init_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_read_st0 = valid_st0 && is_read_st0;
wire do_write_st0 = valid_st0 && is_write_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
wire is_read_st1 = is_creq_st1 && ~rw_st1;
wire is_write_st1 = is_creq_st1 && rw_st1;
wire [NUM_WAYS-1:0] evict_way_st0;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
wire do_read_st1 = valid_st1 && is_read_st1;
wire do_write_st1 = valid_st1 && is_write_st1;
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0);
assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0];
wire do_lookup_st0 = do_read_st0 || do_write_st0;
wire do_lookup_st1 = do_read_st1 || do_write_st1;
wire [`CS_WAY_SEL_WIDTH-1:0] victim_way_st0;
wire [NUM_WAYS-1:0] tag_matches_st0;
// Replacement-policy unit for this bank.
// - hit_*:  reports a lookup hit observed at stage 1 (line_idx_st1 / way_idx_st1);
//           qualified with ~pipe_stall so a stalled request is not reported repeatedly.
// - repl_*: signalled for a fill at stage 0 (also qualified with ~pipe_stall) on line_idx_st0.
//           NOTE(review): victim_way_st0 is presumably driven by this unit as the way to
//           replace -- confirm against VX_cache_repl's port directions.
VX_cache_repl #(
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.REPL_POLICY (REPL_POLICY)
) cache_repl (
.clk (clk),
.reset (reset),
.stall (pipe_stall),
.hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall),
.hit_line (line_idx_st1),
.hit_way (way_idx_st1),
.repl_valid (do_fill_st0 && ~pipe_stall),
.repl_line (line_idx_st0),
.repl_way (victim_way_st0)
);
assign evict_way_st0 = is_fill_st0 ? victim_way_st0 : flush_way_st0;
VX_cache_tags #(
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH)
.WRITEBACK (WRITEBACK)
) cache_tags (
.clk (clk),
.reset (reset),
.req_uuid (req_uuid_st0),
.stall (pipe_stall),
// init/flush/fill/write/lookup
// inputs
.init (do_init_st0),
.flush (do_flush_st0),
.fill (do_fill_st0),
.write (do_cache_wr_st0),
.lookup (do_lookup_st0),
.line_addr (addr_st0),
.way_sel (flush_way_st0),
.tag_matches(tag_matches_st0),
// replacement
.evict_dirty(evict_dirty_st0),
.flush (do_flush_st0 && ~pipe_stall),
.fill (do_fill_st0 && ~pipe_stall),
.read (do_read_st0 && ~pipe_stall),
.write (do_write_st0 && ~pipe_stall),
.line_idx (line_idx_st0),
.line_tag (line_tag_st0),
.evict_way (evict_way_st0),
// outputs
.tag_matches(tag_matches_st0),
.evict_dirty(is_dirty_st0),
.evict_tag (evict_tag_st0)
);
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx_st0;
// Convert the per-way tag match vector (NUM_WAYS bits) into a way index (hit_idx_st0).
// NOTE(review): presumably relies on at most one bit of tag_matches_st0 being set -- confirm
// against VX_cache_tags. The encoder's valid_out is left unconnected; the hit indication
// is computed separately as the OR-reduction of tag_matches_st0.
VX_onehot_encoder #(
.N (NUM_WAYS)
) way_idx_enc (
.data_in (tag_matches_st0),
.data_out (hit_idx_st0),
`UNUSED_PIN (valid_out)
);
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
assign way_idx_st0 = is_creq_st0 ? hit_idx_st0 : evict_way_st0;
assign is_hit_st0 = (| tag_matches_st0);
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
.data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_dirty_st0, is_hit_st0, rw_st0, flags_st0, way_idx_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_dirty_st1, is_hit_st1, rw_st1, flags_st1, way_idx_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1})
);
// we have a tag hit
wire is_hit_st1 = (| way_sel_st1);
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin : g_req_uuid_st1_0
assign req_uuid_st1 = '0;
end
wire is_read_st1 = is_creq_st1 && ~rw_st1;
wire is_write_st1 = is_creq_st1 && rw_st1;
wire do_init_st1 = valid_st1 && is_init_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1;
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
`UNUSED_VAR (do_write_miss_st1)
assign addr_st1 = {line_tag_st1, line_idx_st1};
// ensure mshr replay always get a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time))
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time))
// Both tag and data stores use BRAM with no read-during-write protection.
// We need to stall the pipeline to prevent read-after-write hazards.
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
always @(posedge clk) begin
// stall reads following writes to same line address
// (a read at stage 0 and a write at stage 1 that target the same line index).
// The trailing ~rdw_hazard3_st1 term forces the flag low on the clock edge after it
// was set, so the stall it causes never lasts more than one consecutive cycle.
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
&& ~rdw_hazard3_st1; // release pipeline stall
end
assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0];
`UNUSED_VAR (data_st1)
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
wire [LINE_SIZE-1:0] write_byteen_st1;
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
wire [LINE_SIZE-1:0] dirty_byteen_st1;
if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w;
always @(*) begin
write_byteen_w = '0;
write_byteen_w[wsel_st1] = byteen_st1;
end
assign write_byteen_st1 = write_byteen_w;
end else begin : g_write_byteen_st1
assign write_byteen_st1 = byteen_st1;
end
wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1;
wire [LINE_SIZE-1:0] evict_byteen_st1;
VX_cache_data #(
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@ -458,56 +439,58 @@ module VX_cache_bank #(
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH)
.DIRTY_BYTES (DIRTY_BYTES)
) cache_data (
.clk (clk),
.reset (reset),
.req_uuid (req_uuid_st1),
.stall (pipe_stall),
.init (do_init_st1),
.read (do_cache_rd_st1),
.fill (do_fill_st1),
.flush (do_flush_st1),
.write (do_cache_wr_st1),
.way_sel (way_sel_st1),
.line_addr (addr_st1),
.wsel (wsel_st1),
.fill_data (fill_data_st1),
.write_data (write_data_st1),
.write_byteen(write_byteen_st1),
// inputs
.init (do_init_st0),
.fill (do_fill_st0 && ~pipe_stall),
.flush (do_flush_st0 && ~pipe_stall),
.read (do_read_st0 && ~pipe_stall),
.write (do_write_st0 && ~pipe_stall),
.evict_way (evict_way_st0),
.tag_matches(tag_matches_st0),
.line_idx (line_idx_st0),
.fill_data (data_st0),
.write_word (write_word_st0),
.word_idx (word_idx_st0),
.write_byteen(byteen_st0),
.way_idx_r (way_idx_st1),
// outputs
.read_data (read_data_st1),
.dirty_data (dirty_data_st1),
.dirty_byteen(dirty_byteen_st1)
.evict_byteen(evict_byteen_st1)
);
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
wire mshr_lookup_st0 = mshr_allocate_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
// only allocate MSHR entries for non-replay core requests
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~is_replay_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~is_replay_st1;
// release allocated mshr entry if we had a hit
wire mshr_release_st1;
if (WRITEBACK) begin : g_mshr_release_st1
if (WRITEBACK) begin : g_mshr_release
assign mshr_release_st1 = is_hit_st1;
end else begin : g_mshr_release_st1_ro
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
// this can happen when writes are sent late, when the fill was already in flight.
end else begin : g_mshr_release_ro
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address.
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content.
// this can happen when writes are sent to memory late, when a related fill was already in flight.
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
end
wire mshr_release_fire = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall;
// Number of MSHR entries leaving in this cycle, computed from the two independent
// dequeue events (replay_fire and mshr_release_fire); the value ranges 0..2, hence 2 bits.
// NOTE(review): POP_COUNT is presumably a population-count macro -- confirm its definition.
wire [1:0] mshr_dequeue;
`POP_COUNT(mshr_dequeue, {replay_fire, mshr_release_fire});
VX_pending_size #(
.SIZE (MSHR_SIZE)
.SIZE (MSHR_SIZE),
.DECRW (2)
) mshr_pending_size (
.clk (clk),
.reset (reset),
.incr (core_req_fire),
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
.decr (mshr_dequeue),
.empty (mshr_empty),
`UNUSED_PIN (alm_empty),
.full (mshr_alm_full),
@ -516,11 +499,12 @@ module VX_cache_bank #(
);
VX_cache_mshr #(
.INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-mshr", INSTANCE_ID))),
.BANK_ID (BANK_ID),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.MSHR_SIZE (MSHR_SIZE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH),
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
) cache_mshr (
@ -528,7 +512,7 @@ module VX_cache_bank #(
.reset (reset),
.deq_req_uuid (req_uuid_sel),
.lkp_req_uuid (req_uuid_st0),
.alc_req_uuid (req_uuid_st0),
.fin_req_uuid (req_uuid_st1),
// memory fill
@ -545,37 +529,23 @@ module VX_cache_bank #(
.dequeue_ready (replay_ready),
// allocate
.allocate_valid (mshr_allocate_st0),
.allocate_valid (mshr_allocate_st0 && ~pipe_stall),
.allocate_addr (addr_st0),
.allocate_rw (rw_st0),
.allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
.allocate_data ({word_idx_st0, byteen_st0, write_word_st0, tag_st0, req_idx_st0}),
.allocate_id (mshr_alloc_id_st0),
.allocate_prev (mshr_prev_st0),
.allocate_pending(mshr_pending_st0),
.allocate_previd(mshr_previd_st0),
`UNUSED_PIN (allocate_ready),
// lookup
.lookup_valid (mshr_lookup_st0),
.lookup_addr (addr_st0),
.lookup_pending (mshr_lookup_pending_st0),
.lookup_rw (mshr_lookup_rw_st0),
// finalize
.finalize_valid (mshr_finalize_st1),
.finalize_release(mshr_release_st1),
.finalize_pending(mshr_pending_st1),
.finalize_valid (mshr_finalize_st1 && ~pipe_stall),
.finalize_is_release(mshr_release_st1),
.finalize_is_pending(mshr_pending_st1),
.finalize_id (mshr_id_st1),
.finalize_prev (mshr_prev_st1)
.finalize_previd(mshr_previd_st1)
);
// check if there are pending requests to same line in the MSHR
wire [MSHR_SIZE-1:0] lookup_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
&& (i != mshr_alloc_id_st0) // exclude current mshr id
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
end
assign mshr_pending_st0 = (| lookup_matches);
// schedule core response
wire crsp_queue_valid, crsp_queue_ready;
@ -583,9 +553,9 @@ module VX_cache_bank #(
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
wire [TAG_WIDTH-1:0] crsp_queue_tag;
assign crsp_queue_valid = do_cache_rd_st1;
assign crsp_queue_valid = do_read_st1 && is_hit_st1;
assign crsp_queue_idx = req_idx_st1;
assign crsp_queue_data = read_data_st1;
assign crsp_queue_data = read_data_st1[word_idx_st1];
assign crsp_queue_tag = tag_st1;
VX_elastic_buffer #(
@ -595,7 +565,7 @@ module VX_cache_bank #(
) core_rsp_queue (
.clk (clk),
.reset (reset),
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
.valid_in (crsp_queue_valid),
.ready_in (crsp_queue_ready),
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
@ -613,51 +583,68 @@ module VX_cache_bank #(
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag;
wire mreq_queue_rw;
wire mreq_queue_flush;
wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags;
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK);
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
if (WRITEBACK) begin : g_mreq_queue_push
if (DIRTY_BYTES) begin : g_dirty_bytes
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| dirty_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
end
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|| do_writeback_st1)
&& ~rdw_hazard3_st1;
end else begin : g_mreq_queue_push_ro
`UNUSED_VAR (do_writeback_st1)
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1)
&& ~rdw_hazard3_st1;
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_addr = addr_st1;
assign mreq_queue_flush = creq_flush_st1;
wire do_writeback_st1 = do_fill_or_flush_st1 && is_dirty_st1;
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1};
if (WRITE_ENABLE) begin : g_mreq_queue
if (WRITEBACK) begin : g_writeback
if (WRITEBACK) begin : g_wb
if (DIRTY_BYTES) begin : g_dirty_bytes
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| evict_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (is_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, is_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
end
// issue a fill request on a read/write miss
// issue a writeback on a dirty line eviction
assign mreq_queue_push = ((do_lookup_st1 && ~is_hit_st1 && ~mshr_pending_st1)
|| do_writeback_st1)
&& ~pipe_stall;
assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1;
assign mreq_queue_rw = is_fill_or_flush_st1;
assign mreq_queue_data = dirty_data_st1;
assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1;
end else begin : g_writethrough
assign mreq_queue_data = read_data_st1;
assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1;
`UNUSED_VAR (write_word_st1)
`UNUSED_VAR (byteen_st1)
end else begin : g_wt
// Expand the word-level byte enable (byteen_st1, WORD_SIZE bits) into a line-wide
// byte enable (line_byteen, LINE_SIZE bits), steered by the word index word_idx_st1.
// NOTE(review): presumably the byte enables of the non-selected words are zero --
// confirm against VX_demux.
wire [LINE_SIZE-1:0] line_byteen;
VX_demux #(
.N (`CS_WORD_SEL_BITS),
.M (WORD_SIZE)
) byteen_demux (
.sel_in (word_idx_st1),
.data_in (byteen_st1),
.data_out (line_byteen)
);
// issue a fill request on a read miss
// issue a memory write on a write request
assign mreq_queue_push = ((do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1)
|| do_write_st1)
&& ~pipe_stall;
assign mreq_queue_addr = addr_st1;
assign mreq_queue_rw = rw_st1;
assign mreq_queue_data = write_data_st1;
assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1;
assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_word_st1}};
assign mreq_queue_byteen = rw_st1 ? line_byteen : '1;
`UNUSED_VAR (is_fill_or_flush_st1)
`UNUSED_VAR (dirty_data_st1)
`UNUSED_VAR (dirty_byteen_st1)
`UNUSED_VAR (do_writeback_st1)
`UNUSED_VAR (evict_addr_st1)
`UNUSED_VAR (evict_byteen_st1)
end
end else begin : g_mreq_queue_ro
// issue a fill request on a read miss
assign mreq_queue_push = (do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1)
&& ~pipe_stall;
assign mreq_queue_addr = addr_st1;
assign mreq_queue_rw = 0;
assign mreq_queue_data = '0;
assign mreq_queue_byteen = '1;
`UNUSED_VAR (dirty_data_st1)
`UNUSED_VAR (dirty_byteen_st1)
`UNUSED_VAR (do_writeback_st1)
`UNUSED_VAR (evict_addr_st1)
`UNUSED_VAR (evict_byteen_st1)
`UNUSED_VAR (write_word_st1)
`UNUSED_VAR (byteen_st1)
end
if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid
@ -666,18 +653,21 @@ module VX_cache_bank #(
assign mreq_queue_tag = mshr_id_st1;
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_flags = flags_st1;
VX_fifo_queue #(
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.DEPTH (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
.ALM_FULL (MREQ_SIZE - PIPELINE_STAGES),
.OUT_REG (MEM_OUT_REG)
) mem_req_queue (
.clk (clk),
.reset (reset),
.push (mreq_queue_push),
.pop (mreq_queue_pop),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flags}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flags}),
.empty (mreq_queue_empty),
.alm_full (mreq_queue_alm_full),
`UNUSED_PIN (full),
@ -687,11 +677,13 @@ module VX_cache_bank #(
assign mem_req_valid = ~mreq_queue_empty;
`UNUSED_VAR (do_lookup_st0)
///////////////////////////////////////////////////////////////////////////////
`ifdef PERF_ENABLE
assign perf_read_misses = do_read_miss_st1;
assign perf_write_misses = do_write_miss_st1;
assign perf_read_misses = do_read_st1 && ~is_hit_st1;
assign perf_write_misses = do_write_st1 && ~is_hit_st1;
assign perf_mshr_stalls = mshr_alm_full;
`endif
@ -701,31 +693,76 @@ module VX_cache_bank #(
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
always @(posedge clk) begin
if (input_stall || pipe_stall) begin
`TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1))
`TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID,
crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full))
end
if (mem_rsp_fire) begin
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
end
if (replay_fire) begin
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
end
if (core_req_fire) begin
if (core_req_rw) begin
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel))
end else begin
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
end
end
if (do_init_st0) begin
`TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0))
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0))
end
if (do_lookup_st0 && ~pipe_stall) begin
if (is_hit_st0) begin
`TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
end else begin
`TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0))
end
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, data_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, req_uuid_st0))
end
if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1))
end
if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
end
if (crsp_queue_fire) begin
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
end
if (mreq_queue_push) begin
if (do_creq_wr_st1 && !WRITEBACK) begin
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
end else if (do_writeback_st1) begin
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
if (!WRITEBACK && do_write_st1) begin
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
end else if (WRITEBACK && do_writeback_st1) begin
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
end else begin
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
end
end
end

View file

@ -268,7 +268,7 @@ module VX_cache_bypass #(
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i));
end
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
end

View file

@ -24,22 +24,22 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
parameter CACHE_SIZE = 32768,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 16,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
parameter CRSQ_SIZE = 4,
// Miss Reservation Queue Size
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
parameter MRSQ_SIZE = 4,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
@ -52,20 +52,26 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Replacement policy
parameter REPL_POLICY = `CS_REPL_CYCLIC,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
// Core response output buffer
parameter CORE_OUT_BUF = 0,
parameter CORE_OUT_BUF = 3,
// Memory request output buffer
parameter MEM_OUT_BUF = 0
parameter MEM_OUT_BUF = 3
) (
input wire clk,
input wire reset,
@ -140,22 +146,24 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_CACHES; ++i) begin : g_cache_wrap
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),
.INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, i))),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.REPL_POLICY (REPL_POLICY),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (ARB_TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.TAG_SEL_IDX (TAG_SEL_IDX),
.CORE_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_BUF),
.MEM_OUT_BUF ((NUM_CACHES > 1) ? 2 : MEM_OUT_BUF),

View file

@ -14,8 +14,6 @@
`include "VX_cache_define.vh"
module VX_cache_data #(
parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
@ -31,171 +29,147 @@ module VX_cache_data #(
// Enable cache writeback
parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier
parameter UUID_WIDTH = 0
parameter DIRTY_BYTES = 0
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire[`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END
input wire stall,
// inputs
input wire init,
input wire read,
input wire fill,
input wire flush,
input wire read,
input wire write,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
input wire [NUM_WAYS-1:0] tag_matches,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
input wire [NUM_WAYS-1:0] way_sel,
output wire [`CS_WORD_WIDTH-1:0] read_data,
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
output wire [LINE_SIZE-1:0] dirty_byteen
input wire [`CS_WORD_WIDTH-1:0] write_word,
input wire [WORD_SIZE-1:0] write_byteen,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_r,
// outputs
output wire [`CS_LINE_WIDTH-1:0] read_data,
output wire [LINE_SIZE-1:0] evict_byteen
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (stall)
`UNUSED_VAR (line_addr)
`UNUSED_VAR (init)
`UNUSED_VAR (read)
`UNUSED_VAR (flush)
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
if (DIRTY_BYTES != 0) begin : g_dirty_bytes
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_rdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wren;
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
if (WRITEBACK) begin : g_dirty_data
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
VX_transpose #(
.DATAW (`CS_WORD_WIDTH),
.N (`CS_WORDS_PER_LINE),
.M (NUM_WAYS)
) transpose (
.data_in (line_rdata),
.data_out (transposed_rdata)
);
assign dirty_data = transposed_rdata[way_idx];
end else begin : g_dirty_data_0
assign dirty_data = '0;
end
if (DIRTY_BYTES) begin : g_dirty_byteen
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
wire evict = fill || flush;
wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
end
assign byteen_wdata[i] = {LINE_SIZE{write}}; // only asserted on writes
assign byteen_wren[i] = {LINE_SIZE{init}}
| {LINE_SIZE{evict && evict_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
end
wire byteen_read = fill || flush;
wire byteen_write = init || write || fill || flush;
VX_sp_ram #(
.DATAW (LINE_SIZE * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK)
.WRENW (LINE_SIZE * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1),
.RDW_MODE ("R")
) byteen_store (
.clk (clk),
.reset (reset),
.read (write || fill || flush),
.write (init || write || fill || flush),
.wren (1'b1),
.addr (line_sel),
.wdata (bs_wdata),
.rdata (bs_rdata)
.read (byteen_read),
.write (byteen_write),
.wren (byteen_wren),
.addr (line_idx),
.wdata (byteen_wdata),
.rdata (byteen_rdata)
);
assign dirty_byteen = bs_rdata[way_idx];
end else begin : g_dirty_byteen_0
assign dirty_byteen = '1;
assign evict_byteen = byteen_rdata[way_idx_r];
end else begin : g_no_dirty_bytes
`UNUSED_VAR (init)
`UNUSED_VAR (flush)
assign evict_byteen = '1; // update whole line
end
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM readaccess and way selection.
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [BYTEENW-1:0] line_wren;
if (WRITE_ENABLE) begin : g_data_store
// create a single write-enable block ram to reduce area overhead
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren;
wire line_write;
wire line_read;
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i
for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
end
assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
end
assign line_wren = wren_w;
end else begin : g_line_wdata_ro
assign line_read = read || ((fill || flush) && WRITEBACK);
assign line_write = fill || (write && WRITE_ENABLE);
VX_sp_ram #(
.DATAW (NUM_WAYS * `CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (NUM_WAYS * LINE_SIZE),
.OUT_REG (1),
.RDW_MODE ("R")
) data_store (
.clk (clk),
.reset (reset),
.read (line_read),
.write (line_write),
.wren (line_wren),
.addr (line_idx),
.wdata (line_wdata),
.rdata (line_rdata)
);
end else begin : g_data_store
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data)
assign line_wdata = fill_data;
assign line_wren = fill;
end
`UNUSED_VAR (write_word)
`UNUSED_VAR (word_idx)
`UNUSED_VAR (tag_matches)
VX_encoder #(
.N (NUM_WAYS)
) way_enc (
.data_in (way_sel),
.data_out (way_idx),
`UNUSED_PIN (valid_out)
);
wire line_read = (read && ~stall)
|| (WRITEBACK && (fill || flush));
wire line_write = write || fill;
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1),
.RW_ASSERT (1)
) data_store (
.clk (clk),
.reset (reset),
.read (line_read),
.write (line_write),
.wren (line_wren),
.addr (line_sel),
.wdata (line_wdata),
.rdata (line_rdata)
);
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel
assign per_way_rdata = line_rdata[wsel];
end else begin : g_per_way_rdata
`UNUSED_VAR (wsel)
assign per_way_rdata = line_rdata;
end
assign read_data = per_way_rdata[way_idx];
`ifdef DBG_TRACE_CACHE
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data))
end
if (flush && ~stall) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data))
end
if (read && ~stall) begin
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid))
end
if (write && ~stall) begin
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid))
// we don't merge the ways into a single block ram due to WREN overhead
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1),
.RDW_MODE ("R")
) data_store (
.clk (clk),
.reset (reset),
.read (read),
.write (fill && fill_way_en),
.wren (1'b1),
.addr (line_idx),
.wdata (fill_data),
.rdata (line_rdata[i])
);
end
end
`endif
assign read_data = line_rdata[way_idx_r];
endmodule

View file

@ -22,6 +22,7 @@
`define CS_LINE_WIDTH (8 * LINE_SIZE)
`define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS)
`define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS)
`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS)
`define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS))
`define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)
@ -73,4 +74,10 @@
`PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \
`PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1))
///////////////////////////////////////////////////////////////////////////////
`define CS_REPL_RANDOM 0
`define CS_REPL_CYCLIC 1
`define CS_REPL_PLRU 2
`endif // VX_CACHE_DEFINE_VH

View file

@ -128,7 +128,8 @@ module VX_cache_flush #(
lock_released_n = lock_released;
flush_uuid_n = flush_uuid_r;
case (state)
STATE_IDLE: begin
//STATE_IDLE:
default: begin
if (flush_req_enable) begin
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
for (integer i = NUM_REQS-1; i >= 0; --i) begin

View file

@ -24,36 +24,23 @@
// arrival and are dequeued in the same order.
// Each entry has a next pointer to the next entry pending for the same cache line.
//
// During the fill operation, the MSHR will release the MSHR entry at fill_id
// During the fill request, the MSHR will dequeue the MSHR entry at the fill_id location
// which represents the first request in the pending list that initiated the memory fill.
//
// The dequeue operation directly follows the fill operation and will release
// The dequeue response directly follows the fill request and will release
// all the subsequent entries linked to fill_id (pending the same cache line).
//
// During the allocation operation, the MSHR will allocate the next free slot
// During the allocation request, the MSHR will allocate the next free slot
// for the incoming core request. We return the allocated slot id as well as
// the slot id of the previous entry for the same cache line. This is used to
// link the new entry to the pending list during finalization.
// link the new entry to the pending list.
//
// The lookup operation is used to find all pending entries for a given cache line.
// This is used to by the cache bank to determine if a cache miss is already pending
// and therefore avoid issuing a memory fill request.
//
// The finalize operation is used to release the allocated MSHR entry if we had a hit.
// If we had a miss and finalize_pending is true, we link the allocated entry to
// its corresponding pending list (via finalize_prev).
// The finalize request is used to persist or release the currently allocated MSHR entry
// if we had a cache miss or a hit, respectively.
//
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
// and as such changes to either module requires careful evaluation.
//
// This architecture implements three pipeline stages:
// - Arbitration: cache bank arbitration before entering pipeline.
// fill and dequeue operations are executed at this stage.
// - stage 0: cache bank tag access stage.
// allocate and lookup operations are executed at this stage.
// - stage 1: cache bank tdatag access stage.
// finalize operation is executed at this stage.
//
module VX_cache_mshr #(
parameter `STRING INSTANCE_ID= "",
@ -68,6 +55,9 @@ module VX_cache_mshr #(
parameter UUID_WIDTH = 0,
// MSHR parameters
parameter DATA_WIDTH = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE)
) (
input wire clk,
@ -75,7 +65,7 @@ module VX_cache_mshr #(
`IGNORE_UNUSED_BEGIN
input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] lkp_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid,
`IGNORE_UNUSED_END
@ -98,26 +88,21 @@ module VX_cache_mshr #(
input wire allocate_rw,
input wire [DATA_WIDTH-1:0] allocate_data,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_prev,
output wire allocate_pending,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_previd,
output wire allocate_ready,
// lookup
input wire lookup_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire [MSHR_SIZE-1:0] lookup_pending,
output wire [MSHR_SIZE-1:0] lookup_rw,
// finalize
input wire finalize_valid,
input wire finalize_release,
input wire finalize_pending,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_prev
input wire finalize_is_release,
input wire finalize_is_pending,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_previd,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id
);
`UNUSED_PARAM (BANK_ID)
reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
reg [MSHR_ADDR_WIDTH-1:0] next_index [MSHR_SIZE-1:0];
reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [0:MSHR_SIZE-1];
reg [MSHR_ADDR_WIDTH-1:0] next_index [0:MSHR_SIZE-1];
reg [MSHR_SIZE-1:0] valid_table, valid_table_n;
reg [MSHR_SIZE-1:0] next_table, next_table_x, next_table_n;
@ -136,7 +121,7 @@ module VX_cache_mshr #(
wire [MSHR_SIZE-1:0] addr_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches
assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr);
end
VX_lzc #(
@ -148,11 +133,13 @@ module VX_cache_mshr #(
.valid_out (allocate_rdy_n)
);
VX_encoder #(
// find matching tail-entry
VX_priority_encoder #(
.N (MSHR_SIZE)
) prev_sel (
.data_in (addr_matches & ~next_table_x),
.data_out (prev_idx),
.index_out (prev_idx),
`UNUSED_PIN (onehot_out),
`UNUSED_PIN (valid_out)
);
@ -171,17 +158,22 @@ module VX_cache_mshr #(
valid_table_n[dequeue_id] = 0;
if (next_table[dequeue_id]) begin
dequeue_id_n = next_index[dequeue_id];
end else if (finalize_valid && finalize_is_pending && (finalize_previd == dequeue_id)) begin
dequeue_id_n = finalize_id;
end else begin
dequeue_val_n = 0;
end
end
if (finalize_valid) begin
if (finalize_release) begin
if (finalize_is_release) begin
valid_table_n[finalize_id] = 0;
end
if (finalize_pending) begin
next_table_x[finalize_prev] = 1;
// warning: This code allows 'finalize_is_pending' to be asserted regardless of hit/miss
// to reduce its propagation delay into the MSHR. this is safe because wrong updates
// to 'next_table_n' will be cleared during 'allocate_fire' below.
if (finalize_is_pending) begin
next_table_x[finalize_previd] = 1;
end
end
@ -204,12 +196,12 @@ module VX_cache_mshr #(
end
if (allocate_fire) begin
addr_table[allocate_id] <= allocate_addr;
addr_table[allocate_id] <= allocate_addr;
write_table[allocate_id] <= allocate_rw;
end
if (finalize_valid && finalize_pending) begin
next_index[finalize_prev] <= finalize_id;
if (finalize_valid && finalize_is_pending) begin
next_index[finalize_previd] <= finalize_id;
end
dequeue_id_r <= dequeue_id_n;
@ -217,20 +209,20 @@ module VX_cache_mshr #(
next_table <= next_table_n;
end
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
`RUNTIME_ASSERT(~(allocate_fire && valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid))
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
`RUNTIME_ASSERT(~(fill_valid && ~valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #(
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.LUTRAM (1)
) entries (
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.RDW_MODE ("R")
) mshr_store (
.clk (clk),
.reset (reset),
.read (1'b1),
@ -245,19 +237,20 @@ module VX_cache_mshr #(
assign fill_addr = addr_table[fill_id];
assign allocate_ready = allocate_rdy;
assign allocate_id = allocate_id_r;
assign allocate_prev = prev_idx;
assign allocate_id = allocate_id_r;
assign allocate_previd = prev_idx;
assign dequeue_valid = dequeue_val;
assign dequeue_addr = addr_table[dequeue_id_r];
assign dequeue_rw = write_table[dequeue_id_r];
assign dequeue_id = dequeue_id_r;
if (WRITEBACK) begin : g_pending_wb
assign allocate_pending = |addr_matches;
end else begin : g_pending_wt
// exclude write requests if writethrough
assign allocate_pending = |(addr_matches & ~write_table);
end
// return pending entries for the given cache line
assign lookup_pending = addr_matches;
assign lookup_rw = write_table;
`UNUSED_VAR (lookup_valid)
assign dequeue_valid = dequeue_val;
assign dequeue_addr = addr_table[dequeue_id_r];
assign dequeue_rw = write_table[dequeue_id_r];
assign dequeue_id = dequeue_id_r;
`ifdef DBG_TRACE_CACHE
reg show_table;
@ -265,23 +258,21 @@ module VX_cache_mshr #(
if (reset) begin
show_table <= 0;
end else begin
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
show_table <= allocate_fire || finalize_valid || fill_valid || dequeue_fire;
end
if (allocate_fire) begin
`TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid))
`TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid))
end
if (lookup_valid) begin
`TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid))
if (finalize_valid && finalize_is_release) begin
`TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid))
end
if (finalize_valid) begin
`TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid))
if (finalize_valid && finalize_is_pending) begin
`TRACE(3, ("%t: %s finalize: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid))
end
if (fill_valid) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
`TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
end
if (dequeue_fire) begin
`TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,

202
hw/rtl/cache/VX_cache_repl.sv vendored Normal file
View file

@ -0,0 +1,202 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// Fast PLRU encoder and decoder utility
// Adapted from BaseJump STL: http://bjump.org/data_out.html
// Pseudo-LRU tree decoder.
//
// Translates a way index into an update for the PLRU tree bits. The tree has
// NUM_WAYS-1 nodes stored in heap order: node 0 is the root and node i hangs
// off node (i-1)/2 (odd i on the "bit = 0" side, even i on the "bit = 1" side).
//
//   lru_mask[i] : set when node i lies on the root-to-leaf path selected by way_idx
//   lru_data[i] : inverse of the way_idx bit consumed at node i's tree level
//
// When NUM_WAYS == 1 there is no tree and both outputs are tied to zero.
module plru_decoder #(
    parameter NUM_WAYS      = 1,
    parameter WAY_IDX_BITS  = $clog2(NUM_WAYS),
    parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS)
) (
    input wire [WAY_IDX_WIDTH-1:0]    way_idx,
    output wire [`UP(NUM_WAYS-1)-1:0] lru_data,
    output wire [`UP(NUM_WAYS-1)-1:0] lru_mask
);
    if (NUM_WAYS > 1) begin : g_dec
        wire [`UP(NUM_WAYS-1)-1:0] node_data;
    `IGNORE_UNOPTFLAT_BEGIN
        // each mask bit is derived from its parent's mask bit (combinational chain)
        wire [`UP(NUM_WAYS-1)-1:0] node_mask;
    `IGNORE_UNOPTFLAT_END
        for (genvar i = 0; i < NUM_WAYS-1; ++i) begin : g_i
            // depth of node i inside the tree (root is level 0)
            localparam LEVEL = $clog2(i+2) - 1;

            // a node stores the inverse of the way_idx bit used at its own level
            assign node_data[i] = ~way_idx[WAY_IDX_BITS-1-LEVEL];

            if (i == 0) begin : g_i_0
                // the root is on every path
                assign node_mask[i] = 1'b1;
            end else if ((i % 2) != 0) begin : g_i_odd
                // odd node: on the path when its parent is and the parent-level bit is 0
                assign node_mask[i] = ~way_idx[WAY_IDX_BITS-LEVEL] & node_mask[(i-1)/2];
            end else begin : g_i_even
                // even node: on the path when its parent is and the parent-level bit is 1
                assign node_mask[i] = way_idx[WAY_IDX_BITS-LEVEL] & node_mask[(i-2)/2];
            end
        end
        assign lru_mask = node_mask;
        assign lru_data = node_data;
    end else begin : g_no_dec
        `UNUSED_VAR (way_idx)
        assign lru_mask = '0;
        assign lru_data = '0;
    end
endmodule
// Pseudo-LRU tree encoder.
//
// Walks the PLRU tree bits (heap order, node 0 is the root) from the root down
// and produces the way index they designate: the root bit becomes the MSB of
// way_idx, and at each deeper level i the i bits decoded so far are used as the
// select of a 2**i-input mux over that level's nodes (lru_in[2**i-1 +: 2**i])
// to obtain the next lower bit.
//
// When NUM_WAYS == 1 there is no tree and way_idx is tied to zero.
module plru_encoder #(
    parameter NUM_WAYS      = 1,
    parameter WAY_IDX_BITS  = $clog2(NUM_WAYS),
    parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS)
) (
    input wire [`UP(NUM_WAYS-1)-1:0] lru_in,
    output wire [WAY_IDX_WIDTH-1:0]  way_idx
);
    if (NUM_WAYS > 1) begin : g_enc
        wire [WAY_IDX_BITS-1:0] tmp;
        for (genvar i = 0; i < WAY_IDX_BITS; ++i) begin : g_i
            if (i == 0) begin : g_i_0
                // root node drives the MSB; index with WAY_IDX_BITS to match
                // the declared range of 'tmp' and the indexing used below
                assign tmp[WAY_IDX_BITS-1] = lru_in[0];
            end else begin : g_i_n
                // pick this level's node using the upper bits already decoded
                // NOTE(review): presumably VX_mux outputs data_in[sel_in] - confirm
                VX_mux #(
                    .N (2**i)
                ) mux (
                    .data_in  (lru_in[((2**i)-1)+:(2**i)]),
                    .sel_in   (tmp[WAY_IDX_BITS-1-:i]),
                    .data_out (tmp[WAY_IDX_BITS-1-i])
                );
            end
        end
        assign way_idx = tmp;
    end else begin : g_no_enc
        `UNUSED_VAR (lru_in)
        assign way_idx = '0;
    end
endmodule
// Cache replacement-way selector.
//
// Produces the way to evict (repl_way) for the line addressed by repl_line.
// Three policies are supported, chosen at elaboration time via REPL_POLICY:
//   - `CS_REPL_PLRU   : per-line pseudo-LRU tree, updated on hits
//   - `CS_REPL_CYCLIC : per-line counter, advanced on every replacement request
//   - any other value : a single free-running counter shared by all lines
// With a single way (NUM_WAYS <= 1) no state is kept and repl_way is zero.
//
// Ports:
//   stall                       : freezes the free-running counter (random policy only)
//   hit_valid/hit_line/hit_way  : reports an access to 'hit_way' of 'hit_line' (PLRU only)
//   repl_valid/repl_line        : requests a victim for 'repl_line'
//   repl_way                    : selected victim way
module VX_cache_repl #(
    parameter CACHE_SIZE  = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE   = 64,
    // Number of banks
    parameter NUM_BANKS   = 1,
    // Number of associative ways
    parameter NUM_WAYS    = 1,
    // replacement policy
    parameter REPL_POLICY = `CS_REPL_CYCLIC
) (
    input wire clk,
    input wire reset,
    input wire stall,
    input wire hit_valid,
    input wire [`CS_LINE_SEL_BITS-1:0] hit_line,
    input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way,
    input wire repl_valid,
    input wire [`CS_LINE_SEL_BITS-1:0] repl_line,
    output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way
);
    localparam WAY_SEL_WIDTH = `CS_WAY_SEL_WIDTH;

    // 'stall' is only consumed by the random policy; flag it once here so the
    // other configurations do not raise an unused-signal warning.
    `UNUSED_VAR (stall)

    if (NUM_WAYS <= 1) begin : g_disable

        // direct-mapped: nothing to choose from
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        `UNUSED_VAR (hit_valid)
        `UNUSED_VAR (hit_line)
        `UNUSED_VAR (hit_way)
        `UNUSED_VAR (repl_valid)
        `UNUSED_VAR (repl_line)
        assign repl_way = '0;

    end else begin : g_enable

        if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic

            // Cyclic policy: one way counter per line. Each replacement
            // request reads the line's counter as the victim and stores the
            // incremented value back to the same entry.
            // NOTE(review): relies on RDW_MODE "R" returning the pre-write value - confirm
            `UNUSED_VAR (hit_valid)
            `UNUSED_VAR (hit_line)
            `UNUSED_VAR (hit_way)

            wire [WAY_SEL_WIDTH-1:0] cur_way;
            wire [WAY_SEL_WIDTH-1:0] next_way;

            assign next_way = cur_way + 1;

            VX_sp_ram #(
                .DATAW    (WAY_SEL_WIDTH),
                .SIZE     (`CS_LINES_PER_BANK),
                .RDW_MODE ("R")
            ) ctr_store (
                .clk   (clk),
                .reset (reset),
                .read  (repl_valid),
                .write (repl_valid),
                .wren  (1'b1),
                .addr  (repl_line),
                .wdata (next_way),
                .rdata (cur_way)
            );

            assign repl_way = cur_way;

        end else if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru

            // Pseudo-LRU policy: one (NUM_WAYS-1)-bit tree per line.
            // A hit rewrites only the tree bits selected by the decoder mask;
            // a replacement request reads the tree and encodes it into a way.
            localparam LRU_WIDTH = `UP(NUM_WAYS-1);

            wire [LRU_WIDTH-1:0] tree_rd;     // tree bits read for repl_line
            wire [LRU_WIDTH-1:0] tree_wr;     // new tree bit values for hit_line
            wire [LRU_WIDTH-1:0] tree_wr_en;  // per-bit write enable

            // hit way -> tree bits to update
            plru_decoder #(
                .NUM_WAYS (NUM_WAYS)
            ) plru_dec (
                .way_idx  (hit_way),
                .lru_data (tree_wr),
                .lru_mask (tree_wr_en)
            );

            VX_dp_ram #(
                .DATAW    (LRU_WIDTH),
                .SIZE     (`CS_LINES_PER_BANK),
                .WRENW    (LRU_WIDTH),
                .RDW_MODE ("R")
            ) plru_store (
                .clk   (clk),
                .reset (reset),
                .read  (repl_valid),
                .write (hit_valid),
                .wren  (tree_wr_en),
                .waddr (hit_line),
                .raddr (repl_line),
                .wdata (tree_wr),
                .rdata (tree_rd)
            );

            // stored tree bits -> victim way
            plru_encoder #(
                .NUM_WAYS (NUM_WAYS)
            ) plru_enc (
                .lru_in  (tree_rd),
                .way_idx (repl_way)
            );

        end else begin : g_random

            // "Random" policy: a single counter shared by all lines that
            // advances on every non-stalled cycle; the victim is whatever
            // value the counter currently holds.
            `UNUSED_VAR (hit_valid)
            `UNUSED_VAR (hit_line)
            `UNUSED_VAR (hit_way)
            `UNUSED_VAR (repl_valid)
            `UNUSED_VAR (repl_line)

            reg [WAY_SEL_WIDTH-1:0] rand_way;

            always @(posedge clk) begin
                if (reset) begin
                    rand_way <= '0;
                end else begin
                    if (!stall) begin
                        rand_way <= rand_way + 1;
                    end
                end
            end

            assign repl_way = rand_way;

        end

    end

endmodule

View file

@ -14,8 +14,6 @@
`include "VX_cache_define.vh"
module VX_cache_tags #(
parameter `STRING INSTANCE_ID = "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
@ -27,96 +25,61 @@ module VX_cache_tags #(
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier
parameter UUID_WIDTH = 0
parameter WRITEBACK = 0
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire [`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END
input wire stall,
// init/fill/lookup
// inputs
input wire init,
input wire flush,
input wire fill,
input wire read,
input wire write,
input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [NUM_WAYS-1:0] way_sel,
output wire [NUM_WAYS-1:0] tag_matches,
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
input wire [`CS_TAG_SEL_BITS-1:0] line_tag,
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
// eviction
// outputs
output wire [NUM_WAYS-1:0] tag_matches,
output wire evict_dirty,
output wire [NUM_WAYS-1:0] evict_way,
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_VAR (lookup)
// valid, dirty, tag
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
wire [NUM_WAYS-1:0] read_valid;
wire [NUM_WAYS-1:0] read_dirty;
`UNUSED_VAR (read)
if (NUM_WAYS > 1) begin : g_evict_way
reg [NUM_WAYS-1:0] evict_way_r;
// cyclic assignment of replacement way
always @(posedge clk) begin
if (reset) begin
evict_way_r <= 1;
end else if (~stall) begin // holding the value on stalls prevents filling different slots twice
evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
end
end
assign evict_way = fill ? evict_way_r : way_sel;
VX_onehot_mux #(
.DATAW (`CS_TAG_SEL_BITS),
.N (NUM_WAYS)
) evict_tag_sel (
.data_in (read_tag),
.sel_in (evict_way),
.data_out (evict_tag)
);
end else begin : g_evict_way_0
`UNUSED_VAR (stall)
assign evict_way = 1'b1;
assign evict_tag = read_tag;
if (WRITEBACK) begin : g_evict_tag_wb
assign evict_dirty = read_dirty[evict_way];
assign evict_tag = read_tag[evict_way];
end else begin : g_evict_tag_wt
`UNUSED_VAR (read_dirty)
assign evict_dirty = 1'b0;
assign evict_tag = '0;
end
// fill and flush need to also read in writeback mode
wire fill_s = fill && (!WRITEBACK || ~stall);
wire flush_s = flush && (!WRITEBACK || ~stall);
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
wire way_en = (NUM_WAYS == 1) || (evict_way == i);
wire do_init = init; // init all ways
wire do_fill = fill && way_en;
wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode
wire do_write = WRITEBACK && write && tag_matches[i]; // only write on tag hit
wire do_fill = fill_s && evict_way[i];
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
wire do_write = WRITEBACK && write && tag_matches[i];
wire line_read = (WRITEBACK && (fill_s || flush_s));
wire line_write = init || do_fill || do_flush || do_write;
wire line_valid = ~(init || flush);
wire line_read = read || write || (WRITEBACK && (fill || flush));
wire line_write = do_init || do_fill || do_flush || do_write;
wire line_valid = fill || write;
wire [TAG_WIDTH-1:0] line_wdata;
wire [TAG_WIDTH-1:0] line_rdata;
if (WRITEBACK) begin : g_writeback
if (WRITEBACK) begin : g_wdata
assign line_wdata = {line_valid, write, line_tag};
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
end else begin : g_writethrough
end else begin : g_wdata
assign line_wdata = {line_valid, line_tag};
assign {read_valid[i], read_tag[i]} = line_rdata;
assign read_dirty[i] = 1'b0;
@ -125,15 +88,14 @@ module VX_cache_tags #(
VX_sp_ram #(
.DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.NO_RWCHECK (1),
.RW_ASSERT (1)
.RDW_MODE ("W")
) tag_store (
.clk (clk),
.reset (reset),
.read (line_read),
.write (line_write),
.wren (1'b1),
.addr (line_sel),
.addr (line_idx),
.wdata (line_wdata),
.rdata (line_rdata)
);
@ -143,36 +105,4 @@ module VX_cache_tags #(
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
end
assign evict_dirty = | (read_dirty & evict_way);
`ifdef DBG_TRACE_CACHE
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
end
if (init) begin
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel))
end
if (flush && ~stall) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty))
end
if (lookup && ~stall) begin
if (tag_matches != 0) begin
if (write) begin
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
end
end else begin
if (write) begin
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
end
end
end
end
`endif
endmodule

View file

@ -20,7 +20,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
parameter CACHE_SIZE = 65536,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
@ -28,37 +28,37 @@ module VX_cache_top import VX_gpu_pkg::*; #(
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 16,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
parameter CRSQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
parameter MRSQ_SIZE = 8,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
parameter MREQ_SIZE = 8,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
parameter WRITEBACK = 1,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
parameter DIRTY_BYTES = 1,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = 16,
parameter TAG_WIDTH = 32,
// Core response output buffer
parameter CORE_OUT_BUF = 2,
parameter CORE_OUT_BUF = 3,
// Memory request output buffer
parameter MEM_OUT_BUF = 2,
parameter MEM_OUT_BUF = 3,
parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS)
) (

View file

@ -27,18 +27,18 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 4,
// Number of associative ways
parameter NUM_WAYS = 1,
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 16,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
parameter CRSQ_SIZE = 4,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 16,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
parameter MRSQ_SIZE = 4,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
@ -51,12 +51,18 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Replacement policy
parameter REPL_POLICY = `CS_REPL_CYCLIC,
// Request debug identifier
parameter UUID_WIDTH = 0,
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
@ -64,10 +70,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
parameter PASSTHRU = 0,
// Core response output buffer
parameter CORE_OUT_BUF = 0,
parameter CORE_OUT_BUF = 3,
// Memory request output buffer
parameter MEM_OUT_BUF = 0
parameter MEM_OUT_BUF = 3
) (
input wire clk,
@ -166,15 +172,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.REPL_POLICY (REPL_POLICY),
.CRSQ_SIZE (CRSQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.MRSQ_SIZE (MRSQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
) cache (
@ -232,13 +240,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin
if (core_req_fire) begin
if (core_bus_if[i].req_data.rw) begin
`TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid))
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid))
end else begin
`TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid))
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid))
end
end
if (core_rsp_fire) begin
`TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid))
`TRACE(2, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid))
end
end
end
@ -260,15 +268,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
always @(posedge clk) begin
if (mem_req_fire) begin
if (mem_bus_if.req_data.rw) begin
`TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
`TRACE(2, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid))
end else begin
`TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
`TRACE(2, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid))
end
end
if (mem_rsp_fire) begin
`TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
`TRACE(2, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid))
end
end

View file

@ -194,7 +194,7 @@ module VX_alu_int #(
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (br_enable) begin
`TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
`TRACE(2, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
$time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid))
end
end

View file

@ -89,7 +89,7 @@ module VX_alu_unit #(
);
VX_alu_int #(
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
.INSTANCE_ID (`SFORMATF(("%s-int%0d", INSTANCE_ID, block_idx))),
.BLOCK_IDX (block_idx),
.NUM_LANES (NUM_LANES)
) alu_int (
@ -102,7 +102,7 @@ module VX_alu_unit #(
`ifdef EXT_M_ENABLE
VX_alu_muldiv #(
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
.INSTANCE_ID (`SFORMATF(("%s-muldiv%0d", INSTANCE_ID, block_idx))),
.NUM_LANES (NUM_LANES)
) muldiv_unit (
.clk (clk),

View file

@ -87,7 +87,7 @@ module VX_core import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (3);
VX_schedule #(
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-schedule", INSTANCE_ID))),
.CORE_ID (CORE_ID)
) schedule (
.clk (clk),
@ -115,7 +115,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_fetch #(
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-fetch", INSTANCE_ID)))
) fetch (
`SCOPE_IO_BIND (0)
.clk (clk),
@ -126,7 +126,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_decode #(
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-decode", INSTANCE_ID)))
) decode (
.clk (clk),
.reset (reset),
@ -136,7 +136,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_issue #(
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-issue", INSTANCE_ID)))
) issue (
`SCOPE_IO_BIND (1)
@ -153,7 +153,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_execute #(
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-execute", INSTANCE_ID))),
.CORE_ID (CORE_ID)
) execute (
`SCOPE_IO_BIND (2)
@ -181,7 +181,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_commit #(
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-commit", INSTANCE_ID)))
) commit (
.clk (clk),
.reset (reset),

View file

@ -144,7 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
`endif
VX_core #(
.INSTANCE_ID ($sformatf("core")),
.INSTANCE_ID (`SFORMATF(("core"))),
.CORE_ID (CORE_ID)
) core (
`SCOPE_IO_BIND (0)

View file

@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*; (
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (dcr_bus_if.write_valid) begin
`TRACE(1, ("%t: base-dcr: state=", $time))
`TRACE(2, ("%t: base-dcr: state=", $time))
trace_base_dcr(1, dcr_bus_if.write_addr);
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data))
`TRACE(2, (", data=0x%h\n", dcr_bus_if.write_data))
end
end
`endif

View file

@ -52,7 +52,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`endif
VX_alu_unit #(
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-alu", INSTANCE_ID)))
) alu_unit (
.clk (clk),
.reset (reset),
@ -64,7 +64,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (1);
VX_lsu_unit #(
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-lsu", INSTANCE_ID)))
) lsu_unit (
`SCOPE_IO_BIND (0)
.clk (clk),
@ -76,7 +76,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`ifdef EXT_F_ENABLE
VX_fpu_unit #(
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-fpu", INSTANCE_ID)))
) fpu_unit (
.clk (clk),
.reset (reset),
@ -87,7 +87,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`endif
VX_sfu_unit #(
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-sfu", INSTANCE_ID))),
.CORE_ID (CORE_ID)
) sfu_unit (
.clk (clk),

View file

@ -51,9 +51,9 @@ module VX_fetch import VX_gpu_pkg::*; #(
wire [`NUM_THREADS-1:0] rsp_tmask;
VX_dp_ram #(
.DATAW (`PC_BITS + `NUM_THREADS),
.SIZE (`NUM_WARPS),
.LUTRAM (1)
.DATAW (`PC_BITS + `NUM_THREADS),
.SIZE (`NUM_WARPS),
.RDW_MODE ("R")
) tag_store (
.clk (clk),
.reset (reset),
@ -166,7 +166,9 @@ module VX_fetch import VX_gpu_pkg::*; #(
`SCOPE_IO_UNUSED(0)
`endif
`endif
`ifdef CHIPSCOPE
`ifdef DBG_SCOPE_FETCH
ila_fetch ila_fetch_inst (
.clk (clk),
.probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}),
@ -174,6 +176,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
.probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready})
);
`endif
`endif
`ifdef DBG_TRACE_MEM
always @(posedge clk) begin

View file

@ -39,7 +39,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`IBUF_SIZE),
.OUT_REG (2) // 2-cycle EB for area reduction
.OUT_REG (1)
) instr_buf (
.clk (clk),
.reset (reset),

View file

@ -16,7 +16,6 @@
module VX_ipdom_stack #(
parameter WIDTH = 1,
parameter DEPTH = 1,
parameter OUT_REG = 0,
parameter ADDRW = `LOG2UP(DEPTH)
) (
input wire clk,
@ -31,76 +30,63 @@ module VX_ipdom_stack #(
output wire empty,
output wire full
);
reg slot_set [DEPTH-1:0];
reg [ADDRW-1:0] rd_ptr, wr_ptr;
reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr;
reg empty_r, full_r;
wire [WIDTH-1:0] d0, d1;
wire d_set_n = slot_set[rd_ptr];
wire d_set_r;
always @(*) begin
rd_ptr_n = rd_ptr;
if (push) begin
rd_ptr_n = wr_ptr;
end else if (pop) begin
rd_ptr_n = rd_ptr - ADDRW'(d_set_r);
end
end
always @(posedge clk) begin
if (reset) begin
rd_ptr <= '0;
wr_ptr <= '0;
empty_r <= 1;
full_r <= 0;
rd_ptr <= '0;
end else begin
`ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time));
`ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time));
`ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time));
if (push) begin
rd_ptr <= wr_ptr;
wr_ptr <= wr_ptr + ADDRW'(1);
empty_r <= 0;
full_r <= (ADDRW'(DEPTH-1) == wr_ptr);
end else if (pop) begin
wr_ptr <= wr_ptr - ADDRW'(d_set_n);
rd_ptr <= rd_ptr - ADDRW'(d_set_n);
empty_r <= (rd_ptr == 0) && (d_set_n == 1);
wr_ptr <= wr_ptr - ADDRW'(d_set_r);
empty_r <= (rd_ptr == 0) && d_set_r;
full_r <= 0;
end
rd_ptr <= rd_ptr_n;
end
end
wire [WIDTH * 2:0] qout = push ? {1'b0, q1, q0} : {1'b1, d1, d0};
VX_dp_ram #(
.DATAW (WIDTH * 2),
.SIZE (DEPTH),
.OUT_REG (OUT_REG ? 1 : 0),
.LUTRAM (OUT_REG ? 0 : 1)
) store (
.DATAW (1 + WIDTH * 2),
.SIZE (DEPTH),
.OUT_REG (1),
.RDW_MODE ("R")
) ipdom_store (
.clk (clk),
.reset (reset),
.read (1'b1),
.write (push),
.write (push || pop),
.wren (1'b1),
.waddr (wr_ptr),
.wdata ({q1, q0}),
.raddr (rd_ptr),
.rdata ({d1, d0})
);
always @(posedge clk) begin
if (push) begin
slot_set[wr_ptr] <= 0;
end else if (pop) begin
slot_set[rd_ptr] <= 1;
end
end
wire d_set_r;
VX_pipe_register #(
.DATAW (1),
.DEPTH (OUT_REG)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in (d_set_n),
.data_out (d_set_r)
.waddr (push ? wr_ptr : rd_ptr),
.wdata (qout),
.raddr (rd_ptr_n),
.rdata ({d_set_r, d1, d0})
);
assign d = d_set_r ? d0 : d1;

View file

@ -52,7 +52,7 @@ module VX_issue import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (`ISSUE_WIDTH);
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices
for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_slices
VX_decode_if #(
.NUM_WARPS (PER_ISSUE_WARPS)
) per_issue_decode_if();
@ -78,7 +78,7 @@ module VX_issue import VX_gpu_pkg::*; #(
`endif
VX_issue_slice #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)),
.INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, issue_id))),
.ISSUE_ID (issue_id)
) issue_slice (
`SCOPE_IO_BIND(issue_id)

View file

@ -37,7 +37,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
VX_operands_if operands_if();
VX_ibuffer #(
.INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID)))
) ibuffer (
.clk (clk),
.reset (reset),
@ -49,7 +49,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
);
VX_scoreboard #(
.INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID)))
) scoreboard (
.clk (clk),
.reset (reset),
@ -64,7 +64,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
);
VX_operands #(
.INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID)))
) operands (
.clk (clk),
.reset (reset),
@ -77,7 +77,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
);
VX_dispatch #(
.INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID)))
) dispatch (
.clk (clk),
.reset (reset),
@ -143,7 +143,9 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`SCOPE_IO_UNUSED(0)
`endif
`endif
`ifdef CHIPSCOPE
`ifdef DBG_SCOPE_ISSUE
ila_issue ila_issue_inst (
.clk (clk),
.probe0 ({decode_if.valid, decode_if.data, decode_if.ready}),
@ -152,6 +154,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
.probe3 ({writeback_if.valid, writeback_if.data})
);
`endif
`endif
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin

View file

@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
wire lsu_mem_rsp_ready;
VX_mem_scheduler #(
.INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-memsched", INSTANCE_ID))),
.CORE_REQS (NUM_LANES),
.MEM_CHANNELS(NUM_LANES),
.WORD_SIZE (LSU_WORD_SIZE),
@ -504,30 +504,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_MEM
always @(posedge clk) begin
if (execute_if.valid && fence_lock) begin
`TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID))
`TRACE(2, ("%t: *** %s fence wait\n", $time, INSTANCE_ID))
end
if (mem_req_fire) begin
if (mem_req_rw) begin
`TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
`TRACE(1, (", flags="))
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen))
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES)
`TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
`TRACE(2, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
`TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES)
`TRACE(2, (", flags="))
`TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES)
`TRACE(2, (", byteen=0x%0h, data=", mem_req_byteen))
`TRACE_ARRAY1D(2, "0x%0h", mem_req_data, NUM_LANES)
`TRACE(2, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
end else begin
`TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES)
`TRACE(1, (", flags="))
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES)
`TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
`TRACE(2, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask))
`TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES)
`TRACE(2, (", flags="))
`TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES)
`TRACE(2, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid))
end
end
if (mem_rsp_fire) begin
`TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
`TRACE(2, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop))
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES)
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid))
`TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data, NUM_LANES)
`TRACE(2, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid))
end
end
`endif
@ -561,7 +561,9 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`SCOPE_IO_UNUSED(0)
`endif
`endif
`ifdef CHIPSCOPE
`ifdef DBG_SCOPE_LSU
ila_lsu ila_lsu_inst (
.clk (clk),
.probe0 ({execute_if.valid, execute_if.data, execute_if.ready}),
@ -569,5 +571,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
.probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready})
);
`endif
`endif
endmodule

View file

@ -52,9 +52,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
.NUM_LANES (NUM_LANES)
) per_block_commit_if[BLOCK_SIZE]();
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_slices
VX_lsu_slice #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
.INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, block_idx)))
) lsu_slice(
`SCOPE_IO_BIND (block_idx)
.clk (clk),

View file

@ -92,7 +92,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
end
VX_local_mem #(
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
.INSTANCE_ID(`SFORMATF(("%s-lmem", INSTANCE_ID))),
.SIZE (1 << `LMEM_LOG_SIZE),
.NUM_REQS (LSU_NUM_REQS),
.NUM_BANKS (`LMEM_NUM_BANKS),
@ -127,11 +127,11 @@ module VX_mem_unit import VX_gpu_pkg::*; #(
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_coalesced_if[`NUM_LSU_BLOCKS]();
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled
if ((`NUM_LSU_LANES > 1) && (LSU_WORD_SIZE != DCACHE_WORD_SIZE)) begin : g_enabled
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
.INSTANCE_ID (`SFORMATF(("%s-coalescer%0d", INSTANCE_ID, i))),
.NUM_REQS (`NUM_LSU_LANES),
.DATA_IN_SIZE (LSU_WORD_SIZE),
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),

View file

@ -178,14 +178,14 @@ module VX_operands import VX_gpu_pkg::*; #(
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
VX_pipe_buffer #(
.DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH)
.DATAW (NUM_BANKS * (1 + REQ_SEL_WIDTH) + META_DATAW)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.valid_in (pipe_valid2_st1),
.ready_in (pipe_ready_st1),
.data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
.data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}),
.data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, pipe_data_st1}),
.data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, pipe_data_st2}),
.valid_out(pipe_valid_st2),
.ready_out(pipe_ready_st2)
);
@ -266,13 +266,12 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_dp_ram #(
.DATAW (REGS_DATAW),
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
.OUT_REG (1),
.READ_ENABLE (1),
.WRENW (BYTEENW),
`ifdef GPR_RESET
.RESET_RAM (1),
`endif
.NO_RWCHECK (1)
.OUT_REG (1),
.RDW_MODE ("U")
) gpr_ram (
.clk (clk),
.reset (reset),

View file

@ -290,7 +290,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
// split/join handling
VX_split_join #(
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
.INSTANCE_ID (`SFORMATF(("%s-splitjoin", INSTANCE_ID)))
) split_join (
.clk (clk),
.reset (reset),
@ -388,7 +388,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
wire no_pending_instr = (& pending_warp_empty);
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1);
`BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1, 1);
// export CSRs
assign sched_csr_if.cycles = cycles;

View file

@ -62,8 +62,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
.data_out (perf_sfu_per_cycle)
);
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT));
wire [PER_ISSUE_WARPS-1:0] stg_valid_in;
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in
@ -206,7 +206,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end else begin
if (staging_if[w].valid && ~staging_if[w].ready) begin
`ifdef DBG_TRACE_PIPELINE
`TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
`TRACE(4, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
operands_busy, staging_if[w].data.uuid))
`endif

View file

@ -99,7 +99,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
);
VX_wctl_unit #(
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-wctl", INSTANCE_ID))),
.NUM_LANES (NUM_LANES)
) wctl_unit (
.clk (clk),
@ -110,7 +110,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
);
VX_csr_unit #(
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-csr", INSTANCE_ID))),
.CORE_ID (CORE_ID),
.NUM_LANES (NUM_LANES)
) csr_unit (

View file

@ -48,8 +48,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks
VX_ipdom_stack #(
.WIDTH (`NUM_THREADS+`PC_BITS),
.DEPTH (`DV_STACK_SIZE),
.OUT_REG (0)
.DEPTH (`DV_STACK_SIZE)
) ipdom_stack (
.clk (clk),
.reset (reset),

View file

@ -0,0 +1,158 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
// RAM_WRITE_WREN: sliced write into ram[waddr].
// For each of the WRENW write-enable bits that is set, copies the matching
// WSELW-bit slice of wdata into the same slice of ram[waddr]; slices whose
// enable bit is clear keep their previous contents.
// Expects ram, wren, waddr, wdata, WRENW and WSELW to be visible at the
// expansion site and must be expanded inside a clocked always block.
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end
// RAM_INITIALIZATION: optional power-up contents for the array named ram.
// Generates nothing when INIT_ENABLE is 0. Otherwise loads ram from INIT_FILE
// via $readmemh when a file name is given, or fills all SIZE entries with
// INIT_VALUE when INIT_FILE is the empty string.
// Expands to generate-level if/else blocks, so it must be used at module or
// generate scope, next to the declaration of ram.
`define RAM_INITIALIZATION \
if (INIT_ENABLE != 0) begin : g_init \
if (INIT_FILE != "") begin : g_file \
initial $readmemh(INIT_FILE, ram); \
end else begin : g_value \
initial begin \
for (integer i = 0; i < SIZE; ++i) begin : g_i \
ram[i] = INIT_VALUE; \
end \
end \
end \
end
// RAM_BYPASS(__d): read-during-write forwarding for a registered read port.
// Registers wdata every cycle, together with a flag that is set when a read
// (read_s) and a write hit the same address (raddr_s == waddr) in that cycle.
// On the following cycle __d is driven with the registered wdata if the flag
// is set, and with rdata_r otherwise.
// Declares bypass_data_r / bypass_valid_r in the enclosing scope, so it can be
// expanded at most once per scope. The expansion has no trailing semicolon;
// the caller supplies it.
// NOTE(review): the forwarded value is the whole wdata word and wren is not
// consulted, so with a partial write enable the forwarded word can differ
// from what was stored in ram - confirm this is acceptable for WRENW != 1.
`define RAM_BYPASS(__d) \
reg [DATAW-1:0] bypass_data_r; \
reg bypass_valid_r; \
always @(posedge clk) begin \
bypass_valid_r <= read_s && write && (raddr_s == waddr); \
bypass_data_r <= wdata; \
end \
assign __d = bypass_valid_r ? bypass_data_r : rdata_r
`TRACING_OFF
// VX_async_ram_patch
//
// RAM wrapper that builds two implementations of the same memory and selects
// between them with a flag produced by a VX_placeholder instance:
//   * a synchronous-read RAM (registered read data plus a read-during-write
//     bypass), addressed by raddr_s, and
//   * an asynchronous-read RAM (combinational read), used as the fallback.
// rdata is taken from the synchronous version when is_raddr_reg is set and
// from the asynchronous version otherwise.
//
// NOTE(review): VX_placeholder is not defined in this file. From the names,
// raddr_s / read_s / is_raddr_reg presumably describe a register found in
// front of raddr and are filled in by an external tool - confirm against the
// synthesis flow.
//
// Parameters:
//   DATAW       - data word width in bits
//   SIZE        - number of words in the memory
//   WRENW       - number of write-enable bits; each one covers DATAW/WRENW bits
//   DUAL_PORT   - non-zero: asynchronous read uses raddr;
//                 zero: asynchronous read uses waddr (single shared address)
//   INIT_ENABLE - non-zero enables initial contents (see INIT_FILE/INIT_VALUE)
//   INIT_FILE   - hex file loaded with $readmemh when not empty
//   INIT_VALUE  - fill value used when INIT_FILE is empty
//   ADDRW       - address width, defaults to `LOG2UP(SIZE)
module VX_async_ram_patch #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter WRENW = 1,
parameter DUAL_PORT = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0,
parameter ADDRW = `LOG2UP(SIZE)
) (
input wire clk,
input wire reset,
input wire read,
input wire write,
input wire [WRENW-1:0] wren,
input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata,
input wire [ADDRW-1:0] raddr,
output wire [DATAW-1:0] rdata
);
// width of the data slice controlled by one write-enable bit
localparam WSELW = DATAW / WRENW;
// reset is not used by any logic in this module
`UNUSED_VAR (reset)
// keep attributes ask the tool to preserve these nets by name
(* keep = "true" *) wire [ADDRW-1:0] raddr_w, raddr_s;
(* keep = "true" *) wire read_s, is_raddr_reg;
assign raddr_w = raddr;
// placeholder: consumes raddr_w and produces the address/enable used by the
// synchronous RAM plus the flag that selects which RAM drives rdata
VX_placeholder #(
.I (ADDRW),
.O (ADDRW + 1 + 1)
) placeholder (
.in (raddr_w),
.out ({raddr_s, read_s, is_raddr_reg})
);
// synchronous ram
wire [DATAW-1:0] rdata_s;
if (WRENW != 1) begin : g_wren_sync_ram
// variant with per-slice write enables
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION
always @(posedge clk) begin
// the array is only accessed on cycles with a read or a write
if (read_s || write) begin
if (write) begin
`RAM_WRITE_WREN
end
// non-blocking read: captures the word as it was before this cycle's write
rdata_r <= ram[raddr_s];
end
end
// forward the written data when the same address was read and written
`RAM_BYPASS(rdata_s);
end else begin : g_no_wren_sync_ram
// variant with a single write enable: the whole word is written at once
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
reg [DATAW-1:0] rdata_r;
`RAM_INITIALIZATION
`UNUSED_VAR (wren)
always @(posedge clk) begin
// the array is only accessed on cycles with a read or a write
if (read_s || write) begin
if (write) begin
ram[waddr] <= wdata;
end
// non-blocking read: captures the word as it was before this cycle's write
rdata_r <= ram[raddr_s];
end
end
// forward the written data when the same address was read and written
`RAM_BYPASS(rdata_s);
end
// asynchronous ram (fallback)
wire [DATAW-1:0] rdata_a;
if (DUAL_PORT != 0) begin : g_dp_async_ram
// dual port: clocked write at waddr, combinational read at raddr
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
if (WRENW != 1) begin : g_wren
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
end else begin : g_no_wren
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
end
assign rdata_a = ram[raddr];
end else begin : g_sp_async_ram
// single port: clocked write and combinational read share waddr
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
if (WRENW != 1) begin : g_wren
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
end else begin : g_no_wren
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
end
// read address is waddr here, not raddr
assign rdata_a = ram[waddr];
end
// select the synchronous result when the placeholder sets is_raddr_reg,
// otherwise fall back to the asynchronous read
assign rdata = is_raddr_reg ? rdata_s : rdata_a;
endmodule
`TRACING_ON

View file

@ -135,7 +135,7 @@ module VX_axi_adapter #(
);
end
wire tbuf_full;
wire mem_req_tag_ready;
wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out;
wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out;
@ -143,13 +143,14 @@ module VX_axi_adapter #(
if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf
localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE);
wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr;
wire tbuf_full;
VX_index_buffer #(
.DATAW (TAG_WIDTH_IN),
.SIZE (TAG_BUFFER_SIZE)
) tag_buf (
.clk (clk),
.reset (reset),
.acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready),
.acquire_en (mem_req_valid && ~mem_req_rw && mem_req_ready),
.write_addr (tbuf_waddr),
.write_data (mem_req_tag),
.read_data (mem_rsp_tag),
@ -158,22 +159,24 @@ module VX_axi_adapter #(
.full (tbuf_full),
`UNUSED_PIN (empty)
);
assign mem_req_tag_ready = mem_req_rw || ~tbuf_full;
assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr);
assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0];
`UNUSED_VAR (mem_rsp_tag_out)
end else begin : g_no_tag_buf
assign tbuf_full = 0;
assign mem_req_tag_ready = 1;
assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag);
assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0];
`UNUSED_VAR (mem_rsp_tag_out)
end
// request ack
assign mem_req_ready = (mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full;
assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] :
(m_axi_arready[req_bank_sel] && mem_req_tag_ready);
// AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i];
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
assign m_axi_awid[i] = mem_req_tag_out;
assign m_axi_awlen[i] = 8'b00000000;
@ -188,7 +191,7 @@ module VX_axi_adapter #(
// AXI write request data channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i];
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i];
assign m_axi_wdata[i] = mem_req_data;
assign m_axi_wstrb[i] = mem_req_byteen;
assign m_axi_wlast[i] = 1'b1;
@ -205,7 +208,7 @@ module VX_axi_adapter #(
// AXI read request channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full;
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && mem_req_tag_ready;
assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8);
assign m_axi_arid[i] = mem_req_tag_out;
assign m_axi_arlen[i] = 8'b00000000;
@ -228,9 +231,8 @@ module VX_axi_adapter #(
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]};
assign m_axi_rready[i] = rsp_arb_ready_in[i];
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time))
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time))
`UNUSED_VAR (m_axi_rlast[i])
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rlast[i] == 0), ("%t: *** AXI response error", $time))
`RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rresp[i] != 0), ("%t: *** AXI response error", $time))
end
VX_stream_arb #(

View file

@ -65,12 +65,12 @@ module VX_cyclic_arbiter #(
.valid_out (grant_valid)
);
VX_decoder #(
VX_demux #(
.N (LOG_NUM_REQS),
.D (NUM_REQS)
) grant_decoder (
.data_in (grant_index),
.valid_in (1'b1),
.sel_in (grant_index),
.data_in (1'b1),
.data_out (grant_onehot_w)
);

View file

@ -17,26 +17,31 @@
// Adapted from BaseJump STL: http://bjump.org/data_out.html
`TRACING_OFF
module VX_decoder #(
parameter N = 1,
module VX_demux #(
parameter N = 0,
parameter M = 1,
parameter MODEL = 0,
parameter D = 1 << N
) (
input wire [N-1:0] data_in,
input wire [M-1:0] valid_in,
input wire [`UP(N)-1:0] sel_in,
input wire [M-1:0] data_in,
output wire [D-1:0][M-1:0] data_out
);
logic [D-1:0][M-1:0] shift;
if (MODEL == 1) begin : g_model1
always @(*) begin
shift = '0;
shift[data_in] = {M{1'b1}};
if (N != 0) begin : g_decoder
logic [D-1:0][M-1:0] shift;
if (MODEL == 1) begin : g_model1
always @(*) begin
shift = '0;
shift[sel_in] = {M{1'b1}};
end
end else begin : g_model0
assign shift = ((D*M)'({M{1'b1}})) << (sel_in * M);
end
end else begin : g_model0
assign shift = ((D*M)'({M{1'b1}})) << (data_in * M);
assign data_out = {D{data_in}} & shift;
end else begin : g_passthru
`UNUSED_VAR (sel_in)
assign data_out = data_in;
end
assign data_out = {D{valid_in}} & shift;
endmodule
`TRACING_ON

View file

@ -13,6 +13,35 @@
`include "VX_platform.vh"
`define RAM_INITIALIZATION \
if (INIT_ENABLE != 0) begin : g_init \
if (INIT_FILE != "") begin : g_file \
initial $readmemh(INIT_FILE, ram); \
end else begin : g_value \
initial begin \
for (integer i = 0; i < SIZE; ++i) begin : g_i \
ram[i] = INIT_VALUE; \
end \
end \
end \
end
`ifdef QUARTUS
`define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \
end \
end
`else
`define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1];
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end
`endif
`TRACING_OFF
module VX_dp_ram #(
parameter DATAW = 1,
@ -20,11 +49,9 @@ module VX_dp_ram #(
parameter WRENW = 1,
parameter OUT_REG = 0,
parameter LUTRAM = 0,
parameter NO_RWCHECK = 0,
parameter RW_ASSERT = 0,
parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, U: undefined
parameter RDW_ASSERT = 0,
parameter RESET_RAM = 0,
parameter RESET_OUT = 0,
parameter READ_ENABLE = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0,
@ -41,284 +68,348 @@ module VX_dp_ram #(
output wire [DATAW-1:0] rdata
);
localparam WSELW = DATAW / WRENW;
`STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter"))
`UNUSED_PARAM (LUTRAM)
`define RAM_INITIALIZATION \
if (INIT_ENABLE != 0) begin : g_init \
if (INIT_FILE != "") begin : g_file \
initial $readmemh(INIT_FILE, ram); \
end else begin : g_value \
initial begin \
for (integer i = 0; i < SIZE; ++i) \
ram[i] = INIT_VALUE; \
end \
end \
end
`STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter"))
`STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "U"), ("invalid parameter"))
`UNUSED_PARAM (RDW_ASSERT)
`UNUSED_PARAM (RW_ASSERT)
`UNUSED_VAR (read)
`RUNTIME_ASSERT((((WRENW == 1) ) || ~write) || (| wren), ("%t: invalid write enable mask", $time))
if (OUT_REG && !READ_ENABLE) begin : g_out_reg
`UNUSED_PARAM (NO_RWCHECK)
reg [DATAW-1:0] rdata_r;
wire cs = read || write;
if (WRENW != 1) begin : g_writeen
`ifdef QUARTUS
if (LUTRAM != 0) begin : g_lutram
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (cs) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
`ifdef SYNTHESIS
localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM);
if (OUT_REG) begin : g_sync
if (FORCE_BRAM) begin : g_bram
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
raddr_r <= raddr;
end
if (RESET_OUT && reset) begin
rdata_r <= '0;
end else begin
end
assign rdata = ram[raddr_r];
end else begin : g_no_wren
(* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[waddr] <= wdata;
end
raddr_r <= raddr;
end
end
assign rdata = ram[raddr_r];
end
end else if (RDW_MODE == "R") begin : g_read_first
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[raddr];
end
end
end
end else begin : g_no_lutram
reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (cs) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
assign rdata = rdata_r;
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[waddr] <= wdata;
end
end
if (RESET_OUT && reset) begin
rdata_r <= '0;
end else begin
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end
end
`else
// default synthesis
if (LUTRAM != 0) begin : g_lutram
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (cs) begin
end else begin : g_undefined
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
`RAM_WRITE_WREN
end
if (RESET_OUT && reset) begin
rdata_r <= '0;
end else begin
if (read) begin
rdata_r <= ram[raddr];
end
end
end
end else begin : g_no_lutram
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (cs) begin
assign rdata = rdata_r;
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
if (RESET_OUT && reset) begin
rdata_r <= '0;
end else begin
rdata_r <= ram[raddr];
end
end
end
end
`endif
end else begin : g_no_writeen
if (LUTRAM != 0) begin : g_lutram
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (cs) begin
if (write)
ram[waddr] <= wdata;
if (RESET_OUT && reset) begin
rdata_r <= '0;
end else begin
end
if (read) begin
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end
end else begin : g_no_lutram
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (cs) begin
if (write)
ram[waddr] <= wdata;
if (RESET_OUT && reset) begin
rdata_r <= '0;
end else begin
end
end else begin : g_auto
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
(* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
raddr_r <= raddr;
end
end
assign rdata = ram[raddr_r];
end else begin : g_no_wren
(* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[waddr] <= wdata;
end
raddr_r <= raddr;
end
end
assign rdata = ram[raddr_r];
end
end else if (RDW_MODE == "R") begin : g_read_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[raddr];
end
end
end
end
end
assign rdata = rdata_r;
end else begin : g_no_out_reg
// OUT_REG==0 || READ_ENABLE=1
wire [DATAW-1:0] rdata_w;
`ifdef SYNTHESIS
if (WRENW > 1) begin : g_writeen
`ifdef QUARTUS
if (LUTRAM != 0) begin : g_lutram
`USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata_w = ram[raddr];
end else begin : g_no_lutram
if (NO_RWCHECK != 0) begin : g_no_rwcheck
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata_w = ram[raddr];
end else begin : g_rwcheck
reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata_w = ram[raddr];
end
end
`else
// default synthesis
if (LUTRAM != 0) begin : g_lutram
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata_w = ram[raddr];
end else begin : g_no_lutram
if (NO_RWCHECK != 0) begin : g_no_rwcheck
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
end
assign rdata_w = ram[raddr];
end else begin : g_rwcheck
assign rdata = rdata_r;
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i])
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
if (read || write) begin
if (write) begin
ram[waddr] <= wdata;
end
rdata_r <= ram[raddr];
end
end
assign rdata_w = ram[raddr];
end
end
`endif
end else begin : g_no_writeen
// (WRENW == 1)
if (LUTRAM != 0) begin : g_lutram
`USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata_w = ram[raddr];
end else begin : g_no_lutram
if (NO_RWCHECK != 0) begin : g_no_rwcheck
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata_w = ram[raddr];
end else begin : g_rwcheck
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata_w = ram[raddr];
end
end
end
`else
// simulation
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
wire [DATAW-1:0] ram_n;
for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n
assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW];
end
always @(posedge clk) begin
if (RESET_RAM && reset) begin
for (integer i = 0; i < SIZE; ++i) begin
ram[i] <= DATAW'(INIT_VALUE);
assign rdata = rdata_r;
end
end else begin
if (write) begin
ram[waddr] <= ram_n;
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
if (read) begin
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
if (read) begin
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end
end
end
end else begin : g_async
`UNUSED_VAR (read)
// Asynchronous-read synthesis variants: in every branch below the write is
// clocked (posedge clk, gated by 'write') and rdata is a continuous assignment
// from ram[raddr]. The branches differ only in the attribute macros placed on
// the array declaration and in whether per-lane write enables are used.
//  - g_bram : taken when FORCE_BRAM is set; the array carries USE_BLOCK_BRAM.
//  - g_auto : array is declared without USE_BLOCK_BRAM.
// NOTE(review): USE_BLOCK_BRAM and NO_RW_RAM_CHECK are defined outside this
// view; presumably they expand to vendor synthesis attributes - confirm.
if (FORCE_BRAM) begin : g_bram
if (RDW_MODE == "W") begin : g_write_first
`ifdef VIVADO
// Under VIVADO the write-first case is delegated to VX_async_ram_patch
// instead of an inline array.
// NOTE(review): 'read' is connected here although the enclosing g_async
// branch also marks it with UNUSED_VAR - confirm this does not trip lint.
VX_async_ram_patch #(
.DATAW (DATAW),
.SIZE (SIZE),
.WRENW (WRENW),
.DUAL_PORT (1),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE)
) async_ram_patch (
.clk (clk),
.reset (reset),
.read (read),
.write (write),
.wren (wren),
.waddr (waddr),
.wdata (wdata),
.raddr (raddr),
.rdata (rdata)
);
`else
if (WRENW != 1) begin : g_wren
// per-lane write enables
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[raddr];
end else begin : g_no_wren
// single write enable: the whole word is written
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata = ram[raddr];
end
`endif
end else begin : g_read_first
// Every RDW_MODE other than "W" lands here (plain else), so "U" shares
// this branch with "R". The array additionally carries NO_RW_RAM_CHECK.
if (WRENW != 1) begin : g_wren
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[raddr];
end else begin : g_no_wren
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata = ram[raddr];
end
end
end else begin : g_auto
// Same four combinations as g_bram, minus USE_BLOCK_BRAM and the VIVADO patch.
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[raddr];
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata = ram[raddr];
end
end else begin : g_read_first
// As in g_bram, this also covers RDW_MODE "U"; array carries NO_RW_RAM_CHECK.
if (WRENW != 1) begin : g_wren
`NO_RW_RAM_CHECK `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[raddr];
end else begin : g_no_wren
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[waddr] <= wdata;
end
end
assign rdata = ram[raddr];
end
end
end
end
`else
// simulation
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass
always @(posedge clk) begin
if (RESET_RAM && reset) begin
for (integer i = 0; i < SIZE; ++i) begin
ram[i] <= DATAW'(INIT_VALUE);
end
end else if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i]) begin
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
end
end
if (OUT_REG) begin : g_sync
if (RDW_MODE == "W") begin : g_write_first
reg [ADDRW-1:0] raddr_r;
always @(posedge clk) begin
if (read || write) begin
raddr_r <= raddr;
end
end
assign rdata = ram[raddr_r];
end else if (RDW_MODE == "R") begin : g_read_first
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end else begin : g_undefined
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read) begin
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end
end else begin : g_async
`UNUSED_VAR (read)
if (RDW_MODE == "W") begin : g_write_first
assign rdata = ram[raddr];
end else begin : g_read_first
reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_waddr;
reg prev_write;
@ -335,30 +426,13 @@ module VX_dp_ram #(
end
end
assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
if (RW_ASSERT) begin : g_rw_assert
`RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time))
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
if (RDW_ASSERT) begin : g_rw_asert
`RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time))
end
end else begin : g_rdata_with_bypass
assign rdata_w = ram[raddr];
end
`endif
if (OUT_REG != 0) begin : g_rdata_req
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (READ_ENABLE && reset) begin
rdata_r <= '0;
end else if (!READ_ENABLE || read) begin
rdata_r <= rdata_w;
end
end
assign rdata = rdata_r;
end else begin : g_rdata_comb
assign rdata = rdata_w;
end
end
`endif
endmodule
`TRACING_ON

View file

@ -15,12 +15,12 @@
`TRACING_OFF
module VX_fifo_queue #(
parameter DATAW = 1,
parameter DEPTH = 2,
parameter DATAW = 32,
parameter DEPTH = 32,
parameter ALM_FULL = (DEPTH - 1),
parameter ALM_EMPTY = 1,
parameter OUT_REG = 0,
parameter LUTRAM = 1,
parameter LUTRAM = 0,
parameter SIZEW = `CLOG2(DEPTH+1)
) (
input wire clk,
@ -59,6 +59,8 @@ module VX_fifo_queue #(
);
if (DEPTH == 1) begin : g_depth_1
`UNUSED_PARAM (OUT_REG)
`UNUSED_PARAM (LUTRAM)
reg [DATAW-1:0] head_r;
@ -74,91 +76,52 @@ module VX_fifo_queue #(
localparam ADDRW = `CLOG2(DEPTH);
wire [DATAW-1:0] data_out_w;
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] wr_ptr_r;
always @(posedge clk) begin
if (reset) begin
wr_ptr_r <= '0;
rd_ptr_r <= (OUT_REG != 0) ? 1 : 0;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
end
end
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
wire bypass = push && (empty || (going_empty && pop));
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM),
.RDW_MODE ("W")
) dp_ram (
.clk (clk),
.reset (reset),
.read (~bypass),
.write (push),
.wren (1'b1),
.raddr (rd_ptr_r),
.waddr (wr_ptr_r),
.wdata (data_in),
.rdata (data_out_w)
);
if (OUT_REG != 0) begin : g_out_reg
wire [DATAW-1:0] dout;
reg [DATAW-1:0] dout_r;
reg [ADDRW-1:0] wr_ptr_r;
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_n_r;
reg [DATAW-1:0] data_out_r;
always @(posedge clk) begin
if (reset) begin
wr_ptr_r <= '0;
rd_ptr_r <= '0;
rd_ptr_n_r <= 1;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
if (pop) begin
rd_ptr_r <= rd_ptr_n_r;
if (DEPTH > 2) begin
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
end else begin // (DEPTH == 2);
rd_ptr_n_r <= ~rd_ptr_n_r;
end
end
end
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM)
) dp_ram (
.clk (clk),
.reset (reset),
.read (1'b1),
.write (push),
.wren (1'b1),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_n_r),
.rdata (dout)
);
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
always @(posedge clk) begin
if (push && (empty || (going_empty && pop))) begin
dout_r <= data_in;
if (bypass) begin
data_out_r <= data_in;
end else if (pop) begin
dout_r <= dout;
data_out_r <= data_out_w;
end
end
assign data_out = dout_r;
assign data_out = data_out_r;
end else begin : g_no_out_reg
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] wr_ptr_r;
always @(posedge clk) begin
if (reset) begin
rd_ptr_r <= '0;
wr_ptr_r <= '0;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
end
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM)
) dp_ram (
.clk (clk),
.reset (reset),
.read (1'b1),
.write (push),
.wren (1'b1),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_r),
.rdata (data_out)
);
assign data_out = data_out_w;
end
end

View file

@ -16,7 +16,7 @@
`TRACING_OFF
module VX_generic_arbiter #(
parameter NUM_REQS = 1,
parameter `STRING TYPE = "P",
parameter `STRING TYPE = "P", // P: priority, R: round-robin, M: matrix, C: cyclic
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
@ -27,6 +27,8 @@ module VX_generic_arbiter #(
output wire grant_valid,
input wire grant_ready
);
`STATIC_ASSERT((TYPE == "P" || TYPE == "R" || TYPE == "M" || TYPE == "C"), ("invalid parameter"))
if (TYPE == "P") begin : g_priority
`UNUSED_VAR (clk)
@ -84,10 +86,6 @@ module VX_generic_arbiter #(
.grant_ready (grant_ready)
);
end else begin : g_invalid
`ERROR(("invalid parameter"));
end
`RUNTIME_ASSERT (((~(| requests) != 1) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time))

View file

@ -15,10 +15,10 @@
`TRACING_OFF
module VX_index_buffer #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter LUTRAM = 1,
parameter ADDRW = `LOG2UP(SIZE)
parameter DATAW = 1,
parameter SIZE = 1,
parameter LUTRAM = 0,
parameter ADDRW = `LOG2UP(SIZE)
) (
input wire clk,
input wire reset,
@ -49,9 +49,10 @@ module VX_index_buffer #(
);
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (SIZE),
.LUTRAM (LUTRAM)
.DATAW (DATAW),
.SIZE (SIZE),
.LUTRAM (LUTRAM),
.RDW_MODE ("W")
) data_table (
.clk (clk),
.reset (reset),

View file

@ -72,7 +72,7 @@ module VX_matrix_arbiter #(
assign grant_onehot = grant;
VX_encoder #(
VX_onehot_encoder #(
.N (NUM_REQS)
) encoder (
.data_in (grant_onehot),

View file

@ -100,21 +100,21 @@ module VX_mem_adapter #(
assign mem_req_addr_out_w = mem_req_addr_in_qual;
end
VX_decoder #(
VX_demux #(
.N (D),
.M (SRC_DATA_WIDTH/8)
) req_be_dec (
.data_in (req_idx),
.valid_in (mem_req_byteen_in),
) req_be_demux (
.sel_in (req_idx),
.data_in (mem_req_byteen_in),
.data_out (mem_req_byteen_out_w)
);
VX_decoder #(
VX_demux #(
.N (D),
.M (SRC_DATA_WIDTH)
) req_data_dec (
.data_in (req_idx),
.valid_in (mem_req_data_in),
) req_data_demux (
.sel_in (req_idx),
.data_in (mem_req_data_in),
.data_out (mem_req_data_out_w)
);

View file

@ -18,7 +18,7 @@ module VX_mem_coalescer #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_REQS = 1,
parameter ADDR_WIDTH = 32,
parameter FLAGS_WIDTH = 1,
parameter FLAGS_WIDTH = 0,
parameter DATA_IN_SIZE = 4,
parameter DATA_OUT_SIZE = 64,
parameter TAG_WIDTH = 8,
@ -43,7 +43,7 @@ module VX_mem_coalescer #(
input wire [NUM_REQS-1:0] in_req_mask,
input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen,
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags,
input wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] in_req_flags,
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
input wire [TAG_WIDTH-1:0] in_req_tag,
output wire in_req_ready,
@ -61,7 +61,7 @@ module VX_mem_coalescer #(
output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags,
output wire [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags,
output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data,
output wire [OUT_TAG_WIDTH-1:0] out_req_tag,
input wire out_req_ready,
@ -74,6 +74,7 @@ module VX_mem_coalescer #(
output wire out_rsp_ready
);
`UNUSED_SPARAM (INSTANCE_ID)
`STATIC_ASSERT ((NUM_REQS > 1), ("invalid parameter"))
`STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter"))
`STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter"))
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time))
@ -92,7 +93,7 @@ module VX_mem_coalescer #(
logic out_req_rw_r, out_req_rw_n;
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n;
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags_r, out_req_flags_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
@ -110,7 +111,7 @@ module VX_mem_coalescer #(
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n;
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] seed_flags_r, seed_flags_n;
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n;
@ -139,7 +140,7 @@ module VX_mem_coalescer #(
assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W];
end
wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags;
wire [DATA_RATIO-1:0][`UP(FLAGS_WIDTH)-1:0] req_flags;
for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags
assign req_flags[j] = in_req_flags[DATA_RATIO * i + j];
end
@ -221,7 +222,7 @@ module VX_mem_coalescer #(
end
VX_pipe_register #(
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
.RESETW (1 + NUM_REQS + 1),
.INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0})
) pipe_reg (
@ -270,7 +271,12 @@ module VX_mem_coalescer #(
assign out_req_mask = out_req_mask_r;
assign out_req_byteen = out_req_byteen_r;
assign out_req_addr = out_req_addr_r;
assign out_req_flags = out_req_flags_r;
// Drive the out_req_flags port: tie it to zero when the module is configured
// without flags (FLAGS_WIDTH == 0), otherwise forward the registered flags.
if (FLAGS_WIDTH == 0) begin : g_out_req_flags_0
    // the register still exists in this configuration, so mark it as unused
    `UNUSED_VAR (out_req_flags_r)
    assign out_req_flags = '0;
end else begin : g_out_req_flags
    assign out_req_flags = out_req_flags_r;
end
assign out_req_data = out_req_data_r;
assign out_req_tag = out_req_tag_r;
@ -346,30 +352,30 @@ module VX_mem_coalescer #(
// Debug tracing for the coalescer's output side. On every accepted outgoing
// request (out_req_fire) the request fields are logged; writes additionally
// log byte-enables and data. On every accepted response (out_rsp_fire) the
// response data, offsets and end-of-packet flag are logged.
// Every message prints $time with %t so the timestamp is rendered the same
// way across all messages.
always @(posedge clk) begin
if (out_req_fire) begin
if (out_req_rw) begin
`TRACE(1, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS)
`TRACE(1, (", flags="))
`TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS)
`TRACE(1, (", byteen="))
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS)
`TRACE(1, (", data="))
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS)
`TRACE(2, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
`TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS)
`TRACE(2, (", flags="))
`TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS)
`TRACE(2, (", byteen="))
`TRACE_ARRAY1D(2, "0x%h", out_req_byteen, OUT_REQS)
`TRACE(2, (", data="))
`TRACE_ARRAY1D(2, "0x%0h", out_req_data, OUT_REQS)
end else begin
// read requests carry no byte-enables or data
`TRACE(1, ("%t: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS)
`TRACE(1, (", flags="))
`TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS)
`TRACE(2, ("%t: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask))
`TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS)
`TRACE(2, (", flags="))
`TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS)
end
// common tail for both request kinds; this is where the line is terminated
`TRACE(1, (", offset="))
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS)
`TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid))
`TRACE(2, (", offset="))
`TRACE_ARRAY1D(2, "%0d", out_req_offset, NUM_REQS)
`TRACE(2, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid))
end
if (out_rsp_fire) begin
`TRACE(1, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask))
`TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS)
`TRACE(1, (", offset="))
`TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS)
`TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid))
`TRACE(2, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask))
`TRACE_ARRAY1D(2, "0x%0h", out_rsp_data, OUT_REQS)
`TRACE(2, (", offset="))
`TRACE_ARRAY1D(2, "%0d", ibuf_dout_offset, NUM_REQS)
`TRACE(2, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid))
end
end
`endif

View file

@ -21,7 +21,7 @@ module VX_mem_scheduler #(
parameter WORD_SIZE = 4,
parameter LINE_SIZE = WORD_SIZE,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter FLAGS_WIDTH = 1,
parameter FLAGS_WIDTH = 0,
parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter CORE_QUEUE_SIZE= 8,
@ -32,7 +32,7 @@ module VX_mem_scheduler #(
parameter WORD_WIDTH = WORD_SIZE * 8,
parameter LINE_WIDTH = LINE_SIZE * 8,
parameter COALESCE_ENABLE = (LINE_SIZE != WORD_SIZE),
parameter COALESCE_ENABLE = (CORE_REQS > 1) && (LINE_SIZE != WORD_SIZE),
parameter PER_LINE_REQS = LINE_SIZE / WORD_SIZE,
parameter MERGED_REQS = CORE_REQS / PER_LINE_REQS,
parameter MEM_BATCHES = `CDIV(MERGED_REQS, MEM_CHANNELS),
@ -50,7 +50,7 @@ module VX_mem_scheduler #(
input wire [CORE_REQS-1:0] core_req_mask,
input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr,
input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags,
input wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags,
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
@ -72,7 +72,7 @@ module VX_mem_scheduler #(
output wire [MEM_CHANNELS-1:0] mem_req_mask,
output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen,
output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags,
output wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
@ -94,6 +94,7 @@ module VX_mem_scheduler #(
localparam CORE_BATCHES = COALESCE_ENABLE ? 1 : MEM_BATCHES;
localparam CORE_BATCH_BITS = `CLOG2(CORE_BATCHES);
`STATIC_ASSERT ((MEM_CHANNELS <= CORE_REQS), ("invalid parameter"))
`STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter"))
`STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter"))
`RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time))
@ -112,7 +113,7 @@ module VX_mem_scheduler #(
wire reqq_rw;
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags;
wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags;
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data;
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
wire reqq_ready;
@ -122,7 +123,7 @@ module VX_mem_scheduler #(
wire reqq_rw_s;
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s;
wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s;
wire [MERGED_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags_s;
wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s;
wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s;
wire reqq_ready_s;
@ -132,7 +133,7 @@ module VX_mem_scheduler #(
wire mem_req_rw_s;
wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s;
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_s;
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
wire mem_req_ready_s;
@ -167,7 +168,7 @@ module VX_mem_scheduler #(
end
VX_elastic_buffer #(
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + `UP(FLAGS_WIDTH) + WORD_WIDTH) + REQQ_TAG_WIDTH),
.SIZE (CORE_QUEUE_SIZE),
.OUT_REG (1)
) req_queue (
@ -223,7 +224,7 @@ module VX_mem_scheduler #(
if (COALESCE_ENABLE) begin : g_coalescer
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)),
.INSTANCE_ID (`SFORMATF(("%s-coalescer", INSTANCE_ID))),
.NUM_REQS (CORE_REQS),
.DATA_IN_SIZE (WORD_SIZE),
.DATA_OUT_SIZE (LINE_SIZE),
@ -297,7 +298,7 @@ module VX_mem_scheduler #(
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
@ -385,8 +386,10 @@ module VX_mem_scheduler #(
assign reqq_ready_s = req_sent_all;
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_u;
VX_elastic_buffer #(
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + `UP(FLAGS_WIDTH) + LINE_WIDTH) + MEM_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
@ -395,106 +398,128 @@ module VX_mem_scheduler #(
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}),
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags_u, mem_req_data, mem_req_tag}),
.valid_out (mem_req_valid),
.ready_out (mem_req_ready)
);
// Drive the mem_req_flags port from the output buffer: tie it to zero when the
// scheduler is configured without flags (FLAGS_WIDTH == 0), otherwise forward
// the buffered value.
if (FLAGS_WIDTH == 0) begin : g_mem_req_flags_0
    // the buffer output still exists in this configuration, so mark it as unused
    `UNUSED_VAR (mem_req_flags_u)
    assign mem_req_flags = '0;
end else begin : g_mem_req_flags
    assign mem_req_flags = mem_req_flags_u;
end
// Handle memory responses ////////////////////////////////////////////////
reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask;
wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask;
wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx;
if (CORE_BATCHES > 1) begin : g_rsp_batch_idx
assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0];
end else begin : g_rsp_batch_idx_0
assign rsp_batch_idx = '0;
end
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask
localparam i = r / CORE_CHANNELS;
localparam j = r % CORE_CHANNELS;
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
end
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
wire rsp_complete = ~(| rsp_rem_mask_n);
wire mem_rsp_fire_s = mem_rsp_valid_s && mem_rsp_ready_s;
always @(posedge clk) begin
if (ibuf_push) begin
rsp_rem_mask[ibuf_waddr] <= core_req_mask;
end
if (mem_rsp_fire_s) begin
rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n;
end
end
if (RSP_PARTIAL != 0 || CORE_REQS == 1) begin : g_rsp_partial
reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r;
always @(posedge clk) begin
if (ibuf_push) begin
rsp_sop_r[ibuf_waddr] <= 1;
end
if (mem_rsp_fire_s) begin
rsp_sop_r[ibuf_raddr] <= 0;
end
end
if (CORE_REQS == 1) begin : g_rsp_1
`UNUSED_VAR (rsp_batch_idx)
assign crsp_valid = mem_rsp_valid_s;
assign crsp_mask = curr_mask;
assign crsp_sop = rsp_sop_r[ibuf_raddr];
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
localparam j = r % CORE_CHANNELS;
assign crsp_data[r] = mem_rsp_data_s[j];
end
assign crsp_mask = mem_rsp_mask_s;
assign crsp_sop = 1'b1;
assign crsp_eop = 1'b1;
assign crsp_data = mem_rsp_data_s;
assign mem_rsp_ready_s = crsp_ready;
end else begin : g_rsp_full
end else begin : g_rsp_N
wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask;
wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask;
for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store
for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j
reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
wire rsp_wren = mem_rsp_fire_s
&& (BATCH_SEL_WIDTH'(j) == rsp_batch_idx)
&& ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]);
always @(posedge clk) begin
if (rsp_wren) begin
rsp_store[ibuf_raddr] <= mem_rsp_data_s[i];
end
end
assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr];
end
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask
localparam i = r / CORE_CHANNELS;
localparam j = r % CORE_CHANNELS;
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
end
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
wire mem_rsp_fire_s = mem_rsp_valid_s && mem_rsp_ready_s;
always @(posedge clk) begin
if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
rsp_rem_mask[ibuf_waddr] <= core_req_mask;
end
if (mem_rsp_fire_s) begin
rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n;
end
end
assign crsp_valid = mem_rsp_valid_s && rsp_complete;
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
assign crsp_sop = 1'b1;
wire rsp_complete = ~(| rsp_rem_mask_n) || (CORE_REQS == 1);
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
localparam i = r / CORE_CHANNELS;
localparam j = r % CORE_CHANNELS;
assign crsp_data[r] = rsp_store_n[j][i];
if (RSP_PARTIAL != 0) begin : g_rsp_partial
reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r;
always @(posedge clk) begin
if (ibuf_push) begin
rsp_sop_r[ibuf_waddr] <= 1;
end
if (mem_rsp_fire_s) begin
rsp_sop_r[ibuf_raddr] <= 0;
end
end
assign crsp_valid = mem_rsp_valid_s;
assign crsp_mask = curr_mask;
assign crsp_sop = rsp_sop_r[ibuf_raddr];
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
localparam j = r % CORE_CHANNELS;
assign crsp_data[r] = mem_rsp_data_s[j];
end
assign mem_rsp_ready_s = crsp_ready;
end else begin : g_rsp_full
wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store
for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j
reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1];
wire rsp_wren = mem_rsp_fire_s
&& (BATCH_SEL_WIDTH'(j) == rsp_batch_idx)
&& ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]);
always @(posedge clk) begin
if (rsp_wren) begin
rsp_store[ibuf_raddr] <= mem_rsp_data_s[i];
end
end
assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr];
end
end
always @(posedge clk) begin
if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
end
end
assign crsp_valid = mem_rsp_valid_s && rsp_complete;
assign crsp_mask = rsp_orig_mask[ibuf_raddr];
assign crsp_sop = 1'b1;
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
localparam i = r / CORE_CHANNELS;
localparam j = r % CORE_CHANNELS;
assign crsp_data[r] = rsp_store_n[j][i];
end
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
end
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
assign crsp_eop = rsp_complete;
end
if (UUID_WIDTH != 0) begin : g_crsp_tag
@ -503,8 +528,6 @@ module VX_mem_scheduler #(
assign crsp_tag = ibuf_dout;
end
assign crsp_eop = rsp_complete;
// Send response to caller
VX_elastic_buffer #(
@ -516,7 +539,7 @@ module VX_mem_scheduler #(
.reset (reset),
.valid_in (crsp_valid),
.ready_in (crsp_ready),
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
.data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}),
.valid_out (core_rsp_valid),
.ready_out (core_rsp_ready)
@ -584,41 +607,41 @@ module VX_mem_scheduler #(
always @(posedge clk) begin
if (core_req_fire) begin
if (core_req_rw) begin
`TRACE(1, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS)
`TRACE(1, (", byteen="))
`TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS)
`TRACE(1, (", data="))
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS)
`TRACE(2, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
`TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS)
`TRACE(2, (", byteen="))
`TRACE_ARRAY1D(2, "0x%h", core_req_byteen, CORE_REQS)
`TRACE(2, (", data="))
`TRACE_ARRAY1D(2, "0x%0h", core_req_data, CORE_REQS)
end else begin
`TRACE(1, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS)
`TRACE(2, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask))
`TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS)
end
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid))
`TRACE(2, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid))
end
if (core_rsp_valid && core_rsp_ready) begin
`TRACE(1, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop))
`TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS)
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid))
`TRACE(2, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop))
`TRACE_ARRAY1D(2, "0x%0h", core_rsp_data, CORE_REQS)
`TRACE(2, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid))
end
if (| mem_req_fire_s) begin
if (| mem_req_rw_s) begin
`TRACE(1, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS)
`TRACE(1, (", byteen="))
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS)
`TRACE(1, (", data="))
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS)
`TRACE(2, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
`TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS)
`TRACE(2, (", byteen="))
`TRACE_ARRAY1D(2, "0x%h", mem_req_byteen_s, CORE_CHANNELS)
`TRACE(2, (", data="))
`TRACE_ARRAY1D(2, "0x%0h", mem_req_data_s, CORE_CHANNELS)
end else begin
`TRACE(1, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS)
`TRACE(2, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s))
`TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS)
end
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid))
`TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid))
end
if (mem_rsp_fire_s) begin
`TRACE(1, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s))
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS)
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid))
if (mem_rsp_valid_s && mem_rsp_ready_s) begin
`TRACE(2, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s))
`TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data_s, CORE_CHANNELS)
`TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid))
end
end
`endif

View file

@ -13,11 +13,11 @@
`include "VX_platform.vh"
// Fast encoder using parallel prefix computation
// Fast one-hot encoder using parallel prefix computation
// Adapted from BaseJump STL: http://bjump.org/data_out.html
`TRACING_OFF
module VX_encoder #(
module VX_onehot_encoder #(
parameter N = 1,
parameter REVERSE = 0,
parameter MODEL = 1,

View file

@ -66,11 +66,13 @@ module VX_pending_size #(
if (INCRW != 1 || DECRW != 1) begin : g_wide_step
localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1);
localparam DELTAW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1);
logic [SIZEW-1:0] size_n, size_r;
assign size_n = $signed(size_r) + SIZEW'($signed(SUBW'(incr) - SUBW'(decr)));
wire [DELTAW-1:0] delta = DELTAW'(incr) - DELTAW'(decr);
assign size_n = $signed(size_r) + SIZEW'($signed(delta));
always @(posedge clk) begin
if (reset) begin
@ -80,8 +82,8 @@ module VX_pending_size #(
alm_full_r <= 0;
size_r <= '0;
end else begin
`ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
`ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
`ASSERT((DELTAW'(incr) <= DELTAW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
`ASSERT((DELTAW'(incr) >= DELTAW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
empty_r <= (size_n == SIZEW'(0));
full_r <= (size_n == SIZEW'(SIZE));
alm_empty_r <= (size_n <= SIZEW'(ALM_EMPTY));
@ -129,7 +131,7 @@ module VX_pending_size #(
wire is_empty_n = (used_r == ADDRW'(1));
wire is_full_n = (used_r == ADDRW'(SIZE-1));
wire [1:0] push_minus_pop = {~incr & decr, incr ^ decr};
wire [1:0] delta = {~incr & decr, incr ^ decr};
always @(posedge clk) begin
if (reset) begin
@ -148,7 +150,7 @@ module VX_pending_size #(
if (is_empty_n)
empty_r <= 1;
end
used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop));
used_r <= $signed(used_r) + ADDRW'($signed(delta));
end
end

View file

@ -0,0 +1,27 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`TRACING_OFF
// Stand-in module with no internal logic.
//   I   : requested width of the "in" port (port is UP(I) bits wide)
//   O   : requested width of the "out" port (port is UP(O) bits wide)
//   in  : input bus, not read by anything inside this module
//   out : output bus, not driven by anything inside this module
// NOTE(review): the BLACKBOX_CELL prefix and the UP macro are defined outside
// this file (presumably in VX_platform.vh); BLACKBOX_CELL presumably marks the
// module as a black box for the tool flow -- confirm there.
`BLACKBOX_CELL module VX_placeholder #(
parameter I = 0,
parameter O = 0
) (
input wire [`UP(I)-1:0] in,
output wire [`UP(O)-1:0] out
);
// empty module
endmodule
`TRACING_ON

View file

@ -448,7 +448,7 @@ module VX_rr_arbiter #(
end
end
VX_encoder #(
VX_onehot_encoder #(
.N (NUM_REQS)
) onehot_encoder (
.data_in (grant_onehot),
@ -480,12 +480,12 @@ module VX_rr_arbiter #(
end
end
VX_decoder #(
VX_demux #(
.N (LOG_NUM_REQS),
.D (NUM_REQS)
) grant_decoder (
.data_in (grant_index),
.valid_in (grant_valid),
.sel_in (grant_index),
.data_in (1'b1),
.data_out (grant_onehot)
);

View file

@ -113,8 +113,7 @@ module VX_scope_tap #(
.DATAW (IDLE_CTRW),
.SIZE (DEPTH),
.OUT_REG (1),
.READ_ENABLE (0),
.NO_RWCHECK (1)
.RDW_MODE ("R")
) delta_store (
.clk (clk),
.reset (reset),
@ -136,8 +135,7 @@ module VX_scope_tap #(
.DATAW (DATAW),
.SIZE (DEPTH),
.OUT_REG (1),
.READ_ENABLE (0),
.NO_RWCHECK (1)
.RDW_MODE ("R")
) data_store (
.clk (clk),
.reset (reset),

View file

@ -13,6 +13,35 @@
`include "VX_platform.vh"
// RAM_INITIALIZATION: expands to a generate-if that initializes the memory
// array named "ram" in the scope where the macro is used.
//   - Nothing is emitted unless INIT_ENABLE is non-zero.
//   - If INIT_FILE is a non-empty string, the array is loaded from that file
//     with $readmemh.
//   - Otherwise every one of the SIZE entries is set to INIT_VALUE.
// The macro relies on INIT_ENABLE, INIT_FILE, INIT_VALUE, SIZE and "ram"
// being visible at the expansion site.
`define RAM_INITIALIZATION \
if (INIT_ENABLE != 0) begin : g_init \
if (INIT_FILE != "") begin : g_file \
initial $readmemh(INIT_FILE, ram); \
end else begin : g_value \
initial begin \
for (integer i = 0; i < SIZE; ++i) begin : g_i \
ram[i] = INIT_VALUE; \
end \
end \
end \
end
// RAM_ARRAY_WREN declares the memory array "ram" for RAMs with per-slice write
// enables; RAM_WRITE_WREN is the matching write loop, which updates only the
// WSELW-bit slices of ram[addr] whose wren bit is set.
// Two variants are provided:
//   - QUARTUS: each entry is a packed 2-D vector [WRENW][WSELW], so a slice is
//     addressed as ram[addr][i].
//   - otherwise: each entry is a flat DATAW-bit vector and a slice is addressed
//     with an indexed part-select.
// Both macros rely on WRENW, WSELW, DATAW, SIZE, wren, addr and wdata being
// visible at the expansion site.
`ifdef QUARTUS
`define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[addr][i] <= wdata[i * WSELW +: WSELW]; \
end \
end
`else
`define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1];
`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end
`endif
`TRACING_OFF
module VX_sp_ram #(
parameter DATAW = 1,
@ -20,11 +49,9 @@ module VX_sp_ram #(
parameter WRENW = 1,
parameter OUT_REG = 0,
parameter LUTRAM = 0,
parameter NO_RWCHECK = 0,
parameter RW_ASSERT = 0,
parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, N: no-change, U: undefined
parameter RDW_ASSERT = 0,
parameter RESET_RAM = 0,
parameter RESET_OUT = 0,
parameter READ_ENABLE = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0,
@ -39,32 +66,442 @@ module VX_sp_ram #(
input wire [DATAW-1:0] wdata,
output wire [DATAW-1:0] rdata
);
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (SIZE),
.WRENW (WRENW),
.OUT_REG (OUT_REG),
.LUTRAM (LUTRAM),
.NO_RWCHECK (NO_RWCHECK),
.RW_ASSERT (RW_ASSERT),
.RESET_RAM (RESET_RAM),
.RESET_OUT (RESET_OUT),
.READ_ENABLE(READ_ENABLE),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE),
.ADDRW (ADDRW)
) dp_ram (
.clk (clk),
.reset (reset),
.read (read),
.write (write),
.wren (wren),
.waddr (addr),
.wdata (wdata),
.raddr (addr),
.rdata (rdata)
);
// Width in bits of the data slice controlled by each write-enable bit.
localparam WSELW = DATAW / WRENW;
`UNUSED_PARAM (LUTRAM)
// DATAW must split evenly into WRENW slices of WSELW bits.
`STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter"))
// RDW_MODE must be one of the four documented modes. "U" (undefined) is
// accepted here because the parameter comment lists it and the module body
// contains dedicated g_unknown generate branches for it; rejecting it would
// make those branches unreachable.
`STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N" || RDW_MODE == "U"), ("invalid parameter"))
`UNUSED_PARAM (RDW_ASSERT)
`ifdef SYNTHESIS
localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM);
if (OUT_REG) begin : g_sync
if (FORCE_BRAM) begin : g_bram
if (RDW_MODE == "R") begin : g_read_first
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [ADDRW-1:0] addr_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
addr_r <= addr;
end
end
assign rdata = ram[addr_r];
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
rdata_r <= wdata;
end else begin
rdata_r <= ram[addr];
end
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "N") begin : g_no_change
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end else begin
rdata_r <= ram[addr];
end
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end else begin
rdata_r <= ram[addr];
end
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "U") begin : g_unknown
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
if (read) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
ram[addr] <= wdata;
end
if (read) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end
end else begin : g_auto
if (RDW_MODE == "R") begin : g_read_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [ADDRW-1:0] addr_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end
addr_r <= addr;
end
end
assign rdata = ram[addr_r];
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
rdata_r <= wdata;
end else begin
rdata_r <= ram[addr];
end
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "N") begin : g_no_change
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
`RAM_WRITE_WREN
end else begin
rdata_r <= ram[addr];
end
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
if (write) begin
ram[addr] <= wdata;
end else begin
rdata_r <= ram[addr];
end
end
end
assign rdata = rdata_r;
end
end else if (RDW_MODE == "U") begin : g_unknown
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
if (read) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (write) begin
ram[addr] <= wdata;
end
if (read) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end
end
end else begin : g_async
`UNUSED_VAR (read)
if (FORCE_BRAM) begin : g_bram
if (RDW_MODE == "W") begin : g_write_first
`ifdef VIVADO
VX_async_ram_patch #(
.DATAW (DATAW),
.SIZE (SIZE),
.WRENW (WRENW),
.DUAL_PORT (0),
.INIT_ENABLE(INIT_ENABLE),
.INIT_FILE (INIT_FILE),
.INIT_VALUE (INIT_VALUE)
) async_ram_patch (
.clk (clk),
.reset (reset),
.read (read),
.write (write),
.wren (wren),
.waddr (addr),
.wdata (wdata),
.raddr (addr),
.rdata (rdata)
);
`else
if (WRENW != 1) begin : g_wren
`USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[addr];
end else begin : g_no_wren
`USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[addr] <= wdata;
end
end
assign rdata = ram[addr];
end
`endif
end else begin : g_read_first
if (WRENW != 1) begin : g_wren
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[addr];
end else begin : g_no_wren
`NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[addr] <= wdata;
end
end
assign rdata = ram[addr];
end
end
end else begin : g_auto
if (RDW_MODE == "W") begin : g_write_first
if (WRENW != 1) begin : g_wren
`RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[addr];
end else begin : g_no_wren
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[addr] <= wdata;
end
end
assign rdata = ram[addr];
end
end else begin : g_read_first
if (WRENW != 1) begin : g_wren
`NO_RW_RAM_CHECK `RAM_ARRAY_WREN
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
`RAM_WRITE_WREN
end
end
assign rdata = ram[addr];
end else begin : g_no_wren
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (write) begin
ram[addr] <= wdata;
end
end
assign rdata = ram[addr];
end
end
end
end
`else
// Simulation model (used when SYNTHESIS is not defined).
// One shared memory array plus a write process, followed by a read path
// selected at elaboration time from OUT_REG and RDW_MODE.
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
// Write port: when RESET_RAM is set and reset is asserted, every entry is
// overwritten with INIT_VALUE; otherwise a write updates only the WSELW-bit
// slices whose wren bit is set.
always @(posedge clk) begin
if (RESET_RAM && reset) begin
for (integer i = 0; i < SIZE; ++i) begin
ram[i] <= DATAW'(INIT_VALUE);
end
end else if (write) begin
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i]) begin
ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW];
end
end
end
end
if (OUT_REG) begin : g_sync
// Registered read path.
if (RDW_MODE == "R") begin : g_read_first
// Read-first: the output register samples ram[addr] on any access. The
// write above is non-blocking, so on a write cycle the sampled value is
// the entry's previous contents.
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read || write) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else if (RDW_MODE == "W") begin : g_write_first
// Write-first: only the address is registered and the array is read
// combinationally through it, so after a write cycle rdata shows the
// newly written contents.
reg [ADDRW-1:0] addr_r;
always @(posedge clk) begin
if (read || write) begin
addr_r <= addr;
end
end
assign rdata = ram[addr_r];
end else if (RDW_MODE == "N") begin : g_no_change
// No-change: the output register is updated only by a read that is not
// accompanied by a write; it holds its value during writes.
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read && ~write) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else if (RDW_MODE == "U") begin : g_unknown
// Undefined mode: the output register is updated whenever read is
// asserted, independent of write.
reg [DATAW-1:0] rdata_r;
always @(posedge clk) begin
if (read) begin
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end
end else begin : g_async
// Unregistered (combinational) read path.
`UNUSED_VAR (read)
if (RDW_MODE == "W") begin : g_write_first
assign rdata = ram[addr];
end else begin : g_read_first
// Every non-"W" mode lands here. The previous cycle's write flag, address
// and the contents ram[addr] held before that cycle's update are saved.
// If the previous cycle was a write to the address currently selected,
// rdata returns the saved pre-write contents instead of the array value.
reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_waddr;
reg prev_write;
always @(posedge clk) begin
if (reset) begin
prev_write <= 0;
prev_data <= '0;
prev_waddr <= '0;
end else begin
prev_write <= write;
prev_data <= ram[addr];
prev_waddr <= addr;
end
end
assign rdata = (prev_write && (prev_waddr == addr)) ? prev_data : ram[addr];
// Optional check: fires when a read observes data that differs from the
// current array contents, i.e. the bypass above substituted older data.
if (RDW_ASSERT) begin : g_rw_asert
`RUNTIME_ASSERT(~read || (rdata == ram[addr]), ("%t: read after write hazard", $time))
end
end
end
`endif
endmodule
`TRACING_ON

View file

@ -64,12 +64,12 @@ module VX_stream_xbar #(
);
for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders
VX_decoder #(
VX_demux #(
.N (OUT_WIDTH),
.D (NUM_OUTPUTS)
) sel_in_decoder (
.data_in (sel_in[i]),
.valid_in (valid_in[i]),
) sel_in_demux (
.sel_in (sel_in[i]),
.data_in (valid_in[i]),
.data_out (per_output_valid_in[i])
);
assign ready_in[i] = | per_output_ready_in_w[i];
@ -137,12 +137,12 @@ module VX_stream_xbar #(
wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w;
wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w;
VX_decoder #(
VX_demux #(
.N (OUT_WIDTH),
.D (NUM_OUTPUTS)
) sel_in_decoder (
.data_in (sel_in[0]),
.valid_in (valid_in[0]),
) sel_in_demux (
.sel_in (sel_in[0]),
.data_in (valid_in[0]),
.data_out (valid_out_w)
);

View file

@ -60,11 +60,11 @@ module VX_gbar_unit #(
`ifdef DBG_TRACE_GBAR
always @(posedge clk) begin
if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin
`TRACE(1, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n",
`TRACE(2, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n",
$time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id))
end
if (gbar_bus_if.rsp_valid) begin
`TRACE(1, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id))
`TRACE(2, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id))
end
end
`endif

View file

@ -167,9 +167,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
.SIZE (WORDS_PER_BANK),
.WRENW (WORD_SIZE),
.OUT_REG (1),
.READ_ENABLE (0),
.NO_RWCHECK (1)
) data_store (
.RDW_MODE ("R")
) lmem_store (
.clk (clk),
.reset (reset),
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
@ -330,15 +329,15 @@ module VX_local_mem import VX_gpu_pkg::*; #(
always @(posedge clk) begin
if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin
if (mem_bus_if[i].req_data.rw) begin
`TRACE(1, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
`TRACE(2, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i]))
end else begin
`TRACE(1, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
`TRACE(2, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i]))
end
end
if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin
`TRACE(1, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n",
`TRACE(2, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n",
$time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i]))
end
end

View file

@ -0,0 +1,525 @@
namespace eval vortex {
variable debug 0
# Report a problem on stdout.
#   msg     - text of the message.
#   do_exit - when true (default) the message is printed as "ERROR: ..." and the
#             interpreter exits with code -1; otherwise it is printed as
#             "WARNING: ..." and control returns to the caller.
proc print_error {msg {do_exit 1}} {
  if {!$do_exit} {
    puts "WARNING: $msg"
    return
  }
  puts "ERROR: $msg"
  exit -1
}
# Replace the first match of regular expression <match> in <str> with <repl>
# and return the resulting string (<str> unchanged when nothing matches).
# <repl> may use regsub back-references such as \1.
proc str_replace {str match repl} {
  set result ""
  # "--" ends switch parsing so a pattern that starts with '-' is not
  # mistaken for a regsub option.
  regsub -- $match $str $repl result
  return $result
}
# Return a cell name that is not yet used in the design: <name> itself when it
# is free, otherwise the first free "<name>_<k>" for k = 0, 1, 2, ...
proc unique_cell_name {name} {
  set candidate $name
  for {set suffix 0} {[get_cells -quiet $candidate] != {}} {incr suffix} {
    set candidate ${name}_${suffix}
  }
  return $candidate
}
# Return a net name that is not yet used in the design: <name> itself when it
# is free, otherwise the first free "<name>_<k>" for k = 0, 1, 2, ...
proc unique_net_name {name} {
  set candidate $name
  for {set suffix 0} {[get_nets -quiet $candidate] != {}} {incr suffix} {
    set candidate ${name}_${suffix}
  }
  return $candidate
}
# Collect the cells whose PARENT is <parent> and whose NAME matches the regular
# expression <name_match>.
# When nothing matches, print_error is called with <should_exist> as its
# do_exit flag (so the default aborts, 0 only warns). Returns the list of
# matching cells, possibly empty.
proc find_nested_cells {parent name_match {should_exist 1}} {
  set children [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"]
  set hits {}
  foreach child $children {
    if {![regexp $name_match [get_property NAME $child]]} { continue }
    lappend hits $child
  }
  if {[llength $hits] == 0} {
    print_error "No matching cell found for '$parent' matching '$name_match'." $should_exist
  }
  return $hits
}
# Return the cell under <parent> whose NAME equals <name_match> exactly.
# Prints an error and exits with code -1 when no such cell exists.
proc find_nested_cell {parent name_match} {
  set children [get_cells -hierarchical -filter "PARENT == $parent"]
  foreach child $children {
    if {[get_property NAME $child] == $name_match} { return $child }
  }
  puts "ERROR: No matching cell found for '$parent' matching '$name_match'."
  exit -1
}
# Collect the nets whose PARENT_CELL is <cell> and whose NAME matches the
# regular expression <name_match>.
# When nothing matches, print_error is called with <should_exist> as its
# do_exit flag. Returns the list of matching nets, possibly empty.
proc find_cell_nets {cell name_match {should_exist 1}} {
  set owned_nets [get_nets -hierarchical -filter "PARENT_CELL == $cell"]
  set hits {}
  foreach candidate $owned_nets {
    if {![regexp $name_match [get_property NAME $candidate]]} { continue }
    lappend hits $candidate
  }
  if {[llength $hits] == 0} {
    print_error "No matching net found for '$cell' matching '$name_match'." $should_exist
  }
  return $hits
}
# Return the net owned by <cell> (PARENT_CELL == cell) whose NAME equals
# <name_match> exactly. Prints an error and exits with code -1 when absent.
proc get_cell_net {cell name_match} {
  set owned_nets [get_nets -hierarchical -filter "PARENT_CELL == $cell"]
  foreach candidate $owned_nets {
    if {[get_property NAME $candidate] == $name_match} { return $candidate }
  }
  puts "ERROR: No matching net found for '$cell' matching '$name_match'."
  exit -1
}
# Collect the pins of <cell> whose NAME matches the regular expression
# <name_match>.
# When nothing matches, print_error is called with <should_exist> as its
# do_exit flag. Returns the list of matching pins, possibly empty.
proc find_cell_pins {cell name_match {should_exist 1}} {
  set cell_pins [get_pins -of_objects $cell]
  set hits {}
  foreach candidate $cell_pins {
    if {![regexp $name_match [get_property NAME $candidate]]} { continue }
    lappend hits $candidate
  }
  if {[llength $hits] == 0} {
    print_error "No matching pin found for '$cell' matching '$name_match'." $should_exist
  }
  return $hits
}
# Return the pin of <cell> whose NAME equals <name_match> exactly.
# Prints an error and exits with code -1 when no such pin exists.
proc get_cell_pin {cell name_match} {
  set cell_pins [get_pins -of_objects $cell]
  foreach candidate $cell_pins {
    if {[get_property NAME $candidate] == $name_match} { return $candidate }
  }
  puts "ERROR: No matching pin found for '$cell' matching '$name_match'."
  exit -1
}
# Re-drive <pin> from <source_pin>.
# The pin is first detached from its current net (a warning is printed when it
# has none). It is then attached to the net of <source_pin>; when the source
# pin has no net yet, a uniquely named "<source cell>_net" is created and
# connected to it first. More than one net on either pin is a fatal error.
proc replace_pin_source {pin source_pin} {
  variable debug

  # Step 1: detach the pin from its present driver.
  set old_net [get_nets -of_objects $pin]
  set old_count [llength $old_net]
  if {$old_count > 1} {
    puts "ERROR: Multiple nets connected to pin '$pin'."
    exit -1
  }
  if {$old_count == 1} {
    disconnect_net -net $old_net -objects $pin
    if {$debug} {puts "DEBUG: Disconnected net '$old_net' from pin '$pin'."}
  } else {
    puts "WARNING: No net connected to pin '$pin'."
  }

  # Step 2: locate (or create) the net driven by the new source.
  set source_net [get_nets -quiet -of_objects $source_pin]
  set source_count [llength $source_net]
  if {$source_count > 1} {
    puts "ERROR: Multiple nets connected to pin '$source_pin'."
    exit -1
  }
  if {$source_count == 0} {
    set source_cell [get_cells -of_objects $source_pin]
    set source_net [create_net [unique_net_name "${source_cell}_net"]]
    if {$debug} {puts "DEBUG: Created source_net: '$source_net'"}
    connect_net -net $source_net -objects $source_pin -hierarchical
    if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$source_pin'."}
  }

  # Step 3: hook the pin onto the source net.
  connect_net -net $source_net -objects $pin -hierarchical
  if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."}
}
# Return the single pin "<cell>/<pin_name>"; abort when none or several match.
# <cell_desc> only words the error message (e.g. "register", "FDRE", "LUT2").
proc _get_unique_pin {cell pin_name cell_desc} {
  set pins [get_pins -of_objects $cell -filter "NAME =~ \"*/$pin_name\""]
  set count [llength $pins]
  if {$count == 0} {
    puts "ERROR: No $pin_name pin found on $cell_desc cell '$cell'."
    exit -1
  }
  if {$count > 1} {
    puts "ERROR: Multiple $pin_name pins found on $cell_desc cell '$cell'."
    exit -1
  }
  return $pins
}

# Return the driver of <pin> as reported by find_pin_driver; abort when empty.
proc _require_pin_driver {pin} {
  set driver [find_pin_driver $pin]
  if {$driver == ""} {
    puts "ERROR: No source pin found connected to '$pin'."
    exit -1
  }
  return $driver
}

# Return the single net attached to <pin>; abort when none or several exist.
proc _require_single_net {pin} {
  set nets [get_nets -of_objects $pin]
  set count [llength $nets]
  if {$count == 0} {
    puts "ERROR: Unable to get source net for pin '$pin'."
    exit -1
  }
  if {$count > 1} {
    puts "ERROR: Multiple source nets found for pin '$pin'."
    exit -1
  }
  return $nets
}

# Build the combinational "next value" of a register.
#   reg_cell    - an FDRE or FDSE cell (any other REF_NAME is a fatal error).
#   prefix_name - base name for the LUT2 cell that is created.
# A LUT2 is created with I0 tied to the net driving the register's D pin and
# I1 tied to the net driving its R (FDRE) or S (FDSE) pin:
#   FDRE: O = I1 ? 0 : I0   (INIT 4'b0010)
#   FDSE: O = I1 ? 1 : I0   (INIT 4'b1110)
# Returns the LUT's O pin, which is left unconnected for the caller to use.
# All failures print an error and exit with code -1.
proc create_register_next {reg_cell prefix_name} {
  variable debug

  # Data input of the register and whatever drives it.
  set reg_d_pin [_get_unique_pin $reg_cell "D" "register"]
  if {$debug} {puts "DEBUG: reg_d_pin: '$reg_d_pin'"}
  set reg_d_src_pin [_require_pin_driver $reg_d_pin]
  if {$debug} {puts "DEBUG: reg_d_src_pin: '$reg_d_src_pin'"}

  # Pick the synchronous control pin and the matching LUT truth table once,
  # so an unsupported register type is rejected before anything is created.
  set register_type [get_property REF_NAME $reg_cell]
  if {$register_type == "FDRE"} {
    set ctrl_pin_name "R"
    set ctrl_dbg_name "reg_r_pin"
    set lut_init "4'b0010"
  } elseif {$register_type == "FDSE"} {
    set ctrl_pin_name "S"
    set ctrl_dbg_name "reg_s_pin"
    set lut_init "4'b1110"
  } else {
    puts "ERROR: Unsupported register type: '$register_type'."
    exit -1
  }
  set reg_ctrl_pin [_get_unique_pin $reg_cell $ctrl_pin_name $register_type]
  if {$debug} {puts "DEBUG: $ctrl_dbg_name: '$reg_ctrl_pin'"}
  set reg_r_src_pin [_require_pin_driver $reg_ctrl_pin]
  if {$debug} {puts "DEBUG: reg_r_src_pin: '$reg_r_src_pin'"}

  # Nets carrying the D value and the reset/set control.
  set reg_d_src_net [_require_single_net $reg_d_src_pin]
  set reg_r_src_net [_require_single_net $reg_r_src_pin]

  # Create the 2-input LUT acting as the reset/set mux.
  set lut_cell [create_cell -reference LUT2 [unique_cell_name $prefix_name]]
  puts "INFO: Created lut cell: '$lut_cell'"
  set_property INIT $lut_init $lut_cell

  # Resolve all LUT pins before wiring anything.
  set lut_i0_pin [_get_unique_pin $lut_cell "I0" "LUT2"]
  set lut_i1_pin [_get_unique_pin $lut_cell "I1" "LUT2"]
  set lut_o_pin  [_get_unique_pin $lut_cell "O"  "LUT2"]

  connect_net -net $reg_d_src_net -objects $lut_i0_pin -hierarchical
  if {$debug} {puts "DEBUG: Connected net '$reg_d_src_net' to pin '$lut_i0_pin'."}
  connect_net -net $reg_r_src_net -objects $lut_i1_pin -hierarchical
  if {$debug} {puts "DEBUG: Connected net '$reg_r_src_net' to pin '$lut_i1_pin'."}

  return $lut_o_pin
}
# Return the P pin of a VCC (constant 1) cell.
# The first cell returned by get_cells with REF_NAME == VCC is reused; when
# there is none, a new VCC cell is created under a unique name derived from
# <prefix_name>. Zero or multiple P pins on that cell is a fatal error.
proc getOrCreateVCCPin {prefix_name} {
  variable debug
  set existing [get_cells -quiet -filter {REF_NAME == VCC}]
  if {[llength $existing] != 0} {
    set vcc_cell [lindex $existing 0]
  } else {
    set vcc_cell [create_cell -reference VCC [unique_cell_name $prefix_name]]
    puts "INFO: Created VCC cell: '$vcc_cell'"
  }
  set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}]
  set pin_count [llength $vcc_pin]
  if {$pin_count == 0} {
    puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'."
    exit -1
  }
  if {$pin_count > 1} {
    puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'."
    exit -1
  }
  return $vcc_pin
}
# Return the G pin of a GND (constant 0) cell.
# The first cell returned by get_cells with REF_NAME == GND is reused; when
# there is none, a new GND cell is created under a unique name derived from
# <prefix_name>. Zero or multiple G pins on that cell is a fatal error.
proc getOrCreateGNDPin {prefix_name} {
  variable debug
  set existing [get_cells -quiet -filter {REF_NAME == GND}]
  if {[llength $existing] != 0} {
    set gnd_cell [lindex $existing 0]
  } else {
    set gnd_cell [create_cell -reference GND [unique_cell_name $prefix_name]]
    puts "INFO: Created GND cell: '$gnd_cell'"
  }
  set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}]
  set pin_count [llength $gnd_pin]
  if {$pin_count == 0} {
    puts "ERROR: No GND pin found on GND cell '$gnd_cell'."
    exit -1
  }
  if {$pin_count > 1} {
    puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'."
    exit -1
  }
  return $gnd_pin
}
# List everything that <input_net> drives: leaf pins with DIRECTION == "IN"
# followed by ports with DIRECTION == "OUT".
# When the list is empty, print_error is called with <should_exist> as its
# do_exit flag. Returns the (possibly empty) list.
proc find_net_sinks {input_net {should_exist 1}} {
  set leaf_inputs [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "IN"}]
  set out_ports [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "OUT"}]
  set sinks {}
  foreach endpoint $leaf_inputs { lappend sinks $endpoint }
  foreach endpoint $out_ports { lappend sinks $endpoint }
  if {[llength $sinks] == 0} {
    print_error "No sink found for '$input_net'." $should_exist
  }
  return $sinks
}
# Return the object driving <input_net>: a leaf pin with DIRECTION == "OUT",
# or, when there is no such pin, a port with DIRECTION == "IN".
# Several candidates produce a warning and the first one is returned. When no
# driver exists, print_error is called with <should_exist> as its do_exit flag
# and, if it returns, the empty port list is returned.
proc find_net_driver {input_net {should_exist 1}} {
  set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}]
  set pin_count [llength $driverPins]
  if {$pin_count == 1} {
    return $driverPins
  }
  if {$pin_count > 1} {
    puts "WARNING: Multiple driver pins found for '$input_net'."
    return [lindex $driverPins 0]
  }

  # No driving pin: fall back to ports.
  set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}]
  set port_count [llength $driverPorts]
  if {$port_count > 1} {
    puts "WARNING: Multiple driver ports found for '$input_net'."
    return [lindex $driverPorts 0]
  }
  if {$port_count == 0} {
    print_error "No driver found for '$input_net'." $should_exist
  }
  return $driverPorts
}
# Return the driver (pin or port) of the net attached to <input_pin>.
#   should_exist - when true (default) a missing net or missing driver is
#                  fatal; when false it is only reported as a warning and an
#                  empty string is returned.
# More than one net on the pin is always a fatal error.
proc find_pin_driver {input_pin {should_exist 1}} {
  set net [get_nets -quiet -of_objects $input_pin]
  set net_count [llength $net]
  if {$net_count > 1} {
    puts "ERROR: Multiple nets connected to pin '$input_pin'."
    exit -1
  }
  if {$net_count == 0} {
    print_error "No net connected to pin '$input_pin'." $should_exist
    # Only reached when should_exist is false: there is no net to inspect,
    # so report "no driver" instead of querying an empty net.
    return ""
  }
  # Forward the flag so a tolerated lookup is not turned into a fatal one.
  return [find_net_driver $net $should_exist]
}
# For every net in <nets>, derive a counterpart name by applying
# str_replace <match> -> <repl> to it and look that net up in <cell> with
# get_cell_net. Returns the counterparts in the same order as <nets>.
# It is a fatal error when no counterpart, or fewer counterparts than input
# nets, are found.
proc find_matching_nets {cell nets match repl} {
  set resolved {}
  foreach net $nets {
    set counterpart [get_cell_net $cell [str_replace $net $match $repl]]
    if {$counterpart == ""} { continue }
    lappend resolved $counterpart
  }
  set found [llength $resolved]
  if {$found == 0} {
    puts "ERROR: No matching nets found for '$nets'."
    exit -1
  }
  if {$found != [llength $nets]} {
    puts "ERROR: Mismatch in number of matching nets."
    exit -1
  }
  return $resolved
}
# Re-drive every sink of <net> from <source_pin>.
# Sinks are looked up with should_exist = 0, so a net without sinks only
# produces a warning and nothing is rewired.
proc replace_net_source {net source_pin} {
  set sinks [find_net_sinks $net 0]
  foreach sink $sinks {
    replace_pin_source $sink $source_pin
  }
}
# Patch one async-BRAM patch instance <inst> in the opened netlist.
#
# Steps performed, in order:
#  1. Look up, inside <inst>, the nets raddr_w[...] (read address),
#     read_s, is_raddr_reg, and the raddr_s[...] net matching each raddr_w bit.
#  2. For every raddr_w bit, find its driver. While the driver is an FDRE/FDSE
#     register, build a LUT2 computing that register's next value
#     (create_register_next) and remember its output pin; on the first bit also
#     remember the driver of the register's CE pin. The scan stops at the first
#     bit that is not driven by such a register.
#  3. If every bit was registered: rewire the sinks of raddr_s[i] to the
#     next-value LUT outputs, the sinks of read_s to the CE driver, and the
#     sinks of is_raddr_reg to constant 1 (VCC).
#     Otherwise: rewire the sinks of is_raddr_reg to constant 0 (GND).
#  4. Remove every cell under <inst> whose name ends in "placeholder".
#
# Any lookup failure in the helpers is fatal (exit -1).
proc resolve_async_bram {inst} {
variable debug
puts "INFO: Resolving asynchronous BRAM patch: '$inst'."
# The patterns match both scalar nets and bus bits ("name" or "name[<n>]").
set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"]
set read_s_net [find_cell_nets $inst "read_s$"]
set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"]
# raddr_s nets, in the same order as raddr_w_nets (the bit index is carried over via \1)
set raddr_s_nets [find_matching_nets $inst $raddr_w_nets "raddr_w(\\\[\\d+\\\])?$" "raddr_s\\1"]
# One LUT output pin per registered address bit, in raddr_w_nets order
set reg_next_pins {}
# Driver of the address registers' clock-enable; resolved once, from the first registered bit
set reg_ce_src_pin ""
foreach raddr_w_net $raddr_w_nets {
if {$debug} {puts "DEBUG: Processing raddr_w net: '$raddr_w_net'"}
# Find raddr_w_net's driver pin
set raddr_src_pin [find_net_driver $raddr_w_net]
if {$debug} {puts "DEBUG: raddr_src_pin: '$raddr_src_pin'"}
# Get the driver cell
set raddr_src_cell [get_cells -of_objects $raddr_src_pin]
if {[llength $raddr_src_cell] == 0} {
puts "ERROR: No source cell found connected to pin '$raddr_src_pin'."
exit -1
} elseif {[llength $raddr_src_cell] > 1} {
puts "ERROR: Multiple source cells found connected to pin '$raddr_src_pin'."
exit -1
}
# Check driver type
set driver_type [get_property REF_NAME $raddr_src_cell]
if {$driver_type == "FDRE" || $driver_type == "FDSE"} {
if {$debug} {puts "DEBUG: Net '$raddr_w_net' is registered, driver_type='$driver_type'"}
} else {
puts "WARNING: Net '$raddr_w_net' is not be registered, driver_type='$driver_type'"
# Stop scanning: reg_next_pins stays shorter than raddr_w_nets, which selects the GND path below
break
}
# Create register next cell and return output pin
set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"]
if {$reg_next_pin == ""} {
puts "ERROR: failed to create register next value for '$raddr_src_cell'."
exit -1
}
if {$debug} {puts "DEBUG: reg_next_pin: '$reg_next_pin'"}
lappend reg_next_pins $reg_next_pin
# Find the CE pin on raddr_src_cell
if {$reg_ce_src_pin == ""} {
set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}]
if {[llength $reg_ce_pin] == 0} {
puts "ERROR: No CE pin found on register cell '$raddr_src_cell'."
exit -1
} elseif {[llength $reg_ce_pin] > 1} {
puts "ERROR: Multiple CE pins found on register cell '$raddr_src_cell'."
exit -1
}
if {$debug} {puts "DEBUG: reg_ce_pin: '$reg_ce_pin'"}
set reg_ce_src_pin [find_pin_driver $reg_ce_pin]
if {$reg_ce_src_pin == ""} {
puts "ERROR: No source pin found connected to '$reg_ce_pin'."
exit -1
}
if {$debug} {puts "DEBUG: reg_ce_src_pin: '$reg_ce_src_pin'"}
}
}
# do we have a fully registered read address?
if {[llength $reg_next_pins] == [llength $raddr_w_nets]} {
puts "INFO: Fully registered read address detected."
set addr_width [llength $raddr_w_nets]
for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} {
set raddr_w_net [lindex $raddr_w_nets $addr_idx]
set raddr_s_net [lindex $raddr_s_nets $addr_idx]
set reg_next_pin [lindex $reg_next_pins $addr_idx]
puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins."
# Connect reg_next_pin to all input pins attached to raddr_s_net
replace_net_source $raddr_s_net $reg_next_pin
}
# Connect reg_ce_src_pin to all input pins attached to read_s_net
puts "INFO: Connecting pin '$reg_ce_src_pin' to '$read_s_net's pins."
replace_net_source $read_s_net $reg_ce_src_pin
# Create Const<1>'s pin
set vcc_pin [getOrCreateVCCPin "$inst/VCC"]
# Connect vcc_pin to all input pins attached to is_raddr_reg_net
puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins."
replace_net_source $is_raddr_reg_net $vcc_pin
} else {
puts "WARNING: Not all read addresses are registered!"
# Create Const<0>'s pin
set gnd_pin [getOrCreateGNDPin "$inst/GND"]
# Connect gnd_pin to all input pins attached to is_raddr_reg_net
puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins."
replace_net_source $is_raddr_reg_net $gnd_pin
}
# Remove all placeholder cells
foreach cell [find_nested_cells $inst "placeholder$"] {
remove_cell $cell
if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."}
}
}
# Find every cell whose REF_NAME contains "VX_async_ram_patch" and run
# resolve_async_bram on each. All cells are listed first, then patched, so the
# set being processed is fixed before the netlist is modified.
proc resolve_async_brams {} {
  set candidates [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}]
  set bram_patch_cells {}
  foreach candidate $candidates {
    puts "INFO: Found async BRAM patch cell: '$candidate'."
    lappend bram_patch_cells $candidate
  }
  if {[llength $bram_patch_cells] == 0} {
    puts "INFO: No async BRAM patch cells found in the design."
    return
  }
  foreach patch_cell $bram_patch_cells {
    resolve_async_bram $patch_cell
  }
}
}
# Invoke the procedure to resolve async BRAM
vortex::resolve_async_brams

View file

@ -0,0 +1,71 @@
# Write the DOT description of the current design to the open channel
# <dot_file>: one box node per cell, one ellipse node per port, and one edge
# per (driver cell, sink cell) pair of every net that has an OUT pin.
proc _export_netlist_body {dot_file} {
  # Start the DOT graph definition
  puts $dot_file "digraph Netlist {"
  puts $dot_file "rankdir=LR;" ;# Set the graph direction from left to right

  # Cells become box nodes keyed by their hierarchical name
  foreach cell [get_cells -hierarchical] {
    set cell_name [get_property NAME $cell]
    set cell_type [get_property REF_NAME $cell]
    puts $dot_file "\"$cell_name\" \[label=\"$cell_name\\n($cell_type)\", shape=box\];"
  }

  # Ports become filled ellipses: inputs light blue, everything else light green
  foreach port [get_ports] {
    set port_name [get_property NAME $port]
    if {[get_property DIRECTION $port] == "IN"} {
      set color "lightblue"
    } else {
      set color "lightgreen"
    }
    puts $dot_file "\"$port_name\" \[label=\"$port_name\", shape=ellipse, style=filled, fillcolor=$color\];"
  }

  # Nets become edges between the cell nodes declared above
  foreach net [get_nets -hierarchical] {
    set net_name [get_property NAME $net]
    set source_cell ""
    set source_pin ""
    set sinks {}
    foreach pin [get_pins -of_objects $net] {
      # A pin's NAME already includes its cell path ("<cell>/<pin>"), so the
      # edge endpoint is the cell name alone and only the last path component
      # is kept as the pin label.
      set cell_name [get_property NAME [get_cells -of_objects $pin]]
      set pin_label [lindex [split [get_property NAME $pin] "/"] end]
      if {[get_property DIRECTION $pin] == "OUT"} {
        set source_cell $cell_name
        set source_pin $pin_label
      } else {
        lappend sinks [list $cell_name $pin_label]
      }
    }
    # Nets without an OUT pin (e.g. driven only by a port) produce no edge
    if {$source_cell != ""} {
      foreach sink $sinks {
        foreach {sink_cell sink_pin} $sink break
        puts $dot_file "\"$source_cell\" -> \"$sink_cell\" \[label=\"$net_name\", taillabel=\"$source_pin\", headlabel=\"$sink_pin\"\];"
      }
    }
  }

  # End the DOT graph definition
  puts $dot_file "}"
}

# Export the current netlist to the Graphviz DOT file <dot_file_name>.
# The file is created or truncated. It is closed even when a netlist query
# fails, in which case the original error is re-raised to the caller.
proc export_netlist {dot_file_name} {
  set dot_file [open $dot_file_name "w"]
  set failed [catch {_export_netlist_body $dot_file} result options]
  close $dot_file
  if {$failed} {
    return -options $options $result
  }
  puts "Netlist exported to DOT file: $dot_file_name"
}
# Run the export function
export_netlist "netlist.dot"

View file

@ -31,9 +31,9 @@ project_1/sources.txt:
build: $(PROJECT).xpr
$(PROJECT).xpr: project_1/sources.txt
ifdef FPU_IP
MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR)
MAX_JOBS=$(JOBS) FPU_IP=project_1/ip SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc
else
MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR)
MAX_JOBS=$(JOBS) SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc
endif
clean:

View file

@ -14,9 +14,9 @@
# Start time
set start_time [clock seconds]
if { $::argc != 5 } {
puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n"
puts "Usage: $::argv0 <top_module> <device_part> <vcs_file> <xdc_file> <tool_dir>\n"
if { $::argc != 4 } {
puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n"
puts "Usage: $::argv0 <top_module> <device_part> <vcs_file> <xdc_file>\n"
exit
}
@ -27,13 +27,16 @@ set top_module [lindex $::argv 0]
set device_part [lindex $::argv 1]
set vcs_file [lindex $::argv 2]
set xdc_file [lindex $::argv 3]
set tool_dir [lindex $::argv 4]
set script_dir $::env(SCRIPT_DIR)
set source_dir [file dirname [info script]]
puts "Using top_module=$top_module"
puts "Using device_part=$device_part"
puts "Using vcs_file=$vcs_file"
puts "Using xdc_file=$xdc_file"
puts "Using tool_dir=$tool_dir"
puts "Using script_dir=$script_dir"
puts "Using source_dir=$source_dir"
# Set the number of jobs based on MAX_JOBS environment variable
if {[info exists ::env(MAX_JOBS)]} {
@ -48,10 +51,10 @@ if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
set argv [list $ip_dir $device_part]
set argc 2
source ${tool_dir}/xilinx_ip_gen.tcl
source ${script_dir}/xilinx_ip_gen.tcl
}
source "${tool_dir}/parse_vcs_list.tcl"
source "${script_dir}/parse_vcs_list.tcl"
set vlist [parse_vcs_list "${vcs_file}"]
set vsources_list [lindex $vlist 0]
@ -84,37 +87,52 @@ if {[info exists ::env(FPU_IP)]} {
update_compile_order -fileset sources_1
# Synthesis
set_property top $top_module [current_fileset]
set_property \
-name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \
-value {-mode out_of_context -flatten_hierarchy "rebuilt"} \
-objects [get_runs synth_1]
# Synthesis
# register compilation hooks
#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1]
#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1]
set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1]
#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1]
if {$num_jobs != 0} {
launch_runs synth_1 -jobs $num_jobs
launch_runs synth_1 -verbose -jobs $num_jobs
} else {
launch_runs synth_1
launch_runs synth_1 -verbose
}
wait_on_run synth_1
open_run synth_1
write_checkpoint -force post_synth.dcp
report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages
report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages
# Implementation
if {$num_jobs != 0} {
launch_runs impl_1 -jobs $num_jobs
launch_runs impl_1 -verbose -jobs $num_jobs
} else {
launch_runs impl_1
launch_runs impl_1 -verbose
}
wait_on_run impl_1
open_run impl_1
write_checkpoint -force post_impl.dcp
report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages
# Generate the synthesis report
report_place_status -file place.rpt
report_route_status -file route.rpt
report_timing_summary -file timing.rpt
# Generate timing report
report_timing -nworst 10 -delay_type max -sort_by group -file timing.rpt
# Generate power and drc reports
report_power -file power.rpt
report_drc -file drc.rpt
@ -125,4 +143,4 @@ set elapsed_time [expr {[clock seconds] - $start_time}]
set hours [format "%02d" [expr {$elapsed_time / 3600}]]
set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]]
set seconds [format "%02d" [expr {$elapsed_time % 60}]]
puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s"
puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s"

View file

@ -1,4 +1,4 @@
PROJECT = Unittest
PROJECT = VX_fifo_queue
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv

View file

@ -24,11 +24,8 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
endif
TEX_INCLUDE = -I$(RTL_DIR)/tex
RASTER_INCLUDE = -I$(RTL_DIR)/raster
OM_INCLUDE = -I$(RTL_DIR)/om
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE)
RTL_INCLUDE += $(FPU_INCLUDE)
RTL_INCLUDE += -I$(SRC_DIR)
# compilation flags

View file

@ -121,8 +121,8 @@ proc run_setup {} {
# None
# Set 'sim_1' fileset file properties for local files
set file "testbench.v"
set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]]
set file "testbench.v"
set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]]
set_property -name "file_type" -value "Verilog" -objects $file_obj
set_property -name "is_enabled" -value "1" -objects $file_obj
set_property -name "is_global_include" -value "0" -objects $file_obj
@ -300,7 +300,7 @@ set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]]
CONFIG.Assume_Synchronous_Clk {true} \
CONFIG.Byte_Size {8} \
CONFIG.Load_Init_File {true} \
CONFIG.Coe_File {@CURRENTDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \
CONFIG.Coe_File {@BUILDDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \
CONFIG.EN_SAFETY_CKT {true} \
CONFIG.Enable_32bit_Address {true} \
CONFIG.Fill_Remaining_Memory_Locations {false} \

View file

@ -76,22 +76,21 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c)
# include sources
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
RTL_PKGS += $(RTL_DIR)/tex/VX_tex_pkg.sv $(RTL_DIR)/raster/VX_raster_pkg.sv $(RTL_DIR)/om/VX_om_pkg.sv
FPU_INCLUDE = -I$(RTL_DIR)/fpu
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
endif
TEX_INCLUDE = -I$(RTL_DIR)/tex
RASTER_INCLUDE = -I$(RTL_DIR)/raster
OM_INCLUDE = -I$(RTL_DIR)/om
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR)
RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE)
RTL_INCLUDE += $(FPU_INCLUDE)
# Kernel compiler global settings
VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache
VPP_FLAGS += --vivado.synth.jobs $(JOBS) --vivado.impl.jobs $(JOBS)
# register compilation hooks
VPP_FLAGS += --xp "vivado_prop:run.impl_1.STEPS.OPT_DESIGN.TCL.PRE={$(SCRIPT_DIR)/xilinx_async_bram_patch.tcl}"
# load platform settings
include $(SRC_DIR)/platforms.mk
@ -178,6 +177,7 @@ $(BIN_DIR)/emconfig.json:
report: $(XCLBIN_CONTAINER)
ifeq ($(TARGET), hw)
cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin
cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin
cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin

View file

@ -1,5 +1,4 @@
all:
$(MAKE) -C cache
$(MAKE) -C generic_queue
$(MAKE) -C mem_streamer
$(MAKE) -C cache_top
@ -9,7 +8,6 @@ all:
$(MAKE) -C mem_unit_top
run:
$(MAKE) -C cache run
$(MAKE) -C generic_queue run
$(MAKE) -C mem_streamer run
$(MAKE) -C cache_top run
@ -19,7 +17,6 @@ run:
$(MAKE) -C mem_unit_top run
clean:
$(MAKE) -C cache clean
$(MAKE) -C generic_queue clean
$(MAKE) -C mem_streamer clean
$(MAKE) -C cache_top clean

View file

@ -1,26 +0,0 @@
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
PROJECT := cache
RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
CXXFLAGS += -I$(ROOT_DIR)/hw
SRCS := $(DPI_DIR)/util_dpi.cpp
SRCS += $(SRC_DIR)/cachesim.cpp $(SRC_DIR)/testbench.cpp
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
TOP := VX_cache_top
include ../common.mk

View file

@ -1,354 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cachesim.h"
#include <fstream>
#include <iomanip>
#include <iostream>
#include <vector>
#include <bitset>
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
static uint64_t timestamp = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
// Returns the simulator's current timestamp counter as a double.
double sc_time_stamp() {
  return static_cast<double>(timestamp);
}
// Tracing is on while the timestamp lies in [trace_start_time, trace_stop_time),
// or whenever it has been switched on explicitly via sim_trace_enable().
bool sim_trace_enabled() {
  const bool in_window = (timestamp >= trace_start_time) && (timestamp < trace_stop_time);
  return in_window ? true : trace_enabled;
}
// Explicitly switches tracing on or off outside the configured time window.
void sim_trace_enable(bool enable) {
  trace_enabled = enable;
}
// Creates the Verilated cache model; no RAM is attached yet.
CacheSim::CacheSim() {
  // bookkeeping starts out idle
  ram_ = nullptr;
  mem_rsp_active_ = false;
  snp_req_active_ = false;

  // force random values for uninitialized signals (must precede model creation)
  Verilated::randReset(2);

  // instantiate the RTL model
  cache_ = new VVX_cache_top();

#ifdef VCD_OUTPUT
  // waveform dump to trace.vcd
  Verilated::traceEverOn(true);
  tfp_ = new VerilatedVcdC;
  cache_->trace(tfp_, 99);
  tfp_->open("trace.vcd");
#endif
}
// Releases the RTL model, the VCD writer and every buffer still owned by the
// simulator's response bookkeeping.
CacheSim::~CacheSim() {
#ifdef VCD_OUTPUT
  tfp_->close();
  delete tfp_; // allocated with new in the constructor
#endif
  delete cache_;

  // response buffers are allocated with new[] in send_req()
  for (auto& entry : core_rsp_vec_) {
    delete[] entry.second;
  }
  core_rsp_vec_.clear();

  // memory responses that were never delivered still hold malloc'ed blocks
  for (auto& pending : mem_rsp_vec_) {
    free(pending.data);
  }
  mem_rsp_vec_.clear();

  // NOTE(review): the core_req_t objects queued in core_req_vec_ are passed in
  // by the caller of send_req(); they are presumed caller-owned and left alone.
}
// Attaches the backing RAM and discards any memory responses still pending
// from a previously attached one.
void CacheSim::attach_ram(RAM* ram) {
  ram_ = ram;
  // each pending response owns a malloc'ed block (see eval_mem_bus)
  for (auto& pending : mem_rsp_vec_) {
    free(pending.data);
  }
  mem_rsp_vec_.clear();
}
// Pulses the model's reset for one step, runs one more step with reset
// released, then drops all memory responses still in flight.
void CacheSim::reset() {
#ifndef NDEBUG
  std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif

  cache_->reset = 1;
  this->step();

  cache_->reset = 0;
  this->step();

  // each pending response owns a malloc'ed block (see eval_mem_bus)
  for (auto& pending : mem_rsp_vec_) {
    free(pending.data);
  }
  mem_rsp_vec_.clear();
}
void CacheSim::step() {
//std::cout << timestamp << ": [sim] step()" << std::endl;
//toggle clock
cache_->clk = 0;
this->eval();
cache_->clk = 1;
this->eval();
//handle core and memory reqs and rsps
this->eval_reqs();
this->eval_rsps();
this->eval_mem_bus();
timestamp++;
}
// Evaluates the RTL model once, dumps the waveform sample when VCD output is
// compiled in, and advances the timestamp by one tick.
void CacheSim::eval() {
  cache_->eval();
#ifdef VCD_OUTPUT
  tfp_->dump(timestamp);
#endif
  timestamp += 1;
}
// Steps the simulation until the core interface has been idle (no request and
// no response valid) for 301 steps, collecting core responses along the way.
void CacheSim::run() {
  this->step();

  int idle_budget = 300;     // remaining idle steps before the loop ends
  int stall_countdown = 30;  // placeholder timer for the disabled stall/snoop hooks

  while (idle_budget >= 0) {
    this->step();
    display_miss();

    if (cache_->core_rsp_valid) {
      get_core_rsp();
    }

    const bool core_idle = !cache_->core_req_valid && !cache_->core_rsp_valid;
    if (core_idle) {
      --idle_budget;
    }

    --stall_countdown;
    if (stall_countdown == 20) {
      //stall_mem();
      //send_snoop_req();
      --stall_countdown;
    }
  }
}
// Deasserts the core request valid signal (no request is offered this cycle).
void CacheSim::clear_req() {
  cache_->core_req_valid = 0;
}
// Queues a core request and makes sure a 4-word response buffer exists for
// its tag. The buffer is zero-initialized so reading it before the response
// arrives is well defined, and an existing buffer for the same tag is reused
// instead of leaking a fresh allocation.
void CacheSim::send_req(core_req_t *req) {
  core_req_vec_.push(req);
  if (core_rsp_vec_.find(req->tag) == core_rsp_vec_.end()) {
    unsigned int *data = new unsigned int[4]();
    core_rsp_vec_.insert(std::pair<unsigned int, unsigned int*>(req->tag, data));
  }
}
// True when the model's core_req_ready output is non-zero.
bool CacheSim::get_core_req_ready() {
  return cache_->core_req_ready != 0;
}
// True when the model's core_rsp_ready signal is non-zero.
bool CacheSim::get_core_rsp_ready() {
  return cache_->core_rsp_ready != 0;
}
void CacheSim::eval_reqs(){
//check to see if cache is accepting reqs
if(!core_req_vec_.empty() && cache_->core_req_ready){
core_req_t *req = core_req_vec_.front();
cache_->core_req_valid = req->valid;
cache_->core_req_rw = req->rw;
cache_->core_req_byteen = req->byteen;
cache_->core_req_addr[0] = req->addr[0];
cache_->core_req_addr[1] = req->addr[1];
cache_->core_req_addr[2] = req->addr[2];
cache_->core_req_addr[3] = req->addr[3];
cache_->core_req_data[0] = req->data[0];
cache_->core_req_data[1] = req->data[1];
cache_->core_req_data[2] = req->data[2];
cache_->core_req_data[3] = req->data[3];
cache_->core_req_tag = req->tag;
core_req_vec_.pop();
} else {
clear_req();
}
}
// Captures a core response: when core_rsp_valid is set, the four response
// words are copied into the buffer registered for the response tag.
// The lookup uses at(), so it fails for a tag that was never registered.
void CacheSim::eval_rsps() {
  if (!cache_->core_rsp_valid) {
    return;
  }
  for (int lane = 0; lane < 4; ++lane) {
    core_rsp_vec_.at(cache_->core_rsp_tag)[lane] = cache_->core_rsp_data[lane];
  }
}
// Deasserts mem_req_ready so the cache sees the memory side as not ready.
void CacheSim::stall_mem() {
  cache_->mem_req_ready = 0;
}
// Snoop request stub: currently a no-op. The commented-out body shows the
// snoop signals (valid, address, invalidate, tag) it used to drive.
void CacheSim::send_snoop_req(){
/*cache_->snp_req_valid = 1;
cache_->snp_req_addr = 0x12222222;
cache_->snp_req_invalidate = 1;
cache_->snp_req_tag = 0xff; */
}
// Emulates the memory side of the cache for one step:
//  - ages all pending read responses and picks the first one whose latency
//    has elapsed,
//  - presents that response to the cache (one response in flight at a time),
//  - applies write requests to the RAM immediately and queues read requests
//    with MEM_LATENCY cycles of delay,
//  - drives mem_req_ready according to the (optional) stall model.
// Without an attached RAM the memory side is held not-ready.
void CacheSim::eval_mem_bus() {
  if (ram_ == nullptr) {
    cache_->mem_req_ready = 0;
    return;
  }

  // schedule memory responses
  int dequeue_index = -1;
  for (int i = 0, n = static_cast<int>(mem_rsp_vec_.size()); i < n; ++i) {
    if (mem_rsp_vec_[i].cycles_left > 0) {
      mem_rsp_vec_[i].cycles_left -= 1;
    }
    if ((dequeue_index == -1)
     && (mem_rsp_vec_[i].cycles_left == 0)) {
      dequeue_index = i;
    }
  }

  // retire the in-flight response once the cache has accepted it
  if (mem_rsp_active_
   && cache_->mem_rsp_valid
   && cache_->mem_rsp_ready) {
    mem_rsp_active_ = false;
  }

  // send memory response
  if (!mem_rsp_active_) {
    if (dequeue_index != -1) { // time to respond to the request
      cache_->mem_rsp_valid = 1;
      // copy data from the rsp queue to the cache module
      memcpy(cache_->mem_rsp_data.data(), mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE);
      cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag;
      free(mem_rsp_vec_[dequeue_index].data); // the queue entry owns the block
      mem_rsp_vec_.erase(mem_rsp_vec_.begin() + dequeue_index);
      mem_rsp_active_ = true;
    } else {
      cache_->mem_rsp_valid = 0;
    }
  }

  // handle memory stalls
  bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
  if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
    mem_stalled = true;
  } else
  if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) {
    mem_stalled = true;
  }
#endif

  // process memory requests
  if (!mem_stalled) {
    if (cache_->mem_req_valid) {
      if (cache_->mem_req_rw) { // write = 1
        uint64_t byteen = cache_->mem_req_byteen;
        uint64_t base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE);
        uint8_t* data = reinterpret_cast<uint8_t*>(cache_->mem_req_data.data());
        // only bytes enabled in the mask are written
        for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
          if ((byteen >> i) & 0x1) {
            (*ram_)[base_addr + i] = data[i];
          }
        }
      } else {
        mem_req_t mem_req;
        mem_req.cycles_left = MEM_LATENCY;
        mem_req.data = (uint8_t*)malloc(MEM_BLOCK_SIZE);
        mem_req.tag = cache_->mem_req_tag;
        ram_->read(cache_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data);
        mem_rsp_vec_.push_back(mem_req);
      }
    }
  }

  // logical NOT: a bitwise ~ on a bool yields -1 / -2, never a clean 0 or 1
  cache_->mem_req_ready = !mem_stalled;
}
// Compares four expected words against the response buffer recorded for `tag`.
//
// Returns true as soon as any one of data[0..3] equals any one of the four
// recorded response words, false when there is no match at all.
// std::map::at throws std::out_of_range if no buffer exists for `tag`.
//
// NOTE(review): despite the name this is an "any word matches" check, not an
// element-wise equality test - confirm that is the intended contract.
bool CacheSim::assert_equal(unsigned int* data, unsigned int tag) {
  const unsigned int *recorded = core_rsp_vec_.at(tag);
  for (int d = 0; d < 4; ++d) {
    for (int r = 0; r < 4; ++r) {
      if (data[d] == recorded[r]) {
        return true;
      }
    }
  }
  return false;
}
//DEBUG
// Currently a no-op: the miss-vector dump below is commented out, so this
// function prints nothing.
void CacheSim::display_miss(){
//int i = (unsigned int)cache_->miss_vec;
//std::bitset<8> x(i);
//if (i) std::cout << "Miss Vec " << x << std::endl;
//std::cout << "Miss Vec 0" << cache_->miss_vec[0] << std::endl;
}
// Copies the cache model's four core_rsp_data words into `rsp`.
// Note that, despite the name, it is the response data that is read.
void CacheSim::get_core_req(unsigned int (&rsp)[4]) {
  for (int lane = 0; lane < 4; ++lane) {
    rsp[lane] = cache_->core_rsp_data[lane];
  }
}
// Prints the core response signals (valid, four data words, tag) to stdout
// in hexadecimal, one per line.
void CacheSim::get_core_rsp() {
  static const char *const kDataLabels[4] = {
    "core_rsp_data[0]: ",
    "core_rsp_data[1]: ",
    "core_rsp_data[2]: ",
    "core_rsp_data[3]: "
  };
  std::ostream &out = std::cout;

  // valid goes through a char first, then is widened so it prints as a number
  const char valid_bits = cache_->core_rsp_valid;
  out << std::hex << "core_rsp_valid: " << (unsigned int) valid_bits << std::endl;
  for (int lane = 0; lane < 4; ++lane) {
    out << std::hex << kDataLabels[lane] << cache_->core_rsp_data[lane] << std::endl;
  }
  out << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl;
}
// Prints the cache model's memory request signals to stdout, one per line,
// with the stream in hexadecimal mode.
void CacheSim::get_mem_req() {
  std::ostream &out = std::cout;
  out << std::hex << "mem_req_valid: " << cache_->mem_req_valid << std::endl;
  out << std::hex << "mem_req_rw: " << cache_->mem_req_rw << std::endl;
  out << std::hex << "mem_req_byteen: " << cache_->mem_req_byteen << std::endl;
  out << std::hex << "mem_req_addr: " << cache_->mem_req_addr << std::endl;
  out << std::hex << "mem_req_data: " << cache_->mem_req_data << std::endl;
  out << std::hex << "mem_req_tag: " << cache_->mem_req_tag << std::endl;
}
// Prints the cache model's memory response signals to stdout, one per line,
// with the stream in hexadecimal mode.
void CacheSim::get_mem_rsp() {
  std::ostream &out = std::cout;
  out << std::hex << "mem_rsp_valid: " << cache_->mem_rsp_valid << std::endl;
  out << std::hex << "mem_rsp_data: " << cache_->mem_rsp_data << std::endl;
  out << std::hex << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl;
  out << std::hex << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl;
}

View file

@ -1,104 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "VVX_cache_top.h"
#include "VVX_cache_top__Syms.h"
#include "verilated.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
#include <VX_config.h>
#include "ram.h"
#include <ostream>
#include <vector>
#include <queue>
#define ENABLE_MEM_STALLS
#define MEM_LATENCY 100
#define MEM_RQ_SIZE 16
#define MEM_STALLS_MODULO 16
// One queued memory read response awaiting delivery to the cache.
struct mem_req_t {
  int cycles_left;  // remaining latency; the response is eligible at 0
  uint8_t *data;    // payload buffer (allocated and freed by the simulator)
  unsigned tag;     // tag echoed back with the response
};
// One core-side request as driven onto the cache's core_req_* signals.
struct core_req_t {
  char valid;        // copied to core_req_valid
  char rw;           // copied to core_req_rw
  unsigned byteen;   // copied to core_req_byteen
  unsigned *addr;    // four address words (indices 0..3 are read)
  unsigned *data;    // four data words (indices 0..3 are read)
  unsigned int tag;  // copied to core_req_tag
};
// Test harness wrapping a Verilated VX_cache_top model (cache_) together with
// an optional backing RAM (ram_). Core requests are queued with send_req()
// and fed to the model; memory-side traffic is served from the RAM.
//
// NOTE(review): std::map is used below, but this header does not include
// <map> directly - confirm it arrives through one of the other includes.
class CacheSim {
public:
CacheSim();
virtual ~CacheSim();
bool busy();
void reset();
void step();
void wait(uint32_t cycles);
void attach_ram(RAM* ram);
void run(); //run until all reqs are empty
//req/rsp
void send_req(core_req_t *req);
void clear_req();
// drives mem_req_ready to 0 on the model
void stall_mem();
// currently a no-op (its body is commented out)
void send_snoop_req();
void send_snp_fwd_in();
//assert funcs
// true when any of data[0..3] equals any of the four words recorded for tag
bool assert_equal(unsigned int* data, unsigned int tag);
//debug funcs
// prints the mem_req_* signals to stdout
void get_mem_req();
// copies the model's four core_rsp_data words into rsp
void get_core_req(unsigned int (&rsp)[4]);
// prints the core_rsp_* signals to stdout
void get_core_rsp();
bool get_core_req_ready();
bool get_core_rsp_ready();
// prints the mem_rsp_* signals to stdout
void get_mem_rsp();
// currently a no-op (its body is commented out)
void display_miss();
private:
void eval();
// presents the front of core_req_vec_ to the model when it is ready
void eval_reqs();
// stores a valid core response into core_rsp_vec_ under its tag
void eval_rsps();
// serves the model's memory requests/responses from ram_
void eval_mem_bus();
// pending core requests, consumed front-first by eval_reqs()
std::queue<core_req_t*> core_req_vec_;
// queued memory read responses with their remaining latency
std::vector<mem_req_t> mem_rsp_vec_;
// response buffers keyed by request tag, written by eval_rsps()
std::map<unsigned int, unsigned int*> core_rsp_vec_;
// non-zero while a memory response is on the bus awaiting acknowledgement
int mem_rsp_active_;
uint32_t snp_req_active_;
uint32_t snp_req_size_;
uint32_t pending_snp_reqs_;
VVX_cache_top* cache_;
// backing memory; eval_mem_bus() holds the port not-ready while this is null
RAM* ram_;
#ifdef VCD_OUTPUT
VerilatedVcdC* tfp_;
#endif
};

View file

@ -1,77 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdio.h>
#include <stdint.h>
// Sparse model of a 32-bit, byte-addressable memory.
//
// The 4 GiB address space is divided into 4096 blocks of 1 MiB. A block is
// allocated the first time any address inside it is touched - by a read as
// well as by a write - and is zero-filled on allocation, so memory that was
// never written reads back as 0.
//
// The object owns its blocks, so copying is disabled.
class RAM {
private:
  enum : uint32_t {
    kBlockBits  = 20,                // log2 of the block size
    kBlockSize  = 1u << kBlockBits,  // bytes per block (1 MiB)
    kOffsetMask = kBlockSize - 1,    // selects the offset within a block
    kNumBlocks  = 1u << 12           // blocks covering the 32-bit space
  };

  // Block table; mutable because const accessors allocate lazily.
  mutable uint8_t *mem_[kNumBlocks];

  // Returns a pointer to the byte at `address`, allocating its block if needed.
  uint8_t *get(uint32_t address) const {
    const uint32_t block_addr = address >> kBlockBits;
    const uint32_t block_offset = address & kOffsetMask;
    if (mem_[block_addr] == nullptr) {
      // value-initialised: fresh memory is deterministic (all zeros)
      mem_[block_addr] = new uint8_t[kBlockSize]();
    }
    return mem_[block_addr] + block_offset;
  }

public:
  RAM() {
    for (uint32_t i = 0; i < kNumBlocks; i++) {
      mem_[i] = nullptr;
    }
  }

  ~RAM() {
    this->clear();
  }

  // Copying would make two objects delete the same blocks.
  RAM(const RAM&) = delete;
  RAM& operator=(const RAM&) = delete;

  // Size of the modelled address space in bytes.
  size_t size() const {
    return (1ull << 32);
  }

  // Releases every allocated block; subsequent reads see zeros again.
  void clear() {
    for (uint32_t i = 0; i < kNumBlocks; i++) {
      if (mem_[i]) {
        delete [] mem_[i];
        mem_[i] = nullptr;
      }
    }
  }

  // Copies `length` bytes starting at `address` into `data`.
  // Addresses wrap around modulo 2^32.
  void read(uint32_t address, uint32_t length, uint8_t *data) const {
    for (unsigned i = 0; i < length; i++) {
      data[i] = *this->get(address + i);
    }
  }

  // Copies `length` bytes from `data` into memory starting at `address`.
  // Addresses wrap around modulo 2^32.
  void write(uint32_t address, uint32_t length, const uint8_t *data) {
    for (unsigned i = 0; i < length; i++) {
      *this->get(address + i) = data[i];
    }
  }

  // Byte access; allocates the containing block if necessary.
  uint8_t& operator[](uint32_t address) {
    return *get(address);
  }

  const uint8_t& operator[](uint32_t address) const {
    return *get(address);
  }
};

View file

@ -1,248 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cachesim.h"
#include <iostream>
#include <fstream>
#include <iomanip>
#define VCD_OUTPUT 1
// Basic request/response test: queues one write (rw = 0xf) and one read
// (rw = 0) of the same four addresses, both with tag 0xff, runs the
// simulator and checks the recorded response against the written data.
//
// Returns 1 when CacheSim::assert_equal() reports a match, 0 otherwise.
int REQ_RSP(CacheSim *sim){
  unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // NOTE(review): the requests are heap-allocated and never freed; the
  // simulator stores the raw pointers - confirm who is meant to own them.
  //write req
  core_req_t* write = new core_req_t;
  write->valid = 0xf;
  write->rw = 0xf;
  write->byteen = 0xffff;
  write->addr = addr;
  write->data = data;
  write->tag = 0xff;

  //read req
  core_req_t* read = new core_req_t;
  read->valid = 0xf;
  read->rw = 0;
  read->byteen = 0xffff;
  read->addr = addr;
  read->data = addr;
  read->tag = 0xff;

  // reset the device
  sim->reset();

  //queue reqs
  sim->send_req(write);
  sim->send_req(read);
  sim->run();

  // assert_equal() returns bool, so its result is only ever 0 or 1. The
  // previous `check == 4` comparison could never be true and made this test
  // report failure unconditionally.
  bool check = sim->assert_equal(data, write->tag);
  return check;
}
// Queues one write (tag 0x11) followed by one read (tag 0x22) of the same
// four addresses, runs the simulator, and returns the result of
// assert_equal() for the written data under the write's tag.
int HIT_1(CacheSim *sim){
  unsigned int lane_addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int lane_data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // builds a heap-allocated request with valid = 0xf and byteen = 0xffff
  auto make_req = [](char rw, unsigned int *addr, unsigned int *data, unsigned int tag) {
    core_req_t *req = new core_req_t;
    req->valid = 0xf;
    req->rw = rw;
    req->byteen = 0xffff;
    req->addr = addr;
    req->data = data;
    req->tag = tag;
    return req;
  };
  core_req_t *wr = make_req(0xf, lane_addr, lane_data, 0x11);
  core_req_t *rd = make_req(0, lane_addr, lane_addr, 0x22);

  // reset, enqueue, and run to completion
  sim->reset();
  sim->send_req(wr);
  sim->send_req(rd);
  sim->run();

  return sim->assert_equal(lane_data, wr->tag);
}
// Queues one write to the first address set followed by three reads, one per
// address set (all with tag 0xff), runs the simulator, and returns the result
// of assert_equal() for the written data under the write's tag.
int MISS_1(CacheSim *sim){
  unsigned int first_addr[4]  = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int second_addr[4] = {0x12229222, 0xabbbb4bb, 0xcddd47dd, 0xe4423544};
  unsigned int third_addr[4]  = {0x12223332, 0xabb454bb, 0xcdddeefd, 0xe4447744};
  unsigned int lane_data[4]   = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // builds a heap-allocated request with valid = 0xf, byteen = 0xffff, tag = 0xff
  auto make_req = [&lane_data](char rw, unsigned int *addr) {
    core_req_t *req = new core_req_t;
    req->valid = 0xf;
    req->rw = rw;
    req->byteen = 0xffff;
    req->addr = addr;
    req->data = lane_data;
    req->tag = 0xff;
    return req;
  };
  core_req_t *wr  = make_req(0xf, first_addr);
  core_req_t *rd1 = make_req(0, first_addr);
  core_req_t *rd2 = make_req(0, second_addr);
  core_req_t *rd3 = make_req(0, third_addr);

  // reset, enqueue in issue order, and run to completion
  sim->reset();
  core_req_t *const issue_order[] = {wr, rd1, rd2, rd3};
  for (core_req_t *req : issue_order) {
    sim->send_req(req);
  }
  sim->run();

  return sim->assert_equal(lane_data, wr->tag);
}
// Queues one write followed by one read of the same four addresses (both
// with tag 0xff), runs the simulator, and returns the result of
// assert_equal() for the written data under the write's tag.
int FLUSH(CacheSim *sim){
  unsigned int lane_addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int lane_data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // builds a heap-allocated request with valid = 0xf, byteen = 0xffff, tag = 0xff
  auto make_req = [](char rw, unsigned int *addr, unsigned int *data) {
    core_req_t *req = new core_req_t;
    req->valid = 0xf;
    req->rw = rw;
    req->byteen = 0xffff;
    req->addr = addr;
    req->data = data;
    req->tag = 0xff;
    return req;
  };
  core_req_t *wr = make_req(0xf, lane_addr, lane_data);
  core_req_t *rd = make_req(0, lane_addr, lane_addr);

  // reset, enqueue, and run to completion
  sim->reset();
  sim->send_req(wr);
  sim->send_req(rd);
  sim->run();

  return sim->assert_equal(lane_data, wr->tag);
}
// Queues the same write request ten times followed by one read (all with tag
// 0xff), runs the simulator, and returns the result of assert_equal() for the
// written data under the write's tag.
// Original intent, per the author's note: back pressure happens whenever the
// core is stalled or memory is stalled.
int BACK_PRESSURE(CacheSim *sim){
  unsigned int lane_addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
  unsigned int lane_data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};

  // builds a heap-allocated request with valid = 0xf, byteen = 0xffff, tag = 0xff
  auto make_req = [](char rw, unsigned int *addr, unsigned int *data) {
    core_req_t *req = new core_req_t;
    req->valid = 0xf;
    req->rw = rw;
    req->byteen = 0xffff;
    req->addr = addr;
    req->data = data;
    req->tag = 0xff;
    return req;
  };
  core_req_t *wr = make_req(0xf, lane_addr, lane_data);
  core_req_t *rd = make_req(0, lane_addr, lane_addr);

  sim->reset();

  // the very same request object is enqueued ten times
  int remaining = 10;
  while (remaining-- > 0) {
    sim->send_req(wr);
  }
  sim->send_req(rd);
  sim->run();

  return sim->assert_equal(lane_data, wr->tag);
}
// Entry point: attaches a RAM to a CacheSim, runs the REQ_RSP test and prints
// "PASSED" or "FAILED". The process exit code is 0 in both cases.
int main(int argc, char **argv)
{
  RAM ram;
  CacheSim cachesim;
  cachesim.attach_ram(&ram);

  const int passed = REQ_RSP(&cachesim);
  std::cout << (passed ? "PASSED" : "FAILED") << std::endl;
  return 0;
}

View file

@ -21,4 +21,6 @@ RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
TOP := VX_fifo_queue
PARAMS := -GDATAW=32 -GDEPTH=8
include ../common.mk

View file

@ -241,8 +241,6 @@ private:
#ifdef VCD_OUTPUT
if (sim_trace_enabled()) {
tfp_->dump(timestamp);
} else {
exit(-1);
}
#endif
++timestamp;

View file

@ -333,14 +333,27 @@ private:
}
device_->ap_rst_n = 1;
// this AXI device is always ready to accept new requests
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
*m_axi_mem_[i].arready = 1;
*m_axi_mem_[i].awready = 1;
*m_axi_mem_[i].wready = 1;
}
}
void tick() {
this->axi_mem_bus_eval();
device_->ap_clk = 0;
this->eval();
this->axi_mem_bus_eval(0);
device_->ap_clk = 1;
this->eval();
this->axi_mem_bus_eval(1);
dram_sim_.tick();
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
if (!dram_queues_[i].empty()) {
@ -358,13 +371,6 @@ private:
}
}
dram_sim_.tick();
device_->ap_clk = 0;
this->eval();
device_->ap_clk = 1;
this->eval();
#ifndef NDEBUG
fflush(stdout);
#endif
@ -381,162 +387,175 @@ private:
}
void axi_ctrl_bus_reset() {
// address read request
// read request address
device_->s_axi_ctrl_arvalid = 0;
device_->s_axi_ctrl_araddr = 0;
// data read response
// read response
device_->s_axi_ctrl_rready = 0;
// address write request
// write request address
device_->s_axi_ctrl_awvalid = 0;
device_->s_axi_ctrl_awaddr = 0;
// data write request
// write request data
device_->s_axi_ctrl_wvalid = 0;
device_->s_axi_ctrl_wdata = 0;
device_->s_axi_ctrl_wstrb = 0;
// data write response
// write response
device_->s_axi_ctrl_bready = 0;
}
void axi_mem_bus_reset() {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// address read request
*m_axi_mem_[i].arready = 0;
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
// read request address
*m_axi_mem_[b].arready = 0;
// address write request
*m_axi_mem_[i].awready = 0;
// write request address
*m_axi_mem_[b].awready = 0;
// data write request
*m_axi_mem_[i].wready = 0;
// write request data
*m_axi_mem_[b].wready = 0;
// data read response
*m_axi_mem_[i].rvalid = 0;
// read response
*m_axi_mem_[b].rvalid = 0;
// data write response
*m_axi_mem_[i].bvalid = 0;
// write response
*m_axi_mem_[b].bvalid = 0;
// states
m_axi_states_[i].write_req_pending = false;
m_axi_states_[b].write_req_addr_ack = false;
m_axi_states_[b].write_req_data_ack = false;
}
}
void axi_mem_bus_eval() {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// handle read responses
if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) {
*m_axi_mem_[i].rvalid = 0;
void axi_mem_bus_eval(bool clk) {
if (!clk) {
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
m_axi_states_[b].read_rsp_ready = *m_axi_mem_[b].rready;
m_axi_states_[b].write_rsp_ready = *m_axi_mem_[b].bready;
}
if (!*m_axi_mem_[i].rvalid) {
if (!pending_mem_reqs_[i].empty()
&& (*pending_mem_reqs_[i].begin())->ready
&& !(*pending_mem_reqs_[i].begin())->write) {
auto mem_rsp_it = pending_mem_reqs_[i].begin();
return;
}
for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) {
// handle read responses
if (*m_axi_mem_[b].rvalid && m_axi_states_[b].read_rsp_ready) {
*m_axi_mem_[b].rvalid = 0;
}
if (!*m_axi_mem_[b].rvalid) {
if (!pending_mem_reqs_[b].empty()
&& (*pending_mem_reqs_[b].begin())->ready
&& !(*pending_mem_reqs_[b].begin())->write) {
auto mem_rsp_it = pending_mem_reqs_[b].begin();
auto mem_rsp = *mem_rsp_it;
*m_axi_mem_[i].rvalid = 1;
*m_axi_mem_[i].rid = mem_rsp->tag;
*m_axi_mem_[i].rresp = 0;
*m_axi_mem_[i].rlast = 1;
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
pending_mem_reqs_[i].erase(mem_rsp_it);
*m_axi_mem_[b].rvalid = 1;
*m_axi_mem_[b].rid = mem_rsp->tag;
*m_axi_mem_[b].rresp = 0;
*m_axi_mem_[b].rlast = 1;
memcpy(m_axi_mem_[b].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
pending_mem_reqs_[b].erase(mem_rsp_it);
delete mem_rsp;
}
}
// handle write responses
if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) {
*m_axi_mem_[i].bvalid = 0;
if (*m_axi_mem_[b].bvalid && m_axi_states_[b].write_rsp_ready) {
*m_axi_mem_[b].bvalid = 0;
}
if (!*m_axi_mem_[i].bvalid) {
if (!pending_mem_reqs_[i].empty()
&& (*pending_mem_reqs_[i].begin())->ready
&& (*pending_mem_reqs_[i].begin())->write) {
auto mem_rsp_it = pending_mem_reqs_[i].begin();
if (!*m_axi_mem_[b].bvalid) {
if (!pending_mem_reqs_[b].empty()
&& (*pending_mem_reqs_[b].begin())->ready
&& (*pending_mem_reqs_[b].begin())->write) {
auto mem_rsp_it = pending_mem_reqs_[b].begin();
auto mem_rsp = *mem_rsp_it;
*m_axi_mem_[i].bvalid = 1;
*m_axi_mem_[i].bid = mem_rsp->tag;
*m_axi_mem_[i].bresp = 0;
pending_mem_reqs_[i].erase(mem_rsp_it);
*m_axi_mem_[b].bvalid = 1;
*m_axi_mem_[b].bid = mem_rsp->tag;
*m_axi_mem_[b].bresp = 0;
pending_mem_reqs_[b].erase(mem_rsp_it);
delete mem_rsp;
}
}
// handle read requests
if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) {
if (*m_axi_mem_[b].arvalid && *m_axi_mem_[b].arready) {
auto mem_req = new mem_req_t();
mem_req->tag = *m_axi_mem_[i].arid;
mem_req->addr = uint64_t(*m_axi_mem_[i].araddr);
mem_req->tag = *m_axi_mem_[b].arid;
mem_req->addr = uint64_t(*m_axi_mem_[b].araddr);
ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE);
mem_req->write = false;
mem_req->ready = false;
pending_mem_reqs_[i].emplace_back(mem_req);
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag);
/*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, mem_req->tag);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", mem_req->data[i]);
printf("%02x", mem_req->data[b]);
}
printf("\n");*/
// send dram request
dram_queues_[i].push(mem_req);
dram_queues_[b].push(mem_req);
}
if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) {
*m_axi_mem_[i].wready = 0;
// handle write address requests
if (*m_axi_mem_[b].awvalid && *m_axi_mem_[b].awready && !m_axi_states_[b].write_req_addr_ack) {
m_axi_states_[b].write_req_addr = *m_axi_mem_[b].awaddr;
m_axi_states_[b].write_req_tag = *m_axi_mem_[b].awid;
m_axi_states_[b].write_req_addr_ack = true;
}
// handle address write requestsls
if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) {
m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr;
m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid;
// activate data channel
*m_axi_mem_[i].wready = 1;
m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid;
// handle write data requests
if (*m_axi_mem_[b].wvalid && *m_axi_mem_[b].wready && !m_axi_states_[b].write_req_data_ack) {
m_axi_states_[b].write_req_byteen = *m_axi_mem_[b].wstrb;
auto data = (const uint8_t*)m_axi_mem_[b].wdata->data();
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) {
m_axi_states_[b].write_req_data[i] = data[i];
}
m_axi_states_[b].write_req_data_ack = true;
}
// handle data write requests
if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) {
auto byteen = *m_axi_mem_[i].wstrb;
auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
auto byte_addr = m_axi_states_[i].write_req_addr;
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) {
// handle write requests
if (m_axi_states_[b].write_req_addr_ack && m_axi_states_[b].write_req_data_ack) {
auto byteen = m_axi_states_[b].write_req_byteen;
auto byte_addr = m_axi_states_[b].write_req_addr;
for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
(*ram_)[byte_addr + i] = m_axi_states_[b].write_req_data[i];
}
}
auto mem_req = new mem_req_t();
mem_req->tag = m_axi_states_[i].write_req_tag;
mem_req->tag = m_axi_states_[b].write_req_tag;
mem_req->addr = byte_addr;
mem_req->write = true;
mem_req->ready = false;
pending_mem_reqs_[i].emplace_back(mem_req);
pending_mem_reqs_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag);
/*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, byteen, mem_req->tag);
for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) {
printf("%02x", data[i]);
printf("%02x", m_axi_states_[b].write_req_data[i]]);
}
printf("\n");*/
// send dram request
dram_queues_[i].push(mem_req);
dram_queues_[b].push(mem_req);
// deactivate data channel
if (m_axi_states_[i].write_req_pending) {
*m_axi_mem_[i].wready = 0;
m_axi_states_[i].write_req_pending = false;
}
// clear acks
m_axi_states_[b].write_req_addr_ack = false;
m_axi_states_[b].write_req_data_ack = false;
}
}
}
typedef struct {
std::array<uint8_t, PLATFORM_MEMORY_DATA_SIZE> write_req_data;
uint64_t write_req_byteen;
uint64_t write_req_addr;
uint32_t write_req_tag;
bool write_req_pending;
bool read_rsp_ready;
bool write_rsp_ready;
bool write_req_addr_ack;
bool write_req_data_ack;
} m_axi_state_t;
typedef struct {

View file

@ -141,7 +141,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] + b[i];
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -171,7 +171,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] * b[i];
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -201,7 +201,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] / b[i];
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -233,7 +233,7 @@ public:
auto y = a[i] * b[i];
auto ref = x + y;
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -263,7 +263,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] + b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -293,7 +293,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] - b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -323,7 +323,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] * b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -353,7 +353,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] * b[i] + b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -383,7 +383,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] * b[i] - b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -413,7 +413,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = -a[i] * b[i] - b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -443,7 +443,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = -a[i] * b[i] + b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -475,7 +475,7 @@ public:
auto y = a[i] * b[i] + b[i];
auto ref = x + y;
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -505,7 +505,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = a[i] / b[i];
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -537,7 +537,7 @@ public:
auto y = b[i] / a[i];
auto ref = x + y;
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -568,7 +568,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = sqrt(a[i] * b[i]);
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -600,7 +600,7 @@ public:
auto x = a[i] + b[i];
auto ref = (int32_t)x;
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -632,7 +632,7 @@ public:
auto x = a[i] + b[i];
auto ref = (uint32_t)x;
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -663,7 +663,7 @@ public:
auto x = a[i] + b[i];
auto ref = (float)x;
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -694,7 +694,7 @@ public:
auto x = a[i] + b[i];
auto ref = (float)x;
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -724,7 +724,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = fmin(fmax(1.0f, a[i]), b[i]);
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -754,7 +754,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
auto ref = std::min(std::max(1, a[i]), b[i]);
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -787,7 +787,7 @@ public:
ref = sinf(ref);
}
if (!almost_equal(c[i], ref)) {
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
++errors;
}
}
@ -820,7 +820,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
uint32_t ref = a[i] + 1;
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
++errors;
}
}
@ -857,7 +857,7 @@ public:
for (uint32_t i = 0; i < n; ++i) {
uint32_t ref = a[i] + 1;
if (c[i] != ref) {
std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl;
++errors;
}
}