code refactoring for Vivado, sv2v, and yosys compatibility

This commit is contained in:
Blaise Tine 2021-09-27 08:55:10 -04:00
parent 9b04f3d9d6
commit 9f34b2944c
97 changed files with 1435 additions and 666 deletions

View file

@ -85,8 +85,8 @@ CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_add
# test cache multi-porting
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
CONFIGS="-DL2NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
CONFIGS="-DL2NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
CONFIGS="-DL2_NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
# test 128-bit MEM block
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo

View file

@ -36,8 +36,8 @@ DPI_DIR=../../../hw/dpi
SRCS = fpga.cpp opae_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic

View file

@ -41,8 +41,8 @@ DPI_DIR = ../../hw/dpi
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO

View file

@ -3,15 +3,15 @@
module VX_alu_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Inputs
VX_alu_req_if alu_req_if,
VX_alu_req_if.slave alu_req_if,
// Outputs
VX_branch_ctl_if branch_ctl_if,
VX_commit_if alu_commit_if
VX_branch_ctl_if.master branch_ctl_if,
VX_commit_if.master alu_commit_if
);
`UNUSED_PARAM (CORE_ID)

View file

@ -12,16 +12,16 @@ module VX_cluster #(
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`L2MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`L2MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`L2MEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`L2MEM_TAG_WIDTH-1:0] mem_req_tag,
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`L2MEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`L2MEM_TAG_WIDTH-1:0] mem_rsp_tag,
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// Status
@ -31,14 +31,14 @@ module VX_cluster #(
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
wire [`NUM_CORES-1:0][`DMEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
wire [`NUM_CORES-1:0][`DMEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_req_data;
wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data;
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
@ -83,22 +83,22 @@ module VX_cluster #(
`RESET_RELAY (l2_reset);
VX_cache #(
.CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE),
.CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE),
.NUM_BANKS (`L2NUM_BANKS),
.NUM_PORTS (`L2NUM_PORTS),
.WORD_SIZE (`L2WORD_SIZE),
.NUM_REQS (`L2NUM_REQS),
.CREQ_SIZE (`L2CREQ_SIZE),
.CRSQ_SIZE (`L2CRSQ_SIZE),
.MSHR_SIZE (`L2MSHR_SIZE),
.MRSQ_SIZE (`L2MRSQ_SIZE),
.MREQ_SIZE (`L2MREQ_SIZE),
.CACHE_ID (`L2_CACHE_ID),
.CACHE_SIZE (`L2_CACHE_SIZE),
.CACHE_LINE_SIZE (`L2_CACHE_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
.NUM_PORTS (`L2_NUM_PORTS),
.WORD_SIZE (`L2_WORD_SIZE),
.NUM_REQS (`L2_NUM_REQS),
.CREQ_SIZE (`L2_CREQ_SIZE),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`XMEM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH),
.MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
.NC_ENABLE (1)
) l2cache (
`SCOPE_BIND_VX_cluster_l2cache
@ -148,8 +148,8 @@ module VX_cluster #(
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
.TYPE ("R"),
.TAG_SEL_IDX (1), // Skip 0 for NC flag

View file

@ -3,22 +3,22 @@
module VX_commit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// inputs
VX_commit_if alu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if.slave alu_commit_if,
VX_commit_if.slave ld_commit_if,
VX_commit_if.slave st_commit_if,
VX_commit_if.slave csr_commit_if,
`ifdef EXT_F_ENABLE
VX_commit_if fpu_commit_if,
VX_commit_if.slave fpu_commit_if,
`endif
VX_commit_if gpu_commit_if,
VX_commit_if.slave gpu_commit_if,
// outputs
VX_writeback_if writeback_if,
VX_cmt_to_csr_if cmt_to_csr_if
VX_writeback_if.master writeback_if,
VX_cmt_to_csr_if.master cmt_to_csr_if
);
// CSRs update
@ -50,6 +50,9 @@ module VX_commit #(
`endif
/*gpu_commit_fire ?*/ gpu_commit_if.tmask;
wire [$clog2(`NUM_THREADS+1)-1:0] commit_cnt;
`POP_COUNT(commit_cnt, commit_tmask);
VX_pipe_register #(
.DATAW (1 + $clog2(`NUM_THREADS+1)),
.RESETW (1)
@ -57,7 +60,7 @@ module VX_commit #(
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({commit_fire, $countones(commit_tmask)}),
.data_in ({commit_fire, commit_cnt}),
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
);

View file

@ -255,28 +255,28 @@
`endif
// Core Request Queue Size
`ifndef ICREQ_SIZE
`define ICREQ_SIZE 0
`ifndef ICACHE_CREQ_SIZE
`define ICACHE_CREQ_SIZE 0
`endif
// Core Response Queue Size
`ifndef ICRSQ_SIZE
`define ICRSQ_SIZE 2
`ifndef ICACHE_CRSQ_SIZE
`define ICACHE_CRSQ_SIZE 2
`endif
// Miss Handling Register Size
`ifndef IMSHR_SIZE
`define IMSHR_SIZE `NUM_WARPS
`ifndef ICACHE_MSHR_SIZE
`define ICACHE_MSHR_SIZE `NUM_WARPS
`endif
// Memory Request Queue Size
`ifndef IMREQ_SIZE
`define IMREQ_SIZE 4
`ifndef ICACHE_MREQ_SIZE
`define ICACHE_MREQ_SIZE 4
`endif
// Memory Response Queue Size
`ifndef IMRSQ_SIZE
`define IMRSQ_SIZE 0
`ifndef ICACHE_MRSQ_SIZE
`define ICACHE_MRSQ_SIZE 0
`endif
// Dcache Configurable Knobs //////////////////////////////////////////////////
@ -287,38 +287,38 @@
`endif
// Number of banks
`ifndef DNUM_BANKS
`define DNUM_BANKS `NUM_THREADS
`ifndef DCACHE_NUM_BANKS
`define DCACHE_NUM_BANKS `NUM_THREADS
`endif
// Number of ports per bank
`ifndef DNUM_PORTS
`define DNUM_PORTS 1
`ifndef DCACHE_NUM_PORTS
`define DCACHE_NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef DCREQ_SIZE
`define DCREQ_SIZE 0
`ifndef DCACHE_CREQ_SIZE
`define DCACHE_CREQ_SIZE 0
`endif
// Core Response Queue Size
`ifndef DCRSQ_SIZE
`define DCRSQ_SIZE 2
`ifndef DCACHE_CRSQ_SIZE
`define DCACHE_CRSQ_SIZE 2
`endif
// Miss Handling Register Size
`ifndef DMSHR_SIZE
`define DMSHR_SIZE `LSUQ_SIZE
`ifndef DCACHE_MSHR_SIZE
`define DCACHE_MSHR_SIZE `LSUQ_SIZE
`endif
// Memory Request Queue Size
`ifndef DMREQ_SIZE
`define DMREQ_SIZE 4
`ifndef DCACHE_MREQ_SIZE
`define DCACHE_MREQ_SIZE 4
`endif
// Memory Response Queue Size
`ifndef DMRSQ_SIZE
`define DMRSQ_SIZE 0
`ifndef DCACHE_MRSQ_SIZE
`define DCACHE_MRSQ_SIZE 0
`endif
// SM Configurable Knobs //////////////////////////////////////////////////////
@ -335,102 +335,102 @@
`endif
// Number of banks
`ifndef SNUM_BANKS
`define SNUM_BANKS `NUM_THREADS
`ifndef SMEM_NUM_BANKS
`define SMEM_NUM_BANKS `NUM_THREADS
`endif
// Core Request Queue Size
`ifndef SCREQ_SIZE
`define SCREQ_SIZE 2
`ifndef SMEM_CREQ_SIZE
`define SMEM_CREQ_SIZE 2
`endif
// Core Response Queue Size
`ifndef SCRSQ_SIZE
`define SCRSQ_SIZE 2
`ifndef SMEM_CRSQ_SIZE
`define SMEM_CRSQ_SIZE 2
`endif
// L2cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes
`ifndef L2CACHE_SIZE
`define L2CACHE_SIZE 131072
`ifndef L2_CACHE_SIZE
`define L2_CACHE_SIZE 131072
`endif
// Number of banks
`ifndef L2NUM_BANKS
`define L2NUM_BANKS `MIN(`NUM_CORES, 4)
`ifndef L2_NUM_BANKS
`define L2_NUM_BANKS `MIN(`NUM_CORES, 4)
`endif
// Number of ports per bank
`ifndef L2NUM_PORTS
`define L2NUM_PORTS 1
`ifndef L2_NUM_PORTS
`define L2_NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef L2CREQ_SIZE
`define L2CREQ_SIZE 0
`ifndef L2_CREQ_SIZE
`define L2_CREQ_SIZE 0
`endif
// Core Response Queue Size
`ifndef L2CRSQ_SIZE
`define L2CRSQ_SIZE 2
`ifndef L2_CRSQ_SIZE
`define L2_CRSQ_SIZE 2
`endif
// Miss Handling Register Size
`ifndef L2MSHR_SIZE
`define L2MSHR_SIZE 16
`ifndef L2_MSHR_SIZE
`define L2_MSHR_SIZE 16
`endif
// Memory Request Queue Size
`ifndef L2MREQ_SIZE
`define L2MREQ_SIZE 4
`ifndef L2_MREQ_SIZE
`define L2_MREQ_SIZE 4
`endif
// Memory Response Queue Size
`ifndef L2MRSQ_SIZE
`define L2MRSQ_SIZE 0
`ifndef L2_MRSQ_SIZE
`define L2_MRSQ_SIZE 0
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes
`ifndef L3CACHE_SIZE
`define L3CACHE_SIZE 1048576
`ifndef L3_CACHE_SIZE
`define L3_CACHE_SIZE 1048576
`endif
// Number of banks
`ifndef L3NUM_BANKS
`define L3NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
`ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
`endif
// Number of ports per bank
`ifndef L3NUM_PORTS
`define L3NUM_PORTS 1
`ifndef L3_NUM_PORTS
`define L3_NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef L3CREQ_SIZE
`define L3CREQ_SIZE 0
`ifndef L3_CREQ_SIZE
`define L3_CREQ_SIZE 0
`endif
// Core Response Queue Size
`ifndef L3CRSQ_SIZE
`define L3CRSQ_SIZE 2
`ifndef L3_CRSQ_SIZE
`define L3_CRSQ_SIZE 2
`endif
// Miss Handling Register Size
`ifndef L3MSHR_SIZE
`define L3MSHR_SIZE 16
`ifndef L3_MSHR_SIZE
`define L3_MSHR_SIZE 16
`endif
// Memory Request Queue Size
`ifndef L3MREQ_SIZE
`define L3MREQ_SIZE 4
`ifndef L3_MREQ_SIZE
`define L3_MREQ_SIZE 4
`endif
// Memory Response Queue Size
`ifndef L3MRSQ_SIZE
`define L3MRSQ_SIZE 0
`ifndef L3_MRSQ_SIZE
`define L3_MRSQ_SIZE 0
`endif
`endif

View file

@ -12,15 +12,15 @@ module VX_core #(
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`DMEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`DMEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`DMEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`DCACHE_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`XMEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`DMEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
@ -32,13 +32,13 @@ module VX_core #(
`endif
VX_mem_req_if #(
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_WIDTH (`XMEM_TAG_WIDTH)
) mem_req_if();
VX_mem_rsp_if #(
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.TAG_WIDTH (`XMEM_TAG_WIDTH)
) mem_rsp_if();
@ -58,25 +58,25 @@ module VX_core #(
//--
VX_dcache_req_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH)
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
) dcache_req_if();
VX_dcache_rsp_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH)
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
) dcache_rsp_if();
VX_icache_req_if #(
.WORD_SIZE (`IWORD_SIZE),
.TAG_WIDTH (`ICORE_TAG_WIDTH)
.WORD_SIZE (`ICACHE_WORD_SIZE),
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
) icache_req_if();
VX_icache_rsp_if #(
.WORD_SIZE (`IWORD_SIZE),
.TAG_WIDTH (`ICORE_TAG_WIDTH)
.WORD_SIZE (`ICACHE_WORD_SIZE),
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
) icache_rsp_if();
VX_pipeline #(

View file

@ -7,15 +7,15 @@ module VX_csr_data #(
input wire reset,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_pipeline_if perf_pipeline_if,
VX_perf_memsys_if.slave perf_memsys_if,
VX_perf_pipeline_if.slave perf_pipeline_if,
`endif
VX_cmt_to_csr_if cmt_to_csr_if,
VX_fetch_to_csr_if fetch_to_csr_if,
VX_cmt_to_csr_if.slave cmt_to_csr_if,
VX_fetch_to_csr_if.slave fetch_to_csr_if,
`ifdef EXT_F_ENABLE
VX_fpu_to_csr_if fpu_to_csr_if,
VX_fpu_to_csr_if.slave fpu_to_csr_if,
`endif
input wire read_enable,
@ -44,19 +44,16 @@ module VX_csr_data #(
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
always @(posedge clk) begin
always @(posedge clk) begin
`ifdef EXT_F_ENABLE
if (reset) begin
fcsr <= '0;
end
end
if (fpu_to_csr_if.write_enable) begin
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
| fpu_to_csr_if.write_fflags;
end
`endif
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
@ -77,7 +74,7 @@ module VX_csr_data #(
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
default: begin
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
`ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr));
end
endcase
end

View file

@ -3,26 +3,26 @@
module VX_csr_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_pipeline_if perf_pipeline_if,
VX_perf_memsys_if.slave perf_memsys_if,
VX_perf_pipeline_if.slave perf_pipeline_if,
`endif
VX_cmt_to_csr_if cmt_to_csr_if,
VX_fetch_to_csr_if fetch_to_csr_if,
VX_csr_req_if csr_req_if,
VX_commit_if csr_commit_if,
VX_cmt_to_csr_if.slave cmt_to_csr_if,
VX_fetch_to_csr_if.slave fetch_to_csr_if,
VX_csr_req_if.slave csr_req_if,
VX_commit_if.master csr_commit_if,
`ifdef EXT_F_ENABLE
VX_fpu_to_csr_if fpu_to_csr_if,
input wire[`NUM_WARPS-1:0] fpu_pending,
VX_fpu_to_csr_if.slave fpu_to_csr_if,
input wire[`NUM_WARPS-1:0] fpu_pending,
`endif
output wire[`NUM_WARPS-1:0] pending,
input wire busy
input wire busy
);
wire csr_we_s1;
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;

View file

@ -19,12 +19,12 @@ module VX_decode #(
input wire reset,
// inputs
VX_ifetch_rsp_if ifetch_rsp_if,
VX_ifetch_rsp_if.slave ifetch_rsp_if,
// outputs
VX_decode_if decode_if,
VX_wstall_if wstall_if,
VX_join_if join_if
VX_decode_if.master decode_if,
VX_wstall_if.master wstall_if,
VX_join_if.master join_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (clk)

View file

@ -238,45 +238,33 @@
`endif
// non-cacheable address bit
`define NC_ADDR_BITS 1
`define NC_FLAG_BITS 1
////////////////////////// Icache Configurable Knobs //////////////////////////
// Cache ID
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
// Number of banks
`define INUM_BANKS 1
// Word size in bytes
`define IWORD_SIZE 4
`define ICACHE_WORD_SIZE 4
// Block size in bytes
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
// Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
// Core request byte enable bits
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
// TAG sharing enable
`define ICORE_TAG_ID_BITS `NW_BITS
`define ICACHE_CORE_TAG_ID_BITS `NW_BITS
// Core request tag bits
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICACHE_CORE_TAG_ID_BITS)
// Memory request data bits
`define IMEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
// Memory request address bits
`define IMEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
// Memory byte enable bits
`define IMEM_BYTEEN_WIDTH `ICACHE_LINE_SIZE
`define ICACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
// Memory request tag bits
`define IMEM_TAG_WIDTH (`IMEM_ADDR_WIDTH + `CLOG2(`IMSHR_SIZE))
`define ICACHE_MEM_TAG_WIDTH `CLOG2(`ICACHE_MSHR_SIZE)
////////////////////////// Dcache Configurable Knobs //////////////////////////
@ -284,129 +272,126 @@
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Word size in bytes
`define DWORD_SIZE 4
`define DCACHE_WORD_SIZE 4
// Block size in bytes
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
// Core request address bits
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
// TAG sharing enable
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE)
// Input request tag bits
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS)
// Memory request data bits
`define DMEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
// Memory request address bits
`define DMEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
`define DCACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
// Memory byte enable bits
`define DMEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
`define DCACHE_MEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
// Input request size
`define DNUM_REQS `NUM_THREADS
`define DCACHE_NUM_REQS `NUM_THREADS
// Memory request tag bits
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE)
`define _DNC_MEM_TAG_WIDTH ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH)
`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `CLOG2(`DMSHR_SIZE) + `NC_ADDR_BITS), `_DNC_MEM_TAG_WIDTH)
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DCACHE_WORD_SIZE)
`define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH)
`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_FLAG_BITS), `_DNC_MEM_TAG_WIDTH)
////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
`define SMEM_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
// Word size in bytes
`define SWORD_SIZE 4
`define SMEM_WORD_SIZE 4
// bank address offset
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE)
`define SMEM_BANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SMEM_WORD_SIZE)
// Input request size
`define SNUM_REQS `NUM_THREADS
`define SMEM_NUM_REQS `NUM_THREADS
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Cache ID
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
`define L2_CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
// Word size in bytes
`define L2WORD_SIZE `DCACHE_LINE_SIZE
`define L2_WORD_SIZE `DCACHE_LINE_SIZE
// Block size in bytes
`define L2CACHE_LINE_SIZE (`L2_ENABLE ? `MEM_BLOCK_SIZE : `L2WORD_SIZE)
`define L2_CACHE_LINE_SIZE ((`L2_ENABLE) ? `MEM_BLOCK_SIZE : `L2_WORD_SIZE)
// Input request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
`define L2_CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
// Memory request data bits
`define L2MEM_DATA_WIDTH (`L2CACHE_LINE_SIZE * 8)
`define L2_MEM_DATA_WIDTH (`L2_CACHE_LINE_SIZE * 8)
// Memory request address bits
`define L2MEM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE))
`define L2_MEM_ADDR_WIDTH (32 - `CLOG2(`L2_CACHE_LINE_SIZE))
// Memory byte enable bits
`define L2MEM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE
`define L2_MEM_BYTEEN_WIDTH `L2_CACHE_LINE_SIZE
// Input request size
`define L2NUM_REQS `NUM_CORES
`define L2_NUM_REQS `NUM_CORES
// Memory request tag bits
`define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE)
`define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `CLOG2(`L2MSHR_SIZE) + `NC_ADDR_BITS), `_L2NC_MEM_TAG_WIDTH)
`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS)))
`define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE)
`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_FLAG_BITS), `_L2_NC_MEM_TAG_WIDTH)
`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS)))
////////////////////////// L3cache Configurable Knobs /////////////////////////
// Cache ID
`define L3CACHE_ID 0
`define L3_CACHE_ID 0
// Word size in bytes
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
`define L3_WORD_SIZE `L2_CACHE_LINE_SIZE
// Block size in bytes
`define L3CACHE_LINE_SIZE (`L3_ENABLE ? `MEM_BLOCK_SIZE : `L3WORD_SIZE)
`define L3_CACHE_LINE_SIZE ((`L3_ENABLE) ? `MEM_BLOCK_SIZE : `L3_WORD_SIZE)
// Input request tag bits
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
`define L3_CORE_TAG_WIDTH (`L2_CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
// Memory request data bits
`define L3MEM_DATA_WIDTH (`L3CACHE_LINE_SIZE * 8)
`define L3_MEM_DATA_WIDTH (`L3_CACHE_LINE_SIZE * 8)
// Memory request address bits
`define L3MEM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE))
`define L3_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_CACHE_LINE_SIZE))
// Memory byte enable bits
`define L3MEM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE
`define L3_MEM_BYTEEN_WIDTH `L3_CACHE_LINE_SIZE
// Input request size
`define L3NUM_REQS `NUM_CLUSTERS
`define L3_NUM_REQS `NUM_CLUSTERS
// Memory request tag bits
`define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE)
`define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH)
`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `CLOG2(`L3MSHR_SIZE) + `NC_ADDR_BITS), `_L3NC_MEM_TAG_WIDTH)
`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS)))
`define _L3_MEM_ADDR_RATIO_W $clog2(`L3_CACHE_LINE_SIZE / `L3_WORD_SIZE)
`define _L3_NC_MEM_TAG_WIDTH ($clog2(`L3_NUM_REQS) + `_L3_MEM_ADDR_RATIO_W + `L2_MEM_TAG_WIDTH)
`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_FLAG_BITS), `_L3_NC_MEM_TAG_WIDTH)
`define L3_MEM_TAG_WIDTH ((`L3_ENABLE) ? `_L3_MEM_TAG_WIDTH : (`L2_MEM_TAG_WIDTH + `CLOG2(`L3_NUM_REQS)))
///////////////////////////////////////////////////////////////////////////////
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH
`define VX_MEM_DATA_WIDTH `L3MEM_DATA_WIDTH
`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
`define VX_MEM_BYTEEN_WIDTH `L3_MEM_BYTEEN_WIDTH
`define VX_MEM_ADDR_WIDTH `L3_MEM_ADDR_WIDTH
`define VX_MEM_DATA_WIDTH `L3_MEM_DATA_WIDTH
`define VX_MEM_TAG_WIDTH `L3_MEM_TAG_WIDTH
`define VX_CORE_TAG_WIDTH `L3_CORE_TAG_WIDTH
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
// Merged D-cache/I-cache memory tag
`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH + `CLOG2(2))
`define XMEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH + `CLOG2(2))
`include "VX_types.vh"

View file

@ -9,42 +9,42 @@ module VX_execute #(
input wire reset,
// Dcache interface
VX_dcache_req_if dcache_req_if,
VX_dcache_rsp_if dcache_rsp_if,
VX_dcache_req_if.master dcache_req_if,
VX_dcache_rsp_if.slave dcache_rsp_if,
// commit interface
VX_cmt_to_csr_if cmt_to_csr_if,
VX_cmt_to_csr_if.slave cmt_to_csr_if,
// fetch interface
VX_fetch_to_csr_if fetch_to_csr_if,
VX_fetch_to_csr_if.slave fetch_to_csr_if,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_pipeline_if perf_pipeline_if,
VX_perf_memsys_if.slave perf_memsys_if,
VX_perf_pipeline_if.slave perf_pipeline_if,
`endif
// inputs
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_alu_req_if.slave alu_req_if,
VX_lsu_req_if.slave lsu_req_if,
VX_csr_req_if.slave csr_req_if,
`ifdef EXT_F_ENABLE
VX_fpu_req_if fpu_req_if,
VX_fpu_req_if.slave fpu_req_if,
`endif
VX_gpu_req_if gpu_req_if,
VX_gpu_req_if.slave gpu_req_if,
// outputs
VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if warp_ctl_if,
VX_commit_if alu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if,
VX_commit_if csr_commit_if,
VX_branch_ctl_if.master branch_ctl_if,
VX_warp_ctl_if.master warp_ctl_if,
VX_commit_if.master alu_commit_if,
VX_commit_if.master ld_commit_if,
VX_commit_if.master st_commit_if,
VX_commit_if.master csr_commit_if,
`ifdef EXT_F_ENABLE
VX_commit_if fpu_commit_if,
VX_commit_if.master fpu_commit_if,
`endif
VX_commit_if gpu_commit_if,
VX_commit_if.master gpu_commit_if,
input wire busy
input wire busy
);
`ifdef EXT_F_ENABLE
VX_fpu_to_csr_if fpu_to_csr_if();

View file

@ -9,23 +9,23 @@ module VX_fetch #(
input wire reset,
// Icache interface
VX_icache_req_if icache_req_if,
VX_icache_rsp_if icache_rsp_if,
VX_icache_req_if.master icache_req_if,
VX_icache_rsp_if.slave icache_rsp_if,
// inputs
VX_wstall_if wstall_if,
VX_join_if join_if,
VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if warp_ctl_if,
VX_wstall_if.slave wstall_if,
VX_join_if.slave join_if,
VX_branch_ctl_if.slave branch_ctl_if,
VX_warp_ctl_if.slave warp_ctl_if,
// outputs
VX_ifetch_rsp_if ifetch_rsp_if,
VX_ifetch_rsp_if.master ifetch_rsp_if,
// csr interface
VX_fetch_to_csr_if fetch_to_csr_if,
VX_fetch_to_csr_if.master fetch_to_csr_if,
// busy status
output wire busy
output wire busy
);
VX_ifetch_req_if ifetch_req_if();

View file

@ -6,9 +6,9 @@ module VX_fpu_unit #(
input wire clk,
input wire reset,
VX_fpu_req_if fpu_req_if,
VX_fpu_to_csr_if fpu_to_csr_if,
VX_commit_if fpu_commit_if,
VX_fpu_req_if.slave fpu_req_if,
VX_fpu_to_csr_if.master fpu_to_csr_if,
VX_commit_if.master fpu_commit_if,
input wire[`NUM_WARPS-1:0] csr_pending,
output wire[`NUM_WARPS-1:0] pending

View file

@ -3,15 +3,15 @@
module VX_gpr_stage #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// inputs
VX_writeback_if writeback_if,
VX_gpr_req_if gpr_req_if,
VX_writeback_if.slave writeback_if,
VX_gpr_req_if.slave gpr_req_if,
// outputs
VX_gpr_rsp_if gpr_rsp_if
VX_gpr_rsp_if.master gpr_rsp_if
);
`UNUSED_PARAM (CORE_ID)

View file

@ -5,15 +5,15 @@ module VX_gpu_unit #(
) (
`SCOPE_IO_VX_gpu_unit
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Inputs
VX_gpu_req_if gpu_req_if,
VX_gpu_req_if.slave gpu_req_if,
// Outputs
VX_warp_ctl_if warp_ctl_if,
VX_commit_if gpu_commit_if
VX_warp_ctl_if.master warp_ctl_if,
VX_commit_if.master gpu_commit_if
);
`UNUSED_PARAM (CORE_ID)

View file

@ -7,10 +7,10 @@ module VX_ibuffer #(
input wire reset,
// inputs
VX_decode_if decode_if,
VX_decode_if.slave decode_if,
// outputs
VX_ibuffer_if ibuffer_if
VX_ibuffer_if.master ibuffer_if
);
`UNUSED_PARAM (CORE_ID)

View file

@ -5,18 +5,18 @@ module VX_icache_stage #(
) (
`SCOPE_IO_VX_icache_stage
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Icache interface
VX_icache_req_if icache_req_if,
VX_icache_rsp_if icache_rsp_if,
VX_icache_req_if.master icache_req_if,
VX_icache_rsp_if.slave icache_rsp_if,
// request
VX_ifetch_req_if ifetch_req_if,
VX_ifetch_req_if.slave ifetch_req_if,
// response
VX_ifetch_rsp_if ifetch_rsp_if
VX_ifetch_rsp_if.master ifetch_rsp_if
);
`UNUSED_PARAM (CORE_ID)

View file

@ -1,21 +1,21 @@
`include "VX_define.vh"
module VX_instr_demux (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// inputs
VX_ibuffer_if ibuffer_if,
VX_gpr_rsp_if gpr_rsp_if,
VX_ibuffer_if.slave ibuffer_if,
VX_gpr_rsp_if.slave gpr_rsp_if,
// outputs
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_alu_req_if.master alu_req_if,
VX_lsu_req_if.master lsu_req_if,
VX_csr_req_if.master csr_req_if,
`ifdef EXT_F_ENABLE
VX_fpu_req_if fpu_req_if,
VX_fpu_req_if.master fpu_req_if,
`endif
VX_gpu_req_if gpu_req_if
VX_gpu_req_if.master gpu_req_if
);
wire [`NT_BITS-1:0] tid;
wire alu_req_ready;

View file

@ -9,19 +9,19 @@ module VX_issue #(
input wire reset,
`ifdef PERF_ENABLE
VX_perf_pipeline_if perf_pipeline_if,
VX_perf_pipeline_if.master perf_pipeline_if,
`endif
VX_decode_if decode_if,
VX_writeback_if writeback_if,
VX_decode_if.slave decode_if,
VX_writeback_if.slave writeback_if,
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
VX_csr_req_if csr_req_if,
VX_alu_req_if.master alu_req_if,
VX_lsu_req_if.master lsu_req_if,
VX_csr_req_if.master csr_req_if,
`ifdef EXT_F_ENABLE
VX_fpu_req_if fpu_req_if,
VX_fpu_req_if.master fpu_req_if,
`endif
VX_gpu_req_if gpu_req_if
VX_gpu_req_if.master gpu_req_if
);
VX_ibuffer_if ibuffer_if();
VX_ibuffer_if execute_if();

View file

@ -5,26 +5,26 @@ module VX_lsu_unit #(
) (
`SCOPE_IO_VX_lsu_unit
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Dcache interface
VX_dcache_req_if dcache_req_if,
VX_dcache_rsp_if dcache_rsp_if,
VX_dcache_req_if.master dcache_req_if,
VX_dcache_rsp_if.slave dcache_rsp_if,
// inputs
VX_lsu_req_if lsu_req_if,
VX_lsu_req_if.slave lsu_req_if,
// outputs
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if
VX_commit_if.master ld_commit_if,
VX_commit_if.master st_commit_if
);
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE);
localparam ADDR_TYPEW = `NC_ADDR_BITS + `SM_ENABLE;
localparam ADDR_TYPEW = `NC_FLAG_BITS + `SM_ENABLE;
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
@ -321,9 +321,9 @@ module VX_lsu_unit #(
for (integer i = 0; i < `LSUQ_SIZE; ++i) begin
if (pending_reqs[i][0]) begin
assert(($time - pending_reqs[i][1 +: 64]) < delay_timeout) else
$error("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS]);
`ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout,
("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS]));
end
end
end

View file

@ -10,9 +10,9 @@ module VX_mem_arb #(
parameter BUFFERED_RSP = 0,
parameter TYPE = "P",
localparam DATA_SIZE = (DATA_WIDTH / 8),
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
) (
input wire clk,
input wire reset,

View file

@ -5,24 +5,24 @@ module VX_mem_unit # (
) (
`SCOPE_IO_VX_mem_unit
input wire clk,
input wire reset,
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_memsys_if.master perf_memsys_if,
`endif
// Core <-> Dcache
VX_dcache_req_if dcache_req_if,
VX_dcache_rsp_if dcache_rsp_if,
VX_dcache_req_if.slave dcache_req_if,
VX_dcache_rsp_if.master dcache_rsp_if,
// Core <-> Icache
VX_icache_req_if icache_req_if,
VX_icache_rsp_if icache_rsp_if,
VX_icache_req_if.slave icache_req_if,
VX_icache_rsp_if.master icache_rsp_if,
// Memory
VX_mem_req_if mem_req_if,
VX_mem_rsp_if mem_rsp_if
VX_mem_req_if.master mem_req_if,
VX_mem_rsp_if.slave mem_rsp_if
);
`ifdef PERF_ENABLE
@ -30,37 +30,37 @@ module VX_mem_unit # (
`endif
VX_mem_req_if #(
.DATA_WIDTH (`IMEM_DATA_WIDTH),
.ADDR_WIDTH (`IMEM_ADDR_WIDTH),
.TAG_WIDTH (`IMEM_TAG_WIDTH)
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`ICACHE_MEM_ADDR_WIDTH),
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
) icache_mem_req_if();
VX_mem_rsp_if #(
.DATA_WIDTH (`IMEM_DATA_WIDTH),
.TAG_WIDTH (`IMEM_TAG_WIDTH)
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
) icache_mem_rsp_if();
VX_mem_req_if #(
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.TAG_WIDTH (`DMEM_TAG_WIDTH)
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
) dcache_mem_req_if();
VX_mem_rsp_if #(
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.TAG_WIDTH (`DMEM_TAG_WIDTH)
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
) dcache_mem_rsp_if();
VX_dcache_req_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) dcache_req_tmp_if();
VX_dcache_rsp_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) dcache_rsp_tmp_if();
`RESET_RELAY (icache_reset);
@ -71,18 +71,18 @@ module VX_mem_unit # (
.CACHE_ID (`ICACHE_ID),
.CACHE_SIZE (`ICACHE_SIZE),
.CACHE_LINE_SIZE (`ICACHE_LINE_SIZE),
.NUM_BANKS (`INUM_BANKS),
.WORD_SIZE (`IWORD_SIZE),
.NUM_BANKS (1),
.WORD_SIZE (`ICACHE_WORD_SIZE),
.NUM_REQS (1),
.CREQ_SIZE (`ICREQ_SIZE),
.CRSQ_SIZE (`ICRSQ_SIZE),
.MSHR_SIZE (`IMSHR_SIZE),
.MRSQ_SIZE (`IMRSQ_SIZE),
.MREQ_SIZE (`IMREQ_SIZE),
.CREQ_SIZE (`ICACHE_CREQ_SIZE),
.CRSQ_SIZE (`ICACHE_CRSQ_SIZE),
.MSHR_SIZE (`ICACHE_MSHR_SIZE),
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.MEM_TAG_WIDTH (`IMEM_TAG_WIDTH)
.CORE_TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICACHE_CORE_TAG_ID_BITS),
.MEM_TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
) icache (
`SCOPE_BIND_VX_mem_unit_icache
@ -92,7 +92,7 @@ module VX_mem_unit # (
// Core request
.core_req_valid (icache_req_if.valid),
.core_req_rw (1'b0),
.core_req_byteen ({`IWORD_SIZE{1'b1}}),
.core_req_byteen ('b0),
.core_req_addr (icache_req_if.addr),
.core_req_data ('x),
.core_req_tag (icache_req_if.tag),
@ -129,19 +129,19 @@ module VX_mem_unit # (
.CACHE_ID (`DCACHE_ID),
.CACHE_SIZE (`DCACHE_SIZE),
.CACHE_LINE_SIZE (`DCACHE_LINE_SIZE),
.NUM_BANKS (`DNUM_BANKS),
.NUM_PORTS (`DNUM_PORTS),
.WORD_SIZE (`DWORD_SIZE),
.NUM_REQS (`DNUM_REQS),
.CREQ_SIZE (`DCREQ_SIZE),
.CRSQ_SIZE (`DCRSQ_SIZE),
.MSHR_SIZE (`DMSHR_SIZE),
.MRSQ_SIZE (`DMRSQ_SIZE),
.MREQ_SIZE (`DMREQ_SIZE),
.NUM_BANKS (`DCACHE_NUM_BANKS),
.NUM_PORTS (`DCACHE_NUM_PORTS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.NUM_REQS (`DCACHE_NUM_REQS),
.CREQ_SIZE (`DCACHE_CREQ_SIZE),
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE),
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH),
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
.MEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH),
.NC_ENABLE (1)
) dcache (
`SCOPE_BIND_VX_mem_unit_dcache
@ -187,15 +187,15 @@ module VX_mem_unit # (
if (`SM_ENABLE) begin
VX_dcache_req_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) smem_req_if();
VX_dcache_rsp_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) smem_rsp_if();
`RESET_RELAY (smem_arb_reset);
@ -205,7 +205,7 @@ module VX_mem_unit # (
.NUM_REQS (2),
.LANES (`NUM_THREADS),
.DATA_SIZE (4),
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
.TAG_IN_WIDTH (`DCACHE_CORE_TAG_WIDTH),
.TYPE ("P"),
.BUFFERED_REQ (2),
.BUFFERED_RSP (1)
@ -247,16 +247,16 @@ module VX_mem_unit # (
);
VX_shared_mem #(
.CACHE_ID (`SCACHE_ID),
.CACHE_ID (`SMEM_ID),
.CACHE_SIZE (`SMEM_SIZE),
.NUM_BANKS (`SNUM_BANKS),
.WORD_SIZE (`SWORD_SIZE),
.NUM_REQS (`SNUM_REQS),
.CREQ_SIZE (`SCREQ_SIZE),
.CRSQ_SIZE (`SCRSQ_SIZE),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE),
.BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET)
.NUM_BANKS (`SMEM_NUM_BANKS),
.WORD_SIZE (`SMEM_WORD_SIZE),
.NUM_REQS (`SMEM_NUM_REQS),
.CREQ_SIZE (`SMEM_CREQ_SIZE),
.CRSQ_SIZE (`SMEM_CRSQ_SIZE),
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
.BANK_ADDR_OFFSET (`SMEM_BANK_ADDR_OFFSET)
) smem (
.clk (clk),
.reset (smem_reset),
@ -283,9 +283,9 @@ module VX_mem_unit # (
);
end else begin
// core to D-cache request
for (genvar i = 0; i < `DNUM_REQS; ++i) begin
for (genvar i = 0; i < `DCACHE_NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW ((32-`CLOG2(`DWORD_SIZE)) + 1 + `DWORD_SIZE + (8*`DWORD_SIZE) + `DCORE_TAG_WIDTH)
.DATAW ((32-`CLOG2(`DCACHE_WORD_SIZE)) + 1 + `DCACHE_WORD_SIZE + (8*`DCACHE_WORD_SIZE) + `DCACHE_CORE_TAG_WIDTH)
) req_buf (
.clk (clk),
.reset (reset),
@ -306,16 +306,16 @@ module VX_mem_unit # (
assign dcache_rsp_tmp_if.ready = dcache_rsp_if.ready;
end
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag);
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`IMEM_TAG_WIDTH-1:0];
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DCACHE_MEM_TAG_WIDTH'(icache_mem_req_if.tag);
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`ICACHE_MEM_TAG_WIDTH-1:0];
`UNUSED_VAR (icache_mem_rsp_tag)
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`DMEM_TAG_WIDTH),
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`DCACHE_MEM_TAG_WIDTH),
.TYPE ("R"),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.BUFFERED_REQ (1),

View file

@ -15,30 +15,30 @@ module VX_pipeline #(
output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr,
output wire [`NUM_THREADS-1:0][31:0] dcache_req_data,
output wire [`NUM_THREADS-1:0][`DCORE_TAG_WIDTH-1:0] dcache_req_tag,
output wire [`NUM_THREADS-1:0][`DCACHE_CORE_TAG_WIDTH-1:0] dcache_req_tag,
input wire [`NUM_THREADS-1:0] dcache_req_ready,
// Dcache core response
input wire dcache_rsp_valid,
input wire [`NUM_THREADS-1:0] dcache_rsp_tmask,
input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data,
input wire [`DCORE_TAG_WIDTH-1:0] dcache_rsp_tag,
input wire [`DCACHE_CORE_TAG_WIDTH-1:0] dcache_rsp_tag,
output wire dcache_rsp_ready,
// Icache core request
output wire icache_req_valid,
output wire [29:0] icache_req_addr,
output wire [`ICORE_TAG_WIDTH-1:0] icache_req_tag,
output wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_req_tag,
input wire icache_req_ready,
// Icache core response
input wire icache_rsp_valid,
input wire [31:0] icache_rsp_data,
input wire [`ICORE_TAG_WIDTH-1:0] icache_rsp_tag,
input wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_rsp_tag,
output wire icache_rsp_ready,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_memsys_if.slave perf_memsys_if,
`endif
// Status
@ -51,7 +51,7 @@ module VX_pipeline #(
VX_dcache_req_if #(
.NUM_REQS (`NUM_THREADS),
.WORD_SIZE (4),
.TAG_WIDTH (`DCORE_TAG_WIDTH)
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
) dcache_req_if();
assign dcache_req_valid = dcache_req_if.valid;
@ -69,7 +69,7 @@ module VX_pipeline #(
VX_dcache_rsp_if #(
.NUM_REQS (`NUM_THREADS),
.WORD_SIZE (4),
.TAG_WIDTH (`DCORE_TAG_WIDTH)
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
) dcache_rsp_if();
assign dcache_rsp_if.valid = dcache_rsp_valid;
@ -84,7 +84,7 @@ module VX_pipeline #(
VX_icache_req_if #(
.WORD_SIZE (4),
.TAG_WIDTH (`ICORE_TAG_WIDTH)
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
) icache_req_if();
assign icache_req_valid = icache_req_if.valid;
@ -98,7 +98,7 @@ module VX_pipeline #(
VX_icache_rsp_if #(
.WORD_SIZE (4),
.TAG_WIDTH (`ICORE_TAG_WIDTH)
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
) icache_rsp_if();
assign icache_rsp_if.valid = icache_rsp_valid;

View file

@ -10,6 +10,7 @@
///////////////////////////////////////////////////////////////////////////////
`ifndef SYNTHESIS
`ifndef NDEBUG
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
x \
@ -48,18 +49,24 @@
. x () \
/* verilator lint_on PINCONNECTEMPTY */
// Simulation-time immediate assertion: when cond evaluates false, msg is
// reported through $error.
//   cond - expression that must hold.
//   msg  - pasted verbatim after $error, so it must be a parenthesized
//          argument list, e.g. ASSERT(x != 0, ("bad value %0d", x)).
// The expansion carries no trailing ';' - the call site supplies it.
`define ASSERT(cond, msg) \
assert(cond) else $error msg
// Parameter/constant check placed in a generate block: when cond is false,
// $error is invoked with msg.
//   cond - condition tested by the generate-if.
//   msg  - pasted verbatim after $error, so it must be a parenthesized
//          argument list, e.g. ("invalid parameter").
// The expansion ends with endgenerate, so call sites add no ';' after it.
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error msg; \
endgenerate
`define RUNTIME_ASSERT(cond, msg) \
always @(posedge clk) \
always @(posedge clk) begin \
assert(cond) else $error msg; \
end
`define TRACING_ON /* verilator tracing_on */
`define TRACING_OFF /* verilator tracing_off */
`else // SYNTHESIS
`define DEBUG_BLOCK(x)
`define IGNORE_UNUSED_BEGIN
`define IGNORE_UNUSED_END
@ -68,10 +75,12 @@
`define UNUSED_PARAM(x)
`define UNUSED_VAR(x)
`define UNUSED_PIN(x) . x ()
`define ASSERT(cond, msg) if (cond);
`define STATIC_ASSERT(cond, msg)
`define RUNTIME_ASSERT(cond, msg)
`define TRACING_ON
`define TRACING_OFF
`endif // SYNTHESIS
///////////////////////////////////////////////////////////////////////////////
@ -131,12 +140,20 @@
end \
dpi_trace("}")
`define RESET_RELAY(signal) \
wire signal; \
`define RESET_RELAY(signal) \
wire signal; \
VX_reset_relay __``signal ( \
.clk (clk), \
.reset (reset), \
.reset_o (signal) \
.clk (clk), \
.reset (reset), \
.reset_o (signal) \
)
// Instantiates a VX_popcount module over in and connects its cnt_o to out.
//   out - signal receiving the count; also used to form the instance name
//         (__ followed by out), so it must be a plain identifier.
//   in  - signal wired to in_i; parameter N is set to $bits(in).
// The expansion carries no trailing ';' - the call site supplies it.
`define POP_COUNT(out, in) \
VX_popcount #( \
.N ($bits(in)) \
) __``out ( \
.in_i (in), \
.cnt_o (out) \
)
`endif

View file

@ -3,12 +3,12 @@
module VX_scoreboard #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
VX_ibuffer_if ibuffer_if,
VX_writeback_if writeback_if,
output wire delay
VX_ibuffer_if.slave ibuffer_if,
VX_writeback_if.slave writeback_if,
output wire delay
);
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
@ -61,15 +61,16 @@ module VX_scoreboard #(
end
`endif
if (release_reg) begin
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
else $error("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
`ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0,
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd));
end
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
deadlock_ctr <= deadlock_ctr + 1;
assert(deadlock_ctr < deadlock_timeout) else $error("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
`ASSERT(deadlock_ctr < deadlock_timeout,
("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3);
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3));
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
deadlock_ctr <= 0;
end

View file

@ -10,10 +10,10 @@ module VX_smem_arb #(
parameter BUFFERED_RSP = 0,
parameter TYPE = "P",
localparam ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
localparam DATA_WIDTH = (8 * DATA_SIZE),
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
parameter ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
parameter DATA_WIDTH = (8 * DATA_SIZE),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
) (
input wire clk,
input wire reset,

View file

@ -5,19 +5,19 @@ module VX_warp_sched #(
) (
`SCOPE_IO_VX_warp_sched
input wire clk,
input wire reset,
input wire clk,
input wire reset,
VX_warp_ctl_if warp_ctl_if,
VX_wstall_if wstall_if,
VX_join_if join_if,
VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if.slave warp_ctl_if,
VX_wstall_if.slave wstall_if,
VX_join_if.slave join_if,
VX_branch_ctl_if.slave branch_ctl_if,
VX_ifetch_req_if ifetch_req_if,
VX_ifetch_req_if.master ifetch_req_if,
VX_fetch_to_csr_if fetch_to_csr_if,
VX_fetch_to_csr_if.master fetch_to_csr_if,
output wire busy
output wire busy
);
`UNUSED_PARAM (CORE_ID)
@ -147,7 +147,8 @@ module VX_warp_sched #(
`IGNORE_UNUSED_BEGIN
wire [`NW_BITS:0] active_barrier_count;
`IGNORE_UNUSED_END
assign active_barrier_count = $countones(barrier_masks[warp_ctl_if.barrier.id]);
wire [`NUM_WARPS-1:0] barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
`POP_COUNT(active_barrier_count, barrier_mask);
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
@ -161,7 +162,7 @@ module VX_warp_sched #(
// split/join stack management
wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0];
wire [(1+32+`NUM_THREADS)-1:0] ipdom [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0] curr_tmask = thread_masks[warp_ctl_if.wid];
@ -173,8 +174,8 @@ module VX_warp_sched #(
wire pop = join_if.valid && (i == join_if.wid);
wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_tmask : curr_tmask;
wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, curr_tmask};
wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask};
wire [(1+32+`NUM_THREADS)-1:0] q_end = {1'b0, 32'b0, curr_tmask};
wire [(1+32+`NUM_THREADS)-1:0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask};
VX_ipdom_stack #(
.WIDTH (1+32+`NUM_THREADS),

View file

@ -3,19 +3,19 @@
module VX_writeback #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// inputs
VX_commit_if alu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if.slave alu_commit_if,
VX_commit_if.slave ld_commit_if,
VX_commit_if.slave csr_commit_if,
`ifdef EXT_F_ENABLE
VX_commit_if fpu_commit_if,
VX_commit_if.slave fpu_commit_if,
`endif
// outputs
VX_writeback_if writeback_if
VX_writeback_if.master writeback_if
);
`UNUSED_PARAM (CORE_ID)

View file

@ -29,15 +29,15 @@ module Vortex (
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
wire [`NUM_CLUSTERS-1:0][`L2_MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
@ -81,22 +81,22 @@ module Vortex (
`RESET_RELAY (l3_reset);
VX_cache #(
.CACHE_ID (`L3CACHE_ID),
.CACHE_SIZE (`L3CACHE_SIZE),
.CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE),
.NUM_BANKS (`L3NUM_BANKS),
.NUM_PORTS (`L3NUM_PORTS),
.WORD_SIZE (`L3WORD_SIZE),
.NUM_REQS (`L3NUM_REQS),
.CREQ_SIZE (`L3CREQ_SIZE),
.CRSQ_SIZE (`L3CRSQ_SIZE),
.MSHR_SIZE (`L3MSHR_SIZE),
.MRSQ_SIZE (`L3MRSQ_SIZE),
.MREQ_SIZE (`L3MREQ_SIZE),
.CACHE_ID (`L3_CACHE_ID),
.CACHE_SIZE (`L3_CACHE_SIZE),
.CACHE_LINE_SIZE (`L3_CACHE_LINE_SIZE),
.NUM_BANKS (`L3_NUM_BANKS),
.NUM_PORTS (`L3_NUM_PORTS),
.WORD_SIZE (`L3_WORD_SIZE),
.NUM_REQS (`L3_NUM_REQS),
.CREQ_SIZE (`L3_CREQ_SIZE),
.CRSQ_SIZE (`L3_CRSQ_SIZE),
.MSHR_SIZE (`L3_MSHR_SIZE),
.MRSQ_SIZE (`L3_MRSQ_SIZE),
.MREQ_SIZE (`L3_MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`L2MEM_TAG_WIDTH),
.CORE_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.MEM_TAG_WIDTH (`L3MEM_TAG_WIDTH),
.MEM_TAG_WIDTH (`L3_MEM_TAG_WIDTH),
.NC_ENABLE (1)
) l3cache (
`SCOPE_BIND_Vortex_l3cache
@ -146,9 +146,9 @@ module Vortex (
VX_mem_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (`L3MEM_DATA_WIDTH),
.ADDR_WIDTH (`L3MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`L2MEM_TAG_WIDTH),
.DATA_WIDTH (`L3_MEM_DATA_WIDTH),
.ADDR_WIDTH (`L3_MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`L2_MEM_TAG_WIDTH),
.TYPE ("R"),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)

View file

@ -1,17 +1,16 @@
`include "VX_define.vh"
module Vortex_axi #(
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = 32,
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = 32,
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
parameter AXI_STROBE_WIDTH = (`VX_MEM_DATA_WIDTH / 8)
)(
// Clock
input wire clk,
input wire reset,
// AXI write address channel
output wire m_axi_awvalid,
// AXI write request address channel
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
output wire [7:0] m_axi_awlen,
@ -20,18 +19,24 @@ module Vortex_axi #(
output wire m_axi_awlock,
output wire [3:0] m_axi_awcache,
output wire [2:0] m_axi_awprot,
output wire [3:0] m_axi_awqos,
output wire [3:0] m_axi_awqos,
output wire m_axi_awvalid,
input wire m_axi_awready,
// AXI write data channel
output wire m_axi_wvalid,
// AXI write request data channel
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
output wire m_axi_wlast,
output wire m_axi_wlast,
output wire m_axi_wvalid,
input wire m_axi_wready,
// AXI write response channel
input wire [AXI_TID_WIDTH-1:0] m_axi_bid,
input wire [1:0] m_axi_bresp,
input wire m_axi_bvalid,
output wire m_axi_bready,
// AXI read address channel
output wire m_axi_arvalid,
// AXI read request channel
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
output wire [7:0] m_axi_arlen,
@ -41,12 +46,15 @@ module Vortex_axi #(
output wire [3:0] m_axi_arcache,
output wire [2:0] m_axi_arprot,
output wire [3:0] m_axi_arqos,
output wire m_axi_arvalid,
input wire m_axi_arready,
// AXI read data channel
input wire m_axi_rvalid,
// AXI read response channel
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
input wire [1:0] m_axi_rresp,
input wire m_axi_rlast,
input wire m_axi_rvalid,
output wire m_axi_rready,
// Status
@ -66,12 +74,14 @@ module Vortex_axi #(
wire mem_rsp_ready;
VX_axi_adapter #(
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
.AXI_TID_WIDTH (AXI_TID_WIDTH)
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.VX_BYTEEN_WIDTH (AXI_STROBE_WIDTH),
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
.AXI_TID_WIDTH (AXI_TID_WIDTH),
.AXI_STROBE_WIDTH (AXI_STROBE_WIDTH)
) axi_adapter (
.clk (clk),
.reset (reset),
@ -89,7 +99,6 @@ module Vortex_axi #(
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awid (m_axi_awid),
.m_axi_awaddr (m_axi_awaddr),
.m_axi_awlen (m_axi_awlen),
@ -99,15 +108,20 @@ module Vortex_axi #(
.m_axi_awcache (m_axi_awcache),
.m_axi_awprot (m_axi_awprot),
.m_axi_awqos (m_axi_awqos),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awready (m_axi_awready),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_wdata (m_axi_wdata),
.m_axi_wstrb (m_axi_wstrb),
.m_axi_wlast (m_axi_wlast),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_wready (m_axi_wready),
.m_axi_bid (m_axi_bid),
.m_axi_bresp (m_axi_bresp),
.m_axi_bvalid (m_axi_bvalid),
.m_axi_bready (m_axi_bready),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arid (m_axi_arid),
.m_axi_araddr (m_axi_araddr),
.m_axi_arlen (m_axi_arlen),
@ -117,11 +131,14 @@ module Vortex_axi #(
.m_axi_arcache (m_axi_arcache),
.m_axi_arprot (m_axi_arprot),
.m_axi_arqos (m_axi_arqos),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arready (m_axi_arready),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rid (m_axi_rid),
.m_axi_rdata (m_axi_rdata),
.m_axi_rresp (m_axi_rresp),
.m_axi_rlast (m_axi_rlast),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rready (m_axi_rready)
);

View file

@ -8,8 +8,8 @@ module VX_avs_wrapper #(
parameter REQ_TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
localparam AVS_BYTEENW = (AVS_DATA_WIDTH / 8),
localparam RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1)
parameter AVS_BYTEENW = (AVS_DATA_WIDTH / 8),
parameter RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1)
) (
input wire clk,
input wire reset,

View file

@ -851,7 +851,7 @@ begin
cci_wr_req_data <= t_ccip_clData'(cci_mem_rsp_data);
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
`ASSERT(cci_wr_req_ctr != 0, ("runtime error"));
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
if (cci_wr_req_ctr == CCI_ADDR_WIDTH'(1)) begin
cci_wr_req_done <= 1;

View file

@ -39,8 +39,8 @@ module VX_bank #(
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
`SCOPE_IO_VX_bank
@ -86,8 +86,7 @@ module VX_bank #(
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr,
input wire mem_rsp_valid,
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
output wire mem_rsp_ready,
@ -130,8 +129,12 @@ module VX_bank #(
.ready_out (creq_ready),
.valid_out (creq_valid)
);
wire mreq_alm_full;
wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
wire crsq_valid, crsq_ready;
wire crsq_stall;
wire mshr_valid;
wire mshr_ready;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id;
@ -161,9 +164,6 @@ module VX_bank #(
wire is_flush_st0;
wire mshr_pending_st0, mshr_pending_st1;
wire crsq_valid, crsq_ready, crsq_stall;
wire mreq_alm_full;
// prevent read-during-write hazard when accessing tags/data block RAMs
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw;
@ -398,6 +398,7 @@ module VX_bank #(
// fill
.fill_valid (mem_rsp_fire),
.fill_id (mem_rsp_id),
.fill_addr (mem_rsp_addr),
// dequeue
.dequeue_valid (mshr_valid),

View file

@ -46,13 +46,13 @@ module VX_cache #(
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
`SCOPE_IO_VX_cache
// PERF
`ifdef PERF_ENABLE
VX_perf_cache_if perf_cache_if,
VX_perf_cache_if.master perf_cache_if,
`endif
input wire clk,
@ -94,7 +94,7 @@ module VX_cache #(
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE);
localparam MEM_TAG_IN_WIDTH = `MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH;
localparam MEM_TAG_IN_WIDTH = `BANK_SELECT_BITS + MSHR_ADDR_WIDTH;
localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE;
localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS != 0) ? (CORE_TAG_ID_BITS - NC_ENABLE) : CORE_TAG_ID_BITS;
@ -444,7 +444,6 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
if (NUM_BANKS == 1) begin
`UNUSED_VAR (mem_rsp_tag_qual)
assign mrsq_out_ready = per_bank_mem_rsp_ready;
end else begin
assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual)];
@ -515,8 +514,7 @@ module VX_cache #(
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_mem_req_data;
wire curr_bank_mem_req_ready;
wire curr_bank_mem_rsp_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr;
wire curr_bank_mem_rsp_valid;
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_rsp_id;
wire [`CACHE_LINE_WIDTH-1:0] curr_bank_mem_rsp_data;
wire curr_bank_mem_rsp_ready;
@ -558,11 +556,9 @@ module VX_cache #(
// Memory response
if (NUM_BANKS == 1) begin
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
assign curr_bank_mem_rsp_addr = `MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual);
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
end else begin
assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual) == i);
assign curr_bank_mem_rsp_addr = `MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual);
end
assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual);
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
@ -633,7 +629,6 @@ module VX_cache #(
// Memory response
.mem_rsp_valid (curr_bank_mem_rsp_valid),
.mem_rsp_addr (curr_bank_mem_rsp_addr),
.mem_rsp_id (curr_bank_mem_rsp_id),
.mem_rsp_data (curr_bank_mem_rsp_data),
.mem_rsp_ready (curr_bank_mem_rsp_ready),
@ -668,7 +663,7 @@ module VX_cache #(
.core_rsp_ready (core_rsp_ready_nc)
);
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
end
@ -692,33 +687,42 @@ module VX_cache #(
.ready_out (mem_req_ready_nc)
);
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({mem_req_addr_nc, mem_req_id});
if (NUM_BANKS == 1) begin
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'(mem_req_id);
end else begin
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_nc), mem_req_id});
end
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_core_writes_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
if (CORE_TAG_ID_BITS != 0) begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end else begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end
// per cycle: read misses, write misses, mshr stalls, pipeline stalls
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_pipe_stall_per_cycle;
assign perf_read_miss_per_cycle = $countones(perf_read_miss_per_bank);
assign perf_write_miss_per_cycle = $countones(perf_write_miss_per_bank);
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
`POP_COUNT(perf_pipe_stall_per_cycle, perf_pipe_stall_per_bank);
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
reg [`PERF_CTR_BITS-1:0] perf_core_writes;

View file

@ -61,12 +61,12 @@
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define MEM_ADDR_TO_BANK_ID(x) x[0 +: `BANK_SELECT_BITS]
`define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
`define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS]
`define MEM_TAG_TO_LINE_ADDR(x) x[(MSHR_ADDR_WIDTH+`BANK_SELECT_BITS) +: `LINE_ADDR_WIDTH]
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}

View file

@ -291,12 +291,16 @@ module VX_core_req_bank_sel #(
end
reg [`PERF_CTR_BITS-1:0] bank_stalls_r;
wire [$clog2(NUM_REQS+1)-1:0] bank_stall_cnt;
wire [NUM_REQS-1:0] bank_stall_mask = core_req_sel_r & ~core_req_ready;
`POP_COUNT(bank_stall_cnt, bank_stall_mask);
always @(posedge clk) begin
if (reset) begin
bank_stalls_r <= 0;
end else begin
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'($countones(core_req_sel_r & ~core_req_ready));
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'(bank_stall_cnt);
end
end

View file

@ -16,7 +16,7 @@ module VX_data_access #(
// Enable cache writeable
parameter WRITE_ENABLE = 1,
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
input wire clk,
input wire reset,

View file

@ -20,7 +20,7 @@ module VX_miss_resrv #(
// core request tag size
parameter CORE_TAG_WIDTH = 1,
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
) (
input wire clk,
input wire reset,
@ -46,6 +46,7 @@ module VX_miss_resrv #(
// fill
input wire fill_valid,
input wire [MSHR_ADDR_WIDTH-1:0] fill_id,
output wire [`LINE_ADDR_WIDTH-1:0] fill_addr,
// lookup
input wire lookup_valid,
@ -161,8 +162,8 @@ module VX_miss_resrv #(
dequeue_id_r <= dequeue_id_n;
allocate_id_r <= allocate_id_n;
assert(!allocate_fire || !valid_table[allocate_id_r]);
assert(!release_valid || valid_table[release_id]);
`ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error"));
`ASSERT(!release_valid || valid_table[release_id], ("runtime error"));
end
`RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id]), ("%t: *** cache%0d:%0d in-use allocation: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
@ -184,6 +185,8 @@ module VX_miss_resrv #(
.rdata (dequeue_data)
);
assign fill_addr = addr_table[fill_id];
assign allocate_ready = allocate_rdy_r;
assign allocate_id = allocate_id_r;
@ -206,8 +209,8 @@ module VX_miss_resrv #(
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_debug_wid, deq_debug_pc);
if (fill_valid)
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id);
// fill_addr is driven directly by addr_table[fill_id], so the fill address is
// traced once instead of being printed twice under the same "addr=" label.
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(fill_addr, BANK_ID), fill_id);
if (dequeue_fire)
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_debug_wid, deq_debug_pc);

View file

@ -15,10 +15,10 @@ module VX_nc_bypass #(
parameter MEM_TAG_IN_WIDTH = 1,
parameter MEM_TAG_OUT_WIDTH = 1,
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
localparam MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
parameter CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
parameter MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
) (
input wire clk,
input wire reset,

View file

@ -31,7 +31,7 @@ module VX_shared_mem #(
// PERF
`ifdef PERF_ENABLE
VX_perf_cache_if perf_cache_if,
VX_perf_cache_if.master perf_cache_if,
`endif
// Core request
@ -337,16 +337,22 @@ module VX_shared_mem #(
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
if (CORE_TAG_ID_BITS != 0) begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end else begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end
reg [`PERF_CTR_BITS-1:0] perf_core_reads;

View file

@ -22,6 +22,44 @@ interface VX_alu_req_if ();
wire wb;
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output next_PC,
output op_type,
output op_mod,
output use_PC,
output use_imm,
output imm,
output tid,
output rs1_data,
output rs2_data,
output rd,
output wb,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input next_PC,
input op_type,
input op_mod,
input use_PC,
input use_imm,
input imm,
input tid,
input rs1_data,
input rs2_data,
input rd,
input wb,
output ready
);
endinterface
`endif

View file

@ -10,6 +10,20 @@ interface VX_branch_ctl_if ();
wire taken;
wire [31:0] dest;
modport master (
output valid,
output wid,
output taken,
output dest
);
modport slave (
input valid,
input wid,
input taken,
input dest
);
endinterface
`endif

View file

@ -8,6 +8,16 @@ interface VX_cmt_to_csr_if ();
wire valid;
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
modport master (
output valid,
output commit_size
);
modport slave (
input valid,
input commit_size
);
endinterface
`endif

View file

@ -13,7 +13,31 @@ interface VX_commit_if ();
wire [`NR_BITS-1:0] rd;
wire wb;
wire eop;
wire ready;
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output data,
output rd,
output wb,
output eop,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input data,
input rd,
input wb,
input eop,
output ready
);
endinterface

View file

@ -17,6 +17,36 @@ interface VX_csr_req_if ();
wire [`NR_BITS-1:0] rd;
wire wb;
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output op_type,
output addr,
output rs1_data,
output use_imm,
output imm,
output rd,
output wb,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input op_type,
input addr,
input rs1_data,
input use_imm,
input imm,
input rd,
input wb,
output ready
);
endinterface

View file

@ -17,6 +17,26 @@ interface VX_dcache_req_if #(
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag;
wire [NUM_REQS-1:0] ready;
modport master (
output valid,
output rw,
output byteen,
output addr,
output data,
output tag,
input ready
);
modport slave (
input valid,
input rw,
input byteen,
input addr,
input data,
input tag,
output ready
);
endinterface
`endif

View file

@ -15,6 +15,22 @@ interface VX_dcache_rsp_if #(
wire [TAG_WIDTH-1:0] tag;
wire ready;
modport master (
output valid,
output tmask,
output data,
output tag,
input ready
);
modport slave (
input valid,
input tmask,
input data,
input tag,
output ready
);
endinterface
`endif

View file

@ -22,6 +22,44 @@ interface VX_decode_if ();
wire [`NR_BITS-1:0] rs3;
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output ex_type,
output op_type,
output op_mod,
output wb,
output use_PC,
output use_imm,
output imm,
output rd,
output rs1,
output rs2,
output rs3,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input ex_type,
input op_type,
input op_mod,
input wb,
input use_PC,
input use_imm,
input imm,
input rd,
input rs1,
input rs2,
input rs3,
output ready
);
endinterface
`endif

View file

@ -7,6 +7,14 @@ interface VX_fetch_to_csr_if ();
wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks;
modport master (
output thread_masks
);
modport slave (
input thread_masks
);
endinterface
`endif

View file

@ -18,6 +18,36 @@ interface VX_fpu_req_if ();
wire wb;
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output op_type,
output op_mod,
output rs1_data,
output rs2_data,
output rs3_data,
output rd,
output wb,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input op_type,
input op_mod,
input rs1_data,
input rs2_data,
input rs3_data,
input rd,
input wb,
output ready
);
endinterface
`endif

View file

@ -12,6 +12,22 @@ interface VX_fpu_to_csr_if ();
wire [`NW_BITS-1:0] read_wid;
wire [`INST_FRM_BITS-1:0] read_frm;
modport master (
output write_enable,
output write_wid,
output write_fflags,
output read_wid,
input read_frm
);
modport slave (
input write_enable,
input write_wid,
input write_fflags,
input read_wid,
output read_frm
);
endinterface
`endif

View file

@ -8,7 +8,21 @@ interface VX_gpr_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
wire [`NR_BITS-1:0] rs3;
modport master (
output wid,
output rs1,
output rs2,
output rs3
);
modport slave (
input wid,
input rs1,
input rs2,
input rs3
);
endinterface

View file

@ -9,6 +9,18 @@ interface VX_gpr_rsp_if ();
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
modport master (
output rs1_data,
output rs2_data,
output rs3_data
);
modport slave (
input rs1_data,
input rs2_data,
input rs3_data
);
endinterface
`endif

View file

@ -20,6 +20,36 @@ interface VX_gpu_req_if();
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output next_PC,
output op_type,
output tid,
output rs1_data,
output rs2_data,
output rd,
output wb,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input next_PC,
input op_type,
input tid,
input rs1_data,
input rs2_data,
input rd,
input wb,
output ready
);
endinterface
`endif

View file

@ -20,14 +20,62 @@ interface VX_ibuffer_if ();
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
wire ready;
// scoreboard forwarding
wire [`NR_BITS-1:0] rd_n;
wire [`NR_BITS-1:0] rs1_n;
wire [`NR_BITS-1:0] rs2_n;
wire [`NR_BITS-1:0] rs3_n;
wire [`NW_BITS-1:0] wid_n;
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output ex_type,
output op_type,
output op_mod,
output wb,
output use_PC,
output use_imm,
output imm,
output rd,
output rs1,
output rs2,
output rs3,
output rd_n,
output rs1_n,
output rs2_n,
output rs3_n,
output wid_n,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input ex_type,
input op_type,
input op_mod,
input wb,
input use_PC,
input use_imm,
input imm,
input rd,
input rs1,
input rs2,
input rs3,
input rd_n,
input rs1_n,
input rs2_n,
input rs3_n,
input wid_n,
output ready
);
endinterface

View file

@ -13,6 +13,20 @@ interface VX_icache_req_if #(
wire [TAG_WIDTH-1:0] tag;
wire ready;
modport master (
output valid,
output addr,
output tag,
input ready
);
modport slave (
input valid,
input addr,
input tag,
output ready
);
endinterface
`endif

View file

@ -11,7 +11,21 @@ interface VX_icache_rsp_if #(
wire valid;
wire [`WORD_WIDTH-1:0] data;
wire [TAG_WIDTH-1:0] tag;
wire ready;
wire ready;
modport master (
output valid,
output data,
output tag,
input ready
);
modport slave (
input valid,
input data,
input tag,
output ready
);
endinterface

View file

@ -11,6 +11,22 @@ interface VX_ifetch_req_if ();
wire [31:0] PC;
wire ready;
modport master (
output valid,
output tmask,
output wid,
output PC,
input ready
);
modport slave (
input valid,
input tmask,
input wid,
input PC,
output ready
);
endinterface
`endif

View file

@ -12,6 +12,24 @@ interface VX_ifetch_rsp_if ();
wire [31:0] data;
wire ready;
modport master (
output valid,
output tmask,
output wid,
output PC,
output data,
input ready
);
modport slave (
input valid,
input tmask,
input wid,
input PC,
input data,
output ready
);
endinterface
`endif

View file

@ -8,6 +8,16 @@ interface VX_join_if ();
wire valid;
wire [`NW_BITS-1:0] wid;
modport master (
output valid,
output wid
);
modport slave (
input valid,
input wid
);
endinterface
`endif

View file

@ -18,6 +18,36 @@ interface VX_lsu_req_if ();
wire wb;
wire ready;
modport master (
output valid,
output wid,
output tmask,
output PC,
output op_type,
output is_fence,
output store_data,
output base_addr,
output offset,
output rd,
output wb,
input ready
);
modport slave (
input valid,
input wid,
input tmask,
input PC,
input op_type,
input is_fence,
input store_data,
input base_addr,
input offset,
input rd,
input wb,
output ready
);
endinterface
`endif

View file

@ -18,6 +18,26 @@ interface VX_mem_req_if #(
wire [TAG_WIDTH-1:0] tag;
wire ready;
modport master (
output valid,
output rw,
output byteen,
output addr,
output data,
output tag,
input ready
);
modport slave (
input valid,
input rw,
input byteen,
input addr,
input data,
input tag,
output ready
);
endinterface
`endif

View file

@ -11,7 +11,21 @@ interface VX_mem_rsp_if #(
wire valid;
wire [DATA_WIDTH-1:0] data;
wire [TAG_WIDTH-1:0] tag;
wire ready;
wire ready;
modport master (
output valid,
output data,
output tag,
input ready
);
modport slave (
input valid,
input data,
input tag,
output ready
);
endinterface

View file

@ -14,6 +14,28 @@ interface VX_perf_cache_if ();
wire [`PERF_CTR_BITS-1:0] pipe_stalls;
wire [`PERF_CTR_BITS-1:0] crsp_stalls;
modport master (
output reads,
output writes,
output read_misses,
output write_misses,
output bank_stalls,
output mshr_stalls,
output pipe_stalls,
output crsp_stalls
);
modport slave (
input reads,
input writes,
input read_misses,
input write_misses,
input bank_stalls,
input mshr_stalls,
input pipe_stalls,
input crsp_stalls
);
endinterface
`endif

View file

@ -28,6 +28,50 @@ interface VX_perf_memsys_if ();
wire [`PERF_CTR_BITS-1:0] mem_stalls;
wire [`PERF_CTR_BITS-1:0] mem_latency;
modport master (
output icache_reads,
output icache_read_misses,
output icache_pipe_stalls,
output icache_crsp_stalls,
output dcache_reads,
output dcache_writes,
output dcache_read_misses,
output dcache_write_misses,
output dcache_bank_stalls,
output dcache_mshr_stalls,
output dcache_pipe_stalls,
output dcache_crsp_stalls,
output smem_reads,
output smem_writes,
output smem_bank_stalls,
output mem_reads,
output mem_writes,
output mem_stalls,
output mem_latency
);
modport slave (
input icache_reads,
input icache_read_misses,
input icache_pipe_stalls,
input icache_crsp_stalls,
input dcache_reads,
input dcache_writes,
input dcache_read_misses,
input dcache_write_misses,
input dcache_bank_stalls,
input dcache_mshr_stalls,
input dcache_pipe_stalls,
input dcache_crsp_stalls,
input smem_reads,
input smem_writes,
input smem_bank_stalls,
input mem_reads,
input mem_writes,
input mem_stalls,
input mem_latency
);
endinterface
`endif

View file

@ -4,15 +4,41 @@
`include "VX_define.vh"
interface VX_perf_pipeline_if ();
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] lsu_stalls;
wire [`PERF_CTR_BITS-1:0] csr_stalls;
wire [`PERF_CTR_BITS-1:0] alu_stalls;
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
`ifdef EXT_F_ENABLE
wire [`PERF_CTR_BITS-1:0] fpu_stalls;
`endif
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
modport master (
output ibf_stalls,
output scb_stalls,
output lsu_stalls,
output csr_stalls,
output alu_stalls,
`ifdef EXT_F_ENABLE
output fpu_stalls,
`endif
output gpu_stalls
);
modport slave (
input ibf_stalls,
input scb_stalls,
input lsu_stalls,
input csr_stalls,
input alu_stalls,
`ifdef EXT_F_ENABLE
input fpu_stalls,
`endif
input gpu_stalls
);
endinterface
`endif

View file

@ -12,6 +12,24 @@ interface VX_warp_ctl_if ();
gpu_barrier_t barrier;
gpu_split_t split;
modport master (
output valid,
output wid,
output tmc,
output wspawn,
output barrier,
output split
);
modport slave (
input valid,
input wid,
input tmc,
input wspawn,
input barrier,
input split
);
endinterface
`endif

View file

@ -6,16 +6,36 @@
interface VX_writeback_if ();
wire valid;
wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] data;
wire eop;
wire eop;
wire ready;
modport master (
output valid,
output tmask,
output wid,
output PC,
output rd,
output data,
output eop,
input ready
);
modport slave (
input valid,
input tmask,
input wid,
input PC,
input rd,
input data,
input eop,
output ready
);
endinterface
`endif

View file

@ -9,6 +9,18 @@ interface VX_wstall_if();
wire [`NW_BITS-1:0] wid;
wire stalled;
modport master (
output valid,
output wid,
output stalled
);
modport slave (
input valid,
input wid,
input stalled
);
endinterface
`endif

View file

@ -1,15 +1,15 @@
`include "VX_define.vh"
module VX_axi_adapter #(
parameter VX_DATA_WIDTH = 512,
parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)),
parameter VX_TAG_WIDTH = 8,
parameter AXI_DATA_WIDTH = VX_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = 32,
parameter AXI_TID_WIDTH = VX_TAG_WIDTH,
parameter VX_DATA_WIDTH = 512,
parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)),
parameter VX_TAG_WIDTH = 8,
parameter AXI_DATA_WIDTH = VX_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = 32,
parameter AXI_TID_WIDTH = VX_TAG_WIDTH,
localparam VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8),
localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
parameter VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8),
parameter AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
) (
input wire clk,
input wire reset,
@ -29,8 +29,7 @@ module VX_axi_adapter #(
output wire [VX_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_req_ready,
// AXI write address channel
output wire m_axi_awvalid,
// AXI write request address channel
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
output wire [7:0] m_axi_awlen,
@ -39,18 +38,24 @@ module VX_axi_adapter #(
output wire m_axi_awlock,
output wire [3:0] m_axi_awcache,
output wire [2:0] m_axi_awprot,
output wire [3:0] m_axi_awqos,
output wire [3:0] m_axi_awqos,
output wire m_axi_awvalid,
input wire m_axi_awready,
// AXI write data channel
output wire m_axi_wvalid,
// AXI write request data channel
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
output wire m_axi_wlast,
output wire m_axi_wlast,
output wire m_axi_wvalid,
input wire m_axi_wready,
// AXI write response channel
input wire [AXI_TID_WIDTH-1:0] m_axi_bid,
input wire [1:0] m_axi_bresp,
input wire m_axi_bvalid,
output wire m_axi_bready,
// AXI read address channel
output wire m_axi_arvalid,
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
output wire [7:0] m_axi_arlen,
@ -60,12 +65,15 @@ module VX_axi_adapter #(
output wire [3:0] m_axi_arcache,
output wire [2:0] m_axi_arprot,
output wire [3:0] m_axi_arqos,
output wire m_axi_arvalid,
input wire m_axi_arready,
// AXI read data channel
input wire m_axi_rvalid,
// AXI read response channel
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
input wire [1:0] m_axi_rresp,
input wire m_axi_rlast,
input wire m_axi_rvalid,
output wire m_axi_rready
);
localparam AXSIZE = $clog2(VX_DATA_WIDTH/8);
@ -73,6 +81,8 @@ module VX_axi_adapter #(
`STATIC_ASSERT((AXI_DATA_WIDTH == VX_DATA_WIDTH), ("invalid parameter"))
`STATIC_ASSERT((AXI_TID_WIDTH == VX_TAG_WIDTH), ("invalid parameter"))
//`UNUSED_VAR ()
reg awvalid_ack;
reg wvalid_ack;
@ -95,7 +105,7 @@ module VX_axi_adapter #(
wire axi_write_ready = (m_axi_awready || awvalid_ack) && (m_axi_wready || wvalid_ack);
// AXI write address channel
// AXI write request address channel
assign m_axi_awvalid = mem_req_valid && mem_req_rw && !awvalid_ack;
assign m_axi_awid = mem_req_tag;
assign m_axi_awaddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE;
@ -107,13 +117,18 @@ module VX_axi_adapter #(
assign m_axi_awprot = 3'b0;
assign m_axi_awqos = 4'b0;
// AXI write data channel
// AXI write request data channel
assign m_axi_wvalid = mem_req_valid && mem_req_rw && !wvalid_ack;
assign m_axi_wdata = mem_req_data;
assign m_axi_wstrb = mem_req_byteen;
assign m_axi_wlast = 1'b1;
// AXI read address channel
// AXI write response channel
`UNUSED_VAR (m_axi_bid);
`RUNTIME_ASSERT(~m_axi_bvalid || m_axi_bresp == 0, ("AXI response error"));
assign m_axi_bready = 1'b1;
// AXI read request channel
assign m_axi_arvalid = mem_req_valid && !mem_req_rw;
assign m_axi_arid = mem_req_tag;
assign m_axi_araddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE;
@ -125,10 +140,12 @@ module VX_axi_adapter #(
assign m_axi_arprot = 3'b0;
assign m_axi_arqos = 4'b0;
// AXI read data channel
// AXI read response channel
assign mem_rsp_valid = m_axi_rvalid;
assign mem_rsp_tag = m_axi_rid;
assign mem_rsp_data = m_axi_rdata;
`RUNTIME_ASSERT(~m_axi_rvalid || m_axi_rresp == 0, ("AXI response error"));
`UNUSED_VAR (m_axi_rlast);
assign m_axi_rready = mem_rsp_ready;
// Vortex request ack

View file

@ -31,7 +31,7 @@ module VX_bypass_buffer #(
buffer_valid <= 0;
end
if (valid_in && ~ready_out) begin
assert(!buffer_valid);
// The buffer must be empty before it captures a stalled input; the message is
// passed in the parenthesized form used by the other ASSERT call sites.
`ASSERT(!buffer_valid, ("runtime error"));
buffer_valid <= 1;
end
end

View file

@ -28,7 +28,9 @@ module VX_dp_ram #(
if (INIT_FILE != "") begin \
initial $readmemh(INIT_FILE, ram); \
end else begin \
initial ram = '{default: INIT_VALUE}; \
initial \
for (integer i = 0; i < SIZE; ++i)\
ram[i] = INIT_VALUE; \
end \
end

View file

@ -35,8 +35,8 @@ module VX_fifo_queue #(
head_r <= 0;
size_r <= 0;
end else begin
assert(!push || !full);
assert(!pop || !empty);
`ASSERT(!push || !full, ("runtime error"));
`ASSERT(!pop || !empty, ("runtime error"));
if (push) begin
if (!pop) begin
size_r <= 1;
@ -71,8 +71,8 @@ module VX_fifo_queue #(
alm_full_r <= 0;
used_r <= 0;
end else begin
assert(!push || !full);
assert(!pop || !empty);
`ASSERT(!push || !full, ("runtime error"));
`ASSERT(!pop || !empty, ("runtime error"));
if (push) begin
if (!pop) begin
empty_r <= 0;

View file

@ -5,7 +5,7 @@ module VX_find_first #(
parameter N = 1,
parameter DATAW = 1,
parameter REVERSE = 0,
localparam LOGN = $clog2(N)
parameter LOGN = $clog2(N)
) (
input wire [N-1:0][DATAW-1:0] data_i,
input wire [N-1:0] valid_i,

View file

@ -55,10 +55,10 @@ module VX_index_buffer #(
full_r <= 1'b0;
end else begin
if (release_slot) begin
assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr);
`ASSERT(0 == free_slots[release_addr], ("%t: releasing invalid slot at port %d", $time, release_addr));
end
if (acquire_slot) begin
assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr);
`ASSERT(1 == free_slots[write_addr], ("%t: acquiring used slot at port %d", $time, write_addr));
end
write_addr_r <= free_index;
free_slots <= free_slots_n;

View file

@ -32,10 +32,8 @@ module VX_index_queue #(
assign enqueue = push;
assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid
always @(*) begin
assert(!push || !full);
end
`RUNTIME_ASSERT(!push || !full, ("invalid inputs"));
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;

View file

@ -2,9 +2,9 @@
`TRACING_OFF
module VX_lzc #(
parameter N = 2,
parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero
localparam LOGN = $clog2(N)
parameter N = 2,
parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero
parameter LOGN = $clog2(N)
) (
input wire [N-1:0] in_i,
output wire [LOGN-1:0] cnt_o,

View file

@ -25,7 +25,7 @@ module VX_pending_size #(
empty_r <= 1;
full_r <= 0;
end else begin
assert(!incr || !full);
`ASSERT(!incr || !full, ("runtime error"));
if (incr) begin
if (!decr) begin
empty_r <= 0;

View file

@ -30,9 +30,7 @@ module VX_skid_buffer #(
end else if (NOBACKPRESSURE) begin
always @(posedge clk) begin
assert(ready_out) else $error("ready_out should always be asserted");
end
`RUNTIME_ASSERT(ready_out, ("ready_out should always be asserted"))
wire stall = valid_out && ~ready_out;

View file

@ -27,7 +27,9 @@ module VX_sp_ram #(
if (INIT_FILE != "") begin \
initial $readmemh(INIT_FILE, ram); \
end else begin \
initial ram = '{default: INIT_VALUE}; \
initial \
for (integer i = 0; i < SIZE; ++i)\
ram[i] = INIT_VALUE; \
end \
end

View file

@ -5,7 +5,7 @@ module VX_stream_demux #(
parameter LANES = 1,
parameter DATAW = 1,
parameter BUFFERED = 0,
localparam LOG_NUM_REQS = `LOG2UP(NUM_REQS)
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
input wire clk,
input wire reset,

View file

@ -35,8 +35,8 @@ TOP = Vortex
RTL_DIR=../rtl
DPI_DIR=../dpi
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
SRCS = simulator.cpp main.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp

View file

@ -113,7 +113,8 @@ void Simulator::reset() {
mem_rsp_vec_[b].clear();
}
last_mem_rsp_bank_ = 0;
mem_rsp_active_ = false;
mem_rd_rsp_active_ = false;
mem_wr_rsp_active_ = false;
#ifdef AXI_BUS
this->reset_axi_bus();
@ -182,9 +183,11 @@ void Simulator::reset_axi_bus() {
void Simulator::eval_axi_bus(bool clk) {
if (!clk) {
mem_rsp_ready_ = vortex_->m_axi_rready;
mem_rd_rsp_ready_ = vortex_->m_axi_rready;
mem_wr_rsp_ready_ = vortex_->m_axi_bready;
return;
}
if (ram_ == nullptr) {
vortex_->m_axi_wready = 0;
vortex_->m_axi_awready = 0;
@ -200,44 +203,71 @@ void Simulator::eval_axi_bus(bool clk) {
}
}
bool has_response = false;
bool has_rd_response = false;
bool has_wr_response = false;
// schedule memory responses that are ready
for (int i = 0; i < MEMORY_BANKS; ++i) {
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
if (!mem_rsp_vec_[b].empty()
&& (mem_rsp_vec_[b].begin()->cycles_left) <= 0) {
has_response = true;
last_mem_rsp_bank_ = b;
break;
if (!mem_rsp_vec_[b].empty()) {
auto mem_rsp_it = mem_rsp_vec_[b].begin();
if (mem_rsp_it->cycles_left <= 0) {
has_rd_response = !mem_rsp_it->write;
has_wr_response = mem_rsp_it->write;
last_mem_rsp_bank_ = b;
break;
}
}
}
// send memory response
if (mem_rsp_active_
&& vortex_->m_axi_rvalid && mem_rsp_ready_) {
mem_rsp_active_ = false;
// send memory read response
if (mem_rd_rsp_active_
&& vortex_->m_axi_rvalid && mem_rd_rsp_ready_) {
mem_rd_rsp_active_ = false;
}
if (!mem_rsp_active_) {
if (has_response) {
vortex_->m_axi_rvalid = 1;
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
if (!mem_rd_rsp_active_) {
if (has_rd_response) {
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
/*
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");
*/
*/
vortex_->m_axi_rvalid = 1;
vortex_->m_axi_rid = mem_rsp_it->tag;
vortex_->m_axi_rresp = 0;
vortex_->m_axi_rlast = 1;
memcpy((uint8_t*)vortex_->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
vortex_->m_axi_rid = mem_rsp_it->tag;
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
mem_rsp_active_ = true;
mem_rd_rsp_active_ = true;
} else {
vortex_->m_axi_rvalid = 0;
}
}
// send memory write response
if (mem_wr_rsp_active_
&& vortex_->m_axi_bvalid && mem_wr_rsp_ready_) {
mem_wr_rsp_active_ = false;
}
if (!mem_wr_rsp_active_) {
if (has_wr_response) {
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
/*
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
*/
vortex_->m_axi_bvalid = 1;
vortex_->m_axi_bid = mem_rsp_it->tag;
vortex_->m_axi_bresp = 0;
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
mem_wr_rsp_active_ = true;
} else {
vortex_->m_axi_bvalid = 0;
}
}
// select the memory bank
uint32_t req_addr = vortex_->m_axi_wvalid ? vortex_->m_axi_awaddr : vortex_->m_axi_araddr;
uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0;
@ -260,6 +290,8 @@ void Simulator::eval_axi_bus(bool clk) {
uint64_t byteen = vortex_->m_axi_wstrb;
unsigned base_addr = vortex_->m_axi_awaddr;
uint8_t* data = (uint8_t*)(vortex_->m_axi_wdata);
// detect stdout write
if (base_addr >= IO_COUT_ADDR
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
@ -286,13 +318,20 @@ void Simulator::eval_axi_bus(bool clk) {
(*ram_)[base_addr + i] = data[i];
}
}
}
// Queue the write acknowledgement that is later returned on the B channel.
// The tag and address belong to the write address (AW) channel, so they are
// read from m_axi_awid / m_axi_awaddr rather than the read address (AR) signals.
mem_req_t mem_req;
mem_req.tag = vortex_->m_axi_awid;
mem_req.addr = vortex_->m_axi_awaddr;
mem_req.cycles_left = 0; // eligible for scheduling immediately (scheduler checks <= 0)
mem_req.write = 1; // routes this entry to the write-response path
mem_rsp_vec_[req_bank].emplace_back(mem_req);
}
} else {
mem_req_t mem_req;
mem_req.tag = vortex_->m_axi_arid;
mem_req.addr = vortex_->m_axi_araddr;
ram_->read(vortex_->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data());
mem_req.cycles_left = MEM_LATENCY;
mem_req.write = 0;
for (auto& rsp : mem_rsp_vec_[req_bank]) {
if (mem_req.addr == rsp.addr) {
// duplicate requests receive the same cycle delay
@ -319,7 +358,7 @@ void Simulator::reset_mem_bus() {
void Simulator::eval_mem_bus(bool clk) {
if (!clk) {
mem_rsp_ready_ = vortex_->mem_rsp_ready;
mem_rd_rsp_ready_ = vortex_->mem_rsp_ready;
return;
}
@ -350,14 +389,14 @@ void Simulator::eval_mem_bus(bool clk) {
}
// send memory response
if (mem_rsp_active_
&& vortex_->mem_rsp_valid && mem_rsp_ready_) {
mem_rsp_active_ = false;
if (mem_rd_rsp_active_
&& vortex_->mem_rsp_valid && mem_rd_rsp_ready_) {
mem_rd_rsp_active_ = false;
}
if (!mem_rsp_active_) {
if (!mem_rd_rsp_active_) {
if (has_response) {
vortex_->mem_rsp_valid = 1;
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
/*
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
@ -368,7 +407,7 @@ void Simulator::eval_mem_bus(bool clk) {
memcpy((uint8_t*)vortex_->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
vortex_->mem_rsp_tag = mem_rsp_it->tag;
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
mem_rsp_active_ = true;
mem_rd_rsp_active_ = true;
} else {
vortex_->mem_rsp_valid = 0;
}

View file

@ -54,11 +54,12 @@ public:
private:
typedef struct {
typedef struct {
// Pending-response record kept in the per-bank mem_rsp_vec_ lists.
// An entry is eligible to be sent once cycles_left is <= 0.
int cycles_left;
// Data payload copied onto the read-data bus for read responses.
std::array<uint8_t, MEM_BLOCK_SIZE> block;
// Address of the originating request (compared against queued entries to
// detect duplicate read requests).
uint64_t addr;
// Request tag, echoed back as the response id.
uint64_t tag;
// True for a write acknowledgement, false for a read response.
bool write;
} mem_req_t;
std::unordered_map<int, std::stringstream> print_bufs_;
@ -80,9 +81,11 @@ private:
std::list<mem_req_t> mem_rsp_vec_ [MEMORY_BANKS];
uint32_t last_mem_rsp_bank_;
bool mem_rsp_active_;
bool mem_rd_rsp_active_;
bool mem_rd_rsp_ready_;
bool mem_rsp_ready_;
bool mem_wr_rsp_active_;
bool mem_wr_rsp_ready_;
RAM *ram_;

View file

@ -26,11 +26,11 @@ DBG_FLAGS += -DDBG_CACHE_REQ_INFO
CONFIG1 := -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2CACHE_SIZE=131072 $(CONFIGS)
CONFIG8 := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2CACHE_SIZE=131072 $(CONFIGS)
CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=262144 $(CONFIGS)
CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=262144 $(CONFIGS)
CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=524288 $(CONFIGS)
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2_CACHE_SIZE=131072 $(CONFIGS)
CONFIG8 := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2_CACHE_SIZE=131072 $(CONFIGS)
CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=262144 $(CONFIGS)
CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=262144 $(CONFIGS)
CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=524288 $(CONFIGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)
RTL_INCLUDE = -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/afu

View file

@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
# Runs ../../project.tcl through quartus_sh for the $(PROJECT_FILES) target,
# passing the project name, family, device, top-level entity, source file,
# SDC file, include paths and the -set macros of this build: NOPAE,
# NUM_CORES=4, NUM_CLUSTERS=4, L2 disabled, L3 enabled with size 262144.
# The command is listed twice here; the two lines differ only in the name of
# the last macro (L3CACHE_SIZE vs L3_CACHE_SIZE).
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=262144"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=262144"
syn.chg:
$(STAMP) syn.chg

View file

@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
# Runs ../../project.tcl through quartus_sh for the $(PROJECT_FILES) target,
# passing the project name, family, device, top-level entity, source file,
# SDC file, include paths and the -set macros of this build: NOPAE,
# NUM_CORES=8, NUM_CLUSTERS=4, L2 disabled, L3 enabled with size 262144.
# The command is listed twice here; the two lines differ only in the name of
# the last macro (L3CACHE_SIZE vs L3_CACHE_SIZE).
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=262144"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=262144"
syn.chg:
$(STAMP) syn.chg

View file

@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
# Runs ../../project.tcl through quartus_sh for the $(PROJECT_FILES) target,
# passing the project name, family, device, top-level entity, source file,
# SDC file, include paths and the -set macros of this build: NOPAE,
# NUM_CORES=4, NUM_CLUSTERS=1, L2 enabled with size 65536, L3 disabled.
# The command is listed twice here; the two lines differ only in the name of
# the last macro (L2CACHE_SIZE vs L2_CACHE_SIZE).
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2CACHE_SIZE=65536"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2_CACHE_SIZE=65536"
syn.chg:
$(STAMP) syn.chg

View file

@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
# Runs ../../project.tcl through quartus_sh for the $(PROJECT_FILES) target,
# passing the project name, family, device, top-level entity, source file,
# SDC file, include paths and the -set macros of this build: NOPAE,
# NUM_CORES=8, NUM_CLUSTERS=8, L2 disabled, L3 enabled with size 524288.
# The command is listed twice here; the two lines differ only in the name of
# the last macro (L3CACHE_SIZE vs L3_CACHE_SIZE).
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=524288"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=524288"
syn.chg:
$(STAMP) syn.chg

View file

@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
# Runs ../../project.tcl through quartus_sh for the $(PROJECT_FILES) target,
# passing the project name, family, device, top-level entity, source file,
# SDC file, include paths and the -set macros of this build: NOPAE,
# NUM_CORES=8, NUM_CLUSTERS=1, L2 enabled with size 131072, L3 disabled.
# The command is listed twice here; the two lines differ only in the name of
# the last macro (L2CACHE_SIZE vs L2_CACHE_SIZE).
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2CACHE_SIZE=131072"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2_CACHE_SIZE=131072"
syn.chg:
$(STAMP) syn.chg

View file

@ -3,15 +3,18 @@ TOP_LEVEL_ENTITY = Vortex
# Top-level source file and the RTL root that the include paths hang off.
SRC_FILE = Vortex.v
RTL_DIR = ../../rtl
# Preprocessor defines handed to the scripts below. DEFINES is assigned twice
# in this listing; the second assignment additionally sets MEM_BLOCK_SIZE=64.
DEFINES = -DNDEBUG -DSYNTHESIS -DEXT_F_DISABLE -DNUM_CORES=1 -DNUM_THREADS=2 -DNUM_WARPS=2
DEFINES = -DNDEBUG -DSYNTHESIS -DEXT_F_DISABLE -DNUM_CORES=1 -DNUM_THREADS=2 -DNUM_WARPS=2 -DMEM_BLOCK_SIZE=64
# Include search paths: RTL root, libs, interfaces and cache.
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache
# Build targets
all: build
# `build` form that gives synth.sh the original source plus defines/includes.
build:
./synth.sh -t$(TOP_LEVEL_ENTITY) -s$(SRC_FILE) $(DEFINES) $(RTL_INCLUDE)
# output.v is written by sv2v.sh (-ooutput.v) from the defines and include paths.
output.v:
./sv2v.sh $(DEFINES) $(RTL_INCLUDE) -ooutput.v
# `build` form that depends on output.v and passes it to synth.sh as the source.
build: output.v
./synth.sh -t$(TOP_LEVEL_ENTITY) -soutput.v
# Remove generated Verilog, yosys scripts and logs. Two rm lines are listed;
# they differ only in the generated file name (sources.v vs output.v).
clean:
rm -rf sources.v *.ys *.log
rm -rf output.v *.ys *.log

View file

@ -1,5 +0,0 @@
# Reads ../../rtl/Vortex.v in SystemVerilog mode (-sv) with the rtl, libs,
# interfaces, pipe_regs and cache directories on the include path, then runs
# the `show` command on the loaded design.
# load design
read_verilog -sv -I../../rtl -I../../rtl/libs -I../../rtl/interfaces -I../../rtl/pipe_regs -I../../rtl/cache ../../rtl/Vortex.v
# dump diagram
show

57
hw/syn/yosys/sv2v.sh Executable file
View file

@ -0,0 +1,57 @@
#!/bin/bash

# SystemVerilog-to-Verilog conversion wrapper around sv2v.
# sv2v: https://github.com/zachjs/sv2v
# yosys (not invoked by this script): http://www.clifford.at/yosys/
#
# Options:
#   -s FILE   extra source file, passed to sv2v ahead of the discovered files
#   -o FILE   output file (default: out.v)
#   -I DIR    include directory; every *.v / *.sv regular file directly inside
#             DIR (no recursion) is also passed to sv2v as a source
#   -D MACRO  macro definition, forwarded to sv2v as -DMACRO
#   -h        print usage and exit
#
# Exit status: 0 on success or when usage is requested (including a call with
# no arguments), 1 on an unrecognised option, otherwise the status of the
# failing command (set -e).

# exit when any command fails
set -e

source=""
includes=()
macro_args=()
output_file=out.v

# Print the option list, scraped from the "x) # description" lines of the case
# statement below, then exit with the status given as $1 (default 0).
usage() { echo "$0 usage:" && grep " .)\ #" "$0"; exit "${1:-0}"; }
[ $# -eq 0 ] && usage

# "s:" must be part of the option string, otherwise the s) branch can never
# be selected and -s falls through to the usage branch.
while getopts "s:o:I:D:h" arg; do
    case $arg in
        s) # source
            source=${OPTARG}
            ;;
        o) # output-file
            output_file=${OPTARG}
            ;;
        I) # include directory
            includes+=("${OPTARG}")
            ;;
        D) # macro definition
            macro_args+=("-D${OPTARG}")
            ;;
        h) # help
            usage 0
            ;;
        *)
            # Unknown option: report failure so a calling Makefile stops
            # instead of continuing without an output file.
            usage 1
            ;;
    esac
done

# process include paths
inc_args=()
for dir in "${includes[@]}"
do
    inc_args+=("-I$dir")
done

# process source files
# Arguments are collected in arrays so that paths containing whitespace reach
# sv2v as single words.
file_args=()
if [ -n "$source" ]; then
    file_args+=("$source")
fi
for dir in "${includes[@]}"
do
    # The two -name tests are grouped so that -type f applies to both;
    # without the parentheses -type f would only constrain the *.sv test.
    while IFS= read -r file
    do
        echo "file: $file"
        file_args+=("$file")
    done < <(find "$dir" -maxdepth 1 \( -name '*.v' -o -name '*.sv' \) -type f)
done

# system-verilog to verilog conversion
sv2v "${macro_args[@]}" "${inc_args[@]}" "${file_args[@]}" -v -w "$output_file"

View file

@ -10,11 +10,12 @@ set -e
source=""
top_level=""
dir_list=()
defines=""
inc_args=""
macro_args=""
usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; }
[ $# -eq 0 ] && usage
while getopts "hs:t:I:D:" arg; do
while getopts "s:t:I:D:h" arg; do
case $arg in
s) # source
source=${OPTARG}
@ -24,9 +25,10 @@ while getopts "hs:t:I:D:" arg; do
;;
I) # include directory
dir_list+=(${OPTARG})
inc_args="$inc_args -I${OPTARG}"
;;
D) # macro definition
defines="$defines -D${OPTARG}"
macro_args="$macro_args -D${OPTARG}"
;;
h | *)
usage
@ -35,41 +37,29 @@ while getopts "hs:t:I:D:" arg; do
esac
done
echo "top_level=$top_level, source=$source, defines=$defines"
# process include paths
inc_list=""
for dir in "${dir_list[@]}"
do
echo "include: $dir" >> synth.log
inc_list="$inc_list -I$dir"
done
# process source files
file_list=""
for dir in "${dir_list[@]}"
do
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
{
# read design sources
for dir in "${dir_list[@]}"
do
echo "file: $file" >> synth.log
file_list="$file_list $file"
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
do
echo "read_verilog $macro_args $inc_args -sv $file"
done
done
done
if [ -n "$source" ]; then
echo "read_verilog $macro_args $inc_args -sv $source"
fi
# system-verilog to verilog conversion
sv2v $defines -w output.v $inc_list $file_list
# generic synthesis
echo "synth -top $top_level"
{
echo "read_verilog -sv output.v"
echo "hierarchy -check -top $top_level"
# mapping to mycells.lib
echo "dfflibmap -liberty mycells.lib"
echo "abc -liberty mycells.lib"
echo "clean"
# insertion of global reset
echo "add -global_input reset 1"
echo "proc -global_arst reset"
echo "synth -run coarse; opt -fine"
echo "tee -o brams.log memory_bram -rules scripts/brams.txt;;"
echo "write_verilog -noexpr -noattr synth.v"
# write synthesized design
echo "write_verilog synth.v"
} > synth.ys
yosys -l yosys.log synth.ys

View file

@ -1,4 +1,4 @@
PARAM += -DCACHE_SIZE=4096 -DWORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DNUM_BANKS=4 -DCREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4
PARAM += -DCACHE_SIZE=4096 -DCACHE_WORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DCACHE_NUM_BANKS=4 -DCACHE_CREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4