mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
commit
d84241aad0
97 changed files with 1435 additions and 666 deletions
|
@ -85,8 +85,8 @@ CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_add
|
|||
# test cache multi-porting
|
||||
# D-cache ports-per-bank knob is DCACHE_NUM_PORTS in the config header (the old DNUM_PORTS name is no longer checked there)
CONFIGS="-DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
# D-cache ports-per-bank knob is DCACHE_NUM_PORTS in the config header (the old DNUM_PORTS name is no longer checked there)
CONFIGS="-DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
|
||||
CONFIGS="-DL2NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
|
||||
CONFIGS="-DL2NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
|
||||
# Both knobs use the renamed config macros: L2_NUM_PORTS for the L2 cache, DCACHE_NUM_PORTS for the D-cache
CONFIGS="-DL2_NUM_PORTS=2 -DDCACHE_NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
|
||||
# Both knobs use the renamed config macros: L2_NUM_PORTS for the L2 cache, DCACHE_NUM_PORTS for the D-cache
CONFIGS="-DL2_NUM_PORTS=4 -DDCACHE_NUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
|
|
@ -36,8 +36,8 @@ DPI_DIR=../../../hw/dpi
|
|||
SRCS = fpga.cpp opae_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
|
|
|
@ -41,8 +41,8 @@ DPI_DIR = ../../hw/dpi
|
|||
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
|
|
|
@ -3,15 +3,15 @@
|
|||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_alu_req_if.slave alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_commit_if alu_commit_if
|
||||
VX_branch_ctl_if.master branch_ctl_if,
|
||||
VX_commit_if.master alu_commit_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
|
|
@ -12,16 +12,16 @@ module VX_cluster #(
|
|||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`L2MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`L2MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`L2MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L2MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`L2MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L2MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// Status
|
||||
|
@ -31,14 +31,14 @@ module VX_cluster #(
|
|||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
|
||||
wire [`NUM_CORES-1:0][`DMEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
|
||||
wire [`NUM_CORES-1:0][`DMEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
|
||||
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_req_data;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data;
|
||||
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
|
||||
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
|
||||
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
|
||||
|
||||
|
@ -83,22 +83,22 @@ module VX_cluster #(
|
|||
`RESET_RELAY (l2_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.CACHE_SIZE (`L2CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L2NUM_BANKS),
|
||||
.NUM_PORTS (`L2NUM_PORTS),
|
||||
.WORD_SIZE (`L2WORD_SIZE),
|
||||
.NUM_REQS (`L2NUM_REQS),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L2CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2MREQ_SIZE),
|
||||
.CACHE_ID (`L2_CACHE_ID),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L2_CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
.NUM_PORTS (`L2_NUM_PORTS),
|
||||
.WORD_SIZE (`L2_WORD_SIZE),
|
||||
.NUM_REQS (`L2_NUM_REQS),
|
||||
.CREQ_SIZE (`L2_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l2cache (
|
||||
`SCOPE_BIND_VX_cluster_l2cache
|
||||
|
@ -148,8 +148,8 @@ module VX_cluster #(
|
|||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
|
|
|
@ -3,22 +3,22 @@
|
|||
module VX_commit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if.slave alu_commit_if,
|
||||
VX_commit_if.slave ld_commit_if,
|
||||
VX_commit_if.slave st_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if.slave fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if gpu_commit_if,
|
||||
VX_commit_if.slave gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if,
|
||||
VX_cmt_to_csr_if cmt_to_csr_if
|
||||
VX_writeback_if.master writeback_if,
|
||||
VX_cmt_to_csr_if.master cmt_to_csr_if
|
||||
);
|
||||
// CSRs update
|
||||
|
||||
|
@ -50,6 +50,9 @@ module VX_commit #(
|
|||
`endif
|
||||
/*gpu_commit_fire ?*/ gpu_commit_if.tmask;
|
||||
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] commit_cnt;
|
||||
`POP_COUNT(commit_cnt, commit_tmask);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + $clog2(`NUM_THREADS+1)),
|
||||
.RESETW (1)
|
||||
|
@ -57,7 +60,7 @@ module VX_commit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire, $countones(commit_tmask)}),
|
||||
.data_in ({commit_fire, commit_cnt}),
|
||||
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
|
||||
);
|
||||
|
||||
|
|
|
@ -255,28 +255,28 @@
|
|||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef ICREQ_SIZE
|
||||
`define ICREQ_SIZE 0
|
||||
`ifndef ICACHE_CREQ_SIZE
|
||||
`define ICACHE_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef ICRSQ_SIZE
|
||||
`define ICRSQ_SIZE 2
|
||||
`ifndef ICACHE_CRSQ_SIZE
|
||||
`define ICACHE_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef IMSHR_SIZE
|
||||
`define IMSHR_SIZE `NUM_WARPS
|
||||
`ifndef ICACHE_MSHR_SIZE
|
||||
`define ICACHE_MSHR_SIZE `NUM_WARPS
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef IMREQ_SIZE
|
||||
`define IMREQ_SIZE 4
|
||||
`ifndef ICACHE_MREQ_SIZE
|
||||
`define ICACHE_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef IMRSQ_SIZE
|
||||
`define IMRSQ_SIZE 0
|
||||
`ifndef ICACHE_MRSQ_SIZE
|
||||
`define ICACHE_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
@ -287,38 +287,38 @@
|
|||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef DNUM_BANKS
|
||||
`define DNUM_BANKS `NUM_THREADS
|
||||
`ifndef DCACHE_NUM_BANKS
|
||||
`define DCACHE_NUM_BANKS `NUM_THREADS
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef DNUM_PORTS
|
||||
`define DNUM_PORTS 1
|
||||
`ifndef DCACHE_NUM_PORTS
|
||||
`define DCACHE_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef DCREQ_SIZE
|
||||
`define DCREQ_SIZE 0
|
||||
`ifndef DCACHE_CREQ_SIZE
|
||||
`define DCACHE_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef DCRSQ_SIZE
|
||||
`define DCRSQ_SIZE 2
|
||||
`ifndef DCACHE_CRSQ_SIZE
|
||||
`define DCACHE_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef DMSHR_SIZE
|
||||
`define DMSHR_SIZE `LSUQ_SIZE
|
||||
`ifndef DCACHE_MSHR_SIZE
|
||||
`define DCACHE_MSHR_SIZE `LSUQ_SIZE
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef DMREQ_SIZE
|
||||
`define DMREQ_SIZE 4
|
||||
`ifndef DCACHE_MREQ_SIZE
|
||||
`define DCACHE_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef DMRSQ_SIZE
|
||||
`define DMRSQ_SIZE 0
|
||||
`ifndef DCACHE_MRSQ_SIZE
|
||||
`define DCACHE_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
@ -335,102 +335,102 @@
|
|||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef SNUM_BANKS
|
||||
`define SNUM_BANKS `NUM_THREADS
|
||||
`ifndef SMEM_NUM_BANKS
|
||||
`define SMEM_NUM_BANKS `NUM_THREADS
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef SCREQ_SIZE
|
||||
`define SCREQ_SIZE 2
|
||||
`ifndef SMEM_CREQ_SIZE
|
||||
`define SMEM_CREQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef SCRSQ_SIZE
|
||||
`define SCRSQ_SIZE 2
|
||||
`ifndef SMEM_CRSQ_SIZE
|
||||
`define SMEM_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L2CACHE_SIZE
|
||||
`define L2CACHE_SIZE 131072
|
||||
`ifndef L2_CACHE_SIZE
|
||||
`define L2_CACHE_SIZE 131072
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef L2NUM_BANKS
|
||||
`define L2NUM_BANKS `MIN(`NUM_CORES, 4)
|
||||
`ifndef L2_NUM_BANKS
|
||||
`define L2_NUM_BANKS `MIN(`NUM_CORES, 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L2NUM_PORTS
|
||||
`define L2NUM_PORTS 1
|
||||
`ifndef L2_NUM_PORTS
|
||||
`define L2_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L2CREQ_SIZE
|
||||
`define L2CREQ_SIZE 0
|
||||
`ifndef L2_CREQ_SIZE
|
||||
`define L2_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef L2CRSQ_SIZE
|
||||
`define L2CRSQ_SIZE 2
|
||||
`ifndef L2_CRSQ_SIZE
|
||||
`define L2_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef L2MSHR_SIZE
|
||||
`define L2MSHR_SIZE 16
|
||||
`ifndef L2_MSHR_SIZE
|
||||
`define L2_MSHR_SIZE 16
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef L2MREQ_SIZE
|
||||
`define L2MREQ_SIZE 4
|
||||
`ifndef L2_MREQ_SIZE
|
||||
`define L2_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef L2MRSQ_SIZE
|
||||
`define L2MRSQ_SIZE 0
|
||||
`ifndef L2_MRSQ_SIZE
|
||||
`define L2_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
`ifndef L3CACHE_SIZE
|
||||
`define L3CACHE_SIZE 1048576
|
||||
`ifndef L3_CACHE_SIZE
|
||||
`define L3_CACHE_SIZE 1048576
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
`ifndef L3NUM_BANKS
|
||||
`define L3NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
|
||||
`ifndef L3_NUM_BANKS
|
||||
`define L3_NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L3NUM_PORTS
|
||||
`define L3NUM_PORTS 1
|
||||
`ifndef L3_NUM_PORTS
|
||||
`define L3_NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L3CREQ_SIZE
|
||||
`define L3CREQ_SIZE 0
|
||||
`ifndef L3_CREQ_SIZE
|
||||
`define L3_CREQ_SIZE 0
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef L3CRSQ_SIZE
|
||||
`define L3CRSQ_SIZE 2
|
||||
`ifndef L3_CRSQ_SIZE
|
||||
`define L3_CRSQ_SIZE 2
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef L3MSHR_SIZE
|
||||
`define L3MSHR_SIZE 16
|
||||
`ifndef L3_MSHR_SIZE
|
||||
`define L3_MSHR_SIZE 16
|
||||
`endif
|
||||
|
||||
// Memory Request Queue Size
|
||||
`ifndef L3MREQ_SIZE
|
||||
`define L3MREQ_SIZE 4
|
||||
`ifndef L3_MREQ_SIZE
|
||||
`define L3_MREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Memory Response Queue Size
|
||||
`ifndef L3MRSQ_SIZE
|
||||
`define L3MRSQ_SIZE 0
|
||||
`ifndef L3_MRSQ_SIZE
|
||||
`define L3_MRSQ_SIZE 0
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
|
|
@ -12,15 +12,15 @@ module VX_core #(
|
|||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`DMEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`DMEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`DMEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`DCACHE_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`XMEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`DMEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
|
@ -32,13 +32,13 @@ module VX_core #(
|
|||
`endif
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`XMEM_TAG_WIDTH)
|
||||
) mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`XMEM_TAG_WIDTH)
|
||||
) mem_rsp_if();
|
||||
|
||||
|
@ -58,25 +58,25 @@ module VX_core #(
|
|||
//--
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
VX_icache_req_if #(
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_req_if();
|
||||
|
||||
VX_icache_rsp_if #(
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_rsp_if();
|
||||
|
||||
VX_pipeline #(
|
||||
|
|
|
@ -7,15 +7,15 @@ module VX_csr_data #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
VX_cmt_to_csr_if.slave cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if.slave fetch_to_csr_if,
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if,
|
||||
`endif
|
||||
|
||||
input wire read_enable,
|
||||
|
@ -44,19 +44,16 @@ module VX_csr_data #(
|
|||
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
||||
always @(posedge clk) begin
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (reset) begin
|
||||
fcsr <= '0;
|
||||
end
|
||||
|
||||
end
|
||||
if (fpu_to_csr_if.write_enable) begin
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
end
|
||||
`endif
|
||||
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
|
||||
|
@ -77,7 +74,7 @@ module VX_csr_data #(
|
|||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||
|
||||
default: begin
|
||||
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
|
||||
`ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -3,26 +3,26 @@
|
|||
module VX_csr_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_cmt_to_csr_if.slave cmt_to_csr_if,
|
||||
VX_fetch_to_csr_if.slave fetch_to_csr_if,
|
||||
VX_csr_req_if.slave csr_req_if,
|
||||
VX_commit_if.master csr_commit_if,
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
input wire[`NUM_WARPS-1:0] fpu_pending,
|
||||
VX_fpu_to_csr_if.slave fpu_to_csr_if,
|
||||
input wire[`NUM_WARPS-1:0] fpu_pending,
|
||||
`endif
|
||||
|
||||
output wire[`NUM_WARPS-1:0] pending,
|
||||
input wire busy
|
||||
input wire busy
|
||||
);
|
||||
wire csr_we_s1;
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||
|
|
|
@ -19,12 +19,12 @@ module VX_decode #(
|
|||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_rsp_if.slave ifetch_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_decode_if decode_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
VX_decode_if.master decode_if,
|
||||
VX_wstall_if.master wstall_if,
|
||||
VX_join_if.master join_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
|
|
|
@ -238,45 +238,33 @@
|
|||
`endif
|
||||
|
||||
// non-cacheable address bit
|
||||
`define NC_ADDR_BITS 1
|
||||
`define NC_FLAG_BITS 1
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Number of banks
|
||||
`define INUM_BANKS 1
|
||||
|
||||
// Word size in bytes
|
||||
`define IWORD_SIZE 4
|
||||
`define ICACHE_WORD_SIZE 4
|
||||
|
||||
// Block size in bytes
|
||||
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// Core request address bits
|
||||
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
|
||||
|
||||
// Core request byte enable bits
|
||||
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
|
||||
|
||||
// TAG sharing enable
|
||||
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||
`define ICACHE_CORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define IMEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
|
||||
`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define IMEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define IMEM_BYTEEN_WIDTH `ICACHE_LINE_SIZE
|
||||
`define ICACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
|
||||
|
||||
// Memory request tag bits
|
||||
`define IMEM_TAG_WIDTH (`IMEM_ADDR_WIDTH + `CLOG2(`IMSHR_SIZE))
|
||||
`define ICACHE_MEM_TAG_WIDTH `CLOG2(`ICACHE_MSHR_SIZE)
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
||||
|
@ -284,129 +272,126 @@
|
|||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
`define DCACHE_WORD_SIZE 4
|
||||
|
||||
// Block size in bytes
|
||||
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// Core request address bits
|
||||
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
|
||||
|
||||
// TAG sharing enable
|
||||
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE)
|
||||
|
||||
// Input request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define DMEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define DMEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
|
||||
`define DCACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define DMEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
|
||||
`define DCACHE_MEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define DNUM_REQS `NUM_THREADS
|
||||
`define DCACHE_NUM_REQS `NUM_THREADS
|
||||
|
||||
// Memory request tag bits
|
||||
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE)
|
||||
`define _DNC_MEM_TAG_WIDTH ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH)
|
||||
`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `CLOG2(`DMSHR_SIZE) + `NC_ADDR_BITS), `_DNC_MEM_TAG_WIDTH)
|
||||
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DCACHE_WORD_SIZE)
|
||||
`define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH)
|
||||
`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_FLAG_BITS), `_DNC_MEM_TAG_WIDTH)
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
|
||||
`define SMEM_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
|
||||
|
||||
// Word size in bytes
|
||||
`define SWORD_SIZE 4
|
||||
`define SMEM_WORD_SIZE 4
|
||||
|
||||
// bank address offset
|
||||
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE)
|
||||
`define SMEM_BANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SMEM_WORD_SIZE)
|
||||
|
||||
// Input request size
|
||||
`define SNUM_REQS `NUM_THREADS
|
||||
`define SMEM_NUM_REQS `NUM_THREADS
|
||||
|
||||
////////////////////////// L2cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
`define L2_CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
|
||||
// Word size in bytes
|
||||
`define L2WORD_SIZE `DCACHE_LINE_SIZE
|
||||
`define L2_WORD_SIZE `DCACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L2CACHE_LINE_SIZE (`L2_ENABLE ? `MEM_BLOCK_SIZE : `L2WORD_SIZE)
|
||||
`define L2_CACHE_LINE_SIZE ((`L2_ENABLE) ? `MEM_BLOCK_SIZE : `L2_WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
`define L2_CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
|
||||
// Memory request data bits
|
||||
`define L2MEM_DATA_WIDTH (`L2CACHE_LINE_SIZE * 8)
|
||||
`define L2_MEM_DATA_WIDTH (`L2_CACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define L2MEM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE))
|
||||
`define L2_MEM_ADDR_WIDTH (32 - `CLOG2(`L2_CACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define L2MEM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE
|
||||
`define L2_MEM_BYTEEN_WIDTH `L2_CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L2NUM_REQS `NUM_CORES
|
||||
`define L2_NUM_REQS `NUM_CORES
|
||||
|
||||
// Memory request tag bits
|
||||
`define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE)
|
||||
`define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
|
||||
`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `CLOG2(`L2MSHR_SIZE) + `NC_ADDR_BITS), `_L2NC_MEM_TAG_WIDTH)
|
||||
`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS)))
|
||||
`define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE)
|
||||
`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
|
||||
`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_FLAG_BITS), `_L2_NC_MEM_TAG_WIDTH)
|
||||
`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS)))
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////
|
||||
|
||||
// Cache ID
|
||||
`define L3CACHE_ID 0
|
||||
`define L3_CACHE_ID 0
|
||||
|
||||
// Word size in bytes
|
||||
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
|
||||
`define L3_WORD_SIZE `L2_CACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L3CACHE_LINE_SIZE (`L3_ENABLE ? `MEM_BLOCK_SIZE : `L3WORD_SIZE)
|
||||
`define L3_CACHE_LINE_SIZE ((`L3_ENABLE) ? `MEM_BLOCK_SIZE : `L3_WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
`define L3_CORE_TAG_WIDTH (`L2_CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
|
||||
// Memory request data bits
|
||||
`define L3MEM_DATA_WIDTH (`L3CACHE_LINE_SIZE * 8)
|
||||
`define L3_MEM_DATA_WIDTH (`L3_CACHE_LINE_SIZE * 8)
|
||||
|
||||
// Memory request address bits
|
||||
`define L3MEM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE))
|
||||
`define L3_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_CACHE_LINE_SIZE))
|
||||
|
||||
// Memory byte enable bits
|
||||
`define L3MEM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE
|
||||
`define L3_MEM_BYTEEN_WIDTH `L3_CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L3NUM_REQS `NUM_CLUSTERS
|
||||
`define L3_NUM_REQS `NUM_CLUSTERS
|
||||
|
||||
// Memory request tag bits
|
||||
`define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE)
|
||||
`define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH)
|
||||
`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `CLOG2(`L3MSHR_SIZE) + `NC_ADDR_BITS), `_L3NC_MEM_TAG_WIDTH)
|
||||
`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS)))
|
||||
`define _L3_MEM_ADDR_RATIO_W $clog2(`L3_CACHE_LINE_SIZE / `L3_WORD_SIZE)
|
||||
`define _L3_NC_MEM_TAG_WIDTH ($clog2(`L3_NUM_REQS) + `_L3_MEM_ADDR_RATIO_W + `L2_MEM_TAG_WIDTH)
|
||||
`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_FLAG_BITS), `_L3_NC_MEM_TAG_WIDTH)
|
||||
`define L3_MEM_TAG_WIDTH ((`L3_ENABLE) ? `_L3_MEM_TAG_WIDTH : (`L2_MEM_TAG_WIDTH + `CLOG2(`L3_NUM_REQS)))
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
|
||||
`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH
|
||||
`define VX_MEM_DATA_WIDTH `L3MEM_DATA_WIDTH
|
||||
`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH
|
||||
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_MEM_BYTEEN_WIDTH
|
||||
`define VX_MEM_ADDR_WIDTH `L3_MEM_ADDR_WIDTH
|
||||
`define VX_MEM_DATA_WIDTH `L3_MEM_DATA_WIDTH
|
||||
`define VX_MEM_TAG_WIDTH `L3_MEM_TAG_WIDTH
|
||||
`define VX_CORE_TAG_WIDTH `L3_CORE_TAG_WIDTH
|
||||
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
// Merged D-cache/I-cache memory tag
|
||||
`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH + `CLOG2(2))
|
||||
`define XMEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH + `CLOG2(2))
|
||||
|
||||
`include "VX_types.vh"
|
||||
|
||||
|
|
|
@ -9,42 +9,42 @@ module VX_execute #(
|
|||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
// commit interface
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_cmt_to_csr_if.slave cmt_to_csr_if,
|
||||
|
||||
// fetch interface
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
VX_fetch_to_csr_if.slave fetch_to_csr_if,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_alu_req_if.slave alu_req_if,
|
||||
VX_lsu_req_if.slave lsu_req_if,
|
||||
VX_csr_req_if.slave csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_fpu_req_if.slave fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
VX_gpu_req_if.slave gpu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_branch_ctl_if.master branch_ctl_if,
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master alu_commit_if,
|
||||
VX_commit_if.master ld_commit_if,
|
||||
VX_commit_if.master st_commit_if,
|
||||
VX_commit_if.master csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if.master fpu_commit_if,
|
||||
`endif
|
||||
VX_commit_if gpu_commit_if,
|
||||
VX_commit_if.master gpu_commit_if,
|
||||
|
||||
input wire busy
|
||||
input wire busy
|
||||
);
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_to_csr_if fpu_to_csr_if();
|
||||
|
|
|
@ -9,23 +9,23 @@ module VX_fetch #(
|
|||
input wire reset,
|
||||
|
||||
// Icache interface
|
||||
VX_icache_req_if icache_req_if,
|
||||
VX_icache_rsp_if icache_rsp_if,
|
||||
VX_icache_req_if.master icache_req_if,
|
||||
VX_icache_rsp_if.slave icache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_wstall_if.slave wstall_if,
|
||||
VX_join_if.slave join_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if,
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
|
||||
// outputs
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_rsp_if.master ifetch_rsp_if,
|
||||
|
||||
// csr interface
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
VX_fetch_to_csr_if.master fetch_to_csr_if,
|
||||
|
||||
// busy status
|
||||
output wire busy
|
||||
output wire busy
|
||||
);
|
||||
|
||||
VX_ifetch_req_if ifetch_req_if();
|
||||
|
|
|
@ -6,9 +6,9 @@ module VX_fpu_unit #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_fpu_req_if.slave fpu_req_if,
|
||||
VX_fpu_to_csr_if.master fpu_to_csr_if,
|
||||
VX_commit_if.master fpu_commit_if,
|
||||
|
||||
input wire[`NUM_WARPS-1:0] csr_pending,
|
||||
output wire[`NUM_WARPS-1:0] pending
|
||||
|
|
|
@ -3,15 +3,15 @@
|
|||
module VX_gpr_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_writeback_if writeback_if,
|
||||
VX_gpr_req_if gpr_req_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_gpr_req_if.slave gpr_req_if,
|
||||
|
||||
// outputs
|
||||
VX_gpr_rsp_if gpr_rsp_if
|
||||
VX_gpr_rsp_if.master gpr_rsp_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
|
|
@ -5,15 +5,15 @@ module VX_gpu_unit #(
|
|||
) (
|
||||
`SCOPE_IO_VX_gpu_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
VX_gpu_req_if.slave gpu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_commit_if gpu_commit_if
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master gpu_commit_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
|
|
@ -7,10 +7,10 @@ module VX_ibuffer #(
|
|||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_decode_if decode_if,
|
||||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_ibuffer_if ibuffer_if
|
||||
VX_ibuffer_if.master ibuffer_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
|
|
@ -5,18 +5,18 @@ module VX_icache_stage #(
|
|||
) (
|
||||
`SCOPE_IO_VX_icache_stage
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Icache interface
|
||||
VX_icache_req_if icache_req_if,
|
||||
VX_icache_rsp_if icache_rsp_if,
|
||||
VX_icache_req_if.master icache_req_if,
|
||||
VX_icache_rsp_if.slave icache_rsp_if,
|
||||
|
||||
// request
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
VX_ifetch_req_if.slave ifetch_req_if,
|
||||
|
||||
// response
|
||||
VX_ifetch_rsp_if ifetch_rsp_if
|
||||
VX_ifetch_rsp_if.master ifetch_rsp_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_instr_demux (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_ibuffer_if ibuffer_if,
|
||||
VX_gpr_rsp_if gpr_rsp_if,
|
||||
VX_ibuffer_if.slave ibuffer_if,
|
||||
VX_gpr_rsp_if.slave gpr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_alu_req_if.master alu_req_if,
|
||||
VX_lsu_req_if.master lsu_req_if,
|
||||
VX_csr_req_if.master csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_fpu_req_if.master fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if gpu_req_if
|
||||
VX_gpu_req_if.master gpu_req_if
|
||||
);
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire alu_req_ready;
|
||||
|
|
|
@ -9,19 +9,19 @@ module VX_issue #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
VX_perf_pipeline_if.master perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_writeback_if writeback_if,
|
||||
VX_decode_if.slave decode_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_alu_req_if.master alu_req_if,
|
||||
VX_lsu_req_if.master lsu_req_if,
|
||||
VX_csr_req_if.master csr_req_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_fpu_req_if.master fpu_req_if,
|
||||
`endif
|
||||
VX_gpu_req_if gpu_req_if
|
||||
VX_gpu_req_if.master gpu_req_if
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if();
|
||||
VX_ibuffer_if execute_if();
|
||||
|
|
|
@ -5,26 +5,26 @@ module VX_lsu_unit #(
|
|||
) (
|
||||
`SCOPE_IO_VX_lsu_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
// inputs
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_lsu_req_if.slave lsu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if
|
||||
VX_commit_if.master ld_commit_if,
|
||||
VX_commit_if.master st_commit_if
|
||||
);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
|
||||
|
||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE);
|
||||
|
||||
localparam ADDR_TYPEW = `NC_ADDR_BITS + `SM_ENABLE;
|
||||
localparam ADDR_TYPEW = `NC_FLAG_BITS + `SM_ENABLE;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
|
||||
|
@ -321,9 +321,9 @@ module VX_lsu_unit #(
|
|||
|
||||
for (integer i = 0; i < `LSUQ_SIZE; ++i) begin
|
||||
if (pending_reqs[i][0]) begin
|
||||
assert(($time - pending_reqs[i][1 +: 64]) < delay_timeout) else
|
||||
$error("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS]);
|
||||
`ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout,
|
||||
("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,9 +10,9 @@ module VX_mem_arb #(
|
|||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "P",
|
||||
|
||||
localparam DATA_SIZE = (DATA_WIDTH / 8),
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
|
@ -5,24 +5,24 @@ module VX_mem_unit # (
|
|||
) (
|
||||
`SCOPE_IO_VX_mem_unit
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_memsys_if.master perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
VX_dcache_req_if.slave dcache_req_if,
|
||||
VX_dcache_rsp_if.master dcache_rsp_if,
|
||||
|
||||
// Core <-> Icache
|
||||
VX_icache_req_if icache_req_if,
|
||||
VX_icache_rsp_if icache_rsp_if,
|
||||
VX_icache_req_if.slave icache_req_if,
|
||||
VX_icache_rsp_if.master icache_rsp_if,
|
||||
|
||||
// Memory
|
||||
VX_mem_req_if mem_req_if,
|
||||
VX_mem_rsp_if mem_rsp_if
|
||||
VX_mem_req_if.master mem_req_if,
|
||||
VX_mem_rsp_if.slave mem_rsp_if
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -30,37 +30,37 @@ module VX_mem_unit # (
|
|||
`endif
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`IMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`IMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`IMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`ICACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache_mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`IMEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`IMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache_mem_rsp_if();
|
||||
|
||||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`DMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
|
||||
) dcache_mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`DMEM_TAG_WIDTH)
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
|
||||
) dcache_mem_rsp_if();
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) dcache_req_tmp_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) dcache_rsp_tmp_if();
|
||||
|
||||
`RESET_RELAY (icache_reset);
|
||||
|
@ -71,18 +71,18 @@ module VX_mem_unit # (
|
|||
.CACHE_ID (`ICACHE_ID),
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`ICACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`INUM_BANKS),
|
||||
.WORD_SIZE (`IWORD_SIZE),
|
||||
.NUM_BANKS (1),
|
||||
.WORD_SIZE (`ICACHE_WORD_SIZE),
|
||||
.NUM_REQS (1),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.CRSQ_SIZE (`ICRSQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.MRSQ_SIZE (`IMRSQ_SIZE),
|
||||
.MREQ_SIZE (`IMREQ_SIZE),
|
||||
.CREQ_SIZE (`ICACHE_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`ICACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`ICACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
|
||||
.MEM_TAG_WIDTH (`IMEM_TAG_WIDTH)
|
||||
.CORE_TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`ICACHE_CORE_TAG_ID_BITS),
|
||||
.MEM_TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
|
||||
) icache (
|
||||
`SCOPE_BIND_VX_mem_unit_icache
|
||||
|
||||
|
@ -92,7 +92,7 @@ module VX_mem_unit # (
|
|||
// Core request
|
||||
.core_req_valid (icache_req_if.valid),
|
||||
.core_req_rw (1'b0),
|
||||
.core_req_byteen ({`IWORD_SIZE{1'b1}}),
|
||||
.core_req_byteen ('b0),
|
||||
.core_req_addr (icache_req_if.addr),
|
||||
.core_req_data ('x),
|
||||
.core_req_tag (icache_req_if.tag),
|
||||
|
@ -129,19 +129,19 @@ module VX_mem_unit # (
|
|||
.CACHE_ID (`DCACHE_ID),
|
||||
.CACHE_SIZE (`DCACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`DCACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`DNUM_BANKS),
|
||||
.NUM_PORTS (`DNUM_PORTS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.CRSQ_SIZE (`DCRSQ_SIZE),
|
||||
.MSHR_SIZE (`DMSHR_SIZE),
|
||||
.MRSQ_SIZE (`DMRSQ_SIZE),
|
||||
.MREQ_SIZE (`DMREQ_SIZE),
|
||||
.NUM_BANKS (`DCACHE_NUM_BANKS),
|
||||
.NUM_PORTS (`DCACHE_NUM_PORTS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.CREQ_SIZE (`DCACHE_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH),
|
||||
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.MEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) dcache (
|
||||
`SCOPE_BIND_VX_mem_unit_dcache
|
||||
|
@ -187,15 +187,15 @@ module VX_mem_unit # (
|
|||
|
||||
if (`SM_ENABLE) begin
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) smem_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
|
||||
.NUM_REQS (`DCACHE_NUM_REQS),
|
||||
.WORD_SIZE (`DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) smem_rsp_if();
|
||||
|
||||
`RESET_RELAY (smem_arb_reset);
|
||||
|
@ -205,7 +205,7 @@ module VX_mem_unit # (
|
|||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
.DATA_SIZE (4),
|
||||
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.TAG_IN_WIDTH (`DCACHE_CORE_TAG_WIDTH),
|
||||
.TYPE ("P"),
|
||||
.BUFFERED_REQ (2),
|
||||
.BUFFERED_RSP (1)
|
||||
|
@ -247,16 +247,16 @@ module VX_mem_unit # (
|
|||
);
|
||||
|
||||
VX_shared_mem #(
|
||||
.CACHE_ID (`SCACHE_ID),
|
||||
.CACHE_ID (`SMEM_ID),
|
||||
.CACHE_SIZE (`SMEM_SIZE),
|
||||
.NUM_BANKS (`SNUM_BANKS),
|
||||
.WORD_SIZE (`SWORD_SIZE),
|
||||
.NUM_REQS (`SNUM_REQS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.CRSQ_SIZE (`SCRSQ_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET)
|
||||
.NUM_BANKS (`SMEM_NUM_BANKS),
|
||||
.WORD_SIZE (`SMEM_WORD_SIZE),
|
||||
.NUM_REQS (`SMEM_NUM_REQS),
|
||||
.CREQ_SIZE (`SMEM_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`SMEM_CRSQ_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
|
||||
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
|
||||
.BANK_ADDR_OFFSET (`SMEM_BANK_ADDR_OFFSET)
|
||||
) smem (
|
||||
.clk (clk),
|
||||
.reset (smem_reset),
|
||||
|
@ -283,9 +283,9 @@ module VX_mem_unit # (
|
|||
);
|
||||
end else begin
|
||||
// core to D-cache request
|
||||
for (genvar i = 0; i < `DNUM_REQS; ++i) begin
|
||||
for (genvar i = 0; i < `DCACHE_NUM_REQS; ++i) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW ((32-`CLOG2(`DWORD_SIZE)) + 1 + `DWORD_SIZE + (8*`DWORD_SIZE) + `DCORE_TAG_WIDTH)
|
||||
.DATAW ((32-`CLOG2(`DCACHE_WORD_SIZE)) + 1 + `DCACHE_WORD_SIZE + (8*`DCACHE_WORD_SIZE) + `DCACHE_CORE_TAG_WIDTH)
|
||||
) req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -306,16 +306,16 @@ module VX_mem_unit # (
|
|||
assign dcache_rsp_tmp_if.ready = dcache_rsp_if.ready;
|
||||
end
|
||||
|
||||
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag);
|
||||
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
|
||||
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`IMEM_TAG_WIDTH-1:0];
|
||||
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DCACHE_MEM_TAG_WIDTH'(icache_mem_req_if.tag);
|
||||
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
|
||||
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`ICACHE_MEM_TAG_WIDTH-1:0];
|
||||
`UNUSED_VAR (icache_mem_rsp_tag)
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`DMEM_TAG_WIDTH),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`DCACHE_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.BUFFERED_REQ (1),
|
||||
|
|
|
@ -15,30 +15,30 @@ module VX_pipeline #(
|
|||
output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen,
|
||||
output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr,
|
||||
output wire [`NUM_THREADS-1:0][31:0] dcache_req_data,
|
||||
output wire [`NUM_THREADS-1:0][`DCORE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
output wire [`NUM_THREADS-1:0][`DCACHE_CORE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [`NUM_THREADS-1:0] dcache_req_ready,
|
||||
|
||||
// Dcache core response
|
||||
input wire dcache_rsp_valid,
|
||||
input wire [`NUM_THREADS-1:0] dcache_rsp_tmask,
|
||||
input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data,
|
||||
input wire [`DCORE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
input wire [`DCACHE_CORE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
output wire dcache_rsp_ready,
|
||||
|
||||
// Icache core request
|
||||
output wire icache_req_valid,
|
||||
output wire [29:0] icache_req_addr,
|
||||
output wire [`ICORE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
output wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
input wire icache_req_ready,
|
||||
|
||||
// Icache core response
|
||||
input wire icache_rsp_valid,
|
||||
input wire [31:0] icache_rsp_data,
|
||||
input wire [`ICORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
input wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
output wire icache_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
`endif
|
||||
|
||||
// Status
|
||||
|
@ -51,7 +51,7 @@ module VX_pipeline #(
|
|||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
assign dcache_req_valid = dcache_req_if.valid;
|
||||
|
@ -69,7 +69,7 @@ module VX_pipeline #(
|
|||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
assign dcache_rsp_if.valid = dcache_rsp_valid;
|
||||
|
@ -84,7 +84,7 @@ module VX_pipeline #(
|
|||
|
||||
VX_icache_req_if #(
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_req_if();
|
||||
|
||||
assign icache_req_valid = icache_req_if.valid;
|
||||
|
@ -98,7 +98,7 @@ module VX_pipeline #(
|
|||
|
||||
VX_icache_rsp_if #(
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`ICORE_TAG_WIDTH)
|
||||
.TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
|
||||
) icache_rsp_if();
|
||||
|
||||
assign icache_rsp_if.valid = icache_rsp_valid;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
|
||||
x \
|
||||
|
@ -48,18 +49,24 @@
|
|||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
// ASSERT(cond, msg): immediate assertion that calls $error with `msg` when
// `cond` is false. `msg` is pasted directly after $error, so it must be a
// parenthesized argument list, e.g. ("format string", arg0, arg1).
// The macro has no trailing semicolon; the call site supplies it.
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
// STATIC_ASSERT(cond, msg): wraps a generate block that calls $error with
// `msg` when `cond` is false. `msg` is pasted directly after $error, so it
// must be a parenthesized argument list, e.g. ("invalid parameter").
// Because the check lives in a generate-if, `cond` has to be a constant
// expression.
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error msg; \
|
||||
endgenerate
|
||||
|
||||
`define RUNTIME_ASSERT(cond, msg) \
|
||||
always @(posedge clk) \
|
||||
always @(posedge clk) begin \
|
||||
assert(cond) else $error msg; \
|
||||
end
|
||||
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
|
||||
`else // SYNTHESIS
|
||||
|
||||
`define DEBUG_BLOCK(x)
|
||||
`define IGNORE_UNUSED_BEGIN
|
||||
`define IGNORE_UNUSED_END
|
||||
|
@ -68,10 +75,12 @@
|
|||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define ASSERT(cond, msg) if (cond);
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
`define TRACING_ON
|
||||
`define TRACING_OFF
|
||||
|
||||
`endif // SYNTHESIS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -131,12 +140,20 @@
|
|||
end \
|
||||
dpi_trace("}")
|
||||
|
||||
`define RESET_RELAY(signal) \
|
||||
wire signal; \
|
||||
`define RESET_RELAY(signal) \
|
||||
wire signal; \
|
||||
VX_reset_relay __``signal ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.reset_o (signal) \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.reset_o (signal) \
|
||||
)
|
||||
|
||||
// POP_COUNT(out, in): instantiates VX_popcount with N = $bits(in), connecting
// `in` to its in_i port and `out` to its cnt_o port.
// The instance is named __<out> by token pasting, so `out` must be a plain
// identifier, and it must already be declared by the caller.
// The macro has no trailing semicolon; the call site supplies it.
`define POP_COUNT(out, in) \
|
||||
VX_popcount #( \
|
||||
.N ($bits(in)) \
|
||||
) __``out ( \
|
||||
.in_i (in), \
|
||||
.cnt_o (out) \
|
||||
)
|
||||
|
||||
`endif
|
|
@ -3,12 +3,12 @@
|
|||
module VX_scoreboard #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_ibuffer_if ibuffer_if,
|
||||
VX_writeback_if writeback_if,
|
||||
output wire delay
|
||||
VX_ibuffer_if.slave ibuffer_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
output wire delay
|
||||
);
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
|
||||
|
@ -61,15 +61,16 @@ module VX_scoreboard #(
|
|||
end
|
||||
`endif
|
||||
if (release_reg) begin
|
||||
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
|
||||
else $error("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
|
||||
`ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd));
|
||||
end
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
deadlock_ctr <= deadlock_ctr + 1;
|
||||
assert(deadlock_ctr < deadlock_timeout) else $error("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
`ASSERT(deadlock_ctr < deadlock_timeout,
|
||||
("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3);
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3));
|
||||
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
|
||||
deadlock_ctr <= 0;
|
||||
end
|
||||
|
|
|
@ -10,10 +10,10 @@ module VX_smem_arb #(
|
|||
parameter BUFFERED_RSP = 0,
|
||||
parameter TYPE = "P",
|
||||
|
||||
localparam ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
|
||||
localparam DATA_WIDTH = (8 * DATA_SIZE),
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
localparam TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
|
||||
parameter ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)),
|
||||
parameter DATA_WIDTH = (8 * DATA_SIZE),
|
||||
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS),
|
||||
parameter TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
|
@ -5,19 +5,19 @@ module VX_warp_sched #(
|
|||
) (
|
||||
`SCOPE_IO_VX_warp_sched
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if.slave warp_ctl_if,
|
||||
VX_wstall_if.slave wstall_if,
|
||||
VX_join_if.slave join_if,
|
||||
VX_branch_ctl_if.slave branch_ctl_if,
|
||||
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
VX_ifetch_req_if.master ifetch_req_if,
|
||||
|
||||
VX_fetch_to_csr_if fetch_to_csr_if,
|
||||
VX_fetch_to_csr_if.master fetch_to_csr_if,
|
||||
|
||||
output wire busy
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
@ -147,7 +147,8 @@ module VX_warp_sched #(
|
|||
`IGNORE_UNUSED_BEGIN
|
||||
wire [`NW_BITS:0] active_barrier_count;
|
||||
`IGNORE_UNUSED_END
|
||||
assign active_barrier_count = $countones(barrier_masks[warp_ctl_if.barrier.id]);
|
||||
wire [`NUM_WARPS-1:0] barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
|
||||
`POP_COUNT(active_barrier_count, barrier_mask);
|
||||
|
||||
assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
|
||||
|
||||
|
@ -161,7 +162,7 @@ module VX_warp_sched #(
|
|||
|
||||
// split/join stack management
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS)-1:0] ipdom [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NUM_THREADS-1:0] curr_tmask = thread_masks[warp_ctl_if.wid];
|
||||
|
||||
|
@ -173,8 +174,8 @@ module VX_warp_sched #(
|
|||
wire pop = join_if.valid && (i == join_if.wid);
|
||||
|
||||
wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_tmask : curr_tmask;
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, curr_tmask};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask};
|
||||
wire [(1+32+`NUM_THREADS)-1:0] q_end = {1'b0, 32'b0, curr_tmask};
|
||||
wire [(1+32+`NUM_THREADS)-1:0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask};
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (1+32+`NUM_THREADS),
|
||||
|
|
|
@ -3,19 +3,19 @@
|
|||
module VX_writeback #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if.slave alu_commit_if,
|
||||
VX_commit_if.slave ld_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if.slave fpu_commit_if,
|
||||
`endif
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if
|
||||
VX_writeback_if.master writeback_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
|
|
@ -29,15 +29,15 @@ module Vortex (
|
|||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
@ -81,22 +81,22 @@ module Vortex (
|
|||
`RESET_RELAY (l3_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L3CACHE_ID),
|
||||
.CACHE_SIZE (`L3CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3NUM_BANKS),
|
||||
.NUM_PORTS (`L3NUM_PORTS),
|
||||
.WORD_SIZE (`L3WORD_SIZE),
|
||||
.NUM_REQS (`L3NUM_REQS),
|
||||
.CREQ_SIZE (`L3CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L3CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3MREQ_SIZE),
|
||||
.CACHE_ID (`L3_CACHE_ID),
|
||||
.CACHE_SIZE (`L3_CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L3_CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3_NUM_BANKS),
|
||||
.NUM_PORTS (`L3_NUM_PORTS),
|
||||
.WORD_SIZE (`L3_WORD_SIZE),
|
||||
.NUM_REQS (`L3_NUM_REQS),
|
||||
.CREQ_SIZE (`L3_CREQ_SIZE),
|
||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.CORE_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L3MEM_TAG_WIDTH),
|
||||
.MEM_TAG_WIDTH (`L3_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
@ -146,9 +146,9 @@ module Vortex (
|
|||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L3MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.DATA_WIDTH (`L3_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L3_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
|
|
|
@ -1,17 +1,16 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module Vortex_axi #(
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = 32,
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
|
||||
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = 32,
|
||||
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
|
||||
parameter AXI_STROBE_WIDTH = (`VX_MEM_DATA_WIDTH / 8)
|
||||
)(
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// AXI write address channel
|
||||
output wire m_axi_awvalid,
|
||||
// AXI write request address channel
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
|
||||
output wire [7:0] m_axi_awlen,
|
||||
|
@ -20,18 +19,24 @@ module Vortex_axi #(
|
|||
output wire m_axi_awlock,
|
||||
output wire [3:0] m_axi_awcache,
|
||||
output wire [2:0] m_axi_awprot,
|
||||
output wire [3:0] m_axi_awqos,
|
||||
output wire [3:0] m_axi_awqos,
|
||||
output wire m_axi_awvalid,
|
||||
input wire m_axi_awready,
|
||||
|
||||
// AXI write data channel
|
||||
output wire m_axi_wvalid,
|
||||
// AXI write request data channel
|
||||
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
|
||||
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
|
||||
output wire m_axi_wlast,
|
||||
output wire m_axi_wlast,
|
||||
output wire m_axi_wvalid,
|
||||
input wire m_axi_wready,
|
||||
|
||||
// AXI write response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_bid,
|
||||
input wire [1:0] m_axi_bresp,
|
||||
input wire m_axi_bvalid,
|
||||
output wire m_axi_bready,
|
||||
|
||||
// AXI read address channel
|
||||
output wire m_axi_arvalid,
|
||||
// AXI read request channel
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
|
||||
output wire [7:0] m_axi_arlen,
|
||||
|
@ -41,12 +46,15 @@ module Vortex_axi #(
|
|||
output wire [3:0] m_axi_arcache,
|
||||
output wire [2:0] m_axi_arprot,
|
||||
output wire [3:0] m_axi_arqos,
|
||||
output wire m_axi_arvalid,
|
||||
input wire m_axi_arready,
|
||||
|
||||
// AXI read data channel
|
||||
input wire m_axi_rvalid,
|
||||
// AXI read response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
|
||||
input wire [1:0] m_axi_rresp,
|
||||
input wire m_axi_rlast,
|
||||
input wire m_axi_rvalid,
|
||||
output wire m_axi_rready,
|
||||
|
||||
// Status
|
||||
|
@ -66,12 +74,14 @@ module Vortex_axi #(
|
|||
wire mem_rsp_ready;
|
||||
|
||||
VX_axi_adapter #(
|
||||
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (AXI_TID_WIDTH)
|
||||
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
|
||||
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
|
||||
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
|
||||
.VX_BYTEEN_WIDTH (AXI_STROBE_WIDTH),
|
||||
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
|
||||
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
|
||||
.AXI_TID_WIDTH (AXI_TID_WIDTH),
|
||||
.AXI_STROBE_WIDTH (AXI_STROBE_WIDTH)
|
||||
) axi_adapter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -89,7 +99,6 @@ module Vortex_axi #(
|
|||
.mem_rsp_tag (mem_rsp_tag),
|
||||
.mem_rsp_ready (mem_rsp_ready),
|
||||
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awid (m_axi_awid),
|
||||
.m_axi_awaddr (m_axi_awaddr),
|
||||
.m_axi_awlen (m_axi_awlen),
|
||||
|
@ -99,15 +108,20 @@ module Vortex_axi #(
|
|||
.m_axi_awcache (m_axi_awcache),
|
||||
.m_axi_awprot (m_axi_awprot),
|
||||
.m_axi_awqos (m_axi_awqos),
|
||||
.m_axi_awvalid (m_axi_awvalid),
|
||||
.m_axi_awready (m_axi_awready),
|
||||
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wdata (m_axi_wdata),
|
||||
.m_axi_wstrb (m_axi_wstrb),
|
||||
.m_axi_wlast (m_axi_wlast),
|
||||
.m_axi_wvalid (m_axi_wvalid),
|
||||
.m_axi_wready (m_axi_wready),
|
||||
|
||||
.m_axi_bid (m_axi_bid),
|
||||
.m_axi_bresp (m_axi_bresp),
|
||||
.m_axi_bvalid (m_axi_bvalid),
|
||||
.m_axi_bready (m_axi_bready),
|
||||
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arid (m_axi_arid),
|
||||
.m_axi_araddr (m_axi_araddr),
|
||||
.m_axi_arlen (m_axi_arlen),
|
||||
|
@ -117,11 +131,14 @@ module Vortex_axi #(
|
|||
.m_axi_arcache (m_axi_arcache),
|
||||
.m_axi_arprot (m_axi_arprot),
|
||||
.m_axi_arqos (m_axi_arqos),
|
||||
.m_axi_arvalid (m_axi_arvalid),
|
||||
.m_axi_arready (m_axi_arready),
|
||||
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rid (m_axi_rid),
|
||||
.m_axi_rdata (m_axi_rdata),
|
||||
.m_axi_rresp (m_axi_rresp),
|
||||
.m_axi_rlast (m_axi_rlast),
|
||||
.m_axi_rvalid (m_axi_rvalid),
|
||||
.m_axi_rready (m_axi_rready)
|
||||
);
|
||||
|
||||
|
|
|
@ -8,8 +8,8 @@ module VX_avs_wrapper #(
|
|||
parameter REQ_TAG_WIDTH = 1,
|
||||
parameter RD_QUEUE_SIZE = 1,
|
||||
|
||||
localparam AVS_BYTEENW = (AVS_DATA_WIDTH / 8),
|
||||
localparam RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1)
|
||||
parameter AVS_BYTEENW = (AVS_DATA_WIDTH / 8),
|
||||
parameter RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
|
@ -851,7 +851,7 @@ begin
|
|||
cci_wr_req_data <= t_ccip_clData'(cci_mem_rsp_data);
|
||||
|
||||
if (cci_wr_req_fire) begin
|
||||
assert(cci_wr_req_ctr != 0);
|
||||
`ASSERT(cci_wr_req_ctr != 0, ("runtime error"));
|
||||
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
|
||||
if (cci_wr_req_ctr == CCI_ADDR_WIDTH'(1)) begin
|
||||
cci_wr_req_done <= 1;
|
||||
|
|
17
hw/rtl/cache/VX_bank.v
vendored
17
hw/rtl/cache/VX_bank.v
vendored
|
@ -39,8 +39,8 @@ module VX_bank #(
|
|||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
|
||||
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
|
||||
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
`SCOPE_IO_VX_bank
|
||||
|
||||
|
@ -86,8 +86,7 @@ module VX_bank #(
|
|||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr,
|
||||
input wire mem_rsp_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
output wire mem_rsp_ready,
|
||||
|
@ -130,8 +129,12 @@ module VX_bank #(
|
|||
.ready_out (creq_ready),
|
||||
.valid_out (creq_valid)
|
||||
);
|
||||
|
||||
|
||||
wire mreq_alm_full;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
|
||||
wire crsq_valid, crsq_ready;
|
||||
wire crsq_stall;
|
||||
|
||||
wire mshr_valid;
|
||||
wire mshr_ready;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id;
|
||||
|
@ -161,9 +164,6 @@ module VX_bank #(
|
|||
wire is_flush_st0;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
|
||||
wire crsq_valid, crsq_ready, crsq_stall;
|
||||
wire mreq_alm_full;
|
||||
|
||||
// prevent read-during-write hazard when accessing tags/data block RAMs
|
||||
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
|
||||
wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw;
|
||||
|
@ -398,6 +398,7 @@ module VX_bank #(
|
|||
// fill
|
||||
.fill_valid (mem_rsp_fire),
|
||||
.fill_id (mem_rsp_id),
|
||||
.fill_addr (mem_rsp_addr),
|
||||
|
||||
// dequeue
|
||||
.dequeue_valid (mshr_valid),
|
||||
|
|
58
hw/rtl/cache/VX_cache.v
vendored
58
hw/rtl/cache/VX_cache.v
vendored
|
@ -46,13 +46,13 @@ module VX_cache #(
|
|||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
`SCOPE_IO_VX_cache
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
VX_perf_cache_if.master perf_cache_if,
|
||||
`endif
|
||||
|
||||
input wire clk,
|
||||
|
@ -94,7 +94,7 @@ module VX_cache #(
|
|||
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE);
|
||||
localparam MEM_TAG_IN_WIDTH = `MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH;
|
||||
localparam MEM_TAG_IN_WIDTH = `BANK_SELECT_BITS + MSHR_ADDR_WIDTH;
|
||||
localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE;
|
||||
localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS != 0) ? (CORE_TAG_ID_BITS - NC_ENABLE) : CORE_TAG_ID_BITS;
|
||||
|
||||
|
@ -444,7 +444,6 @@ module VX_cache #(
|
|||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
`UNUSED_VAR (mem_rsp_tag_qual)
|
||||
assign mrsq_out_ready = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual)];
|
||||
|
@ -515,8 +514,7 @@ module VX_cache #(
|
|||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_mem_req_data;
|
||||
wire curr_bank_mem_req_ready;
|
||||
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_rsp_id;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] curr_bank_mem_rsp_data;
|
||||
wire curr_bank_mem_rsp_ready;
|
||||
|
@ -558,11 +556,9 @@ module VX_cache #(
|
|||
|
||||
// Memory response
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
|
||||
assign curr_bank_mem_rsp_addr = `MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual);
|
||||
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual) == i);
|
||||
assign curr_bank_mem_rsp_addr = `MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual);
|
||||
end
|
||||
assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual);
|
||||
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
|
||||
|
@ -633,7 +629,6 @@ module VX_cache #(
|
|||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_addr (curr_bank_mem_rsp_addr),
|
||||
.mem_rsp_id (curr_bank_mem_rsp_id),
|
||||
.mem_rsp_data (curr_bank_mem_rsp_data),
|
||||
.mem_rsp_ready (curr_bank_mem_rsp_ready),
|
||||
|
@ -668,7 +663,7 @@ module VX_cache #(
|
|||
.core_rsp_ready (core_rsp_ready_nc)
|
||||
);
|
||||
|
||||
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
|
||||
end
|
||||
|
@ -692,33 +687,42 @@ module VX_cache #(
|
|||
.ready_out (mem_req_ready_nc)
|
||||
);
|
||||
|
||||
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({mem_req_addr_nc, mem_req_id});
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'(mem_req_id);
|
||||
end else begin
|
||||
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_nc), mem_req_id});
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
|
||||
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end else begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle;
|
||||
reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_pipe_stall_per_cycle;
|
||||
|
||||
assign perf_read_miss_per_cycle = $countones(perf_read_miss_per_bank);
|
||||
assign perf_write_miss_per_cycle = $countones(perf_write_miss_per_bank);
|
||||
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
|
||||
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
`POP_COUNT(perf_pipe_stall_per_cycle, perf_pipe_stall_per_bank);
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
|
||||
|
|
4
hw/rtl/cache/VX_cache_define.vh
vendored
4
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -61,12 +61,12 @@
|
|||
|
||||
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
|
||||
|
||||
`define MEM_ADDR_TO_BANK_ID(x) x[0 +: `BANK_SELECT_BITS]
|
||||
|
||||
`define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
|
||||
|
||||
`define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS]
|
||||
|
||||
`define MEM_TAG_TO_LINE_ADDR(x) x[(MSHR_ADDR_WIDTH+`BANK_SELECT_BITS) +: `LINE_ADDR_WIDTH]
|
||||
|
||||
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
|
6
hw/rtl/cache/VX_core_req_bank_sel.v
vendored
6
hw/rtl/cache/VX_core_req_bank_sel.v
vendored
|
@ -291,12 +291,16 @@ module VX_core_req_bank_sel #(
|
|||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] bank_stalls_r;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] bank_stall_cnt;
|
||||
|
||||
wire [NUM_REQS-1:0] bank_stall_mask = core_req_sel_r & ~core_req_ready;
|
||||
`POP_COUNT(bank_stall_cnt, bank_stall_mask);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
bank_stalls_r <= 0;
|
||||
end else begin
|
||||
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'($countones(core_req_sel_r & ~core_req_ready));
|
||||
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'(bank_stall_cnt);
|
||||
end
|
||||
end
|
||||
|
||||
|
|
2
hw/rtl/cache/VX_data_access.v
vendored
2
hw/rtl/cache/VX_data_access.v
vendored
|
@ -16,7 +16,7 @@ module VX_data_access #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
13
hw/rtl/cache/VX_miss_resrv.v
vendored
13
hw/rtl/cache/VX_miss_resrv.v
vendored
|
@ -20,7 +20,7 @@ module VX_miss_resrv #(
|
|||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
|
||||
parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -46,6 +46,7 @@ module VX_miss_resrv #(
|
|||
// fill
|
||||
input wire fill_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] fill_id,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] fill_addr,
|
||||
|
||||
// lookup
|
||||
input wire lookup_valid,
|
||||
|
@ -161,8 +162,8 @@ module VX_miss_resrv #(
|
|||
dequeue_id_r <= dequeue_id_n;
|
||||
allocate_id_r <= allocate_id_n;
|
||||
|
||||
assert(!allocate_fire || !valid_table[allocate_id_r]);
|
||||
assert(!release_valid || valid_table[release_id]);
|
||||
`ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error"));
|
||||
`ASSERT(!release_valid || valid_table[release_id], ("runtime error"));
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id]), ("%t: *** cache%0d:%0d in-use allocation: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID,
|
||||
|
@ -184,6 +185,8 @@ module VX_miss_resrv #(
|
|||
.rdata (dequeue_data)
|
||||
);
|
||||
|
||||
assign fill_addr = addr_table[fill_id];
|
||||
|
||||
assign allocate_ready = allocate_rdy_r;
|
||||
assign allocate_id = allocate_id_r;
|
||||
|
||||
|
@ -206,8 +209,8 @@ module VX_miss_resrv #(
|
|||
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_debug_wid, deq_debug_pc);
|
||||
if (fill_valid)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID));
|
||||
if (dequeue_fire)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_debug_wid, deq_debug_pc);
|
||||
|
|
8
hw/rtl/cache/VX_nc_bypass.v
vendored
8
hw/rtl/cache/VX_nc_bypass.v
vendored
|
@ -15,10 +15,10 @@ module VX_nc_bypass #(
|
|||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
|
||||
localparam MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
|
||||
parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
parameter CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
|
||||
parameter MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
20
hw/rtl/cache/VX_shared_mem.v
vendored
20
hw/rtl/cache/VX_shared_mem.v
vendored
|
@ -31,7 +31,7 @@ module VX_shared_mem #(
|
|||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
VX_perf_cache_if.master perf_cache_if,
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
|
@ -337,16 +337,22 @@ module VX_shared_mem #(
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
|
||||
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end else begin
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
|
|
|
@ -22,6 +22,44 @@ interface VX_alu_req_if ();
|
|||
wire wb;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output next_PC,
|
||||
output op_type,
|
||||
output op_mod,
|
||||
output use_PC,
|
||||
output use_imm,
|
||||
output imm,
|
||||
output tid,
|
||||
output rs1_data,
|
||||
output rs2_data,
|
||||
output rd,
|
||||
output wb,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input next_PC,
|
||||
input op_type,
|
||||
input op_mod,
|
||||
input use_PC,
|
||||
input use_imm,
|
||||
input imm,
|
||||
input tid,
|
||||
input rs1_data,
|
||||
input rs2_data,
|
||||
input rd,
|
||||
input wb,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -10,6 +10,20 @@ interface VX_branch_ctl_if ();
|
|||
wire taken;
|
||||
wire [31:0] dest;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output taken,
|
||||
output dest
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input taken,
|
||||
input dest
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -8,6 +8,16 @@ interface VX_cmt_to_csr_if ();
|
|||
wire valid;
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output commit_size
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input commit_size
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -13,7 +13,31 @@ interface VX_commit_if ();
|
|||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire eop;
|
||||
wire ready;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output data,
|
||||
output rd,
|
||||
output wb,
|
||||
output eop,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input data,
|
||||
input rd,
|
||||
input wb,
|
||||
input eop,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -17,6 +17,36 @@ interface VX_csr_req_if ();
|
|||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output op_type,
|
||||
output addr,
|
||||
output rs1_data,
|
||||
output use_imm,
|
||||
output imm,
|
||||
output rd,
|
||||
output wb,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input op_type,
|
||||
input addr,
|
||||
input rs1_data,
|
||||
input use_imm,
|
||||
input imm,
|
||||
input rd,
|
||||
input wb,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -17,6 +17,26 @@ interface VX_dcache_req_if #(
|
|||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag;
|
||||
wire [NUM_REQS-1:0] ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output rw,
|
||||
output byteen,
|
||||
output addr,
|
||||
output data,
|
||||
output tag,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input rw,
|
||||
input byteen,
|
||||
input addr,
|
||||
input data,
|
||||
input tag,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -15,6 +15,22 @@ interface VX_dcache_rsp_if #(
|
|||
wire [TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output tmask,
|
||||
output data,
|
||||
output tag,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input tmask,
|
||||
input data,
|
||||
input tag,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -22,6 +22,44 @@ interface VX_decode_if ();
|
|||
wire [`NR_BITS-1:0] rs3;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output ex_type,
|
||||
output op_type,
|
||||
output op_mod,
|
||||
output wb,
|
||||
output use_PC,
|
||||
output use_imm,
|
||||
output imm,
|
||||
output rd,
|
||||
output rs1,
|
||||
output rs2,
|
||||
output rs3,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input ex_type,
|
||||
input op_type,
|
||||
input op_mod,
|
||||
input wb,
|
||||
input use_PC,
|
||||
input use_imm,
|
||||
input imm,
|
||||
input rd,
|
||||
input rs1,
|
||||
input rs2,
|
||||
input rs3,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -7,6 +7,14 @@ interface VX_fetch_to_csr_if ();
|
|||
|
||||
wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks;
|
||||
|
||||
modport master (
|
||||
output thread_masks
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input thread_masks
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -18,6 +18,36 @@ interface VX_fpu_req_if ();
|
|||
wire wb;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output op_type,
|
||||
output op_mod,
|
||||
output rs1_data,
|
||||
output rs2_data,
|
||||
output rs3_data,
|
||||
output rd,
|
||||
output wb,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input op_type,
|
||||
input op_mod,
|
||||
input rs1_data,
|
||||
input rs2_data,
|
||||
input rs3_data,
|
||||
input rd,
|
||||
input wb,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -12,6 +12,22 @@ interface VX_fpu_to_csr_if ();
|
|||
wire [`NW_BITS-1:0] read_wid;
|
||||
wire [`INST_FRM_BITS-1:0] read_frm;
|
||||
|
||||
modport master (
|
||||
output write_enable,
|
||||
output write_wid,
|
||||
output write_fflags,
|
||||
output read_wid,
|
||||
input read_frm
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input write_enable,
|
||||
input write_wid,
|
||||
input write_fflags,
|
||||
input read_wid,
|
||||
output read_frm
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -8,7 +8,21 @@ interface VX_gpr_req_if ();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
|
||||
modport master (
|
||||
output wid,
|
||||
output rs1,
|
||||
output rs2,
|
||||
output rs3
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input wid,
|
||||
input rs1,
|
||||
input rs2,
|
||||
input rs3
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -9,6 +9,18 @@ interface VX_gpr_rsp_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
modport master (
|
||||
output rs1_data,
|
||||
output rs2_data,
|
||||
output rs3_data
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input rs1_data,
|
||||
input rs2_data,
|
||||
input rs3_data
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -20,6 +20,36 @@ interface VX_gpu_req_if();
|
|||
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output next_PC,
|
||||
output op_type,
|
||||
output tid,
|
||||
output rs1_data,
|
||||
output rs2_data,
|
||||
output rd,
|
||||
output wb,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input next_PC,
|
||||
input op_type,
|
||||
input tid,
|
||||
input rs1_data,
|
||||
input rs2_data,
|
||||
input rd,
|
||||
input wb,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -20,14 +20,62 @@ interface VX_ibuffer_if ();
|
|||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire ready;
|
||||
|
||||
// scoreboard forwarding
|
||||
|
||||
wire [`NR_BITS-1:0] rd_n;
|
||||
wire [`NR_BITS-1:0] rs1_n;
|
||||
wire [`NR_BITS-1:0] rs2_n;
|
||||
wire [`NR_BITS-1:0] rs3_n;
|
||||
wire [`NW_BITS-1:0] wid_n;
|
||||
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output ex_type,
|
||||
output op_type,
|
||||
output op_mod,
|
||||
output wb,
|
||||
output use_PC,
|
||||
output use_imm,
|
||||
output imm,
|
||||
output rd,
|
||||
output rs1,
|
||||
output rs2,
|
||||
output rs3,
|
||||
output rd_n,
|
||||
output rs1_n,
|
||||
output rs2_n,
|
||||
output rs3_n,
|
||||
output wid_n,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input ex_type,
|
||||
input op_type,
|
||||
input op_mod,
|
||||
input wb,
|
||||
input use_PC,
|
||||
input use_imm,
|
||||
input imm,
|
||||
input rd,
|
||||
input rs1,
|
||||
input rs2,
|
||||
input rs3,
|
||||
input rd_n,
|
||||
input rs1_n,
|
||||
input rs2_n,
|
||||
input rs3_n,
|
||||
input wid_n,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -13,6 +13,20 @@ interface VX_icache_req_if #(
|
|||
wire [TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output addr,
|
||||
output tag,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input addr,
|
||||
input tag,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -11,7 +11,21 @@ interface VX_icache_rsp_if #(
|
|||
wire valid;
|
||||
wire [`WORD_WIDTH-1:0] data;
|
||||
wire [TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output data,
|
||||
output tag,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input data,
|
||||
input tag,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -11,6 +11,22 @@ interface VX_ifetch_req_if ();
|
|||
wire [31:0] PC;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output tmask,
|
||||
output wid,
|
||||
output PC,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input tmask,
|
||||
input wid,
|
||||
input PC,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -12,6 +12,24 @@ interface VX_ifetch_rsp_if ();
|
|||
wire [31:0] data;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output tmask,
|
||||
output wid,
|
||||
output PC,
|
||||
output data,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input tmask,
|
||||
input wid,
|
||||
input PC,
|
||||
input data,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -8,6 +8,16 @@ interface VX_join_if ();
|
|||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -18,6 +18,36 @@ interface VX_lsu_req_if ();
|
|||
wire wb;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output op_type,
|
||||
output is_fence,
|
||||
output store_data,
|
||||
output base_addr,
|
||||
output offset,
|
||||
output rd,
|
||||
output wb,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input op_type,
|
||||
input is_fence,
|
||||
input store_data,
|
||||
input base_addr,
|
||||
input offset,
|
||||
input rd,
|
||||
input wb,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -18,6 +18,26 @@ interface VX_mem_req_if #(
|
|||
wire [TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output rw,
|
||||
output byteen,
|
||||
output addr,
|
||||
output data,
|
||||
output tag,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input rw,
|
||||
input byteen,
|
||||
input addr,
|
||||
input data,
|
||||
input tag,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -11,7 +11,21 @@ interface VX_mem_rsp_if #(
|
|||
wire valid;
|
||||
wire [DATA_WIDTH-1:0] data;
|
||||
wire [TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output data,
|
||||
output tag,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input data,
|
||||
input tag,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -14,6 +14,28 @@ interface VX_perf_cache_if ();
|
|||
wire [`PERF_CTR_BITS-1:0] pipe_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] crsp_stalls;
|
||||
|
||||
modport master (
|
||||
output reads,
|
||||
output writes,
|
||||
output read_misses,
|
||||
output write_misses,
|
||||
output bank_stalls,
|
||||
output mshr_stalls,
|
||||
output pipe_stalls,
|
||||
output crsp_stalls
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input reads,
|
||||
input writes,
|
||||
input read_misses,
|
||||
input write_misses,
|
||||
input bank_stalls,
|
||||
input mshr_stalls,
|
||||
input pipe_stalls,
|
||||
input crsp_stalls
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -28,6 +28,50 @@ interface VX_perf_memsys_if ();
|
|||
wire [`PERF_CTR_BITS-1:0] mem_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_latency;
|
||||
|
||||
modport master (
|
||||
output icache_reads,
|
||||
output icache_read_misses,
|
||||
output icache_pipe_stalls,
|
||||
output icache_crsp_stalls,
|
||||
output dcache_reads,
|
||||
output dcache_writes,
|
||||
output dcache_read_misses,
|
||||
output dcache_write_misses,
|
||||
output dcache_bank_stalls,
|
||||
output dcache_mshr_stalls,
|
||||
output dcache_pipe_stalls,
|
||||
output dcache_crsp_stalls,
|
||||
output smem_reads,
|
||||
output smem_writes,
|
||||
output smem_bank_stalls,
|
||||
output mem_reads,
|
||||
output mem_writes,
|
||||
output mem_stalls,
|
||||
output mem_latency
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input icache_reads,
|
||||
input icache_read_misses,
|
||||
input icache_pipe_stalls,
|
||||
input icache_crsp_stalls,
|
||||
input dcache_reads,
|
||||
input dcache_writes,
|
||||
input dcache_read_misses,
|
||||
input dcache_write_misses,
|
||||
input dcache_bank_stalls,
|
||||
input dcache_mshr_stalls,
|
||||
input dcache_pipe_stalls,
|
||||
input dcache_crsp_stalls,
|
||||
input smem_reads,
|
||||
input smem_writes,
|
||||
input smem_bank_stalls,
|
||||
input mem_reads,
|
||||
input mem_writes,
|
||||
input mem_stalls,
|
||||
input mem_latency
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -4,15 +4,41 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_pipeline_if ();
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] lsu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] csr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] alu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [`PERF_CTR_BITS-1:0] fpu_stalls;
|
||||
`endif
|
||||
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
|
||||
|
||||
modport master (
|
||||
output ibf_stalls,
|
||||
output scb_stalls,
|
||||
output lsu_stalls,
|
||||
output csr_stalls,
|
||||
output alu_stalls,
|
||||
`ifdef EXT_F_ENABLE
|
||||
output fpu_stalls,
|
||||
`endif
|
||||
output gpu_stalls
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input ibf_stalls,
|
||||
input scb_stalls,
|
||||
input lsu_stalls,
|
||||
input csr_stalls,
|
||||
input alu_stalls,
|
||||
`ifdef EXT_F_ENABLE
|
||||
input fpu_stalls,
|
||||
`endif
|
||||
input gpu_stalls
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -12,6 +12,24 @@ interface VX_warp_ctl_if ();
|
|||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmc,
|
||||
output wspawn,
|
||||
output barrier,
|
||||
output split
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmc,
|
||||
input wspawn,
|
||||
input barrier,
|
||||
input split
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -6,16 +6,36 @@
|
|||
interface VX_writeback_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire eop;
|
||||
|
||||
wire eop;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output tmask,
|
||||
output wid,
|
||||
output PC,
|
||||
output rd,
|
||||
output data,
|
||||
output eop,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input tmask,
|
||||
input wid,
|
||||
input PC,
|
||||
input rd,
|
||||
input data,
|
||||
input eop,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
|
|
|
@ -9,6 +9,18 @@ interface VX_wstall_if();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire stalled;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output stalled
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input stalled
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,15 +1,15 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_axi_adapter #(
|
||||
parameter VX_DATA_WIDTH = 512,
|
||||
parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)),
|
||||
parameter VX_TAG_WIDTH = 8,
|
||||
parameter AXI_DATA_WIDTH = VX_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = 32,
|
||||
parameter AXI_TID_WIDTH = VX_TAG_WIDTH,
|
||||
parameter VX_DATA_WIDTH = 512,
|
||||
parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)),
|
||||
parameter VX_TAG_WIDTH = 8,
|
||||
parameter AXI_DATA_WIDTH = VX_DATA_WIDTH,
|
||||
parameter AXI_ADDR_WIDTH = 32,
|
||||
parameter AXI_TID_WIDTH = VX_TAG_WIDTH,
|
||||
|
||||
localparam VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8),
|
||||
localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
|
||||
parameter VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8),
|
||||
parameter AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -29,8 +29,7 @@ module VX_axi_adapter #(
|
|||
output wire [VX_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_req_ready,
|
||||
|
||||
// AXI write address channel
|
||||
output wire m_axi_awvalid,
|
||||
// AXI write request address channel
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
|
||||
output wire [7:0] m_axi_awlen,
|
||||
|
@ -39,18 +38,24 @@ module VX_axi_adapter #(
|
|||
output wire m_axi_awlock,
|
||||
output wire [3:0] m_axi_awcache,
|
||||
output wire [2:0] m_axi_awprot,
|
||||
output wire [3:0] m_axi_awqos,
|
||||
output wire [3:0] m_axi_awqos,
|
||||
output wire m_axi_awvalid,
|
||||
input wire m_axi_awready,
|
||||
|
||||
// AXI write data channel
|
||||
output wire m_axi_wvalid,
|
||||
// AXI write request data channel
|
||||
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
|
||||
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
|
||||
output wire m_axi_wlast,
|
||||
output wire m_axi_wlast,
|
||||
output wire m_axi_wvalid,
|
||||
input wire m_axi_wready,
|
||||
|
||||
// AXI write response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_bid,
|
||||
input wire [1:0] m_axi_bresp,
|
||||
input wire m_axi_bvalid,
|
||||
output wire m_axi_bready,
|
||||
|
||||
// AXI read address channel
|
||||
output wire m_axi_arvalid,
|
||||
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
|
||||
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
|
||||
output wire [7:0] m_axi_arlen,
|
||||
|
@ -60,12 +65,15 @@ module VX_axi_adapter #(
|
|||
output wire [3:0] m_axi_arcache,
|
||||
output wire [2:0] m_axi_arprot,
|
||||
output wire [3:0] m_axi_arqos,
|
||||
output wire m_axi_arvalid,
|
||||
input wire m_axi_arready,
|
||||
|
||||
// AXI read data channel
|
||||
input wire m_axi_rvalid,
|
||||
// AXI read response channel
|
||||
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
|
||||
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
|
||||
input wire [1:0] m_axi_rresp,
|
||||
input wire m_axi_rlast,
|
||||
input wire m_axi_rvalid,
|
||||
output wire m_axi_rready
|
||||
);
|
||||
localparam AXSIZE = $clog2(VX_DATA_WIDTH/8);
|
||||
|
@ -73,6 +81,8 @@ module VX_axi_adapter #(
|
|||
`STATIC_ASSERT((AXI_DATA_WIDTH == VX_DATA_WIDTH), ("invalid parameter"))
|
||||
`STATIC_ASSERT((AXI_TID_WIDTH == VX_TAG_WIDTH), ("invalid parameter"))
|
||||
|
||||
//`UNUSED_VAR ()
|
||||
|
||||
reg awvalid_ack;
|
||||
reg wvalid_ack;
|
||||
|
||||
|
@ -95,7 +105,7 @@ module VX_axi_adapter #(
|
|||
|
||||
wire axi_write_ready = (m_axi_awready || awvalid_ack) && (m_axi_wready || wvalid_ack);
|
||||
|
||||
// AXI write address channel
|
||||
// AXI write request address channel
|
||||
assign m_axi_awvalid = mem_req_valid && mem_req_rw && !awvalid_ack;
|
||||
assign m_axi_awid = mem_req_tag;
|
||||
assign m_axi_awaddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE;
|
||||
|
@ -107,13 +117,18 @@ module VX_axi_adapter #(
|
|||
assign m_axi_awprot = 3'b0;
|
||||
assign m_axi_awqos = 4'b0;
|
||||
|
||||
// AXI write data channel
|
||||
// AXI write request data channel
|
||||
assign m_axi_wvalid = mem_req_valid && mem_req_rw && !wvalid_ack;
|
||||
assign m_axi_wdata = mem_req_data;
|
||||
assign m_axi_wstrb = mem_req_byteen;
|
||||
assign m_axi_wlast = 1'b1;
|
||||
|
||||
// AXI read address channel
|
||||
// AXI write response channel
|
||||
`UNUSED_VAR (m_axi_bid);
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid || m_axi_bresp == 0, ("AXI response error"));
|
||||
assign m_axi_bready = 1'b1;
|
||||
|
||||
// AXI read request channel
|
||||
assign m_axi_arvalid = mem_req_valid && !mem_req_rw;
|
||||
assign m_axi_arid = mem_req_tag;
|
||||
assign m_axi_araddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE;
|
||||
|
@ -125,10 +140,12 @@ module VX_axi_adapter #(
|
|||
assign m_axi_arprot = 3'b0;
|
||||
assign m_axi_arqos = 4'b0;
|
||||
|
||||
// AXI read data channel
|
||||
// AXI read response channel
|
||||
assign mem_rsp_valid = m_axi_rvalid;
|
||||
assign mem_rsp_tag = m_axi_rid;
|
||||
assign mem_rsp_data = m_axi_rdata;
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid || m_axi_rresp == 0, ("AXI response error"));
|
||||
`UNUSED_VAR (m_axi_rlast);
|
||||
assign m_axi_rready = mem_rsp_ready;
|
||||
|
||||
// Vortex request ack
|
||||
|
|
|
@ -31,7 +31,7 @@ module VX_bypass_buffer #(
|
|||
buffer_valid <= 0;
|
||||
end
|
||||
if (valid_in && ~ready_out) begin
|
||||
assert(!buffer_valid);
|
||||
`ASSERT(!buffer_valid, "runtime error");
|
||||
buffer_valid <= 1;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -28,7 +28,9 @@ module VX_dp_ram #(
|
|||
if (INIT_FILE != "") begin \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin \
|
||||
initial ram = '{default: INIT_VALUE}; \
|
||||
initial \
|
||||
for (integer i = 0; i < SIZE; ++i)\
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end
|
||||
|
||||
|
|
|
@ -35,8 +35,8 @@ module VX_fifo_queue #(
|
|||
head_r <= 0;
|
||||
size_r <= 0;
|
||||
end else begin
|
||||
assert(!push || !full);
|
||||
assert(!pop || !empty);
|
||||
`ASSERT(!push || !full, ("runtime error"));
|
||||
`ASSERT(!pop || !empty, ("runtime error"));
|
||||
if (push) begin
|
||||
if (!pop) begin
|
||||
size_r <= 1;
|
||||
|
@ -71,8 +71,8 @@ module VX_fifo_queue #(
|
|||
alm_full_r <= 0;
|
||||
used_r <= 0;
|
||||
end else begin
|
||||
assert(!push || !full);
|
||||
assert(!pop || !empty);
|
||||
`ASSERT(!push || !full, ("runtime error"));
|
||||
`ASSERT(!pop || !empty, ("runtime error"));
|
||||
if (push) begin
|
||||
if (!pop) begin
|
||||
empty_r <= 0;
|
||||
|
|
|
@ -5,7 +5,7 @@ module VX_find_first #(
|
|||
parameter N = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter REVERSE = 0,
|
||||
localparam LOGN = $clog2(N)
|
||||
parameter LOGN = $clog2(N)
|
||||
) (
|
||||
input wire [N-1:0][DATAW-1:0] data_i,
|
||||
input wire [N-1:0] valid_i,
|
||||
|
|
|
@ -55,10 +55,10 @@ module VX_index_buffer #(
|
|||
full_r <= 1'b0;
|
||||
end else begin
|
||||
if (release_slot) begin
|
||||
assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr);
|
||||
`ASSERT(0 == free_slots[release_addr], ("%t: releasing invalid slot at port %d", $time, release_addr));
|
||||
end
|
||||
if (acquire_slot) begin
|
||||
assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr);
|
||||
`ASSERT(1 == free_slots[write_addr], ("%t: acquiring used slot at port %d", $time, write_addr));
|
||||
end
|
||||
write_addr_r <= free_index;
|
||||
free_slots <= free_slots_n;
|
||||
|
|
|
@ -32,10 +32,8 @@ module VX_index_queue #(
|
|||
assign enqueue = push;
|
||||
assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid
|
||||
|
||||
always @(*) begin
|
||||
assert(!push || !full);
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT(!push || !full, ("invalid inputs"));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr <= 0;
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_lzc #(
|
||||
parameter N = 2,
|
||||
parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero
|
||||
localparam LOGN = $clog2(N)
|
||||
parameter N = 2,
|
||||
parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero
|
||||
parameter LOGN = $clog2(N)
|
||||
) (
|
||||
input wire [N-1:0] in_i,
|
||||
output wire [LOGN-1:0] cnt_o,
|
||||
|
|
|
@ -25,7 +25,7 @@ module VX_pending_size #(
|
|||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
assert(!incr || !full);
|
||||
`ASSERT(!incr || !full, ("runtime error"));
|
||||
if (incr) begin
|
||||
if (!decr) begin
|
||||
empty_r <= 0;
|
||||
|
|
|
@ -30,9 +30,7 @@ module VX_skid_buffer #(
|
|||
|
||||
end else if (NOBACKPRESSURE) begin
|
||||
|
||||
always @(posedge clk) begin
|
||||
assert(ready_out) else $error("ready_out should always be asserted");
|
||||
end
|
||||
`RUNTIME_ASSERT(ready_out, ("ready_out should always be asserted"))
|
||||
|
||||
wire stall = valid_out && ~ready_out;
|
||||
|
||||
|
|
|
@ -27,7 +27,9 @@ module VX_sp_ram #(
|
|||
if (INIT_FILE != "") begin \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin \
|
||||
initial ram = '{default: INIT_VALUE}; \
|
||||
initial \
|
||||
for (integer i = 0; i < SIZE; ++i)\
|
||||
ram[i] = INIT_VALUE; \
|
||||
end \
|
||||
end
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ module VX_stream_demux #(
|
|||
parameter LANES = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter BUFFERED = 0,
|
||||
localparam LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
|
|
@ -35,8 +35,8 @@ TOP = Vortex
|
|||
RTL_DIR=../rtl
|
||||
DPI_DIR=../dpi
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
|
||||
|
||||
SRCS = simulator.cpp main.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
|
|
@ -113,7 +113,8 @@ void Simulator::reset() {
|
|||
mem_rsp_vec_[b].clear();
|
||||
}
|
||||
last_mem_rsp_bank_ = 0;
|
||||
mem_rsp_active_ = false;
|
||||
mem_rd_rsp_active_ = false;
|
||||
mem_wr_rsp_active_ = false;
|
||||
|
||||
#ifdef AXI_BUS
|
||||
this->reset_axi_bus();
|
||||
|
@ -182,9 +183,11 @@ void Simulator::reset_axi_bus() {
|
|||
|
||||
void Simulator::eval_axi_bus(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rsp_ready_ = vortex_->m_axi_rready;
|
||||
mem_rd_rsp_ready_ = vortex_->m_axi_rready;
|
||||
mem_wr_rsp_ready_ = vortex_->m_axi_bready;
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
vortex_->m_axi_wready = 0;
|
||||
vortex_->m_axi_awready = 0;
|
||||
|
@ -200,44 +203,71 @@ void Simulator::eval_axi_bus(bool clk) {
|
|||
}
|
||||
}
|
||||
|
||||
bool has_response = false;
|
||||
bool has_rd_response = false;
|
||||
bool has_wr_response = false;
|
||||
|
||||
// schedule memory responses that are ready
|
||||
for (int i = 0; i < MEMORY_BANKS; ++i) {
|
||||
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
|
||||
if (!mem_rsp_vec_[b].empty()
|
||||
&& (mem_rsp_vec_[b].begin()->cycles_left) <= 0) {
|
||||
has_response = true;
|
||||
last_mem_rsp_bank_ = b;
|
||||
break;
|
||||
if (!mem_rsp_vec_[b].empty()) {
|
||||
auto mem_rsp_it = mem_rsp_vec_[b].begin();
|
||||
if (mem_rsp_it->cycles_left <= 0) {
|
||||
has_rd_response = !mem_rsp_it->write;
|
||||
has_wr_response = mem_rsp_it->write;
|
||||
last_mem_rsp_bank_ = b;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// send memory response
|
||||
if (mem_rsp_active_
|
||||
&& vortex_->m_axi_rvalid && mem_rsp_ready_) {
|
||||
mem_rsp_active_ = false;
|
||||
// send memory read response
|
||||
if (mem_rd_rsp_active_
|
||||
&& vortex_->m_axi_rvalid && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rsp_active_) {
|
||||
if (has_response) {
|
||||
vortex_->m_axi_rvalid = 1;
|
||||
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
if (!mem_rd_rsp_active_) {
|
||||
if (has_rd_response) {
|
||||
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
*/
|
||||
vortex_->m_axi_rvalid = 1;
|
||||
vortex_->m_axi_rid = mem_rsp_it->tag;
|
||||
vortex_->m_axi_rresp = 0;
|
||||
vortex_->m_axi_rlast = 1;
|
||||
memcpy((uint8_t*)vortex_->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
|
||||
vortex_->m_axi_rid = mem_rsp_it->tag;
|
||||
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
|
||||
mem_rsp_active_ = true;
|
||||
mem_rd_rsp_active_ = true;
|
||||
} else {
|
||||
vortex_->m_axi_rvalid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// send memory write response
|
||||
if (mem_wr_rsp_active_
|
||||
&& vortex_->m_axi_bvalid && mem_wr_rsp_ready_) {
|
||||
mem_wr_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_wr_rsp_active_) {
|
||||
if (has_wr_response) {
|
||||
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
*/
|
||||
vortex_->m_axi_bvalid = 1;
|
||||
vortex_->m_axi_bid = mem_rsp_it->tag;
|
||||
vortex_->m_axi_bresp = 0;
|
||||
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
|
||||
mem_wr_rsp_active_ = true;
|
||||
} else {
|
||||
vortex_->m_axi_bvalid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// select the memory bank
|
||||
uint32_t req_addr = vortex_->m_axi_wvalid ? vortex_->m_axi_awaddr : vortex_->m_axi_araddr;
|
||||
uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0;
|
||||
|
@ -260,6 +290,8 @@ void Simulator::eval_axi_bus(bool clk) {
|
|||
uint64_t byteen = vortex_->m_axi_wstrb;
|
||||
unsigned base_addr = vortex_->m_axi_awaddr;
|
||||
uint8_t* data = (uint8_t*)(vortex_->m_axi_wdata);
|
||||
|
||||
// detect stdout write
|
||||
if (base_addr >= IO_COUT_ADDR
|
||||
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
|
@ -286,13 +318,20 @@ void Simulator::eval_axi_bus(bool clk) {
|
|||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
mem_req_t mem_req;
|
||||
mem_req.tag = vortex_->m_axi_arid;
|
||||
mem_req.addr = vortex_->m_axi_araddr;
|
||||
mem_req.cycles_left = 0;
|
||||
mem_req.write = 1;
|
||||
mem_rsp_vec_[req_bank].emplace_back(mem_req);
|
||||
}
|
||||
} else {
|
||||
mem_req_t mem_req;
|
||||
mem_req.tag = vortex_->m_axi_arid;
|
||||
mem_req.addr = vortex_->m_axi_araddr;
|
||||
ram_->read(vortex_->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data());
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
mem_req.write = 0;
|
||||
for (auto& rsp : mem_rsp_vec_[req_bank]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
// duplicate requests receive the same cycle delay
|
||||
|
@ -319,7 +358,7 @@ void Simulator::reset_mem_bus() {
|
|||
|
||||
void Simulator::eval_mem_bus(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rsp_ready_ = vortex_->mem_rsp_ready;
|
||||
mem_rd_rsp_ready_ = vortex_->mem_rsp_ready;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -350,14 +389,14 @@ void Simulator::eval_mem_bus(bool clk) {
|
|||
}
|
||||
|
||||
// send memory response
|
||||
if (mem_rsp_active_
|
||||
&& vortex_->mem_rsp_valid && mem_rsp_ready_) {
|
||||
mem_rsp_active_ = false;
|
||||
if (mem_rd_rsp_active_
|
||||
&& vortex_->mem_rsp_valid && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rsp_active_) {
|
||||
if (!mem_rd_rsp_active_) {
|
||||
if (has_response) {
|
||||
vortex_->mem_rsp_valid = 1;
|
||||
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
|
@ -368,7 +407,7 @@ void Simulator::eval_mem_bus(bool clk) {
|
|||
memcpy((uint8_t*)vortex_->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
|
||||
vortex_->mem_rsp_tag = mem_rsp_it->tag;
|
||||
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
|
||||
mem_rsp_active_ = true;
|
||||
mem_rd_rsp_active_ = true;
|
||||
} else {
|
||||
vortex_->mem_rsp_valid = 0;
|
||||
}
|
||||
|
|
|
@ -54,11 +54,12 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> block;
|
||||
uint64_t addr;
|
||||
uint64_t tag;
|
||||
bool write;
|
||||
} mem_req_t;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
@ -80,9 +81,11 @@ private:
|
|||
std::list<mem_req_t> mem_rsp_vec_ [MEMORY_BANKS];
|
||||
uint32_t last_mem_rsp_bank_;
|
||||
|
||||
bool mem_rsp_active_;
|
||||
bool mem_rd_rsp_active_;
|
||||
bool mem_rd_rsp_ready_;
|
||||
|
||||
bool mem_rsp_ready_;
|
||||
bool mem_wr_rsp_active_;
|
||||
bool mem_wr_rsp_ready_;
|
||||
|
||||
RAM *ram_;
|
||||
|
||||
|
|
|
@ -26,11 +26,11 @@ DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
|||
|
||||
CONFIG1 := -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2CACHE_SIZE=131072 $(CONFIGS)
|
||||
CONFIG8 := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2CACHE_SIZE=131072 $(CONFIGS)
|
||||
CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=262144 $(CONFIGS)
|
||||
CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=262144 $(CONFIGS)
|
||||
CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=524288 $(CONFIGS)
|
||||
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2_CACHE_SIZE=131072 $(CONFIGS)
|
||||
CONFIG8 := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2_CACHE_SIZE=131072 $(CONFIGS)
|
||||
CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=262144 $(CONFIGS)
|
||||
CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=262144 $(CONFIGS)
|
||||
CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=524288 $(CONFIGS)
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)
|
||||
RTL_INCLUDE = -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/afu
|
||||
|
|
|
@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=262144"
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=262144"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
|
|
@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=262144"
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=262144"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
|
|
@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2CACHE_SIZE=65536"
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2_CACHE_SIZE=65536"
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=524288"
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=524288"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
|
|
@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2CACHE_SIZE=131072"
|
||||
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2_CACHE_SIZE=131072"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
|
|
@ -3,15 +3,18 @@ TOP_LEVEL_ENTITY = Vortex
|
|||
SRC_FILE = Vortex.v
|
||||
RTL_DIR = ../../rtl
|
||||
|
||||
DEFINES = -DNDEBUG -DSYNTHESIS -DEXT_F_DISABLE -DNUM_CORES=1 -DNUM_THREADS=2 -DNUM_WARPS=2
|
||||
DEFINES = -DNDEBUG -DSYNTHESIS -DEXT_F_DISABLE -DNUM_CORES=1 -DNUM_THREADS=2 -DNUM_WARPS=2 -DMEM_BLOCK_SIZE=64
|
||||
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache
|
||||
|
||||
# Build targets
|
||||
all: build
|
||||
|
||||
build:
|
||||
./synth.sh -t$(TOP_LEVEL_ENTITY) -s$(SRC_FILE) $(DEFINES) $(RTL_INCLUDE)
|
||||
output.v:
|
||||
./sv2v.sh $(DEFINES) $(RTL_INCLUDE) -ooutput.v
|
||||
|
||||
build: output.v
|
||||
./synth.sh -t$(TOP_LEVEL_ENTITY) -soutput.v
|
||||
|
||||
clean:
|
||||
rm -rf sources.v *.ys *.log
|
||||
rm -rf output.v *.ys *.log
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
# load design
|
||||
read_verilog -sv -I../../rtl -I../../rtl/libs -I../../rtl/interfaces -I../../rtl/pipe_regs -I../../rtl/cache ../../rtl/Vortex.v
|
||||
|
||||
# dump diagram
|
||||
show
|
57
hw/syn/yosys/sv2v.sh
Executable file
57
hw/syn/yosys/sv2v.sh
Executable file
|
@ -0,0 +1,57 @@
|
|||
#!/bin/bash

# Convert SystemVerilog sources into a single Verilog file with sv2v.
#
# Every *.v / *.sv file found directly inside each -I directory is passed
# to sv2v, together with the optional explicit -s source file.
#
# this script uses sv2v and yosys tools to run.
# sv2v: https://github.com/zachjs/sv2v
# yosys: http://www.clifford.at/yosys/

# exit when any command fails
set -e

source=""
includes=()
macro_args=()
output_file=out.v

# Print the option summary and exit. The summary is produced by grepping
# this script for the annotated case labels below. The backslash in the
# pattern is deliberate: it keeps this very line from matching itself.
usage() { echo "$0 usage:" && grep " .)\ #" "$0"; exit 0; }
[ $# -eq 0 ] && usage

# "s:" has to be part of the optstring, otherwise -s is rejected by getopts
# and the source branch below can never run.
while getopts "s:o:I:D:h" arg; do
    case $arg in
    s) # source
        source=${OPTARG}
        ;;
    o) # output-file
        output_file=${OPTARG}
        ;;
    I) # include directory
        includes+=("${OPTARG}")
        ;;
    D) # macro definition
        macro_args+=("-D${OPTARG}")
        ;;
    h | *)
        # usage exits the script on its own
        usage
        ;;
    esac
done

# process include paths
inc_args=()
for dir in "${includes[@]}"
do
    inc_args+=("-I$dir")
done

# process source files
file_args=()
if [ -n "$source" ]; then
    file_args+=("$source")
fi
for dir in "${includes[@]}"
do
    # The name tests are grouped so that -type f applies to both extensions;
    # NUL-delimited output keeps paths containing whitespace intact.
    while IFS= read -r -d '' file
    do
        echo "file: $file"
        file_args+=("$file")
    done < <(find "$dir" -maxdepth 1 -type f \( -name '*.v' -o -name '*.sv' \) -print0)
done

# system-verilog to verilog conversion
sv2v "${macro_args[@]}" "${inc_args[@]}" "${file_args[@]}" -v -w "$output_file"
|
|
@ -10,11 +10,12 @@ set -e
|
|||
source=""
|
||||
top_level=""
|
||||
dir_list=()
|
||||
defines=""
|
||||
inc_args=""
|
||||
macro_args=""
|
||||
|
||||
usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; }
|
||||
[ $# -eq 0 ] && usage
|
||||
while getopts "hs:t:I:D:" arg; do
|
||||
while getopts "s:t:I:D:h" arg; do
|
||||
case $arg in
|
||||
s) # source
|
||||
source=${OPTARG}
|
||||
|
@ -24,9 +25,10 @@ while getopts "hs:t:I:D:" arg; do
|
|||
;;
|
||||
I) # include directory
|
||||
dir_list+=(${OPTARG})
|
||||
inc_args="$inc_args -I${OPTARG}"
|
||||
;;
|
||||
D) # macro definition
|
||||
defines="$defines -D${OPTARG}"
|
||||
macro_args="$macro_args -D${OPTARG}"
|
||||
;;
|
||||
h | *)
|
||||
usage
|
||||
|
@ -35,41 +37,29 @@ while getopts "hs:t:I:D:" arg; do
|
|||
esac
|
||||
done
|
||||
|
||||
echo "top_level=$top_level, source=$source, defines=$defines"
|
||||
|
||||
# process include paths
|
||||
inc_list=""
|
||||
for dir in "${dir_list[@]}"
|
||||
do
|
||||
echo "include: $dir" >> synth.log
|
||||
inc_list="$inc_list -I$dir"
|
||||
done
|
||||
|
||||
# process source files
|
||||
file_list=""
|
||||
for dir in "${dir_list[@]}"
|
||||
do
|
||||
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
|
||||
{
|
||||
# read design sources
|
||||
for dir in "${dir_list[@]}"
|
||||
do
|
||||
echo "file: $file" >> synth.log
|
||||
file_list="$file_list $file"
|
||||
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
|
||||
do
|
||||
echo "read_verilog $macro_args $inc_args -sv $file"
|
||||
done
|
||||
done
|
||||
done
|
||||
if [ -n "$source" ]; then
|
||||
echo "read_verilog $macro_args $inc_args -sv $source"
|
||||
fi
|
||||
|
||||
# system-verilog to verilog conversion
|
||||
sv2v $defines -w output.v $inc_list $file_list
|
||||
# generic synthesis
|
||||
echo "synth -top $top_level"
|
||||
|
||||
{
|
||||
echo "read_verilog -sv output.v"
|
||||
echo "hierarchy -check -top $top_level"
|
||||
# mapping to mycells.lib
|
||||
echo "dfflibmap -liberty mycells.lib"
|
||||
echo "abc -liberty mycells.lib"
|
||||
echo "clean"
|
||||
|
||||
# insertation of global reset
|
||||
echo "add -global_input reset 1"
|
||||
echo "proc -global_arst reset"
|
||||
|
||||
echo "synth -run coarse; opt -fine"
|
||||
echo "tee -o brams.log memory_bram -rules scripts/brams.txt;;"
|
||||
echo "write_verilog -noexpr -noattr synth.v"
|
||||
# write synthesized design
|
||||
echo "write_verilog synth.v"
|
||||
} > synth.ys
|
||||
|
||||
yosys -l yosys.log synth.ys
|
2
hw/unit_tests/cache/Makefile
vendored
2
hw/unit_tests/cache/Makefile
vendored
|
@ -1,4 +1,4 @@
|
|||
PARAM += -DCACHE_SIZE=4096 -DWORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DNUM_BANKS=4 -DCREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4
|
||||
PARAM += -DCACHE_SIZE=4096 -DCACHE_WORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DCACHE_NUM_BANKS=4 -DCACHE_CREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue