enabling 128-bit dram bus

This commit is contained in:
Blaise Tine 2021-04-24 00:31:27 -04:00
parent 2f5ccdcf45
commit 4cb98a25a7
19 changed files with 344 additions and 198 deletions

View file

@ -18,8 +18,7 @@ install:
- export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
- export VERILATOR_ROOT=/opt/verilator
- export PATH=$VERILATOR_ROOT/bin:$PATH
- make -s
script:
- ./ci/regression.sh

View file

@ -135,7 +135,7 @@ case $APP in
;;
esac
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG"
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG $CONFIGS"
echo "CONFIGS=$CONFIGS"

View file

@ -3,6 +3,8 @@
# exit when any command fails
set -e
make -s
# Dogfood tests
./ci/test_runtime.sh
./ci/test_riscv_isa.sh
@ -11,6 +13,14 @@ set -e
./ci/test_simx.sh
./ci/test_compiler.sh
# Blackbox tests
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=demo --args="-n1"
# Build tests disabling extensions
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
@ -18,10 +28,8 @@ CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
# disable shared memory
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
# Blackbox tests
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=demo --args="-n1"
# DRAM bus width tests. The local memory data width is derived as
# (8 << PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE_BITS) bits, so each width
# needs its own SIZE_BITS value.
# test 128-bit DRAM bus (8 << 4)
CONFIGS=-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE_BITS=4 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test 256-bit DRAM bus (8 << 5)
CONFIGS=-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE_BITS=5 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo

View file

@ -21,10 +21,10 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS := -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 $(CONFIGS)
#CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 $(CONFIGS)
#CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 $(CONFIGS)
CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=1 $(CONFIGS)
CFLAGS += -fPIC
@ -47,10 +47,11 @@ FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += $(CONFIGS)
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
@ -87,10 +88,13 @@ VL_FLAGS += -DFPU_DPI
PROJECT = libopae-c-vlsim.so
all: $(PROJECT)
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
../../../hw/scripts/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
$(PROJECT): $(SRCS)
$(PROJECT): $(SRCS) vortex_afu.h
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(TOP).mk
clean:
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh vortex_afu.h

View file

@ -285,15 +285,15 @@ void opae_sim::avs_bus() {
vortex_afu_->avs_readdatavalid = 0;
if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_rd_it->data.data(), CACHE_BLOCK_SIZE);
memcpy(vortex_afu_->avs_readdata, dram_rd_it->data.data(), DRAM_BLOCK_SIZE);
uint32_t addr = dram_rd_it->addr;
dram_reads_.erase(dram_rd_it);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * DRAM_BLOCK_SIZE);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
printf(" !%0x", req.addr * DRAM_BLOCK_SIZE);
else
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
printf(" %0x", req.addr * DRAM_BLOCK_SIZE);
}
printf("}\n");*/
}
@ -315,19 +315,24 @@ void opae_sim::avs_bus() {
if (vortex_afu_->avs_write) {
assert(0 == vortex_afu_->mem_bank_select);
uint64_t byteen = vortex_afu_->avs_byteenable;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
unsigned base_addr = vortex_afu_->avs_address * DRAM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata);
for (int i = 0; i < CACHE_BLOCK_SIZE; i++) {
for (int i = 0; i < DRAM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] DRAM Wr Req: addr=%x, data=", timestamp, base_addr);
for (int i = 0; i < DRAM_BLOCK_SIZE; i++) {
printf("%0x", data[(DRAM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
}
if (vortex_afu_->avs_read) {
assert(0 == vortex_afu_->mem_bank_select);
dram_rd_req_t dram_req;
dram_req.addr = vortex_afu_->avs_address;
ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.data.data());
ram_.read(vortex_afu_->avs_address * DRAM_BLOCK_SIZE, DRAM_BLOCK_SIZE, dram_req.data.data());
dram_req.cycles_left = DRAM_LATENCY;
for (auto& rsp : dram_reads_) {
if (dram_req.addr == rsp.addr) {
@ -336,15 +341,15 @@ void opae_sim::avs_bus() {
}
}
dram_reads_.emplace_back(dram_req);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * DRAM_BLOCK_SIZE);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
printf(" !%0x", req.addr * DRAM_BLOCK_SIZE);
else
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
printf(" %0x", req.addr * DRAM_BLOCK_SIZE);
}
printf("}\n");*/
}
}
}
vortex_afu_->avs_waitrequest = dram_stalled;

View file

@ -9,6 +9,7 @@
#endif
#include <VX_config.h>
#include "vortex_afu.h"
#include "ram.h"
#include <ostream>
@ -16,6 +17,8 @@
#include <list>
#include <unordered_map>
#define DRAM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
#define CACHE_BLOCK_SIZE 64
class opae_sim {
@ -40,7 +43,7 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
std::array<uint8_t, DRAM_BLOCK_SIZE> data;
uint32_t addr;
} dram_rd_req_t;

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`include "VX_platform.vh"
`IGNORE_WARNINGS_BEGIN
`include "vortex_afu.vh"
`IGNORE_WARNINGS_END
`include "VX_define.vh"
/* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*;
import local_mem_cfg_pkg::*;

View file

@ -20,10 +20,10 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS := -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 $(CONFIGS)
#CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 $(CONFIGS)
#CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 $(CONFIGS)
CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=1 $(CONFIGS)
CFLAGS += $(CONFIGS)
@ -44,7 +44,7 @@ FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt

View file

@ -10,10 +10,10 @@ CXXFLAGS += -fPIC -Wno-aligned-new -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR)
CXXFLAGS += -DDUMP_PERF_STATS
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS := -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 $(CONFIGS)
#CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 $(CONFIGS)
#CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 $(CONFIGS)
CONFIGS := -DNUM_CLUSTERS=1 -DNUM_CORES=1 $(CONFIGS)
CXXFLAGS += $(CONFIGS)

View file

@ -1,9 +1,9 @@
.PHONY: build_config
build_config:
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
build_config: ./rtl/VX_config.vh
./scripts/gen_config.py -i ./rtl/VX_config.vh -o ./VX_config.h
$(MAKE) -C simulate
clean:
rm -f ./rtl/VX_user_config.vh ./VX_config.h
rm -f ./VX_config.h
$(MAKE) -C simulate clean

View file

@ -36,7 +36,11 @@
`endif
`ifndef GLOBAL_BLOCK_SIZE
`define GLOBAL_BLOCK_SIZE 64
`ifdef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
`define GLOBAL_BLOCK_SIZE (`PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
`else
`define GLOBAL_BLOCK_SIZE 64
`endif
`endif
`ifndef L1_BLOCK_SIZE

View file

@ -15,18 +15,6 @@ module VX_avs_wrapper #(
input wire clk,
input wire reset,
// AVS bus
output wire [AVS_DATAW-1:0] avs_writedata,
input wire [AVS_DATAW-1:0] avs_readdata,
output wire [AVS_ADDRW-1:0] avs_address,
input wire avs_waitrequest,
output wire avs_write,
output wire avs_read,
output wire [AVS_BYTEENW-1:0] avs_byteenable,
output wire [AVS_BURSTW-1:0] avs_burstcount,
input avs_readdatavalid,
output wire [AVS_BANKS_BITS-1:0] avs_bankselect,
// DRAM request
input wire dram_req_valid,
input wire dram_req_rw,
@ -40,7 +28,19 @@ module VX_avs_wrapper #(
output wire dram_rsp_valid,
output wire [AVS_DATAW-1:0] dram_rsp_data,
output wire [REQ_TAGW-1:0] dram_rsp_tag,
input wire dram_rsp_ready
input wire dram_rsp_ready,
// AVS bus
output wire [AVS_DATAW-1:0] avs_writedata,
input wire [AVS_DATAW-1:0] avs_readdata,
output wire [AVS_ADDRW-1:0] avs_address,
input wire avs_waitrequest,
output wire avs_write,
output wire avs_read,
output wire [AVS_BYTEENW-1:0] avs_byteenable,
output wire [AVS_BURSTW-1:0] avs_burstcount,
input avs_readdatavalid,
output wire [AVS_BANKS_BITS-1:0] avs_bankselect
);
reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
reg [AVS_BURSTW-1:0] avs_burstcount_r;

107
hw/rtl/afu/VX_cci_to_mem.v Normal file
View file

@ -0,0 +1,107 @@
`include "VX_define.vh"
// VX_cci_to_mem: width adapter between a wide request/response interface
// (CCI_DATAW / CCI_ADDRW, "*_in" request side and "*_out" response side) and
// a narrower one (AVS_DATAW / AVS_ADDRW). When the output address is N bits
// wider than the input address, each input request is issued as 2**N output
// requests and 2**N input responses are merged into one output response.
// When N == 0 the module is a pure pass-through.
module VX_cci_to_mem #(
parameter CCI_DATAW = 1,
parameter CCI_ADDRW = 1,
parameter AVS_DATAW = 1,
parameter AVS_ADDRW = 1,
parameter AVS_BYTEENW = (AVS_DATAW / 8),
parameter TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
// wide-side request (input)
input wire dram_req_valid_in,
input wire [CCI_ADDRW-1:0] dram_req_addr_in,
input wire dram_req_rw_in,
input wire [CCI_DATAW-1:0] dram_req_data_in,
input wire [TAG_WIDTH-1:0] dram_req_tag_in,
output wire dram_req_ready_in,
// narrow-side request (output)
output wire dram_req_valid_out,
output wire [AVS_ADDRW-1:0] dram_req_addr_out,
output wire dram_req_rw_out,
output wire [AVS_BYTEENW-1:0] dram_req_byteen_out,
output wire [AVS_DATAW-1:0] dram_req_data_out,
output wire [TAG_WIDTH-1:0] dram_req_tag_out,
input wire dram_req_ready_out,
// narrow-side response (input)
input wire dram_rsp_valid_in,
input wire [AVS_DATAW-1:0] dram_rsp_data_in,
input wire [TAG_WIDTH-1:0] dram_rsp_tag_in,
output wire dram_rsp_ready_in,
// wide-side response (output)
output wire dram_rsp_valid_out,
output wire [CCI_DATAW-1:0] dram_rsp_data_out,
output wire [TAG_WIDTH-1:0] dram_rsp_tag_out,
input wire dram_rsp_ready_out
);
// N = number of extra address bits on the narrow side; 2**N beats per wide line.
localparam N = AVS_ADDRW - CCI_ADDRW;
// The narrow-side address must be at least as wide as the wide-side address.
`STATIC_ASSERT(N >= 0, ("oops!"))
if (N == 0) begin
// Equal address widths: forward every signal unchanged, all bytes enabled.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign dram_req_valid_out = dram_req_valid_in;
assign dram_req_addr_out = dram_req_addr_in;
assign dram_req_rw_out = dram_req_rw_in;
assign dram_req_byteen_out = {AVS_BYTEENW{1'b1}};
assign dram_req_data_out = dram_req_data_in;
assign dram_req_tag_out = dram_req_tag_in;
assign dram_req_ready_in = dram_req_ready_out;
assign dram_rsp_valid_out = dram_rsp_valid_in;
assign dram_rsp_data_out = dram_rsp_data_in;
assign dram_rsp_tag_out = dram_rsp_tag_in;
assign dram_rsp_ready_in = dram_rsp_ready_out;
end else begin
// Beat counters: req_ctr selects the slice being sent, rsp_ctr the slice
// being received. Both are N bits wide and wrap after 2**N beats.
reg [N-1:0] req_ctr, rsp_ctr;
// Wide request data viewed as 2**N slices of AVS_DATAW bits.
// NOTE(review): assumes CCI_DATAW == (2**N) * AVS_DATAW - confirm at the instantiation.
wire [(2**N)-1:0][AVS_DATAW-1:0] dram_req_data_w_in;
// Response assembly buffer: registered copy and its combinational next value.
reg [(2**N)-1:0][AVS_DATAW-1:0] dram_rsp_data_r_out, dram_rsp_data_n_out;
wire dram_req_fire_out = dram_req_valid_out && dram_req_ready_out;
wire dram_rsp_fire_in = dram_rsp_valid_in && dram_rsp_ready_in;
assign dram_req_data_w_in = dram_req_data_in;
// Next buffer value = stored slices with the incoming beat placed at rsp_ctr.
always @(*) begin
dram_rsp_data_n_out = dram_rsp_data_r_out;
dram_rsp_data_n_out[rsp_ctr] = dram_rsp_data_in;
end
// Advance the counters on each accepted beat; the data buffer itself is not reset.
always @(posedge clk) begin
if (reset) begin
req_ctr <= 0;
rsp_ctr <= 0;
end else begin
if (dram_req_fire_out) begin
req_ctr <= req_ctr + 1;
end
if (dram_rsp_fire_in) begin
rsp_ctr <= rsp_ctr + 1;
dram_rsp_data_r_out <= dram_rsp_data_n_out;
end
end
end
// Request side: the input request is held while each slice is issued with
// req_ctr appended as the low address bits; it is only acknowledged
// (ready_in) on the last beat.
assign dram_req_valid_out = dram_req_valid_in;
assign dram_req_addr_out = {dram_req_addr_in, req_ctr};
assign dram_req_rw_out = dram_req_rw_in;
assign dram_req_byteen_out = {AVS_BYTEENW{1'b1}};
assign dram_req_data_out = dram_req_data_w_in[req_ctr];
assign dram_req_tag_out = dram_req_tag_in;
assign dram_req_ready_in = dram_req_ready_out && (req_ctr == (2**N-1));
// Response side: the merged line is presented only on the last beat, using
// the combinational buffer so the final slice is included without an extra cycle.
assign dram_rsp_valid_out = dram_rsp_valid_in && (rsp_ctr == (2**N-1));
assign dram_rsp_data_out = dram_rsp_data_n_out;
assign dram_rsp_tag_out = dram_rsp_tag_in;
// NOTE(review): intermediate beats are also gated by dram_rsp_ready_out even
// though no output response is valid then - confirm the consumer does not
// wait for valid before asserting ready.
assign dram_rsp_ready_in = dram_rsp_ready_out;
end
endmodule

View file

@ -1,9 +1,13 @@
`include "VX_define.vh"
`ifndef NOPAE
`include "afu_json_info.vh"
`else
`include "VX_platform.vh"
`ifdef NOPAE
`IGNORE_WARNINGS_BEGIN
`include "vortex_afu.vh"
`IGNORE_WARNINGS_END
`else
`include "afu_json_info.vh"
`endif
`include "VX_define.vh"
/* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*;
import local_mem_cfg_pkg::*;
@ -36,10 +40,14 @@ module vortex_afu #(
localparam RESET_DELAY = 3;
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH);
localparam CCI_LINE_WIDTH = $bits(t_ccip_clData);
localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_LINE_WIDTH / 8);
localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH);
localparam VX_DRAM_LINE_IDX = (DRAM_LINE_LW - VX_DRAM_LINE_LW);
@ -74,7 +82,7 @@ localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA;
localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ;
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW;
localparam CCI_RD_RQ_DATAW = CCI_LINE_WIDTH + CCI_RD_RQ_TAGW;
localparam STATE_IDLE = 0;
localparam STATE_READ = 1;
@ -128,8 +136,8 @@ reg vx_dram_en;
// CMD variables //////////////////////////////////////////////////////////////
t_ccip_clAddr cmd_io_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size;
reg [CCI_ADDR_WIDTH-1:0] cmd_mem_addr;
reg [CCI_ADDR_WIDTH-1:0] cmd_data_size;
`ifdef SCOPE
wire [63:0] cmd_scope_rdata;
@ -216,9 +224,9 @@ always @(posedge clk) begin
`endif
end
MMIO_MEM_ADDR: begin
cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
cmd_mem_addr <= $bits(cmd_mem_addr)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, $bits(cmd_mem_addr)'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_DATA_SIZE: begin
@ -455,18 +463,18 @@ t_local_mem_data dram_rsp_data;
wire [AVS_REQ_TAGW:0] dram_rsp_tag;
wire dram_rsp_ready;
wire cci_dram_req_valid;
wire cci_dram_req_rw;
t_local_mem_byte_mask cci_dram_req_byteen;
t_local_mem_addr cci_dram_req_addr;
t_local_mem_data cci_dram_req_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_req_tag;
wire cci_dram_req_ready;
wire cci_dram_req_tmp_valid;
wire cci_dram_req_tmp_rw;
t_local_mem_byte_mask cci_dram_req_tmp_byteen;
t_local_mem_addr cci_dram_req_tmp_addr;
t_local_mem_data cci_dram_req_tmp_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_req_tmp_tag;
wire cci_dram_req_tmp_ready;
wire cci_dram_rsp_valid;
t_local_mem_data cci_dram_rsp_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag;
wire cci_dram_rsp_ready;
wire cci_dram_rsp_tmp_valid;
t_local_mem_data cci_dram_rsp_tmp_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tmp_tag;
wire cci_dram_rsp_tmp_ready;
wire vx_dram_req_valid_qual;
t_local_mem_addr vx_dram_req_addr_qual;
@ -477,18 +485,55 @@ wire [AVS_REQ_TAGW-1:0] vx_dram_req_tag_qual;
wire [(1 << VX_DRAM_LINE_IDX)-1:0][`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data_unqual;
wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual;
wire cci_dram_rd_req_valid, cci_dram_wr_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
wire cci_dram_rd_req_valid;
wire cci_dram_wr_req_valid;
wire [CCI_ADDR_WIDTH-1:0] cci_dram_rd_req_addr;
wire [CCI_ADDR_WIDTH-1:0] cci_dram_wr_req_addr;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
wire cci_dram_req_ready;
wire cci_dram_rsp_valid;
wire [CCI_LINE_WIDTH-1:0] cci_dram_rsp_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag;
wire cci_dram_rsp_ready;
//--
assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid;
assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr;
assign cci_dram_req_rw = (CMD_MEM_WRITE == state);
assign cci_dram_req_byteen = {64{1'b1}};
assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
assign cci_dram_req_tag = AVS_REQ_TAGW'(0);
VX_cci_to_mem #(
.CCI_DATAW (CCI_LINE_WIDTH),
.CCI_ADDRW (CCI_ADDR_WIDTH),
.AVS_DATAW (DRAM_LINE_WIDTH),
.AVS_ADDRW (DRAM_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW)
) cci_to_mem(
.clk (clk),
.reset (reset),
.dram_req_valid_in ((CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid),
.dram_req_addr_in ((CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr),
.dram_req_rw_in ((CMD_MEM_WRITE == state)),
.dram_req_data_in (cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]),
.dram_req_tag_in (AVS_REQ_TAGW'(0)),
.dram_req_ready_in (cci_dram_req_ready),
.dram_req_valid_out (cci_dram_req_tmp_valid),
.dram_req_addr_out (cci_dram_req_tmp_addr),
.dram_req_rw_out (cci_dram_req_tmp_rw),
.dram_req_byteen_out(cci_dram_req_tmp_byteen),
.dram_req_data_out (cci_dram_req_tmp_data),
.dram_req_tag_out (cci_dram_req_tmp_tag),
.dram_req_ready_out (cci_dram_req_tmp_ready),
.dram_rsp_valid_in (cci_dram_rsp_tmp_valid),
.dram_rsp_data_in (cci_dram_rsp_tmp_data),
.dram_rsp_tag_in (cci_dram_rsp_tmp_tag),
.dram_rsp_ready_in (cci_dram_rsp_tmp_ready),
.dram_rsp_valid_out (cci_dram_rsp_valid),
.dram_rsp_data_out (cci_dram_rsp_data),
.dram_rsp_tag_out (cci_dram_rsp_tag),
.dram_rsp_ready_out (cci_dram_rsp_ready)
);
`UNUSED_VAR (cci_dram_rsp_tag)
@ -518,8 +563,8 @@ assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH ($bits(t_local_mem_data)),
.ADDR_WIDTH ($bits(t_local_mem_addr)),
.DATA_WIDTH (DRAM_LINE_WIDTH),
.ADDR_WIDTH (DRAM_ADDR_WIDTH),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
) dram_arb (
@ -527,13 +572,13 @@ VX_mem_arb #(
.reset (reset),
// Source request
.req_valid_in ({cci_dram_req_valid, vx_dram_req_valid_qual}),
.req_rw_in ({cci_dram_req_rw, vx_dram_req_rw}),
.req_byteen_in ({cci_dram_req_byteen, vx_dram_req_byteen_qual}),
.req_addr_in ({cci_dram_req_addr, vx_dram_req_addr_qual}),
.req_data_in ({cci_dram_req_data, vx_dram_req_data_qual}),
.req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}),
.req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}),
.req_valid_in ({cci_dram_req_tmp_valid, vx_dram_req_valid_qual}),
.req_rw_in ({cci_dram_req_tmp_rw, vx_dram_req_rw}),
.req_byteen_in ({cci_dram_req_tmp_byteen, vx_dram_req_byteen_qual}),
.req_addr_in ({cci_dram_req_tmp_addr, vx_dram_req_addr_qual}),
.req_data_in ({cci_dram_req_tmp_data, vx_dram_req_data_qual}),
.req_tag_in ({cci_dram_req_tmp_tag, vx_dram_req_tag_qual}),
.req_ready_in ({cci_dram_req_tmp_ready, vx_dram_req_ready}),
// DRAM request
.req_valid_out (dram_req_valid),
@ -545,10 +590,10 @@ VX_mem_arb #(
.req_ready_out (dram_req_ready),
// Source response
.rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}),
.rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}),
.rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}),
.rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}),
.rsp_valid_out ({cci_dram_rsp_tmp_valid, vx_dram_rsp_valid}),
.rsp_data_out ({cci_dram_rsp_tmp_data, vx_dram_rsp_data_unqual}),
.rsp_tag_out ({cci_dram_rsp_tmp_tag, vx_dram_rsp_tag_unqual}),
.rsp_ready_out ({cci_dram_rsp_tmp_ready, vx_dram_rsp_ready}),
// DRAM response
.rsp_valid_in (dram_rsp_valid),
@ -560,9 +605,9 @@ VX_mem_arb #(
//--
VX_avs_wrapper #(
.AVS_DATAW ($bits(t_local_mem_data)),
.AVS_ADDRW ($bits(t_local_mem_addr)),
.AVS_BURSTW ($bits(t_local_mem_burst_cnt)),
.AVS_DATAW (DRAM_LINE_WIDTH),
.AVS_ADDRW (DRAM_ADDR_WIDTH),
.AVS_BURSTW (DRAM_BURST_CTRW),
.AVS_BANKS (NUM_LOCAL_MEM_BANKS),
.REQ_TAGW (AVS_REQ_TAGW+1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
@ -570,18 +615,6 @@ VX_avs_wrapper #(
.clk (clk),
.reset (reset),
// AVS bus
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
.avs_waitrequest (avs_waitrequest),
.avs_write (avs_write),
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.avs_bankselect (mem_bank_select),
// DRAM request
.dram_req_valid (dram_req_valid),
.dram_req_rw (dram_req_rw),
@ -595,15 +628,27 @@ VX_avs_wrapper #(
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready)
.dram_rsp_ready (dram_rsp_ready),
// AVS bus
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
.avs_waitrequest (avs_waitrequest),
.avs_write (avs_write),
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.avs_bankselect (mem_bank_select)
);
// CCI-P Read Request ///////////////////////////////////////////////////////////
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual;
reg [CCI_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr;
reg [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
reg [CCI_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
t_ccip_clAddr cci_rd_req_addr;
@ -631,7 +676,7 @@ wire cci_rd_rsp_fire = (STATE_WRITE == state)
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
assign cci_rd_req_ctr_next = cci_rd_req_ctr + CCI_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
assign cci_rdq_pop = cci_dram_wr_req_fire;
assign cci_rdq_push = cci_rd_rsp_fire;
@ -654,9 +699,9 @@ VX_pending_size #(
assign cci_dram_wr_req_valid = !cci_rdq_empty;
assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (CCI_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !(cci_rd_req_wait || cci_pending_reads_full);
assign cmd_write_done = (cci_dram_wr_req_ctr == cmd_data_size);
@ -685,7 +730,6 @@ always @(posedge clk) begin
cci_rd_req_enable <= (STATE_WRITE == state)
&& (cci_rd_req_ctr_next != cmd_data_size)
&& !cci_pending_reads_full
&& !cp2af_sRxPort.c0TxAlmFull;
if (cci_rd_req_fire) begin
@ -716,8 +760,8 @@ always @(posedge clk) begin
end*/
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : CCI_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + CCI_ADDR_WIDTH'(1);
end
end
end
@ -761,9 +805,9 @@ VX_fifo_queue #(
// CCI-P Write Request //////////////////////////////////////////////////////////
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_r;
reg [CCI_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr;
reg [CCI_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg [CCI_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_r;
t_ccip_clAddr cci_wr_req_addr;
always @(*) begin
@ -827,7 +871,7 @@ begin
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data);
`endif
@ -840,8 +884,8 @@ begin
`endif*/
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr_r <= cci_dram_rd_req_addr_r + DRAM_ADDR_WIDTH'(1);
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
cci_dram_rd_req_addr_r <= cci_dram_rd_req_addr_r + CCI_ADDR_WIDTH'(1);
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - CCI_ADDR_WIDTH'(1);
end
end
end

View file

@ -1,18 +1,19 @@
`ifndef __VORTEX_AFU__
`define __VORTEX_AFU__
`IGNORE_WARNINGS_BEGIN
`include "ccip_if_pkg.sv"
`IGNORE_WARNINGS_END
`define PLATFORM_PROVIDES_LOCAL_MEMORY
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 26
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE_BITS
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE_BITS 6
`endif
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (32-`PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE_BITS)
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH (8 << `PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE_BITS)
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4
`IGNORE_WARNINGS_BEGIN
`include "local_mem_cfg_pkg.sv"
`IGNORE_WARNINGS_END
`define AFU_ACCEL_NAME "vortex_afu"
`define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c

View file

@ -45,6 +45,7 @@ module VX_cache_core_req_bank_sel #(
output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready
);
`UNUSED_PARAM (CACHE_ID)
`STATIC_ASSERT (NUM_REQS >= NUM_BANKS, ("invalid number of banks"));
`UNUSED_VAR (clk)

View file

@ -33,6 +33,8 @@ module VX_cache_core_rsp_merge #(
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready
);
`UNUSED_PARAM (CACHE_ID)
if (NUM_BANKS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual;

View file

@ -2,6 +2,7 @@
# coding=utf-8
from __future__ import print_function
import sys
import os
import os.path as path
import re
@ -10,55 +11,19 @@ from datetime import datetime
script_dir = path.dirname(path.realpath(__file__))
defines = {}
for k, v in os.environ.items():
if k.upper().startswith('V_'):
defines[k[2:]] = v
print('Custom params:', ', '.join(['='.join(x) for x in defines.items()]))
parser = argparse.ArgumentParser()
parser.add_argument('--outc', default='none', help='Output C header')
parser.add_argument('--outv', default='none', help='Output Verilog header')
parser.add_argument('-i', "--input", default='none', help='Verilog header')
parser.add_argument('-o', "--output", default='none', help='C header')
args = parser.parse_args()
if args.outc == 'none' and args.outv == 'none':
print('Warning: not emitting any files. Specify arguments')
if args.outv != 'none':
with open(args.outv, 'w') as f:
print('''
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at {date}
`ifndef VX_USER_CONFIG
`define VX_USER_CONFIG
'''[1:].format(date=datetime.now()), file=f)
for k, v in defines.items():
print('`define {} {}'.format(k, v), file=f)
print('\n`endif', file=f)
if args.outc != 'none':
with open(args.outc, 'w') as f:
print('''
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at {date}
#ifndef VX_USER_CONFIG
#define VX_USER_CONFIG
'''[1:].format(date=datetime.now()), file=f)
for k, v in defines.items():
print('#define {} {}'.format(k, v), file=f)
print('\n#endif', file=f)
# Both the input Verilog header (-i) and the output C header (-o) are required.
if args.input == 'none' or args.output == 'none':
    print('Error: invalid arguments')
    # Exit with a non-zero status so a calling build step fails instead of
    # continuing without the generated header (a bare sys.exit() returns 0).
    sys.exit(1)
translation_rules = [
# preprocessor directives
(re.compile(r'^\s*`include .*$'), r''),
(re.compile(r'`include\s+.*$'), r''),
(re.compile(r'`ifdef'), r'#ifdef'),
(re.compile(r'`ifndef'), r'#ifndef'),
(re.compile(r'`elif'), r'#elif'),
@ -75,25 +40,24 @@ translation_rules = [
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
]
if args.outc != 'none':
with open(args.outc, 'a') as f:
print('''
with open(args.output, 'w') as f:
print('''
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at {date}
// Translated from VX_config.vh:
'''[1:].format(date=datetime.now()), file=f)
with open(path.join(script_dir, '../rtl/VX_config.vh'), 'r') as r:
lineno = 0
for line in r:
for pat, repl in translation_rules:
match = pat.search(line)
if match:
line = re.sub(pat, repl, line)
#print("*** match @" + str(lineno) + ": " + match.group() + " => " + line)
f.write(line)
lineno = lineno + 1
print('''
with open(args.input, 'r') as r:
lineno = 0
for line in r:
for pat, repl in translation_rules:
match = pat.search(line)
if match:
line = re.sub(pat, repl, line)
#print("*** match @" + str(lineno) + ": " + match.group() + " => " + line)
f.write(line)
lineno = lineno + 1
print('''
'''[1:], file=f)

View file

@ -21,11 +21,11 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
SINGLECORE = -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
#MULTICORE ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#MULTICORE = -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#MULTICORE = -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
MULTICORE = -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
SINGLECORE += $(CONFIGS)
MULTICORE += $(CONFIGS)
@ -42,7 +42,7 @@ SRCS = simulator.cpp testbench.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt