mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
performance refactoring - rebalanced stream buffers across the device to enforce output buffering rule at component boundaries, finally resolved block RAM R/W collision discrepancies,
This commit is contained in:
parent
29cd2f5dff
commit
4bbd7bf408
76 changed files with 1313 additions and 1098 deletions
|
@ -9,6 +9,9 @@ show_usage()
|
|||
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
|
||||
}
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
VORTEX_HOME=$SCRIPT_DIR/..
|
||||
|
||||
DRIVER=vlsim
|
||||
APP=sgemm
|
||||
CLUSTERS=1
|
||||
|
@ -62,6 +65,7 @@ case $i in
|
|||
;;
|
||||
--scope)
|
||||
SCOPE=1
|
||||
CORES=1
|
||||
shift
|
||||
;;
|
||||
--perf)
|
||||
|
@ -86,19 +90,19 @@ done
|
|||
|
||||
case $DRIVER in
|
||||
rtlsim)
|
||||
DRIVER_PATH=driver/rtlsim
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/rtlsim
|
||||
DRIVER_EXTRA=
|
||||
;;
|
||||
vlsim)
|
||||
DRIVER_PATH=driver/opae
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=vlsim
|
||||
;;
|
||||
asesim)
|
||||
DRIVER_PATH=driver/opae
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=asesim
|
||||
;;
|
||||
fpga)
|
||||
DRIVER_PATH=driver/opae
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=fpga
|
||||
;;
|
||||
*)
|
||||
|
@ -109,19 +113,19 @@ esac
|
|||
|
||||
case $APP in
|
||||
sgemm)
|
||||
APP_PATH=benchmarks/opencl/sgemm
|
||||
APP_PATH=$VORTEX_HOME/benchmarks/opencl/sgemm
|
||||
;;
|
||||
vecadd)
|
||||
APP_PATH=benchmarks/opencl/vacadd
|
||||
APP_PATH=$VORTEX_HOME/benchmarks/opencl/vacadd
|
||||
;;
|
||||
basic)
|
||||
APP_PATH=driver/tests/basic
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/basic
|
||||
;;
|
||||
demo)
|
||||
APP_PATH=driver/tests/demo
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/demo
|
||||
;;
|
||||
dogfood)
|
||||
APP_PATH=driver/tests/dogfood
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/dogfood
|
||||
;;
|
||||
*)
|
||||
echo "invalid app: $APP"
|
||||
|
|
|
@ -172,32 +172,32 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
// alu_stall
|
||||
uint64_t alu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ALU_ST, CSR_MPM_ALU_ST_H, &alu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: alu stalls=%ld\n", core_id, alu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: alu unit stalls=%ld\n", core_id, alu_stalls_per_core);
|
||||
alu_stalls += alu_stalls_per_core;
|
||||
// lsu_stall
|
||||
uint64_t lsu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_LSU_ST, CSR_MPM_LSU_ST_H, &lsu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu stalls=%ld\n", core_id, lsu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu unit stalls=%ld\n", core_id, lsu_stalls_per_core);
|
||||
lsu_stalls += lsu_stalls_per_core;
|
||||
// csr_stall
|
||||
uint64_t csr_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr stalls=%ld\n", core_id, csr_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
|
||||
csr_stalls += csr_stalls_per_core;
|
||||
// mul_stall
|
||||
uint64_t mul_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MUL_ST, CSR_MPM_MUL_ST_H, &mul_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: mul stalls=%ld\n", core_id, mul_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: mul unit stalls=%ld\n", core_id, mul_stalls_per_core);
|
||||
mul_stalls += mul_stalls_per_core;
|
||||
// fpu_stall
|
||||
uint64_t fpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu stalls=%ld\n", core_id, fpu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu unit stalls=%ld\n", core_id, fpu_stalls_per_core);
|
||||
fpu_stalls += fpu_stalls_per_core;
|
||||
// gpu_stall
|
||||
uint64_t gpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_GPU_ST, CSR_MPM_GPU_ST_H, &gpu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu stalls=%ld\n", core_id, gpu_stalls_per_core);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core);
|
||||
gpu_stalls += gpu_stalls_per_core;
|
||||
|
||||
// PERF: Icache
|
||||
|
@ -300,12 +300,12 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
fprintf(stream, "PERF: icache stalls=%ld\n", icache_stalls);
|
||||
fprintf(stream, "PERF: ibuffer stalls=%ld\n", ibuffer_stalls);
|
||||
fprintf(stream, "PERF: scoreboard stalls=%ld\n", scoreboard_stalls);
|
||||
fprintf(stream, "PERF: alu stalls=%ld\n", alu_stalls);
|
||||
fprintf(stream, "PERF: lsu stalls=%ld\n", lsu_stalls);
|
||||
fprintf(stream, "PERF: csr stalls=%ld\n", csr_stalls);
|
||||
fprintf(stream, "PERF: mul stalls=%ld\n", mul_stalls);
|
||||
fprintf(stream, "PERF: fpu stalls=%ld\n", fpu_stalls);
|
||||
fprintf(stream, "PERF: gpu stalls=%ld\n", gpu_stalls);
|
||||
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
|
||||
fprintf(stream, "PERF: lsu unit stalls=%ld\n", lsu_stalls);
|
||||
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
|
||||
fprintf(stream, "PERF: mul unit stalls=%ld\n", mul_stalls);
|
||||
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
|
||||
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
|
||||
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
|
||||
fprintf(stream, "PERF: icache read misses=%ld\n", icache_read_misses);
|
||||
fprintf(stream, "PERF: icache reponse stalls=%ld\n", icache_rsp_stalls);
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#define CCI_WQ_SIZE 16
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 100
|
||||
#define DRAM_LATENCY 24
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ make ase
|
|||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||
|
||||
# modify "vsim_run.tcl" to dump VCD trace
|
||||
vcd file vortex.vcd
|
||||
vcd file trace.vcd
|
||||
vcd add -r /*/Vortex/hw/rtl/*
|
||||
run -all
|
||||
|
||||
|
@ -104,8 +104,11 @@ lsof +D build_ase_1c
|
|||
make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 &
|
||||
make -C cache clean && make -C cache > cache/build.log 2>&1 &
|
||||
make -C core clean && make -C core > core/build.log 2>&1 &
|
||||
make -C core8 clean && make -C core8 > core8/build.log 2>&1 &
|
||||
make -C vortex clean && make -C vortex > vortex/build.log 2>&1 &
|
||||
make -C top clean && make -C top > top/build.log 2>&1 &
|
||||
make -C top1 clean && make -C top1 > top1/build.log 2>&1 &
|
||||
make -C top8 clean && make -C top8 > top8/build.log 2>&1 &
|
||||
|
||||
# How to calculate the maximum operating frequency?
|
||||
200 MHz -> period = 1/(200x10^6) s = 5 ns
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash
|
||||
|
||||
SCRIPT_DIR=$PWD
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
BUILD_DIR=$1
|
||||
|
||||
|
|
|
@ -4,21 +4,21 @@
|
|||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
#+define+SCOPE
|
||||
+define+PERF_ENABLE
|
||||
#+define+PERF_ENABLE
|
||||
|
||||
+define+DBG_PRINT_CORE_ICACHE
|
||||
+define+DBG_PRINT_CORE_DCACHE
|
||||
+define+DBG_PRINT_CACHE_BANK
|
||||
+define+DBG_PRINT_CACHE_SNP
|
||||
+define+DBG_PRINT_CACHE_MSRQ
|
||||
+define+DBG_PRINT_CACHE_TAG
|
||||
+define+DBG_PRINT_CACHE_DATA
|
||||
+define+DBG_PRINT_DRAM
|
||||
+define+DBG_PRINT_PIPELINE
|
||||
+define+DBG_PRINT_OPAE
|
||||
+define+DBG_PRINT_AVS
|
||||
+define+DBG_PRINT_SCOPE
|
||||
+define+DBG_CACHE_REQ_INFO
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
#+define+DBG_PRINT_CACHE_BANK
|
||||
#+define+DBG_PRINT_CACHE_SNP
|
||||
#+define+DBG_PRINT_CACHE_MSRQ
|
||||
#+define+DBG_PRINT_CACHE_TAG
|
||||
#+define+DBG_PRINT_CACHE_DATA
|
||||
#+define+DBG_PRINT_DRAM
|
||||
#+define+DBG_PRINT_PIPELINE
|
||||
#+define+DBG_PRINT_OPAE
|
||||
#+define+DBG_PRINT_AVS
|
||||
#+define+DBG_PRINT_SCOPE
|
||||
#+define+DBG_CACHE_REQ_INFO
|
||||
|
||||
vortex_afu.json
|
||||
QI:vortex_afu.qsf
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
+define+NUM_CORES=2
|
||||
+define+L2_ENABLE=0
|
||||
|
||||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
+define+NUM_CORES=4
|
||||
+define+L2_ENABLE=1
|
||||
|
||||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
|
||||
# Analysis & Synthesis Assignments
|
||||
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
||||
set_global_assignment -name VERILOG_MACRO QUARTUS
|
||||
|
@ -7,7 +9,14 @@ set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
|||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name VERILOG_MACRO FPU_FAST
|
||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||
|
@ -17,10 +26,4 @@ set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
|||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||
set_global_assignment -name SEED 1
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
set_global_assignment -name SEED 1
|
|
@ -181,7 +181,9 @@ module VX_cluster #(
|
|||
.NUM_REQS (`NUM_CORES),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L2CORE_TAG_WIDTH)
|
||||
.TAG_OUT_WIDTH (`L2CORE_TAG_WIDTH),
|
||||
.BUFFERED_REQ (`NUM_CORES >= 4),
|
||||
.BUFFERED_RSP (1)
|
||||
) io_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -218,9 +220,11 @@ module VX_cluster #(
|
|||
);
|
||||
|
||||
VX_csr_io_arb #(
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12)
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (`NUM_CORES >= 4)
|
||||
) csr_io_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -268,7 +272,8 @@ module VX_cluster #(
|
|||
.DST_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
|
||||
.SREQ_SIZE (`L2SREQ_SIZE),
|
||||
.TAG_IN_WIDTH (`L2SNP_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`DSNP_TAG_WIDTH)
|
||||
.TAG_OUT_WIDTH (`DSNP_TAG_WIDTH),
|
||||
.BUFFERED (`NUM_CORES >= 4)
|
||||
) snp_forwarder (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -301,49 +306,6 @@ module VX_cluster #(
|
|||
VX_perf_cache_if perf_l2cache_if();
|
||||
`endif
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_dram_req_valid_qual;
|
||||
wire [`NUM_CORES-1:0] per_core_dram_req_rw_qual;
|
||||
wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] per_core_dram_req_byteen_qual;
|
||||
wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_dram_req_addr_qual;
|
||||
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_dram_req_data_qual;
|
||||
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_req_tag_qual;
|
||||
wire [`NUM_CORES-1:0] per_core_dram_req_ready_qual;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_dram_rsp_valid_unqual;
|
||||
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_dram_rsp_data_unqual;
|
||||
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_rsp_tag_unqual;
|
||||
wire [`NUM_CORES-1:0] per_core_dram_rsp_ready_unqual;
|
||||
|
||||
for (genvar i = 0; i < `NUM_CORES; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (1 + `DDRAM_BYTEEN_WIDTH + `DDRAM_ADDR_WIDTH + `DDRAM_LINE_WIDTH + `XDRAM_TAG_WIDTH),
|
||||
.PASSTHRU (`NUM_CORES < 4)
|
||||
) core_req_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_dram_req_valid[i]),
|
||||
.data_in ({per_core_dram_req_rw[i], per_core_dram_req_byteen[i], per_core_dram_req_addr[i], per_core_dram_req_data[i], per_core_dram_req_tag[i]}),
|
||||
.ready_in (per_core_dram_req_ready[i]),
|
||||
.valid_out (per_core_dram_req_valid_qual[i]),
|
||||
.data_out ({per_core_dram_req_rw_qual[i], per_core_dram_req_byteen_qual[i], per_core_dram_req_addr_qual[i], per_core_dram_req_data_qual[i], per_core_dram_req_tag_qual[i]}),
|
||||
.ready_out (per_core_dram_req_ready_qual[i])
|
||||
);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`DDRAM_LINE_WIDTH + `XDRAM_TAG_WIDTH),
|
||||
.PASSTHRU (1)
|
||||
) core_rsp_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_dram_rsp_valid_unqual[i]),
|
||||
.data_in ({per_core_dram_rsp_data_unqual[i], per_core_dram_rsp_tag_unqual[i]}),
|
||||
.ready_in (per_core_dram_rsp_ready_unqual[i]),
|
||||
.valid_out (per_core_dram_rsp_valid[i]),
|
||||
.data_out ({per_core_dram_rsp_data[i], per_core_dram_rsp_tag[i]}),
|
||||
.ready_out (per_core_dram_rsp_ready[i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
.CACHE_SIZE (`L2CACHE_SIZE),
|
||||
|
@ -376,19 +338,19 @@ module VX_cluster #(
|
|||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_core_dram_req_valid_qual),
|
||||
.core_req_rw (per_core_dram_req_rw_qual),
|
||||
.core_req_byteen (per_core_dram_req_byteen_qual),
|
||||
.core_req_addr (per_core_dram_req_addr_qual),
|
||||
.core_req_data (per_core_dram_req_data_qual),
|
||||
.core_req_tag (per_core_dram_req_tag_qual),
|
||||
.core_req_ready (per_core_dram_req_ready_qual),
|
||||
.core_req_valid (per_core_dram_req_valid),
|
||||
.core_req_rw (per_core_dram_req_rw),
|
||||
.core_req_byteen (per_core_dram_req_byteen),
|
||||
.core_req_addr (per_core_dram_req_addr),
|
||||
.core_req_data (per_core_dram_req_data),
|
||||
.core_req_tag (per_core_dram_req_tag),
|
||||
.core_req_ready (per_core_dram_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_core_dram_rsp_valid_unqual),
|
||||
.core_rsp_data (per_core_dram_rsp_data_unqual),
|
||||
.core_rsp_tag (per_core_dram_rsp_tag_unqual),
|
||||
.core_rsp_ready (per_core_dram_rsp_ready_unqual),
|
||||
.core_rsp_valid (per_core_dram_rsp_valid),
|
||||
.core_rsp_data (per_core_dram_rsp_data),
|
||||
.core_rsp_tag (per_core_dram_rsp_tag),
|
||||
.core_rsp_ready (per_core_dram_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.dram_req_valid (dram_req_valid),
|
||||
|
@ -427,7 +389,9 @@ module VX_cluster #(
|
|||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
|
||||
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH)
|
||||
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (`NUM_CORES >= 4),
|
||||
.BUFFERED_RSP (1)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -24,11 +24,15 @@
|
|||
`endif
|
||||
|
||||
`ifndef L2_ENABLE
|
||||
`define L2_ENABLE (`NUM_CORES >= 4)
|
||||
`define L2_ENABLE 0
|
||||
`endif
|
||||
|
||||
`ifndef L3_ENABLE
|
||||
`define L3_ENABLE (`NUM_CLUSTERS >= 4)
|
||||
`define L3_ENABLE 0
|
||||
`endif
|
||||
|
||||
`ifndef SM_ENABLE
|
||||
`define SM_ENABLE 0
|
||||
`endif
|
||||
|
||||
`ifndef GLOBAL_BLOCK_SIZE
|
||||
|
@ -253,7 +257,7 @@
|
|||
|
||||
// Size of cache in bytes
|
||||
`ifndef ICACHE_SIZE
|
||||
`define ICACHE_SIZE 4096
|
||||
`define ICACHE_SIZE 2048
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
|
@ -285,7 +289,7 @@
|
|||
|
||||
// Size of cache in bytes
|
||||
`ifndef DCACHE_SIZE
|
||||
`define DCACHE_SIZE 8192
|
||||
`define DCACHE_SIZE 4096
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
|
@ -332,7 +336,7 @@
|
|||
|
||||
// Size of cache in bytes
|
||||
`ifndef SMEM_SIZE
|
||||
`define SMEM_SIZE 4096
|
||||
`define SMEM_SIZE 2048
|
||||
`endif
|
||||
|
||||
// Number of banks
|
||||
|
|
|
@ -43,8 +43,19 @@ module VX_csr_arb (
|
|||
assign csr_io_req_if.ready = csr_pipe_req_if.ready && !csr_core_req_if.valid;
|
||||
|
||||
// responses
|
||||
assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid & select_io_rsp;
|
||||
assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0];
|
||||
wire csr_io_rsp_ready;
|
||||
VX_skid_buffer #(
|
||||
.DATAW (32)
|
||||
) csr_io_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_pipe_rsp_if.valid & select_io_rsp),
|
||||
.data_in (csr_pipe_rsp_if.data[0]),
|
||||
.ready_in (csr_io_rsp_ready),
|
||||
.valid_out (csr_io_rsp_if.valid),
|
||||
.data_out (csr_io_rsp_if.data),
|
||||
.ready_out (csr_io_rsp_if.ready)
|
||||
);
|
||||
|
||||
assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp;
|
||||
assign csr_commit_if.wid = csr_pipe_rsp_if.wid;
|
||||
|
@ -54,6 +65,6 @@ module VX_csr_arb (
|
|||
assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
|
||||
assign csr_commit_if.data = csr_pipe_rsp_if.data;
|
||||
|
||||
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_ready : csr_commit_if.ready;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,24 +1,26 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_io_arb #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire [LOG_NUM_REQS-1:0] request_id,
|
||||
input wire [LOG_NUM_REQS-1:0] request_id,
|
||||
|
||||
// input requests
|
||||
input wire req_valid_in,
|
||||
input wire [ADDR_WIDTH-1:0] req_addr_in,
|
||||
input wire req_rw_in,
|
||||
input wire [DATA_WIDTH-1:0] req_data_in,
|
||||
output wire req_ready_in,
|
||||
input wire req_valid_in,
|
||||
input wire [ADDR_WIDTH-1:0] req_addr_in,
|
||||
input wire req_rw_in,
|
||||
input wire [DATA_WIDTH-1:0] req_data_in,
|
||||
output wire req_ready_in,
|
||||
|
||||
// output request
|
||||
output wire [NUM_REQS-1:0] req_valid_out,
|
||||
|
@ -33,40 +35,38 @@ module VX_csr_io_arb #(
|
|||
output wire [NUM_REQS-1:0] rsp_ready_in,
|
||||
|
||||
// output response
|
||||
output wire rsp_valid_out,
|
||||
output wire [DATA_WIDTH-1:0] rsp_data_out,
|
||||
input wire rsp_ready_out
|
||||
output wire rsp_valid_out,
|
||||
output wire [DATA_WIDTH-1:0] rsp_data_out,
|
||||
input wire rsp_ready_out
|
||||
);
|
||||
if (NUM_REQS > 1) begin
|
||||
localparam REQ_DATAW = ADDR_WIDTH + 1 + DATA_WIDTH;
|
||||
localparam RSP_DATAW = DATA_WIDTH;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign req_valid_out[i] = req_valid_in && (request_id == LOG_NUM_REQS'(i));
|
||||
assign req_addr_out[i] = req_addr_in;
|
||||
assign req_rw_out[i] = req_rw_in;
|
||||
assign req_data_out[i] = req_data_in;
|
||||
end
|
||||
|
||||
assign req_ready_in = req_ready_out[request_id];
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (request_id)
|
||||
|
||||
assign req_valid_out = req_valid_in;
|
||||
assign req_addr_out = req_addr_in;
|
||||
assign req_rw_out = req_rw_in;
|
||||
assign req_data_out = req_data_in;
|
||||
assign req_ready_in = req_ready_out;
|
||||
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_out;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign {req_addr_out[i], req_rw_out[i], req_data_out[i]} = req_merged_data_out[i];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
VX_stream_demux #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ)
|
||||
) req_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel (request_id),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in ({req_addr_in, req_rw_in, req_data_in}),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out (req_merged_data_out),
|
||||
.ready_out (req_ready_out)
|
||||
);
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (DATA_WIDTH),
|
||||
.IN_BUFFER (NUM_REQS >= 4),
|
||||
.OUT_BUFFER (NUM_REQS >= 4)
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (BUFFERED_RSP)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -5,6 +5,8 @@ module VX_databus_arb #(
|
|||
parameter WORD_SIZE = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_OUT_WIDTH = 1,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
|
||||
parameter WORD_WIDTH = WORD_SIZE * 8,
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
|
||||
|
@ -43,12 +45,13 @@ module VX_databus_arb #(
|
|||
output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_out,
|
||||
input wire [NUM_REQS-1:0] rsp_ready_out
|
||||
);
|
||||
localparam DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH);
|
||||
localparam REQ_DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH);
|
||||
localparam RSP_DATAW = TAG_IN_WIDTH + WORD_WIDTH;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [NUM_REQS-1:0] valids;
|
||||
wire [NUM_REQS-1:0][DATAW-1:0] data_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] data_in;
|
||||
wire [`NUM_THREADS-1:0] req_tmask_out;
|
||||
wire req_valid_out_unqual;
|
||||
|
||||
|
@ -58,34 +61,46 @@ module VX_databus_arb #(
|
|||
end
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (DATAW),
|
||||
.IN_BUFFER (NUM_REQS >= 4),
|
||||
.OUT_BUFFER (NUM_REQS >= 4)
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ)
|
||||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valids),
|
||||
.data_in (data_in),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out_unqual),
|
||||
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
|
||||
.ready_out (req_ready_out)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valids),
|
||||
.data_in (data_in),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out_unqual),
|
||||
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
|
||||
.ready_out (req_ready_out)
|
||||
);
|
||||
|
||||
assign req_valid_out = {`NUM_THREADS{req_valid_out_unqual}} & req_tmask_out;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == LOG_NUM_REQS'(i));
|
||||
assign rsp_tag_out[i] = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
|
||||
assign rsp_data_out[i] = rsp_data_in;
|
||||
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
|
||||
|
||||
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i];
|
||||
end
|
||||
|
||||
assign rsp_ready_in = rsp_ready_out[rsp_sel];
|
||||
|
||||
VX_stream_demux #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (BUFFERED_RSP)
|
||||
) rsp_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel (rsp_sel),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_merged_data_out),
|
||||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
|
|
|
@ -20,7 +20,8 @@ module VX_dcache_arb (
|
|||
// output response
|
||||
VX_cache_core_rsp_if core_rsp_if
|
||||
);
|
||||
localparam REQ_DATAW = `NUM_THREADS + 1 + `NUM_THREADS * `DWORD_SIZE + `NUM_THREADS * (32-`CLOG2(`DWORD_SIZE)) + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam REQ_ADDRW = 32 - `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_DATAW = `NUM_THREADS + 1 + `NUM_THREADS * `DWORD_SIZE + `NUM_THREADS * REQ_ADDRW + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
|
||||
//
|
||||
|
@ -28,54 +29,90 @@ module VX_dcache_arb (
|
|||
//
|
||||
|
||||
// select shared memory bus
|
||||
wire is_smem_addr = (| core_req_if.valid)
|
||||
&& ({core_req_if.addr[0], 2'b0} >= `SHARED_MEM_BASE_ADDR)
|
||||
&& ({core_req_if.addr[0], 2'b0} < (`SHARED_MEM_BASE_ADDR + `SMEM_SIZE));
|
||||
wire is_smem_addr = core_req_if.valid[0] && `SM_ENABLE
|
||||
&& (core_req_if.addr[0] >= REQ_ADDRW'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> 2))
|
||||
&& (core_req_if.addr[0] < REQ_ADDRW'(`SHARED_MEM_BASE_ADDR >> 2));
|
||||
|
||||
// select io bus
|
||||
wire is_io_addr = (| core_req_if.valid)
|
||||
&& ({core_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
|
||||
wire is_io_addr = core_req_if.valid[0]
|
||||
&& (core_req_if.addr[0] >= REQ_ADDRW'(`IO_BUS_BASE_ADDR >> 2));
|
||||
|
||||
wire cache_req_valid_out;
|
||||
wire [`NUM_THREADS-1:0] cache_req_tmask;
|
||||
wire cache_req_ready_in;
|
||||
|
||||
wire smem_req_valid_out;
|
||||
wire [`NUM_THREADS-1:0] smem_req_tmask;
|
||||
wire smem_req_ready_in;
|
||||
|
||||
wire io_req_valid_out;
|
||||
wire [`NUM_THREADS-1:0] io_req_tmask;
|
||||
wire io_req_ready_in;
|
||||
|
||||
reg [2:0] req_select;
|
||||
reg req_ready;
|
||||
|
||||
assign cache_req_if.valid = core_req_if.valid & {`NUM_THREADS{req_select[0]}};
|
||||
assign cache_req_if.rw = core_req_if.rw;
|
||||
assign cache_req_if.byteen = core_req_if.byteen;
|
||||
assign cache_req_if.addr = core_req_if.addr;
|
||||
assign cache_req_if.data = core_req_if.data;
|
||||
assign cache_req_if.tag = core_req_if.tag;
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) cache_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_select[0]),
|
||||
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
|
||||
.ready_in (cache_req_ready_in),
|
||||
.valid_out (cache_req_valid_out),
|
||||
.data_out ({cache_req_tmask, cache_req_if.addr, cache_req_if.rw, cache_req_if.byteen, cache_req_if.data, cache_req_if.tag}),
|
||||
.ready_out (cache_req_if.ready)
|
||||
);
|
||||
|
||||
assign smem_req_if.valid = core_req_if.valid & {`NUM_THREADS{req_select[1]}};
|
||||
assign smem_req_if.rw = core_req_if.rw;
|
||||
assign smem_req_if.byteen = core_req_if.byteen;
|
||||
assign smem_req_if.addr = core_req_if.addr;
|
||||
assign smem_req_if.data = core_req_if.data;
|
||||
assign smem_req_if.tag = core_req_if.tag;
|
||||
assign cache_req_if.valid = cache_req_tmask & {`NUM_THREADS{cache_req_valid_out}};
|
||||
|
||||
assign io_req_if.valid = core_req_if.valid & {`NUM_THREADS{req_select[2]}};
|
||||
assign io_req_if.rw = core_req_if.rw;
|
||||
assign io_req_if.byteen = core_req_if.byteen;
|
||||
assign io_req_if.addr = core_req_if.addr;
|
||||
assign io_req_if.data = core_req_if.data;
|
||||
assign io_req_if.tag = core_req_if.tag;
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) smem_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_select[1]),
|
||||
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
|
||||
.ready_in (smem_req_ready_in),
|
||||
.valid_out (smem_req_valid_out),
|
||||
.data_out ({smem_req_tmask, smem_req_if.addr, smem_req_if.rw, smem_req_if.byteen, smem_req_if.data, smem_req_if.tag}),
|
||||
.ready_out (smem_req_if.ready)
|
||||
);
|
||||
|
||||
assign core_req_if.ready = req_ready;
|
||||
assign smem_req_if.valid = smem_req_tmask & {`NUM_THREADS{smem_req_valid_out}};
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) io_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_select[2]),
|
||||
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
|
||||
.ready_in (io_req_ready_in),
|
||||
.valid_out (io_req_valid_out),
|
||||
.data_out ({io_req_tmask, io_req_if.addr, io_req_if.rw, io_req_if.byteen, io_req_if.data, io_req_if.tag}),
|
||||
.ready_out (io_req_if.ready)
|
||||
);
|
||||
|
||||
assign io_req_if.valid = io_req_tmask & {`NUM_THREADS{io_req_valid_out}};
|
||||
|
||||
always @(*) begin
|
||||
req_select = 0;
|
||||
if (is_smem_addr) begin
|
||||
req_select[1] = 1;
|
||||
req_ready = smem_req_if.ready;
|
||||
req_ready = smem_req_ready_in;
|
||||
end else if (is_io_addr) begin
|
||||
req_select[2] = 1;
|
||||
req_ready = io_req_if.ready;
|
||||
req_ready = io_req_ready_in;
|
||||
end else begin
|
||||
req_select[0] = 1;
|
||||
req_ready = cache_req_if.ready;
|
||||
req_ready = cache_req_ready_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign core_req_if.ready = req_ready;
|
||||
|
||||
//
|
||||
// select response
|
||||
//
|
||||
|
@ -92,14 +129,13 @@ module VX_dcache_arb (
|
|||
assign rsp_data_in[2] = {io_rsp_if.valid, io_rsp_if.data, io_rsp_if.tag};
|
||||
|
||||
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
|
||||
assign rsp_valid_in[1] = (| smem_rsp_if.valid);
|
||||
assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE;
|
||||
assign rsp_valid_in[2] = (| io_rsp_if.valid);
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (3),
|
||||
.DATAW (RSP_DATAW),
|
||||
.IN_BUFFER (1),
|
||||
.OUT_BUFFER (1)
|
||||
.NUM_REQS (3),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -40,19 +40,20 @@ module VX_fpu_unit #(
|
|||
wire fpuq_pop = valid_out && ready_out;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`FPUQ_SIZE),
|
||||
.FASTRAM (1)
|
||||
) req_metadata_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_slot (fpuq_push),
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (fpuq_pop),
|
||||
.full (fpuq_full)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_slot (fpuq_push),
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (fpuq_pop),
|
||||
.full (fpuq_full)
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
|
|
|
@ -99,7 +99,6 @@ module VX_gpu_unit #(
|
|||
`SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type);
|
||||
`SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data);
|
||||
|
||||
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);
|
||||
|
|
|
@ -42,9 +42,9 @@ module VX_ibuffer #(
|
|||
wire pop = reading && (size_r[i] != 1);
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(1)
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.FASTRAM (1)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -101,7 +101,7 @@ module VX_ibuffer #(
|
|||
end
|
||||
|
||||
// schedule the next instruction to issue
|
||||
// does round-robin scheduling when multiple warps are present
|
||||
// do round-robin when multiple warps are active
|
||||
always @(*) begin
|
||||
deq_valid_n = 0;
|
||||
deq_wid_n = 'x;
|
||||
|
|
|
@ -28,15 +28,14 @@ module VX_icache_stage #(
|
|||
VX_dp_ram #(
|
||||
.DATAW(32 + `NUM_THREADS),
|
||||
.SIZE(`NUM_WARPS),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0)
|
||||
.FASTRAM(1)
|
||||
) req_metadata (
|
||||
.clk(clk),
|
||||
.waddr(req_tag),
|
||||
.raddr(rsp_tag),
|
||||
.wren(icache_req_fire),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.rden(ifetch_rsp_if.valid),
|
||||
.din({ifetch_req_if.PC, ifetch_req_if.tmask}),
|
||||
.dout({ifetch_rsp_if.PC, ifetch_rsp_if.tmask})
|
||||
);
|
||||
|
@ -68,7 +67,6 @@ module VX_icache_stage #(
|
|||
`SCOPE_ASSIGN (icache_req_wid, ifetch_req_if.wid);
|
||||
`SCOPE_ASSIGN (icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN (icache_req_tag, req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready);
|
||||
`SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data[0]);
|
||||
`SCOPE_ASSIGN (icache_rsp_tag, rsp_tag);
|
||||
|
|
|
@ -14,13 +14,13 @@ module VX_ipdom_stack #(
|
|||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
localparam STACK_SIZE = 2 ** DEPTH;
|
||||
localparam ADDRW = $clog2(DEPTH);
|
||||
|
||||
reg is_part [STACK_SIZE-1:0];
|
||||
reg is_part [DEPTH-1:0];
|
||||
|
||||
reg [DEPTH-1:0] rd_ptr, wr_ptr;
|
||||
reg [ADDRW-1:0] rd_ptr, wr_ptr;
|
||||
|
||||
wire [WIDTH - 1:0] d1, d2;
|
||||
wire [WIDTH-1:0] d1, d2;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -29,18 +29,17 @@ module VX_ipdom_stack #(
|
|||
end else begin
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + DEPTH'(1);
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
|
||||
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
|
||||
wr_ptr <= wr_ptr - ADDRW'(is_part[rd_ptr]);
|
||||
rd_ptr <= rd_ptr - ADDRW'(is_part[rd_ptr]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(WIDTH * 2),
|
||||
.SIZE(STACK_SIZE),
|
||||
.BUFFERED(0),
|
||||
.SIZE(DEPTH),
|
||||
.RWCHECK(0)
|
||||
) store (
|
||||
.clk(clk),
|
||||
|
@ -48,7 +47,7 @@ module VX_ipdom_stack #(
|
|||
.raddr(rd_ptr),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.rden(pop),
|
||||
.din({q2, q1}),
|
||||
.dout({d2, d1})
|
||||
);
|
||||
|
@ -64,6 +63,6 @@ module VX_ipdom_stack #(
|
|||
|
||||
assign d = p ? d1 : d2;
|
||||
assign empty = ~(| wr_ptr);
|
||||
assign full = ((STACK_SIZE-1) == wr_ptr);
|
||||
assign full = (ADDRW'(DEPTH-1) == wr_ptr);
|
||||
|
||||
endmodule
|
|
@ -110,15 +110,13 @@ module VX_issue #(
|
|||
`SCOPE_ASSIGN (issue_imm, ibuf_deq_if.imm);
|
||||
`SCOPE_ASSIGN (issue_rs1_is_pc, ibuf_deq_if.rs1_is_PC);
|
||||
`SCOPE_ASSIGN (issue_rs2_is_imm, ibuf_deq_if.rs2_is_imm);
|
||||
|
||||
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
|
||||
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
|
||||
|
||||
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
|
||||
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data);
|
||||
|
||||
`SCOPE_ASSIGN (writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN (writeback_tmask, writeback_if.tmask);
|
||||
`SCOPE_ASSIGN (writeback_wid, writeback_if.wid);
|
||||
`SCOPE_ASSIGN (writeback_pc, writeback_if.PC);
|
||||
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
|
||||
|
|
|
@ -77,7 +77,7 @@ module VX_lsu_unit #(
|
|||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))),
|
||||
.R(1)
|
||||
) pipe_reg0 (
|
||||
) req_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_in),
|
||||
|
@ -111,8 +111,9 @@ module VX_lsu_unit #(
|
|||
wire lsuq_pop = lsuq_pop_part && (0 == mem_rsp_mask_n);
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
|
||||
.SIZE (`LSUQ_SIZE),
|
||||
.FASTRAM (1)
|
||||
) req_metadata_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -192,7 +193,7 @@ module VX_lsu_unit #(
|
|||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.R(1)
|
||||
) pipe_reg1 (
|
||||
) rsp_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (load_rsp_stall),
|
||||
|
@ -213,7 +214,6 @@ module VX_lsu_unit #(
|
|||
`SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (dcache_req_tag, req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}});
|
||||
`SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (dcache_rsp_tag, rsp_tag);
|
||||
|
@ -222,11 +222,11 @@ module VX_lsu_unit #(
|
|||
always @(posedge clk) begin
|
||||
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
||||
if (dcache_req_if.rw)
|
||||
$display("%t: D$%0d Rw Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
|
||||
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
|
||||
else
|
||||
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, byteen=%0h",
|
||||
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.byteen, dcache_req_if.data);
|
||||
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d",
|
||||
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, dcache_req_if.byteen, req_rd);
|
||||
end
|
||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||
|
|
|
@ -5,6 +5,8 @@ module VX_mem_arb #(
|
|||
parameter DATA_WIDTH = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_OUT_WIDTH = 1,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
|
||||
|
@ -48,38 +50,50 @@ module VX_mem_arb #(
|
|||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] data_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_in;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign data_in[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
|
||||
assign req_merged_data_in[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
|
||||
end
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.IN_BUFFER (NUM_REQS >= 4),
|
||||
.OUT_BUFFER (NUM_REQS >= 4)
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ)
|
||||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (data_in),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
|
||||
.ready_out (req_ready_out)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in (req_merged_data_in),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
|
||||
.ready_out (req_ready_out)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in [LOG_NUM_REQS-1:0];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign rsp_valid_out [i] = rsp_valid_in && (rsp_sel == LOG_NUM_REQS'(i));
|
||||
assign rsp_tag_out [i] = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
|
||||
assign rsp_data_out [i] = rsp_data_in;
|
||||
|
||||
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i];
|
||||
end
|
||||
|
||||
assign rsp_ready_in = rsp_ready_out [rsp_sel];
|
||||
|
||||
VX_stream_demux #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (BUFFERED_RSP)
|
||||
) rsp_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel (rsp_sel),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_merged_data_out),
|
||||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
|
|
|
@ -242,90 +242,96 @@ module VX_mem_unit # (
|
|||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`SCACHE_ID),
|
||||
.CACHE_SIZE (`SMEM_SIZE),
|
||||
.BANK_LINE_SIZE (`SBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`SNUM_BANKS),
|
||||
.WORD_SIZE (`SWORD_SIZE),
|
||||
.NUM_REQS (`SNUM_REQUESTS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.MSHR_SIZE (8),
|
||||
.DRSQ_SIZE (1),
|
||||
.SREQ_SIZE (1),
|
||||
.CRSQ_SIZE (`SCRSQ_SIZE),
|
||||
.DREQ_SIZE (1),
|
||||
.SRSQ_SIZE (1),
|
||||
.DRAM_ENABLE (0),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
|
||||
) smem (
|
||||
`SCOPE_BIND_VX_mem_unit_smem
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
if (`SM_ENABLE) begin
|
||||
|
||||
// Core request
|
||||
.core_req_valid (smem_req_if.valid),
|
||||
.core_req_rw (smem_req_if.rw),
|
||||
.core_req_byteen (smem_req_if.byteen),
|
||||
.core_req_addr (smem_req_if.addr),
|
||||
.core_req_data (smem_req_if.data),
|
||||
.core_req_tag (smem_req_if.tag),
|
||||
.core_req_ready (smem_req_if.ready),
|
||||
VX_cache #(
|
||||
.CACHE_ID (`SCACHE_ID),
|
||||
.CACHE_SIZE (`SMEM_SIZE),
|
||||
.BANK_LINE_SIZE (`SBANK_LINE_SIZE),
|
||||
.NUM_BANKS (`SNUM_BANKS),
|
||||
.WORD_SIZE (`SWORD_SIZE),
|
||||
.NUM_REQS (`SNUM_REQUESTS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.MSHR_SIZE (8),
|
||||
.DRSQ_SIZE (1),
|
||||
.SREQ_SIZE (1),
|
||||
.CRSQ_SIZE (`SCRSQ_SIZE),
|
||||
.DREQ_SIZE (1),
|
||||
.SRSQ_SIZE (1),
|
||||
.DRAM_ENABLE (0),
|
||||
.FLUSH_ENABLE (0),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
|
||||
) smem (
|
||||
`SCOPE_BIND_VX_mem_unit_smem
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (smem_rsp_if.valid),
|
||||
.core_rsp_data (smem_rsp_if.data),
|
||||
.core_rsp_tag (smem_rsp_if.tag),
|
||||
.core_rsp_ready (smem_rsp_if.ready),
|
||||
// Core request
|
||||
.core_req_valid (smem_req_if.valid),
|
||||
.core_req_rw (smem_req_if.rw),
|
||||
.core_req_byteen (smem_req_if.byteen),
|
||||
.core_req_addr (smem_req_if.addr),
|
||||
.core_req_data (smem_req_if.data),
|
||||
.core_req_tag (smem_req_if.tag),
|
||||
.core_req_ready (smem_req_if.ready),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_smem_if),
|
||||
`endif
|
||||
// Core response
|
||||
.core_rsp_valid (smem_rsp_if.valid),
|
||||
.core_rsp_data (smem_rsp_if.data),
|
||||
.core_rsp_tag (smem_rsp_if.tag),
|
||||
.core_rsp_ready (smem_rsp_if.ready),
|
||||
|
||||
// DRAM request
|
||||
`UNUSED_PIN (dram_req_valid),
|
||||
`UNUSED_PIN (dram_req_rw),
|
||||
`UNUSED_PIN (dram_req_byteen),
|
||||
`UNUSED_PIN (dram_req_addr),
|
||||
`UNUSED_PIN (dram_req_data),
|
||||
`UNUSED_PIN (dram_req_tag),
|
||||
.dram_req_ready (1'b0),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_smem_if),
|
||||
`endif
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (0),
|
||||
.dram_rsp_data (0),
|
||||
.dram_rsp_tag (0),
|
||||
`UNUSED_PIN (dram_rsp_ready),
|
||||
// DRAM request
|
||||
`UNUSED_PIN (dram_req_valid),
|
||||
`UNUSED_PIN (dram_req_rw),
|
||||
`UNUSED_PIN (dram_req_byteen),
|
||||
`UNUSED_PIN (dram_req_addr),
|
||||
`UNUSED_PIN (dram_req_data),
|
||||
`UNUSED_PIN (dram_req_tag),
|
||||
.dram_req_ready (1'b0),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (1'b0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_inv (0),
|
||||
.snp_req_tag (0),
|
||||
`UNUSED_PIN (snp_req_ready),
|
||||
// DRAM response
|
||||
.dram_rsp_valid (0),
|
||||
.dram_rsp_data (0),
|
||||
.dram_rsp_tag (0),
|
||||
`UNUSED_PIN (dram_rsp_ready),
|
||||
|
||||
// Snoop response
|
||||
`UNUSED_PIN (snp_rsp_valid),
|
||||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
// Snoop request
|
||||
.snp_req_valid (1'b0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_inv (0),
|
||||
.snp_req_tag (0),
|
||||
`UNUSED_PIN (snp_req_ready),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
// Snoop response
|
||||
`UNUSED_PIN (snp_rsp_valid),
|
||||
`UNUSED_PIN (snp_rsp_tag),
|
||||
.snp_rsp_ready (1'b0),
|
||||
|
||||
// Miss status
|
||||
`UNUSED_PIN (miss_vec)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (`DDRAM_LINE_WIDTH),
|
||||
.ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`XDRAM_TAG_WIDTH)
|
||||
.TAG_OUT_WIDTH (`XDRAM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (0)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -32,19 +32,20 @@ module VX_mul_unit #(
|
|||
wire mulq_pop = valid_out && ready_out;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`MULQ_SIZE)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`MULQ_SIZE),
|
||||
.FASTRAM (1)
|
||||
) req_metadata_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_slot (mulq_push),
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (mulq_pop),
|
||||
.full (mulq_full)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_slot (mulq_push),
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (mulq_pop),
|
||||
.full (mulq_full)
|
||||
);
|
||||
|
||||
wire valid_in = mul_req_if.valid && ~mulq_full;
|
||||
|
|
|
@ -51,7 +51,7 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define USE_FAST_BRAM (* ramstyle="mlab" *)
|
||||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -29,8 +29,8 @@ module VX_warp_sched #(
|
|||
// Lock warp until instruction decode to resolve branches
|
||||
reg [`NUM_WARPS-1:0] fetch_lock;
|
||||
|
||||
reg [`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
|
||||
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
|
||||
reg [`NUM_THREADS-1:0] thread_masks [`NUM_WARPS-1:0];
|
||||
reg [31:0] warp_pcs [`NUM_WARPS-1:0];
|
||||
|
||||
// barriers
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0]; // warps waiting on barrier
|
||||
|
@ -180,11 +180,11 @@ module VX_warp_sched #(
|
|||
|
||||
// split/join stack management
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split.pc, warp_ctl_if.split.else_mask};
|
||||
|
||||
assign {join_fall, join_pc, join_tm} = ipdom[join_if.wid];
|
||||
assign {join_fall, join_pc, join_tm} = ipdom [join_if.wid];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.valid
|
||||
|
@ -196,7 +196,7 @@ module VX_warp_sched #(
|
|||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH(1+32+`NUM_THREADS),
|
||||
.DEPTH(`NT_BITS+1)
|
||||
.DEPTH(2 ** (`NT_BITS+1))
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
|
|
|
@ -71,8 +71,7 @@ module VX_writeback #(
|
|||
fpu_valid ? fpu_commit_if.data :
|
||||
0;
|
||||
|
||||
wire stall =~writeback_if.ready && writeback_if.valid;
|
||||
always @(*) assert(writeback_if.ready); // the writeback currently has no backpressure from issue stage
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)),
|
||||
|
|
107
hw/rtl/Vortex.v
107
hw/rtl/Vortex.v
|
@ -183,7 +183,9 @@ module Vortex (
|
|||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_IN_WIDTH (`L2CORE_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH)
|
||||
.TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
|
||||
) io_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -220,9 +222,11 @@ module Vortex (
|
|||
);
|
||||
|
||||
VX_csr_io_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12)
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (`NUM_CLUSTERS >= 4),
|
||||
.BUFFERED_RSP (1)
|
||||
) csr_io_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -270,7 +274,8 @@ module Vortex (
|
|||
.DST_ADDR_WIDTH (`L2DRAM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`L3SNP_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L2SNP_TAG_WIDTH),
|
||||
.SREQ_SIZE (`L3SREQ_SIZE)
|
||||
.SREQ_SIZE (`L3SREQ_SIZE),
|
||||
.BUFFERED (`NUM_CLUSTERS >= 4)
|
||||
) snp_forwarder (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -303,49 +308,6 @@ module Vortex (
|
|||
VX_perf_cache_if perf_l3cache_if();
|
||||
`endif
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid_qual;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw_qual;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen_qual;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr_qual;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data_qual;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag_qual;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready_qual;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid_unqual;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data_unqual;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag_unqual;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready_unqual;
|
||||
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (1 + `L2DRAM_BYTEEN_WIDTH + `L2DRAM_ADDR_WIDTH + `L2DRAM_LINE_WIDTH + `L2DRAM_TAG_WIDTH),
|
||||
.PASSTHRU (`NUM_CLUSTERS < 4)
|
||||
) dram_req_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_cluster_dram_req_valid[i]),
|
||||
.data_in ({per_cluster_dram_req_rw[i], per_cluster_dram_req_byteen[i], per_cluster_dram_req_addr[i], per_cluster_dram_req_data[i], per_cluster_dram_req_tag[i]}),
|
||||
.ready_in (per_cluster_dram_req_ready[i]),
|
||||
.valid_out (per_cluster_dram_req_valid_qual[i]),
|
||||
.data_out ({per_cluster_dram_req_rw_qual[i], per_cluster_dram_req_byteen_qual[i], per_cluster_dram_req_addr_qual[i], per_cluster_dram_req_data_qual[i], per_cluster_dram_req_tag_qual[i]}),
|
||||
.ready_out (per_cluster_dram_req_ready_qual[i])
|
||||
);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`L2DRAM_LINE_WIDTH + `L2DRAM_TAG_WIDTH),
|
||||
.PASSTHRU (1)
|
||||
) core_rsp_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_cluster_dram_rsp_valid_unqual[i]),
|
||||
.data_in ({per_cluster_dram_rsp_data_unqual[i], per_cluster_dram_rsp_tag_unqual[i]}),
|
||||
.ready_in (per_cluster_dram_rsp_ready_unqual[i]),
|
||||
.valid_out (per_cluster_dram_rsp_valid[i]),
|
||||
.data_out ({per_cluster_dram_rsp_data[i], per_cluster_dram_rsp_tag[i]}),
|
||||
.ready_out (per_cluster_dram_rsp_ready[i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L3CACHE_ID),
|
||||
.CACHE_SIZE (`L3CACHE_SIZE),
|
||||
|
@ -378,19 +340,19 @@ module Vortex (
|
|||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_cluster_dram_req_valid_qual),
|
||||
.core_req_rw (per_cluster_dram_req_rw_qual),
|
||||
.core_req_byteen (per_cluster_dram_req_byteen_qual),
|
||||
.core_req_addr (per_cluster_dram_req_addr_qual),
|
||||
.core_req_data (per_cluster_dram_req_data_qual),
|
||||
.core_req_tag (per_cluster_dram_req_tag_qual),
|
||||
.core_req_ready (per_cluster_dram_req_ready_qual),
|
||||
.core_req_valid (per_cluster_dram_req_valid),
|
||||
.core_req_rw (per_cluster_dram_req_rw),
|
||||
.core_req_byteen (per_cluster_dram_req_byteen),
|
||||
.core_req_addr (per_cluster_dram_req_addr),
|
||||
.core_req_data (per_cluster_dram_req_data),
|
||||
.core_req_tag (per_cluster_dram_req_tag),
|
||||
.core_req_ready (per_cluster_dram_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_cluster_dram_rsp_valid_unqual),
|
||||
.core_rsp_data (per_cluster_dram_rsp_data_unqual),
|
||||
.core_rsp_tag (per_cluster_dram_rsp_tag_unqual),
|
||||
.core_rsp_ready (per_cluster_dram_rsp_ready_unqual),
|
||||
.core_rsp_valid (per_cluster_dram_rsp_valid),
|
||||
.core_rsp_data (per_cluster_dram_rsp_data),
|
||||
.core_rsp_tag (per_cluster_dram_rsp_tag),
|
||||
.core_rsp_ready (per_cluster_dram_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.dram_req_valid (dram_req_valid),
|
||||
|
@ -429,7 +391,9 @@ module Vortex (
|
|||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3DRAM_LINE_WIDTH),
|
||||
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH)
|
||||
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -476,28 +440,23 @@ module Vortex (
|
|||
|
||||
`SCOPE_ASSIGN (reset, reset);
|
||||
|
||||
`SCOPE_ASSIGN (dram_req_fire, dram_req_valid && dram_req_ready);
|
||||
`SCOPE_ASSIGN (dram_req_addr, `TO_FULL_ADDR(dram_req_addr));
|
||||
`SCOPE_ASSIGN (dram_req_rw, dram_req_rw);
|
||||
`SCOPE_ASSIGN (dram_req_byteen,dram_req_byteen);
|
||||
`SCOPE_ASSIGN (dram_req_data, dram_req_data);
|
||||
`SCOPE_ASSIGN (dram_req_tag, dram_req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (dram_rsp_fire, dram_rsp_valid && dram_rsp_ready);
|
||||
`SCOPE_ASSIGN (dram_rsp_data, dram_rsp_data);
|
||||
`SCOPE_ASSIGN (dram_rsp_tag, dram_rsp_tag);
|
||||
|
||||
`SCOPE_ASSIGN (dram_req_fire, dram_req_valid && dram_req_ready);
|
||||
`SCOPE_ASSIGN (dram_req_addr, `TO_FULL_ADDR(dram_req_addr));
|
||||
`SCOPE_ASSIGN (dram_req_rw, dram_req_rw);
|
||||
`SCOPE_ASSIGN (dram_req_byteen, dram_req_byteen);
|
||||
`SCOPE_ASSIGN (dram_req_data, dram_req_data);
|
||||
`SCOPE_ASSIGN (dram_req_tag, dram_req_tag);
|
||||
`SCOPE_ASSIGN (dram_rsp_fire, dram_rsp_valid && dram_rsp_ready);
|
||||
`SCOPE_ASSIGN (dram_rsp_data, dram_rsp_data);
|
||||
`SCOPE_ASSIGN (dram_rsp_tag, dram_rsp_tag);
|
||||
`SCOPE_ASSIGN (snp_req_fire, snp_req_valid && snp_req_ready);
|
||||
`SCOPE_ASSIGN (snp_req_addr, `TO_FULL_ADDR(snp_req_addr));
|
||||
`SCOPE_ASSIGN (snp_req_inv, snp_req_inv);
|
||||
`SCOPE_ASSIGN (snp_req_tag, snp_req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready);
|
||||
`SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag);
|
||||
|
||||
`SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready);
|
||||
`SCOPE_ASSIGN (snp_rsp_tag, snp_rsp_tag);
|
||||
|
||||
`SCOPE_ASSIGN (busy, busy);
|
||||
|
||||
`ifdef DBG_PRINT_DRAM
|
||||
|
|
|
@ -59,20 +59,25 @@ module VX_avs_wrapper #(
|
|||
+ RD_QUEUE_ADDRW'((avs_reqq_push && !avs_rspq_pop) ? 1 :
|
||||
(avs_rspq_pop && !avs_reqq_push) ? -1 : 0);
|
||||
|
||||
reg rsp_queue_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
avs_burstcount_r <= 1;
|
||||
avs_bankselect_r <= 0;
|
||||
avs_pending_reads <= 0;
|
||||
rsp_queue_ready <= 1;
|
||||
end else begin
|
||||
avs_pending_reads <= avs_pending_reads_n;
|
||||
rsp_queue_ready <= (avs_pending_reads_n != RD_QUEUE_SIZE);
|
||||
end
|
||||
end
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW (REQ_TAGW),
|
||||
.SIZE (RD_QUEUE_SIZE),
|
||||
.BUFFERED (1)
|
||||
.DATAW (REQ_TAGW),
|
||||
.SIZE (RD_QUEUE_SIZE),
|
||||
.BUFFERED(1),
|
||||
.FASTRAM (1)
|
||||
) rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -86,9 +91,10 @@ module VX_avs_wrapper #(
|
|||
);
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW (AVS_DATAW),
|
||||
.SIZE (RD_QUEUE_SIZE),
|
||||
.BUFFERED (1)
|
||||
.DATAW (AVS_DATAW),
|
||||
.SIZE (RD_QUEUE_SIZE),
|
||||
.BUFFERED(1),
|
||||
.FASTRAM (1)
|
||||
) rd_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -101,8 +107,6 @@ module VX_avs_wrapper #(
|
|||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
wire rsp_queue_ready = (avs_pending_reads != RD_QUEUE_SIZE);
|
||||
|
||||
assign avs_read = dram_req_valid && !dram_req_rw && rsp_queue_ready;
|
||||
assign avs_write = dram_req_valid && dram_req_rw && rsp_queue_ready;
|
||||
assign avs_address = dram_req_addr;
|
||||
|
|
|
@ -700,8 +700,8 @@ always @(posedge clk) begin
|
|||
end
|
||||
|
||||
cci_rd_req_enable <= (STATE_WRITE == state)
|
||||
&& (cci_rd_req_ctr_next < cmd_data_size)
|
||||
&& (cci_pending_reads_next < CCI_RD_QUEUE_SIZE)
|
||||
&& (cci_rd_req_ctr_next != cmd_data_size)
|
||||
&& (cci_pending_reads_next != CCI_RD_QUEUE_SIZE)
|
||||
&& !cp2af_sRxPort.c0TxAlmFull;
|
||||
|
||||
if (cci_rd_req_fire) begin
|
||||
|
@ -741,8 +741,9 @@ always @(posedge clk) begin
|
|||
end
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(CCI_RD_RQ_DATAW),
|
||||
.SIZE(CCI_RD_QUEUE_SIZE)
|
||||
.DATAW (CCI_RD_RQ_DATAW),
|
||||
.SIZE (CCI_RD_QUEUE_SIZE),
|
||||
.FASTRAM (1)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -898,7 +899,7 @@ always @(posedge clk) begin
|
|||
end
|
||||
|
||||
if ((STATE_CLFLUSH == state)
|
||||
&& (snp_req_ctr_next >= snp_req_size)) begin
|
||||
&& (snp_req_ctr_next == snp_req_size)) begin
|
||||
vx_snp_req_valid <= 0;
|
||||
end
|
||||
|
||||
|
|
338
hw/rtl/cache/VX_bank.v
vendored
338
hw/rtl/cache/VX_bank.v
vendored
|
@ -111,36 +111,24 @@ module VX_bank #(
|
|||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_pc_st0;
|
||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||
wire[`NW_BITS-1:0] debug_wid_st0;
|
||||
wire debug_rw_st0;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st0;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st0;
|
||||
wire [31:0] debug_pc_st0;
|
||||
wire [`NR_BITS-1:0] debug_rd_st0;
|
||||
wire [`NW_BITS-1:0] debug_wid_st0;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
|
||||
|
||||
wire[31:0] debug_pc_st1;
|
||||
wire[`NR_BITS-1:0] debug_rd_st1;
|
||||
wire[`NW_BITS-1:0] debug_wid_st1;
|
||||
wire debug_rw_st1;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st1;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st1;
|
||||
|
||||
wire [31:0] debug_pc_st1;
|
||||
wire [`NR_BITS-1:0] debug_rd_st1;
|
||||
wire [`NW_BITS-1:0] debug_wid_st1;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1;
|
||||
|
||||
wire[31:0] debug_pc_st2;
|
||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||
wire[`NW_BITS-1:0] debug_wid_st2;
|
||||
wire debug_rw_st2;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st2;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st2;
|
||||
wire [31:0] debug_pc_st2;
|
||||
wire [`NR_BITS-1:0] debug_rd_st2;
|
||||
wire [`NW_BITS-1:0] debug_wid_st2;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2;
|
||||
|
||||
wire[31:0] debug_pc_st3;
|
||||
wire[`NR_BITS-1:0] debug_rd_st3;
|
||||
wire[`NW_BITS-1:0] debug_wid_st3;
|
||||
wire debug_rw_st3;
|
||||
wire[WORD_SIZE-1:0] debug_byteen_st3;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st3;
|
||||
wire [31:0] debug_pc_st3;
|
||||
wire [`NR_BITS-1:0] debug_rd_st3;
|
||||
wire [`NW_BITS-1:0] debug_wid_st3;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st3;
|
||||
/* verilator lint_on UNUSED */
|
||||
`endif
|
||||
|
@ -159,9 +147,10 @@ module VX_bank #(
|
|||
wire sreq_push = snp_req_valid && snp_req_ready;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
|
||||
.SIZE(SREQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
.DATAW (`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
|
||||
.SIZE (SREQ_SIZE),
|
||||
.BUFFERED (1),
|
||||
.FASTRAM (1)
|
||||
) snp_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -200,9 +189,10 @@ module VX_bank #(
|
|||
assign dram_rsp_ready = !drsq_full;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
|
||||
.SIZE(DRSQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
.DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
|
||||
.SIZE (DRSQ_SIZE),
|
||||
.BUFFERED (1),
|
||||
.FASTRAM (1)
|
||||
) dram_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -246,30 +236,30 @@ module VX_bank #(
|
|||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||
) core_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Enqueue
|
||||
.push (creq_push),
|
||||
.tag_in (core_req_tag),
|
||||
.valids_in (core_req_valid),
|
||||
.rw_in (core_req_rw),
|
||||
.byteen_in (core_req_byteen),
|
||||
.addr_in (core_req_addr),
|
||||
.writedata_in (core_req_data),
|
||||
.push (creq_push),
|
||||
.tag_in (core_req_tag),
|
||||
.valids_in (core_req_valid),
|
||||
.rw_in (core_req_rw),
|
||||
.byteen_in (core_req_byteen),
|
||||
.addr_in (core_req_addr),
|
||||
.wdata_in (core_req_data),
|
||||
|
||||
// Dequeue
|
||||
.pop (creq_pop),
|
||||
.tag_out (creq_tag_st0),
|
||||
.tid_out (creq_tid_st0),
|
||||
.rw_out (creq_rw_st0),
|
||||
.byteen_out (creq_byteen_st0),
|
||||
.addr_out (creq_addr_st0),
|
||||
.writedata_out (creq_writeword_st0),
|
||||
.pop (creq_pop),
|
||||
.tag_out (creq_tag_st0),
|
||||
.tid_out (creq_tid_st0),
|
||||
.rw_out (creq_rw_st0),
|
||||
.byteen_out (creq_byteen_st0),
|
||||
.addr_out (creq_addr_st0),
|
||||
.wdata_out (creq_writeword_st0),
|
||||
|
||||
// States
|
||||
.empty (creq_empty),
|
||||
.full (creq_full)
|
||||
.empty (creq_empty),
|
||||
.full (creq_full)
|
||||
);
|
||||
|
||||
reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size;
|
||||
|
@ -277,7 +267,7 @@ module VX_bank #(
|
|||
reg mshr_going_full;
|
||||
wire mshr_pop;
|
||||
wire mshr_valid_st0;
|
||||
wire[`REQS_BITS-1:0] mshr_tid_st0;
|
||||
wire [`REQS_BITS-1:0] mshr_tid_st0;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_st0;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] mshr_wsel_st0;
|
||||
wire [`WORD_WIDTH-1:0] mshr_writeword_st0;
|
||||
|
@ -286,6 +276,7 @@ module VX_bank #(
|
|||
wire [WORD_SIZE-1:0] mshr_byteen_st0;
|
||||
wire mshr_is_snp_st0;
|
||||
wire mshr_snp_inv_st0;
|
||||
wire mshr_pending_hazard_unqual_st0;
|
||||
|
||||
wire is_fill_st0;
|
||||
wire is_mshr_st0;
|
||||
|
@ -295,9 +286,11 @@ module VX_bank #(
|
|||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st0;
|
||||
wire [`WORD_WIDTH-1:0] writeword_st0;
|
||||
wire [`BANK_LINE_WIDTH-1:0] writedata_st0;
|
||||
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st0;
|
||||
wire snp_inv_st0;
|
||||
wire mshr_pending_hazard_unqual_st0;
|
||||
wire [`REQ_TAG_WIDTH-1:0] tag_st0;
|
||||
wire mem_rw_st0;
|
||||
wire [WORD_SIZE-1:0] byteen_st0;
|
||||
wire [`REQS_BITS-1:0] req_tid_st0;
|
||||
|
||||
wire is_fill_st1;
|
||||
wire is_mshr_st1;
|
||||
|
@ -306,32 +299,26 @@ module VX_bank #(
|
|||
wire [`LINE_ADDR_WIDTH-1:0] addr_st1;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1;
|
||||
wire [`WORD_WIDTH-1:0] writeword_st1;
|
||||
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1;
|
||||
wire [`BANK_LINE_WIDTH-1:0] writedata_st1;
|
||||
wire snp_inv_st1;
|
||||
|
||||
wire [`TAG_SELECT_BITS-1:0] readtag_st1;
|
||||
wire miss_st1;
|
||||
wire force_miss_st1;
|
||||
wire dirty_st1;
|
||||
wire [WORD_SIZE-1:0] mem_byteen_st1;
|
||||
wire writeen_st1;
|
||||
wire mem_rw_st1;
|
||||
`DEBUG_BEGIN
|
||||
wire [`REQ_TAG_WIDTH-1:0] tag_st1;
|
||||
wire [`REQS_BITS-1:0] tid_st1;
|
||||
`DEBUG_END
|
||||
wire mem_rw_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_st1;
|
||||
wire [`REQS_BITS-1:0] req_tid_st1;
|
||||
|
||||
wire valid_st2;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2;
|
||||
wire [`WORD_WIDTH-1:0] writeword_st2;
|
||||
wire [`WORD_WIDTH-1:0] readword_st2;
|
||||
wire [`WORD_WIDTH-1:0] writeword_st2;
|
||||
wire [`BANK_LINE_WIDTH-1:0] readdata_st2;
|
||||
wire [`BANK_LINE_WIDTH-1:0] writedata_st2;
|
||||
wire [WORD_SIZE-1:0] mem_byteen_st2;
|
||||
wire dirty_st2;
|
||||
wire [BANK_LINE_SIZE-1:0] dirtyb_st2;
|
||||
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2;
|
||||
wire [`TAG_SELECT_BITS-1:0] readtag_st2;
|
||||
wire is_fill_st2;
|
||||
wire is_snp_st2;
|
||||
|
@ -342,15 +329,22 @@ module VX_bank #(
|
|||
wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
|
||||
wire writeen_st2;
|
||||
wire core_req_hit_st2;
|
||||
wire incoming_fill_st2;
|
||||
wire [`REQ_TAG_WIDTH-1:0] tag_st2;
|
||||
wire mem_rw_st2;
|
||||
wire [WORD_SIZE-1:0] byteen_st2;
|
||||
wire [`REQS_BITS-1:0] req_tid_st2;
|
||||
|
||||
wire valid_st3;
|
||||
wire is_mshr_st3;
|
||||
wire miss_st3;
|
||||
wire force_miss_st3;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st3;
|
||||
|
||||
wire core_req_hit_st1;
|
||||
|
||||
wire [`REQ_TAG_WIDTH-1:0] tag_st3;
|
||||
wire mem_rw_st3;
|
||||
wire [WORD_SIZE-1:0] byteen_st3;
|
||||
wire [`REQS_BITS-1:0] req_tid_st3;
|
||||
|
||||
wire mshr_push_stall;
|
||||
wire crsq_push_stall;
|
||||
wire dreq_push_stall;
|
||||
|
@ -360,7 +354,7 @@ module VX_bank #(
|
|||
wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2);
|
||||
wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3);
|
||||
|
||||
wire creq_commit = valid_st1 && core_req_hit_st1 && !pipeline_stall;
|
||||
wire creq_commit = valid_st2 && core_req_hit_st2 && !pipeline_stall;
|
||||
|
||||
// determine which queue to pop next in priority order
|
||||
wire mshr_pop_unqual = mshr_valid_st0;
|
||||
|
@ -383,7 +377,7 @@ module VX_bank #(
|
|||
mshr_going_full <= 0;
|
||||
end else begin
|
||||
mshr_pending_size <= mshr_pending_size_n;
|
||||
mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE);
|
||||
mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -409,10 +403,25 @@ module VX_bank #(
|
|||
|
||||
assign writedata_st0 = drsq_filldata_st0;
|
||||
|
||||
assign inst_meta_st0 = mshr_pop_unqual ? {`REQ_TAG_WIDTH'(mshr_tag_st0), mshr_rw_st0, mshr_byteen_st0, mshr_tid_st0} :
|
||||
creq_pop_unqual ? {`REQ_TAG_WIDTH'(creq_tag_st0), creq_rw_st0, creq_byteen_st0, creq_tid_st0} :
|
||||
sreq_pop_unqual ? {`REQ_TAG_WIDTH'(sreq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} :
|
||||
0;
|
||||
assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) :
|
||||
creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) :
|
||||
sreq_pop_unqual ? `REQ_TAG_WIDTH'(sreq_tag_st0) :
|
||||
0;
|
||||
|
||||
assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 :
|
||||
creq_pop_unqual ? creq_rw_st0 :
|
||||
sreq_pop_unqual ? 1'b0 :
|
||||
0;
|
||||
|
||||
assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 :
|
||||
creq_pop_unqual ? creq_byteen_st0 :
|
||||
sreq_pop_unqual ? WORD_SIZE'(0) :
|
||||
0;
|
||||
|
||||
assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 :
|
||||
creq_pop_unqual ? creq_tid_st0 :
|
||||
sreq_pop_unqual ? `REQS_BITS'(0) :
|
||||
0;
|
||||
|
||||
assign is_snp_st0 = mshr_pop_unqual ? mshr_is_snp_st0 :
|
||||
sreq_pop_unqual ? 1 :
|
||||
|
@ -428,9 +437,9 @@ module VX_bank #(
|
|||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0} = tag_st0;
|
||||
end else begin
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = 0;
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
|
@ -443,27 +452,25 @@ if (DRAM_ENABLE) begin
|
|||
|| (valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH),
|
||||
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
|
||||
.R(1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
|
||||
.data_in ({valid_st0, is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, addr_st0, wsel_st0, writeword_st0, is_fill_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, addr_st1, wsel_st1, writeword_st1, is_fill_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1} = tag_st1;
|
||||
end else begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1;
|
||||
|
||||
// force miss to ensure commit order when a new request has pending previous requests to same block
|
||||
// also force a miss for msrq requests when previous requests got a miss
|
||||
wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1);
|
||||
|
@ -511,20 +518,22 @@ if (DRAM_ENABLE) begin
|
|||
.writeen_out (writeen_st1)
|
||||
);
|
||||
|
||||
assign core_req_hit_st1 = !is_fill_st1 && !is_snp_st1 && !miss_st1 && !force_miss_st1;
|
||||
|
||||
assign misses = miss_st1;
|
||||
|
||||
|
||||
wire core_req_hit_st1 = !is_fill_st1 && !is_snp_st1 && !miss_st1 && !force_miss_st1;
|
||||
|
||||
wire incoming_fill_st1 = !drsq_empty && (addr_st1 == drsq_addr_st0);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH),
|
||||
.N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
|
||||
.R(1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}),
|
||||
.data_out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
|
||||
.data_in ({valid_st1, incoming_fill_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}),
|
||||
.data_out ({valid_st2, incoming_fill_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_rw_st2, byteen_st2, req_tid_st2, tag_st2})
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
@ -532,9 +541,7 @@ end else begin
|
|||
`UNUSED_VAR (mshr_pending_hazard_unqual_st0)
|
||||
`UNUSED_VAR (drsq_push)
|
||||
`UNUSED_VAR (addr_st0)
|
||||
|
||||
assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1;
|
||||
|
||||
|
||||
assign is_fill_st1 = is_fill_st0;
|
||||
assign is_mshr_st1 = is_mshr_st0;
|
||||
assign is_snp_st1 = is_snp_st0;
|
||||
|
@ -542,14 +549,17 @@ end else begin
|
|||
assign wsel_st1 = wsel_st0;
|
||||
assign writeword_st1= writeword_st0;
|
||||
assign writedata_st1= writedata_st0;
|
||||
assign inst_meta_st1= inst_meta_st0;
|
||||
assign snp_inv_st1 = snp_inv_st0;
|
||||
assign addr_st1 = creq_addr_st0[`LINE_SELECT_ADDR_RNG];
|
||||
assign dirty_st1 = 0;
|
||||
assign readtag_st1 = 0;
|
||||
assign miss_st1 = 0;
|
||||
assign writeen_st1 = valid_st1 && mem_rw_st1;
|
||||
assign writeen_st1 = mem_rw_st1;
|
||||
assign force_miss_st1 = 0;
|
||||
assign tag_st1 = tag_st0;
|
||||
assign mem_rw_st1 = mem_rw_st0;
|
||||
assign byteen_st1 = byteen_st0;
|
||||
assign req_tid_st1 = req_tid_st0;
|
||||
|
||||
assign is_fill_st2 = is_fill_st1;
|
||||
assign is_mshr_st2 = is_mshr_st1;
|
||||
|
@ -558,20 +568,19 @@ end else begin
|
|||
assign wsel_st2 = wsel_st1;
|
||||
assign writeword_st2= writeword_st1;
|
||||
assign writedata_st2= writedata_st1;
|
||||
assign inst_meta_st2= inst_meta_st1;
|
||||
assign snp_inv_st2 = snp_inv_st1;
|
||||
assign addr_st2 = addr_st1;
|
||||
assign dirty_st2 = dirty_st1;
|
||||
assign mem_byteen_st2 = mem_byteen_st1;
|
||||
assign readtag_st2 = readtag_st1;
|
||||
assign miss_st2 = miss_st1;
|
||||
assign writeen_st2 = writeen_st1;
|
||||
assign force_miss_st2 = force_miss_st1;
|
||||
assign tag_st2 = tag_st1;
|
||||
assign mem_rw_st2 = mem_rw_st1;
|
||||
assign byteen_st2 = byteen_st1;
|
||||
assign req_tid_st2 = req_tid_st1;
|
||||
|
||||
assign core_req_hit_st1 = 0;
|
||||
assign core_req_hit_st2 = 0;
|
||||
assign send_dwb_req_st2 = 0;
|
||||
assign do_writeback_st2 = 0;
|
||||
assign core_req_hit_st2 = 1;
|
||||
assign incoming_fill_st2 = 0;
|
||||
|
||||
assign misses = 0;
|
||||
|
@ -579,9 +588,9 @@ end
|
|||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2} = tag_st2;
|
||||
end else begin
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = 0;
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
|
@ -613,7 +622,7 @@ end
|
|||
.writeen_in (writeen_st2),
|
||||
.is_fill_in (is_fill_st2),
|
||||
.wordsel_in (wsel_st2),
|
||||
.byteen_in (mem_byteen_st2),
|
||||
.byteen_in (byteen_st2),
|
||||
.writeword_in (writeword_st2),
|
||||
.writedata_in (writedata_st2),
|
||||
|
||||
|
@ -628,62 +637,58 @@ end
|
|||
wire [`WORD_WIDTH-1:0] readword_st3;
|
||||
wire [`BANK_LINE_WIDTH-1:0] readdata_st3;
|
||||
wire [BANK_LINE_SIZE-1:0] dirtyb_st3;
|
||||
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st3;
|
||||
wire [`TAG_SELECT_BITS-1:0] readtag_st3;
|
||||
wire is_snp_st3;
|
||||
wire snp_inv_st3;
|
||||
wire core_req_hit_st3;
|
||||
wire send_dwb_req_st3;
|
||||
wire do_writeback_st3;
|
||||
wire incoming_fill_st3;
|
||||
wire mshr_push_st3;
|
||||
wire crsq_push_st3;
|
||||
wire dreq_push_st3;
|
||||
wire srsq_push_st3;
|
||||
|
||||
// check if a matching fill request is coming
|
||||
wire incoming_fill_dfp_st2 = drsq_push && (addr_st2 == dram_rsp_addr);
|
||||
wire incoming_fill_st0_st2 = !drsq_empty && (addr_st2 == drsq_addr_st0);
|
||||
wire incoming_fill_st1_st2 = is_fill_st1 && (addr_st2 == addr_st1);
|
||||
wire incoming_fill_st2 = incoming_fill_dfp_st2
|
||||
|| incoming_fill_st0_st2
|
||||
|| incoming_fill_st1_st2;
|
||||
wire incoming_fill_qual_st2 = (!drsq_empty && (addr_st2 == drsq_addr_st0)) || incoming_fill_st2;
|
||||
|
||||
wire do_fill_req_st2 = miss_st2
|
||||
&& (!force_miss_st2
|
||||
|| (is_mshr_st2 && addr_st2 != addr_st3))
|
||||
&& !incoming_fill_qual_st2;
|
||||
|
||||
wire send_fill_req_st2 = miss_st2
|
||||
&& (!force_miss_st2
|
||||
|| (is_mshr_st2 && addr_st2 != addr_st3))
|
||||
&& !incoming_fill_st2;
|
||||
wire do_writeback_st2 = dirty_st2
|
||||
&& (is_fill_st2
|
||||
|| (!force_miss_st2 && is_snp_st2));
|
||||
|
||||
wire do_writeback_st2 = dirty_st2
|
||||
&& (is_fill_st2
|
||||
|| (!force_miss_st2 && is_snp_st2));
|
||||
wire mshr_push_st2 = miss_st2 || force_miss_st2;
|
||||
|
||||
wire send_dwb_req_st2 = send_fill_req_st2 || do_writeback_st2;
|
||||
wire crsq_push_st2 = core_req_hit_st2 && !mem_rw_st2;
|
||||
|
||||
wire dreq_push_st2 = do_fill_req_st2 || do_writeback_st2;
|
||||
|
||||
wire srsq_push_st2 = is_snp_st2 && !force_miss_st2;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH),
|
||||
.N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + 1 + WORD_SIZE + `WORD_WIDTH + `BANK_LINE_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH),
|
||||
.R(1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipeline_stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}),
|
||||
.data_out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
|
||||
.data_in ({valid_st2, mshr_push_st2, crsq_push_st2, dreq_push_st2, srsq_push_st2, do_writeback_st2, incoming_fill_qual_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirtyb_st2, mem_rw_st2, byteen_st2, readword_st2, readdata_st2, req_tid_st2, tag_st2}),
|
||||
.data_out ({valid_st3, mshr_push_st3, crsq_push_st3, dreq_push_st3, srsq_push_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readtag_st3, miss_st3, dirtyb_st3, mem_rw_st3, byteen_st3, readword_st3, readdata_st3, req_tid_st3, tag_st3})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3;
|
||||
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3} = tag_st3;
|
||||
end else begin
|
||||
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = 0;
|
||||
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
// Enqueue to miss reserv if it's a valid miss
|
||||
|
||||
wire[`REQS_BITS-1:0] req_tid_st3;
|
||||
wire[`REQ_TAG_WIDTH-1:0] req_tag_st3;
|
||||
wire req_rw_st3;
|
||||
wire[WORD_SIZE-1:0] req_byteen_st3;
|
||||
|
||||
wire mshr_push_unqual = valid_st3 && (miss_st3 || force_miss_st3);
|
||||
wire mshr_push_unqual = valid_st3 && mshr_push_st3;
|
||||
assign mshr_push_stall = 0;
|
||||
|
||||
wire mshr_push = mshr_push_unqual
|
||||
|
@ -696,7 +701,7 @@ end
|
|||
assert(!mshr_push || !mshr_full); // mshr stall is detected before issuing new requests
|
||||
end
|
||||
|
||||
assign {req_tag_st3, req_rw_st3, req_byteen_st3, req_tid_st3} = inst_meta_st3;
|
||||
wire incoming_fill_qual_st3 = (!drsq_empty && (addr_st3 == drsq_addr_st0)) || incoming_fill_st3;
|
||||
|
||||
if (DRAM_ENABLE) begin
|
||||
|
||||
|
@ -707,7 +712,7 @@ end
|
|||
|
||||
// push missed requests as 'ready' if it was a forced miss but actually had a hit
|
||||
// or the fill request is coming for the missed block
|
||||
wire mshr_init_ready_state_st3 = valid_st3 && (!miss_st3 || incoming_fill_st3);
|
||||
wire mshr_init_ready_state_st3 = valid_st3 && (!miss_st3 || incoming_fill_qual_st3);
|
||||
|
||||
VX_miss_resrv #(
|
||||
.BANK_ID (BANK_ID),
|
||||
|
@ -738,14 +743,7 @@ end
|
|||
// enqueue
|
||||
.enqueue_st3 (mshr_push),
|
||||
.enqueue_addr_st3 (addr_st3),
|
||||
.enqueue_wsel_st3 (wsel_st3),
|
||||
.enqueue_data_st3 (writeword_st3),
|
||||
.enqueue_tid_st3 (req_tid_st3),
|
||||
.enqueue_tag_st3 (req_tag_st3),
|
||||
.enqueue_rw_st3 (req_rw_st3),
|
||||
.enqueue_byteen_st3 (req_byteen_st3),
|
||||
.enqueue_is_snp_st3 (is_snp_st3),
|
||||
.enqueue_snp_inv_st3(snp_inv_st3),
|
||||
.enqueue_data_st3 ({writeword_st3, req_tid_st3, tag_st3, mem_rw_st3, byteen_st3, wsel_st3, is_snp_st3, snp_inv_st3}),
|
||||
.enqueue_is_mshr_st3(is_mshr_st3),
|
||||
.enqueue_ready_st3 (mshr_init_ready_state_st3),
|
||||
.enqueue_full (mshr_full),
|
||||
|
@ -759,14 +757,7 @@ end
|
|||
.schedule_st0 (mshr_pop),
|
||||
.dequeue_valid_st0 (mshr_valid_st0),
|
||||
.dequeue_addr_st0 (mshr_addr_st0),
|
||||
.dequeue_wsel_st0 (mshr_wsel_st0),
|
||||
.dequeue_data_st0 (mshr_writeword_st0),
|
||||
.dequeue_tid_st0 (mshr_tid_st0),
|
||||
.dequeue_tag_st0 (mshr_tag_st0),
|
||||
.dequeue_rw_st0 (mshr_rw_st0),
|
||||
.dequeue_byteen_st0 (mshr_byteen_st0),
|
||||
.dequeue_is_snp_st0 (mshr_is_snp_st0),
|
||||
.dequeue_snp_inv_st0(mshr_snp_inv_st0),
|
||||
.dequeue_data_st0 ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0, mshr_is_snp_st0, mshr_snp_inv_st0}),
|
||||
.dequeue_st3 (mshr_dequeue_st3)
|
||||
);
|
||||
end else begin
|
||||
|
@ -775,7 +766,8 @@ end
|
|||
`UNUSED_VAR (wsel_st3)
|
||||
`UNUSED_VAR (writeword_st3)
|
||||
`UNUSED_VAR (snp_inv_st3)
|
||||
`UNUSED_VAR (req_byteen_st3)
|
||||
`UNUSED_VAR (mem_rw_st3)
|
||||
`UNUSED_VAR (byteen_st3)
|
||||
`UNUSED_VAR (is_snp_st3)
|
||||
`UNUSED_VAR (incoming_fill_st3)
|
||||
assign mshr_pending_hazard_unqual_st0 = 0;
|
||||
|
@ -796,7 +788,7 @@ end
|
|||
|
||||
wire crsq_empty, crsq_full;
|
||||
|
||||
wire crsq_push_unqual = valid_st3 && core_req_hit_st3 && !req_rw_st3;
|
||||
wire crsq_push_unqual = valid_st3 && crsq_push_st3;
|
||||
assign crsq_push_stall = crsq_push_unqual && crsq_full;
|
||||
|
||||
wire crsq_push = crsq_push_unqual
|
||||
|
@ -808,13 +800,14 @@ end
|
|||
wire crsq_pop = core_rsp_valid && core_rsp_ready;
|
||||
|
||||
wire [`REQS_BITS-1:0] crsq_tid_st3 = req_tid_st3;
|
||||
wire [CORE_TAG_WIDTH-1:0] crsq_tag_st3 = CORE_TAG_WIDTH'(req_tag_st3);
|
||||
wire [CORE_TAG_WIDTH-1:0] crsq_tag_st3 = CORE_TAG_WIDTH'(tag_st3);
|
||||
wire [`WORD_WIDTH-1:0] crsq_data_st3 = readword_st3;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.SIZE(CRSQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
.DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.BUFFERED (1),
|
||||
.FASTRAM (1)
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -833,11 +826,11 @@ end
|
|||
|
||||
wire dreq_empty, dreq_full;
|
||||
|
||||
wire dreq_push_unqual = valid_st3 && send_dwb_req_st3;
|
||||
|
||||
assign dreq_push_stall = dreq_push_unqual && dreq_full;
|
||||
wire dreq_push_unqual = valid_st3 && dreq_push_st3;
|
||||
assign dreq_push_stall = dreq_push_unqual && dreq_full;
|
||||
|
||||
wire dreq_push = dreq_push_unqual
|
||||
&& (do_writeback_st3 || !incoming_fill_qual_st3)
|
||||
&& !dreq_full
|
||||
&& !mshr_push_stall
|
||||
&& !crsq_push_stall
|
||||
|
@ -854,9 +847,10 @@ end
|
|||
|
||||
if (DRAM_ENABLE) begin
|
||||
VX_generic_queue #(
|
||||
.DATAW(1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH),
|
||||
.SIZE(DREQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
.DATAW (1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH),
|
||||
.SIZE (DREQ_SIZE),
|
||||
.BUFFERED (1),
|
||||
.FASTRAM (1)
|
||||
) dram_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -892,8 +886,7 @@ end
|
|||
|
||||
wire srsq_empty, srsq_full;
|
||||
|
||||
wire srsq_push_unqual = valid_st3 && is_snp_st3 && !force_miss_st3;
|
||||
|
||||
wire srsq_push_unqual = valid_st3 && srsq_push_st3;
|
||||
assign srsq_push_stall = srsq_push_unqual && srsq_full;
|
||||
|
||||
wire srsq_push = srsq_push_unqual
|
||||
|
@ -904,13 +897,14 @@ end
|
|||
|
||||
wire srsq_pop = snp_rsp_valid && snp_rsp_ready;
|
||||
|
||||
wire [SNP_TAG_WIDTH-1:0] srsq_tag_st3 = SNP_TAG_WIDTH'(req_tag_st3);
|
||||
wire [SNP_TAG_WIDTH-1:0] srsq_tag_st3 = SNP_TAG_WIDTH'(tag_st3);
|
||||
|
||||
if (FLUSH_ENABLE) begin
|
||||
VX_generic_queue #(
|
||||
.DATAW (SNP_TAG_WIDTH),
|
||||
.SIZE (SRSQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
.DATAW (SNP_TAG_WIDTH),
|
||||
.SIZE (SRSQ_SIZE),
|
||||
.BUFFERED (1),
|
||||
.FASTRAM (1)
|
||||
) snp_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -945,14 +939,14 @@ end
|
|||
`SCOPE_ASSIGN (valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (valid_st2, valid_st2);
|
||||
`SCOPE_ASSIGN (valid_st3, valid_st3);
|
||||
|
||||
`SCOPE_ASSIGN (is_fill_st0, is_fill_st0);
|
||||
`SCOPE_ASSIGN (is_snp_st0, is_snp_st0);
|
||||
`SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0);
|
||||
|
||||
`SCOPE_ASSIGN (miss_st1, miss_st1);
|
||||
`SCOPE_ASSIGN (dirty_st1, dirty_st1);
|
||||
`SCOPE_ASSIGN (miss_st1, miss_st1);
|
||||
`SCOPE_ASSIGN (dirty_st1, dirty_st1);
|
||||
`SCOPE_ASSIGN (force_miss_st1, force_miss_st1);
|
||||
`SCOPE_ASSIGN (mshr_push, mshr_push);
|
||||
`SCOPE_ASSIGN (pipeline_stall, pipeline_stall);
|
||||
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
|
@ -961,8 +955,8 @@ end
|
|||
`ifdef PERF_ENABLE
|
||||
assign perf_pipe_stall = pipeline_stall;
|
||||
assign perf_mshr_stall = mshr_going_full;
|
||||
assign perf_read_miss = !pipeline_stall & miss_st1 & !is_mshr_st1 & !mem_rw_st1;
|
||||
assign perf_write_miss = !pipeline_stall & miss_st1 & !is_mshr_st1 & mem_rw_st1;
|
||||
assign perf_read_miss = !pipeline_stall & miss_st2 & !is_mshr_st2 & !mem_rw_st2;
|
||||
assign perf_write_miss = !pipeline_stall & miss_st2 & !is_mshr_st2 & mem_rw_st2;
|
||||
if (DRAM_ENABLE) begin
|
||||
assign perf_evict = dreq_push & do_writeback_st3 & !is_snp_st3;
|
||||
end else begin
|
||||
|
|
126
hw/rtl/cache/VX_bank_core_req_queue.v
vendored
126
hw/rtl/cache/VX_bank_core_req_queue.v
vendored
|
@ -22,7 +22,7 @@ module VX_bank_core_req_queue #(
|
|||
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in,
|
||||
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen_in,
|
||||
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] writedata_in,
|
||||
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] wdata_in,
|
||||
|
||||
// Dequeue
|
||||
input wire pop,
|
||||
|
@ -30,7 +30,7 @@ module VX_bank_core_req_queue #(
|
|||
output wire [`WORD_ADDR_WIDTH-1:0] addr_out,
|
||||
output wire rw_out,
|
||||
output wire [WORD_SIZE-1:0] byteen_out,
|
||||
output wire [`WORD_WIDTH-1:0] writedata_out,
|
||||
output wire [`WORD_WIDTH-1:0] wdata_out,
|
||||
output wire [`REQS_BITS-1:0] tid_out,
|
||||
|
||||
// States
|
||||
|
@ -43,7 +43,7 @@ module VX_bank_core_req_queue #(
|
|||
wire [`CORE_REQ_TAG_COUNT-1:0] q_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] q_byteen;
|
||||
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] q_addr;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] q_writedata;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] q_wdata;
|
||||
wire q_push;
|
||||
wire q_pop;
|
||||
wire q_empty;
|
||||
|
@ -56,16 +56,17 @@ module VX_bank_core_req_queue #(
|
|||
end
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)),
|
||||
.SIZE(CREQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
.DATAW ($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(wdata_in)),
|
||||
.SIZE (CREQ_SIZE),
|
||||
.BUFFERED (1),
|
||||
.FASTRAM (1)
|
||||
) req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (q_push),
|
||||
.pop (q_pop),
|
||||
.data_in ({valids_in, tag_in, addr_in, rw_in, byteen_in, writedata_in}),
|
||||
.data_out ({q_valids, q_tag, q_addr, q_rw, q_byteen, q_writedata}),
|
||||
.data_in ({valids_in, tag_in, addr_in, rw_in, byteen_in, wdata_in}),
|
||||
.data_out ({q_valids, q_tag, q_addr, q_rw, q_byteen, q_wdata}),
|
||||
.empty (q_empty),
|
||||
.full (q_full),
|
||||
`UNUSED_PIN (size)
|
||||
|
@ -78,37 +79,42 @@ module VX_bank_core_req_queue #(
|
|||
reg [`WORD_ADDR_WIDTH-1:0] sel_addr, sel_addr_r;
|
||||
reg sel_rw, sel_rw_r;
|
||||
reg [WORD_SIZE-1:0] sel_byteen, sel_byteen_r;
|
||||
reg [`WORD_WIDTH-1:0] sel_writedata, sel_writedata_r;
|
||||
reg [`WORD_WIDTH-1:0] sel_wdata, sel_wdata_r;
|
||||
|
||||
reg [$clog2(NUM_REQS+1)-1:0] q_valids_cnt_r;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] q_valids_cnt_n;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] q_valids_cnt;
|
||||
|
||||
reg [NUM_REQS-1:0] pop_mask;
|
||||
reg fast_track;
|
||||
|
||||
reg [NUM_REQS-1:0] pop_mask;
|
||||
reg fast_track;
|
||||
wire fast_track_n;
|
||||
|
||||
reg req_eop; // request end of packet
|
||||
reg empty_r;
|
||||
|
||||
assign q_push = push;
|
||||
assign q_pop = pop && (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) && !fast_track;
|
||||
assign q_pop = pop && req_eop;
|
||||
|
||||
wire [NUM_REQS-1:0] requests = q_valids & ~pop_mask;
|
||||
|
||||
always @(*) begin
|
||||
sel_idx = 0;
|
||||
sel_tag = 'x;
|
||||
sel_addr = 'x;
|
||||
sel_rw = 'x;
|
||||
sel_byteen = 'x;
|
||||
sel_writedata = 'x;
|
||||
sel_idx = 0;
|
||||
sel_tag = 'x;
|
||||
sel_addr = 'x;
|
||||
sel_rw = 'x;
|
||||
sel_byteen = 'x;
|
||||
sel_wdata = 'x;
|
||||
|
||||
for (integer i = 0; i < NUM_REQS; i++) begin
|
||||
if (requests[i]) begin
|
||||
sel_idx = `REQS_BITS'(i);
|
||||
sel_addr = q_addr[i];
|
||||
if (0 == CORE_TAG_ID_BITS) begin
|
||||
sel_tag = q_tag[i];
|
||||
sel_rw = q_rw[i];
|
||||
sel_tag = q_tag[i];
|
||||
sel_rw = q_rw[i];
|
||||
end
|
||||
sel_byteen = q_byteen[i];
|
||||
sel_writedata = q_writedata[i];
|
||||
sel_byteen = q_byteen[i];
|
||||
sel_wdata = q_wdata[i];
|
||||
break;
|
||||
end
|
||||
end
|
||||
|
@ -121,33 +127,43 @@ module VX_bank_core_req_queue #(
|
|||
.count (q_valids_cnt)
|
||||
);
|
||||
|
||||
assign fast_track_n = (!q_empty && (empty_r || (pop && fast_track))) ? 0 :
|
||||
pop ? (q_valids_cnt_r == 2) :
|
||||
fast_track;
|
||||
|
||||
assign q_valids_cnt_n = (!q_empty && (empty_r || (pop && fast_track))) ? q_valids_cnt :
|
||||
pop ? (q_valids_cnt_r - 1) :
|
||||
q_valids_cnt_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pop_mask <= 0;
|
||||
fast_track <= 0;
|
||||
q_valids_cnt_r <= 0;
|
||||
req_eop <= 0;
|
||||
empty_r <= 1;
|
||||
end else begin
|
||||
if (!q_empty
|
||||
&& ((0 == q_valids_cnt_r) || (pop && fast_track))) begin
|
||||
q_valids_cnt_r <= q_valids_cnt;
|
||||
pop_mask <= (NUM_REQS'(1) << sel_idx);
|
||||
fast_track <= 0;
|
||||
&& (empty_r || (pop && fast_track))) begin
|
||||
pop_mask <= (NUM_REQS'(1) << sel_idx);
|
||||
end else if (pop) begin
|
||||
q_valids_cnt_r <= q_valids_cnt_r - 1;
|
||||
fast_track <= (q_valids_cnt_r == 2);
|
||||
if (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) begin
|
||||
if (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) begin
|
||||
pop_mask <= 0;
|
||||
end else begin
|
||||
pop_mask[sel_idx] <= 1;
|
||||
end
|
||||
end
|
||||
q_valids_cnt_r <= q_valids_cnt_n;
|
||||
fast_track <= fast_track_n;
|
||||
req_eop <= (q_valids_cnt_n == 1 || q_valids_cnt_n == 2) && !fast_track_n;
|
||||
empty_r <= (0 == q_valids_cnt_n);
|
||||
end
|
||||
|
||||
if ((0 == q_valids_cnt_r) || pop) begin
|
||||
sel_idx_r <= sel_idx;
|
||||
sel_byteen_r <= sel_byteen;
|
||||
sel_addr_r <= sel_addr;
|
||||
sel_writedata_r <= sel_writedata;
|
||||
if (empty_r || pop) begin
|
||||
sel_idx_r <= sel_idx;
|
||||
sel_byteen_r <= sel_byteen;
|
||||
sel_addr_r <= sel_addr;
|
||||
sel_wdata_r <= sel_wdata;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -155,45 +171,45 @@ module VX_bank_core_req_queue #(
|
|||
`UNUSED_VAR (sel_tag)
|
||||
`UNUSED_VAR (sel_rw)
|
||||
always @(posedge clk) begin
|
||||
if ((0 == q_valids_cnt_r) || pop) begin
|
||||
if (empty_r || pop) begin
|
||||
sel_tag_r <= q_tag;
|
||||
sel_rw_r <= q_rw;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if ((0 == q_valids_cnt_r) || pop) begin
|
||||
if (empty_r || pop) begin
|
||||
sel_tag_r <= sel_tag;
|
||||
sel_rw_r <= sel_rw;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign tag_out = sel_tag_r;
|
||||
assign addr_out = sel_addr_r;
|
||||
assign rw_out = sel_rw_r;
|
||||
assign byteen_out = sel_byteen_r;
|
||||
assign writedata_out = sel_writedata_r;
|
||||
assign tid_out = sel_idx_r;
|
||||
assign tag_out = sel_tag_r;
|
||||
assign addr_out = sel_addr_r;
|
||||
assign rw_out = sel_rw_r;
|
||||
assign byteen_out = sel_byteen_r;
|
||||
assign wdata_out = sel_wdata_r;
|
||||
assign tid_out = sel_idx_r;
|
||||
|
||||
assign empty = (0 == q_valids_cnt_r);
|
||||
assign full = q_full;
|
||||
assign full = q_full;
|
||||
assign empty = empty_r;
|
||||
|
||||
end else begin
|
||||
`UNUSED_VAR (q_valids)
|
||||
|
||||
assign q_push = push;
|
||||
assign q_pop = pop;
|
||||
assign q_push = push;
|
||||
assign q_pop = pop;
|
||||
|
||||
assign tag_out = q_tag;
|
||||
assign addr_out = q_addr;
|
||||
assign rw_out = q_rw;
|
||||
assign byteen_out = q_byteen;
|
||||
assign writedata_out = q_writedata;
|
||||
assign tid_out = 0;
|
||||
assign tag_out = q_tag;
|
||||
assign addr_out = q_addr;
|
||||
assign rw_out = q_rw;
|
||||
assign byteen_out = q_byteen;
|
||||
assign wdata_out = q_wdata;
|
||||
assign tid_out = 0;
|
||||
|
||||
assign empty = q_empty;
|
||||
assign full = q_full;
|
||||
assign empty = q_empty;
|
||||
assign full = q_full;
|
||||
end
|
||||
|
||||
endmodule
|
12
hw/rtl/cache/VX_cache.v
vendored
12
hw/rtl/cache/VX_cache.v
vendored
|
@ -378,9 +378,9 @@ module VX_cache #(
|
|||
end
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_BANKS),
|
||||
.DATAW (`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH),
|
||||
.OUT_BUFFER (NUM_BANKS >= 4)
|
||||
.NUM_REQS (NUM_BANKS),
|
||||
.DATAW (`DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH),
|
||||
.BUFFERED (1)
|
||||
) dram_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -408,9 +408,9 @@ module VX_cache #(
|
|||
|
||||
if (FLUSH_ENABLE) begin
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_BANKS),
|
||||
.DATAW (SNP_TAG_WIDTH),
|
||||
.OUT_BUFFER (NUM_BANKS >= 4)
|
||||
.NUM_REQS (NUM_BANKS),
|
||||
.DATAW (SNP_TAG_WIDTH),
|
||||
.BUFFERED (1)
|
||||
) snp_rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
2
hw/rtl/cache/VX_cache_config.vh
vendored
2
hw/rtl/cache/VX_cache_config.vh
vendored
|
@ -15,7 +15,7 @@
|
|||
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)
|
||||
|
||||
// data metadata word_sel is_snp snp_inv
|
||||
`define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1)
|
||||
`define MSHR_DATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1)
|
||||
|
||||
`define BANK_BITS `LOG2UP(NUM_BANKS)
|
||||
|
||||
|
|
33
hw/rtl/cache/VX_data_access.v
vendored
33
hw/rtl/cache/VX_data_access.v
vendored
|
@ -40,25 +40,25 @@ module VX_data_access #(
|
|||
`IGNORE_WARNINGS_END
|
||||
input wire writeen_in,
|
||||
input wire is_fill_in,
|
||||
input wire[`WORD_WIDTH-1:0] writeword_in,
|
||||
input wire[`BANK_LINE_WIDTH-1:0] writedata_in,
|
||||
input wire[WORD_SIZE-1:0] byteen_in,
|
||||
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_in,
|
||||
input wire [`WORD_WIDTH-1:0] writeword_in,
|
||||
input wire [`BANK_LINE_WIDTH-1:0] writedata_in,
|
||||
input wire [WORD_SIZE-1:0] byteen_in,
|
||||
input wire [`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_in,
|
||||
|
||||
// Outputs
|
||||
output wire[`WORD_WIDTH-1:0] readword_out,
|
||||
output wire[`BANK_LINE_WIDTH-1:0] readdata_out,
|
||||
output wire[BANK_LINE_SIZE-1:0] dirtyb_out
|
||||
output wire [`BANK_LINE_WIDTH-1:0] readdata_out,
|
||||
output wire [BANK_LINE_SIZE-1:0] dirtyb_out
|
||||
);
|
||||
|
||||
wire[BANK_LINE_SIZE-1:0] read_dirtyb_out;
|
||||
wire[`BANK_LINE_WIDTH-1:0] read_data;
|
||||
wire [BANK_LINE_SIZE-1:0] read_dirtyb_out;
|
||||
wire [`BANK_LINE_WIDTH-1:0] read_data;
|
||||
|
||||
wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable;
|
||||
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable;
|
||||
wire write_enable;
|
||||
wire[`BANK_LINE_WIDTH-1:0] write_data;
|
||||
wire [`BANK_LINE_WIDTH-1:0] write_data;
|
||||
|
||||
wire[`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0];
|
||||
wire [`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0];
|
||||
|
||||
VX_data_store #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -68,7 +68,6 @@ module VX_data_access #(
|
|||
.WRITE_ENABLE (WRITE_ENABLE)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
|
||||
.reset (reset),
|
||||
|
||||
.read_addr (addrline),
|
||||
|
@ -81,7 +80,7 @@ module VX_data_access #(
|
|||
.write_addr (addrline),
|
||||
.write_data (write_data)
|
||||
);
|
||||
|
||||
|
||||
if (`WORD_SELECT_WIDTH != 0) begin
|
||||
wire [`WORD_WIDTH-1:0] readword = read_data[wordsel_in * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||
|
@ -97,16 +96,12 @@ module VX_data_access #(
|
|||
wire word_sel = (`WORD_SELECT_WIDTH == 0) || (wordsel_in == `UP(`WORD_SELECT_WIDTH)'(i));
|
||||
|
||||
assign byte_enable[i] = is_fill_in ? {WORD_SIZE{1'b1}} :
|
||||
word_sel ? byteen_in :
|
||||
{WORD_SIZE{1'b0}};
|
||||
word_sel ? byteen_in : {WORD_SIZE{1'b0}};
|
||||
|
||||
assign write_data[i * `WORD_WIDTH +: `WORD_WIDTH] = is_fill_in ? writedata_in[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_in;
|
||||
end
|
||||
|
||||
assign write_enable = valid_in
|
||||
&& writeen_in
|
||||
&& !stall;
|
||||
|
||||
assign write_enable = valid_in && writeen_in && !stall;
|
||||
assign dirtyb_out = read_dirtyb_out;
|
||||
assign readdata_out = read_data;
|
||||
|
||||
|
|
1
hw/rtl/cache/VX_data_store.v
vendored
1
hw/rtl/cache/VX_data_store.v
vendored
|
@ -46,7 +46,6 @@ module VX_data_store #(
|
|||
.DATAW(BANK_LINE_SIZE * 8),
|
||||
.SIZE(`BANK_LINE_COUNT),
|
||||
.BYTEENW(BANK_LINE_SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) data (
|
||||
.clk(clk),
|
||||
|
|
33
hw/rtl/cache/VX_miss_resrv.v
vendored
33
hw/rtl/cache/VX_miss_resrv.v
vendored
|
@ -39,36 +39,22 @@ module VX_miss_resrv #(
|
|||
|
||||
// enqueue
|
||||
input wire enqueue_st3,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] enqueue_addr_st3,
|
||||
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] enqueue_wsel_st3,
|
||||
input wire[`WORD_WIDTH-1:0] enqueue_data_st3,
|
||||
input wire[`REQS_BITS-1:0] enqueue_tid_st3,
|
||||
input wire[`REQ_TAG_WIDTH-1:0] enqueue_tag_st3,
|
||||
input wire enqueue_rw_st3,
|
||||
input wire[WORD_SIZE-1:0] enqueue_byteen_st3,
|
||||
input wire enqueue_is_snp_st3,
|
||||
input wire enqueue_snp_inv_st3,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] enqueue_addr_st3,
|
||||
input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data_st3,
|
||||
input wire enqueue_is_mshr_st3,
|
||||
input wire enqueue_ready_st3,
|
||||
output wire enqueue_full,
|
||||
|
||||
// fill
|
||||
input wire update_ready_st0,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] addr_st0,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] addr_st0,
|
||||
output wire pending_hazard_st0,
|
||||
|
||||
// dequeue
|
||||
input wire schedule_st0,
|
||||
output wire dequeue_valid_st0,
|
||||
output wire[`LINE_ADDR_WIDTH-1:0] dequeue_addr_st0,
|
||||
output wire[`UP(`WORD_SELECT_WIDTH)-1:0] dequeue_wsel_st0,
|
||||
output wire[`WORD_WIDTH-1:0] dequeue_data_st0,
|
||||
output wire[`REQS_BITS-1:0] dequeue_tid_st0,
|
||||
output wire[`REQ_TAG_WIDTH-1:0] dequeue_tag_st0,
|
||||
output wire dequeue_rw_st0,
|
||||
output wire[WORD_SIZE-1:0] dequeue_byteen_st0,
|
||||
output wire dequeue_is_snp_st0,
|
||||
output wire dequeue_snp_inv_st0,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] dequeue_addr_st0,
|
||||
output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data_st0,
|
||||
input wire dequeue_st3
|
||||
);
|
||||
reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
|
||||
|
@ -76,8 +62,7 @@ module VX_miss_resrv #(
|
|||
reg [MSHR_SIZE-1:0] valid_table;
|
||||
reg [MSHR_SIZE-1:0] ready_table;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, restore_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] tail_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr, tail_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE+1)-1:0] size;
|
||||
|
||||
assign enqueue_full = (size == $bits(size)'(MSHR_SIZE));
|
||||
|
@ -151,8 +136,6 @@ module VX_miss_resrv #(
|
|||
VX_dp_ram #(
|
||||
.DATAW(`MSHR_DATA_WIDTH),
|
||||
.SIZE(MSHR_SIZE),
|
||||
.BYTEENW(1),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) datatable (
|
||||
.clk(clk),
|
||||
|
@ -161,8 +144,8 @@ module VX_miss_resrv #(
|
|||
.wren(mshr_push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din({enqueue_data_st3, enqueue_tid_st3, enqueue_tag_st3, enqueue_rw_st3, enqueue_byteen_st3, enqueue_wsel_st3, enqueue_is_snp_st3, enqueue_snp_inv_st3}),
|
||||
.dout({dequeue_data_st0, dequeue_tid_st0, dequeue_tag_st0, dequeue_rw_st0, dequeue_byteen_st0, dequeue_wsel_st0, dequeue_is_snp_st0, dequeue_snp_inv_st0})
|
||||
.din(enqueue_data_st3),
|
||||
.dout(dequeue_data_st0)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSHR
|
||||
|
|
101
hw/rtl/cache/VX_snp_forwarder.v
vendored
101
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -7,7 +7,8 @@ module VX_snp_forwarder #(
|
|||
parameter NUM_REQS = 1,
|
||||
parameter SREQ_SIZE = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_OUT_WIDTH = `LOG2UP(SREQ_SIZE)
|
||||
parameter TAG_OUT_WIDTH = `LOG2UP(SREQ_SIZE),
|
||||
parameter BUFFERED = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -23,7 +24,7 @@ module VX_snp_forwarder #(
|
|||
output wire snp_rsp_valid,
|
||||
output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr,
|
||||
output wire snp_rsp_inv,
|
||||
output wire [TAG_IN_WIDTH-1:0] snp_rsp_tag,
|
||||
output wire [TAG_IN_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready,
|
||||
|
||||
// Snoop Forwarding out
|
||||
|
@ -45,6 +46,11 @@ module VX_snp_forwarder #(
|
|||
if (NUM_REQS > 1) begin
|
||||
|
||||
reg [REQ_QUAL_BITS:0] pending_cntrs [SREQ_SIZE-1:0];
|
||||
|
||||
wire [TAG_IN_WIDTH-1:0] snp_rsp_tag_unqual;
|
||||
wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr_unqual;
|
||||
wire snp_rsp_inv_unqual;
|
||||
wire snp_rsp_ready_unqual;
|
||||
|
||||
wire [TAG_OUT_WIDTH-1:0] sfq_write_addr, sfq_read_addr;
|
||||
wire sfq_full;
|
||||
|
@ -52,30 +58,31 @@ module VX_snp_forwarder #(
|
|||
wire [TAG_OUT_WIDTH-1:0] fwdin_tag;
|
||||
wire fwdin_valid;
|
||||
|
||||
wire fwdin_ready = snp_rsp_ready || (1 != pending_cntrs[sfq_read_addr]);
|
||||
wire fwdin_ready = snp_rsp_ready_unqual || (1 != pending_cntrs[sfq_read_addr]);
|
||||
wire fwdin_fire = fwdin_valid && fwdin_ready;
|
||||
|
||||
assign snp_rsp_valid = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]);
|
||||
wire snp_rsp_valid_unqual = fwdin_valid && (1 == pending_cntrs[sfq_read_addr]);
|
||||
|
||||
assign sfq_read_addr = fwdin_tag;
|
||||
|
||||
wire sfq_acquire = snp_req_valid && snp_req_ready;
|
||||
wire sfq_release = snp_rsp_valid && snp_rsp_ready;
|
||||
wire sfq_release = snp_rsp_valid_unqual && snp_rsp_ready_unqual;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH),
|
||||
.SIZE (SREQ_SIZE)
|
||||
.DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH),
|
||||
.SIZE (SREQ_SIZE),
|
||||
.FASTRAM (1)
|
||||
) req_metadata_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_addr (sfq_write_addr),
|
||||
.acquire_slot (sfq_acquire),
|
||||
.read_addr (sfq_read_addr),
|
||||
.write_data ({snp_req_addr, snp_req_inv, snp_req_tag}),
|
||||
.read_data ({snp_rsp_addr, snp_rsp_inv, snp_rsp_tag}),
|
||||
.release_addr (sfq_read_addr),
|
||||
.release_slot (sfq_release),
|
||||
.full (sfq_full)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_addr (sfq_write_addr),
|
||||
.acquire_slot (sfq_acquire),
|
||||
.read_addr (sfq_read_addr),
|
||||
.write_data ({snp_req_tag, snp_req_addr, snp_req_inv}),
|
||||
.read_data ({snp_rsp_tag_unqual, snp_rsp_addr_unqual, snp_rsp_inv_unqual}),
|
||||
.release_addr (sfq_read_addr),
|
||||
.release_slot (sfq_release),
|
||||
.full (sfq_full)
|
||||
);
|
||||
|
||||
wire fwdout_valid;
|
||||
|
@ -115,21 +122,21 @@ module VX_snp_forwarder #(
|
|||
fwdout_tag_r <= sfq_write_addr;
|
||||
end
|
||||
end
|
||||
assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full);
|
||||
assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr;
|
||||
assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)};
|
||||
assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv;
|
||||
assign dispatch_hold= dispatch_hold_r;
|
||||
assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full);
|
||||
assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr;
|
||||
assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)};
|
||||
assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv;
|
||||
assign dispatch_hold = dispatch_hold_r;
|
||||
end else begin
|
||||
assign fwdout_valid = snp_req_valid && !sfq_full;
|
||||
assign fwdout_tag = sfq_write_addr;
|
||||
assign fwdout_addr = snp_req_addr;
|
||||
assign fwdout_inv = snp_req_inv;
|
||||
assign dispatch_hold= 1'b0;
|
||||
assign fwdout_valid = snp_req_valid && !sfq_full;
|
||||
assign fwdout_tag = sfq_write_addr;
|
||||
assign fwdout_addr = snp_req_addr;
|
||||
assign fwdout_inv = snp_req_inv;
|
||||
assign dispatch_hold = 1'b0;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (sfq_acquire) begin
|
||||
if (sfq_acquire) begin
|
||||
pending_cntrs[sfq_write_addr] <= NUM_REQUESTS_QUAL;
|
||||
end
|
||||
if (fwdin_fire) begin
|
||||
|
@ -143,7 +150,7 @@ module VX_snp_forwarder #(
|
|||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DST_ADDR_WIDTH + 1 + TAG_OUT_WIDTH),
|
||||
.PASSTHRU (NUM_REQS >= 4)
|
||||
.PASSTHRU (!BUFFERED)
|
||||
) fwdout_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -171,19 +178,31 @@ module VX_snp_forwarder #(
|
|||
assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold;
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (TAG_OUT_WIDTH),
|
||||
.IN_BUFFER (NUM_REQS >= 4),
|
||||
.OUT_BUFFER (NUM_REQS >= 4)
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (TAG_OUT_WIDTH)
|
||||
) snp_fwdin_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (snp_fwdin_valid),
|
||||
.data_in (snp_fwdin_tag),
|
||||
.ready_in (snp_fwdin_ready),
|
||||
.valid_out (fwdin_valid),
|
||||
.data_out (fwdin_tag),
|
||||
.ready_out (fwdin_ready)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (snp_fwdin_valid),
|
||||
.data_in (snp_fwdin_tag),
|
||||
.ready_in (snp_fwdin_ready),
|
||||
.valid_out (fwdin_valid),
|
||||
.data_out (fwdin_tag),
|
||||
.ready_out (fwdin_ready)
|
||||
);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (TAG_IN_WIDTH + SRC_ADDR_WIDTH + 1),
|
||||
.PASSTHRU (!BUFFERED)
|
||||
) rsp_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (snp_rsp_valid_unqual),
|
||||
.data_in ({snp_rsp_tag_unqual, snp_rsp_addr_unqual, snp_rsp_inv_unqual}),
|
||||
.ready_in (snp_rsp_ready_unqual),
|
||||
.valid_out (snp_rsp_valid),
|
||||
.data_out ({snp_rsp_tag, snp_rsp_addr, snp_rsp_inv}),
|
||||
.ready_out (snp_rsp_ready)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_SNP
|
||||
|
|
2
hw/rtl/cache/VX_tag_access.v
vendored
2
hw/rtl/cache/VX_tag_access.v
vendored
|
@ -54,7 +54,7 @@ module VX_tag_access #(
|
|||
|
||||
wire read_valid;
|
||||
wire read_dirty;
|
||||
wire[`TAG_SELECT_BITS-1:0] read_tag;
|
||||
wire [`TAG_SELECT_BITS-1:0] read_tag;
|
||||
|
||||
wire do_fill;
|
||||
wire do_write;
|
||||
|
|
2
hw/rtl/cache/VX_tag_store.v
vendored
2
hw/rtl/cache/VX_tag_store.v
vendored
|
@ -48,8 +48,6 @@ module VX_tag_store #(
|
|||
VX_dp_ram #(
|
||||
.DATAW(`TAG_SELECT_BITS),
|
||||
.SIZE(`BANK_LINE_COUNT),
|
||||
.BYTEENW(1),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) tags (
|
||||
.clk(clk),
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
interface VX_alu_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -20,8 +19,7 @@ interface VX_alu_req_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -10,12 +10,10 @@ interface VX_cache_core_rsp_if #(
|
|||
parameter CORE_TAG_ID_BITS = 0
|
||||
) ();
|
||||
|
||||
wire [NUM_REQS-1:0] valid;
|
||||
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
|
||||
wire [NUM_REQS-1:0] valid;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -9,14 +9,12 @@ interface VX_cache_dram_req_if #(
|
|||
parameter DRAM_TAG_WIDTH = 1
|
||||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire valid;
|
||||
wire rw;
|
||||
wire [(DRAM_LINE_WIDTH/8)-1:0] byteen;
|
||||
wire [DRAM_ADDR_WIDTH-1:0] addr;
|
||||
wire [DRAM_LINE_WIDTH-1:0] data;
|
||||
wire [DRAM_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire [DRAM_TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -8,12 +8,10 @@ interface VX_cache_dram_rsp_if #(
|
|||
parameter DRAM_TAG_WIDTH = 1
|
||||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [DRAM_LINE_WIDTH-1:0] data;
|
||||
wire [DRAM_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
wire valid;
|
||||
wire [DRAM_LINE_WIDTH-1:0] data;
|
||||
wire [DRAM_TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -8,13 +8,11 @@ interface VX_cache_snp_req_if #(
|
|||
parameter SNP_TAG_WIDTH = 0
|
||||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [DRAM_ADDR_WIDTH-1:0] addr;
|
||||
wire invalidate;
|
||||
wire [SNP_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
wire valid;
|
||||
wire [DRAM_ADDR_WIDTH-1:0] addr;
|
||||
wire invalidate;
|
||||
wire [SNP_TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -7,10 +7,8 @@ interface VX_cache_snp_rsp_if #(
|
|||
parameter SNP_TAG_WIDTH = 0
|
||||
) ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire valid;
|
||||
wire [SNP_TAG_WIDTH-1:0] tag;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
interface VX_cmt_to_csr_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
wire [$clog2(3*`NUM_THREADS+1)-1:0] commit_size;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -6,14 +6,12 @@
|
|||
interface VX_commit_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,12 +5,10 @@
|
|||
|
||||
interface VX_csr_io_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire valid;
|
||||
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||
wire rw;
|
||||
wire [31:0] data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,10 +5,8 @@
|
|||
|
||||
interface VX_csr_io_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire valid;
|
||||
wire [31:0] data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
interface VX_csr_pipe_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -15,8 +14,7 @@ interface VX_csr_pipe_req_if ();
|
|||
wire [31:0] csr_mask;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire is_io;
|
||||
|
||||
wire is_io;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
interface VX_csr_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -16,8 +15,7 @@ interface VX_csr_req_if ();
|
|||
wire rs2_is_imm;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
interface VX_decode_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -21,8 +20,7 @@ interface VX_decode_if ();
|
|||
wire [31:0] imm;
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire [`NUM_REGS-1:0] used_regs;
|
||||
|
||||
wire [`NUM_REGS-1:0] used_regs;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -9,8 +9,7 @@
|
|||
|
||||
interface VX_fpu_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -20,8 +19,7 @@ interface VX_fpu_req_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,12 +5,10 @@
|
|||
|
||||
interface VX_ifetch_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -6,12 +6,10 @@
|
|||
interface VX_ifetch_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
wire [31:0] instr;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
interface VX_lsu_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -16,8 +15,7 @@ interface VX_lsu_req_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
wire [31:0] offset;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
interface VX_mul_req_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
|
@ -18,8 +17,7 @@ interface VX_mul_req_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
module VX_cam_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter FASTRAM = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -63,8 +64,8 @@ module VX_cam_buffer #(
|
|||
VX_dp_ram #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0)
|
||||
.RWCHECK(1),
|
||||
.FASTRAM(FASTRAM)
|
||||
) data_table (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
|
|
|
@ -26,9 +26,7 @@ module VX_dp_ram #(
|
|||
localparam DATA32W = DATAW / 32;
|
||||
localparam BYTEEN32W = BYTEENW / 4;
|
||||
|
||||
//`ifndef QUARTUS
|
||||
|
||||
if (FASTRAM) begin
|
||||
if (FASTRAM) begin
|
||||
if (BUFFERED) begin
|
||||
reg [DATAW-1:0] dout_r;
|
||||
|
||||
|
@ -57,72 +55,36 @@ module VX_dp_ram #(
|
|||
dout_r <= mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
assign dout = dout_r;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (rden)
|
||||
|
||||
if (RWCHECK) begin
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
for (integer j = 0; j < BYTEEN32W; j++) begin
|
||||
for (integer i = 0; i < 4; i++) begin
|
||||
if (byteen[j * 4 + i])
|
||||
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
for (integer j = 0; j < BYTEEN32W; j++) begin
|
||||
for (integer i = 0; i < 4; i++) begin
|
||||
if (byteen[j * 4 + i])
|
||||
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && byteen)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end
|
||||
|
||||
assign dout = mem[raddr];
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
for (integer j = 0; j < BYTEEN32W; j++) begin
|
||||
for (integer i = 0; i < 4; i++) begin
|
||||
if (byteen[j * 4 + i])
|
||||
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end else begin
|
||||
`USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && byteen)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (wren && byteen)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
if (BUFFERED) begin
|
||||
|
||||
reg [DATAW-1:0] dout_r;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
|
@ -150,14 +112,11 @@ module VX_dp_ram #(
|
|||
dout_r <= mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
assign dout = dout_r;
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (rden)
|
||||
|
||||
if (RWCHECK) begin
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
|
@ -208,96 +167,6 @@ module VX_dp_ram #(
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
/*`else
|
||||
|
||||
localparam OUTDATA_REG_B = BUFFERED ? "CLOCK0" : "UNREGISTERED";
|
||||
localparam RAM_BLOCK_TYPE = FASTRAM ? "MLAB" : "AUTO";
|
||||
|
||||
if (RWCHECK) begin
|
||||
|
||||
altsyncram #(
|
||||
.init_file (),
|
||||
.operation_mode ("DUAL_PORT"),
|
||||
.numwords_a (SIZE),
|
||||
.numwords_b (SIZE),
|
||||
.widthad_a (ADDRW),
|
||||
.widthad_b (ADDRW),
|
||||
.width_a (DATAW),
|
||||
.width_b (DATAW),
|
||||
.width_byteena_a(BYTEENW),
|
||||
.address_reg_b ("CLOCK0"),
|
||||
.outdata_reg_b (OUTDATA_REG_B),
|
||||
.ram_block_type (RAM_BLOCK_TYPE)
|
||||
) mem (
|
||||
.clocken0 (1'b1),
|
||||
.clocken1 (),
|
||||
.clocken2 (),
|
||||
.clocken3 (),
|
||||
.clock0 (clk),
|
||||
.clock1 (),
|
||||
.address_a (waddr),
|
||||
.address_b (raddr),
|
||||
.byteena_a (byteen),
|
||||
.byteena_b (1'b1),
|
||||
.wren_a (wren),
|
||||
.wren_b (1'b0),
|
||||
.data_a (din),
|
||||
.data_b (),
|
||||
.rden_a (),
|
||||
.rden_b (1'b1),
|
||||
.q_a (),
|
||||
.q_b (dout),
|
||||
.addressstall_a (1'b0),
|
||||
.addressstall_b (1'b0),
|
||||
.aclr0 (1'b0),
|
||||
.aclr1 (1'b0),
|
||||
.eccstatus ()
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`NO_RW_RAM_CHECK altsyncram #(
|
||||
.init_file (),
|
||||
.operation_mode ("DUAL_PORT"),
|
||||
.numwords_a (SIZE),
|
||||
.numwords_b (SIZE),
|
||||
.widthad_a (ADDRW),
|
||||
.widthad_b (ADDRW),
|
||||
.width_a (DATAW),
|
||||
.width_b (DATAW),
|
||||
.width_byteena_a(BYTEENW),
|
||||
.outdata_reg_b (OUTDATA_REG_B),
|
||||
.ram_block_type (RAM_BLOCK_TYPE)
|
||||
) mem (
|
||||
.clocken0 (1'b1),
|
||||
.clocken1 (1'b1),
|
||||
.clocken2 (1'b1),
|
||||
.clocken3 (1'b1),
|
||||
.clock0 (clk),
|
||||
.clock1 (clk),
|
||||
.address_a (waddr),
|
||||
.address_b (raddr),
|
||||
.byteena_a (byteen),
|
||||
.byteena_b (1'b1),
|
||||
.wren_a (wren),
|
||||
.wren_b (1'b0),
|
||||
.data_a (din),
|
||||
.data_b (),
|
||||
.rden_a (),
|
||||
.rden_b (1'b1),
|
||||
.q_a (),
|
||||
.q_b (dout),
|
||||
.addressstall_a (1'b0),
|
||||
.addressstall_b (1'b0),
|
||||
.aclr0 (1'b0),
|
||||
.aclr1 (1'b0),
|
||||
.eccstatus ()
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`endif*/
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -3,10 +3,10 @@
|
|||
module VX_generic_queue #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 2,
|
||||
parameter BUFFERED = 0,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1),
|
||||
parameter FASTRAM = 0
|
||||
parameter BUFFERED = 0,
|
||||
parameter FASTRAM = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -78,25 +78,22 @@ module VX_generic_queue #(
|
|||
end;
|
||||
end
|
||||
end
|
||||
used_r <= used_r + ADDRW'(push) - ADDRW'(pop);
|
||||
used_r <= used_r + (ADDRW'(push) - ADDRW'(pop));
|
||||
end
|
||||
end
|
||||
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
reg [ADDRW:0] rd_ptr_r;
|
||||
reg [ADDRW:0] wr_ptr_r;
|
||||
|
||||
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
|
||||
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= 0;
|
||||
wr_ptr_r <= 0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(push);
|
||||
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(pop);
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -108,8 +105,8 @@ module VX_generic_queue #(
|
|||
.FASTRAM(FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_a),
|
||||
.raddr(rd_ptr_a),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_r),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
|
@ -149,7 +146,7 @@ module VX_generic_queue #(
|
|||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0),
|
||||
.RWCHECK(1),
|
||||
.FASTRAM(FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
|
@ -166,7 +163,7 @@ module VX_generic_queue #(
|
|||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||
dout_r <= data_in;
|
||||
end else if (pop) begin
|
||||
dout_r <= dout;
|
||||
dout_r <= dout; // BRAM R/W collision
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -178,4 +175,4 @@ module VX_generic_queue #(
|
|||
assign size = {full_r, used_r};
|
||||
end
|
||||
|
||||
endmodule
|
||||
endmodule
|
|
@ -143,7 +143,7 @@ module VX_scope #(
|
|||
end
|
||||
|
||||
if (stop
|
||||
|| (waddr >= waddr_end)) begin
|
||||
|| (waddr == waddr_end)) begin
|
||||
waddr <= waddr; // keep last address
|
||||
recording <= 0;
|
||||
data_valid <= 1;
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
module VX_stream_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter TYPE = "R",
|
||||
parameter IN_BUFFER = 0,
|
||||
parameter OUT_BUFFER = 0
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter TYPE = "R",
|
||||
parameter BUFFERED = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -22,27 +21,6 @@ module VX_stream_arbiter #(
|
|||
localparam LOG_NUM_REQS = $clog2(NUM_REQS);
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [NUM_REQS-1:0] valid_in_qual;
|
||||
wire [NUM_REQS-1:0][DATAW-1:0] data_in_qual;
|
||||
wire [NUM_REQS-1:0] ready_in_qual;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!IN_BUFFER)
|
||||
) req_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in[i]),
|
||||
.data_in (data_in[i]),
|
||||
.ready_in (ready_in[i]),
|
||||
.valid_out (valid_in_qual[i]),
|
||||
.data_out (data_in_qual[i]),
|
||||
.ready_out (ready_in_qual[i])
|
||||
);
|
||||
end
|
||||
|
||||
wire sel_enable;
|
||||
wire sel_valid;
|
||||
wire [LOG_NUM_REQS-1:0] sel_idx;
|
||||
|
@ -56,7 +34,7 @@ module VX_stream_arbiter #(
|
|||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
|
@ -71,7 +49,7 @@ module VX_stream_arbiter #(
|
|||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
|
@ -86,7 +64,7 @@ module VX_stream_arbiter #(
|
|||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
|
@ -101,47 +79,36 @@ module VX_stream_arbiter #(
|
|||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
);
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
if (OUT_BUFFER) begin
|
||||
wire ready_out_unqual;
|
||||
|
||||
wire stall = ~ready_out && valid_out;
|
||||
assign sel_enable = ~stall;
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!BUFFERED)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (sel_valid),
|
||||
.data_in (data_in[sel_idx]),
|
||||
.ready_in (ready_out_unqual),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + DATAW),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({sel_valid, data_in_qual[sel_idx]}),
|
||||
.data_out ({valid_out, data_out})
|
||||
);
|
||||
assign sel_enable = ready_out_unqual;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in_qual[i] = sel_1hot[i] && ~stall;
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
assign sel_enable = ready_out;
|
||||
assign valid_out = sel_valid;
|
||||
assign data_out = data_in_qual[sel_idx];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in_qual[i] = sel_1hot[i] && ready_out;
|
||||
end
|
||||
|
||||
end
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in[i] = sel_1hot[i] && ready_out_unqual;
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
|
|
68
hw/rtl/libs/VX_stream_demux.v
Normal file
68
hw/rtl/libs/VX_stream_demux.v
Normal file
|
@ -0,0 +1,68 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
// Stream demultiplexer.
//
// Routes one valid/ready input stream to one of NUM_REQS output streams,
// selected by `sel`. Every output sees the same payload (`data_in`); only the
// selected output has its valid asserted, and `ready_in` mirrors the ready of
// the selected output.
//
// Parameters:
//   NUM_REQS - number of output streams.
//   DATAW    - payload width in bits.
//   BUFFERED - each output goes through a VX_skid_buffer whose PASSTHRU
//              parameter is set to !BUFFERED.
//
// When NUM_REQS is 1 (or less) no buffer is instantiated: the input stream is
// wired straight to the output and clk/reset/sel are unused.
module VX_stream_demux #(
    parameter NUM_REQS = 1,
    parameter DATAW    = 1,
    parameter BUFFERED = 0,
    localparam LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
    input  wire clk,
    input  wire reset,

    input  wire [LOG_NUM_REQS-1:0] sel,

    input  wire             valid_in,
    input  wire [DATAW-1:0] data_in,
    output wire             ready_in,

    output wire [NUM_REQS-1:0]            valid_out,
    output wire [NUM_REQS-1:0][DATAW-1:0] data_out,
    input  wire [NUM_REQS-1:0]            ready_out
);

    if (NUM_REQS > 1) begin

        reg  [NUM_REQS-1:0]            lane_valid;  // one-hot valid, before the output buffers
        wire [NUM_REQS-1:0][DATAW-1:0] lane_data;   // payload replicated to every lane
        wire [NUM_REQS-1:0]            lane_ready;  // ready as reported by each output buffer

        // Back-pressure comes from the currently selected lane only.
        assign ready_in = lane_ready[sel];

        // Clear all lanes first, then raise valid on the selected one.
        always @(*) begin
            lane_valid      = '0;
            lane_valid[sel] = valid_in;
        end

        // Broadcast the payload; the valid bit decides which lane consumes it.
        for (genvar lane = 0; lane < NUM_REQS; lane++) begin
            assign lane_data[lane] = data_in;
        end

        // One skid buffer per output stream.
        for (genvar lane = 0; lane < NUM_REQS; lane++) begin
            VX_skid_buffer #(
                .DATAW    (DATAW),
                .PASSTHRU (!BUFFERED)
            ) out_buffer (
                .clk       (clk),
                .reset     (reset),
                .valid_in  (lane_valid[lane]),
                .data_in   (lane_data[lane]),
                .ready_in  (lane_ready[lane]),
                .valid_out (valid_out[lane]),
                .data_out  (data_out[lane]),
                .ready_out (ready_out[lane])
            );
        end

    end else begin

        // Single output: nothing to select and nothing to buffer.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        `UNUSED_VAR (sel)

        assign valid_out = valid_in;
        assign data_out  = data_in;
        assign ready_in  = ready_out;

    end

endmodule
|
|
@ -9,7 +9,7 @@
|
|||
"modules": {
|
||||
"afu": {
|
||||
"submodules": {
|
||||
"vortex": {"type":"Vortex", "enabled":false}
|
||||
"vortex": {"type":"Vortex", "enabled":true}
|
||||
}
|
||||
},
|
||||
"Vortex": {
|
||||
|
@ -190,6 +190,7 @@
|
|||
"?writeback_valid": 1,
|
||||
"writeback_wid":"`NW_BITS",
|
||||
"writeback_pc": 32,
|
||||
"writeback_tmask":"`NUM_THREADS",
|
||||
"writeback_rd":"`NR_BITS",
|
||||
"writeback_data":"`NUM_THREADS * 32",
|
||||
"!scoreboard_delay": 1,
|
||||
|
@ -204,11 +205,14 @@
|
|||
"addr_st1": 32,
|
||||
"addr_st2": 32,
|
||||
"addr_st3": 32,
|
||||
"is_fill_st0": 1,
|
||||
"is_snp_st0": 1,
|
||||
"is_mshr_st0": 1,
|
||||
"miss_st1": 1,
|
||||
"force_miss_st1": 1,
|
||||
"dirty_st1": 1,
|
||||
"!force_miss_st1": 1,
|
||||
"!pipeline_stall": 1
|
||||
"mshr_push": 1,
|
||||
"?pipeline_stall": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#include <iomanip>
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 100
|
||||
#define DRAM_LATENCY 24
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
|
|
12
hw/syn/quartus/.gitignore
vendored
12
hw/syn/quartus/.gitignore
vendored
|
@ -12,3 +12,15 @@
|
|||
|
||||
/core/*
|
||||
!/core/Makefile
|
||||
|
||||
/core8/*
|
||||
!/core8/Makefile
|
||||
|
||||
/top1/*
|
||||
!/top1/Makefile
|
||||
|
||||
/top2/*
|
||||
!/top2/Makefile
|
||||
|
||||
/top8/*
|
||||
!/top8/Makefile
|
72
hw/syn/quartus/core8/Makefile
Normal file
72
hw/syn/quartus/core8/Makefile
Normal file
|
@ -0,0 +1,72 @@
|
|||
# Quartus build flow for the standalone core (top-level entity VX_core).
# The project is generated by ../project.tcl, which is passed
# -set "NUM_THREADS=8".

PROJECT = Core
TOP_LEVEL_ENTITY = VX_core
SRC_FILE = VX_core.v
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG

# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing

# Build targets
all: $(PROJECT).sta.rpt

syn: $(PROJECT).syn.rpt

fit: $(PROJECT).fit.rpt

asm: $(PROJECT).asm.rpt

sta: $(PROJECT).sta.rpt

smart: smart.log

# These names are commands, not files: keep them working even if a file with
# the same name appears in this directory.
.PHONY: all syn fit asm sta smart program clean

# Target implementations
# Each stage writes "done" into the *.chg stamp of the stage(s) that must
# rerun after it.
STAMP = echo done >

# NOTE: SOURCE_FILES is not defined in this Makefile; it expands to nothing
# unless supplied on the command line or through the environment.
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
	quartus_syn $(PROJECT) $(SYN_ARGS)
	$(STAMP) fit.chg

$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
	quartus_fit $(PROJECT) $(FIT_ARGS)
	$(STAMP) asm.chg
	$(STAMP) sta.chg

$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
	quartus_asm $(PROJECT) $(ASM_ARGS)

$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
	quartus_sta $(PROJECT) $(STA_ARGS)

smart.log: $(PROJECT_FILES)
	quartus_sh --determine_smart_action $(PROJECT) > smart.log

# Project initialization
$(PROJECT_FILES):
	quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NUM_THREADS=8"

# Initial stamp files, created on first use.
syn.chg:
	$(STAMP) syn.chg

fit.chg:
	$(STAMP) fit.chg

sta.chg:
	$(STAMP) sta.chg

asm.chg:
	$(STAMP) asm.chg

program: $(PROJECT).sof
	quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"

clean:
	rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
|
@ -1,6 +1,6 @@
|
|||
set_time_format -unit ns -decimal_places 3
|
||||
|
||||
create_clock -name {clk} -period "200 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
create_clock -name {clk} -period "220 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
|
||||
derive_pll_clocks -create_base_clocks
|
||||
derive_clock_uncertainty
|
||||
|
|
|
@ -31,6 +31,7 @@ set_global_assignment -name FAMILY $opts(family)
|
|||
set_global_assignment -name DEVICE $opts(device)
|
||||
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
|
||||
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
|
||||
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
||||
|
@ -39,7 +40,14 @@ set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
|||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name VERILOG_MACRO FPU_FAST
|
||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||
|
@ -50,12 +58,6 @@ set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
|||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||
set_global_assignment -name SEED 1
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
||||
|
|
40
hw/syn/quartus/timing-html.tcl
Normal file
40
hw/syn/quartus/timing-html.tcl
Normal file
|
@ -0,0 +1,40 @@
|
|||
# Writes HTML timing reports for a Quartus project.
#
# Options:
#   -project <name>  Project to open (required).
#   -outdir  <dir>   Directory receiving the reports (default: timing-html).
#
# For every available operating condition the script writes a critical-path
# report (timing_paths_<op>.html) and a slack histogram for clock "clk"
# (slack_histogram_<op>.html) into the output directory.
package require cmdline

set options {
    { "project.arg" "" "Project name" }
    { "outdir.arg" "timing-html" "Output directory" }
}

array set opts [::cmdline::getoptions quartus(args) $options]

# Verify required parameters
set requiredParameters {project}
foreach p $requiredParameters {
    if {$opts($p) == ""} {
        puts stderr "Missing required parameter: -$p"
        exit 1
    }
}

# The report commands below are handed paths inside the output directory;
# create it up front so the script does not depend on it already existing.
# (file mkdir does nothing when the directory is already there.)
file mkdir $opts(outdir)

project_open $opts(project)

set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL

create_timing_netlist
read_sdc
update_timing_netlist

foreach_in_collection op [get_available_operating_conditions] {
    set_operating_conditions $op

    report_timing -setup -npaths 150 -detail full_path -multi_corner -pairs_only -nworst 8 \
        -file "$opts(outdir)/timing_paths_$op.html" \
        -panel_name "Critical paths for $op"

    create_slack_histogram -num_bins 50 -clock clk -multi_corner -file "$opts(outdir)/slack_histogram_$op.html"
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
project_open VX_pipeline
|
||||
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
|
||||
create_timing_netlist
|
||||
read_sdc
|
||||
update_timing_netlist
|
||||
|
||||
foreach_in_collection op [get_available_operating_conditions] {
|
||||
set_operating_conditions $op
|
||||
|
||||
report_timing -setup -npaths 150 -detail full_path -multi_corner -pairs_only -nworst 8 \
|
||||
-file "bin/timing_paths_$op.html" \
|
||||
-panel_name "Critical paths for $op"
|
||||
|
||||
create_slack_histogram -num_bins 50 -clock clk -multi_corner -file "bin/slack_histogram_$op.html"
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
72
hw/syn/quartus/top1/Makefile
Normal file
72
hw/syn/quartus/top1/Makefile
Normal file
|
@ -0,0 +1,72 @@
|
|||
# Quartus build flow for the full AFU (top-level entity vortex_afu).
# The project is generated by ../project.tcl, which is passed
# -set "NOPAE" -set "NUM_CORES=1".

PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG

# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing

# Build targets
all: $(PROJECT).sta.rpt

syn: $(PROJECT).syn.rpt

fit: $(PROJECT).fit.rpt

asm: $(PROJECT).asm.rpt

sta: $(PROJECT).sta.rpt

smart: smart.log

# These names are commands, not files: keep them working even if a file with
# the same name appears in this directory.
.PHONY: all syn fit asm sta smart program clean

# Target implementations
# Each stage writes "done" into the *.chg stamp of the stage(s) that must
# rerun after it.
STAMP = echo done >

# NOTE: SOURCE_FILES is not defined in this Makefile; it expands to nothing
# unless supplied on the command line or through the environment.
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
	quartus_syn $(PROJECT) $(SYN_ARGS)
	$(STAMP) fit.chg

$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
	quartus_fit $(PROJECT) $(FIT_ARGS)
	$(STAMP) asm.chg
	$(STAMP) sta.chg

$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
	quartus_asm $(PROJECT) $(ASM_ARGS)

$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
	quartus_sta $(PROJECT) $(STA_ARGS)

smart.log: $(PROJECT_FILES)
	quartus_sh --determine_smart_action $(PROJECT) > smart.log

# Project initialization
$(PROJECT_FILES):
	quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=1"

# Initial stamp files, created on first use.
syn.chg:
	$(STAMP) syn.chg

fit.chg:
	$(STAMP) fit.chg

sta.chg:
	$(STAMP) sta.chg

asm.chg:
	$(STAMP) asm.chg

program: $(PROJECT).sof
	quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"

clean:
	rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
72
hw/syn/quartus/top2/Makefile
Normal file
72
hw/syn/quartus/top2/Makefile
Normal file
|
@ -0,0 +1,72 @@
|
|||
# Quartus build flow for the full AFU (top-level entity vortex_afu).
# The project is generated by ../project.tcl, which is passed
# -set "NOPAE" -set "NUM_CORES=2".

PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG

# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing

# Build targets
all: $(PROJECT).sta.rpt

syn: $(PROJECT).syn.rpt

fit: $(PROJECT).fit.rpt

asm: $(PROJECT).asm.rpt

sta: $(PROJECT).sta.rpt

smart: smart.log

# These names are commands, not files: keep them working even if a file with
# the same name appears in this directory.
.PHONY: all syn fit asm sta smart program clean

# Target implementations
# Each stage writes "done" into the *.chg stamp of the stage(s) that must
# rerun after it.
STAMP = echo done >

# NOTE: SOURCE_FILES is not defined in this Makefile; it expands to nothing
# unless supplied on the command line or through the environment.
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
	quartus_syn $(PROJECT) $(SYN_ARGS)
	$(STAMP) fit.chg

$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
	quartus_fit $(PROJECT) $(FIT_ARGS)
	$(STAMP) asm.chg
	$(STAMP) sta.chg

$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
	quartus_asm $(PROJECT) $(ASM_ARGS)

$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
	quartus_sta $(PROJECT) $(STA_ARGS)

smart.log: $(PROJECT_FILES)
	quartus_sh --determine_smart_action $(PROJECT) > smart.log

# Project initialization
$(PROJECT_FILES):
	quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=2"

# Initial stamp files, created on first use.
syn.chg:
	$(STAMP) syn.chg

fit.chg:
	$(STAMP) fit.chg

sta.chg:
	$(STAMP) sta.chg

asm.chg:
	$(STAMP) asm.chg

program: $(PROJECT).sof
	quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"

clean:
	rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
74
hw/syn/quartus/top8/Makefile
Normal file
74
hw/syn/quartus/top8/Makefile
Normal file
|
@ -0,0 +1,74 @@
|
|||
# Quartus build flow for the full AFU (top-level entity vortex_afu).
# The project is generated by ../project.tcl, which is passed
# -set "NOPAE" -set "NUM_CORES=8".

PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

# Part, Family
FAMILY = "Arria 10"

#DEVICE = 1SX280HN2F43E2VG
DEVICE = 10AX115N3F40E2SG

# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing

# Build targets
all: $(PROJECT).sta.rpt

syn: $(PROJECT).syn.rpt

fit: $(PROJECT).fit.rpt

asm: $(PROJECT).asm.rpt

sta: $(PROJECT).sta.rpt

smart: smart.log

# These names are commands, not files: keep them working even if a file with
# the same name appears in this directory.
.PHONY: all syn fit asm sta smart program clean

# Target implementations
# Each stage writes "done" into the *.chg stamp of the stage(s) that must
# rerun after it.
STAMP = echo done >

# NOTE: SOURCE_FILES is not defined in this Makefile; it expands to nothing
# unless supplied on the command line or through the environment.
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
	quartus_syn $(PROJECT) $(SYN_ARGS)
	$(STAMP) fit.chg

$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
	quartus_fit $(PROJECT) $(FIT_ARGS)
	$(STAMP) asm.chg
	$(STAMP) sta.chg

$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
	quartus_asm $(PROJECT) $(ASM_ARGS)

$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
	quartus_sta $(PROJECT) $(STA_ARGS)

smart.log: $(PROJECT_FILES)
	quartus_sh --determine_smart_action $(PROJECT) > smart.log

# Project initialization
$(PROJECT_FILES):
	quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8"

# Initial stamp files, created on first use.
syn.chg:
	$(STAMP) syn.chg

fit.chg:
	$(STAMP) fit.chg

sta.chg:
	$(STAMP) sta.chg

asm.chg:
	$(STAMP) asm.chg

program: $(PROJECT).sof
	quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"

clean:
	rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
Loading…
Add table
Add a link
Reference in a new issue