mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
commit
d3c3d551ff
71 changed files with 1671 additions and 1319 deletions
|
@ -40,15 +40,18 @@ jobs:
|
|||
- stage: test
|
||||
name: config
|
||||
script: cp -r $PWD ../build4 && cd ../build4 && ./ci/travis_run.py ./ci/regression.sh -config
|
||||
- stage: test
|
||||
name: stress0
|
||||
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/travis_run.py ./ci/regression.sh -stress0
|
||||
- stage: test
|
||||
name: stress1
|
||||
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/travis_run.py ./ci/regression.sh -stress1
|
||||
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/travis_run.py ./ci/regression.sh -stress1
|
||||
- stage: test
|
||||
name: stress2
|
||||
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/travis_run.py ./ci/regression.sh -stress2
|
||||
script: cp -r $PWD ../build7 && cd ../build7 && ./ci/travis_run.py ./ci/regression.sh -stress2
|
||||
- stage: test
|
||||
name: compiler
|
||||
script: cp -r $PWD ../build7 && cd ../build7 && ./ci/travis_run.py /ci/test_compiler.sh
|
||||
script: cp -r $PWD ../build8 && cd ../build8 && ./ci/travis_run.py /ci/test_compiler.sh
|
||||
|
||||
after_success:
|
||||
# Gather code coverage
|
||||
|
|
|
@ -46,8 +46,8 @@ debug()
|
|||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --perf --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --debug --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
|
||||
|
||||
echo "debugging tests done!"
|
||||
|
@ -72,13 +72,18 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
|
|||
# using FPNEW FPU core
|
||||
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
|
||||
|
||||
# adjust l1 block size to match l2
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
|
||||
# test cache multi-porting
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
|
||||
CONFIGS="-DL2NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
|
||||
CONFIGS="-DL2NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
@ -92,32 +97,45 @@ CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=
|
|||
# test 128-bit DRAM block
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
|
||||
|
||||
# test long memory latency
|
||||
CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
echo "configuration tests done!"
|
||||
}
|
||||
|
||||
stress0()
|
||||
{
|
||||
echo "begin stress0 tests..."
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
|
||||
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=printf
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=printf
|
||||
|
||||
echo "stress0 tests done!"
|
||||
}
|
||||
|
||||
stress1()
|
||||
{
|
||||
echo "begin stress tests..."
|
||||
echo "begin stress1 tests..."
|
||||
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm --args="-n256"
|
||||
|
||||
echo "stress tests done!"
|
||||
echo "stress1 tests done!"
|
||||
}
|
||||
|
||||
stress2()
|
||||
{
|
||||
echo "begin stress tests..."
|
||||
echo "begin stress2 tests..."
|
||||
|
||||
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 --l3cache --app=sgemm --args="-n256"
|
||||
|
||||
echo "stress tests done!"
|
||||
echo "stress2 tests done!"
|
||||
}
|
||||
|
||||
usage()
|
||||
|
@ -128,13 +146,15 @@ usage()
|
|||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-coverage ) coverage
|
||||
;;
|
||||
;;
|
||||
-cluster ) cluster
|
||||
;;
|
||||
;;
|
||||
-debug ) debug
|
||||
;;
|
||||
;;
|
||||
-config ) config
|
||||
;;
|
||||
-stress0 ) stress0
|
||||
;;
|
||||
-stress1 ) stress1
|
||||
;;
|
||||
-stress2 ) stress2
|
||||
|
@ -143,12 +163,13 @@ while [ "$1" != "" ]; do
|
|||
cluster
|
||||
debug
|
||||
config
|
||||
stress0
|
||||
stress1
|
||||
stress2
|
||||
;;
|
||||
;;
|
||||
-h | --help ) usage
|
||||
exit
|
||||
;;
|
||||
;;
|
||||
* ) usage
|
||||
exit 1
|
||||
esac
|
||||
|
|
|
@ -25,12 +25,12 @@ module VX_alu_unit #(
|
|||
wire stall_in, stall_out;
|
||||
|
||||
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
wire is_br_op = `ALU_IS_BR(alu_req_if.op_mod);
|
||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
|
||||
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
|
||||
wire alu_signed = `ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `ALU_SUB);
|
||||
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_OP(alu_req_if.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_OP(alu_req_if.op_type);
|
||||
wire alu_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `INST_ALU_SUB);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
@ -57,10 +57,10 @@ module VX_alu_unit #(
|
|||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`ALU_SLL,
|
||||
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`INST_ALU_SLL,
|
||||
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
endcase
|
||||
end
|
||||
|
@ -81,7 +81,7 @@ module VX_alu_unit #(
|
|||
|
||||
// branch
|
||||
|
||||
wire is_jal = is_br_op && (br_op == `BR_JAL || br_op == `BR_JALR);
|
||||
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||
|
||||
wire [31:0] br_dest = add_result[alu_req_if.tid];
|
||||
|
@ -90,9 +90,9 @@ module VX_alu_unit #(
|
|||
wire is_less = cmp_result[32];
|
||||
wire is_equal = ~(| cmp_result[31:0]);
|
||||
|
||||
wire br_neg = `BR_NEG(br_op);
|
||||
wire br_less = `BR_LESS(br_op);
|
||||
wire br_static = `BR_STATIC(br_op);
|
||||
wire br_neg = `INST_BR_NEG(br_op);
|
||||
wire br_less = `INST_BR_LESS(br_op);
|
||||
wire br_static = `INST_BR_STATIC(br_op);
|
||||
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
||||
|
||||
// output
|
||||
|
@ -118,14 +118,14 @@ module VX_alu_unit #(
|
|||
wire mul_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||
|
||||
wire is_mul_op = `ALU_IS_MUL(alu_req_if.op_mod);
|
||||
wire is_mul_op = `INST_ALU_IS_MUL(alu_req_if.op_mod);
|
||||
|
||||
VX_muldiv muldiv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Inputs
|
||||
.alu_op (`MUL_OP(alu_req_if.op_type)),
|
||||
.alu_op (`INST_MUL_OP(alu_req_if.op_type)),
|
||||
.wid_in (alu_req_if.wid),
|
||||
.tmask_in (alu_req_if.tmask),
|
||||
.PC_in (alu_req_if.PC),
|
||||
|
|
|
@ -87,6 +87,7 @@ module VX_cluster #(
|
|||
.CACHE_SIZE (`L2CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L2NUM_BANKS),
|
||||
.NUM_PORTS (`L2NUM_PORTS),
|
||||
.WORD_SIZE (`L2WORD_SIZE),
|
||||
.NUM_REQS (`L2NUM_REQS),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
|
@ -143,17 +144,19 @@ module VX_cluster #(
|
|||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`L2MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`L2MEM_ADDR_WIDTH),
|
||||
.DATA_WIDTH (`DMEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Core request
|
||||
.req_valid_in (per_core_mem_req_valid),
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef L1_BLOCK_SIZE
|
||||
`define L1_BLOCK_SIZE (`NUM_THREADS * 4)
|
||||
`define L1_BLOCK_SIZE ((`L2_ENABLE || `L3_ENABLE) ? (`NUM_THREADS * 4) : `MEM_BLOCK_SIZE)
|
||||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
|
@ -291,7 +291,7 @@
|
|||
`define DNUM_BANKS `NUM_THREADS
|
||||
`endif
|
||||
|
||||
// Number of bank ports
|
||||
// Number of ports per bank
|
||||
`ifndef DNUM_PORTS
|
||||
`define DNUM_PORTS 1
|
||||
`endif
|
||||
|
@ -361,6 +361,11 @@
|
|||
`define L2NUM_BANKS `MIN(`NUM_CORES, 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L2NUM_PORTS
|
||||
`define L2NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L2CREQ_SIZE
|
||||
`define L2CREQ_SIZE 0
|
||||
|
@ -398,6 +403,11 @@
|
|||
`define L3NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
|
||||
`endif
|
||||
|
||||
// Number of ports per bank
|
||||
`ifndef L3NUM_PORTS
|
||||
`define L3NUM_PORTS 1
|
||||
`endif
|
||||
|
||||
// Core Request Queue Size
|
||||
`ifndef L3CREQ_SIZE
|
||||
`define L3CREQ_SIZE 0
|
||||
|
|
|
@ -42,7 +42,7 @@ module VX_csr_data #(
|
|||
reg [63:0] csr_cycle;
|
||||
reg [63:0] csr_instret;
|
||||
|
||||
reg [`NUM_WARPS-1:0][`FRM_BITS+`FFG_BITS-1:0] fcsr;
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
||||
|
@ -52,16 +52,16 @@ module VX_csr_data #(
|
|||
end
|
||||
|
||||
if (fpu_to_csr_if.write_enable) begin
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
end
|
||||
`endif
|
||||
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`CSR_FFLAGS: fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
`CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
|
||||
`CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
|
||||
|
||||
`CSR_SATP: csr_satp <= write_data;
|
||||
|
||||
|
@ -104,8 +104,8 @@ module VX_csr_data #(
|
|||
read_data_r = 'x;
|
||||
read_addr_valid_r = 1;
|
||||
case (read_addr)
|
||||
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFG_BITS-1:0]);
|
||||
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS]);
|
||||
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFLAGS_BITS-1:0]);
|
||||
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS]);
|
||||
`CSR_FCSR : read_data_r = 32'(fcsr[read_wid]);
|
||||
|
||||
`CSR_WTID ,
|
||||
|
@ -222,7 +222,7 @@ module VX_csr_data #(
|
|||
assign read_data = read_data_r;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS];
|
||||
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS];
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -70,14 +70,14 @@ module VX_csr_unit #(
|
|||
always @(*) begin
|
||||
csr_we_s0_unqual = (csr_req_data != 0);
|
||||
case (csr_req_if.op_type)
|
||||
`CSR_RW: begin
|
||||
`INST_CSR_RW: begin
|
||||
csr_updated_data = csr_req_data;
|
||||
csr_we_s0_unqual = 1;
|
||||
end
|
||||
`CSR_RS: begin
|
||||
`INST_CSR_RS: begin
|
||||
csr_updated_data = csr_read_data_qual | csr_req_data;
|
||||
end
|
||||
//`CSR_RC
|
||||
//`INST_CSR_RC
|
||||
default: begin
|
||||
csr_updated_data = csr_read_data_qual & ~csr_req_data;
|
||||
end
|
||||
|
|
|
@ -32,8 +32,8 @@ module VX_decode #(
|
|||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`OP_BITS-1:0] op_type;
|
||||
reg [`MOD_BITS-1:0] op_mod;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
reg [`INST_MOD_BITS-1:0] op_mod;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_PC, use_imm;
|
||||
|
@ -79,14 +79,14 @@ module VX_decode #(
|
|||
`INST_I: begin
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`ALU_ADD);
|
||||
3'h1: op_type = `OP_BITS'(`ALU_SLL);
|
||||
3'h2: op_type = `OP_BITS'(`ALU_SLT);
|
||||
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
|
||||
3'h4: op_type = `OP_BITS'(`ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
|
||||
3'h6: op_type = `OP_BITS'(`ALU_OR);
|
||||
3'h7: op_type = `OP_BITS'(`ALU_AND);
|
||||
3'h0: op_type = `INST_OP_BITS'(`INST_ALU_ADD);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
|
||||
3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
|
||||
3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
|
||||
default:;
|
||||
endcase
|
||||
use_rd = 1;
|
||||
|
@ -100,14 +100,14 @@ module VX_decode #(
|
|||
`ifdef EXT_F_ENABLE
|
||||
if (func7[0]) begin
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`MUL_MUL);
|
||||
3'h1: op_type = `OP_BITS'(`MUL_MULH);
|
||||
3'h2: op_type = `OP_BITS'(`MUL_MULHSU);
|
||||
3'h3: op_type = `OP_BITS'(`MUL_MULHU);
|
||||
3'h4: op_type = `OP_BITS'(`MUL_DIV);
|
||||
3'h5: op_type = `OP_BITS'(`MUL_DIVU);
|
||||
3'h6: op_type = `OP_BITS'(`MUL_REM);
|
||||
3'h7: op_type = `OP_BITS'(`MUL_REMU);
|
||||
3'h0: op_type = `INST_OP_BITS'(`INST_MUL_MUL);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_MUL_MULH);
|
||||
3'h2: op_type = `INST_OP_BITS'(`INST_MUL_MULHSU);
|
||||
3'h3: op_type = `INST_OP_BITS'(`INST_MUL_MULHU);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_MUL_DIV);
|
||||
3'h5: op_type = `INST_OP_BITS'(`INST_MUL_DIVU);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_MUL_REM);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_MUL_REMU);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 2;
|
||||
|
@ -115,14 +115,14 @@ module VX_decode #(
|
|||
`endif
|
||||
begin
|
||||
case (func3)
|
||||
3'h0: op_type = (func7[5]) ? `OP_BITS'(`ALU_SUB) : `OP_BITS'(`ALU_ADD);
|
||||
3'h1: op_type = `OP_BITS'(`ALU_SLL);
|
||||
3'h2: op_type = `OP_BITS'(`ALU_SLT);
|
||||
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
|
||||
3'h4: op_type = `OP_BITS'(`ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
|
||||
3'h6: op_type = `OP_BITS'(`ALU_OR);
|
||||
3'h7: op_type = `OP_BITS'(`ALU_AND);
|
||||
3'h0: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SUB) : `INST_OP_BITS'(`INST_ALU_ADD);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
|
||||
3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
|
||||
3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -133,7 +133,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_LUI);
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
|
@ -142,7 +142,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_AUIPC);
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
|
@ -151,7 +151,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`BR_JAL);
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JAL);
|
||||
op_mod = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
|
@ -162,7 +162,7 @@ module VX_decode #(
|
|||
end
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`BR_JALR);
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JALR);
|
||||
op_mod = 1;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
|
@ -174,12 +174,12 @@ module VX_decode #(
|
|||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`BR_EQ);
|
||||
3'h1: op_type = `OP_BITS'(`BR_NE);
|
||||
3'h4: op_type = `OP_BITS'(`BR_LT);
|
||||
3'h5: op_type = `OP_BITS'(`BR_GE);
|
||||
3'h6: op_type = `OP_BITS'(`BR_LTU);
|
||||
3'h7: op_type = `OP_BITS'(`BR_GEU);
|
||||
3'h0: op_type = `INST_OP_BITS'(`INST_BR_EQ);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_BR_NE);
|
||||
3'h4: op_type = `INST_OP_BITS'(`INST_BR_LT);
|
||||
3'h5: op_type = `INST_OP_BITS'(`INST_BR_GE);
|
||||
3'h6: op_type = `INST_OP_BITS'(`INST_BR_LTU);
|
||||
3'h7: op_type = `INST_OP_BITS'(`INST_BR_GEU);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 1;
|
||||
|
@ -192,12 +192,13 @@ module VX_decode #(
|
|||
end
|
||||
`INST_F: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_mod = `MOD_BITS'(!func3[0]); // data fence
|
||||
op_type = `INST_OP_BITS'(func3[0]);
|
||||
op_mod = `INST_MOD_BITS'(1);
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3[1:0] != 0) begin
|
||||
ex_type = `EX_CSR;
|
||||
op_type = `OP_BITS'(func3[1:0]);
|
||||
op_type = `INST_OP_BITS'(func3[1:0]);
|
||||
use_rd = 1;
|
||||
use_imm = func3[2];
|
||||
imm = 32'(u_12); // addr
|
||||
|
@ -210,11 +211,11 @@ module VX_decode #(
|
|||
end else begin
|
||||
ex_type = `EX_ALU;
|
||||
case (u_12)
|
||||
12'h000: op_type = `OP_BITS'(`BR_ECALL);
|
||||
12'h001: op_type = `OP_BITS'(`BR_EBREAK);
|
||||
12'h302: op_type = `OP_BITS'(`BR_MRET);
|
||||
12'h102: op_type = `OP_BITS'(`BR_SRET);
|
||||
12'h7B2: op_type = `OP_BITS'(`BR_DRET);
|
||||
12'h000: op_type = `INST_OP_BITS'(`INST_BR_ECALL);
|
||||
12'h001: op_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h302: op_type = `INST_OP_BITS'(`INST_BR_MRET);
|
||||
12'h102: op_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h7B2: op_type = `INST_OP_BITS'(`INST_BR_DRET);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 1;
|
||||
|
@ -231,7 +232,7 @@ module VX_decode #(
|
|||
`endif
|
||||
`INST_L: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b0, func3});
|
||||
op_type = `INST_OP_BITS'({1'b0, func3});
|
||||
use_rd = 1;
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -247,7 +248,7 @@ module VX_decode #(
|
|||
`endif
|
||||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b1, func3});
|
||||
op_type = `INST_OP_BITS'({1'b1, func3});
|
||||
imm = {{20{s_imm[11]}}, s_imm};
|
||||
`USED_IREG (rs1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -263,7 +264,7 @@ module VX_decode #(
|
|||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `OP_BITS'(opcode[3:0]);
|
||||
op_type = `INST_OP_BITS'(opcode[3:0]);
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
|
@ -280,35 +281,35 @@ module VX_decode #(
|
|||
7'h04, // FSUB
|
||||
7'h08, // FMUL
|
||||
7'h0C: begin // FDIV
|
||||
op_type = `OP_BITS'(func7[3:0]);
|
||||
op_type = `INST_OP_BITS'(func7[3:0]);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
7'h2C: begin
|
||||
op_type = `OP_BITS'(`FPU_SQRT);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
7'h50: begin
|
||||
op_type = `OP_BITS'(`FPU_CMP);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
7'h60: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
|
||||
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTWUS) : `INST_OP_BITS'(`INST_FPU_CVTWS);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
7'h68: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
|
||||
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTSWU) : `INST_OP_BITS'(`INST_FPU_CVTSW);
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
7'h10: begin
|
||||
// FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = {1'b0, func3[1:0]};
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -316,7 +317,7 @@ module VX_decode #(
|
|||
end
|
||||
7'h14: begin
|
||||
// FMIN=3, FMAX=4
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = func3[0] ? 4 : 3;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -325,10 +326,10 @@ module VX_decode #(
|
|||
7'h70: begin
|
||||
if (func3[0]) begin
|
||||
// FCLASS
|
||||
op_type = `OP_BITS'(`FPU_CLASS);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CLASS);
|
||||
end else begin
|
||||
// FMV.X.W=5
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 5;
|
||||
end
|
||||
`USED_IREG (rd);
|
||||
|
@ -336,7 +337,7 @@ module VX_decode #(
|
|||
end
|
||||
7'h78: begin
|
||||
// FMV.W.X=6
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 6;
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
|
@ -349,26 +350,26 @@ module VX_decode #(
|
|||
ex_type = `EX_GPU;
|
||||
case (func3)
|
||||
3'h0: begin
|
||||
op_type = `OP_BITS'(rs2 ? `GPU_PRED : `GPU_TMC);
|
||||
op_type = rs2[0] ? `INST_OP_BITS'(`INST_GPU_PRED) : `INST_OP_BITS'(`INST_GPU_TMC);
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
3'h1: begin
|
||||
op_type = `OP_BITS'(`GPU_WSPAWN);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_WSPAWN);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
3'h2: begin
|
||||
op_type = `OP_BITS'(`GPU_SPLIT);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_SPLIT);
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
3'h3: begin
|
||||
op_type = `OP_BITS'(`GPU_JOIN);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_JOIN);
|
||||
is_join = 1;
|
||||
end
|
||||
3'h4: begin
|
||||
op_type = `OP_BITS'(`GPU_BAR);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_BAR);
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
|
|
|
@ -14,8 +14,6 @@
|
|||
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS 64
|
||||
`else
|
||||
|
@ -32,6 +30,16 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_FPU 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_LUI 7'b0110111
|
||||
`define INST_AUIPC 7'b0010111
|
||||
`define INST_JAL 7'b1101111
|
||||
|
@ -56,138 +64,131 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
`define INST_FRM_RNE 3'b000 // round to nearest even
|
||||
`define INST_FRM_RTZ 3'b001 // round to zero
|
||||
`define INST_FRM_RDN 3'b010 // round to -inf
|
||||
`define INST_FRM_RUP 3'b011 // round to +inf
|
||||
`define INST_FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define INST_FRM_DYN 3'b111 // dynamic mode
|
||||
`define INST_FRM_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_FPU 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_MOD_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define OP_BITS 4
|
||||
`define MOD_BITS 3
|
||||
`define INST_ALU_ADD 4'b0000
|
||||
`define INST_ALU_LUI 4'b0010
|
||||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_SUB 4'b1011
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
`define INST_ALU_OTHER 4'b0111
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_OP(x) x[`INST_ALU_BITS-1:0]
|
||||
`define INST_ALU_OP_CLASS(x) x[3:2]
|
||||
`define INST_ALU_SIGNED(x) x[0]
|
||||
`define INST_ALU_IS_BR(x) x[0]
|
||||
`define INST_ALU_IS_MUL(x) x[1]
|
||||
|
||||
`define ALU_ADD 4'b0000
|
||||
`define ALU_LUI 4'b0010
|
||||
`define ALU_AUIPC 4'b0011
|
||||
`define ALU_SLTU 4'b0100
|
||||
`define ALU_SLT 4'b0101
|
||||
`define ALU_SRL 4'b1000
|
||||
`define ALU_SRA 4'b1001
|
||||
`define ALU_SUB 4'b1011
|
||||
`define ALU_AND 4'b1100
|
||||
`define ALU_OR 4'b1101
|
||||
`define ALU_XOR 4'b1110
|
||||
`define ALU_SLL 4'b1111
|
||||
`define ALU_OTHER 4'b0111
|
||||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
`define ALU_OP_CLASS(x) x[3:2]
|
||||
`define ALU_SIGNED(x) x[0]
|
||||
`define ALU_IS_BR(x) x[0]
|
||||
`define ALU_IS_MUL(x) x[1]
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LT 4'b0101
|
||||
`define INST_BR_GE 4'b0111
|
||||
`define INST_BR_JAL 4'b1000
|
||||
`define INST_BR_JALR 4'b1001
|
||||
`define INST_BR_ECALL 4'b1010
|
||||
`define INST_BR_EBREAK 4'b1011
|
||||
`define INST_BR_MRET 4'b1100
|
||||
`define INST_BR_SRET 4'b1101
|
||||
`define INST_BR_DRET 4'b1110
|
||||
`define INST_BR_OTHER 4'b1111
|
||||
`define INST_BR_BITS 4
|
||||
`define INST_BR_OP(x) x[`INST_BR_BITS-1:0]
|
||||
`define INST_BR_NEG(x) x[1]
|
||||
`define INST_BR_LESS(x) x[2]
|
||||
`define INST_BR_STATIC(x) x[3]
|
||||
|
||||
`define BR_EQ 4'b0000
|
||||
`define BR_NE 4'b0010
|
||||
`define BR_LTU 4'b0100
|
||||
`define BR_GEU 4'b0110
|
||||
`define BR_LT 4'b0101
|
||||
`define BR_GE 4'b0111
|
||||
`define BR_JAL 4'b1000
|
||||
`define BR_JALR 4'b1001
|
||||
`define BR_ECALL 4'b1010
|
||||
`define BR_EBREAK 4'b1011
|
||||
`define BR_MRET 4'b1100
|
||||
`define BR_SRET 4'b1101
|
||||
`define BR_DRET 4'b1110
|
||||
`define BR_OTHER 4'b1111
|
||||
`define BR_BITS 4
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define BR_NEG(x) x[1]
|
||||
`define BR_LESS(x) x[2]
|
||||
`define BR_STATIC(x) x[3]
|
||||
`define INST_MUL_MUL 3'h0
|
||||
`define INST_MUL_MULH 3'h1
|
||||
`define INST_MUL_MULHSU 3'h2
|
||||
`define INST_MUL_MULHU 3'h3
|
||||
`define INST_MUL_DIV 3'h4
|
||||
`define INST_MUL_DIVU 3'h5
|
||||
`define INST_MUL_REM 3'h6
|
||||
`define INST_MUL_REMU 3'h7
|
||||
`define INST_MUL_BITS 3
|
||||
`define INST_MUL_OP(x) x[`INST_MUL_BITS-1:0]
|
||||
`define INST_MUL_IS_DIV(x) x[2]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define MUL_IS_DIV(x) x[2]
|
||||
`define INST_FMT_B 3'b000
|
||||
`define INST_FMT_H 3'b001
|
||||
`define INST_FMT_W 3'b010
|
||||
`define INST_FMT_BU 3'b100
|
||||
`define INST_FMT_HU 3'b101
|
||||
|
||||
`define FMT_B 3'b000
|
||||
`define FMT_H 3'b001
|
||||
`define FMT_W 3'b010
|
||||
`define FMT_BU 3'b100
|
||||
`define FMT_HU 3'b101
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(x) x[2:0]
|
||||
`define INST_LSU_WSIZE(x) x[1:0]
|
||||
`define INST_LSU_OP(x) x[`INST_LSU_BITS-1:0]
|
||||
`define INST_LSU_IS_FENCE(x) x[0]
|
||||
|
||||
`define LSU_LB 4'b0000
|
||||
`define LSU_LH 4'b0001
|
||||
`define LSU_LW 4'b0010
|
||||
`define LSU_LBU 4'b0100
|
||||
`define LSU_LHU 4'b0101
|
||||
`define LSU_SB 4'b1000
|
||||
`define LSU_SH 4'b1001
|
||||
`define LSU_SW 4'b1010
|
||||
`define LSU_BITS 4
|
||||
`define LSU_FMT(x) x[2:0]
|
||||
`define LSU_WSIZE(x) x[1:0]
|
||||
`define LSU_OP(x) x[`LSU_BITS-1:0]
|
||||
`define LSU_IS_FENCE(x) x[0]
|
||||
`define INST_FENCE_BITS 1
|
||||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define CSR_RW 2'h1
|
||||
`define CSR_RS 2'h2
|
||||
`define CSR_RC 2'h3
|
||||
`define CSR_OTHER 2'h0
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
`define INST_CSR_RW 2'h1
|
||||
`define INST_CSR_RS 2'h2
|
||||
`define INST_CSR_RC 2'h3
|
||||
`define INST_CSR_OTHER 2'h0
|
||||
`define INST_CSR_BITS 2
|
||||
`define INST_CSR_OP(x) x[`INST_CSR_BITS-1:0]
|
||||
|
||||
`define FPU_ADD 4'h0
|
||||
`define FPU_SUB 4'h4
|
||||
`define FPU_MUL 4'h8
|
||||
`define FPU_DIV 4'hC
|
||||
`define FPU_CVTWS 4'h1 // FCVT.W.S
|
||||
`define FPU_CVTWUS 4'h5 // FCVT.WU.S
|
||||
`define FPU_CVTSW 4'h9 // FCVT.S.W
|
||||
`define FPU_CVTSWU 4'hD // FCVT.S.WU
|
||||
`define FPU_SQRT 4'h2
|
||||
`define FPU_CLASS 4'h6
|
||||
`define FPU_CMP 4'hA
|
||||
`define FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define FPU_MADD 4'h3
|
||||
`define FPU_MSUB 4'h7
|
||||
`define FPU_NMSUB 4'hB
|
||||
`define FPU_NMADD 4'hF
|
||||
`define FPU_BITS 4
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
`define INST_FPU_ADD 4'h0
|
||||
`define INST_FPU_SUB 4'h4
|
||||
`define INST_FPU_MUL 4'h8
|
||||
`define INST_FPU_DIV 4'hC
|
||||
`define INST_FPU_CVTWS 4'h1 // FCVT.W.S
|
||||
`define INST_FPU_CVTWUS 4'h5 // FCVT.WU.S
|
||||
`define INST_FPU_CVTSW 4'h9 // FCVT.S.W
|
||||
`define INST_FPU_CVTSWU 4'hD // FCVT.S.WU
|
||||
`define INST_FPU_SQRT 4'h2
|
||||
`define INST_FPU_CLASS 4'h6
|
||||
`define INST_FPU_CMP 4'hA
|
||||
`define INST_FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define INST_FPU_MADD 4'h3
|
||||
`define INST_FPU_MSUB 4'h7
|
||||
`define INST_FPU_NMSUB 4'hB
|
||||
`define INST_FPU_NMADD 4'hF
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_OP(x) x[`INST_FPU_BITS-1:0]
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_PRED 3'h5
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
`define INST_GPU_TMC 3'h0
|
||||
`define INST_GPU_WSPAWN 3'h1
|
||||
`define INST_GPU_SPLIT 3'h2
|
||||
`define INST_GPU_JOIN 3'h3
|
||||
`define INST_GPU_BAR 3'h4
|
||||
`define INST_GPU_PRED 3'h5
|
||||
`define INST_GPU_BITS 3
|
||||
`define INST_GPU_OP(x) x[`INST_GPU_BITS-1:0]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -246,14 +247,14 @@
|
|||
// Cache ID
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
|
||||
// Block size in bytes
|
||||
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
|
||||
// Number of banks
|
||||
`define INUM_BANKS 1
|
||||
|
||||
// Word size in bytes
|
||||
`define IWORD_SIZE 4
|
||||
|
||||
// Number of banks
|
||||
`define INUM_BANKS 1
|
||||
// Block size in bytes
|
||||
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// Core request address bits
|
||||
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
|
||||
|
@ -284,12 +285,12 @@
|
|||
// Cache ID
|
||||
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
|
||||
|
||||
// Block size in bytes
|
||||
`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
|
||||
|
||||
// Word size in bytes
|
||||
`define DWORD_SIZE 4
|
||||
|
||||
// Block size in bytes
|
||||
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
|
||||
|
||||
// Core request address bits
|
||||
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
|
||||
|
||||
|
@ -336,12 +337,12 @@
|
|||
// Cache ID
|
||||
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
|
||||
// Block size in bytes
|
||||
`define L2CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
|
||||
// Word size in bytes
|
||||
`define L2WORD_SIZE `DCACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L2CACHE_LINE_SIZE (`L2_ENABLE ? `MEM_BLOCK_SIZE : `L2WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
|
||||
|
@ -368,12 +369,12 @@
|
|||
// Cache ID
|
||||
`define L3CACHE_ID 0
|
||||
|
||||
// Block size in bytes
|
||||
`define L3CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
|
||||
// Word size in bytes
|
||||
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
|
||||
|
||||
// Block size in bytes
|
||||
`define L3CACHE_LINE_SIZE (`L3_ENABLE ? `MEM_BLOCK_SIZE : `L3WORD_SIZE)
|
||||
|
||||
// Input request tag bits
|
||||
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
|
||||
|
|
|
@ -133,8 +133,8 @@ module VX_execute #(
|
|||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
wire ebreak /* verilator public */;
|
||||
assign ebreak = alu_req_if.valid && alu_req_if.ready
|
||||
&& `ALU_IS_BR(alu_req_if.op_mod)
|
||||
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|
||||
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);
|
||||
&& `INST_ALU_IS_BR(alu_req_if.op_mod)
|
||||
&& (`INST_BR_OP(alu_req_if.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_OP(alu_req_if.op_type) == `INST_BR_ECALL);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -61,7 +61,7 @@ module VX_fpu_unit #(
|
|||
|
||||
// resolve dynamic FRM from CSR
|
||||
assign fpu_to_csr_if.read_wid = fpu_req_if.wid;
|
||||
wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `INST_FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
|
||||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
|
@ -179,7 +179,7 @@ module VX_fpu_unit #(
|
|||
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
|
||||
// VX_gpr_ram_f: small register-file style memory with one synchronous write
// port and three combinational read ports.
//   - Writes happen on the rising edge of clk when wren is high.
//   - Each rdataN continuously reflects mem[raddrN]; there is no read enable
//     and no output register.
//   - There is no reset port; contents are zeroed once by an initial block.
module VX_gpr_ram_f #(
|
||||
// Width in bits of each stored word.
parameter DATAW = 1,
|
||||
// Number of words in the memory array.
parameter DEPTH = 1,
|
||||
// Address width; defaults to $clog2(DEPTH).
// NOTE(review): with the default DEPTH = 1 this evaluates to 0, which makes
// the [ADDRW-1:0] address ranges below [-1:0] - confirm callers always
// override DEPTH with a value greater than 1.
parameter ADDRW = $clog2(DEPTH)
|
||||
) (
|
||||
// Clock for the write port.
input wire clk,
|
||||
// Write enable, sampled on the rising edge of clk.
input wire wren,
|
||||
// Write address.
input wire [ADDRW-1:0] waddr,
|
||||
// Write data.
input wire [DATAW-1:0] wdata,
|
||||
// Read addresses for the three independent read ports.
input wire [ADDRW-1:0] raddr1,
|
||||
input wire [ADDRW-1:0] raddr2,
|
||||
input wire [ADDRW-1:0] raddr3,
|
||||
// Read data for the three ports (combinational outputs).
output wire [DATAW-1:0] rdata1,
|
||||
output wire [DATAW-1:0] rdata2,
|
||||
output wire [DATAW-1:0] rdata3
|
||||
);
|
||||
// Storage array: DEPTH words of DATAW bits each.
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||
|
||||
// Zero every entry at time 0 using an assignment pattern.
initial mem = '{default: 0};
|
||||
|
||||
// Synchronous write port: store wdata at waddr when wren is asserted.
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
mem [waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
|
||||
// Combinational read ports: all three read the same array independently.
assign rdata1 = mem [raddr1];
|
||||
assign rdata2 = mem [raddr2];
|
||||
assign rdata3 = mem [raddr3];
|
||||
|
||||
endmodule
|
||||
|
||||
`TRACING_ON
|
|
@ -1,34 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
|
||||
// VX_gpr_ram_i: small register-file style memory with one synchronous write
// port and two combinational read ports.
//   - Writes happen on the rising edge of clk when wren is high.
//   - Each rdataN continuously reflects mem[raddrN]; there is no read enable
//     and no output register.
//   - There is no reset port; contents are zeroed once by an initial block.
module VX_gpr_ram_i #(
|
||||
// Width in bits of each stored word.
parameter DATAW = 1,
|
||||
// Number of words in the memory array.
parameter DEPTH = 1,
|
||||
// Address width; defaults to $clog2(DEPTH).
// NOTE(review): with the default DEPTH = 1 this evaluates to 0, which makes
// the [ADDRW-1:0] address ranges below [-1:0] - confirm callers always
// override DEPTH with a value greater than 1.
parameter ADDRW = $clog2(DEPTH)
|
||||
) (
|
||||
// Clock for the write port.
input wire clk,
|
||||
// Write enable, sampled on the rising edge of clk.
input wire wren,
|
||||
// Write address.
input wire [ADDRW-1:0] waddr,
|
||||
// Write data.
input wire [DATAW-1:0] wdata,
|
||||
// Read addresses for the two independent read ports.
input wire [ADDRW-1:0] raddr1,
|
||||
input wire [ADDRW-1:0] raddr2,
|
||||
// Read data for the two ports (combinational outputs).
output wire [DATAW-1:0] rdata1,
|
||||
output wire [DATAW-1:0] rdata2
|
||||
);
|
||||
// Storage array: DEPTH words of DATAW bits each.
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||
|
||||
// Zero every entry at time 0 using an assignment pattern.
initial mem = '{default: 0};
|
||||
|
||||
// Synchronous write port: store wdata at waddr when wren is asserted.
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
mem [waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
|
||||
// Combinational read ports: both read the same array independently.
assign rdata1 = mem [raddr1];
|
||||
assign rdata2 = mem [raddr2];
|
||||
|
||||
endmodule
|
||||
|
||||
`TRACING_ON
|
|
@ -17,69 +17,100 @@ module VX_gpr_stage #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
||||
|
||||
// ensure r0 never gets written, which can happen before the reset
|
||||
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
|
||||
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3;
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
wire [(`NUM_THREADS * 4)-1:0] wren;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}};
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_gpr_ram_f #(
|
||||
.DATAW (32),
|
||||
.DEPTH (RAM_DEPTH)
|
||||
) gpr_ram_f (
|
||||
.clk (clk),
|
||||
.wren (write_enable && writeback_if.tmask[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.raddr3 (raddr3),
|
||||
.rdata1 (rdata1[i]),
|
||||
.rdata2 (rdata2[i]),
|
||||
.rdata3 (rdata3[i])
|
||||
);
|
||||
reg [`NUM_THREADS-1:0][31:0] last_wdata;
|
||||
reg [$clog2(RAM_SIZE)-1:0] last_waddr;
|
||||
reg [`NUM_THREADS-1:0] last_wmask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
last_wdata <= writeback_if.data;
|
||||
last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask;
|
||||
last_waddr <= waddr;
|
||||
end
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = rdata3;
|
||||
`else
|
||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
|
||||
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2;
|
||||
|
||||
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_gpr_ram_i #(
|
||||
.DATAW (32),
|
||||
.DEPTH (RAM_DEPTH)
|
||||
) gpr_ram_i (
|
||||
.clk (clk),
|
||||
.wren (write_enable && writeback_if.tmask[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.rdata1 (rdata1[i]),
|
||||
.rdata2 (rdata2[i])
|
||||
);
|
||||
VX_dp_ram #(
|
||||
.DATAW (32 * `NUM_THREADS),
|
||||
.SIZE (RAM_SIZE),
|
||||
.BYTEENW (`NUM_THREADS * 4),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
.NO_RWCHECK (1)
|
||||
) dp_ram1 (
|
||||
.clk (clk),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.rden (1'b1),
|
||||
.raddr (raddr1),
|
||||
.rdata (rdata1)
|
||||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (32 * `NUM_THREADS),
|
||||
.SIZE (RAM_SIZE),
|
||||
.BYTEENW (`NUM_THREADS * 4),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
.NO_RWCHECK (1)
|
||||
) dp_ram2 (
|
||||
.clk (clk),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.rden (1'b1),
|
||||
.raddr (raddr2),
|
||||
.rdata (rdata2)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i];
|
||||
assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i];
|
||||
end
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata3;
|
||||
wire [$clog2(RAM_SIZE)-1:0] raddr3;
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = 0;
|
||||
VX_dp_ram #(
|
||||
.DATAW (32 * `NUM_THREADS),
|
||||
.SIZE (RAM_SIZE),
|
||||
.BYTEENW (`NUM_THREADS * 4),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
.NO_RWCHECK (1)
|
||||
) dp_ram3 (
|
||||
.clk (clk),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.rden (1'b1),
|
||||
.raddr (raddr3),
|
||||
.rdata (rdata3)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i];
|
||||
end
|
||||
`else
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
assign gpr_rsp_if.rs3_data = 'x;
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
|
|
@ -19,18 +19,17 @@ module VX_gpu_unit #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (gpu_req_if.op_mod)
|
||||
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
|
||||
wire is_pred = (gpu_req_if.op_type == `GPU_PRED);
|
||||
wire is_wspawn = (gpu_req_if.op_type == `INST_GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op_type == `INST_GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `INST_GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR);
|
||||
wire is_pred = (gpu_req_if.op_type == `INST_GPU_PRED);
|
||||
|
||||
// tmc
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ module VX_ibuffer #(
|
|||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
|
||||
localparam ADDRW = $clog2(`IBUF_SIZE+1);
|
||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||
|
||||
|
|
|
@ -33,16 +33,17 @@ module VX_icache_stage #(
|
|||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(32 + `NUM_THREADS),
|
||||
.SIZE(`NUM_WARPS),
|
||||
.FASTRAM(1)
|
||||
.DATAW (32 + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
) req_metadata (
|
||||
.clk(clk),
|
||||
.waddr(req_tag),
|
||||
.raddr(rsp_tag),
|
||||
.wren(icache_req_fire),
|
||||
.din({ifetch_req_if.PC, ifetch_req_if.tmask}),
|
||||
.dout({rsp_PC, rsp_tmask})
|
||||
.clk (clk),
|
||||
.wren (icache_req_fire),
|
||||
.waddr (req_tag),
|
||||
.wdata ({ifetch_req_if.PC, ifetch_req_if.tmask}),
|
||||
.rden (1'b1),
|
||||
.raddr (rsp_tag),
|
||||
.rdata ({rsp_PC, rsp_tmask})
|
||||
);
|
||||
|
||||
`RUNTIME_ASSERT((!ifetch_req_if.valid || ifetch_req_if.PC >= `STARTUP_ADDR),
|
||||
|
|
|
@ -26,13 +26,12 @@ module VX_instr_demux (
|
|||
`endif
|
||||
wire gpu_req_ready;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (`NUM_THREADS)
|
||||
VX_lzc #(
|
||||
.WIDTH (`NUM_THREADS)
|
||||
) tid_select (
|
||||
.data_in (ibuffer_if.tmask),
|
||||
.index (tid),
|
||||
`UNUSED_PIN (onehot),
|
||||
`UNUSED_PIN (valid_out)
|
||||
.in_i (ibuffer_if.tmask),
|
||||
.cnt_o (tid),
|
||||
`UNUSED_PIN (valid_o)
|
||||
);
|
||||
|
||||
wire [31:0] next_PC = ibuffer_if.PC + 4;
|
||||
|
@ -42,15 +41,15 @@ module VX_instr_demux (
|
|||
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `ALU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `INST_ALU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.valid_out (alu_req_if.valid),
|
||||
.ready_out (alu_req_if.ready)
|
||||
);
|
||||
|
@ -58,18 +57,18 @@ module VX_instr_demux (
|
|||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
|
||||
wire lsu_is_fence = `LSU_IS_FENCE(ibuffer_if.op_mod);
|
||||
wire lsu_is_fence = `INST_LSU_IS_FENCE(ibuffer_if.op_mod);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `LSU_OP(ibuffer_if.op_type), lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_LSU_OP(ibuffer_if.op_type), lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.valid_out (lsu_req_if.valid),
|
||||
.ready_out (lsu_req_if.ready)
|
||||
);
|
||||
|
@ -79,15 +78,15 @@ module VX_instr_demux (
|
|||
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.OUTPUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `CSR_OP(ibuffer_if.op_type), ibuffer_if.imm[`CSR_ADDR_BITS-1:0], ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, ibuffer_if.rs1, gpr_rsp_if.rs1_data[0]}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.rs1, csr_req_if.rs1_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_CSR_OP(ibuffer_if.op_type), ibuffer_if.imm[`CSR_ADDR_BITS-1:0], ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, ibuffer_if.rs1, gpr_rsp_if.rs1_data[0]}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.rs1, csr_req_if.rs1_data}),
|
||||
.valid_out (csr_req_if.valid),
|
||||
.ready_out (csr_req_if.ready)
|
||||
);
|
||||
|
@ -98,15 +97,15 @@ module VX_instr_demux (
|
|||
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_req_valid),
|
||||
.ready_in (fpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `FPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_FPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.valid_out (fpu_req_if.valid),
|
||||
.ready_out (fpu_req_if.ready)
|
||||
);
|
||||
|
@ -119,15 +118,15 @@ module VX_instr_demux (
|
|||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
|
||||
.OUTPUT_REG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `INST_GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
|
|
@ -38,17 +38,17 @@ module VX_ipdom_stack #(
|
|||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(WIDTH * 2),
|
||||
.SIZE(DEPTH),
|
||||
.RWCHECK(1),
|
||||
.FASTRAM(1)
|
||||
.DATAW (WIDTH * 2),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (1)
|
||||
) store (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr),
|
||||
.raddr(rd_ptr),
|
||||
.wren(push),
|
||||
.din({q2, q1}),
|
||||
.dout({d2, d1})
|
||||
.clk (clk),
|
||||
.wren (push),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q2, q1}),
|
||||
.rden (1'b1),
|
||||
.raddr (rd_ptr),
|
||||
.rdata ({d2, d1})
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -30,11 +30,15 @@ module VX_issue #(
|
|||
|
||||
wire scoreboard_delay;
|
||||
|
||||
`RESET_RELAY (ibuf_reset);
|
||||
`RESET_RELAY (gpr_reset);
|
||||
`RESET_RELAY (demux_reset);
|
||||
|
||||
VX_ibuffer #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
@ -58,7 +62,7 @@ module VX_issue #(
|
|||
.CORE_ID(CORE_ID)
|
||||
) gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (gpr_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_req_if (gpr_req_if),
|
||||
.gpr_rsp_if (gpr_rsp_if)
|
||||
|
@ -80,7 +84,7 @@ module VX_issue #(
|
|||
|
||||
VX_instr_demux instr_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (demux_reset),
|
||||
.ibuffer_if (execute_if),
|
||||
.gpr_rsp_if (gpr_rsp_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
|
|
|
@ -33,7 +33,7 @@ module VX_lsu_unit #(
|
|||
wire req_valid;
|
||||
wire [`NUM_THREADS-1:0] req_tmask;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_addr;
|
||||
wire [`LSU_BITS-1:0] req_type;
|
||||
wire [`INST_LSU_BITS-1:0] req_type;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||
wire [`NR_BITS-1:0] req_rd;
|
||||
wire req_wb;
|
||||
|
@ -80,7 +80,7 @@ module VX_lsu_unit #(
|
|||
wire lsu_valid = lsu_req_if.valid && ~fence_wait;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) req_pipe_reg (
|
||||
.clk (clk),
|
||||
|
@ -97,7 +97,7 @@ module VX_lsu_unit #(
|
|||
wire [31:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire [`LSU_BITS-1:0] rsp_type;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_type;
|
||||
wire rsp_is_dup;
|
||||
|
||||
`UNUSED_VAR (rsp_type)
|
||||
|
@ -132,8 +132,8 @@ module VX_lsu_unit #(
|
|||
assign mbuf_raddr = dcache_rsp_if.tag[ADDR_TYPEW +: `LSUQ_ADDR_BITS];
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -202,7 +202,7 @@ module VX_lsu_unit #(
|
|||
|
||||
always @(*) begin
|
||||
mem_req_byteen = {4{req_wb}};
|
||||
case (`LSU_WSIZE(req_type))
|
||||
case (`INST_LSU_WSIZE(req_type))
|
||||
0: mem_req_byteen[req_offset[i]] = 1;
|
||||
1: begin
|
||||
mem_req_byteen[req_offset[i]] = 1;
|
||||
|
@ -261,11 +261,11 @@ module VX_lsu_unit #(
|
|||
wire [7:0] rsp_data8 = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
always @(*) begin
|
||||
case (`LSU_FMT(rsp_type))
|
||||
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
|
||||
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
case (`INST_LSU_FMT(rsp_type))
|
||||
`INST_FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
default: rsp_data[i] = rsp_data32;
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -65,6 +65,7 @@ module VX_mem_unit # (
|
|||
|
||||
`RESET_RELAY (icache_reset);
|
||||
`RESET_RELAY (dcache_reset);
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`ICACHE_ID),
|
||||
|
@ -197,6 +198,9 @@ module VX_mem_unit # (
|
|||
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
|
||||
) smem_rsp_if();
|
||||
|
||||
`RESET_RELAY (smem_arb_reset);
|
||||
`RESET_RELAY (smem_reset);
|
||||
|
||||
VX_smem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
|
@ -207,7 +211,7 @@ module VX_mem_unit # (
|
|||
.BUFFERED_RSP (1)
|
||||
) smem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (smem_arb_reset),
|
||||
|
||||
// input request
|
||||
.req_valid_in (dcache_req_if.valid),
|
||||
|
@ -242,8 +246,6 @@ module VX_mem_unit # (
|
|||
.rsp_ready_out (dcache_rsp_if.ready)
|
||||
);
|
||||
|
||||
`RESET_RELAY (smem_reset);
|
||||
|
||||
VX_shared_mem #(
|
||||
.CACHE_ID (`SCACHE_ID),
|
||||
.CACHE_SIZE (`SMEM_SIZE),
|
||||
|
@ -312,7 +314,7 @@ module VX_mem_unit # (
|
|||
.BUFFERED_RSP (2)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Source request
|
||||
.req_valid_in ({dcache_mem_req_if.valid, icache_mem_req_if.valid}),
|
||||
|
|
|
@ -5,7 +5,7 @@ module VX_muldiv (
|
|||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
input wire [`MUL_BITS-1:0] alu_op,
|
||||
input wire [`INST_MUL_BITS-1:0] alu_op,
|
||||
input wire [`NW_BITS-1:0] wid_in,
|
||||
input wire [`NUM_THREADS-1:0] tmask_in,
|
||||
input wire [31:0] PC_in,
|
||||
|
@ -29,7 +29,7 @@ module VX_muldiv (
|
|||
input wire ready_out
|
||||
);
|
||||
|
||||
wire is_div_op = `MUL_IS_DIV(alu_op);
|
||||
wire is_div_op = `INST_MUL_IS_DIV(alu_op);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||
wire [`NW_BITS-1:0] mul_wid_out;
|
||||
|
@ -44,9 +44,9 @@ module VX_muldiv (
|
|||
wire mul_valid_in = valid_in && !is_div_op;
|
||||
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||
|
||||
wire is_mulh_in = (alu_op != `MUL_MUL);
|
||||
wire is_signed_mul_a = (alu_op != `MUL_MULHU);
|
||||
wire is_signed_mul_b = (alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU);
|
||||
wire is_mulh_in = (alu_op != `INST_MUL_MUL);
|
||||
wire is_signed_mul_a = (alu_op != `INST_MUL_MULHU);
|
||||
wire is_signed_mul_b = (alu_op != `INST_MUL_MULHU && alu_op != `INST_MUL_MULHSU);
|
||||
|
||||
`ifdef IMUL_DPI
|
||||
|
||||
|
@ -123,8 +123,8 @@ module VX_muldiv (
|
|||
wire [`NR_BITS-1:0] div_rd_out;
|
||||
wire div_wb_out;
|
||||
|
||||
wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU);
|
||||
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
|
||||
wire is_rem_op_in = (alu_op == `INST_MUL_REM) || (alu_op == `INST_MUL_REMU);
|
||||
wire is_signed_div = (alu_op == `INST_MUL_DIV) || (alu_op == `INST_MUL_REM);
|
||||
wire div_valid_in = valid_in && is_div_op;
|
||||
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
|
||||
wire div_ready_in;
|
||||
|
|
|
@ -74,6 +74,7 @@
|
|||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_REG (* preserve *)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -18,96 +18,104 @@ endtask
|
|||
|
||||
task print_ex_op (
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`OP_BITS-1:0] op_type,
|
||||
input [`MOD_BITS-1:0] op_mod
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input [`INST_MOD_BITS-1:0] op_mod
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
if (`ALU_IS_BR(op_mod)) begin
|
||||
case (`BR_BITS'(op_type))
|
||||
`BR_EQ: dpi_trace("BEQ");
|
||||
`BR_NE: dpi_trace("BNE");
|
||||
`BR_LT: dpi_trace("BLT");
|
||||
`BR_GE: dpi_trace("BGE");
|
||||
`BR_LTU: dpi_trace("BLTU");
|
||||
`BR_GEU: dpi_trace("BGEU");
|
||||
`BR_JAL: dpi_trace("JAL");
|
||||
`BR_JALR: dpi_trace("JALR");
|
||||
`BR_ECALL: dpi_trace("ECALL");
|
||||
`BR_EBREAK:dpi_trace("EBREAK");
|
||||
`BR_MRET: dpi_trace("MRET");
|
||||
`BR_SRET: dpi_trace("SRET");
|
||||
`BR_DRET: dpi_trace("DRET");
|
||||
if (`INST_ALU_IS_BR(op_mod)) begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: dpi_trace("BEQ");
|
||||
`INST_BR_NE: dpi_trace("BNE");
|
||||
`INST_BR_LT: dpi_trace("BLT");
|
||||
`INST_BR_GE: dpi_trace("BGE");
|
||||
`INST_BR_LTU: dpi_trace("BLTU");
|
||||
`INST_BR_GEU: dpi_trace("BGEU");
|
||||
`INST_BR_JAL: dpi_trace("JAL");
|
||||
`INST_BR_JALR: dpi_trace("JALR");
|
||||
`INST_BR_ECALL: dpi_trace("ECALL");
|
||||
`INST_BR_EBREAK:dpi_trace("EBREAK");
|
||||
`INST_BR_MRET: dpi_trace("MRET");
|
||||
`INST_BR_SRET: dpi_trace("SRET");
|
||||
`INST_BR_DRET: dpi_trace("DRET");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end else if (`ALU_IS_MUL(op_mod)) begin
|
||||
case (`MUL_BITS'(op_type))
|
||||
`MUL_MUL: dpi_trace("MUL");
|
||||
`MUL_MULH: dpi_trace("MULH");
|
||||
`MUL_MULHSU:dpi_trace("MULHSU");
|
||||
`MUL_MULHU: dpi_trace("MULHU");
|
||||
`MUL_DIV: dpi_trace("DIV");
|
||||
`MUL_DIVU: dpi_trace("DIVU");
|
||||
`MUL_REM: dpi_trace("REM");
|
||||
`MUL_REMU: dpi_trace("REMU");
|
||||
end else if (`INST_ALU_IS_MUL(op_mod)) begin
|
||||
case (`INST_MUL_BITS'(op_type))
|
||||
`INST_MUL_MUL: dpi_trace("MUL");
|
||||
`INST_MUL_MULH: dpi_trace("MULH");
|
||||
`INST_MUL_MULHSU:dpi_trace("MULHSU");
|
||||
`INST_MUL_MULHU: dpi_trace("MULHU");
|
||||
`INST_MUL_DIV: dpi_trace("DIV");
|
||||
`INST_MUL_DIVU: dpi_trace("DIVU");
|
||||
`INST_MUL_REM: dpi_trace("REM");
|
||||
`INST_MUL_REMU: dpi_trace("REMU");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end else begin
|
||||
case (`ALU_BITS'(op_type))
|
||||
`ALU_ADD: dpi_trace("ADD");
|
||||
`ALU_SUB: dpi_trace("SUB");
|
||||
`ALU_SLL: dpi_trace("SLL");
|
||||
`ALU_SRL: dpi_trace("SRL");
|
||||
`ALU_SRA: dpi_trace("SRA");
|
||||
`ALU_SLT: dpi_trace("SLT");
|
||||
`ALU_SLTU: dpi_trace("SLTU");
|
||||
`ALU_XOR: dpi_trace("XOR");
|
||||
`ALU_OR: dpi_trace("OR");
|
||||
`ALU_AND: dpi_trace("AND");
|
||||
`ALU_LUI: dpi_trace("LUI");
|
||||
`ALU_AUIPC: dpi_trace("AUIPC");
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: dpi_trace("ADD");
|
||||
`INST_ALU_SUB: dpi_trace("SUB");
|
||||
`INST_ALU_SLL: dpi_trace("SLL");
|
||||
`INST_ALU_SRL: dpi_trace("SRL");
|
||||
`INST_ALU_SRA: dpi_trace("SRA");
|
||||
`INST_ALU_SLT: dpi_trace("SLT");
|
||||
`INST_ALU_SLTU: dpi_trace("SLTU");
|
||||
`INST_ALU_XOR: dpi_trace("XOR");
|
||||
`INST_ALU_OR: dpi_trace("OR");
|
||||
`INST_ALU_AND: dpi_trace("AND");
|
||||
`INST_ALU_LUI: dpi_trace("LUI");
|
||||
`INST_ALU_AUIPC: dpi_trace("AUIPC");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_LSU: begin
|
||||
case (`LSU_BITS'(op_type))
|
||||
`LSU_LB: dpi_trace("LB");
|
||||
`LSU_LH: dpi_trace("LH");
|
||||
`LSU_LW: dpi_trace("LW");
|
||||
`LSU_LBU:dpi_trace("LBU");
|
||||
`LSU_LHU:dpi_trace("LHU");
|
||||
`LSU_SB: dpi_trace("SB");
|
||||
`LSU_SH: dpi_trace("SH");
|
||||
`LSU_SW: dpi_trace("SW");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
if (op_mod == 0) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: dpi_trace("LB");
|
||||
`INST_LSU_LH: dpi_trace("LH");
|
||||
`INST_LSU_LW: dpi_trace("LW");
|
||||
`INST_LSU_LBU:dpi_trace("LBU");
|
||||
`INST_LSU_LHU:dpi_trace("LHU");
|
||||
`INST_LSU_SB: dpi_trace("SB");
|
||||
`INST_LSU_SH: dpi_trace("SH");
|
||||
`INST_LSU_SW: dpi_trace("SW");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end else if (op_mod == 1) begin
|
||||
case (`INST_FENCE_BITS'(op_type))
|
||||
`INST_FENCE_D: dpi_trace("DFENCE");
|
||||
`INST_FENCE_I: dpi_trace("IFENCE");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_CSR: begin
|
||||
case (`CSR_BITS'(op_type))
|
||||
`CSR_RW: dpi_trace("CSRW");
|
||||
`CSR_RS: dpi_trace("CSRS");
|
||||
`CSR_RC: dpi_trace("CSRC");
|
||||
case (`INST_CSR_BITS'(op_type))
|
||||
`INST_CSR_RW: dpi_trace("CSRW");
|
||||
`INST_CSR_RS: dpi_trace("CSRS");
|
||||
`INST_CSR_RC: dpi_trace("CSRC");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
`EX_FPU: begin
|
||||
case (`FPU_BITS'(op_type))
|
||||
`FPU_ADD: dpi_trace("ADD");
|
||||
`FPU_SUB: dpi_trace("SUB");
|
||||
`FPU_MUL: dpi_trace("MUL");
|
||||
`FPU_DIV: dpi_trace("DIV");
|
||||
`FPU_SQRT: dpi_trace("SQRT");
|
||||
`FPU_MADD: dpi_trace("MADD");
|
||||
`FPU_NMSUB: dpi_trace("NMSUB");
|
||||
`FPU_NMADD: dpi_trace("NMADD");
|
||||
`FPU_CVTWS: dpi_trace("CVTWS");
|
||||
`FPU_CVTWUS:dpi_trace("CVTWUS");
|
||||
`FPU_CVTSW: dpi_trace("CVTSW");
|
||||
`FPU_CVTSWU:dpi_trace("CVTSWU");
|
||||
`FPU_CLASS: dpi_trace("CLASS");
|
||||
`FPU_CMP: dpi_trace("CMP");
|
||||
`FPU_MISC: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: dpi_trace("ADD");
|
||||
`INST_FPU_SUB: dpi_trace("SUB");
|
||||
`INST_FPU_MUL: dpi_trace("MUL");
|
||||
`INST_FPU_DIV: dpi_trace("DIV");
|
||||
`INST_FPU_SQRT: dpi_trace("SQRT");
|
||||
`INST_FPU_MADD: dpi_trace("MADD");
|
||||
`INST_FPU_NMSUB: dpi_trace("NMSUB");
|
||||
`INST_FPU_NMADD: dpi_trace("NMADD");
|
||||
`INST_FPU_CVTWS: dpi_trace("CVTWS");
|
||||
`INST_FPU_CVTWUS:dpi_trace("CVTWUS");
|
||||
`INST_FPU_CVTSW: dpi_trace("CVTSW");
|
||||
`INST_FPU_CVTSWU:dpi_trace("CVTSWU");
|
||||
`INST_FPU_CLASS: dpi_trace("CLASS");
|
||||
`INST_FPU_CMP: dpi_trace("CMP");
|
||||
`INST_FPU_MISC: begin
|
||||
case (op_mod)
|
||||
0: dpi_trace("SGNJ");
|
||||
1: dpi_trace("SGNJN");
|
||||
|
@ -122,13 +130,13 @@ task print_ex_op (
|
|||
endcase
|
||||
end
|
||||
`EX_GPU: begin
|
||||
case (`GPU_BITS'(op_type))
|
||||
`GPU_TMC: dpi_trace("TMC");
|
||||
`GPU_WSPAWN:dpi_trace("WSPAWN");
|
||||
`GPU_SPLIT: dpi_trace("SPLIT");
|
||||
`GPU_JOIN: dpi_trace("JOIN");
|
||||
`GPU_BAR: dpi_trace("BAR");
|
||||
`GPU_BAR: dpi_trace("PRED");
|
||||
case (`INST_GPU_BITS'(op_type))
|
||||
`INST_GPU_TMC: dpi_trace("TMC");
|
||||
`INST_GPU_WSPAWN:dpi_trace("WSPAWN");
|
||||
`INST_GPU_SPLIT: dpi_trace("SPLIT");
|
||||
`INST_GPU_JOIN: dpi_trace("JOIN");
|
||||
`INST_GPU_BAR: dpi_trace("BAR");
|
||||
`INST_GPU_PRED: dpi_trace("PRED");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@ typedef struct packed {
|
|||
logic NX; // 0-Inexact
|
||||
} fflags_t;
|
||||
|
||||
`define FFG_BITS $bits(fflags_t)
|
||||
`define FFLAGS_BITS $bits(fflags_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
|
|
|
@ -30,7 +30,7 @@ module VX_warp_sched #(
|
|||
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks;
|
||||
reg [`NUM_WARPS-1:0][31:0] warp_pcs, warp_next_pcs;
|
||||
reg [`NUM_WARPS-1:0][31:0] warp_pcs;
|
||||
|
||||
// barriers
|
||||
reg [`NUM_BARRIERS-1:0][`NUM_WARPS-1:0] barrier_masks; // warps waiting on barrier
|
||||
|
@ -121,12 +121,11 @@ module VX_warp_sched #(
|
|||
end
|
||||
|
||||
if (ifetch_req_fire) begin
|
||||
warp_next_pcs[ifetch_req_if.wid] <= ifetch_req_if.PC + 4;
|
||||
warp_pcs[ifetch_req_if.wid] <= ifetch_req_if.PC + 4;
|
||||
end
|
||||
|
||||
if (wstall_if.valid) begin
|
||||
stalled_warps[wstall_if.wid] <= wstall_if.stalled;
|
||||
warp_pcs[wstall_if.wid] <= warp_next_pcs[wstall_if.wid];
|
||||
end
|
||||
|
||||
// join handling
|
||||
|
@ -200,13 +199,12 @@ module VX_warp_sched #(
|
|||
|
||||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (`NUM_WARPS)
|
||||
) rr_arbiter (
|
||||
.data_in (ready_warps),
|
||||
.index (schedule_wid),
|
||||
.valid_out (schedule_valid),
|
||||
`UNUSED_PIN (onehot)
|
||||
VX_lzc #(
|
||||
.WIDTH (`NUM_WARPS)
|
||||
) wid_select (
|
||||
.in_i (ready_warps),
|
||||
.cnt_o (schedule_wid),
|
||||
.valid_o (schedule_valid)
|
||||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + 32)-1:0] schedule_data;
|
||||
|
|
|
@ -85,6 +85,7 @@ module Vortex (
|
|||
.CACHE_SIZE (`L3CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3NUM_BANKS),
|
||||
.NUM_PORTS (`L3NUM_PORTS),
|
||||
.WORD_SIZE (`L3WORD_SIZE),
|
||||
.NUM_REQS (`L3NUM_REQS),
|
||||
.CREQ_SIZE (`L3CREQ_SIZE),
|
||||
|
@ -141,6 +142,8 @@ module Vortex (
|
|||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (`L3MEM_DATA_WIDTH),
|
||||
|
@ -150,7 +153,7 @@ module Vortex (
|
|||
.BUFFERED_RSP (1)
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Core request
|
||||
.req_valid_in (per_cluster_mem_req_valid),
|
||||
|
|
|
@ -512,6 +512,8 @@ t_local_mem_data mem_rsp_data;
|
|||
wire [AVS_REQ_TAGW:0] mem_rsp_tag;
|
||||
wire mem_rsp_ready;
|
||||
|
||||
`RESET_RELAY (mem_arb_reset);
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
|
@ -522,7 +524,7 @@ VX_mem_arb #(
|
|||
.TYPE ("X")
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mem_arb_reset),
|
||||
|
||||
// Source request
|
||||
.req_valid_in ({vx_mem_req_arb_valid, cci_mem_req_arb_valid}),
|
||||
|
@ -557,6 +559,8 @@ VX_mem_arb #(
|
|||
|
||||
//--
|
||||
|
||||
`RESET_RELAY (avs_wrapper_reset);
|
||||
|
||||
VX_avs_wrapper #(
|
||||
.AVS_DATA_WIDTH (LMEM_DATA_WIDTH),
|
||||
.AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
|
@ -566,7 +570,7 @@ VX_avs_wrapper #(
|
|||
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
|
||||
) avs_wrapper (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (avs_wrapper_reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (mem_req_valid),
|
||||
|
@ -724,13 +728,15 @@ always @(posedge clk) begin
|
|||
end
|
||||
end
|
||||
|
||||
`RESET_RELAY (cci_rdq_reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||
.SIZE (CCI_RD_QUEUE_SIZE),
|
||||
.OUTPUT_REG (1)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (cci_rdq_reset),
|
||||
.push (cci_rdq_push),
|
||||
.pop (cci_rdq_pop),
|
||||
.data_in (cci_rdq_din),
|
||||
|
@ -878,7 +884,7 @@ Vortex #() vortex (
|
|||
`SCOPE_BIND_afu_vortex
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset | vx_reset),
|
||||
.reset (reset || vx_reset),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (vx_mem_req_valid),
|
||||
|
@ -997,6 +1003,8 @@ VX_fifo_queue #(
|
|||
|
||||
wire scope_changed = `SCOPE_TRIGGER;
|
||||
|
||||
`RESET_RELAY (scope_reset);
|
||||
|
||||
VX_scope #(
|
||||
.DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})),
|
||||
.BUSW (64),
|
||||
|
@ -1004,7 +1012,7 @@ VX_scope #(
|
|||
.UPDW ($bits({`SCOPE_UPDATE_LIST}))
|
||||
) scope (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.changed (scope_changed),
|
||||
|
|
211
hw/rtl/cache/VX_bank.v
vendored
211
hw/rtl/cache/VX_bank.v
vendored
|
@ -39,7 +39,8 @@ module VX_bank #(
|
|||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
|
||||
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
|
||||
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
`SCOPE_IO_VX_bank
|
||||
|
||||
|
@ -56,13 +57,13 @@ module VX_bank #(
|
|||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [NUM_PORTS-1:0] core_req_pmask,
|
||||
input wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel,
|
||||
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] core_req_wsel,
|
||||
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
|
||||
input wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire core_req_rw,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [CORE_TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
|
@ -70,16 +71,18 @@ module VX_bank #(
|
|||
output wire [NUM_PORTS-1:0] core_rsp_pmask,
|
||||
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
output wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire core_rsp_ready,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [CACHE_LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [NUM_PORTS-1:0] mem_req_pmask,
|
||||
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
output wire [`CACHE_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
|
@ -104,18 +107,18 @@ module VX_bank #(
|
|||
`endif
|
||||
|
||||
wire [NUM_PORTS-1:0] creq_pmask;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] creq_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] creq_tag;
|
||||
wire creq_rw;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
|
||||
wire [CORE_TAG_WIDTH-1:0] creq_tag;
|
||||
|
||||
|
||||
wire creq_valid, creq_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)),
|
||||
.SIZE (CREQ_SIZE),
|
||||
.OUTPUT_REG (CREQ_SIZE > 2)
|
||||
) core_req_queue (
|
||||
|
@ -123,8 +126,8 @@ module VX_bank #(
|
|||
.reset (reset),
|
||||
.ready_in (core_req_ready),
|
||||
.valid_in (core_req_valid),
|
||||
.data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}),
|
||||
.data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}),
|
||||
.data_in ({core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid, core_req_tag}),
|
||||
.data_out ({creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid, creq_tag}),
|
||||
.ready_out (creq_ready),
|
||||
.valid_out (creq_valid)
|
||||
);
|
||||
|
@ -134,33 +137,34 @@ module VX_bank #(
|
|||
wire mshr_valid;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
|
||||
wire [CORE_TAG_WIDTH-1:0] mshr_tag;
|
||||
wire [NUM_PORTS-1:0] mshr_pmask;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] mshr_tag;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mshr_wsel;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
|
||||
wire [NUM_PORTS-1:0] mshr_pmask;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
|
||||
wire mem_rw_st0, mem_rw_st1;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1;
|
||||
wire write_st0, write_st1;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
|
||||
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] rdata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
|
||||
wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire valid_st0, valid_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_mshr_st0, is_mshr_st1;
|
||||
wire miss_st0, miss_st1;
|
||||
wire writeen_unqual_st1;
|
||||
wire is_flush_st0;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
|
||||
wire crsq_valid, crsq_ready, crsq_stall;
|
||||
wire mreq_alm_full;
|
||||
|
||||
wire creq_fire = creq_valid && creq_ready;
|
||||
|
||||
// prevent read-during-write hazard when accessing tags/data block RAMs
|
||||
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
|
||||
wire rdw_write_hazard = valid_st0 && write_st0 && ~creq_rw;
|
||||
|
||||
// determine which queue to pop next in priority order
|
||||
wire mshr_grant = 1;
|
||||
|
@ -172,51 +176,38 @@ module VX_bank #(
|
|||
wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable;
|
||||
|
||||
wire mshr_ready = mshr_grant
|
||||
&& !crsq_stall; // ensure core response ready
|
||||
&& !rdw_fill_hazard // prevent read-during-write hazard
|
||||
&& !crsq_stall; // ensure core response ready
|
||||
|
||||
assign mem_rsp_ready = mrsq_grant
|
||||
&& !crsq_stall; // ensure core response ready
|
||||
&& !crsq_stall; // ensure core response ready
|
||||
|
||||
assign creq_ready = creq_grant
|
||||
&& !mreq_alm_full // ensure memory request ready
|
||||
&& !mshr_alm_full // ensure mshr enqueue ready
|
||||
&& !crsq_stall; // ensure core response ready
|
||||
|
||||
wire mshr_fire = mshr_valid && mshr_ready;
|
||||
assign creq_ready = creq_grant
|
||||
&& !rdw_write_hazard // prevent read-during-write hazard
|
||||
&& !mreq_alm_full // ensure memory request ready
|
||||
&& !mshr_alm_full // ensure mshr enqueue ready
|
||||
&& !crsq_stall; // ensure core response ready
|
||||
|
||||
wire mshr_fire = mshr_valid && mshr_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire creq_fire = creq_valid && creq_ready;
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[`CACHE_REQ_INFO_RNG] : creq_tag[`CACHE_REQ_INFO_RNG];
|
||||
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[0][`CACHE_REQ_INFO_RNG] : creq_tag[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] creq_line_data;
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
if (NUM_PORTS > 1) begin
|
||||
reg [`CACHE_LINE_WIDTH-1:0] creq_line_data_r;
|
||||
always @(*) begin
|
||||
creq_line_data_r = 'x;
|
||||
for (integer p = 0; p < NUM_PORTS; p++) begin
|
||||
if (creq_pmask[p]) begin
|
||||
creq_line_data_r[creq_wsel[p] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data[p];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign creq_line_data = creq_line_data_r;
|
||||
end else begin
|
||||
assign creq_line_data = {`WORDS_PER_LINE{creq_data}};
|
||||
end
|
||||
end else begin
|
||||
assign creq_line_data = creq_data;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
|
||||
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
|
||||
for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin
|
||||
assign wdata_sel[i] = mem_rsp_data[i];
|
||||
end
|
||||
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
|
@ -227,9 +218,9 @@ module VX_bank #(
|
|||
flush_enable,
|
||||
mrsq_enable || flush_enable,
|
||||
mshr_enable,
|
||||
mshr_enable ? 1'b0 : creq_rw,
|
||||
creq_fire && creq_rw,
|
||||
mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
|
||||
(mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data : creq_line_data,
|
||||
wdata_sel,
|
||||
mshr_enable ? mshr_wsel : creq_wsel,
|
||||
creq_byteen,
|
||||
mshr_enable ? mshr_tid : creq_tid,
|
||||
|
@ -237,12 +228,12 @@ module VX_bank #(
|
|||
mshr_enable ? mshr_tag : creq_tag,
|
||||
mshr_enable ? mshr_dequeue_id : (mem_rsp_valid ? mem_rsp_id : mshr_alloc_id)
|
||||
}),
|
||||
.data_out ({valid_st0, is_flush_st0, is_fill_st0, is_mshr_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
|
||||
.data_out ({valid_st0, is_flush_st0, is_fill_st0, is_mshr_st0, write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = tag_st0[`CACHE_REQ_INFO_RNG];
|
||||
assign {debug_wid_st0, debug_pc_st0} = tag_st0[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = 0;
|
||||
end
|
||||
|
@ -282,54 +273,70 @@ module VX_bank #(
|
|||
// we have a core request hit
|
||||
assign miss_st0 = !is_fill_st0 && !tag_match_st0;
|
||||
|
||||
wire read_st0 = !is_fill_st0 && !write_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!crsq_stall),
|
||||
.data_in ({valid_st0, is_fill_st0, is_mshr_st0, is_fill_st0, miss_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_fill_st1, is_mshr_st1, writeen_unqual_st1, miss_st1, mem_rw_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_fill_st0, is_mshr_st0, miss_st0, write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_fill_st1, is_mshr_st1, miss_st1, write_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = tag_st1[`CACHE_REQ_INFO_RNG];
|
||||
assign {debug_wid_st1, debug_pc_st1} = tag_st1[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire writeen_st1 = (WRITE_ENABLE && !is_fill_st1 && mem_rw_st1 && !miss_st1)
|
||||
|| writeen_unqual_st1;
|
||||
wire read_st1 = !is_fill_st1 && !write_st1;
|
||||
|
||||
wire readen_st1 = !is_fill_st1 && !mem_rw_st1;
|
||||
wire writeen_st1 = (WRITE_ENABLE && write_st1 && !miss_st1)
|
||||
|| is_fill_st1;
|
||||
|
||||
wire crsq_push_st1 = readen_st1 && !miss_st1;
|
||||
|
||||
wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1;
|
||||
wire crsq_push_st1 = read_st1 && !miss_st1;
|
||||
|
||||
wire mreq_push_st1 = (readen_st1 && miss_st1 && !mshr_pending_st1)
|
||||
|| do_writeback_st1;
|
||||
wire mreq_push_st1 = (read_st1 && miss_st1 && !mshr_pending_st1)
|
||||
|| write_st1;
|
||||
|
||||
wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] line_wdata_st1;
|
||||
wire [CACHE_LINE_SIZE-1:0] line_byteen_st1;
|
||||
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r;
|
||||
reg [CACHE_LINE_SIZE-1:0] line_byteen_r;
|
||||
always @(*) begin
|
||||
line_byteen_r = 0;
|
||||
for (integer p = 0; p < NUM_PORTS; p++) begin
|
||||
if ((NUM_PORTS == 1) || pmask_st1[p]) begin
|
||||
line_byteen_r[wsel_st1[p] * WORD_SIZE +: WORD_SIZE] = byteen_st1[p];
|
||||
if (NUM_PORTS > 1) begin
|
||||
always @(*) begin
|
||||
line_wdata_r = 'x;
|
||||
line_byteen_r = 0;
|
||||
for (integer i = 0; i < NUM_PORTS; ++i) begin
|
||||
if (pmask_st1[i]) begin
|
||||
line_wdata_r[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[i];
|
||||
line_byteen_r[wsel_st1[i] * WORD_SIZE +: WORD_SIZE] = byteen_st1[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
line_wdata_r = {`WORDS_PER_LINE{creq_data_st1}};
|
||||
line_byteen_r = 0;
|
||||
line_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
|
||||
end
|
||||
end
|
||||
assign line_wdata_st1 = line_wdata_r;
|
||||
assign line_byteen_st1 = line_byteen_r;
|
||||
end else begin
|
||||
assign line_byteen_st1 = byteen_st1;
|
||||
`UNUSED_VAR (wsel_st1)
|
||||
end
|
||||
assign line_wdata_st1 = creq_data_st1;
|
||||
assign line_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
VX_data_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
|
@ -353,20 +360,21 @@ module VX_bank #(
|
|||
.addr (addr_st1),
|
||||
|
||||
// reading
|
||||
.readen (valid_st1 && readen_st1),
|
||||
.rdata (rdata_st1),
|
||||
.readen (valid_st1 && read_st1),
|
||||
.read_data (rdata_st1),
|
||||
|
||||
// writing
|
||||
.writeen (valid_st1 && writeen_st1),
|
||||
.is_fill (is_fill_st1),
|
||||
.byteen (line_byteen_st1),
|
||||
.wdata (wdata_st1)
|
||||
.byteen (line_byteen_st1),
|
||||
.write_data (line_wdata_st1),
|
||||
.fill_data (wdata_st1)
|
||||
);
|
||||
|
||||
wire mshr_allocate = creq_fire && ~creq_rw;
|
||||
wire mshr_replay = do_fill_st0 && ~crsq_stall;
|
||||
wire mshr_lookup = valid_st0 && !is_fill_st0 && ~is_mshr_st0 && ~mem_rw_st0 && ~crsq_stall;
|
||||
wire mshr_release = valid_st1 && readen_st1 && ~is_mshr_st1 && ~miss_st1 && ~crsq_stall;
|
||||
wire mshr_lookup = valid_st0 && read_st0 && !is_mshr_st0 && !crsq_stall;
|
||||
wire mshr_release = valid_st1 && read_st1 && !is_mshr_st1 && !miss_st1 && !crsq_stall;
|
||||
|
||||
wire mshr_not_full;
|
||||
|
||||
|
@ -430,7 +438,7 @@ module VX_bank #(
|
|||
wire [NUM_PORTS-1:0] crsq_pmask;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
|
||||
wire [CORE_TAG_WIDTH-1:0] crsq_tag;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] crsq_tag;
|
||||
|
||||
assign crsq_valid = valid_st1 && crsq_push_st1;
|
||||
assign crsq_stall = crsq_valid && !crsq_ready;
|
||||
|
@ -440,15 +448,15 @@ module VX_bank #(
|
|||
assign crsq_tag = tag_st1;
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
for (genvar p = 0; p < NUM_PORTS; ++p) begin
|
||||
assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
for (genvar i = 0; i < NUM_PORTS; ++i) begin
|
||||
assign crsq_data[i] = rdata_st1[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
assign crsq_data = rdata_st1;
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.OUTPUT_REG (1 == NUM_BANKS)
|
||||
) core_rsp_req (
|
||||
|
@ -464,24 +472,29 @@ module VX_bank #(
|
|||
|
||||
// Enqueue memory request
|
||||
|
||||
wire [CACHE_LINE_SIZE-1:0] mreq_byteen;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
|
||||
wire [NUM_PORTS-1:0] mreq_pmask;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mreq_data;
|
||||
|
||||
wire mreq_push, mreq_pop, mreq_empty, mreq_rw;
|
||||
|
||||
assign mreq_push = valid_st1 && mreq_push_st1;
|
||||
|
||||
assign mreq_pop = mem_req_valid && mem_req_ready;
|
||||
|
||||
assign mreq_rw = WRITE_ENABLE && do_writeback_st1;
|
||||
assign mreq_byteen = mreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
|
||||
assign mreq_addr = addr_st1;
|
||||
assign mreq_id = mshr_id_st1;
|
||||
assign mreq_data = wdata_st1;
|
||||
assign mreq_rw = WRITE_ENABLE && write_st1;
|
||||
assign mreq_addr = addr_st1;
|
||||
assign mreq_id = mshr_id_st1;
|
||||
assign mreq_pmask= pmask_st1;
|
||||
assign mreq_wsel = wsel_st1;
|
||||
assign mreq_byteen = byteen_st1;
|
||||
assign mreq_data = creq_data_st1;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + `CACHE_LINE_WIDTH),
|
||||
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
|
||||
.SIZE (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-2)
|
||||
) mem_req_queue (
|
||||
|
@ -489,8 +502,8 @@ module VX_bank #(
|
|||
.reset (reset),
|
||||
.push (mreq_push),
|
||||
.pop (mreq_pop),
|
||||
.data_in ({mreq_rw, mreq_byteen, mreq_addr, mreq_id, mreq_data}),
|
||||
.data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_id, mem_req_data}),
|
||||
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.empty (mreq_empty),
|
||||
.alm_full (mreq_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -512,8 +525,8 @@ module VX_bank #(
|
|||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1;
|
||||
assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1;
|
||||
assign perf_read_misses = valid_st1 && read_st1 && !is_mshr_st1 && miss_st1;
|
||||
assign perf_write_misses = valid_st1 && write_st1 && !is_mshr_st1 && miss_st1;
|
||||
assign perf_pipe_stalls = crsq_stall || mreq_alm_full || mshr_alm_full;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
@ -547,7 +560,7 @@ module VX_bank #(
|
|||
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
|
||||
end
|
||||
if (mreq_push) begin
|
||||
if (do_writeback_st1)
|
||||
if (write_st1)
|
||||
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
|
||||
else
|
||||
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, debug_wid_st1, debug_pc_st1);
|
||||
|
|
149
hw/rtl/cache/VX_cache.v
vendored
149
hw/rtl/cache/VX_cache.v
vendored
|
@ -44,7 +44,9 @@ module VX_cache #(
|
|||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
|
||||
) (
|
||||
`SCOPE_IO_VX_cache
|
||||
|
||||
|
@ -103,6 +105,54 @@ module VX_cache #(
|
|||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [NUM_PORTS-1:0] mem_req_pmask_p;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_p;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_p;
|
||||
wire mem_req_rw_p;
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
reg [CACHE_LINE_SIZE-1:0] mem_req_byteen_r;
|
||||
reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r;
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = 0;
|
||||
mem_req_data_r = 'x;
|
||||
for (integer i = 0; i < NUM_PORTS; ++i) begin
|
||||
if ((1 == NUM_PORTS) || mem_req_pmask_p[i]) begin
|
||||
mem_req_byteen_r[mem_req_wsel_p[i] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[i];
|
||||
mem_req_data_r[mem_req_wsel_p[i] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_req_rw = mem_req_rw_p;
|
||||
assign mem_req_byteen = mem_req_byteen_r;
|
||||
assign mem_req_data = mem_req_data_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_pmask_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
assign mem_req_rw = mem_req_rw_p;
|
||||
assign mem_req_byteen = mem_req_byteen_p;
|
||||
assign mem_req_data = mem_req_data_p;
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_pmask_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
assign mem_req_rw = 0;
|
||||
assign mem_req_byteen = 'x;
|
||||
assign mem_req_data = 'x;
|
||||
end
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core request
|
||||
|
@ -124,9 +174,11 @@ module VX_cache #(
|
|||
// Memory request
|
||||
wire mem_req_valid_nc;
|
||||
wire mem_req_rw_nc;
|
||||
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc;
|
||||
wire [NUM_PORTS-1:0] mem_req_pmask_nc;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_nc;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_nc;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc;
|
||||
wire mem_req_ready_nc;
|
||||
|
||||
|
@ -134,10 +186,11 @@ module VX_cache #(
|
|||
wire mem_rsp_valid_nc;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_nc;
|
||||
wire mem_rsp_ready_nc;
|
||||
wire mem_rsp_ready_nc;
|
||||
|
||||
if (NC_ENABLE) begin
|
||||
VX_nc_bypass #(
|
||||
.NUM_PORTS (NUM_PORTS),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_RSP_TAGS (`CORE_RSP_TAGS),
|
||||
.NC_TAG_BIT (0),
|
||||
|
@ -147,12 +200,12 @@ module VX_cache #(
|
|||
.CORE_TAG_IN_WIDTH (CORE_TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
|
||||
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
|
||||
.MEM_TAG_IN_WIDTH (MEM_TAG_IN_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH)
|
||||
) nc_bypass (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request in
|
||||
.core_req_valid_in (core_req_valid),
|
||||
|
@ -188,19 +241,23 @@ module VX_cache #(
|
|||
|
||||
// Memory request in
|
||||
.mem_req_valid_in (mem_req_valid_nc),
|
||||
.mem_req_rw_in (mem_req_rw_nc),
|
||||
.mem_req_byteen_in (mem_req_byteen_nc),
|
||||
.mem_req_rw_in (mem_req_rw_nc),
|
||||
.mem_req_addr_in (mem_req_addr_nc),
|
||||
.mem_req_pmask_in (mem_req_pmask_nc),
|
||||
.mem_req_byteen_in (mem_req_byteen_nc),
|
||||
.mem_req_wsel_in (mem_req_wsel_nc),
|
||||
.mem_req_data_in (mem_req_data_nc),
|
||||
.mem_req_tag_in (mem_req_tag_nc),
|
||||
.mem_req_ready_in (mem_req_ready_nc),
|
||||
|
||||
// Memory request out
|
||||
.mem_req_valid_out (mem_req_valid),
|
||||
.mem_req_rw_out (mem_req_rw),
|
||||
.mem_req_byteen_out (mem_req_byteen),
|
||||
.mem_req_addr_out (mem_req_addr),
|
||||
.mem_req_data_out (mem_req_data),
|
||||
.mem_req_rw_out (mem_req_rw_p),
|
||||
.mem_req_pmask_out (mem_req_pmask_p),
|
||||
.mem_req_byteen_out (mem_req_byteen_p),
|
||||
.mem_req_wsel_out (mem_req_wsel_p),
|
||||
.mem_req_data_out (mem_req_data_p),
|
||||
.mem_req_tag_out (mem_req_tag),
|
||||
.mem_req_ready_out (mem_req_ready),
|
||||
|
||||
|
@ -232,10 +289,12 @@ module VX_cache #(
|
|||
assign core_rsp_ready_nc = core_rsp_ready;
|
||||
|
||||
assign mem_req_valid = mem_req_valid_nc;
|
||||
assign mem_req_rw = mem_req_rw_nc;
|
||||
assign mem_req_addr = mem_req_addr_nc;
|
||||
assign mem_req_byteen = mem_req_byteen_nc;
|
||||
assign mem_req_data = mem_req_data_nc;
|
||||
assign mem_req_rw_p = mem_req_rw_nc;
|
||||
assign mem_req_pmask_p = mem_req_pmask_nc;
|
||||
assign mem_req_byteen_p = mem_req_byteen_nc;
|
||||
assign mem_req_wsel_p = mem_req_wsel_nc;
|
||||
assign mem_req_data_p = mem_req_data_nc;
|
||||
assign mem_req_tag = mem_req_tag_nc;
|
||||
assign mem_req_ready_nc = mem_req_ready;
|
||||
|
||||
|
@ -251,6 +310,8 @@ module VX_cache #(
|
|||
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_qual;
|
||||
|
||||
wire mrsq_out_valid, mrsq_out_ready;
|
||||
|
||||
`RESET_RELAY (mrsq_reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH),
|
||||
|
@ -258,7 +319,7 @@ module VX_cache #(
|
|||
.OUTPUT_REG (MRSQ_SIZE > 2)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mrsq_reset),
|
||||
.ready_in (mem_rsp_ready_nc),
|
||||
.valid_in (mem_rsp_valid_nc),
|
||||
.data_in ({mem_rsp_tag_nc, mem_rsp_data_nc}),
|
||||
|
@ -274,13 +335,15 @@ module VX_cache #(
|
|||
wire [`LINE_SELECT_BITS-1:0] flush_addr;
|
||||
wire flush_enable;
|
||||
|
||||
`RESET_RELAY (flush_reset);
|
||||
|
||||
VX_flush_ctrl #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS)
|
||||
) flush_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (flush_reset),
|
||||
.addr_out (flush_addr),
|
||||
.valid_out (flush_enable)
|
||||
);
|
||||
|
@ -289,28 +352,30 @@ module VX_cache #(
|
|||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_core_req_wsel;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
|
||||
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid;
|
||||
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_mem_req_pmask;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_mem_req_wsel;
|
||||
wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
@ -361,28 +426,30 @@ module VX_cache #(
|
|||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
wire curr_bank_core_req_valid;
|
||||
wire [NUM_PORTS-1:0] curr_bank_core_req_pmask;
|
||||
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_core_req_wsel;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
wire curr_bank_core_req_rw;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire curr_bank_core_req_ready;
|
||||
|
||||
wire curr_bank_core_rsp_valid;
|
||||
wire [NUM_PORTS-1:0] curr_bank_core_rsp_pmask;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_rsp_tid;
|
||||
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
|
||||
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
|
||||
wire curr_bank_core_rsp_ready;
|
||||
|
||||
wire curr_bank_mem_req_valid;
|
||||
wire curr_bank_mem_req_rw;
|
||||
wire [CACHE_LINE_SIZE-1:0] curr_bank_mem_req_byteen;
|
||||
wire [NUM_PORTS-1:0] curr_bank_mem_req_pmask;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_mem_req_byteen;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_mem_req_wsel;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_req_id;
|
||||
wire[`CACHE_LINE_WIDTH-1:0] curr_bank_mem_req_data;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_mem_req_data;
|
||||
wire curr_bank_mem_req_ready;
|
||||
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
@ -414,7 +481,9 @@ module VX_cache #(
|
|||
// Memory request
|
||||
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
|
||||
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
|
||||
assign per_bank_mem_req_pmask[i] = curr_bank_mem_req_pmask;
|
||||
assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen;
|
||||
assign per_bank_mem_req_wsel[i] = curr_bank_mem_req_wsel;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
|
||||
end else begin
|
||||
|
@ -435,6 +504,8 @@ module VX_cache #(
|
|||
assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual);
|
||||
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
|
||||
assign per_bank_mem_rsp_ready[i] = curr_bank_mem_rsp_ready;
|
||||
|
||||
`RESET_RELAY (bank_reset);
|
||||
|
||||
VX_bank #(
|
||||
.BANK_ID (i),
|
||||
|
@ -457,7 +528,7 @@ module VX_cache #(
|
|||
`SCOPE_BIND_VX_cache_bank(i)
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (bank_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_read_misses (perf_read_miss_per_bank[i]),
|
||||
|
@ -489,7 +560,9 @@ module VX_cache #(
|
|||
// Memory request
|
||||
.mem_req_valid (curr_bank_mem_req_valid),
|
||||
.mem_req_rw (curr_bank_mem_req_rw),
|
||||
.mem_req_pmask (curr_bank_mem_req_pmask),
|
||||
.mem_req_byteen (curr_bank_mem_req_byteen),
|
||||
.mem_req_wsel (curr_bank_mem_req_wsel),
|
||||
.mem_req_addr (curr_bank_mem_req_addr),
|
||||
.mem_req_id (curr_bank_mem_req_id),
|
||||
.mem_req_data (curr_bank_mem_req_data),
|
||||
|
@ -532,25 +605,27 @@ module VX_cache #(
|
|||
.core_rsp_ready (core_rsp_ready_nc)
|
||||
);
|
||||
|
||||
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i]};
|
||||
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
|
||||
end
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id;
|
||||
|
||||
`RESET_RELAY (mreq_reset);
|
||||
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_BANKS),
|
||||
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
|
||||
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
|
||||
.BUFFERED (1)
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (mreq_reset),
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.data_in (data_in),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.valid_out (mem_req_valid_nc),
|
||||
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}),
|
||||
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_pmask_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}),
|
||||
.ready_out (mem_req_ready_nc)
|
||||
);
|
||||
|
||||
|
|
6
hw/rtl/cache/VX_cache_define.vh
vendored
6
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -9,8 +9,10 @@
|
|||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQS)
|
||||
|
||||
// tag valid tid word_sel
|
||||
`define MSHR_DATA_WIDTH (CORE_TAG_WIDTH + (1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
|
||||
`define PORTS_BITS `LOG2UP(NUM_PORTS)
|
||||
|
||||
// tag valid tid word_sel
|
||||
`define MSHR_DATA_WIDTH ((CORE_TAG_WIDTH + 1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
|
||||
|
||||
`define WORD_WIDTH (8 * WORD_SIZE)
|
||||
|
||||
|
|
31
hw/rtl/cache/VX_core_req_bank_sel.v
vendored
31
hw/rtl/cache/VX_core_req_bank_sel.v
vendored
|
@ -43,7 +43,7 @@ module VX_core_req_bank_sel #(
|
|||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen,
|
||||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data,
|
||||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid,
|
||||
output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
|
||||
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
|
||||
input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready
|
||||
);
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
|
@ -80,9 +80,9 @@ module VX_core_req_bank_sel #(
|
|||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r;
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r;
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r;
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
|
||||
reg [NUM_BANKS-1:0] per_bank_core_req_rw_r;
|
||||
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r;
|
||||
reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
|
||||
reg [NUM_REQS-1:0] core_req_ready_r;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
@ -101,7 +101,7 @@ module VX_core_req_bank_sel #(
|
|||
end
|
||||
end
|
||||
|
||||
for (genvar i = NUM_REQS-1; i >= 0; --i) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_line_match[i] = (core_req_line_addr[i] == per_bank_line_addr_r[core_req_bid[i]]);
|
||||
end
|
||||
|
||||
|
@ -129,10 +129,9 @@ module VX_core_req_bank_sel #(
|
|||
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
|
||||
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
|
||||
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
|
||||
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
|
||||
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
|
||||
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
|
||||
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
|
||||
|
||||
req_select_table_r[core_req_bid[i]][i % NUM_PORTS] = (1 << i);
|
||||
end
|
||||
end
|
||||
|
@ -177,31 +176,25 @@ module VX_core_req_bank_sel #(
|
|||
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
|
||||
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
|
||||
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
|
||||
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
|
||||
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
|
||||
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
|
||||
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
|
||||
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (SHARED_BANK_READY == 0) begin
|
||||
always @(*) begin
|
||||
core_req_ready_r = 'x;
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
|
||||
&& core_req_line_match[i];
|
||||
end
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
|
||||
&& core_req_line_match[i];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
core_req_ready_r = 'x;
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
if (core_req_valid[i]) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready
|
||||
&& core_req_line_match[i];
|
||||
end
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_req_ready_r[i] = per_bank_core_req_ready
|
||||
&& core_req_line_match[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
234
hw/rtl/cache/VX_core_rsp_merge.v
vendored
234
hw/rtl/cache/VX_core_rsp_merge.v
vendored
|
@ -24,7 +24,7 @@ module VX_core_rsp_merge #(
|
|||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
|
||||
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
|
||||
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
|
||||
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
|
||||
|
||||
// Core Response
|
||||
|
@ -40,7 +40,7 @@ module VX_core_rsp_merge #(
|
|||
|
||||
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
|
||||
reg [NUM_BANKS-1:0] core_rsp_bank_select;
|
||||
reg [NUM_BANKS-1:0] per_bank_core_rsp_ready_r;
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
|
||||
|
@ -51,61 +51,101 @@ module VX_core_rsp_merge #(
|
|||
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
wire core_rsp_ready_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_tag_unqual = 'x;
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_rsp_valid[i]) begin
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (NUM_PORTS > 1) begin
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_bank_select = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
for (integer p = 0; p < NUM_PORTS; p++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
|
||||
core_rsp_bank_select[i] = core_rsp_ready_unqual;
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
per_bank_core_rsp_sent_r <= '0;
|
||||
end else begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
|
||||
per_bank_core_rsp_sent_r[i] <= '0;
|
||||
end else begin
|
||||
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_tag_unqual = 'x;
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
for (integer p = 0; p < NUM_PORTS; ++p) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& !per_bank_core_rsp_sent_r[i][p]) begin
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag[i][p];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
per_bank_core_rsp_sent = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
for (integer p = 0; p < NUM_PORTS; ++p) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& !per_bank_core_rsp_sent_r[i][p]
|
||||
&& (per_bank_core_rsp_tag[i][p][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
|
||||
per_bank_core_rsp_sent[i][p] = core_rsp_ready_unqual;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_bank_select = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_bank_select[i] = core_rsp_ready_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_tag_unqual = 'x;
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_rsp_valid[i]) begin
|
||||
core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
per_bank_core_rsp_ready_r = 0;
|
||||
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& (per_bank_core_rsp_tag[i][0][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
|
||||
) pipe_reg (
|
||||
) skid_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_any),
|
||||
|
@ -118,40 +158,102 @@ module VX_core_rsp_merge #(
|
|||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
|
||||
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_unqual;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_data_unqual = 'x;
|
||||
bank_select_table = 'x;
|
||||
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_rsp_valid[i]) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
|
||||
end
|
||||
end
|
||||
end
|
||||
if (NUM_PORTS > 1) begin
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
core_rsp_bank_select[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
|
||||
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
|
||||
end
|
||||
reg [NUM_REQS-1:0][(`PORTS_BITS + `BANK_SELECT_BITS)-1:0] bank_select_table;
|
||||
|
||||
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
per_bank_core_rsp_sent_r <= '0;
|
||||
end else begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
|
||||
per_bank_core_rsp_sent_r[i] <= '0;
|
||||
end else begin
|
||||
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = '0;
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_data_unqual = 'x;
|
||||
bank_select_table = 'x;
|
||||
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
for (integer p = 0; p < NUM_PORTS; ++p) begin
|
||||
if (per_bank_core_rsp_valid[i]
|
||||
&& per_bank_core_rsp_pmask[i][p]
|
||||
&& !per_bank_core_rsp_sent_r[i][p]) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_tag[i][p];
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
|
||||
bank_select_table[per_bank_core_rsp_tid[i][p]] = {`PORTS_BITS'(p), `BANK_SELECT_BITS'(i)};
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
per_bank_core_rsp_sent = '0;
|
||||
for (integer i = 0; i < NUM_REQS; i++) begin
|
||||
if (core_rsp_valid_unqual[i]) begin
|
||||
per_bank_core_rsp_sent[bank_select_table[i][0 +: `BANK_SELECT_BITS]][bank_select_table[i][`BANK_SELECT_BITS +: `PORTS_BITS]] = core_rsp_ready_unqual[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (per_bank_core_rsp_pmask)
|
||||
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_data_unqual = 'x;
|
||||
bank_select_table = 'x;
|
||||
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_rsp_valid[i]) begin
|
||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
|
||||
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
|
||||
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
|
||||
) pipe_reg (
|
||||
) skid_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_valid_unqual[i]),
|
||||
|
@ -167,9 +269,7 @@ module VX_core_rsp_merge #(
|
|||
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
|
||||
end
|
||||
assign per_bank_core_rsp_ready = per_bank_core_rsp_ready_r;
|
||||
|
||||
end else begin
|
||||
|
||||
|
|
42
hw/rtl/cache/VX_data_access.v
vendored
42
hw/rtl/cache/VX_data_access.v
vendored
|
@ -32,13 +32,14 @@ module VX_data_access #(
|
|||
|
||||
// reading
|
||||
input wire readen,
|
||||
output wire [`CACHE_LINE_WIDTH-1:0] rdata,
|
||||
output wire [`CACHE_LINE_WIDTH-1:0] read_data,
|
||||
|
||||
// writing
|
||||
input wire writeen,
|
||||
input wire is_fill,
|
||||
input wire [CACHE_LINE_SIZE-1:0] byteen,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] wdata
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] write_data,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] fill_data
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CACHE_ID)
|
||||
|
@ -50,29 +51,34 @@ module VX_data_access #(
|
|||
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
|
||||
|
||||
wire [`LINE_SELECT_BITS-1:0] line_addr;
|
||||
wire [BYTEENW-1:0] byte_enable;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
|
||||
assign line_addr = addr[`LINE_SELECT_BITS-1:0];
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
assign byte_enable = is_fill ? {BYTEENW{1'b1}} : byteen;
|
||||
assign wren = is_fill ? {BYTEENW{writeen}} : (byteen & {BYTEENW{writeen}});
|
||||
assign wdata = is_fill ? fill_data : write_data;
|
||||
end else begin
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (is_fill)
|
||||
assign byte_enable = 1'b1;
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wren = writeen;
|
||||
assign wdata = fill_data;
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (CACHE_LINE_SIZE * 8),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (BYTEENW),
|
||||
.RWCHECK (1)
|
||||
.DATAW (CACHE_LINE_SIZE * 8),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk(clk),
|
||||
.addr(line_addr),
|
||||
.wren({BYTEENW{writeen}} & byte_enable),
|
||||
.din(wdata),
|
||||
.dout(rdata)
|
||||
.clk (clk),
|
||||
.addr (line_addr),
|
||||
.wren (wren),
|
||||
.wdata (wdata),
|
||||
.rden (1'b1),
|
||||
.rdata (read_data)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
|
@ -81,13 +87,13 @@ module VX_data_access #(
|
|||
always @(posedge clk) begin
|
||||
if (writeen && ~stall) begin
|
||||
if (is_fill) begin
|
||||
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata);
|
||||
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
|
||||
end else begin
|
||||
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata);
|
||||
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, wren, line_addr, write_data);
|
||||
end
|
||||
end
|
||||
if (readen && ~stall) begin
|
||||
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata);
|
||||
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, read_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
46
hw/rtl/cache/VX_miss_resrv.v
vendored
46
hw/rtl/cache/VX_miss_resrv.v
vendored
|
@ -102,22 +102,20 @@ module VX_miss_resrv #(
|
|||
end
|
||||
end
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) dequeue_pe (
|
||||
.data_in (valid_table_x & ready_table_x),
|
||||
.index (dequeue_id_x),
|
||||
.valid_out (dequeue_val_x),
|
||||
`UNUSED_PIN (onehot)
|
||||
VX_lzc #(
|
||||
.WIDTH (MSHR_SIZE)
|
||||
) dequeue_sel (
|
||||
.in_i (valid_table_x & ready_table_x),
|
||||
.cnt_o (dequeue_id_x),
|
||||
.valid_o (dequeue_val_x)
|
||||
);
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) allocate_pe (
|
||||
.data_in (~valid_table_n),
|
||||
.index (allocate_id_n),
|
||||
.valid_out (allocate_rdy_n),
|
||||
`UNUSED_PIN (onehot)
|
||||
VX_lzc #(
|
||||
.WIDTH (MSHR_SIZE)
|
||||
) allocate_sel (
|
||||
.in_i (~valid_table_n),
|
||||
.cnt_o (allocate_id_n),
|
||||
.valid_o (allocate_rdy_n)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
|
@ -171,17 +169,17 @@ module VX_miss_resrv #(
|
|||
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id))
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`MSHR_DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (1)
|
||||
.DATAW (`MSHR_DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.LUTRAM (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.waddr (allocate_id_r),
|
||||
.raddr (dequeue_id_r),
|
||||
.wren (allocate_valid),
|
||||
.din (allocate_data),
|
||||
.dout (dequeue_data)
|
||||
.clk (clk),
|
||||
.waddr (allocate_id_r),
|
||||
.raddr (dequeue_id_r),
|
||||
.wren (allocate_valid),
|
||||
.wdata (allocate_data),
|
||||
.rden (1'b1),
|
||||
.rdata (dequeue_data)
|
||||
);
|
||||
|
||||
assign allocate_ready = allocate_rdy_r;
|
||||
|
|
151
hw/rtl/cache/VX_nc_bypass.v
vendored
151
hw/rtl/cache/VX_nc_bypass.v
vendored
|
@ -1,6 +1,7 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_nc_bypass #(
|
||||
parameter NUM_PORTS = 1,
|
||||
parameter NUM_REQS = 1,
|
||||
parameter NUM_RSP_TAGS = 0,
|
||||
parameter NC_TAG_BIT = 0,
|
||||
|
@ -10,13 +11,14 @@ module VX_nc_bypass #(
|
|||
parameter CORE_TAG_IN_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_DATA_SIZE = 1,
|
||||
parameter MEM_DATA_SIZE = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1
|
||||
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
|
||||
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
|
||||
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
|
||||
localparam MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -57,8 +59,10 @@ module VX_nc_bypass #(
|
|||
input wire mem_req_valid_in,
|
||||
input wire mem_req_rw_in,
|
||||
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
|
||||
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [NUM_PORTS-1:0] mem_req_pmask_in,
|
||||
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in,
|
||||
input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire mem_req_ready_in,
|
||||
|
||||
|
@ -66,8 +70,10 @@ module VX_nc_bypass #(
|
|||
output wire mem_req_valid_out,
|
||||
output wire mem_req_rw_out,
|
||||
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
|
||||
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [NUM_PORTS-1:0] mem_req_pmask_out,
|
||||
output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out,
|
||||
output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
|
@ -99,7 +105,6 @@ module VX_nc_bypass #(
|
|||
// core request handling
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire [NUM_REQS-1:0] core_req_nc_tids;
|
||||
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
|
||||
wire core_req_nc_valid;
|
||||
|
@ -110,13 +115,12 @@ module VX_nc_bypass #(
|
|||
|
||||
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_tids;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
VX_lzc #(
|
||||
.WIDTH (NUM_REQS)
|
||||
) core_req_sel (
|
||||
.data_in (core_req_valid_in_nc),
|
||||
.index (core_req_nc_tid),
|
||||
.onehot (core_req_nc_sel),
|
||||
.valid_out (core_req_nc_valid)
|
||||
.in_i (core_req_valid_in_nc),
|
||||
.cnt_o (core_req_nc_tid),
|
||||
.valid_o (core_req_nc_valid)
|
||||
);
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
|
||||
|
@ -139,10 +143,9 @@ module VX_nc_bypass #(
|
|||
if (NUM_REQS > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
|
||||
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
|
||||
(~mem_req_valid_in && mem_req_ready_out && (core_req_nc_tid == i)) : core_req_ready_out[i];
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_sel)
|
||||
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
|
||||
end
|
||||
|
||||
|
@ -151,7 +154,7 @@ module VX_nc_bypass #(
|
|||
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
|
||||
assign mem_req_ready_in = mem_req_ready_out;
|
||||
|
||||
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_nc;
|
||||
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_c;
|
||||
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_IN_WIDTH),
|
||||
|
@ -160,81 +163,69 @@ module VX_nc_bypass #(
|
|||
) mem_req_tag_insert (
|
||||
.data_in (mem_req_tag_in),
|
||||
.sel_in ('0),
|
||||
.data_out (mem_req_tag_in_nc)
|
||||
.data_out (mem_req_tag_in_c)
|
||||
);
|
||||
|
||||
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
|
||||
end
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (MUX_DATAW),
|
||||
.N (NUM_REQS)
|
||||
) core_req_nc_mux (
|
||||
.data_in (core_req_nc_mux_in),
|
||||
.sel_in (core_req_nc_sel),
|
||||
.data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel})
|
||||
);
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
|
||||
|
||||
for (genvar i = 0; i < P; ++i) begin
|
||||
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
|
||||
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
|
||||
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel;
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
|
||||
end else begin
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
|
||||
end
|
||||
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_tid];
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_tid)
|
||||
assign core_req_tag_in_sel = core_req_tag_in;
|
||||
assign core_req_data_in_sel = core_req_data_in;
|
||||
assign core_req_byteen_in_sel = core_req_byteen_in;
|
||||
assign core_req_addr_in_sel = core_req_addr_in;
|
||||
assign core_req_rw_in_sel = core_req_rw_in;
|
||||
end
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
|
||||
|
||||
if (D != 0) begin
|
||||
reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r;
|
||||
reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[0] = core_req_byteen_in_sel;
|
||||
|
||||
for (genvar i = 0; i < P; ++i) begin
|
||||
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
|
||||
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in;
|
||||
mem_req_wsel_in_r = 'x;
|
||||
mem_req_wsel_in_r[0] = req_addr_idx;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[0] = core_req_data_in_sel;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
|
||||
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({req_addr_idx, core_req_tag_in});
|
||||
end else begin
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'(core_req_tag_in);
|
||||
end
|
||||
assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1);
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_wsel_in)
|
||||
`UNUSED_VAR (mem_req_pmask_in)
|
||||
assign mem_req_pmask_out = 0;
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
|
||||
assign mem_req_wsel_out = 0;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
|
||||
end
|
||||
|
||||
// core response handling
|
||||
|
||||
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_unqual;
|
||||
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_c;
|
||||
|
||||
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
||||
|
||||
|
@ -246,7 +237,7 @@ module VX_nc_bypass #(
|
|||
) core_rsp_tag_insert (
|
||||
.data_in (core_rsp_tag_in[i]),
|
||||
.sel_in ('0),
|
||||
.data_out (core_rsp_tag_out_unqual[i])
|
||||
.data_out (core_rsp_tag_out_c[i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -272,14 +263,14 @@ module VX_nc_bypass #(
|
|||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_unqual[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_c[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
end
|
||||
end else begin
|
||||
assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc;
|
||||
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_unqual : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_c : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
|
||||
assign core_rsp_ready_in = core_rsp_ready_out;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
|
37
hw/rtl/cache/VX_shared_mem.v
vendored
37
hw/rtl/cache/VX_shared_mem.v
vendored
|
@ -171,16 +171,17 @@ module VX_shared_mem #(
|
|||
&& creq_out_fire;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`WORD_WIDTH),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (WORD_SIZE),
|
||||
.RWCHECK (1)
|
||||
.DATAW (`WORD_WIDTH),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (WORD_SIZE),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.addr (per_bank_core_req_addr[i]),
|
||||
.wren ({WORD_SIZE{wren}} & per_bank_core_req_byteen[i]),
|
||||
.din (per_bank_core_req_data[i]),
|
||||
.dout (per_bank_core_rsp_data[i])
|
||||
.clk (clk),
|
||||
.addr (per_bank_core_req_addr[i]),
|
||||
.wren ({WORD_SIZE{wren}} & per_bank_core_req_byteen[i]),
|
||||
.wdata (per_bank_core_req_data[i]),
|
||||
.rden (1'b1),
|
||||
.rdata (per_bank_core_rsp_data[i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -216,18 +217,19 @@ module VX_shared_mem #(
|
|||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
|
||||
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valids_in = 0;
|
||||
core_rsp_data_in = 'x;
|
||||
core_rsp_tag_in = 'x;
|
||||
bank_rsp_sel_cur = 0;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_tag_in = 'x;
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_req_reads[i] && ~bank_rsp_sel_prv[i]) begin
|
||||
core_rsp_tag_in = per_bank_core_req_tag[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valids_in = 0;
|
||||
core_rsp_data_in = 'x;
|
||||
bank_rsp_sel_cur = 0;
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_req_valid[i]
|
||||
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
|
||||
|
@ -278,13 +280,16 @@ module VX_shared_mem #(
|
|||
reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
core_req_tag_sel ='x;
|
||||
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
|
||||
if (per_bank_core_req_valid[i]) begin
|
||||
core_req_tag_sel = per_bank_core_req_tag[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
is_multi_tag_req = 0;
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid[i]
|
||||
|
|
18
hw/rtl/cache/VX_tag_access.v
vendored
18
hw/rtl/cache/VX_tag_access.v
vendored
|
@ -46,16 +46,16 @@ module VX_tag_access #(
|
|||
wire [`LINE_SELECT_BITS-1:0] line_addr = addr [`LINE_SELECT_BITS-1:0];
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW(`TAG_SELECT_BITS + 1),
|
||||
.SIZE(`LINES_PER_BANK),
|
||||
.INITZERO(1),
|
||||
.RWCHECK(1)
|
||||
.DATAW (`TAG_SELECT_BITS + 1),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.NO_RWCHECK (1)
|
||||
) tag_store (
|
||||
.clk(clk),
|
||||
.addr(line_addr),
|
||||
.wren(fill),
|
||||
.din({!is_flush, line_tag}),
|
||||
.dout({read_valid, read_tag})
|
||||
.clk( clk),
|
||||
.addr (line_addr),
|
||||
.wren (fill),
|
||||
.wdata ({!is_flush, line_tag}),
|
||||
.rden (1'b1),
|
||||
.rdata ({read_valid, read_tag})
|
||||
);
|
||||
|
||||
assign tag_match = read_valid && (line_tag == read_tag);
|
||||
|
|
|
@ -15,7 +15,7 @@ module VX_fp_cvt #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire is_itof,
|
||||
input wire is_signed,
|
||||
|
@ -101,7 +101,7 @@ module VX_fp_cvt #(
|
|||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
|
@ -167,7 +167,7 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
|
@ -253,7 +253,7 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] of_before_round_s2;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
|
@ -435,7 +435,7 @@ module VX_fp_cvt #(
|
|||
assign stall = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFG_BITS)),
|
||||
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFLAGS_BITS)),
|
||||
.RESETW (1)
|
||||
) pipe_reg4 (
|
||||
.clk (clk),
|
||||
|
|
|
@ -16,7 +16,7 @@ module VX_fp_div #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
|
|
|
@ -16,7 +16,7 @@ module VX_fp_fma #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire do_madd,
|
||||
input wire do_sub,
|
||||
|
|
|
@ -15,8 +15,8 @@ module VX_fp_ncomp #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
|
@ -77,8 +77,8 @@ module VX_fp_ncomp #(
|
|||
|
||||
wire valid_in_s0;
|
||||
wire [TAGW-1:0] tag_in_s0;
|
||||
wire [`FPU_BITS-1:0] op_type_s0;
|
||||
wire [`FRM_BITS-1:0] frm_s0;
|
||||
wire [`INST_FPU_BITS-1:0] op_type_s0;
|
||||
wire [`INST_FRM_BITS-1:0] frm_s0;
|
||||
wire [LANES-1:0][31:0] dataa_s0, datab_s0;
|
||||
wire [LANES-1:0] a_sign_s0, b_sign_s0;
|
||||
wire [LANES-1:0][7:0] a_exponent_s0;
|
||||
|
@ -89,7 +89,7 @@ module VX_fp_ncomp #(
|
|||
wire stall;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
|
||||
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
|
||||
.RESETW (1),
|
||||
.DEPTH (0)
|
||||
) pipe_reg0 (
|
||||
|
@ -164,7 +164,7 @@ module VX_fp_ncomp #(
|
|||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (frm_s0)
|
||||
`FRM_RNE: begin // LE
|
||||
`INST_FRM_RNE: begin // LE
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
|
@ -173,7 +173,7 @@ module VX_fp_ncomp #(
|
|||
fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
|
||||
end
|
||||
end
|
||||
`FRM_RTZ: begin // LS
|
||||
`INST_FRM_RTZ: begin // LS
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
|
@ -182,7 +182,7 @@ module VX_fp_ncomp #(
|
|||
fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
|
||||
end
|
||||
end
|
||||
`FRM_RDN: begin // EQ
|
||||
`INST_FRM_RDN: begin // EQ
|
||||
fcmp_fflags[i] = 5'h0;
|
||||
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0;
|
||||
|
@ -207,11 +207,11 @@ module VX_fp_ncomp #(
|
|||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (op_type_s0)
|
||||
`FPU_CLASS: begin
|
||||
`INST_FPU_CLASS: begin
|
||||
tmp_result[i] = fclass_mask[i];
|
||||
tmp_fflags[i] = 'x;
|
||||
end
|
||||
`FPU_CMP: begin
|
||||
`INST_FPU_CMP: begin
|
||||
tmp_result[i] = fcmp_res[i];
|
||||
tmp_fflags[i] = fcmp_fflags[i];
|
||||
end
|
||||
|
@ -238,15 +238,15 @@ module VX_fp_ncomp #(
|
|||
end
|
||||
end
|
||||
|
||||
wire has_fflags_s0 = ((op_type_s0 == `FPU_MISC)
|
||||
&& (frm_s0 == 3 // MIN
|
||||
|| frm_s0 == 4)) // MAX
|
||||
|| (op_type_s0 == `FPU_CMP); // CMP
|
||||
wire has_fflags_s0 = ((op_type_s0 == `INST_FPU_MISC)
|
||||
&& (frm_s0 == 3 // MIN
|
||||
|| frm_s0 == 4)) // MAX
|
||||
|| (op_type_s0 == `INST_FPU_CMP); // CMP
|
||||
|
||||
assign stall = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
|
||||
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFLAGS_BITS)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
|
|
|
@ -34,7 +34,7 @@ module VX_fp_rounding #(
|
|||
|
||||
always @(*) begin
|
||||
case (rnd_mode_i)
|
||||
`FRM_RNE: // Decide according to round/sticky bits
|
||||
`INST_FRM_RNE: // Decide according to round/sticky bits
|
||||
case (round_sticky_bits_i)
|
||||
2'b00,
|
||||
2'b01: round_up = 1'b0; // < ulp/2 away, round down
|
||||
|
@ -42,10 +42,10 @@ module VX_fp_rounding #(
|
|||
2'b11: round_up = 1'b1; // > ulp/2 away, round up
|
||||
default: round_up = 1'bx;
|
||||
endcase
|
||||
`FRM_RTZ: round_up = 1'b0; // always round down
|
||||
`FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
|
||||
`FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
|
||||
`FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
|
||||
`INST_FRM_RTZ: round_up = 1'b0; // always round down
|
||||
`INST_FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
|
||||
`INST_FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
|
||||
`INST_FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
|
||||
default: round_up = 1'bx; // propagate x
|
||||
endcase
|
||||
end
|
||||
|
@ -58,7 +58,7 @@ module VX_fp_rounding #(
|
|||
|
||||
// In case of effective subtraction (thus signs of addition operands must have differed) and a
|
||||
// true zero result, the result sign is '-' in case of RDN and '+' for other modes.
|
||||
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
|
||||
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `INST_FRM_RDN)
|
||||
: sign_i;
|
||||
|
||||
endmodule
|
|
@ -16,7 +16,7 @@ module VX_fp_sqrt #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
|
|
@ -14,8 +14,8 @@ module VX_fpu_dpi #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
|
@ -76,21 +76,21 @@ module VX_fpu_dpi #(
|
|||
is_fsgnjx = 0;
|
||||
|
||||
case (op_type)
|
||||
`FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
|
||||
`FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
|
||||
`FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
|
||||
`FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
|
||||
`FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
|
||||
`FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
|
||||
`FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
|
||||
`FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
|
||||
`FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
|
||||
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
|
||||
`FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
|
||||
`FPU_CMP: begin core_select = FPU_NCP;
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
|
||||
`INST_FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
|
||||
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`INST_FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
|
||||
`INST_FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
|
||||
`INST_FPU_CMP: begin core_select = FPU_NCP;
|
||||
is_fle = (frm == 0);
|
||||
is_flt = (frm == 1);
|
||||
is_feq = (frm == 2);
|
||||
|
|
|
@ -11,8 +11,8 @@ module VX_fpu_fpga #(
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
|
@ -54,19 +54,19 @@ module VX_fpu_fpga #(
|
|||
is_itof = 0;
|
||||
is_signed = 0;
|
||||
case (op_type)
|
||||
`FPU_ADD: begin core_select = FPU_FMA; end
|
||||
`FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
|
||||
`FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
|
||||
`FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
|
||||
`FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
|
||||
`FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
|
||||
`FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
|
||||
`FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
|
||||
`FPU_CVTWUS: begin core_select = FPU_CVT; end
|
||||
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
|
||||
`FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
`INST_FPU_ADD: begin core_select = FPU_FMA; end
|
||||
`INST_FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
|
||||
`INST_FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
|
||||
`INST_FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
|
||||
`INST_FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
|
||||
`INST_FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
|
||||
`INST_FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
|
||||
`INST_FPU_DIV: begin core_select = FPU_DIV; end
|
||||
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
|
||||
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
|
||||
`INST_FPU_CVTWUS: begin core_select = FPU_CVT; end
|
||||
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
|
||||
`INST_FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
|
||||
default: begin core_select = FPU_NCP; end
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -19,8 +19,8 @@ module VX_fpu_fpnew
|
|||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
|
@ -81,7 +81,7 @@ module VX_fpu_fpnew
|
|||
fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status;
|
||||
|
||||
reg [FOP_BITS-1:0] fpu_op;
|
||||
reg [`FRM_BITS-1:0] fpu_rnd;
|
||||
reg [`INST_FRM_BITS-1:0] fpu_rnd;
|
||||
reg fpu_op_mod;
|
||||
reg fpu_has_fflags, fpu_has_fflags_out;
|
||||
|
||||
|
@ -95,38 +95,38 @@ module VX_fpu_fpnew
|
|||
fpu_operands[2] = datac;
|
||||
|
||||
case (op_type)
|
||||
`FPU_ADD: begin
|
||||
`INST_FPU_ADD: begin
|
||||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
end
|
||||
`FPU_SUB: begin
|
||||
`INST_FPU_SUB: begin
|
||||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
fpu_operands[2] = datab;
|
||||
fpu_op_mod = 1;
|
||||
end
|
||||
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
|
||||
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
|
||||
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
|
||||
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
|
||||
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
|
||||
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
|
||||
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
|
||||
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
||||
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
|
||||
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
|
||||
`FPU_MISC: begin
|
||||
`INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
|
||||
`INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
|
||||
`INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
|
||||
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
|
||||
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||
`INST_FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
|
||||
`INST_FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
|
||||
`INST_FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
|
||||
`INST_FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
||||
`INST_FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
|
||||
`INST_FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
|
||||
`INST_FPU_MISC: begin
|
||||
case (frm)
|
||||
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
|
||||
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
|
||||
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
|
||||
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
|
||||
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
|
||||
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
|
||||
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RNE; fpu_has_fflags = 0; end
|
||||
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RTZ; fpu_has_fflags = 0; end
|
||||
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RDN; fpu_has_fflags = 0; end
|
||||
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RNE; end
|
||||
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RTZ; end
|
||||
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RUP; fpu_has_fflags = 0; end
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
|
|
|
@ -10,8 +10,8 @@ interface VX_alu_req_if ();
|
|||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [31:0] next_PC;
|
||||
wire [`ALU_BITS-1:0] op_type;
|
||||
wire [`MOD_BITS-1:0] op_mod;
|
||||
wire [`INST_ALU_BITS-1:0] op_type;
|
||||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire use_PC;
|
||||
wire use_imm;
|
||||
wire [31:0] imm;
|
||||
|
|
|
@ -9,7 +9,7 @@ interface VX_csr_req_if ();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`CSR_BITS-1:0] op_type;
|
||||
wire [`INST_CSR_BITS-1:0] op_type;
|
||||
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||
wire [31:0] rs1_data;
|
||||
wire use_imm;
|
||||
|
|
|
@ -10,8 +10,8 @@ interface VX_decode_if ();
|
|||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] op_type;
|
||||
wire [`MOD_BITS-1:0] op_mod;
|
||||
wire [`INST_OP_BITS-1:0] op_type;
|
||||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
|
|
|
@ -9,8 +9,8 @@ interface VX_fpu_req_if ();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`FPU_BITS-1:0] op_type;
|
||||
wire [`MOD_BITS-1:0] op_mod;
|
||||
wire [`INST_FPU_BITS-1:0] op_type;
|
||||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
|
|
@ -10,7 +10,7 @@ interface VX_fpu_to_csr_if ();
|
|||
fflags_t write_fflags;
|
||||
|
||||
wire [`NW_BITS-1:0] read_wid;
|
||||
wire [`FRM_BITS-1:0] read_frm;
|
||||
wire [`INST_FRM_BITS-1:0] read_frm;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ interface VX_gpu_req_if();
|
|||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [31:0] next_PC;
|
||||
wire [`GPU_BITS-1:0] op_type;
|
||||
wire [`INST_GPU_BITS-1:0] op_type;
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
|
|
|
@ -11,8 +11,8 @@ interface VX_ibuffer_if ();
|
|||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] op_type;
|
||||
wire [`MOD_BITS-1:0] op_mod;
|
||||
wire [`INST_OP_BITS-1:0] op_type;
|
||||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
|
|
|
@ -9,7 +9,7 @@ interface VX_lsu_req_if ();
|
|||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`LSU_BITS-1:0] op_type;
|
||||
wire [`INST_LSU_BITS-1:0] op_type;
|
||||
wire is_fence;
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
|
|
|
@ -2,178 +2,259 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_dp_ram #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter BYTEENW = 1,
|
||||
parameter OUTPUT_REG = 0,
|
||||
parameter RWCHECK = 1,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter FASTRAM = 0,
|
||||
parameter INITZERO = 0
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter BYTEENW = 1,
|
||||
parameter OUTPUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter LUTRAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
input wire clk,
|
||||
input wire [BYTEENW-1:0] wren,
|
||||
input wire [DATAW-1:0] din,
|
||||
output wire [DATAW-1:0] dout
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire rden,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
|
||||
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
|
||||
|
||||
if (FASTRAM) begin
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE) begin \
|
||||
if (INIT_FILE != "") begin \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin \
|
||||
initial ram = '{default: INIT_VALUE}; \
|
||||
end \
|
||||
end
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
if (LUTRAM) begin
|
||||
if (OUTPUT_REG) begin
|
||||
reg [DATAW-1:0] dout_r;
|
||||
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i] <= din[i * 8 +: 8];
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
dout_r <= mem[raddr];
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
dout_r <= mem[raddr];
|
||||
ram[waddr] <= wdata;
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign dout = dout_r;
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (rden)
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i] <= din[i * 8 +: 8];
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
if (OUTPUT_REG) begin
|
||||
reg [DATAW-1:0] dout_r;
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i] <= din[i * 8 +: 8];
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
dout_r <= mem[raddr];
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
dout_r <= mem[raddr];
|
||||
ram[waddr] <= wdata;
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign dout = dout_r;
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
if (RWCHECK) begin
|
||||
`UNUSED_VAR (rden)
|
||||
if (NO_RWCHECK) begin
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i] <= din[i * 8 +: 8];
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
if (BYTEENW > 1) begin
|
||||
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i] <= din[i * 8 +: 8];
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`else
|
||||
if (OUTPUT_REG) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
ram[waddr] <= wdata;
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (rden)
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
prev_write <= (| wren);
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_waddr;
|
||||
reg prev_write;
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
ram[waddr] <= wdata;
|
||||
prev_write <= wren;
|
||||
prev_data <= ram[waddr];
|
||||
prev_waddr <= waddr;
|
||||
end
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -5,7 +5,7 @@ module VX_elastic_buffer #(
|
|||
parameter DATAW = 1,
|
||||
parameter SIZE = 2,
|
||||
parameter OUTPUT_REG = 0,
|
||||
parameter FASTRAM = 0
|
||||
parameter LUTRAM = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -56,7 +56,7 @@ module VX_elastic_buffer #(
|
|||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.OUTPUT_REG (OUTPUT_REG),
|
||||
.FASTRAM (FASTRAM)
|
||||
.LUTRAM (LUTRAM)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -9,7 +9,7 @@ module VX_fifo_queue #(
|
|||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1),
|
||||
parameter OUTPUT_REG = 0,
|
||||
parameter FASTRAM = 1
|
||||
parameter LUTRAM = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -157,15 +157,15 @@ module VX_fifo_queue #(
|
|||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.OUTPUT_REG (0),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (FASTRAM)
|
||||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_r),
|
||||
.wren(push),
|
||||
.din(data_in),
|
||||
.dout(data_out)
|
||||
.wren (push),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.rden (1'b1),
|
||||
.raddr (rd_ptr_r),
|
||||
.rdata (data_out)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
@ -200,15 +200,15 @@ module VX_fifo_queue #(
|
|||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.OUTPUT_REG (0),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (FASTRAM)
|
||||
.LUTRAM (LUTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_n_r),
|
||||
.wren(push),
|
||||
.din(data_in),
|
||||
.dout(dout)
|
||||
.clk (clk),
|
||||
.wren (push),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.rden (1'b1),
|
||||
.raddr (rd_ptr_n_r),
|
||||
.rdata (dout)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_index_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter FASTRAM = 1,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter LUTRAM = 1,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -29,13 +29,12 @@ module VX_index_buffer #(
|
|||
wire free_valid;
|
||||
wire [ADDRW-1:0] free_index;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (SIZE)
|
||||
VX_lzc #(
|
||||
.WIDTH (SIZE)
|
||||
) free_slots_encoder (
|
||||
.data_in (free_slots_n),
|
||||
.index (free_index),
|
||||
`UNUSED_PIN (onehot),
|
||||
.valid_out (free_valid)
|
||||
.in_i (free_slots_n),
|
||||
.cnt_o (free_index),
|
||||
.valid_o (free_valid)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
|
@ -69,17 +68,17 @@ module VX_index_buffer #(
|
|||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.RWCHECK(1),
|
||||
.FASTRAM(FASTRAM)
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.LUTRAM (LUTRAM)
|
||||
) data_table (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
.raddr(read_addr),
|
||||
.wren(acquire_slot),
|
||||
.din(write_data),
|
||||
.dout(read_data)
|
||||
.clk (clk),
|
||||
.wren (acquire_slot),
|
||||
.waddr (write_addr),
|
||||
.wdata (write_data),
|
||||
.rden (1'b1),
|
||||
.raddr (read_addr),
|
||||
.rdata (read_data)
|
||||
);
|
||||
|
||||
assign write_addr = write_addr_r;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
// Fast encoder using parallel prefix computation
|
||||
// Adapter from BaseJump STL: http://bjump.org/data_out.html
|
||||
// Adapted from BaseJump STL: http://bjump.org/data_out.html
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_onehot_encoder #(
|
||||
|
|
|
@ -12,46 +12,48 @@ module VX_priority_encoder #(
|
|||
output wire [LN-1:0] index,
|
||||
output wire valid_out
|
||||
);
|
||||
wire [N-1:0] reversed;
|
||||
|
||||
if (REVERSE) begin
|
||||
for (genvar i = 0; i < N; ++i) begin
|
||||
assign reversed[N-i-1] = data_in[i];
|
||||
end
|
||||
end else begin
|
||||
assign reversed = data_in;
|
||||
end
|
||||
|
||||
if (N == 1) begin
|
||||
|
||||
assign onehot = data_in;
|
||||
assign onehot = reversed;
|
||||
assign index = 0;
|
||||
assign valid_out = data_in;
|
||||
assign valid_out = reversed;
|
||||
|
||||
end else if (N == 2) begin
|
||||
|
||||
assign onehot = {~data_in[REVERSE], data_in[REVERSE]};
|
||||
assign index = ~data_in[REVERSE];
|
||||
assign valid_out = (| data_in);
|
||||
assign onehot = {~reversed[0], reversed[0]};
|
||||
assign index = ~reversed[0];
|
||||
assign valid_out = (| reversed);
|
||||
|
||||
end else if (MODEL == 1) begin
|
||||
|
||||
wire [N-1:0] scan_lo;
|
||||
|
||||
VX_scan #(
|
||||
.N (N),
|
||||
.OP (2),
|
||||
.REVERSE (REVERSE)
|
||||
.N (N),
|
||||
.OP (2)
|
||||
) scan (
|
||||
.data_in (data_in),
|
||||
.data_in (reversed),
|
||||
.data_out (scan_lo)
|
||||
);
|
||||
|
||||
if (REVERSE) begin
|
||||
assign onehot = scan_lo & {1'b1, (~scan_lo[N-1:1])};
|
||||
assign valid_out = scan_lo[0];
|
||||
end else begin
|
||||
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
|
||||
assign valid_out = scan_lo[N-1];
|
||||
end
|
||||
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
|
||||
assign valid_out = scan_lo[N-1];
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (N),
|
||||
.REVERSE (REVERSE)
|
||||
.N (N)
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
|
@ -60,70 +62,54 @@ module VX_priority_encoder #(
|
|||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [N-1:0] higher_pri_regs;
|
||||
`IGNORE_WARNINGS_END
|
||||
assign higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | data_in[N-2:0];
|
||||
assign higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | reversed[N-2:0];
|
||||
assign higher_pri_regs[0] = 1'b0;
|
||||
assign onehot[N-1:0] = data_in[N-1:0] & ~higher_pri_regs[N-1:0];
|
||||
assign onehot[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0];
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (N),
|
||||
.REVERSE (REVERSE)
|
||||
.N (N)
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
assign valid_out = (| data_in);
|
||||
assign valid_out = (| reversed);
|
||||
|
||||
end else if (MODEL == 3) begin
|
||||
|
||||
assign onehot = data_in & ~(data_in-1);
|
||||
assign onehot = reversed & ~(reversed-1);
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (N),
|
||||
.REVERSE (REVERSE)
|
||||
.N (N)
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
assign valid_out = (| data_in);
|
||||
assign valid_out = (| reversed);
|
||||
|
||||
end else begin
|
||||
|
||||
reg [LN-1:0] index_r;
|
||||
reg [N-1:0] onehot_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
onehot_r = 'x;
|
||||
for (integer i = 0; i < N; ++i) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(i);
|
||||
onehot_r = 0;
|
||||
onehot_r[i] = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
onehot_r = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(i);
|
||||
onehot_r = 0;
|
||||
onehot_r[i] = 1'b1;
|
||||
end
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
onehot_r = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
if (reversed[i]) begin
|
||||
index_r = LN'(i);
|
||||
onehot_r = 0;
|
||||
onehot_r[i] = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign index = index_r;
|
||||
assign onehot = onehot_r;
|
||||
assign valid_out = (| data_in);
|
||||
assign valid_out = (| reversed);
|
||||
|
||||
end
|
||||
|
||||
|
|
|
@ -1,26 +1,31 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_reset_relay #(
|
||||
parameter ASYNC = 0
|
||||
parameter N = 1,
|
||||
parameter DEPTH = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
output wire reset_o
|
||||
output wire [N-1:0] reset_o
|
||||
);
|
||||
(* preserve *) reg reset_r;
|
||||
|
||||
if (ASYNC) begin
|
||||
always @(posedge clk or posedge reset) begin
|
||||
reset_r <= reset;
|
||||
end
|
||||
end else begin
|
||||
if (DEPTH > 1) begin
|
||||
`PRESERVE_REG `DISABLE_BRAM reg [N-1:0] reset_r [DEPTH-1:0];
|
||||
always @(posedge clk) begin
|
||||
reset_r <= reset;
|
||||
for (integer i = DEPTH-1; i > 0; --i)
|
||||
reset_r[i] <= reset_r[i-1];
|
||||
reset_r[0] <= {N{reset}};
|
||||
end
|
||||
assign reset_o = reset_r[DEPTH-1];
|
||||
end else if (DEPTH == 1) begin
|
||||
`PRESERVE_REG reg [N-1:0] reset_r;
|
||||
always @(posedge clk) begin
|
||||
reset_r <= {N{reset}};
|
||||
end
|
||||
assign reset_o = reset_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (clk)
|
||||
assign reset_o = {N{reset}};
|
||||
end
|
||||
|
||||
assign reset_o = reset_r;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
endmodule
|
|
@ -2,177 +2,259 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_sp_ram #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter BYTEENW = 1,
|
||||
parameter OUTPUT_REG = 0,
|
||||
parameter RWCHECK = 1,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter FASTRAM = 0,
|
||||
parameter INITZERO = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire [ADDRW-1:0] addr,
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter BYTEENW = 1,
|
||||
parameter OUTPUT_REG = 0,
|
||||
parameter NO_RWCHECK = 0,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter LUTRAM = 0,
|
||||
parameter INIT_ENABLE = 0,
|
||||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire [ADDRW-1:0] addr,
|
||||
input wire [BYTEENW-1:0] wren,
|
||||
input wire [DATAW-1:0] din,
|
||||
output wire [DATAW-1:0] dout
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire rden,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
|
||||
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
|
||||
|
||||
if (FASTRAM) begin
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE) begin \
|
||||
if (INIT_FILE != "") begin \
|
||||
initial $readmemh(INIT_FILE, ram); \
|
||||
end else begin \
|
||||
initial ram = '{default: INIT_VALUE}; \
|
||||
end \
|
||||
end
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
if (LUTRAM) begin
|
||||
if (OUTPUT_REG) begin
|
||||
reg [DATAW-1:0] dout_r;
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[addr][i] <= din[i * 8 +: 8];
|
||||
ram[addr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
dout_r <= mem[addr];
|
||||
if (rden)
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[addr] <= din;
|
||||
dout_r <= mem[addr];
|
||||
ram[addr] <= wdata;
|
||||
if (rden)
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign dout = dout_r;
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (rden)
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[addr][i] <= din[i * 8 +: 8];
|
||||
ram[addr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
assign dout = mem[addr];
|
||||
assign rdata = ram[addr];
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[addr] <= din;
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
assign dout = mem[addr];
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
if (OUTPUT_REG) begin
|
||||
reg [DATAW-1:0] dout_r;
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[addr][i] <= din[i * 8 +: 8];
|
||||
ram[addr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
dout_r <= mem[addr];
|
||||
if (rden)
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[addr] <= din;
|
||||
dout_r <= mem[addr];
|
||||
ram[addr] <= wdata;
|
||||
if (rden)
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign dout = dout_r;
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
if (RWCHECK) begin
|
||||
`UNUSED_VAR (rden)
|
||||
if (NO_RWCHECK) begin
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[addr][i] <= din[i * 8 +: 8];
|
||||
ram[addr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
assign dout = mem[addr];
|
||||
assign rdata = ram[addr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[addr] <= din;
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
assign dout = mem[addr];
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
end else begin
|
||||
if (BYTEENW > 1) begin
|
||||
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[addr][i] <= din[i * 8 +: 8];
|
||||
ram[addr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
assign dout = mem[addr];
|
||||
assign rdata = ram[addr];
|
||||
end else begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
if (INITZERO) begin
|
||||
initial mem = '{default: 0};
|
||||
end
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[addr] <= din;
|
||||
ram[addr] <= wdata;
|
||||
end
|
||||
assign dout = mem[addr];
|
||||
assign rdata = ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`else
|
||||
if (OUTPUT_REG) begin
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
ram[addr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
if (rden)
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
ram[addr] <= wdata;
|
||||
if (rden)
|
||||
rdata_r <= ram[addr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (rden)
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_addr;
|
||||
reg prev_write;
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
ram[addr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
prev_write <= (| wren);
|
||||
prev_data <= ram[addr];
|
||||
prev_addr <= addr;
|
||||
end
|
||||
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_addr)
|
||||
assign rdata = ram[addr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_addr == addr)) ? prev_data : ram[addr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
reg [DATAW-1:0] prev_data;
|
||||
reg [ADDRW-1:0] prev_addr;
|
||||
reg prev_write;
|
||||
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
ram[addr] <= wdata;
|
||||
prev_write <= wren;
|
||||
prev_data <= ram[addr];
|
||||
prev_addr <= addr;
|
||||
end
|
||||
if (LUTRAM || !NO_RWCHECK) begin
|
||||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_addr)
|
||||
assign rdata = ram[addr];
|
||||
end else begin
|
||||
assign rdata = (prev_write && (prev_addr == addr)) ? prev_data : ram[addr];
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -19,11 +19,12 @@ module VX_stream_arbiter #(
|
|||
output wire [LANES-1:0][DATAW-1:0] data_out,
|
||||
input wire [LANES-1:0] ready_out
|
||||
);
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
wire sel_valid;
|
||||
wire sel_ready;
|
||||
wire [NUM_REQS-1:0] sel_1hot;
|
||||
wire sel_valid;
|
||||
wire sel_ready;
|
||||
wire [LOG_NUM_REQS-1:0] sel_index;
|
||||
|
||||
wire [NUM_REQS-1:0] valid_in_any;
|
||||
wire [LANES-1:0] ready_in_sel;
|
||||
|
@ -50,8 +51,8 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in_any),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
.grant_index (sel_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
end else if (TYPE == "R") begin
|
||||
VX_rr_arbiter #(
|
||||
|
@ -63,8 +64,8 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in_any),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
.grant_index (sel_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
end else if (TYPE == "F") begin
|
||||
VX_fair_arbiter #(
|
||||
|
@ -76,8 +77,8 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in_any),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
.grant_index (sel_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
end else if (TYPE == "M") begin
|
||||
VX_matrix_arbiter #(
|
||||
|
@ -89,8 +90,8 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in_any),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
.grant_index (sel_index),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
end else begin
|
||||
$error ("invalid parameter");
|
||||
|
@ -105,32 +106,16 @@ module VX_stream_arbiter #(
|
|||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign valid_data_in[i] = {valid_in[i], data_in[i]};
|
||||
end
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (LANES * (1 + DATAW)),
|
||||
.N (NUM_REQS)
|
||||
) data_in_mux (
|
||||
.data_in (valid_data_in),
|
||||
.sel_in (sel_1hot),
|
||||
.data_out ({valid_in_sel, data_in_sel})
|
||||
);
|
||||
assign {valid_in_sel, data_in_sel} = valid_data_in[sel_index];
|
||||
|
||||
`UNUSED_VAR (sel_valid)
|
||||
end else begin
|
||||
VX_onehot_mux #(
|
||||
.DATAW (DATAW),
|
||||
.N (NUM_REQS)
|
||||
) data_in_mux (
|
||||
.data_in (data_in),
|
||||
.sel_in (sel_1hot),
|
||||
.data_out (data_in_sel)
|
||||
);
|
||||
|
||||
assign data_in_sel = data_in[sel_index];
|
||||
assign valid_in_sel = sel_valid;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in[i] = ready_in_sel & {LANES{sel_1hot[i]}};
|
||||
assign ready_in[i] = ready_in_sel & {LANES{(sel_index == LOG_NUM_REQS'(i))}};
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
|
|
|
@ -173,8 +173,8 @@
|
|||
"issue_tmask":"`NUM_THREADS",
|
||||
"issue_pc": 32,
|
||||
"issue_ex_type":"`EX_BITS",
|
||||
"issue_op_type":"`OP_BITS",
|
||||
"issue_op_mod":"`MOD_BITS",
|
||||
"issue_op_type":"`INST_OP_BITS",
|
||||
"issue_op_mod":"`INST_MOD_BITS",
|
||||
"issue_wb": 1,
|
||||
"issue_rd":"`NR_BITS",
|
||||
"issue_rs1":"`NR_BITS",
|
||||
|
|
7
hw/syn/opae/fpga_prog.sh
Executable file
7
hw/syn/opae/fpga_prog.sh
Executable file
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
|
||||
|
||||
# FPGA programming
|
||||
# first argument is the bitstream
|
||||
|
||||
echo "fpgaconf --bus 0xaf $1"
|
||||
fpgaconf --bus 0xaf $1
|
|
@ -23,12 +23,12 @@ set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
|||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
|
||||
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
|
||||
set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
|
||||
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
#set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
#set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
|
||||
#set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
|
||||
#set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
|
||||
#set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
|
||||
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
|
|
|
@ -21,7 +21,7 @@ VX_SRCS = kernel.c
|
|||
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
|
||||
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||
|
||||
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include "testcases.h"
|
||||
#include "common.h"
|
||||
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
ALL_TESTS := $(wildcard *.hex)
|
||||
|
||||
D_TESTS := $(wildcard *ud-p-*.hex)
|
||||
V_TESTS := $(wildcard *-v-*.hex)
|
||||
|
||||
EXCLUDED_TESTS := $(V_TESTS) rv32si-p-scall.hex rv32si-p-sbreak.hex rv32mi-p-breakpoint.hex rv32ud-p-fclass.hex rv32ua-p-amomax_w.hex rv32ua-p-amoxor_w.hex rv32ud-p-ldst.hex rv32ua-p-amoor_w.hex rv32mi-p-ma_addr.hex rv32ud-p-fdiv.hex rv32ud-p-fcmp.hex rv32mi-p-mcsr.hex rv32ua-p-amoswap_w.hex rv32mi-p-ma_fetch.hex rv32mi-p-csr.hex rv32ua-p-amoadd_w.hex rv32si-p-dirty.hex rv32ud-p-fcvt.hex rv32ui-p-fence_i.hex rv32si-p-csr.hex rv32mi-p-shamt.hex rv32ua-p-amomin_w.hex rv32ua-p-lrsc.hex rv32ud-p-fmadd.hex rv32ud-p-fadd.hex rv32si-p-wfi.hex rv32ua-p-amomaxu_w.hex rv32si-p-ma_fetch.hex rv32ud-p-fmin.hex rv32mi-p-illegal.hex rv32uc-p-rvc.hex rv32mi-p-sbreak.hex rv32ua-p-amominu_w.hex rv32ua-p-amoand_w.hex
|
||||
EXCLUDED_TESTS := $(V_TESTS) $(D_TESTS) rv32si-p-scall.hex rv32si-p-sbreak.hex rv32mi-p-breakpoint.hex rv32ua-p-amomax_w.hex rv32ua-p-amoxor_w.hex rv32ua-p-amoor_w.hex rv32mi-p-ma_addr.hex rv32mi-p-mcsr.hex rv32ua-p-amoswap_w.hex rv32mi-p-ma_fetch.hex rv32mi-p-csr.hex rv32ua-p-amoadd_w.hex rv32si-p-dirty.hex rv32ui-p-fence_i.hex rv32si-p-csr.hex rv32mi-p-shamt.hex rv32ua-p-amomin_w.hex rv32ua-p-lrsc.hex rv32si-p-wfi.hex rv32ua-p-amomaxu_w.hex rv32si-p-ma_fetch.hex rv32mi-p-illegal.hex rv32uc-p-rvc.hex rv32mi-p-sbreak.hex rv32ua-p-amominu_w.hex rv32ua-p-amoand_w.hex
|
||||
|
||||
TESTS := $(filter-out $(EXCLUDED_TESTS), $(ALL_TESTS))
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue