This commit is contained in:
Blaise Tine 2021-08-31 03:36:37 -04:00
commit d3c3d551ff
71 changed files with 1671 additions and 1319 deletions

View file

@ -40,15 +40,18 @@ jobs:
- stage: test
name: config
script: cp -r $PWD ../build4 && cd ../build4 && ./ci/travis_run.py ./ci/regression.sh -config
- stage: test
name: stress0
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/travis_run.py ./ci/regression.sh -stress0
- stage: test
name: stress1
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/travis_run.py ./ci/regression.sh -stress1
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/travis_run.py ./ci/regression.sh -stress1
- stage: test
name: stress2
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/travis_run.py ./ci/regression.sh -stress2
script: cp -r $PWD ../build7 && cd ../build7 && ./ci/travis_run.py ./ci/regression.sh -stress2
- stage: test
name: compiler
# Copies the checkout to a private build directory, then hands the compiler
# test script to ci/travis_run.py. The script path must be repo-relative
# (./ci/...), the same form used for the ./ci/regression.sh jobs; a leading
# "/ci/..." would resolve against the filesystem root instead of the checkout.
script: cp -r $PWD ../build7 && cd ../build7 && ./ci/travis_run.py ./ci/test_compiler.sh
script: cp -r $PWD ../build8 && cd ../build8 && ./ci/travis_run.py ./ci/test_compiler.sh
after_success:
# Gather code coverage

View file

@ -46,8 +46,8 @@ debug()
{
echo "begin debugging tests..."
./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --perf --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --debug --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
echo "debugging tests done!"
@ -72,13 +72,18 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# adjust l1 block size to match l2
CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
# test cache banking
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
# test cache multi-porting
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
CONFIGS="-DL2NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
CONFIGS="-DL2NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
# test 128-bit MEM block
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
@ -92,32 +97,45 @@ CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=
# test 128-bit DRAM block
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
# test long memory latency
CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
echo "configuration tests done!"
}
# stress0: runs a fixed list of apps through ./ci/blackbox.sh on the vlsim
# driver, each one twice: once with -DVERILATOR_RESET_VALUE=0 and once with
# -DVERILATOR_RESET_VALUE=1 passed via the CONFIGS environment variable.
# Takes no arguments; prints a banner before and after the runs.
stress0()
{
echo "begin stress0 tests..."
# test verilator reset values
# sgemm: --cores=2 --clusters=2 with --l2cache and --l3cache
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
# dogfood: same topology flags, and FPU_CORE=FPU_DEFAULT is set for the invocation
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
# io_addr: same topology flags as sgemm
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
# printf: --cores=4 only, no cluster or cache flags
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=printf
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=printf
echo "stress0 tests done!"
}
# stress1: runs the sgemm app once through ./ci/blackbox.sh on the rtlsim
# driver with --cores=1, forwarding "-n256" to the app via --args.
# Takes no arguments.
stress1()
{
# NOTE(review): a generic and a numbered start banner are both echoed here;
# this looks like the old and new lines of a change kept side by side.
# Confirm which one is meant to remain.
echo "begin stress tests..."
echo "begin stress1 tests..."
./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm --args="-n256"
# NOTE(review): the completion banner is duplicated in the same way.
echo "stress tests done!"
echo "stress1 tests done!"
}
# stress2: runs the sgemm app once through ./ci/blackbox.sh on the rtlsim
# driver with --cores=2 --clusters=2 plus --l2cache and --l3cache,
# forwarding "-n256" to the app via --args. Takes no arguments.
stress2()
{
# NOTE(review): a generic and a numbered start banner are both echoed here;
# this looks like the old and new lines of a change kept side by side.
# Confirm which one is meant to remain.
echo "begin stress tests..."
echo "begin stress2 tests..."
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 --l3cache --app=sgemm --args="-n256"
# NOTE(review): the completion banner is duplicated in the same way.
echo "stress tests done!"
echo "stress2 tests done!"
}
usage()
@ -128,13 +146,15 @@ usage()
while [ "$1" != "" ]; do
case $1 in
-coverage ) coverage
;;
;;
-cluster ) cluster
;;
;;
-debug ) debug
;;
;;
-config ) config
;;
-stress0 ) stress0
;;
-stress1 ) stress1
;;
-stress2 ) stress2
@ -143,12 +163,13 @@ while [ "$1" != "" ]; do
cluster
debug
config
stress0
stress1
stress2
;;
;;
-h | --help ) usage
exit
;;
;;
* ) usage
exit 1
esac

View file

@ -25,12 +25,12 @@ module VX_alu_unit #(
wire stall_in, stall_out;
`UNUSED_VAR (alu_req_if.op_mod)
wire is_br_op = `ALU_IS_BR(alu_req_if.op_mod);
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
wire alu_signed = `ALU_SIGNED(alu_op);
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
wire is_sub = (alu_op == `ALU_SUB);
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_OP(alu_req_if.op_type);
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_OP(alu_req_if.op_type);
wire alu_signed = `INST_ALU_SIGNED(alu_op);
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
wire is_sub = (alu_op == `INST_ALU_SUB);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
@ -57,10 +57,10 @@ module VX_alu_unit #(
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op)
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`ALU_SLL,
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`INST_ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
endcase
end
@ -81,7 +81,7 @@ module VX_alu_unit #(
// branch
wire is_jal = is_br_op && (br_op == `BR_JAL || br_op == `BR_JALR);
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
wire [31:0] br_dest = add_result[alu_req_if.tid];
@ -90,9 +90,9 @@ module VX_alu_unit #(
wire is_less = cmp_result[32];
wire is_equal = ~(| cmp_result[31:0]);
wire br_neg = `BR_NEG(br_op);
wire br_less = `BR_LESS(br_op);
wire br_static = `BR_STATIC(br_op);
wire br_neg = `INST_BR_NEG(br_op);
wire br_less = `INST_BR_LESS(br_op);
wire br_static = `INST_BR_STATIC(br_op);
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
// output
@ -118,14 +118,14 @@ module VX_alu_unit #(
wire mul_wb;
wire [`NUM_THREADS-1:0][31:0] mul_data;
wire is_mul_op = `ALU_IS_MUL(alu_req_if.op_mod);
wire is_mul_op = `INST_ALU_IS_MUL(alu_req_if.op_mod);
VX_muldiv muldiv (
.clk (clk),
.reset (reset),
// Inputs
.alu_op (`MUL_OP(alu_req_if.op_type)),
.alu_op (`INST_MUL_OP(alu_req_if.op_type)),
.wid_in (alu_req_if.wid),
.tmask_in (alu_req_if.tmask),
.PC_in (alu_req_if.PC),

View file

@ -87,6 +87,7 @@ module VX_cluster #(
.CACHE_SIZE (`L2CACHE_SIZE),
.CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE),
.NUM_BANKS (`L2NUM_BANKS),
.NUM_PORTS (`L2NUM_PORTS),
.WORD_SIZE (`L2WORD_SIZE),
.NUM_REQS (`L2NUM_REQS),
.CREQ_SIZE (`L2CREQ_SIZE),
@ -143,17 +144,19 @@ module VX_cluster #(
end else begin
`RESET_RELAY (mem_arb_reset);
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`L2MEM_DATA_WIDTH),
.ADDR_WIDTH (`L2MEM_ADDR_WIDTH),
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (reset),
.reset (mem_arb_reset),
// Core request
.req_valid_in (per_core_mem_req_valid),

View file

@ -38,7 +38,7 @@
`endif
`ifndef L1_BLOCK_SIZE
`define L1_BLOCK_SIZE (`NUM_THREADS * 4)
`define L1_BLOCK_SIZE ((`L2_ENABLE || `L3_ENABLE) ? (`NUM_THREADS * 4) : `MEM_BLOCK_SIZE)
`endif
`ifndef STARTUP_ADDR
@ -291,7 +291,7 @@
`define DNUM_BANKS `NUM_THREADS
`endif
// Number of bank ports
// Number of ports per bank
`ifndef DNUM_PORTS
`define DNUM_PORTS 1
`endif
@ -361,6 +361,11 @@
`define L2NUM_BANKS `MIN(`NUM_CORES, 4)
`endif
// Number of ports per bank
`ifndef L2NUM_PORTS
`define L2NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef L2CREQ_SIZE
`define L2CREQ_SIZE 0
@ -398,6 +403,11 @@
`define L3NUM_BANKS `MIN(`NUM_CLUSTERS, 4)
`endif
// Number of ports per bank
`ifndef L3NUM_PORTS
`define L3NUM_PORTS 1
`endif
// Core Request Queue Size
`ifndef L3CREQ_SIZE
`define L3CREQ_SIZE 0

View file

@ -42,7 +42,7 @@ module VX_csr_data #(
reg [63:0] csr_cycle;
reg [63:0] csr_instret;
reg [`NUM_WARPS-1:0][`FRM_BITS+`FFG_BITS-1:0] fcsr;
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
always @(posedge clk) begin
@ -52,16 +52,16 @@ module VX_csr_data #(
end
if (fpu_to_csr_if.write_enable) begin
fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]
| fpu_to_csr_if.write_fflags;
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
| fpu_to_csr_if.write_fflags;
end
`endif
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
`CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
`CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
`CSR_SATP: csr_satp <= write_data;
@ -104,8 +104,8 @@ module VX_csr_data #(
read_data_r = 'x;
read_addr_valid_r = 1;
case (read_addr)
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFG_BITS-1:0]);
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS]);
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFLAGS_BITS-1:0]);
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS]);
`CSR_FCSR : read_data_r = 32'(fcsr[read_wid]);
`CSR_WTID ,
@ -222,7 +222,7 @@ module VX_csr_data #(
assign read_data = read_data_r;
`ifdef EXT_F_ENABLE
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS];
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS];
`endif
endmodule

View file

@ -70,14 +70,14 @@ module VX_csr_unit #(
always @(*) begin
csr_we_s0_unqual = (csr_req_data != 0);
case (csr_req_if.op_type)
`CSR_RW: begin
`INST_CSR_RW: begin
csr_updated_data = csr_req_data;
csr_we_s0_unqual = 1;
end
`CSR_RS: begin
`INST_CSR_RS: begin
csr_updated_data = csr_read_data_qual | csr_req_data;
end
//`CSR_RC
//`INST_CSR_RC
default: begin
csr_updated_data = csr_read_data_qual & ~csr_req_data;
end

View file

@ -32,8 +32,8 @@ module VX_decode #(
`UNUSED_VAR (reset)
reg [`EX_BITS-1:0] ex_type;
reg [`OP_BITS-1:0] op_type;
reg [`MOD_BITS-1:0] op_mod;
reg [`INST_OP_BITS-1:0] op_type;
reg [`INST_MOD_BITS-1:0] op_mod;
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [31:0] imm;
reg use_rd, use_PC, use_imm;
@ -79,14 +79,14 @@ module VX_decode #(
`INST_I: begin
ex_type = `EX_ALU;
case (func3)
3'h0: op_type = `OP_BITS'(`ALU_ADD);
3'h1: op_type = `OP_BITS'(`ALU_SLL);
3'h2: op_type = `OP_BITS'(`ALU_SLT);
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
3'h4: op_type = `OP_BITS'(`ALU_XOR);
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
3'h6: op_type = `OP_BITS'(`ALU_OR);
3'h7: op_type = `OP_BITS'(`ALU_AND);
3'h0: op_type = `INST_OP_BITS'(`INST_ALU_ADD);
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
default:;
endcase
use_rd = 1;
@ -100,14 +100,14 @@ module VX_decode #(
`ifdef EXT_F_ENABLE
if (func7[0]) begin
case (func3)
3'h0: op_type = `OP_BITS'(`MUL_MUL);
3'h1: op_type = `OP_BITS'(`MUL_MULH);
3'h2: op_type = `OP_BITS'(`MUL_MULHSU);
3'h3: op_type = `OP_BITS'(`MUL_MULHU);
3'h4: op_type = `OP_BITS'(`MUL_DIV);
3'h5: op_type = `OP_BITS'(`MUL_DIVU);
3'h6: op_type = `OP_BITS'(`MUL_REM);
3'h7: op_type = `OP_BITS'(`MUL_REMU);
3'h0: op_type = `INST_OP_BITS'(`INST_MUL_MUL);
3'h1: op_type = `INST_OP_BITS'(`INST_MUL_MULH);
3'h2: op_type = `INST_OP_BITS'(`INST_MUL_MULHSU);
3'h3: op_type = `INST_OP_BITS'(`INST_MUL_MULHU);
3'h4: op_type = `INST_OP_BITS'(`INST_MUL_DIV);
3'h5: op_type = `INST_OP_BITS'(`INST_MUL_DIVU);
3'h6: op_type = `INST_OP_BITS'(`INST_MUL_REM);
3'h7: op_type = `INST_OP_BITS'(`INST_MUL_REMU);
default:;
endcase
op_mod = 2;
@ -115,14 +115,14 @@ module VX_decode #(
`endif
begin
case (func3)
3'h0: op_type = (func7[5]) ? `OP_BITS'(`ALU_SUB) : `OP_BITS'(`ALU_ADD);
3'h1: op_type = `OP_BITS'(`ALU_SLL);
3'h2: op_type = `OP_BITS'(`ALU_SLT);
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
3'h4: op_type = `OP_BITS'(`ALU_XOR);
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
3'h6: op_type = `OP_BITS'(`ALU_OR);
3'h7: op_type = `OP_BITS'(`ALU_AND);
3'h0: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SUB) : `INST_OP_BITS'(`INST_ALU_ADD);
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
default:;
endcase
end
@ -133,7 +133,7 @@ module VX_decode #(
end
`INST_LUI: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_LUI);
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
use_rd = 1;
use_imm = 1;
imm = {upper_imm, 12'(0)};
@ -142,7 +142,7 @@ module VX_decode #(
end
`INST_AUIPC: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_AUIPC);
op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
use_rd = 1;
use_imm = 1;
use_PC = 1;
@ -151,7 +151,7 @@ module VX_decode #(
end
`INST_JAL: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`BR_JAL);
op_type = `INST_OP_BITS'(`INST_BR_JAL);
op_mod = 1;
use_rd = 1;
use_imm = 1;
@ -162,7 +162,7 @@ module VX_decode #(
end
`INST_JALR: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`BR_JALR);
op_type = `INST_OP_BITS'(`INST_BR_JALR);
op_mod = 1;
use_rd = 1;
use_imm = 1;
@ -174,12 +174,12 @@ module VX_decode #(
`INST_B: begin
ex_type = `EX_ALU;
case (func3)
3'h0: op_type = `OP_BITS'(`BR_EQ);
3'h1: op_type = `OP_BITS'(`BR_NE);
3'h4: op_type = `OP_BITS'(`BR_LT);
3'h5: op_type = `OP_BITS'(`BR_GE);
3'h6: op_type = `OP_BITS'(`BR_LTU);
3'h7: op_type = `OP_BITS'(`BR_GEU);
3'h0: op_type = `INST_OP_BITS'(`INST_BR_EQ);
3'h1: op_type = `INST_OP_BITS'(`INST_BR_NE);
3'h4: op_type = `INST_OP_BITS'(`INST_BR_LT);
3'h5: op_type = `INST_OP_BITS'(`INST_BR_GE);
3'h6: op_type = `INST_OP_BITS'(`INST_BR_LTU);
3'h7: op_type = `INST_OP_BITS'(`INST_BR_GEU);
default:;
endcase
op_mod = 1;
@ -192,12 +192,13 @@ module VX_decode #(
end
`INST_F: begin
ex_type = `EX_LSU;
op_mod = `MOD_BITS'(!func3[0]); // data fence
op_type = `INST_OP_BITS'(func3[0]);
op_mod = `INST_MOD_BITS'(1);
end
`INST_SYS : begin
if (func3[1:0] != 0) begin
ex_type = `EX_CSR;
op_type = `OP_BITS'(func3[1:0]);
op_type = `INST_OP_BITS'(func3[1:0]);
use_rd = 1;
use_imm = func3[2];
imm = 32'(u_12); // addr
@ -210,11 +211,11 @@ module VX_decode #(
end else begin
ex_type = `EX_ALU;
case (u_12)
12'h000: op_type = `OP_BITS'(`BR_ECALL);
12'h001: op_type = `OP_BITS'(`BR_EBREAK);
12'h302: op_type = `OP_BITS'(`BR_MRET);
12'h102: op_type = `OP_BITS'(`BR_SRET);
12'h7B2: op_type = `OP_BITS'(`BR_DRET);
12'h000: op_type = `INST_OP_BITS'(`INST_BR_ECALL);
12'h001: op_type = `INST_OP_BITS'(`INST_BR_EBREAK);
12'h302: op_type = `INST_OP_BITS'(`INST_BR_MRET);
12'h102: op_type = `INST_OP_BITS'(`INST_BR_SRET);
12'h7B2: op_type = `INST_OP_BITS'(`INST_BR_DRET);
default:;
endcase
op_mod = 1;
@ -231,7 +232,7 @@ module VX_decode #(
`endif
`INST_L: begin
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b0, func3});
op_type = `INST_OP_BITS'({1'b0, func3});
use_rd = 1;
imm = {{20{u_12[11]}}, u_12};
`ifdef EXT_F_ENABLE
@ -247,7 +248,7 @@ module VX_decode #(
`endif
`INST_S: begin
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b1, func3});
op_type = `INST_OP_BITS'({1'b1, func3});
imm = {{20{s_imm[11]}}, s_imm};
`USED_IREG (rs1);
`ifdef EXT_F_ENABLE
@ -263,7 +264,7 @@ module VX_decode #(
`INST_FNMSUB,
`INST_FNMADD: begin
ex_type = `EX_FPU;
op_type = `OP_BITS'(opcode[3:0]);
op_type = `INST_OP_BITS'(opcode[3:0]);
op_mod = func3;
use_rd = 1;
`USED_FREG (rd);
@ -280,35 +281,35 @@ module VX_decode #(
7'h04, // FSUB
7'h08, // FMUL
7'h0C: begin // FDIV
op_type = `OP_BITS'(func7[3:0]);
op_type = `INST_OP_BITS'(func7[3:0]);
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h2C: begin
op_type = `OP_BITS'(`FPU_SQRT);
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
`USED_FREG (rd);
`USED_FREG (rs1);
end
7'h50: begin
op_type = `OP_BITS'(`FPU_CMP);
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
`USED_IREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h60: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTWUS) : `INST_OP_BITS'(`INST_FPU_CVTWS);
`USED_IREG (rd);
`USED_FREG (rs1);
end
7'h68: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTSWU) : `INST_OP_BITS'(`INST_FPU_CVTSW);
`USED_FREG (rd);
`USED_IREG (rs1);
end
7'h10: begin
// FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `OP_BITS'(`FPU_MISC);
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = {1'b0, func3[1:0]};
`USED_FREG (rd);
`USED_FREG (rs1);
@ -316,7 +317,7 @@ module VX_decode #(
end
7'h14: begin
// FMIN=3, FMAX=4
op_type = `OP_BITS'(`FPU_MISC);
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = func3[0] ? 4 : 3;
`USED_FREG (rd);
`USED_FREG (rs1);
@ -325,10 +326,10 @@ module VX_decode #(
7'h70: begin
if (func3[0]) begin
// FCLASS
op_type = `OP_BITS'(`FPU_CLASS);
op_type = `INST_OP_BITS'(`INST_FPU_CLASS);
end else begin
// FMV.X.W=5
op_type = `OP_BITS'(`FPU_MISC);
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = 5;
end
`USED_IREG (rd);
@ -336,7 +337,7 @@ module VX_decode #(
end
7'h78: begin
// FMV.W.X=6
op_type = `OP_BITS'(`FPU_MISC);
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = 6;
`USED_FREG (rd);
`USED_IREG (rs1);
@ -349,26 +350,26 @@ module VX_decode #(
ex_type = `EX_GPU;
case (func3)
3'h0: begin
op_type = `OP_BITS'(rs2 ? `GPU_PRED : `GPU_TMC);
op_type = rs2[0] ? `INST_OP_BITS'(`INST_GPU_PRED) : `INST_OP_BITS'(`INST_GPU_TMC);
is_wstall = 1;
`USED_IREG (rs1);
end
3'h1: begin
op_type = `OP_BITS'(`GPU_WSPAWN);
op_type = `INST_OP_BITS'(`INST_GPU_WSPAWN);
`USED_IREG (rs1);
`USED_IREG (rs2);
end
3'h2: begin
op_type = `OP_BITS'(`GPU_SPLIT);
op_type = `INST_OP_BITS'(`INST_GPU_SPLIT);
is_wstall = 1;
`USED_IREG (rs1);
end
3'h3: begin
op_type = `OP_BITS'(`GPU_JOIN);
op_type = `INST_OP_BITS'(`INST_GPU_JOIN);
is_join = 1;
end
3'h4: begin
op_type = `OP_BITS'(`GPU_BAR);
op_type = `INST_OP_BITS'(`INST_GPU_BAR);
is_wstall = 1;
`USED_IREG (rs1);
`USED_IREG (rs2);

View file

@ -14,8 +14,6 @@
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
`define REQS_BITS `LOG2UP(NUM_REQS)
`ifdef EXT_F_ENABLE
`define NUM_REGS 64
`else
@ -32,6 +30,16 @@
///////////////////////////////////////////////////////////////////////////////
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_FPU 3'h4
`define EX_GPU 3'h5
`define EX_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define INST_LUI 7'b0110111
`define INST_AUIPC 7'b0010111
`define INST_JAL 7'b1101111
@ -56,138 +64,131 @@
///////////////////////////////////////////////////////////////////////////////
`define FRM_RNE 3'b000 // round to nearest even
`define FRM_RTZ 3'b001 // round to zero
`define FRM_RDN 3'b010 // round to -inf
`define FRM_RUP 3'b011 // round to +inf
`define FRM_RMM 3'b100 // round to nearest max magnitude
`define FRM_DYN 3'b111 // dynamic mode
`define FRM_BITS 3
`define INST_FRM_RNE 3'b000 // round to nearest even
`define INST_FRM_RTZ 3'b001 // round to zero
`define INST_FRM_RDN 3'b010 // round to -inf
`define INST_FRM_RUP 3'b011 // round to +inf
`define INST_FRM_RMM 3'b100 // round to nearest max magnitude
`define INST_FRM_DYN 3'b111 // dynamic mode
`define INST_FRM_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_FPU 3'h4
`define EX_GPU 3'h5
`define EX_BITS 3
`define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS)
`define INST_OP_BITS 4
`define INST_MOD_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define OP_BITS 4
`define MOD_BITS 3
`define INST_ALU_ADD 4'b0000
`define INST_ALU_LUI 4'b0010
`define INST_ALU_AUIPC 4'b0011
`define INST_ALU_SLTU 4'b0100
`define INST_ALU_SLT 4'b0101
`define INST_ALU_SRL 4'b1000
`define INST_ALU_SRA 4'b1001
`define INST_ALU_SUB 4'b1011
`define INST_ALU_AND 4'b1100
`define INST_ALU_OR 4'b1101
`define INST_ALU_XOR 4'b1110
`define INST_ALU_SLL 4'b1111
`define INST_ALU_OTHER 4'b0111
`define INST_ALU_BITS 4
`define INST_ALU_OP(x) x[`INST_ALU_BITS-1:0]
`define INST_ALU_OP_CLASS(x) x[3:2]
`define INST_ALU_SIGNED(x) x[0]
`define INST_ALU_IS_BR(x) x[0]
`define INST_ALU_IS_MUL(x) x[1]
`define ALU_ADD 4'b0000
`define ALU_LUI 4'b0010
`define ALU_AUIPC 4'b0011
`define ALU_SLTU 4'b0100
`define ALU_SLT 4'b0101
`define ALU_SRL 4'b1000
`define ALU_SRA 4'b1001
`define ALU_SUB 4'b1011
`define ALU_AND 4'b1100
`define ALU_OR 4'b1101
`define ALU_XOR 4'b1110
`define ALU_SLL 4'b1111
`define ALU_OTHER 4'b0111
`define ALU_BITS 4
`define ALU_OP(x) x[`ALU_BITS-1:0]
`define ALU_OP_CLASS(x) x[3:2]
`define ALU_SIGNED(x) x[0]
`define ALU_IS_BR(x) x[0]
`define ALU_IS_MUL(x) x[1]
`define INST_BR_EQ 4'b0000
`define INST_BR_NE 4'b0010
`define INST_BR_LTU 4'b0100
`define INST_BR_GEU 4'b0110
`define INST_BR_LT 4'b0101
`define INST_BR_GE 4'b0111
`define INST_BR_JAL 4'b1000
`define INST_BR_JALR 4'b1001
`define INST_BR_ECALL 4'b1010
`define INST_BR_EBREAK 4'b1011
`define INST_BR_MRET 4'b1100
`define INST_BR_SRET 4'b1101
`define INST_BR_DRET 4'b1110
`define INST_BR_OTHER 4'b1111
`define INST_BR_BITS 4
`define INST_BR_OP(x) x[`INST_BR_BITS-1:0]
`define INST_BR_NEG(x) x[1]
`define INST_BR_LESS(x) x[2]
`define INST_BR_STATIC(x) x[3]
`define BR_EQ 4'b0000
`define BR_NE 4'b0010
`define BR_LTU 4'b0100
`define BR_GEU 4'b0110
`define BR_LT 4'b0101
`define BR_GE 4'b0111
`define BR_JAL 4'b1000
`define BR_JALR 4'b1001
`define BR_ECALL 4'b1010
`define BR_EBREAK 4'b1011
`define BR_MRET 4'b1100
`define BR_SRET 4'b1101
`define BR_DRET 4'b1110
`define BR_OTHER 4'b1111
`define BR_BITS 4
`define BR_OP(x) x[`BR_BITS-1:0]
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
`define INST_MUL_MUL 3'h0
`define INST_MUL_MULH 3'h1
`define INST_MUL_MULHSU 3'h2
`define INST_MUL_MULHU 3'h3
`define INST_MUL_DIV 3'h4
`define INST_MUL_DIVU 3'h5
`define INST_MUL_REM 3'h6
`define INST_MUL_REMU 3'h7
`define INST_MUL_BITS 3
`define INST_MUL_OP(x) x[`INST_MUL_BITS-1:0]
`define INST_MUL_IS_DIV(x) x[2]
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define MUL_IS_DIV(x) x[2]
`define INST_FMT_B 3'b000
`define INST_FMT_H 3'b001
`define INST_FMT_W 3'b010
`define INST_FMT_BU 3'b100
`define INST_FMT_HU 3'b101
`define FMT_B 3'b000
`define FMT_H 3'b001
`define FMT_W 3'b010
`define FMT_BU 3'b100
`define FMT_HU 3'b101
`define INST_LSU_LB 4'b0000
`define INST_LSU_LH 4'b0001
`define INST_LSU_LW 4'b0010
`define INST_LSU_LBU 4'b0100
`define INST_LSU_LHU 4'b0101
`define INST_LSU_SB 4'b1000
`define INST_LSU_SH 4'b1001
`define INST_LSU_SW 4'b1010
`define INST_LSU_BITS 4
`define INST_LSU_FMT(x) x[2:0]
`define INST_LSU_WSIZE(x) x[1:0]
`define INST_LSU_OP(x) x[`INST_LSU_BITS-1:0]
`define INST_LSU_IS_FENCE(x) x[0]
`define LSU_LB 4'b0000
`define LSU_LH 4'b0001
`define LSU_LW 4'b0010
`define LSU_LBU 4'b0100
`define LSU_LHU 4'b0101
`define LSU_SB 4'b1000
`define LSU_SH 4'b1001
`define LSU_SW 4'b1010
`define LSU_BITS 4
`define LSU_FMT(x) x[2:0]
`define LSU_WSIZE(x) x[1:0]
`define LSU_OP(x) x[`LSU_BITS-1:0]
`define LSU_IS_FENCE(x) x[0]
`define INST_FENCE_BITS 1
`define INST_FENCE_D 1'h0
`define INST_FENCE_I 1'h1
`define CSR_RW 2'h1
`define CSR_RS 2'h2
`define CSR_RC 2'h3
`define CSR_OTHER 2'h0
`define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0]
`define INST_CSR_RW 2'h1
`define INST_CSR_RS 2'h2
`define INST_CSR_RC 2'h3
`define INST_CSR_OTHER 2'h0
`define INST_CSR_BITS 2
`define INST_CSR_OP(x) x[`INST_CSR_BITS-1:0]
`define FPU_ADD 4'h0
`define FPU_SUB 4'h4
`define FPU_MUL 4'h8
`define FPU_DIV 4'hC
`define FPU_CVTWS 4'h1 // FCVT.W.S
`define FPU_CVTWUS 4'h5 // FCVT.WU.S
`define FPU_CVTSW 4'h9 // FCVT.S.W
`define FPU_CVTSWU 4'hD // FCVT.S.WU
`define FPU_SQRT 4'h2
`define FPU_CLASS 4'h6
`define FPU_CMP 4'hA
`define FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define FPU_MADD 4'h3
`define FPU_MSUB 4'h7
`define FPU_NMSUB 4'hB
`define FPU_NMADD 4'hF
`define FPU_BITS 4
`define FPU_OP(x) x[`FPU_BITS-1:0]
`define INST_FPU_ADD 4'h0
`define INST_FPU_SUB 4'h4
`define INST_FPU_MUL 4'h8
`define INST_FPU_DIV 4'hC
`define INST_FPU_CVTWS 4'h1 // FCVT.W.S
`define INST_FPU_CVTWUS 4'h5 // FCVT.WU.S
`define INST_FPU_CVTSW 4'h9 // FCVT.S.W
`define INST_FPU_CVTSWU 4'hD // FCVT.S.WU
`define INST_FPU_SQRT 4'h2
`define INST_FPU_CLASS 4'h6
`define INST_FPU_CMP 4'hA
`define INST_FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define INST_FPU_MADD 4'h3
`define INST_FPU_MSUB 4'h7
`define INST_FPU_NMSUB 4'hB
`define INST_FPU_NMADD 4'hF
`define INST_FPU_BITS 4
`define INST_FPU_OP(x) x[`INST_FPU_BITS-1:0]
`define GPU_TMC 3'h0
`define GPU_WSPAWN 3'h1
`define GPU_SPLIT 3'h2
`define GPU_JOIN 3'h3
`define GPU_BAR 3'h4
`define GPU_PRED 3'h5
`define GPU_BITS 3
`define GPU_OP(x) x[`GPU_BITS-1:0]
`define INST_GPU_TMC 3'h0
`define INST_GPU_WSPAWN 3'h1
`define INST_GPU_SPLIT 3'h2
`define INST_GPU_JOIN 3'h3
`define INST_GPU_BAR 3'h4
`define INST_GPU_PRED 3'h5
`define INST_GPU_BITS 3
`define INST_GPU_OP(x) x[`INST_GPU_BITS-1:0]
///////////////////////////////////////////////////////////////////////////////
@ -246,14 +247,14 @@
// Cache ID
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
// Block size in bytes
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
// Number of banks
`define INUM_BANKS 1
// Word size in bytes
`define IWORD_SIZE 4
// Number of banks
`define INUM_BANKS 1
// Block size in bytes
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
// Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
@ -284,12 +285,12 @@
// Cache ID
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Block size in bytes
`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
// Word size in bytes
`define DWORD_SIZE 4
// Block size in bytes
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
// Core request address bits
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
@ -336,12 +337,12 @@
// Cache ID
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
// Block size in bytes
`define L2CACHE_LINE_SIZE `MEM_BLOCK_SIZE
// Word size in bytes
`define L2WORD_SIZE `DCACHE_LINE_SIZE
// Block size in bytes
`define L2CACHE_LINE_SIZE (`L2_ENABLE ? `MEM_BLOCK_SIZE : `L2WORD_SIZE)
// Input request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
@ -368,12 +369,12 @@
// Cache ID
`define L3CACHE_ID 0
// Block size in bytes
`define L3CACHE_LINE_SIZE `MEM_BLOCK_SIZE
// Word size in bytes
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
// Block size in bytes
`define L3CACHE_LINE_SIZE (`L3_ENABLE ? `MEM_BLOCK_SIZE : `L3WORD_SIZE)
// Input request tag bits
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))

View file

@ -133,8 +133,8 @@ module VX_execute #(
// special workaround to get RISC-V tests Pass/Fail status
wire ebreak /* verilator public */;
assign ebreak = alu_req_if.valid && alu_req_if.ready
&& `ALU_IS_BR(alu_req_if.op_mod)
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);
&& `INST_ALU_IS_BR(alu_req_if.op_mod)
&& (`INST_BR_OP(alu_req_if.op_type) == `INST_BR_EBREAK
|| `INST_BR_OP(alu_req_if.op_type) == `INST_BR_ECALL);
endmodule

View file

@ -61,7 +61,7 @@ module VX_fpu_unit #(
// resolve dynamic FRM from CSR
assign fpu_to_csr_if.read_wid = fpu_req_if.wid;
wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
wire [`INST_FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `INST_FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
`ifdef FPU_DPI
@ -179,7 +179,7 @@ module VX_fpu_unit #(
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS),
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -1,37 +0,0 @@
`include "VX_define.vh"
`TRACING_OFF
// Memory array with one clocked write port and three combinational read
// ports. Every entry is initialised to zero.
//
// Parameters:
//   DATAW - width in bits of each entry
//   DEPTH - number of entries
//   ADDRW - address width; defaults to $clog2(DEPTH)
module VX_gpr_ram_f #(
    parameter DATAW = 1,
    parameter DEPTH = 1,
    parameter ADDRW = $clog2(DEPTH)
) (
    input wire              clk,

    // write port: wdata is stored at waddr on a rising clk edge when wren is set
    input wire              wren,
    input wire [ADDRW-1:0]  waddr,
    input wire [DATAW-1:0]  wdata,

    // read addresses
    input wire [ADDRW-1:0]  raddr1,
    input wire [ADDRW-1:0]  raddr2,
    input wire [ADDRW-1:0]  raddr3,

    // read data, driven directly from the array (no output register)
    output wire [DATAW-1:0] rdata1,
    output wire [DATAW-1:0] rdata2,
    output wire [DATAW-1:0] rdata3
);
    // storage
    reg [DATAW-1:0] mem [DEPTH-1:0];

    // start with all entries cleared
    initial mem = '{default: 0};

    // read side
    assign rdata1 = mem[raddr1];
    assign rdata2 = mem[raddr2];
    assign rdata3 = mem[raddr3];

    // write side
    always @(posedge clk) begin
        if (wren)
            mem[waddr] <= wdata;
    end

endmodule
`TRACING_ON

View file

@ -1,34 +0,0 @@
`include "VX_define.vh"
`TRACING_OFF
// Memory array with one clocked write port and two combinational read
// ports. Every entry is initialised to zero.
//
// Parameters:
//   DATAW - width in bits of each entry
//   DEPTH - number of entries
//   ADDRW - address width; defaults to $clog2(DEPTH)
module VX_gpr_ram_i #(
    parameter DATAW = 1,
    parameter DEPTH = 1,
    parameter ADDRW = $clog2(DEPTH)
) (
    input wire              clk,

    // write port: wdata is stored at waddr on a rising clk edge when wren is set
    input wire              wren,
    input wire [ADDRW-1:0]  waddr,
    input wire [DATAW-1:0]  wdata,

    // read addresses
    input wire [ADDRW-1:0]  raddr1,
    input wire [ADDRW-1:0]  raddr2,

    // read data, driven directly from the array (no output register)
    output wire [DATAW-1:0] rdata1,
    output wire [DATAW-1:0] rdata2
);
    // storage
    reg [DATAW-1:0] mem [DEPTH-1:0];

    // start with all entries cleared
    initial mem = '{default: 0};

    // read side
    assign rdata1 = mem[raddr1];
    assign rdata2 = mem[raddr2];

    // write side
    always @(posedge clk) begin
        if (wren)
            mem[waddr] <= wdata;
    end

endmodule
`TRACING_ON

View file

@ -17,69 +17,100 @@ module VX_gpr_stage #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
// ensure r0 never gets written, which can happen before the reset
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
`ifdef EXT_F_ENABLE
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3;
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
// Per-byte write-enable vector: 4 enable bits for each thread's 32-bit word.
// All 4 bits of thread i's group are set together, and only when the
// writeback is enabled (write_enable) and thread i is active in the
// writeback thread mask.
wire [(`NUM_THREADS * 4)-1:0] wren;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}};
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_gpr_ram_f #(
.DATAW (32),
.DEPTH (RAM_DEPTH)
) gpr_ram_f (
.clk (clk),
.wren (write_enable && writeback_if.tmask[i]),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.raddr1 (raddr1),
.raddr2 (raddr2),
.raddr3 (raddr3),
.rdata1 (rdata1[i]),
.rdata2 (rdata2[i]),
.rdata3 (rdata3[i])
);
// Registered copy of the most recent writeback, sampled on every rising clock
// edge (no reset and no enable on these registers):
//   last_wdata - per-thread write data
//   last_waddr - register-file write address
//   last_wmask - per-thread write mask; all zeros when write_enable is low
// These are compared against the read addresses when the GPR response data
// is selected.
reg [`NUM_THREADS-1:0][31:0] last_wdata;
reg [$clog2(RAM_SIZE)-1:0] last_waddr;
reg [`NUM_THREADS-1:0] last_wmask;
always @(posedge clk) begin
last_wdata <= writeback_if.data;
last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask;
last_waddr <= waddr;
end
assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs3_data = rdata3;
`else
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2;
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
`UNUSED_VAR (gpr_req_if.rs3)
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_gpr_ram_i #(
.DATAW (32),
.DEPTH (RAM_DEPTH)
) gpr_ram_i (
.clk (clk),
.wren (write_enable && writeback_if.tmask[i]),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.raddr1 (raddr1),
.raddr2 (raddr2),
.rdata1 (rdata1[i]),
.rdata2 (rdata2[i])
);
// Register-file RAM feeding the rs1 read path (raddr1 -> rdata1).
// Each entry is 32 bits per thread wide and there are RAM_SIZE entries.
// The write side is driven by the writeback interface, with `wren` providing
// NUM_THREADS * 4 enable bits.
// NOTE(review): VX_dp_ram is defined elsewhere. INIT_ENABLE/INIT_VALUE
// presumably zero-initialise the contents and NO_RWCHECK presumably relaxes
// read-during-write checking; confirm against VX_dp_ram.
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram1 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
// read enable is tied high
.rden (1'b1),
.raddr (raddr1),
.rdata (rdata1)
);
// Register-file RAM feeding the rs2 read path (raddr2 -> rdata2).
// Each entry is 32 bits per thread wide and there are RAM_SIZE entries.
// The write side is driven by the writeback interface, with `wren` providing
// NUM_THREADS * 4 enable bits.
// NOTE(review): VX_dp_ram is defined elsewhere. INIT_ENABLE/INIT_VALUE
// presumably zero-initialise the contents and NO_RWCHECK presumably relaxes
// read-during-write checking; confirm against VX_dp_ram.
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram2 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
// read enable is tied high
.rden (1'b1),
.raddr (raddr2),
.rdata (rdata2)
);
// Per-thread selection of the rs1/rs2 response data.
// If thread i was part of the registered writeback (last_wmask[i]) and the
// read address equals the registered write address (last_waddr), return the
// registered write data; otherwise return the RAM read data.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i];
assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i];
end
`ifdef EXT_F_ENABLE
wire [`NUM_THREADS-1:0][31:0] rdata3;
wire [$clog2(RAM_SIZE)-1:0] raddr3;
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs3_data = 0;
// Register-file RAM feeding the rs3 read path (raddr3 -> rdata3).
// Each entry is 32 bits per thread wide and there are RAM_SIZE entries.
// The write side is driven by the writeback interface, with `wren` providing
// NUM_THREADS * 4 enable bits.
// NOTE(review): VX_dp_ram is defined elsewhere. INIT_ENABLE/INIT_VALUE
// presumably zero-initialise the contents and NO_RWCHECK presumably relaxes
// read-during-write checking; confirm against VX_dp_ram.
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram3 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
// read enable is tied high
.rden (1'b1),
.raddr (raddr3),
.rdata (rdata3)
);
// Per-thread selection of the rs3 response data.
// If thread i was part of the registered writeback (last_wmask[i]) and raddr3
// equals the registered write address (last_waddr), return the registered
// write data; otherwise return the RAM read data.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i];
end
`else
`UNUSED_VAR (gpr_req_if.rs3)
assign gpr_rsp_if.rs3_data = 'x;
`endif
assign writeback_if.ready = 1'b1;

View file

@ -19,18 +19,17 @@ module VX_gpu_unit #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (gpu_req_if.op_mod)
gpu_tmc_t tmc;
gpu_wspawn_t wspawn;
gpu_barrier_t barrier;
gpu_split_t split;
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
wire is_pred = (gpu_req_if.op_type == `GPU_PRED);
wire is_wspawn = (gpu_req_if.op_type == `INST_GPU_WSPAWN);
wire is_tmc = (gpu_req_if.op_type == `INST_GPU_TMC);
wire is_split = (gpu_req_if.op_type == `INST_GPU_SPLIT);
wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR);
wire is_pred = (gpu_req_if.op_type == `INST_GPU_PRED);
// tmc

View file

@ -15,7 +15,7 @@ module VX_ibuffer #(
`UNUSED_PARAM (CORE_ID)
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
localparam ADDRW = $clog2(`IBUF_SIZE+1);
localparam NWARPSW = $clog2(`NUM_WARPS+1);

View file

@ -33,16 +33,17 @@ module VX_icache_stage #(
wire [`NUM_THREADS-1:0] rsp_tmask;
VX_dp_ram #(
.DATAW(32 + `NUM_THREADS),
.SIZE(`NUM_WARPS),
.FASTRAM(1)
.DATAW (32 + `NUM_THREADS),
.SIZE (`NUM_WARPS),
.LUTRAM (1)
) req_metadata (
.clk(clk),
.waddr(req_tag),
.raddr(rsp_tag),
.wren(icache_req_fire),
.din({ifetch_req_if.PC, ifetch_req_if.tmask}),
.dout({rsp_PC, rsp_tmask})
.clk (clk),
.wren (icache_req_fire),
.waddr (req_tag),
.wdata ({ifetch_req_if.PC, ifetch_req_if.tmask}),
.rden (1'b1),
.raddr (rsp_tag),
.rdata ({rsp_PC, rsp_tmask})
);
`RUNTIME_ASSERT((!ifetch_req_if.valid || ifetch_req_if.PC >= `STARTUP_ADDR),

View file

@ -26,13 +26,12 @@ module VX_instr_demux (
`endif
wire gpu_req_ready;
VX_priority_encoder #(
.N (`NUM_THREADS)
VX_lzc #(
.WIDTH (`NUM_THREADS)
) tid_select (
.data_in (ibuffer_if.tmask),
.index (tid),
`UNUSED_PIN (onehot),
`UNUSED_PIN (valid_out)
.in_i (ibuffer_if.tmask),
.cnt_o (tid),
`UNUSED_PIN (valid_o)
);
wire [31:0] next_PC = ibuffer_if.PC + 4;
@ -42,15 +41,15 @@ module VX_instr_demux (
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.OUTPUT_REG (1)
) alu_buffer (
.clk (clk),
.reset (reset),
.valid_in (alu_req_valid),
.ready_in (alu_req_ready),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `ALU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `INST_ALU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.valid_out (alu_req_if.valid),
.ready_out (alu_req_if.ready)
);
@ -58,18 +57,18 @@ module VX_instr_demux (
// lsu unit
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
wire lsu_is_fence = `LSU_IS_FENCE(ibuffer_if.op_mod);
wire lsu_is_fence = `INST_LSU_IS_FENCE(ibuffer_if.op_mod);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.OUTPUT_REG (1)
) lsu_buffer (
.clk (clk),
.reset (reset),
.valid_in (lsu_req_valid),
.ready_in (lsu_req_ready),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `LSU_OP(ibuffer_if.op_type), lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_LSU_OP(ibuffer_if.op_type), lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.valid_out (lsu_req_if.valid),
.ready_out (lsu_req_if.ready)
);
@ -79,15 +78,15 @@ module VX_instr_demux (
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.OUTPUT_REG (1)
) csr_buffer (
.clk (clk),
.reset (reset),
.valid_in (csr_req_valid),
.ready_in (csr_req_ready),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `CSR_OP(ibuffer_if.op_type), ibuffer_if.imm[`CSR_ADDR_BITS-1:0], ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, ibuffer_if.rs1, gpr_rsp_if.rs1_data[0]}),
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.rs1, csr_req_if.rs1_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_CSR_OP(ibuffer_if.op_type), ibuffer_if.imm[`CSR_ADDR_BITS-1:0], ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, ibuffer_if.rs1, gpr_rsp_if.rs1_data[0]}),
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.rs1, csr_req_if.rs1_data}),
.valid_out (csr_req_if.valid),
.ready_out (csr_req_if.ready)
);
@ -98,15 +97,15 @@ module VX_instr_demux (
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.OUTPUT_REG (1)
) fpu_buffer (
.clk (clk),
.reset (reset),
.valid_in (fpu_req_valid),
.ready_in (fpu_req_ready),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `FPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_FPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
.valid_out (fpu_req_if.valid),
.ready_out (fpu_req_if.ready)
);
@ -119,15 +118,15 @@ module VX_instr_demux (
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
.OUTPUT_REG (1)
) gpu_buffer (
.clk (clk),
.reset (reset),
.valid_in (gpu_req_valid),
.ready_in (gpu_req_ready),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `INST_GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
.valid_out (gpu_req_if.valid),
.ready_out (gpu_req_if.ready)
);

View file

@ -38,17 +38,17 @@ module VX_ipdom_stack #(
end
VX_dp_ram #(
.DATAW(WIDTH * 2),
.SIZE(DEPTH),
.RWCHECK(1),
.FASTRAM(1)
.DATAW (WIDTH * 2),
.SIZE (DEPTH),
.LUTRAM (1)
) store (
.clk(clk),
.waddr(wr_ptr),
.raddr(rd_ptr),
.wren(push),
.din({q2, q1}),
.dout({d2, d1})
.clk (clk),
.wren (push),
.waddr (wr_ptr),
.wdata ({q2, q1}),
.rden (1'b1),
.raddr (rd_ptr),
.rdata ({d2, d1})
);
always @(posedge clk) begin

View file

@ -30,11 +30,15 @@ module VX_issue #(
wire scoreboard_delay;
`RESET_RELAY (ibuf_reset);
`RESET_RELAY (gpr_reset);
`RESET_RELAY (demux_reset);
VX_ibuffer #(
.CORE_ID(CORE_ID)
) ibuffer (
.clk (clk),
.reset (reset),
.reset (ibuf_reset),
.decode_if (decode_if),
.ibuffer_if (ibuffer_if)
);
@ -58,7 +62,7 @@ module VX_issue #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.reset (reset),
.reset (gpr_reset),
.writeback_if (writeback_if),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
@ -80,7 +84,7 @@ module VX_issue #(
VX_instr_demux instr_demux (
.clk (clk),
.reset (reset),
.reset (demux_reset),
.ibuffer_if (execute_if),
.gpr_rsp_if (gpr_rsp_if),
.alu_req_if (alu_req_if),

View file

@ -33,7 +33,7 @@ module VX_lsu_unit #(
wire req_valid;
wire [`NUM_THREADS-1:0] req_tmask;
wire [`NUM_THREADS-1:0][31:0] req_addr;
wire [`LSU_BITS-1:0] req_type;
wire [`INST_LSU_BITS-1:0] req_type;
wire [`NUM_THREADS-1:0][31:0] req_data;
wire [`NR_BITS-1:0] req_rd;
wire req_wb;
@ -80,7 +80,7 @@ module VX_lsu_unit #(
wire lsu_valid = lsu_req_if.valid && ~fence_wait;
VX_pipe_register #(
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.RESETW (1)
) req_pipe_reg (
.clk (clk),
@ -97,7 +97,7 @@ module VX_lsu_unit #(
wire [31:0] rsp_pc;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb;
wire [`LSU_BITS-1:0] rsp_type;
wire [`INST_LSU_BITS-1:0] rsp_type;
wire rsp_is_dup;
`UNUSED_VAR (rsp_type)
@ -132,8 +132,8 @@ module VX_lsu_unit #(
assign mbuf_raddr = dcache_rsp_if.tag[ADDR_TYPEW +: `LSUQ_ADDR_BITS];
VX_index_buffer #(
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1),
.SIZE (`LSUQ_SIZE)
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1),
.SIZE (`LSUQ_SIZE)
) req_metadata (
.clk (clk),
.reset (reset),
@ -202,7 +202,7 @@ module VX_lsu_unit #(
always @(*) begin
mem_req_byteen = {4{req_wb}};
case (`LSU_WSIZE(req_type))
case (`INST_LSU_WSIZE(req_type))
0: mem_req_byteen[req_offset[i]] = 1;
1: begin
mem_req_byteen[req_offset[i]] = 1;
@ -261,11 +261,11 @@ module VX_lsu_unit #(
wire [7:0] rsp_data8 = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
always @(*) begin
case (`LSU_FMT(rsp_type))
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
case (`INST_LSU_FMT(rsp_type))
`INST_FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
default: rsp_data[i] = rsp_data32;
endcase
end

View file

@ -65,6 +65,7 @@ module VX_mem_unit # (
`RESET_RELAY (icache_reset);
`RESET_RELAY (dcache_reset);
`RESET_RELAY (mem_arb_reset);
VX_cache #(
.CACHE_ID (`ICACHE_ID),
@ -197,6 +198,9 @@ module VX_mem_unit # (
.TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE)
) smem_rsp_if();
`RESET_RELAY (smem_arb_reset);
`RESET_RELAY (smem_reset);
VX_smem_arb #(
.NUM_REQS (2),
.LANES (`NUM_THREADS),
@ -207,7 +211,7 @@ module VX_mem_unit # (
.BUFFERED_RSP (1)
) smem_arb (
.clk (clk),
.reset (reset),
.reset (smem_arb_reset),
// input request
.req_valid_in (dcache_req_if.valid),
@ -242,8 +246,6 @@ module VX_mem_unit # (
.rsp_ready_out (dcache_rsp_if.ready)
);
`RESET_RELAY (smem_reset);
VX_shared_mem #(
.CACHE_ID (`SCACHE_ID),
.CACHE_SIZE (`SMEM_SIZE),
@ -312,7 +314,7 @@ module VX_mem_unit # (
.BUFFERED_RSP (2)
) mem_arb (
.clk (clk),
.reset (reset),
.reset (mem_arb_reset),
// Source request
.req_valid_in ({dcache_mem_req_if.valid, icache_mem_req_if.valid}),

View file

@ -5,7 +5,7 @@ module VX_muldiv (
input wire reset,
// Inputs
input wire [`MUL_BITS-1:0] alu_op,
input wire [`INST_MUL_BITS-1:0] alu_op,
input wire [`NW_BITS-1:0] wid_in,
input wire [`NUM_THREADS-1:0] tmask_in,
input wire [31:0] PC_in,
@ -29,7 +29,7 @@ module VX_muldiv (
input wire ready_out
);
wire is_div_op = `MUL_IS_DIV(alu_op);
wire is_div_op = `INST_MUL_IS_DIV(alu_op);
wire [`NUM_THREADS-1:0][31:0] mul_result;
wire [`NW_BITS-1:0] mul_wid_out;
@ -44,9 +44,9 @@ module VX_muldiv (
wire mul_valid_in = valid_in && !is_div_op;
wire mul_ready_in = ~stall_out || ~mul_valid_out;
wire is_mulh_in = (alu_op != `MUL_MUL);
wire is_signed_mul_a = (alu_op != `MUL_MULHU);
wire is_signed_mul_b = (alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU);
wire is_mulh_in = (alu_op != `INST_MUL_MUL);
wire is_signed_mul_a = (alu_op != `INST_MUL_MULHU);
wire is_signed_mul_b = (alu_op != `INST_MUL_MULHU && alu_op != `INST_MUL_MULHSU);
`ifdef IMUL_DPI
@ -123,8 +123,8 @@ module VX_muldiv (
wire [`NR_BITS-1:0] div_rd_out;
wire div_wb_out;
wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU);
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
wire is_rem_op_in = (alu_op == `INST_MUL_REM) || (alu_op == `INST_MUL_REMU);
wire is_signed_div = (alu_op == `INST_MUL_DIV) || (alu_op == `INST_MUL_REM);
wire div_valid_in = valid_in && is_div_op;
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
wire div_ready_in;

View file

@ -74,6 +74,7 @@
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
`define DISABLE_BRAM (* ramstyle = "logic" *)
`define PRESERVE_REG (* preserve *)
///////////////////////////////////////////////////////////////////////////////

View file

@ -18,96 +18,104 @@ endtask
task print_ex_op (
input [`EX_BITS-1:0] ex_type,
input [`OP_BITS-1:0] op_type,
input [`MOD_BITS-1:0] op_mod
input [`INST_OP_BITS-1:0] op_type,
input [`INST_MOD_BITS-1:0] op_mod
);
case (ex_type)
`EX_ALU: begin
if (`ALU_IS_BR(op_mod)) begin
case (`BR_BITS'(op_type))
`BR_EQ: dpi_trace("BEQ");
`BR_NE: dpi_trace("BNE");
`BR_LT: dpi_trace("BLT");
`BR_GE: dpi_trace("BGE");
`BR_LTU: dpi_trace("BLTU");
`BR_GEU: dpi_trace("BGEU");
`BR_JAL: dpi_trace("JAL");
`BR_JALR: dpi_trace("JALR");
`BR_ECALL: dpi_trace("ECALL");
`BR_EBREAK:dpi_trace("EBREAK");
`BR_MRET: dpi_trace("MRET");
`BR_SRET: dpi_trace("SRET");
`BR_DRET: dpi_trace("DRET");
if (`INST_ALU_IS_BR(op_mod)) begin
case (`INST_BR_BITS'(op_type))
`INST_BR_EQ: dpi_trace("BEQ");
`INST_BR_NE: dpi_trace("BNE");
`INST_BR_LT: dpi_trace("BLT");
`INST_BR_GE: dpi_trace("BGE");
`INST_BR_LTU: dpi_trace("BLTU");
`INST_BR_GEU: dpi_trace("BGEU");
`INST_BR_JAL: dpi_trace("JAL");
`INST_BR_JALR: dpi_trace("JALR");
`INST_BR_ECALL: dpi_trace("ECALL");
`INST_BR_EBREAK:dpi_trace("EBREAK");
`INST_BR_MRET: dpi_trace("MRET");
`INST_BR_SRET: dpi_trace("SRET");
`INST_BR_DRET: dpi_trace("DRET");
default: dpi_trace("?");
endcase
end else if (`ALU_IS_MUL(op_mod)) begin
case (`MUL_BITS'(op_type))
`MUL_MUL: dpi_trace("MUL");
`MUL_MULH: dpi_trace("MULH");
`MUL_MULHSU:dpi_trace("MULHSU");
`MUL_MULHU: dpi_trace("MULHU");
`MUL_DIV: dpi_trace("DIV");
`MUL_DIVU: dpi_trace("DIVU");
`MUL_REM: dpi_trace("REM");
`MUL_REMU: dpi_trace("REMU");
end else if (`INST_ALU_IS_MUL(op_mod)) begin
case (`INST_MUL_BITS'(op_type))
`INST_MUL_MUL: dpi_trace("MUL");
`INST_MUL_MULH: dpi_trace("MULH");
`INST_MUL_MULHSU:dpi_trace("MULHSU");
`INST_MUL_MULHU: dpi_trace("MULHU");
`INST_MUL_DIV: dpi_trace("DIV");
`INST_MUL_DIVU: dpi_trace("DIVU");
`INST_MUL_REM: dpi_trace("REM");
`INST_MUL_REMU: dpi_trace("REMU");
default: dpi_trace("?");
endcase
end else begin
case (`ALU_BITS'(op_type))
`ALU_ADD: dpi_trace("ADD");
`ALU_SUB: dpi_trace("SUB");
`ALU_SLL: dpi_trace("SLL");
`ALU_SRL: dpi_trace("SRL");
`ALU_SRA: dpi_trace("SRA");
`ALU_SLT: dpi_trace("SLT");
`ALU_SLTU: dpi_trace("SLTU");
`ALU_XOR: dpi_trace("XOR");
`ALU_OR: dpi_trace("OR");
`ALU_AND: dpi_trace("AND");
`ALU_LUI: dpi_trace("LUI");
`ALU_AUIPC: dpi_trace("AUIPC");
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: dpi_trace("ADD");
`INST_ALU_SUB: dpi_trace("SUB");
`INST_ALU_SLL: dpi_trace("SLL");
`INST_ALU_SRL: dpi_trace("SRL");
`INST_ALU_SRA: dpi_trace("SRA");
`INST_ALU_SLT: dpi_trace("SLT");
`INST_ALU_SLTU: dpi_trace("SLTU");
`INST_ALU_XOR: dpi_trace("XOR");
`INST_ALU_OR: dpi_trace("OR");
`INST_ALU_AND: dpi_trace("AND");
`INST_ALU_LUI: dpi_trace("LUI");
`INST_ALU_AUIPC: dpi_trace("AUIPC");
default: dpi_trace("?");
endcase
end
end
`EX_LSU: begin
case (`LSU_BITS'(op_type))
`LSU_LB: dpi_trace("LB");
`LSU_LH: dpi_trace("LH");
`LSU_LW: dpi_trace("LW");
`LSU_LBU:dpi_trace("LBU");
`LSU_LHU:dpi_trace("LHU");
`LSU_SB: dpi_trace("SB");
`LSU_SH: dpi_trace("SH");
`LSU_SW: dpi_trace("SW");
default: dpi_trace("?");
endcase
if (op_mod == 0) begin
case (`INST_LSU_BITS'(op_type))
`INST_LSU_LB: dpi_trace("LB");
`INST_LSU_LH: dpi_trace("LH");
`INST_LSU_LW: dpi_trace("LW");
`INST_LSU_LBU:dpi_trace("LBU");
`INST_LSU_LHU:dpi_trace("LHU");
`INST_LSU_SB: dpi_trace("SB");
`INST_LSU_SH: dpi_trace("SH");
`INST_LSU_SW: dpi_trace("SW");
default: dpi_trace("?");
endcase
end else if (op_mod == 1) begin
case (`INST_FENCE_BITS'(op_type))
`INST_FENCE_D: dpi_trace("DFENCE");
`INST_FENCE_I: dpi_trace("IFENCE");
default: dpi_trace("?");
endcase
end
end
`EX_CSR: begin
case (`CSR_BITS'(op_type))
`CSR_RW: dpi_trace("CSRW");
`CSR_RS: dpi_trace("CSRS");
`CSR_RC: dpi_trace("CSRC");
case (`INST_CSR_BITS'(op_type))
`INST_CSR_RW: dpi_trace("CSRW");
`INST_CSR_RS: dpi_trace("CSRS");
`INST_CSR_RC: dpi_trace("CSRC");
default: dpi_trace("?");
endcase
end
`EX_FPU: begin
case (`FPU_BITS'(op_type))
`FPU_ADD: dpi_trace("ADD");
`FPU_SUB: dpi_trace("SUB");
`FPU_MUL: dpi_trace("MUL");
`FPU_DIV: dpi_trace("DIV");
`FPU_SQRT: dpi_trace("SQRT");
`FPU_MADD: dpi_trace("MADD");
`FPU_NMSUB: dpi_trace("NMSUB");
`FPU_NMADD: dpi_trace("NMADD");
`FPU_CVTWS: dpi_trace("CVTWS");
`FPU_CVTWUS:dpi_trace("CVTWUS");
`FPU_CVTSW: dpi_trace("CVTSW");
`FPU_CVTSWU:dpi_trace("CVTSWU");
`FPU_CLASS: dpi_trace("CLASS");
`FPU_CMP: dpi_trace("CMP");
`FPU_MISC: begin
case (`INST_FPU_BITS'(op_type))
`INST_FPU_ADD: dpi_trace("ADD");
`INST_FPU_SUB: dpi_trace("SUB");
`INST_FPU_MUL: dpi_trace("MUL");
`INST_FPU_DIV: dpi_trace("DIV");
`INST_FPU_SQRT: dpi_trace("SQRT");
`INST_FPU_MADD: dpi_trace("MADD");
`INST_FPU_NMSUB: dpi_trace("NMSUB");
`INST_FPU_NMADD: dpi_trace("NMADD");
`INST_FPU_CVTWS: dpi_trace("CVTWS");
`INST_FPU_CVTWUS:dpi_trace("CVTWUS");
`INST_FPU_CVTSW: dpi_trace("CVTSW");
`INST_FPU_CVTSWU:dpi_trace("CVTSWU");
`INST_FPU_CLASS: dpi_trace("CLASS");
`INST_FPU_CMP: dpi_trace("CMP");
`INST_FPU_MISC: begin
case (op_mod)
0: dpi_trace("SGNJ");
1: dpi_trace("SGNJN");
@ -122,13 +130,13 @@ task print_ex_op (
endcase
end
`EX_GPU: begin
case (`GPU_BITS'(op_type))
`GPU_TMC: dpi_trace("TMC");
`GPU_WSPAWN:dpi_trace("WSPAWN");
`GPU_SPLIT: dpi_trace("SPLIT");
`GPU_JOIN: dpi_trace("JOIN");
`GPU_BAR: dpi_trace("BAR");
`GPU_BAR: dpi_trace("PRED");
case (`INST_GPU_BITS'(op_type))
`INST_GPU_TMC: dpi_trace("TMC");
`INST_GPU_WSPAWN:dpi_trace("WSPAWN");
`INST_GPU_SPLIT: dpi_trace("SPLIT");
`INST_GPU_JOIN: dpi_trace("JOIN");
`INST_GPU_BAR: dpi_trace("BAR");
`INST_GPU_PRED: dpi_trace("PRED");
default: dpi_trace("?");
endcase
end

View file

@ -21,7 +21,7 @@ typedef struct packed {
logic NX; // 0-Inexact
} fflags_t;
`define FFG_BITS $bits(fflags_t)
`define FFLAGS_BITS $bits(fflags_t)
typedef struct packed {
logic valid;

View file

@ -30,7 +30,7 @@ module VX_warp_sched #(
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks;
reg [`NUM_WARPS-1:0][31:0] warp_pcs, warp_next_pcs;
reg [`NUM_WARPS-1:0][31:0] warp_pcs;
// barriers
reg [`NUM_BARRIERS-1:0][`NUM_WARPS-1:0] barrier_masks; // warps waiting on barrier
@ -121,12 +121,11 @@ module VX_warp_sched #(
end
if (ifetch_req_fire) begin
warp_next_pcs[ifetch_req_if.wid] <= ifetch_req_if.PC + 4;
warp_pcs[ifetch_req_if.wid] <= ifetch_req_if.PC + 4;
end
if (wstall_if.valid) begin
stalled_warps[wstall_if.wid] <= wstall_if.stalled;
warp_pcs[wstall_if.wid] <= warp_next_pcs[wstall_if.wid];
end
// join handling
@ -200,13 +199,12 @@ module VX_warp_sched #(
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
VX_priority_encoder #(
.N (`NUM_WARPS)
) rr_arbiter (
.data_in (ready_warps),
.index (schedule_wid),
.valid_out (schedule_valid),
`UNUSED_PIN (onehot)
VX_lzc #(
.WIDTH (`NUM_WARPS)
) wid_select (
.in_i (ready_warps),
.cnt_o (schedule_wid),
.valid_o (schedule_valid)
);
wire [`NUM_WARPS-1:0][(`NUM_THREADS + 32)-1:0] schedule_data;

View file

@ -85,6 +85,7 @@ module Vortex (
.CACHE_SIZE (`L3CACHE_SIZE),
.CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE),
.NUM_BANKS (`L3NUM_BANKS),
.NUM_PORTS (`L3NUM_PORTS),
.WORD_SIZE (`L3WORD_SIZE),
.NUM_REQS (`L3NUM_REQS),
.CREQ_SIZE (`L3CREQ_SIZE),
@ -141,6 +142,8 @@ module Vortex (
end else begin
`RESET_RELAY (mem_arb_reset);
VX_mem_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (`L3MEM_DATA_WIDTH),
@ -150,7 +153,7 @@ module Vortex (
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (reset),
.reset (mem_arb_reset),
// Core request
.req_valid_in (per_cluster_mem_req_valid),

View file

@ -512,6 +512,8 @@ t_local_mem_data mem_rsp_data;
wire [AVS_REQ_TAGW:0] mem_rsp_tag;
wire mem_rsp_ready;
`RESET_RELAY (mem_arb_reset);
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH (LMEM_DATA_WIDTH),
@ -522,7 +524,7 @@ VX_mem_arb #(
.TYPE ("X")
) mem_arb (
.clk (clk),
.reset (reset),
.reset (mem_arb_reset),
// Source request
.req_valid_in ({vx_mem_req_arb_valid, cci_mem_req_arb_valid}),
@ -557,6 +559,8 @@ VX_mem_arb #(
//--
`RESET_RELAY (avs_wrapper_reset);
VX_avs_wrapper #(
.AVS_DATA_WIDTH (LMEM_DATA_WIDTH),
.AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH),
@ -566,7 +570,7 @@ VX_avs_wrapper #(
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
) avs_wrapper (
.clk (clk),
.reset (reset),
.reset (avs_wrapper_reset),
// Memory request
.mem_req_valid (mem_req_valid),
@ -724,13 +728,15 @@ always @(posedge clk) begin
end
end
`RESET_RELAY (cci_rdq_reset);
VX_fifo_queue #(
.DATAW (CCI_RD_QUEUE_DATAW),
.SIZE (CCI_RD_QUEUE_SIZE),
.OUTPUT_REG (1)
) cci_rd_req_queue (
.clk (clk),
.reset (reset),
.reset (cci_rdq_reset),
.push (cci_rdq_push),
.pop (cci_rdq_pop),
.data_in (cci_rdq_din),
@ -878,7 +884,7 @@ Vortex #() vortex (
`SCOPE_BIND_afu_vortex
.clk (clk),
.reset (reset | vx_reset),
.reset (reset || vx_reset),
// Memory request
.mem_req_valid (vx_mem_req_valid),
@ -997,6 +1003,8 @@ VX_fifo_queue #(
wire scope_changed = `SCOPE_TRIGGER;
`RESET_RELAY (scope_reset);
VX_scope #(
.DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})),
.BUSW (64),
@ -1004,7 +1012,7 @@ VX_scope #(
.UPDW ($bits({`SCOPE_UPDATE_LIST}))
) scope (
.clk (clk),
.reset (reset),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.changed (scope_changed),

211
hw/rtl/cache/VX_bank.v vendored
View file

@ -39,7 +39,8 @@ module VX_bank #(
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE)
localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE),
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
`SCOPE_IO_VX_bank
@ -56,13 +57,13 @@ module VX_bank #(
// Core Request
input wire core_req_valid,
input wire [NUM_PORTS-1:0] core_req_pmask,
input wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid,
input wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
input wire core_req_rw,
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
input wire [CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
// Core Response
@ -70,16 +71,18 @@ module VX_bank #(
output wire [NUM_PORTS-1:0] core_rsp_pmask,
output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [CACHE_LINE_SIZE-1:0] mem_req_byteen,
output wire [NUM_PORTS-1:0] mem_req_pmask,
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel,
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
output wire [`CACHE_LINE_WIDTH-1:0] mem_req_data,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data,
input wire mem_req_ready,
// Memory response
@ -104,18 +107,18 @@ module VX_bank #(
`endif
wire [NUM_PORTS-1:0] creq_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] creq_tag;
wire creq_rw;
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
wire [CORE_TAG_WIDTH-1:0] creq_tag;
wire creq_valid, creq_ready;
VX_elastic_buffer #(
.DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)),
.SIZE (CREQ_SIZE),
.OUTPUT_REG (CREQ_SIZE > 2)
) core_req_queue (
@ -123,8 +126,8 @@ module VX_bank #(
.reset (reset),
.ready_in (core_req_ready),
.valid_in (core_req_valid),
.data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}),
.data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}),
.data_in ({core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid, core_req_tag}),
.data_out ({creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid, creq_tag}),
.ready_out (creq_ready),
.valid_out (creq_valid)
);
@ -134,33 +137,34 @@ module VX_bank #(
wire mshr_valid;
wire [MSHR_ADDR_WIDTH-1:0] mshr_dequeue_id;
wire [`LINE_ADDR_WIDTH-1:0] mshr_addr;
wire [CORE_TAG_WIDTH-1:0] mshr_tag;
wire [NUM_PORTS-1:0] mshr_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] mshr_tag;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mshr_wsel;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid;
wire [NUM_PORTS-1:0] mshr_pmask;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire mem_rw_st0, mem_rw_st1;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1;
wire write_st0, write_st1;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire [`CACHE_LINE_WIDTH-1:0] rdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire valid_st0, valid_st1;
wire is_fill_st0, is_fill_st1;
wire is_mshr_st0, is_mshr_st1;
wire miss_st0, miss_st1;
wire writeen_unqual_st1;
wire is_flush_st0;
wire mshr_pending_st0, mshr_pending_st1;
wire crsq_valid, crsq_ready, crsq_stall;
wire mreq_alm_full;
wire creq_fire = creq_valid && creq_ready;
// prevent read-during-write hazard when accessing tags/data block RAMs
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
wire rdw_write_hazard = valid_st0 && write_st0 && ~creq_rw;
// determine which queue to pop next in priority order
wire mshr_grant = 1;
@ -172,51 +176,38 @@ module VX_bank #(
wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable;
wire mshr_ready = mshr_grant
&& !crsq_stall; // ensure core response ready
&& !rdw_fill_hazard // prevent read-during-write hazard
&& !crsq_stall; // ensure core response ready
assign mem_rsp_ready = mrsq_grant
&& !crsq_stall; // ensure core response ready
&& !crsq_stall; // ensure core response ready
assign creq_ready = creq_grant
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_stall; // ensure core response ready
wire mshr_fire = mshr_valid && mshr_ready;
assign creq_ready = creq_grant
&& !rdw_write_hazard // prevent read-during-write hazard
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_stall; // ensure core response ready
wire mshr_fire = mshr_valid && mshr_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire creq_fire = creq_valid && creq_ready;
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[`CACHE_REQ_INFO_RNG] : creq_tag[`CACHE_REQ_INFO_RNG];
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[0][`CACHE_REQ_INFO_RNG] : creq_tag[0][`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_sel, debug_pc_sel} = 0;
end
`endif
wire [`CACHE_LINE_WIDTH-1:0] creq_line_data;
if (`WORDS_PER_LINE > 1) begin
if (NUM_PORTS > 1) begin
reg [`CACHE_LINE_WIDTH-1:0] creq_line_data_r;
always @(*) begin
creq_line_data_r = 'x;
for (integer p = 0; p < NUM_PORTS; p++) begin
if (creq_pmask[p]) begin
creq_line_data_r[creq_wsel[p] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data[p];
end
end
end
assign creq_line_data = creq_line_data_r;
end else begin
assign creq_line_data = {`WORDS_PER_LINE{creq_data}};
end
end else begin
assign creq_line_data = creq_data;
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin
assign wdata_sel[i] = mem_rsp_data[i];
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + MSHR_ADDR_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
@ -227,9 +218,9 @@ module VX_bank #(
flush_enable,
mrsq_enable || flush_enable,
mshr_enable,
mshr_enable ? 1'b0 : creq_rw,
creq_fire && creq_rw,
mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
(mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data : creq_line_data,
wdata_sel,
mshr_enable ? mshr_wsel : creq_wsel,
creq_byteen,
mshr_enable ? mshr_tid : creq_tid,
@ -237,12 +228,12 @@ module VX_bank #(
mshr_enable ? mshr_tag : creq_tag,
mshr_enable ? mshr_dequeue_id : (mem_rsp_valid ? mem_rsp_id : mshr_alloc_id)
}),
.data_out ({valid_st0, is_flush_st0, is_fill_st0, is_mshr_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
.data_out ({valid_st0, is_flush_st0, is_fill_st0, is_mshr_st0, write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
);
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_st0, debug_pc_st0} = tag_st0[`CACHE_REQ_INFO_RNG];
assign {debug_wid_st0, debug_pc_st0} = tag_st0[0][`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_st0, debug_pc_st0} = 0;
end
@ -282,54 +273,70 @@ module VX_bank #(
// we have a core request hit
assign miss_st0 = !is_fill_st0 && !tag_match_st0;
wire read_st0 = !is_fill_st0 && !write_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + MSHR_ADDR_WIDTH + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (!crsq_stall),
.data_in ({valid_st0, is_fill_st0, is_mshr_st0, is_fill_st0, miss_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_fill_st1, is_mshr_st1, writeen_unqual_st1, miss_st1, mem_rw_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
.data_in ({valid_st0, is_fill_st0, is_mshr_st0, miss_st0, write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_fill_st1, is_mshr_st1, miss_st1, write_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
);
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_wid_st1, debug_pc_st1} = tag_st1[`CACHE_REQ_INFO_RNG];
assign {debug_wid_st1, debug_pc_st1} = tag_st1[0][`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_wid_st1, debug_pc_st1} = 0;
end
`endif
wire writeen_st1 = (WRITE_ENABLE && !is_fill_st1 && mem_rw_st1 && !miss_st1)
|| writeen_unqual_st1;
wire read_st1 = !is_fill_st1 && !write_st1;
wire readen_st1 = !is_fill_st1 && !mem_rw_st1;
wire writeen_st1 = (WRITE_ENABLE && write_st1 && !miss_st1)
|| is_fill_st1;
wire crsq_push_st1 = readen_st1 && !miss_st1;
wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1;
wire crsq_push_st1 = read_st1 && !miss_st1;
wire mreq_push_st1 = (readen_st1 && miss_st1 && !mshr_pending_st1)
|| do_writeback_st1;
wire mreq_push_st1 = (read_st1 && miss_st1 && !mshr_pending_st1)
|| write_st1;
wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1;
wire [`CACHE_LINE_WIDTH-1:0] line_wdata_st1;
wire [CACHE_LINE_SIZE-1:0] line_byteen_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
if (`WORDS_PER_LINE > 1) begin
reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r;
reg [CACHE_LINE_SIZE-1:0] line_byteen_r;
always @(*) begin
line_byteen_r = 0;
for (integer p = 0; p < NUM_PORTS; p++) begin
if ((NUM_PORTS == 1) || pmask_st1[p]) begin
line_byteen_r[wsel_st1[p] * WORD_SIZE +: WORD_SIZE] = byteen_st1[p];
if (NUM_PORTS > 1) begin
always @(*) begin
line_wdata_r = 'x;
line_byteen_r = 0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask_st1[i]) begin
line_wdata_r[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[i];
line_byteen_r[wsel_st1[i] * WORD_SIZE +: WORD_SIZE] = byteen_st1[i];
end
end
end
end else begin
always @(*) begin
line_wdata_r = {`WORDS_PER_LINE{creq_data_st1}};
line_byteen_r = 0;
line_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
end
end
assign line_wdata_st1 = line_wdata_r;
assign line_byteen_st1 = line_byteen_r;
end else begin
assign line_byteen_st1 = byteen_st1;
`UNUSED_VAR (wsel_st1)
end
assign line_wdata_st1 = creq_data_st1;
assign line_byteen_st1 = byteen_st1;
end
VX_data_access #(
.BANK_ID (BANK_ID),
@ -353,20 +360,21 @@ module VX_bank #(
.addr (addr_st1),
// reading
.readen (valid_st1 && readen_st1),
.rdata (rdata_st1),
.readen (valid_st1 && read_st1),
.read_data (rdata_st1),
// writing
.writeen (valid_st1 && writeen_st1),
.is_fill (is_fill_st1),
.byteen (line_byteen_st1),
.wdata (wdata_st1)
.byteen (line_byteen_st1),
.write_data (line_wdata_st1),
.fill_data (wdata_st1)
);
wire mshr_allocate = creq_fire && ~creq_rw;
wire mshr_replay = do_fill_st0 && ~crsq_stall;
wire mshr_lookup = valid_st0 && !is_fill_st0 && ~is_mshr_st0 && ~mem_rw_st0 && ~crsq_stall;
wire mshr_release = valid_st1 && readen_st1 && ~is_mshr_st1 && ~miss_st1 && ~crsq_stall;
wire mshr_lookup = valid_st0 && read_st0 && !is_mshr_st0 && !crsq_stall;
wire mshr_release = valid_st1 && read_st1 && !is_mshr_st1 && !miss_st1 && !crsq_stall;
wire mshr_not_full;
@ -430,7 +438,7 @@ module VX_bank #(
wire [NUM_PORTS-1:0] crsq_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
wire [CORE_TAG_WIDTH-1:0] crsq_tag;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] crsq_tag;
assign crsq_valid = valid_st1 && crsq_push_st1;
assign crsq_stall = crsq_valid && !crsq_ready;
@ -440,15 +448,15 @@ module VX_bank #(
assign crsq_tag = tag_st1;
if (`WORDS_PER_LINE > 1) begin
for (genvar p = 0; p < NUM_PORTS; ++p) begin
assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH];
for (genvar i = 0; i < NUM_PORTS; ++i) begin
assign crsq_data[i] = rdata_st1[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH];
end
end else begin
assign crsq_data = rdata_st1;
end
VX_elastic_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
.SIZE (CRSQ_SIZE),
.OUTPUT_REG (1 == NUM_BANKS)
) core_rsp_req (
@ -464,24 +472,29 @@ module VX_bank #(
// Enqueue memory request
wire [CACHE_LINE_SIZE-1:0] mreq_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
wire [NUM_PORTS-1:0] mreq_pmask;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
wire [`CACHE_LINE_WIDTH-1:0] mreq_data;
wire mreq_push, mreq_pop, mreq_empty, mreq_rw;
assign mreq_push = valid_st1 && mreq_push_st1;
assign mreq_pop = mem_req_valid && mem_req_ready;
assign mreq_rw = WRITE_ENABLE && do_writeback_st1;
assign mreq_byteen = mreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
assign mreq_addr = addr_st1;
assign mreq_id = mshr_id_st1;
assign mreq_data = wdata_st1;
assign mreq_rw = WRITE_ENABLE && write_st1;
assign mreq_addr = addr_st1;
assign mreq_id = mshr_id_st1;
assign mreq_pmask= pmask_st1;
assign mreq_wsel = wsel_st1;
assign mreq_byteen = byteen_st1;
assign mreq_data = creq_data_st1;
VX_fifo_queue #(
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.SIZE (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-2)
) mem_req_queue (
@ -489,8 +502,8 @@ module VX_bank #(
.reset (reset),
.push (mreq_push),
.pop (mreq_pop),
.data_in ({mreq_rw, mreq_byteen, mreq_addr, mreq_id, mreq_data}),
.data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_id, mem_req_data}),
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}),
.empty (mreq_empty),
.alm_full (mreq_alm_full),
`UNUSED_PIN (full),
@ -512,8 +525,8 @@ module VX_bank #(
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`ifdef PERF_ENABLE
assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1;
assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1;
assign perf_read_misses = valid_st1 && read_st1 && !is_mshr_st1 && miss_st1;
assign perf_write_misses = valid_st1 && write_st1 && !is_mshr_st1 && miss_st1;
assign perf_pipe_stalls = crsq_stall || mreq_alm_full || mshr_alm_full;
assign perf_mshr_stalls = mshr_alm_full;
`endif
@ -547,7 +560,7 @@ module VX_bank #(
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
end
if (mreq_push) begin
if (do_writeback_st1)
if (write_st1)
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
else
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, debug_wid_st1, debug_pc_st1);

View file

@ -44,7 +44,9 @@ module VX_cache #(
parameter BANK_ADDR_OFFSET = 0,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0
parameter NC_ENABLE = 0,
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
`SCOPE_IO_VX_cache
@ -103,6 +105,54 @@ module VX_cache #(
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
`endif
///////////////////////////////////////////////////////////////////////////
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p;
wire [NUM_PORTS-1:0] mem_req_pmask_p;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_p;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_p;
wire mem_req_rw_p;
// Convert the per-port memory request signals (mem_req_*_p) into the
// line-wide mem_req_rw / mem_req_byteen / mem_req_data signals.
if (WRITE_ENABLE) begin
if (`WORDS_PER_LINE > 1) begin
// Multiple words per line: scatter each port's word into its slot of the line.
reg [CACHE_LINE_SIZE-1:0]  mem_req_byteen_r;
reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r;
always @(*) begin
// Defaults: no byte enabled, data left as don't-care for untouched words.
mem_req_byteen_r = 0;
mem_req_data_r   = 'x;
// Ports are visited in ascending order with blocking assignments, so if two
// active ports select the same word, the higher-index port's values win.
for (integer i = 0; i < NUM_PORTS; ++i) begin
// A port contributes when its pmask bit is set; with a single port the
// mask is ignored and the port always contributes.
if ((1 == NUM_PORTS) || mem_req_pmask_p[i]) begin
// wsel picks the word slot; byte enables and data land at the same slot.
mem_req_byteen_r[mem_req_wsel_p[i] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[i];
mem_req_data_r[mem_req_wsel_p[i] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[i];
end
end
end
assign mem_req_rw     = mem_req_rw_p;
assign mem_req_byteen = mem_req_byteen_r;
assign mem_req_data   = mem_req_data_r;
end else begin
// Single word per line: no word selection needed, per-port signals pass through.
// NOTE(review): the direct assignments presumably rely on NUM_PORTS == 1 here so
// that the per-port and line-wide widths match - confirm against the port list.
`UNUSED_VAR (mem_req_pmask_p)
`UNUSED_VAR (mem_req_wsel_p)
assign mem_req_rw     = mem_req_rw_p;
assign mem_req_byteen = mem_req_byteen_p;
assign mem_req_data   = mem_req_data_p;
end
end else begin
// Writes disabled: the request is never a write (rw tied to 0), byte enables
// and data are driven as don't-care, and all per-port inputs are marked unused.
`UNUSED_VAR (mem_req_byteen_p)
`UNUSED_VAR (mem_req_pmask_p)
`UNUSED_VAR (mem_req_wsel_p)
`UNUSED_VAR (mem_req_data_p)
`UNUSED_VAR (mem_req_rw_p)
assign mem_req_rw     = 0;
assign mem_req_byteen = 'x;
assign mem_req_data   = 'x;
end
///////////////////////////////////////////////////////////////////////////
// Core request
@ -124,9 +174,11 @@ module VX_cache #(
// Memory request
wire mem_req_valid_nc;
wire mem_req_rw_nc;
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc;
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc;
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc;
wire [NUM_PORTS-1:0] mem_req_pmask_nc;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_nc;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_nc;
wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc;
wire mem_req_ready_nc;
@ -134,10 +186,11 @@ module VX_cache #(
wire mem_rsp_valid_nc;
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc;
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_nc;
wire mem_rsp_ready_nc;
wire mem_rsp_ready_nc;
if (NC_ENABLE) begin
VX_nc_bypass #(
.NUM_PORTS (NUM_PORTS),
.NUM_REQS (NUM_REQS),
.NUM_RSP_TAGS (`CORE_RSP_TAGS),
.NC_TAG_BIT (0),
@ -147,12 +200,12 @@ module VX_cache #(
.CORE_TAG_IN_WIDTH (CORE_TAG_WIDTH),
.MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH),
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
.MEM_TAG_IN_WIDTH (MEM_TAG_IN_WIDTH),
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH)
) nc_bypass (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
// Core request in
.core_req_valid_in (core_req_valid),
@ -188,19 +241,23 @@ module VX_cache #(
// Memory request in
.mem_req_valid_in (mem_req_valid_nc),
.mem_req_rw_in (mem_req_rw_nc),
.mem_req_byteen_in (mem_req_byteen_nc),
.mem_req_rw_in (mem_req_rw_nc),
.mem_req_addr_in (mem_req_addr_nc),
.mem_req_pmask_in (mem_req_pmask_nc),
.mem_req_byteen_in (mem_req_byteen_nc),
.mem_req_wsel_in (mem_req_wsel_nc),
.mem_req_data_in (mem_req_data_nc),
.mem_req_tag_in (mem_req_tag_nc),
.mem_req_ready_in (mem_req_ready_nc),
// Memory request out
.mem_req_valid_out (mem_req_valid),
.mem_req_rw_out (mem_req_rw),
.mem_req_byteen_out (mem_req_byteen),
.mem_req_addr_out (mem_req_addr),
.mem_req_data_out (mem_req_data),
.mem_req_rw_out (mem_req_rw_p),
.mem_req_pmask_out (mem_req_pmask_p),
.mem_req_byteen_out (mem_req_byteen_p),
.mem_req_wsel_out (mem_req_wsel_p),
.mem_req_data_out (mem_req_data_p),
.mem_req_tag_out (mem_req_tag),
.mem_req_ready_out (mem_req_ready),
@ -232,10 +289,12 @@ module VX_cache #(
assign core_rsp_ready_nc = core_rsp_ready;
assign mem_req_valid = mem_req_valid_nc;
assign mem_req_rw = mem_req_rw_nc;
assign mem_req_addr = mem_req_addr_nc;
assign mem_req_byteen = mem_req_byteen_nc;
assign mem_req_data = mem_req_data_nc;
assign mem_req_rw_p = mem_req_rw_nc;
assign mem_req_pmask_p = mem_req_pmask_nc;
assign mem_req_byteen_p = mem_req_byteen_nc;
assign mem_req_wsel_p = mem_req_wsel_nc;
assign mem_req_data_p = mem_req_data_nc;
assign mem_req_tag = mem_req_tag_nc;
assign mem_req_ready_nc = mem_req_ready;
@ -251,6 +310,8 @@ module VX_cache #(
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_qual;
wire mrsq_out_valid, mrsq_out_ready;
`RESET_RELAY (mrsq_reset);
VX_elastic_buffer #(
.DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH),
@ -258,7 +319,7 @@ module VX_cache #(
.OUTPUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue (
.clk (clk),
.reset (reset),
.reset (mrsq_reset),
.ready_in (mem_rsp_ready_nc),
.valid_in (mem_rsp_valid_nc),
.data_in ({mem_rsp_tag_nc, mem_rsp_data_nc}),
@ -274,13 +335,15 @@ module VX_cache #(
wire [`LINE_SELECT_BITS-1:0] flush_addr;
wire flush_enable;
`RESET_RELAY (flush_reset);
VX_flush_ctrl #(
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS)
) flush_ctrl (
.clk (clk),
.reset (reset),
.reset (flush_reset),
.addr_out (flush_addr),
.valid_out (flush_enable)
);
@ -289,28 +352,30 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid;
wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_mem_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_mem_req_wsel;
wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
@ -361,28 +426,30 @@ module VX_cache #(
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire curr_bank_core_req_valid;
wire [NUM_PORTS-1:0] curr_bank_core_req_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_core_req_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid;
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
wire curr_bank_core_req_rw;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire curr_bank_core_req_ready;
wire curr_bank_core_rsp_valid;
wire [NUM_PORTS-1:0] curr_bank_core_rsp_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_mem_req_valid;
wire curr_bank_mem_req_rw;
wire [CACHE_LINE_SIZE-1:0] curr_bank_mem_req_byteen;
wire [NUM_PORTS-1:0] curr_bank_mem_req_pmask;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_mem_req_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_mem_req_wsel;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_req_id;
wire[`CACHE_LINE_WIDTH-1:0] curr_bank_mem_req_data;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_mem_req_data;
wire curr_bank_mem_req_ready;
wire curr_bank_mem_rsp_valid;
@ -414,7 +481,9 @@ module VX_cache #(
// Memory request
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
assign per_bank_mem_req_pmask[i] = curr_bank_mem_req_pmask;
assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen;
assign per_bank_mem_req_wsel[i] = curr_bank_mem_req_wsel;
if (NUM_BANKS == 1) begin
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
end else begin
@ -435,6 +504,8 @@ module VX_cache #(
assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual);
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
assign per_bank_mem_rsp_ready[i] = curr_bank_mem_rsp_ready;
`RESET_RELAY (bank_reset);
VX_bank #(
.BANK_ID (i),
@ -457,7 +528,7 @@ module VX_cache #(
`SCOPE_BIND_VX_cache_bank(i)
.clk (clk),
.reset (reset),
.reset (bank_reset),
`ifdef PERF_ENABLE
.perf_read_misses (perf_read_miss_per_bank[i]),
@ -489,7 +560,9 @@ module VX_cache #(
// Memory request
.mem_req_valid (curr_bank_mem_req_valid),
.mem_req_rw (curr_bank_mem_req_rw),
.mem_req_pmask (curr_bank_mem_req_pmask),
.mem_req_byteen (curr_bank_mem_req_byteen),
.mem_req_wsel (curr_bank_mem_req_wsel),
.mem_req_addr (curr_bank_mem_req_addr),
.mem_req_id (curr_bank_mem_req_id),
.mem_req_data (curr_bank_mem_req_data),
@ -532,25 +605,27 @@ module VX_cache #(
.core_rsp_ready (core_rsp_ready_nc)
);
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i]};
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
end
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id;
`RESET_RELAY (mreq_reset);
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.BUFFERED (1)
) mem_req_arb (
.clk (clk),
.reset (reset),
.reset (mreq_reset),
.valid_in (per_bank_mem_req_valid),
.data_in (data_in),
.ready_in (per_bank_mem_req_ready),
.valid_out (mem_req_valid_nc),
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}),
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_pmask_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}),
.ready_out (mem_req_ready_nc)
);

View file

@ -9,8 +9,10 @@
`define REQS_BITS `LOG2UP(NUM_REQS)
// tag valid tid word_sel
`define MSHR_DATA_WIDTH (CORE_TAG_WIDTH + (1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
`define PORTS_BITS `LOG2UP(NUM_PORTS)
// tag valid tid word_sel
`define MSHR_DATA_WIDTH ((CORE_TAG_WIDTH + 1 + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS)
`define WORD_WIDTH (8 * WORD_SIZE)

View file

@ -43,7 +43,7 @@ module VX_core_req_bank_sel #(
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid,
output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready
);
`UNUSED_PARAM (CACHE_ID)
@ -80,9 +80,9 @@ module VX_core_req_bank_sel #(
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
reg [NUM_BANKS-1:0] per_bank_core_req_rw_r;
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r;
reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
reg [NUM_REQS-1:0] core_req_ready_r;
if (NUM_REQS > 1) begin
@ -101,7 +101,7 @@ module VX_core_req_bank_sel #(
end
end
for (genvar i = NUM_REQS-1; i >= 0; --i) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_line_match[i] = (core_req_line_addr[i] == per_bank_line_addr_r[core_req_bid[i]]);
end
@ -129,10 +129,9 @@ module VX_core_req_bank_sel #(
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
req_select_table_r[core_req_bid[i]][i % NUM_PORTS] = (1 << i);
end
end
@ -177,31 +176,25 @@ module VX_core_req_bank_sel #(
per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i];
per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i];
per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i);
per_bank_core_req_tag_r[core_req_bid[i]][i % NUM_PORTS] = core_req_tag[i];
per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i];
per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i];
end
end
end
if (SHARED_BANK_READY == 0) begin
always @(*) begin
core_req_ready_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i];
end
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]]
&& core_req_line_match[i];
end
end
end else begin
always @(*) begin
core_req_ready_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
core_req_ready_r[i] = per_bank_core_req_ready
&& core_req_line_match[i];
end
for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready
&& core_req_line_match[i];
end
end
end

View file

@ -24,7 +24,7 @@ module VX_core_rsp_merge #(
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Response
@ -40,7 +40,7 @@ module VX_core_rsp_merge #(
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select;
reg [NUM_BANKS-1:0] per_bank_core_rsp_ready_r;
if (CORE_TAG_ID_BITS != 0) begin
@ -51,61 +51,101 @@ module VX_core_rsp_merge #(
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
wire core_rsp_ready_unqual;
always @(*) begin
core_rsp_tag_unqual = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
end
end
end
if (NUM_PORTS > 1) begin
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
core_rsp_bank_select = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer p = 0; p < NUM_PORTS; p++) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
core_rsp_bank_select[i] = core_rsp_ready_unqual;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
end
// Per-bank, per-port record of response ports already forwarded.
// per_bank_core_rsp_sent_n[i] is this register OR-ed with the ports being
// sent in the current cycle (per_bank_core_rsp_sent[i]).
always @(posedge clk) begin
if (reset) begin
per_bank_core_rsp_sent_r <= '0;
end else begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
// When the accumulated mask equals the bank's port mask, every requested
// port has been sent: clear the record. Otherwise keep accumulating.
// NOTE(review): the compare is not qualified by per_bank_core_rsp_valid[i];
// presumably pmask is only meaningful while valid is high - confirm.
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
per_bank_core_rsp_sent_r[i] <= '0;
end else begin
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
end
end
end
end
// Select the tag of one pending response port. A port is pending when its
// bank is valid, the port is set in the bank's pmask, and it has not been
// recorded in per_bank_core_rsp_sent_r.
// The last assignment wins: banks are scanned from high to low and ports
// from low to high, so the lowest-index bank with a pending port is chosen,
// and within that bank the highest-index pending port.
always @(*) begin
// Stays 'x when no port is pending.
core_rsp_tag_unqual = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
for (integer p = 0; p < NUM_PORTS; ++p) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p]) begin
core_rsp_tag_unqual = per_bank_core_rsp_tag[i][p];
end
end
end
end
// Gather every pending port whose tag ID bits match the selected tag
// (core_rsp_tag_unqual) into a single response.
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
per_bank_core_rsp_sent = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
for (integer p = 0; p < NUM_PORTS; ++p) begin
// Only the low CORE_TAG_ID_BITS bits of the tag are compared.
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p]
&& (per_bank_core_rsp_tag[i][p][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
// The port's tid selects which slot of the response it fills.
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
// The port counts as sent this cycle only if core_rsp_ready_unqual is high.
per_bank_core_rsp_sent[i][p] = core_rsp_ready_unqual;
end
end
end
end
// A bank is ready when the ports already sent, plus those sent this cycle
// (per_bank_core_rsp_sent_n), equal the bank's port mask.
always @(*) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
end
end
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
core_rsp_bank_select = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = core_rsp_ready_unqual;
always @(*) begin
core_rsp_tag_unqual = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
end
end
end
end
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
per_bank_core_rsp_ready_r = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][0][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual;
end
end
end
end
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
) pipe_reg (
) skid_buf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_any),
@ -118,40 +158,102 @@ module VX_core_rsp_merge #(
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
wire [NUM_REQS-1:0] core_rsp_ready_unqual;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
end
end
end
if (NUM_PORTS > 1) begin
always @(*) begin
for (integer i = 0; i < NUM_BANKS; i++) begin
core_rsp_bank_select[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
end
reg [NUM_REQS-1:0][(`PORTS_BITS + `BANK_SELECT_BITS)-1:0] bank_select_table;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_r, per_bank_core_rsp_sent;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_sent_n;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_rsp_sent_n[i] = per_bank_core_rsp_sent_r[i] | per_bank_core_rsp_sent[i];
end
// Per-bank, per-port record of response ports already forwarded.
// per_bank_core_rsp_sent_n[i] is this register OR-ed with the ports being
// sent in the current cycle (per_bank_core_rsp_sent[i]).
always @(posedge clk) begin
if (reset) begin
per_bank_core_rsp_sent_r <= '0;
end else begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
// Clear the record once the accumulated mask equals the bank's port mask;
// otherwise keep accumulating.
if (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]) begin
per_bank_core_rsp_sent_r[i] <= '0;
end else begin
per_bank_core_rsp_sent_r[i] <= per_bank_core_rsp_sent_n[i];
end
end
end
end
// Route each pending port (bank valid, port in pmask, not yet sent) to the
// response slot named by its tid, with its own tag and data.
// bank_select_table records which {port, bank} pair filled each slot.
// The last assignment wins: if several pending ports share a tid, the
// lowest-index bank is kept, and within it the highest-index port.
always @(*) begin
core_rsp_valid_unqual = '0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
for (integer p = 0; p < NUM_PORTS; ++p) begin
if (per_bank_core_rsp_valid[i]
&& per_bank_core_rsp_pmask[i][p]
&& !per_bank_core_rsp_sent_r[i][p]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_tag[i][p];
core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p];
// Packed with the port index in the upper bits and the bank index in the lower bits.
bank_select_table[per_bank_core_rsp_tid[i][p]] = {`PORTS_BITS'(p), `BANK_SELECT_BITS'(i)};
end
end
end
end
// Map per-slot readiness back to the bank and port that filled each slot.
// A port counts as sent this cycle when its slot is valid and
// core_rsp_ready_unqual is high for that slot.
always @(*) begin
per_bank_core_rsp_sent = '0;
for (integer i = 0; i < NUM_REQS; i++) begin
// bank_select_table is 'x for slots that are not valid, hence the guard.
if (core_rsp_valid_unqual[i]) begin
// Bank index is the low BANK_SELECT_BITS bits; port index is the next PORTS_BITS bits.
per_bank_core_rsp_sent[bank_select_table[i][0 +: `BANK_SELECT_BITS]][bank_select_table[i][`BANK_SELECT_BITS +: `PORTS_BITS]] = core_rsp_ready_unqual[i];
end
end
end
// A bank is ready when the ports already sent, plus those sent this cycle
// (per_bank_core_rsp_sent_n), equal the bank's port mask.
always @(*) begin
for (integer i = 0; i < NUM_BANKS; i++) begin
per_bank_core_rsp_ready_r[i] = (per_bank_core_rsp_sent_n[i] == per_bank_core_rsp_pmask[i]);
end
end
end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask)
reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
bank_select_table = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
end
end
end
always @(*) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
per_bank_core_rsp_ready_r[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]]
&& bank_select_table[per_bank_core_rsp_tid[i]][i];
end
end
end
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
) pipe_reg (
) skid_buf (
.clk (clk),
.reset (reset),
.valid_in (core_rsp_valid_unqual[i]),
@ -167,9 +269,7 @@ module VX_core_rsp_merge #(
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_rsp_ready[i] = core_rsp_bank_select[i];
end
assign per_bank_core_rsp_ready = per_bank_core_rsp_ready_r;
end else begin

View file

@ -32,13 +32,14 @@ module VX_data_access #(
// reading
input wire readen,
output wire [`CACHE_LINE_WIDTH-1:0] rdata,
output wire [`CACHE_LINE_WIDTH-1:0] read_data,
// writing
input wire writeen,
input wire is_fill,
input wire [CACHE_LINE_SIZE-1:0] byteen,
input wire [`CACHE_LINE_WIDTH-1:0] wdata
input wire [`CACHE_LINE_WIDTH-1:0] write_data,
input wire [`CACHE_LINE_WIDTH-1:0] fill_data
);
`UNUSED_PARAM (CACHE_ID)
@ -50,29 +51,34 @@ module VX_data_access #(
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
wire [`LINE_SELECT_BITS-1:0] line_addr;
wire [BYTEENW-1:0] byte_enable;
wire [`CACHE_LINE_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
assign line_addr = addr[`LINE_SELECT_BITS-1:0];
if (WRITE_ENABLE) begin
assign byte_enable = is_fill ? {BYTEENW{1'b1}} : byteen;
assign wren = is_fill ? {BYTEENW{writeen}} : (byteen & {BYTEENW{writeen}});
assign wdata = is_fill ? fill_data : write_data;
end else begin
`UNUSED_VAR (byteen)
`UNUSED_VAR (is_fill)
assign byte_enable = 1'b1;
`UNUSED_VAR (byteen)
`UNUSED_VAR (write_data)
assign wren = writeen;
assign wdata = fill_data;
end
VX_sp_ram #(
.DATAW (CACHE_LINE_SIZE * 8),
.SIZE (`LINES_PER_BANK),
.BYTEENW (BYTEENW),
.RWCHECK (1)
.DATAW (CACHE_LINE_SIZE * 8),
.SIZE (`LINES_PER_BANK),
.BYTEENW (BYTEENW),
.NO_RWCHECK (1)
) data_store (
.clk(clk),
.addr(line_addr),
.wren({BYTEENW{writeen}} & byte_enable),
.din(wdata),
.dout(rdata)
.clk (clk),
.addr (line_addr),
.wren (wren),
.wdata (wdata),
.rden (1'b1),
.rdata (read_data)
);
`UNUSED_VAR (stall)
@ -81,13 +87,13 @@ module VX_data_access #(
always @(posedge clk) begin
if (writeen && ~stall) begin
if (is_fill) begin
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata);
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
end else begin
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata);
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, wren, line_addr, write_data);
end
end
if (readen && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata);
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, read_data);
end
end
`endif

View file

@ -102,22 +102,20 @@ module VX_miss_resrv #(
end
end
VX_priority_encoder #(
.N (MSHR_SIZE)
) dequeue_pe (
.data_in (valid_table_x & ready_table_x),
.index (dequeue_id_x),
.valid_out (dequeue_val_x),
`UNUSED_PIN (onehot)
VX_lzc #(
.WIDTH (MSHR_SIZE)
) dequeue_sel (
.in_i (valid_table_x & ready_table_x),
.cnt_o (dequeue_id_x),
.valid_o (dequeue_val_x)
);
VX_priority_encoder #(
.N (MSHR_SIZE)
) allocate_pe (
.data_in (~valid_table_n),
.index (allocate_id_n),
.valid_out (allocate_rdy_n),
`UNUSED_PIN (onehot)
VX_lzc #(
.WIDTH (MSHR_SIZE)
) allocate_sel (
.in_i (~valid_table_n),
.cnt_o (allocate_id_n),
.valid_o (allocate_rdy_n)
);
always @(*) begin
@ -171,17 +169,17 @@ module VX_miss_resrv #(
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #(
.DATAW (`MSHR_DATA_WIDTH),
.SIZE (MSHR_SIZE),
.RWCHECK (1),
.FASTRAM (1)
.DATAW (`MSHR_DATA_WIDTH),
.SIZE (MSHR_SIZE),
.LUTRAM (1)
) entries (
.clk (clk),
.waddr (allocate_id_r),
.raddr (dequeue_id_r),
.wren (allocate_valid),
.din (allocate_data),
.dout (dequeue_data)
.clk (clk),
.waddr (allocate_id_r),
.raddr (dequeue_id_r),
.wren (allocate_valid),
.wdata (allocate_data),
.rden (1'b1),
.rdata (dequeue_data)
);
assign allocate_ready = allocate_rdy_r;

View file

@ -1,6 +1,7 @@
`include "VX_cache_define.vh"
module VX_nc_bypass #(
parameter NUM_PORTS = 1,
parameter NUM_REQS = 1,
parameter NUM_RSP_TAGS = 0,
parameter NC_TAG_BIT = 0,
@ -10,13 +11,14 @@ module VX_nc_bypass #(
parameter CORE_TAG_IN_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1,
parameter MEM_DATA_SIZE = 1,
parameter MEM_DATA_SIZE = 1,
parameter MEM_TAG_IN_WIDTH = 1,
parameter MEM_TAG_OUT_WIDTH = 1,
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1
localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8,
localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8,
localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1,
localparam MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE))
) (
input wire clk,
input wire reset,
@ -57,8 +59,10 @@ module VX_nc_bypass #(
input wire mem_req_valid_in,
input wire mem_req_rw_in,
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [MEM_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [MEM_DATA_WIDTH-1:0] mem_req_data_in,
input wire [NUM_PORTS-1:0] mem_req_pmask_in,
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in,
input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in,
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
output wire mem_req_ready_in,
@ -66,8 +70,10 @@ module VX_nc_bypass #(
output wire mem_req_valid_out,
output wire mem_req_rw_out,
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
output wire [MEM_DATA_SIZE-1:0] mem_req_byteen_out,
output wire [MEM_DATA_WIDTH-1:0] mem_req_data_out,
output wire [NUM_PORTS-1:0] mem_req_pmask_out,
output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out,
output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out,
output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out,
output wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_tag_out,
input wire mem_req_ready_out,
@ -99,7 +105,6 @@ module VX_nc_bypass #(
// core request handling
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire [NUM_REQS-1:0] core_req_nc_tids;
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
wire core_req_nc_valid;
@ -110,13 +115,12 @@ module VX_nc_bypass #(
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_tids;
VX_priority_encoder #(
.N (NUM_REQS)
VX_lzc #(
.WIDTH (NUM_REQS)
) core_req_sel (
.data_in (core_req_valid_in_nc),
.index (core_req_nc_tid),
.onehot (core_req_nc_sel),
.valid_out (core_req_nc_valid)
.in_i (core_req_valid_in_nc),
.cnt_o (core_req_nc_tid),
.valid_o (core_req_nc_valid)
);
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
@ -139,10 +143,9 @@ module VX_nc_bypass #(
if (NUM_REQS > 1) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
(~mem_req_valid_in && mem_req_ready_out && (core_req_nc_tid == i)) : core_req_ready_out[i];
end
end else begin
`UNUSED_VAR (core_req_nc_sel)
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
end
@ -151,7 +154,7 @@ module VX_nc_bypass #(
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
assign mem_req_ready_in = mem_req_ready_out;
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_nc;
wire [(MEM_TAG_IN_WIDTH+1)-1:0] mem_req_tag_in_c;
VX_bits_insert #(
.N (MEM_TAG_IN_WIDTH),
@ -160,81 +163,69 @@ module VX_nc_bypass #(
) mem_req_tag_insert (
.data_in (mem_req_tag_in),
.sel_in ('0),
.data_out (mem_req_tag_in_nc)
.data_out (mem_req_tag_in_c)
);
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
if (NUM_REQS > 1) begin
wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
end
VX_onehot_mux #(
.DATAW (MUX_DATAW),
.N (NUM_REQS)
) core_req_nc_mux (
.data_in (core_req_nc_mux_in),
.sel_in (core_req_nc_sel),
.data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel})
);
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
end
assign {core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel} = core_req_nc_mux_in[core_req_nc_tid];
end else begin
`UNUSED_VAR (core_req_nc_tid)
assign core_req_tag_in_sel = core_req_tag_in;
assign core_req_data_in_sel = core_req_data_in;
assign core_req_byteen_in_sel = core_req_byteen_in;
assign core_req_addr_in_sel = core_req_addr_in;
assign core_req_rw_in_sel = core_req_rw_in;
end
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
if (D != 0) begin
reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r;
reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[0] = core_req_byteen_in_sel;
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in;
mem_req_wsel_in_r = 'x;
mem_req_wsel_in_r[0] = req_addr_idx;
mem_req_data_in_r = 'x;
mem_req_data_in_r[0] = core_req_data_in_sel;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'({req_addr_idx, core_req_tag_in});
end else begin
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_nc) : MEM_TAG_OUT_WIDTH'(core_req_tag_in);
end
assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1);
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
`UNUSED_VAR (mem_req_wsel_in)
`UNUSED_VAR (mem_req_pmask_in)
assign mem_req_pmask_out = 0;
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
assign mem_req_wsel_out = 0;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
end
// core response handling
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_unqual;
wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_c;
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
@ -246,7 +237,7 @@ module VX_nc_bypass #(
) core_rsp_tag_insert (
.data_in (core_rsp_tag_in[i]),
.sel_in ('0),
.data_out (core_rsp_tag_out_unqual[i])
.data_out (core_rsp_tag_out_c[i])
);
end
@ -272,14 +263,14 @@ module VX_nc_bypass #(
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in;
end
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_unqual[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_c[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
end
end else begin
assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc;
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_unqual : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_c : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0];
assign core_rsp_ready_in = core_rsp_ready_out;
if (NUM_REQS > 1) begin

View file

@ -171,16 +171,17 @@ module VX_shared_mem #(
&& creq_out_fire;
VX_sp_ram #(
.DATAW (`WORD_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (WORD_SIZE),
.RWCHECK (1)
.DATAW (`WORD_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (WORD_SIZE),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.addr (per_bank_core_req_addr[i]),
.wren ({WORD_SIZE{wren}} & per_bank_core_req_byteen[i]),
.din (per_bank_core_req_data[i]),
.dout (per_bank_core_rsp_data[i])
.clk (clk),
.addr (per_bank_core_req_addr[i]),
.wren ({WORD_SIZE{wren}} & per_bank_core_req_byteen[i]),
.wdata (per_bank_core_req_data[i]),
.rden (1'b1),
.rdata (per_bank_core_rsp_data[i])
);
end
@ -216,18 +217,19 @@ module VX_shared_mem #(
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
always @(*) begin
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
core_rsp_tag_in = 'x;
bank_rsp_sel_cur = 0;
// Select the response tag from the banks flagged in per_bank_req_reads
// and not flagged in bank_rsp_sel_prv.
// Banks are scanned from high to low and the last assignment wins, so the
// lowest-index qualifying bank supplies the tag.
always @(*) begin
// Stays 'x when no bank qualifies.
core_rsp_tag_in = 'x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_req_reads[i] && ~bank_rsp_sel_prv[i]) begin
core_rsp_tag_in = per_bank_core_req_tag[i];
end
end
end
always @(*) begin
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
bank_rsp_sel_cur = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_req_valid[i]
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
@ -278,13 +280,16 @@ module VX_shared_mem #(
reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
`IGNORE_UNUSED_END
always @(*) begin
// Select the tag of the lowest-index bank with a valid request.
// Banks are scanned from high to low and the last assignment wins.
always @(*) begin
// Stays 'x when no bank has a valid request.
core_req_tag_sel ='x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_req_valid[i]) begin
core_req_tag_sel = per_bank_core_req_tag[i];
end
end
end
always @(*) begin
is_multi_tag_req = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]

View file

@ -46,16 +46,16 @@ module VX_tag_access #(
wire [`LINE_SELECT_BITS-1:0] line_addr = addr [`LINE_SELECT_BITS-1:0];
VX_sp_ram #(
.DATAW(`TAG_SELECT_BITS + 1),
.SIZE(`LINES_PER_BANK),
.INITZERO(1),
.RWCHECK(1)
.DATAW (`TAG_SELECT_BITS + 1),
.SIZE (`LINES_PER_BANK),
.NO_RWCHECK (1)
) tag_store (
.clk(clk),
.addr(line_addr),
.wren(fill),
.din({!is_flush, line_tag}),
.dout({read_valid, read_tag})
.clk( clk),
.addr (line_addr),
.wren (fill),
.wdata ({!is_flush, line_tag}),
.rden (1'b1),
.rdata ({read_valid, read_tag})
);
assign tag_match = read_valid && (line_tag == read_tag);

View file

@ -15,7 +15,7 @@ module VX_fp_cvt #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire is_itof,
input wire is_signed,
@ -101,7 +101,7 @@ module VX_fp_cvt #(
wire stall;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
@ -167,7 +167,7 @@ module VX_fp_cvt #(
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
.DATAW (1 + TAGW + 1 + `INST_FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
@ -253,7 +253,7 @@ module VX_fp_cvt #(
wire [LANES-1:0] of_before_round_s2;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
.DATAW (1 + TAGW + 1 + 1 + `INST_FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
.RESETW (1)
) pipe_reg2 (
.clk (clk),
@ -435,7 +435,7 @@ module VX_fp_cvt #(
assign stall = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFG_BITS)),
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFLAGS_BITS)),
.RESETW (1)
) pipe_reg4 (
.clk (clk),

View file

@ -16,7 +16,7 @@ module VX_fp_div #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
input wire [LANES-1:0][31:0] datab,

View file

@ -16,7 +16,7 @@ module VX_fp_fma #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire do_madd,
input wire do_sub,

View file

@ -15,8 +15,8 @@ module VX_fp_ncomp #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
input wire [LANES-1:0][31:0] datab,
@ -77,8 +77,8 @@ module VX_fp_ncomp #(
wire valid_in_s0;
wire [TAGW-1:0] tag_in_s0;
wire [`FPU_BITS-1:0] op_type_s0;
wire [`FRM_BITS-1:0] frm_s0;
wire [`INST_FPU_BITS-1:0] op_type_s0;
wire [`INST_FRM_BITS-1:0] frm_s0;
wire [LANES-1:0][31:0] dataa_s0, datab_s0;
wire [LANES-1:0] a_sign_s0, b_sign_s0;
wire [LANES-1:0][7:0] a_exponent_s0;
@ -89,7 +89,7 @@ module VX_fp_ncomp #(
wire stall;
VX_pipe_register #(
.DATAW (1 + TAGW + `FPU_BITS + `FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_type_t) + 1 + 1)),
.RESETW (1),
.DEPTH (0)
) pipe_reg0 (
@ -164,7 +164,7 @@ module VX_fp_ncomp #(
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (frm_s0)
`FRM_RNE: begin // LE
`INST_FRM_RNE: begin // LE
fcmp_fflags[i] = 5'h0;
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
@ -173,7 +173,7 @@ module VX_fp_ncomp #(
fcmp_res[i] = {31'h0, (a_smaller_s0[i] | ab_equal_s0[i])};
end
end
`FRM_RTZ: begin // LS
`INST_FRM_RTZ: begin // LS
fcmp_fflags[i] = 5'h0;
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
@ -182,7 +182,7 @@ module VX_fp_ncomp #(
fcmp_res[i] = {31'h0, (a_smaller_s0[i] & ~ab_equal_s0[i])};
end
end
`FRM_RDN: begin // EQ
`INST_FRM_RDN: begin // EQ
fcmp_fflags[i] = 5'h0;
if (a_type_s0[i].is_nan || b_type_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
@ -207,11 +207,11 @@ module VX_fp_ncomp #(
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (op_type_s0)
`FPU_CLASS: begin
`INST_FPU_CLASS: begin
tmp_result[i] = fclass_mask[i];
tmp_fflags[i] = 'x;
end
`FPU_CMP: begin
`INST_FPU_CMP: begin
tmp_result[i] = fcmp_res[i];
tmp_fflags[i] = fcmp_fflags[i];
end
@ -238,15 +238,15 @@ module VX_fp_ncomp #(
end
end
wire has_fflags_s0 = ((op_type_s0 == `FPU_MISC)
&& (frm_s0 == 3 // MIN
|| frm_s0 == 4)) // MAX
|| (op_type_s0 == `FPU_CMP); // CMP
wire has_fflags_s0 = ((op_type_s0 == `INST_FPU_MISC)
&& (frm_s0 == 3 // MIN
|| frm_s0 == 4)) // MAX
|| (op_type_s0 == `INST_FPU_CMP); // CMP
assign stall = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS)),
.DATAW (1 + TAGW + (LANES * 32) + 1 + (LANES * `FFLAGS_BITS)),
.RESETW (1)
) pipe_reg1 (
.clk (clk),

View file

@ -34,7 +34,7 @@ module VX_fp_rounding #(
always @(*) begin
case (rnd_mode_i)
`FRM_RNE: // Decide according to round/sticky bits
`INST_FRM_RNE: // Decide according to round/sticky bits
case (round_sticky_bits_i)
2'b00,
2'b01: round_up = 1'b0; // < ulp/2 away, round down
@ -42,10 +42,10 @@ module VX_fp_rounding #(
2'b11: round_up = 1'b1; // > ulp/2 away, round up
default: round_up = 1'bx;
endcase
`FRM_RTZ: round_up = 1'b0; // always round down
`FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
`FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
`FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
`INST_FRM_RTZ: round_up = 1'b0; // always round down
`INST_FRM_RDN: round_up = (| round_sticky_bits_i) & sign_i; // to 0 if +, away if -
`INST_FRM_RUP: round_up = (| round_sticky_bits_i) & ~sign_i; // to 0 if -, away if +
`INST_FRM_RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up
default: round_up = 1'bx; // propagate x
endcase
end
@ -58,7 +58,7 @@ module VX_fp_rounding #(
// In case of effective subtraction (thus signs of addition operands must have differed) and a
// true zero result, the result sign is '-' in case of RDN and '+' for other modes.
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `FRM_RDN)
assign sign_o = (exact_zero_o && effective_subtraction_i) ? (rnd_mode_i == `INST_FRM_RDN)
: sign_i;
endmodule

View file

@ -16,7 +16,7 @@ module VX_fp_sqrt #(
input wire [TAGW-1:0] tag_in,
input wire [`FRM_BITS-1:0] frm,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
output wire [LANES-1:0][31:0] result,

View file

@ -14,8 +14,8 @@ module VX_fpu_dpi #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@ -76,21 +76,21 @@ module VX_fpu_dpi #(
is_fsgnjx = 0;
case (op_type)
`FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
`FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
`FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
`FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
`FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
`FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
`FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
`FPU_DIV: begin core_select = FPU_DIV; end
`FPU_SQRT: begin core_select = FPU_SQRT; end
`FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
`FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
`FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
`FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
`FPU_CMP: begin core_select = FPU_NCP;
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
`INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end
`INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end
`INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end
`INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end
`INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
`INST_FPU_DIV: begin core_select = FPU_DIV; end
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
`INST_FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
`INST_FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
`INST_FPU_CMP: begin core_select = FPU_NCP;
is_fle = (frm == 0);
is_flt = (frm == 1);
is_feq = (frm == 2);

View file

@ -11,8 +11,8 @@ module VX_fpu_fpga #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@ -54,19 +54,19 @@ module VX_fpu_fpga #(
is_itof = 0;
is_signed = 0;
case (op_type)
`FPU_ADD: begin core_select = FPU_FMA; end
`FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
`FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
`FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
`FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
`FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
`FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
`FPU_DIV: begin core_select = FPU_DIV; end
`FPU_SQRT: begin core_select = FPU_SQRT; end
`FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
`FPU_CVTWUS: begin core_select = FPU_CVT; end
`FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
`FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_ADD: begin core_select = FPU_FMA; end
`INST_FPU_SUB: begin core_select = FPU_FMA; do_sub = 1; end
`INST_FPU_MUL: begin core_select = FPU_FMA; do_neg = 1; end
`INST_FPU_MADD: begin core_select = FPU_FMA; do_madd = 1; end
`INST_FPU_MSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; end
`INST_FPU_NMADD: begin core_select = FPU_FMA; do_madd = 1; do_neg = 1; end
`INST_FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
`INST_FPU_DIV: begin core_select = FPU_DIV; end
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
`INST_FPU_CVTWUS: begin core_select = FPU_CVT; end
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
`INST_FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
default: begin core_select = FPU_NCP; end
endcase
end

View file

@ -19,8 +19,8 @@ module VX_fpu_fpnew
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@ -81,7 +81,7 @@ module VX_fpu_fpnew
fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status;
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg [`INST_FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
reg fpu_has_fflags, fpu_has_fflags_out;
@ -95,38 +95,38 @@ module VX_fpu_fpnew
fpu_operands[2] = datac;
case (op_type)
`FPU_ADD: begin
`INST_FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
end
`FPU_SUB: begin
`INST_FPU_SUB: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
fpu_op_mod = 1;
end
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
`FPU_MISC: begin
`INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`INST_FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`INST_FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`INST_FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`INST_FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`INST_FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
`INST_FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
`INST_FPU_MISC: begin
case (frm)
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RNE; fpu_has_fflags = 0; end
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RTZ; fpu_has_fflags = 0; end
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RDN; fpu_has_fflags = 0; end
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RNE; end
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RTZ; end
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RUP; fpu_has_fflags = 0; end
endcase
end
default:;

View file

@ -10,8 +10,8 @@ interface VX_alu_req_if ();
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [31:0] next_PC;
wire [`ALU_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire [`INST_ALU_BITS-1:0] op_type;
wire [`INST_MOD_BITS-1:0] op_mod;
wire use_PC;
wire use_imm;
wire [31:0] imm;

View file

@ -9,7 +9,7 @@ interface VX_csr_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`CSR_BITS-1:0] op_type;
wire [`INST_CSR_BITS-1:0] op_type;
wire [`CSR_ADDR_BITS-1:0] addr;
wire [31:0] rs1_data;
wire use_imm;

View file

@ -10,8 +10,8 @@ interface VX_decode_if ();
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`EX_BITS-1:0] ex_type;
wire [`OP_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire [`INST_OP_BITS-1:0] op_type;
wire [`INST_MOD_BITS-1:0] op_mod;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NR_BITS-1:0] rs1;

View file

@ -9,8 +9,8 @@ interface VX_fpu_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`FPU_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire [`INST_FPU_BITS-1:0] op_type;
wire [`INST_MOD_BITS-1:0] op_mod;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;

View file

@ -10,7 +10,7 @@ interface VX_fpu_to_csr_if ();
fflags_t write_fflags;
wire [`NW_BITS-1:0] read_wid;
wire [`FRM_BITS-1:0] read_frm;
wire [`INST_FRM_BITS-1:0] read_frm;
endinterface

View file

@ -11,7 +11,7 @@ interface VX_gpu_req_if();
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [31:0] next_PC;
wire [`GPU_BITS-1:0] op_type;
wire [`INST_GPU_BITS-1:0] op_type;
wire [`NT_BITS-1:0] tid;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [31:0] rs2_data;

View file

@ -11,8 +11,8 @@ interface VX_ibuffer_if ();
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`EX_BITS-1:0] ex_type;
wire [`OP_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire [`INST_OP_BITS-1:0] op_type;
wire [`INST_MOD_BITS-1:0] op_mod;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NR_BITS-1:0] rs1;

View file

@ -9,7 +9,7 @@ interface VX_lsu_req_if ();
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`LSU_BITS-1:0] op_type;
wire [`INST_LSU_BITS-1:0] op_type;
wire is_fence;
wire [`NUM_THREADS-1:0][31:0] store_data;
wire [`NUM_THREADS-1:0][31:0] base_addr;

View file

@ -2,178 +2,259 @@
`TRACING_OFF
module VX_dp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
parameter OUTPUT_REG = 0,
parameter RWCHECK = 1,
parameter ADDRW = $clog2(SIZE),
parameter FASTRAM = 0,
parameter INITZERO = 0
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
parameter OUTPUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter ADDRW = $clog2(SIZE),
parameter LUTRAM = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0
) (
input wire clk,
input wire [ADDRW-1:0] waddr,
input wire [ADDRW-1:0] raddr,
input wire clk,
input wire [BYTEENW-1:0] wren,
input wire [DATAW-1:0] din,
output wire [DATAW-1:0] dout
input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata,
input wire rden,
input wire [ADDRW-1:0] raddr,
output wire [DATAW-1:0] rdata
);
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
if (FASTRAM) begin
`define RAM_INITIALIZATION \
if (INIT_ENABLE) begin \
if (INIT_FILE != "") begin \
initial $readmemh(INIT_FILE, ram); \
end else begin \
initial ram = '{default: INIT_VALUE}; \
end \
end
`ifdef SYNTHESIS
if (LUTRAM) begin
if (OUTPUT_REG) begin
reg [DATAW-1:0] dout_r;
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i] <= din[i * 8 +: 8];
ram[waddr][i] <= wdata[i * 8 +: 8];
end
dout_r <= mem[raddr];
if (rden)
rdata_r <= ram[raddr];
end
end else begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
dout_r <= mem[raddr];
ram[waddr] <= wdata;
if (rden)
rdata_r <= ram[raddr];
end
end
assign dout = dout_r;
assign rdata = rdata_r;
end else begin
`UNUSED_VAR (rden)
if (BYTEENW > 1) begin
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i] <= din[i * 8 +: 8];
ram[waddr][i] <= wdata[i * 8 +: 8];
end
end
assign dout = mem[raddr];
assign rdata = ram[raddr];
end else begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
ram[waddr] <= wdata;
end
assign dout = mem[raddr];
assign rdata = ram[raddr];
end
end
end else begin
if (OUTPUT_REG) begin
reg [DATAW-1:0] dout_r;
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i] <= din[i * 8 +: 8];
ram[waddr][i] <= wdata[i * 8 +: 8];
end
dout_r <= mem[raddr];
if (rden)
rdata_r <= ram[raddr];
end
end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
dout_r <= mem[raddr];
ram[waddr] <= wdata;
if (rden)
rdata_r <= ram[raddr];
end
end
assign dout = dout_r;
assign rdata = rdata_r;
end else begin
if (RWCHECK) begin
`UNUSED_VAR (rden)
if (NO_RWCHECK) begin
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i] <= din[i * 8 +: 8];
ram[waddr][i] <= wdata[i * 8 +: 8];
end
end
assign dout = mem[raddr];
assign rdata = ram[raddr];
end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
ram[waddr] <= wdata;
end
assign dout = mem[raddr];
assign rdata = ram[raddr];
end
end else begin
if (BYTEENW > 1) begin
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[waddr][i] <= din[i * 8 +: 8];
ram[waddr][i] <= wdata[i * 8 +: 8];
end
end
assign dout = mem[raddr];
assign rdata = ram[raddr];
end else begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
ram[waddr] <= wdata;
end
assign dout = mem[raddr];
assign rdata = ram[raddr];
end
end
end
end
`else
if (OUTPUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
ram[waddr][i] <= wdata[i * 8 +: 8];
end
if (rden)
rdata_r <= ram[raddr];
end
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
ram[waddr] <= wdata;
if (rden)
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
end else begin
`UNUSED_VAR (rden)
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_waddr;
reg prev_write;
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
ram[waddr][i] <= wdata[i * 8 +: 8];
end
prev_write <= (| wren);
prev_data <= ram[waddr];
prev_waddr <= waddr;
end
if (LUTRAM || !NO_RWCHECK) begin
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_waddr)
assign rdata = ram[raddr];
end else begin
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
end
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_waddr;
reg prev_write;
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
ram[waddr] <= wdata;
prev_write <= wren;
prev_data <= ram[waddr];
prev_waddr <= waddr;
end
if (LUTRAM || !NO_RWCHECK) begin
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_waddr)
assign rdata = ram[raddr];
end else begin
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
end
end
end
`endif
endmodule
`TRACING_ON

View file

@ -5,7 +5,7 @@ module VX_elastic_buffer #(
parameter DATAW = 1,
parameter SIZE = 2,
parameter OUTPUT_REG = 0,
parameter FASTRAM = 0
parameter LUTRAM = 0
) (
input wire clk,
input wire reset,
@ -56,7 +56,7 @@ module VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (SIZE),
.OUTPUT_REG (OUTPUT_REG),
.FASTRAM (FASTRAM)
.LUTRAM (LUTRAM)
) queue (
.clk (clk),
.reset (reset),

View file

@ -9,7 +9,7 @@ module VX_fifo_queue #(
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1),
parameter OUTPUT_REG = 0,
parameter FASTRAM = 1
parameter LUTRAM = 1
) (
input wire clk,
input wire reset,
@ -157,15 +157,15 @@ module VX_fifo_queue #(
.DATAW (DATAW),
.SIZE (SIZE),
.OUTPUT_REG (0),
.RWCHECK (1),
.FASTRAM (FASTRAM)
.LUTRAM (LUTRAM)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_r),
.raddr(rd_ptr_r),
.wren(push),
.din(data_in),
.dout(data_out)
.wren (push),
.waddr (wr_ptr_r),
.wdata (data_in),
.rden (1'b1),
.raddr (rd_ptr_r),
.rdata (data_out)
);
end else begin
@ -200,15 +200,15 @@ module VX_fifo_queue #(
.DATAW (DATAW),
.SIZE (SIZE),
.OUTPUT_REG (0),
.RWCHECK (1),
.FASTRAM (FASTRAM)
.LUTRAM (LUTRAM)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_r),
.raddr(rd_ptr_n_r),
.wren(push),
.din(data_in),
.dout(dout)
.clk (clk),
.wren (push),
.waddr (wr_ptr_r),
.wdata (data_in),
.rden (1'b1),
.raddr (rd_ptr_n_r),
.rdata (dout)
);
always @(posedge clk) begin

View file

@ -2,10 +2,10 @@
`TRACING_OFF
module VX_index_buffer #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter FASTRAM = 1,
parameter ADDRW = `LOG2UP(SIZE)
parameter DATAW = 1,
parameter SIZE = 1,
parameter LUTRAM = 1,
parameter ADDRW = `LOG2UP(SIZE)
) (
input wire clk,
input wire reset,
@ -29,13 +29,12 @@ module VX_index_buffer #(
wire free_valid;
wire [ADDRW-1:0] free_index;
VX_priority_encoder #(
.N (SIZE)
VX_lzc #(
.WIDTH (SIZE)
) free_slots_encoder (
.data_in (free_slots_n),
.index (free_index),
`UNUSED_PIN (onehot),
.valid_out (free_valid)
.in_i (free_slots_n),
.cnt_o (free_index),
.valid_o (free_valid)
);
always @(*) begin
@ -69,17 +68,17 @@ module VX_index_buffer #(
end
VX_dp_ram #(
.DATAW(DATAW),
.SIZE(SIZE),
.RWCHECK(1),
.FASTRAM(FASTRAM)
.DATAW (DATAW),
.SIZE (SIZE),
.LUTRAM (LUTRAM)
) data_table (
.clk(clk),
.waddr(write_addr),
.raddr(read_addr),
.wren(acquire_slot),
.din(write_data),
.dout(read_data)
.clk (clk),
.wren (acquire_slot),
.waddr (write_addr),
.wdata (write_data),
.rden (1'b1),
.raddr (read_addr),
.rdata (read_data)
);
assign write_addr = write_addr_r;

View file

@ -1,7 +1,7 @@
`include "VX_platform.vh"
// Fast encoder using parallel prefix computation
// Adapter from BaseJump STL: http://bjump.org/data_out.html
// Adapted from BaseJump STL: http://bjump.org/data_out.html
`TRACING_OFF
module VX_onehot_encoder #(

View file

@ -12,46 +12,48 @@ module VX_priority_encoder #(
output wire [LN-1:0] index,
output wire valid_out
);
wire [N-1:0] reversed;
if (REVERSE) begin
for (genvar i = 0; i < N; ++i) begin
assign reversed[N-i-1] = data_in[i];
end
end else begin
assign reversed = data_in;
end
if (N == 1) begin
assign onehot = data_in;
assign onehot = reversed;
assign index = 0;
assign valid_out = data_in;
assign valid_out = reversed;
end else if (N == 2) begin
assign onehot = {~data_in[REVERSE], data_in[REVERSE]};
assign index = ~data_in[REVERSE];
assign valid_out = (| data_in);
assign onehot = {~reversed[0], reversed[0]};
assign index = ~reversed[0];
assign valid_out = (| reversed);
end else if (MODEL == 1) begin
wire [N-1:0] scan_lo;
VX_scan #(
.N (N),
.OP (2),
.REVERSE (REVERSE)
.N (N),
.OP (2)
) scan (
.data_in (data_in),
.data_in (reversed),
.data_out (scan_lo)
);
if (REVERSE) begin
assign onehot = scan_lo & {1'b1, (~scan_lo[N-1:1])};
assign valid_out = scan_lo[0];
end else begin
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
assign valid_out = scan_lo[N-1];
end
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
assign valid_out = scan_lo[N-1];
VX_onehot_encoder #(
.N (N),
.REVERSE (REVERSE)
.N (N)
) onehot_encoder (
.data_in (onehot),
.data_out (index),
.data_out (index),
`UNUSED_PIN (valid_out)
);
@ -60,70 +62,54 @@ module VX_priority_encoder #(
`IGNORE_WARNINGS_BEGIN
wire [N-1:0] higher_pri_regs;
`IGNORE_WARNINGS_END
assign higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | data_in[N-2:0];
assign higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | reversed[N-2:0];
assign higher_pri_regs[0] = 1'b0;
assign onehot[N-1:0] = data_in[N-1:0] & ~higher_pri_regs[N-1:0];
assign onehot[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0];
VX_onehot_encoder #(
.N (N),
.REVERSE (REVERSE)
.N (N)
) onehot_encoder (
.data_in (onehot),
.data_out (index),
`UNUSED_PIN (valid_out)
);
assign valid_out = (| data_in);
assign valid_out = (| reversed);
end else if (MODEL == 3) begin
assign onehot = data_in & ~(data_in-1);
assign onehot = reversed & ~(reversed-1);
VX_onehot_encoder #(
.N (N),
.REVERSE (REVERSE)
.N (N)
) onehot_encoder (
.data_in (onehot),
.data_out (index),
`UNUSED_PIN (valid_out)
);
assign valid_out = (| data_in);
assign valid_out = (| reversed);
end else begin
reg [LN-1:0] index_r;
reg [N-1:0] onehot_r;
if (REVERSE) begin
always @(*) begin
index_r = 'x;
onehot_r = 'x;
for (integer i = 0; i < N; ++i) begin
if (data_in[i]) begin
index_r = LN'(i);
onehot_r = 0;
onehot_r[i] = 1'b1;
end
end
end
end else begin
always @(*) begin
index_r = 'x;
onehot_r = 'x;
for (integer i = N-1; i >= 0; --i) begin
if (data_in[i]) begin
index_r = LN'(i);
onehot_r = 0;
onehot_r[i] = 1'b1;
end
always @(*) begin
index_r = 'x;
onehot_r = 'x;
for (integer i = N-1; i >= 0; --i) begin
if (reversed[i]) begin
index_r = LN'(i);
onehot_r = 0;
onehot_r[i] = 1'b1;
end
end
end
assign index = index_r;
assign onehot = onehot_r;
assign valid_out = (| data_in);
assign valid_out = (| reversed);
end

View file

@ -1,26 +1,31 @@
`include "VX_platform.vh"
`TRACING_OFF
module VX_reset_relay #(
parameter ASYNC = 0
parameter N = 1,
parameter DEPTH = 1
) (
input wire clk,
input wire reset,
output wire reset_o
output wire [N-1:0] reset_o
);
(* preserve *) reg reset_r;
if (ASYNC) begin
always @(posedge clk or posedge reset) begin
reset_r <= reset;
end
end else begin
if (DEPTH > 1) begin
`PRESERVE_REG `DISABLE_BRAM reg [N-1:0] reset_r [DEPTH-1:0];
always @(posedge clk) begin
reset_r <= reset;
for (integer i = DEPTH-1; i > 0; --i)
reset_r[i] <= reset_r[i-1];
reset_r[0] <= {N{reset}};
end
assign reset_o = reset_r[DEPTH-1];
end else if (DEPTH == 1) begin
`PRESERVE_REG reg [N-1:0] reset_r;
always @(posedge clk) begin
reset_r <= {N{reset}};
end
assign reset_o = reset_r;
end else begin
`UNUSED_VAR (clk)
assign reset_o = {N{reset}};
end
assign reset_o = reset_r;
endmodule
`TRACING_ON
endmodule

View file

@ -2,177 +2,259 @@
`TRACING_OFF
module VX_sp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
parameter OUTPUT_REG = 0,
parameter RWCHECK = 1,
parameter ADDRW = $clog2(SIZE),
parameter FASTRAM = 0,
parameter INITZERO = 0
) (
input wire clk,
input wire [ADDRW-1:0] addr,
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
parameter OUTPUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter ADDRW = $clog2(SIZE),
parameter LUTRAM = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0
) (
input wire clk,
input wire [ADDRW-1:0] addr,
input wire [BYTEENW-1:0] wren,
input wire [DATAW-1:0] din,
output wire [DATAW-1:0] dout
input wire [DATAW-1:0] wdata,
input wire rden,
output wire [DATAW-1:0] rdata
);
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
if (FASTRAM) begin
`define RAM_INITIALIZATION \
if (INIT_ENABLE) begin \
if (INIT_FILE != "") begin \
initial $readmemh(INIT_FILE, ram); \
end else begin \
initial ram = '{default: INIT_VALUE}; \
end \
end
`ifdef SYNTHESIS
if (LUTRAM) begin
if (OUTPUT_REG) begin
reg [DATAW-1:0] dout_r;
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[addr][i] <= din[i * 8 +: 8];
ram[addr][i] <= wdata[i * 8 +: 8];
end
dout_r <= mem[addr];
if (rden)
rdata_r <= ram[addr];
end
end else begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[addr] <= din;
dout_r <= mem[addr];
ram[addr] <= wdata;
if (rden)
rdata_r <= ram[addr];
end
end
assign dout = dout_r;
assign rdata = rdata_r;
end else begin
`UNUSED_VAR (rden)
if (BYTEENW > 1) begin
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[addr][i] <= din[i * 8 +: 8];
ram[addr][i] <= wdata[i * 8 +: 8];
end
end
assign dout = mem[addr];
assign rdata = ram[addr];
end else begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
`USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[addr] <= din;
ram[addr] <= wdata;
end
assign dout = mem[addr];
assign rdata = ram[addr];
end
end
end else begin
if (OUTPUT_REG) begin
reg [DATAW-1:0] dout_r;
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[addr][i] <= din[i * 8 +: 8];
ram[addr][i] <= wdata[i * 8 +: 8];
end
dout_r <= mem[addr];
if (rden)
rdata_r <= ram[addr];
end
end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[addr] <= din;
dout_r <= mem[addr];
ram[addr] <= wdata;
if (rden)
rdata_r <= ram[addr];
end
end
assign dout = dout_r;
assign rdata = rdata_r;
end else begin
if (RWCHECK) begin
`UNUSED_VAR (rden)
if (NO_RWCHECK) begin
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[addr][i] <= din[i * 8 +: 8];
ram[addr][i] <= wdata[i * 8 +: 8];
end
end
assign dout = mem[addr];
assign rdata = ram[addr];
end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[addr] <= din;
ram[addr] <= wdata;
end
assign dout = mem[addr];
assign rdata = ram[addr];
end
end else begin
if (BYTEENW > 1) begin
`NO_RW_RAM_CHECK reg [BYTEENW-1:0][7:0] mem [SIZE-1:0];
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
mem[addr][i] <= din[i * 8 +: 8];
ram[addr][i] <= wdata[i * 8 +: 8];
end
end
assign dout = mem[addr];
assign rdata = ram[addr];
end else begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
reg [DATAW-1:0] ram [SIZE-1:0];
if (INITZERO) begin
initial mem = '{default: 0};
end
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
mem[addr] <= din;
ram[addr] <= wdata;
end
assign dout = mem[addr];
assign rdata = ram[addr];
end
end
end
end
`else
if (OUTPUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
ram[addr][i] <= wdata[i * 8 +: 8];
end
if (rden)
rdata_r <= ram[addr];
end
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
ram[addr] <= wdata;
if (rden)
rdata_r <= ram[addr];
end
end
assign rdata = rdata_r;
end else begin
`UNUSED_VAR (rden)
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_addr;
reg prev_write;
`RAM_INITIALIZATION
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
if (wren[i])
ram[addr][i] <= wdata[i * 8 +: 8];
end
prev_write <= (| wren);
prev_data <= ram[addr];
prev_addr <= addr;
end
if (LUTRAM || !NO_RWCHECK) begin
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_addr)
assign rdata = ram[addr];
end else begin
assign rdata = (prev_write && (prev_addr == addr)) ? prev_data : ram[addr];
end
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
reg [DATAW-1:0] prev_data;
reg [ADDRW-1:0] prev_addr;
reg prev_write;
`RAM_INITIALIZATION
always @(posedge clk) begin
if (wren)
ram[addr] <= wdata;
prev_write <= wren;
prev_data <= ram[addr];
prev_addr <= addr;
end
if (LUTRAM || !NO_RWCHECK) begin
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_addr)
assign rdata = ram[addr];
end else begin
assign rdata = (prev_write && (prev_addr == addr)) ? prev_data : ram[addr];
end
end
end
`endif
endmodule
`TRACING_ON

View file

@ -19,11 +19,12 @@ module VX_stream_arbiter #(
output wire [LANES-1:0][DATAW-1:0] data_out,
input wire [LANES-1:0] ready_out
);
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
if (NUM_REQS > 1) begin
wire sel_valid;
wire sel_ready;
wire [NUM_REQS-1:0] sel_1hot;
wire sel_valid;
wire sel_ready;
wire [LOG_NUM_REQS-1:0] sel_index;
wire [NUM_REQS-1:0] valid_in_any;
wire [LANES-1:0] ready_in_sel;
@ -50,8 +51,8 @@ module VX_stream_arbiter #(
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
.grant_index (sel_index),
`UNUSED_PIN (grant_onehot)
);
end else if (TYPE == "R") begin
VX_rr_arbiter #(
@ -63,8 +64,8 @@ module VX_stream_arbiter #(
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
.grant_index (sel_index),
`UNUSED_PIN (grant_onehot)
);
end else if (TYPE == "F") begin
VX_fair_arbiter #(
@ -76,8 +77,8 @@ module VX_stream_arbiter #(
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
.grant_index (sel_index),
`UNUSED_PIN (grant_onehot)
);
end else if (TYPE == "M") begin
VX_matrix_arbiter #(
@ -89,8 +90,8 @@ module VX_stream_arbiter #(
.requests (valid_in_any),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
.grant_index (sel_index),
`UNUSED_PIN (grant_onehot)
);
end else begin
$error ("invalid parameter");
@ -105,32 +106,16 @@ module VX_stream_arbiter #(
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valid_data_in[i] = {valid_in[i], data_in[i]};
end
VX_onehot_mux #(
.DATAW (LANES * (1 + DATAW)),
.N (NUM_REQS)
) data_in_mux (
.data_in (valid_data_in),
.sel_in (sel_1hot),
.data_out ({valid_in_sel, data_in_sel})
);
assign {valid_in_sel, data_in_sel} = valid_data_in[sel_index];
`UNUSED_VAR (sel_valid)
end else begin
VX_onehot_mux #(
.DATAW (DATAW),
.N (NUM_REQS)
) data_in_mux (
.data_in (data_in),
.sel_in (sel_1hot),
.data_out (data_in_sel)
);
assign data_in_sel = data_in[sel_index];
assign valid_in_sel = sel_valid;
end
for (genvar i = 0; i < NUM_REQS; i++) begin
assign ready_in[i] = ready_in_sel & {LANES{sel_1hot[i]}};
assign ready_in[i] = ready_in_sel & {LANES{(sel_index == LOG_NUM_REQS'(i))}};
end
for (genvar i = 0; i < LANES; ++i) begin

View file

@ -173,8 +173,8 @@
"issue_tmask":"`NUM_THREADS",
"issue_pc": 32,
"issue_ex_type":"`EX_BITS",
"issue_op_type":"`OP_BITS",
"issue_op_mod":"`MOD_BITS",
"issue_op_type":"`INST_OP_BITS",
"issue_op_mod":"`INST_MOD_BITS",
"issue_wb": 1,
"issue_rd":"`NR_BITS",
"issue_rs1":"`NR_BITS",

7
hw/syn/opae/fpga_prog.sh Executable file
View file

@ -0,0 +1,7 @@
#!/bin/bash
# FPGA programming
# Usage: fpga_prog.sh <bitstream>
# Echoes the fpgaconf command line, then runs fpgaconf on bus 0xaf with the
# bitstream given as the first argument.

# Fail early with a usage message instead of invoking fpgaconf without a bitstream.
if [ $# -lt 1 ]; then
    echo "usage: $0 <bitstream>" >&2
    exit 1
fi

echo "fpgaconf --bus 0xaf $1"
# Quote the argument so a path containing spaces or glob characters is passed
# to fpgaconf as a single, unmodified word.
fpgaconf --bus 0xaf "$1"

View file

@ -23,12 +23,12 @@ set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
set_global_assignment -name MUX_RESTRUCTURE ON
set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
#set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
#set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
#set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
#set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100

View file

@ -21,7 +21,7 @@ VX_SRCS = kernel.c
#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include
CXXFLAGS += -I$(VORTEX_DRV_PATH)/include -I$(VORTEX_RT_PATH)/../hw
LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex

View file

@ -3,6 +3,7 @@
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include <VX_config.h>
#include "testcases.h"
#include "common.h"

View file

@ -1,8 +1,9 @@
# Every .hex file present in the current directory.
ALL_TESTS := $(wildcard *.hex)
# .hex files whose name contains "ud-p-" (presumably the double-precision
# floating-point tests -- TODO confirm against the test-suite naming scheme).
D_TESTS := $(wildcard *ud-p-*.hex)
# .hex files whose name contains "-v-" (presumably the virtual-memory test
# variants -- TODO confirm against the test-suite naming scheme).
V_TESTS := $(wildcard *-v-*.hex)
# Tests removed from the run list: the pattern-based group(s) referenced at the
# start of the value plus the individually listed files.
EXCLUDED_TESTS := $(V_TESTS) rv32si-p-scall.hex rv32si-p-sbreak.hex rv32mi-p-breakpoint.hex rv32ud-p-fclass.hex rv32ua-p-amomax_w.hex rv32ua-p-amoxor_w.hex rv32ud-p-ldst.hex rv32ua-p-amoor_w.hex rv32mi-p-ma_addr.hex rv32ud-p-fdiv.hex rv32ud-p-fcmp.hex rv32mi-p-mcsr.hex rv32ua-p-amoswap_w.hex rv32mi-p-ma_fetch.hex rv32mi-p-csr.hex rv32ua-p-amoadd_w.hex rv32si-p-dirty.hex rv32ud-p-fcvt.hex rv32ui-p-fence_i.hex rv32si-p-csr.hex rv32mi-p-shamt.hex rv32ua-p-amomin_w.hex rv32ua-p-lrsc.hex rv32ud-p-fmadd.hex rv32ud-p-fadd.hex rv32si-p-wfi.hex rv32ua-p-amomaxu_w.hex rv32si-p-ma_fetch.hex rv32ud-p-fmin.hex rv32mi-p-illegal.hex rv32uc-p-rvc.hex rv32mi-p-sbreak.hex rv32ua-p-amominu_w.hex rv32ua-p-amoand_w.hex
EXCLUDED_TESTS := $(V_TESTS) $(D_TESTS) rv32si-p-scall.hex rv32si-p-sbreak.hex rv32mi-p-breakpoint.hex rv32ua-p-amomax_w.hex rv32ua-p-amoxor_w.hex rv32ua-p-amoor_w.hex rv32mi-p-ma_addr.hex rv32mi-p-mcsr.hex rv32ua-p-amoswap_w.hex rv32mi-p-ma_fetch.hex rv32mi-p-csr.hex rv32ua-p-amoadd_w.hex rv32si-p-dirty.hex rv32ui-p-fence_i.hex rv32si-p-csr.hex rv32mi-p-shamt.hex rv32ua-p-amomin_w.hex rv32ua-p-lrsc.hex rv32si-p-wfi.hex rv32ua-p-amomaxu_w.hex rv32si-p-ma_fetch.hex rv32mi-p-illegal.hex rv32uc-p-rvc.hex rv32mi-p-sbreak.hex rv32ua-p-amominu_w.hex rv32ua-p-amoand_w.hex
# Final run list: every entry of ALL_TESTS that is not in EXCLUDED_TESTS.
TESTS := $(filter-out $(EXCLUDED_TESTS), $(ALL_TESTS))