This commit is contained in:
Malik Aki Burton 2021-04-05 14:05:22 -04:00
commit bb576a8f40
39 changed files with 667 additions and 362 deletions

View file

@ -15,6 +15,9 @@ set -e
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
# disable shared memory
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
# Blackbox tests
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"

View file

@ -5,16 +5,19 @@ Description: Makes the build in the opae directory with the specified core
exists, a make clean command is run before the build. Script waits
until the inteldev script or quartus program is finished running.
Usage: ./build.sh -c [1|2|4|8|16] [-p [y|n]]
Usage: ./build.sh -c [1|2|4|8|16] [-p perf] [-w wait]
Options:
-c
Core count (1, 2, 4, 8, or 16).
-p
Performance profiling enable (y or n). Changes the source file in the
Performance profiling enable. Changes the source file in the
opae directory to include/exclude "+define+PERF_ENABLE".
-w
Wait for the build to complete
_______________________________________________________________________________

View file

@ -1,10 +1,23 @@
#!/bin/bash
while getopts c:p: flag
BUILD_DIR=../../hw/syn/opae
perf=0
wait=0
# Parse the command-line flags.
#   -c <n>  core count (stored in $cores, validated after this loop)
#   -p      enable perf counters (sets perf=1)
#   -w      wait for the build to complete (sets wait=1)
#   -h      print usage and exit
# The leading ':' puts getopts in silent mode, so OPTARG holds the offending
# option letter in the ':' (missing argument) and '?' (unknown option) arms.
while getopts :c:pwh flag
do
    case "${flag}" in
        c) cores=${OPTARG};; #1, 2, 4, 8, 16
        p) perf=1;; #perf counters enable
        w) wait=1;; # wait for build to complete
        h) echo "Usage: -c <cores> [-p perf] [-w wait] [-h help]"
           exit 0
           ;;
        :)
           echo "Option -$OPTARG requires an argument" 1>&2
           exit 1
           ;;
        \?)
           echo "Invalid option: -$OPTARG" 1>&2
           exit 1
           ;;
    esac
done
@ -13,25 +26,22 @@ if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
exit 1
fi
cd ../../hw/syn/opae
cd ${BUILD_DIR}
sources_file="./sources_${cores}c.txt"
if [ ${perf:0:1} = "n" ]; then
if grep -v '^ *#' ${sources_file} | grep -Fxq '+define+SYNTHESIS'; then
sed -i 's/+define+PERF_ENABLE/#+define+PERF_ENABLE/' ${sources_file}
elif ! grep -Fxq '#+define+PERF_ENABLE' ${sources_file}; then
sed -i '1s/^/#+define+PERF_ENABLE\n/' ${sources_file}
fi
elif [ ${perf:0:1} = "y" ]; then
# Switch the PERF_ENABLE define in ${sources_file} on or off.
# The script maintains one of two whole-line markers in that file:
#   +define+PERF_ENABLE     (active)
#   #+define+PERF_ENABLE    (commented out)
# The sed patterns are anchored to the whole line so a marker that is already
# in the requested state is never rewritten (no stacking of '#').
if [ "${perf}" = 1 ]; then
    if grep -Fxq '#+define+PERF_ENABLE' "${sources_file}"; then
        # marker exists but is commented out: un-comment it
        sed -i 's/^#+define+PERF_ENABLE$/+define+PERF_ENABLE/' "${sources_file}"
    elif ! grep -Fxq '+define+PERF_ENABLE' "${sources_file}"; then
        # no marker at all: insert the active one as the first line
        sed -i '1s/^/+define+PERF_ENABLE\n/' "${sources_file}"
    fi
else
    if grep -Fxq '+define+PERF_ENABLE' "${sources_file}"; then
        # marker is active: comment it out
        sed -i 's/^+define+PERF_ENABLE$/#+define+PERF_ENABLE/' "${sources_file}"
    elif ! grep -Fxq '#+define+PERF_ENABLE' "${sources_file}"; then
        # no marker at all: insert the commented-out one as the first line
        sed -i '1s/^/#+define+PERF_ENABLE\n/' "${sources_file}"
    fi
fi
if [ -d "./build_fpga_{$cores}c" ]; then
@ -39,12 +49,12 @@ if [ -d "./build_fpga_{$cores}c" ]; then
fi
make "fpga-${cores}c"
sleep 30
pids=($(pgrep -f "${OPAE_PLATFORM_ROOT}|quartus"))
for pid in ${pids[@]}; do
while kill -0 ${pid} 2> /dev/null; do
sleep 30
# When -w was given, block until the processes started by the build have exited.
if [ "${wait}" = 1 ]; then
    # give the build time to spawn its worker processes
    sleep 30
    # Only include OPAE_PLATFORM_ROOT in the pattern when it is non-empty:
    # an empty alternative ("|quartus") can match every command line, which
    # would make this loop wait on unrelated processes.
    pattern="quartus"
    if [ -n "${OPAE_PLATFORM_ROOT:-}" ]; then
        pattern="${OPAE_PLATFORM_ROOT}|quartus"
    fi
    pids=($(pgrep -f "${pattern}"))
    for pid in "${pids[@]}"; do
        # kill -0 sends no signal; it only tests whether the pid still exists
        while kill -0 "${pid}" 2> /dev/null; do
            sleep 30
        done
    done
fi

View file

@ -2,6 +2,6 @@
# Build each core-count configuration in turn: i = 1, 2, 4, 8, 16.
# -p (perf counters) and -w (wait for completion) are plain flags that take
# no value, so each configuration is built exactly once and the next build
# only starts after the previous one has finished.
for ((i=1; i <= 16; i=i*2)); do
    echo "Building ${i} core build..."
    ./build.sh -c "${i}" -p -w
    echo "Done ${i} core build."
done

View file

@ -26,9 +26,9 @@ extern "C" {
void dpi_utof(int a, int frm, int* result, int* fflags);
void dpi_fclss(int a, int* result);
void dpi_fsgnj(int a, int* result);
void dpi_fsgnjn(int a, int* result);
void dpi_fsgnjx(int a, int* result);
void dpi_fsgnj(int a, int b, int* result);
void dpi_fsgnjn(int a, int b, int* result);
void dpi_fsgnjx(int a, int b, int* result);
void dpi_flt(int a, int b, int* result, int* fflags);
void dpi_fle(int a, int b, int* result, int* fflags);
@ -244,21 +244,53 @@ void dpi_fmax(int a, int b, int* result, int* fflags) {
}
// Classifies the IEEE-754 single-precision bit pattern held in `a` and writes
// a one-hot mask to *result:
//   bit 0: -infinity    bit 1: -normal    bit 2: -subnormal   bit 3: -0
//   bit 4: +0           bit 5: +subnormal bit 6: +normal      bit 7: +infinity
//   bit 8: signaling NaN                  bit 9: quiet NaN
// A NaN is quiet when the most-significant fraction bit (bit 22) is set,
// whatever its sign or remaining payload bits; otherwise it is signaling.
void dpi_fclss(int a, int* result) {
  // work on the unsigned bit pattern so the shifts never sign-extend
  const uint32_t bits     = static_cast<uint32_t>(a);
  const bool     fsign    = (bits >> 31) != 0;
  const uint32_t expo     = (bits >> 23) & 0xFF;
  const uint32_t fraction = bits & 0x7FFFFF;

  int r = 0; // clear all bits
  if (expo == 0) {
    if (fraction == 0) {
      r = fsign ? (1 << 3) : (1 << 4); // +/- 0
    } else {
      r = fsign ? (1 << 2) : (1 << 5); // +/- subnormal
    }
  } else if (expo == 0xFF) {
    if (fraction == 0) {
      r = fsign ? (1 << 0) : (1 << 7); // +/- infinity
    } else if (fraction & 0x00400000) {
      r = (1 << 9); // quiet NaN
    } else {
      r = (1 << 8); // signaling NaN
    }
  } else {
    r = fsign ? (1 << 1) : (1 << 6); // +/- normal
  }
  *result = r;
}
void dpi_fsgnj(int a, int* result) {
// TODO
*result = 0;
// Sign injection: *result gets the low 31 bits of `a` combined with the
// sign bit (bit 31) of `b`.
void dpi_fsgnj(int a, int b, int* result) {
  const unsigned magnitude = static_cast<unsigned>(a) & 0x7FFFFFFFu;
  const unsigned sign_of_b = static_cast<unsigned>(b) & 0x80000000u;
  *result = static_cast<int>(magnitude | sign_of_b);
}
void dpi_fsgnjn(int a, int* result) {
// TODO
*result = 0;
// Negated sign injection: *result gets the low 31 bits of `a` combined with
// the inverted sign bit (bit 31) of `b`.
void dpi_fsgnjn(int a, int b, int* result) {
  const unsigned magnitude    = static_cast<unsigned>(a) & 0x7FFFFFFFu;
  const unsigned flipped_sign = ~static_cast<unsigned>(b) & 0x80000000u;
  *result = static_cast<int>(magnitude | flipped_sign);
}
void dpi_fsgnjx(int a, int* result) {
// TODO
*result = 0;
// XOR sign injection: *result is `a` with its sign bit (bit 31) XORed with
// the sign bit of `b`; the low 31 bits of `a` pass through unchanged.
void dpi_fsgnjx(int a, int b, int* result) {
  const unsigned sign_of_b = static_cast<unsigned>(b) & 0x80000000u;
  // XOR with a mask that is zero outside bit 31 touches only the sign bit
  *result = static_cast<int>(static_cast<unsigned>(a) ^ sign_of_b);
}

View file

@ -18,9 +18,9 @@ import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, o
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fclss(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);

View file

@ -120,7 +120,7 @@ module VX_cluster #(
.DATA_WIDTH (32),
.ADDR_WIDTH (12),
.BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CORES >= 4)
.BUFFERED_RSP (1)
) csr_arb (
.clk (clk),
.reset (reset),
@ -225,7 +225,7 @@ module VX_cluster #(
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
.BUFFERED_REQ (`NUM_CORES >= 4),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) dram_arb (
.clk (clk),

View file

@ -21,7 +21,7 @@ module VX_databus_arb (
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
//
@ -30,41 +30,42 @@ module VX_databus_arb (
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire cache_req_ready_in;
wire smem_req_ready_in;
wire cache_req_valid_out, cache_req_ready_out;
wire is_smem_addr_in, is_smem_addr_out;
// select shared memory bus
wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
assign is_smem_addr_in = core_req_if.valid[i] && `SM_ENABLE
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) cache_out_buffer (
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (core_req_if.valid[i] && !is_smem_addr),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (cache_req_ready_in),
.valid_out (cache_req_if.valid[i]),
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (cache_req_if.ready[i])
.valid_in (core_req_if.valid[i]),
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (core_req_if.ready[i]),
.valid_out (cache_req_valid_out),
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (cache_req_ready_out)
);
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) smem_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (core_req_if.valid[i] && is_smem_addr),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (smem_req_ready_in),
.valid_out (smem_req_if.valid[i]),
.data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
.ready_out (smem_req_if.ready[i])
);
assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in;
if (`SM_ENABLE ) begin
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
assign smem_req_if.addr[i] = cache_req_if.addr[i];
assign smem_req_if.rw[i] = cache_req_if.rw[i];
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
assign smem_req_if.data[i] = cache_req_if.data[i];
assign smem_req_if.tag[i] = cache_req_if.tag[i];
end else begin
`UNUSED_VAR (is_smem_addr_out)
assign cache_req_if.valid[i] = cache_req_valid_out;
assign cache_req_ready_out = cache_req_if.ready[i];
end
end
//

View file

@ -1,6 +1,12 @@
`include "VX_define.vh"
`include "VX_print_instr.vh"
// USED_REGS(f, r): sets one bit of the used_regs vector to 1.
//  - With EXT_F_ENABLE defined, the bit index is the concatenation {f, r},
//    so f supplies the most-significant index bit above the register number r.
//  - Without it, f is ignored and the bit index is r alone.
// The macro text carries no trailing ';' - the call site supplies it.
`ifdef EXT_F_ENABLE
`define USED_REGS(f,r) used_regs[{f,r}] = 1
`else
`define USED_REGS(f,r) used_regs[r] = 1
`endif
module VX_decode #(
parameter CORE_ID = 0
) (
@ -22,10 +28,12 @@ module VX_decode #(
reg [`EX_BITS-1:0] ex_type;
reg [`OP_BITS-1:0] op_type;
reg [`MOD_BITS-1:0] op_mod;
reg [31:0] imm;
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
reg [4:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [31:0] imm;
reg use_rd, use_PC, use_imm;
reg rd_fp, rs1_fp, rs2_fp;
reg is_join, is_wstall;
reg [`NUM_REGS-1:0] used_regs;
wire [31:0] instr = ifetch_rsp_if.instr;
wire [6:0] opcode = instr[6:0];
@ -45,21 +53,23 @@ module VX_decode #(
always @(*) begin
ex_type = `EX_NOP;
ex_type = 0;
op_type = 'x;
op_mod = 'x;
imm = 'x;
use_rd = 0;
use_rs1 = 0;
use_rs2 = 0;
use_rs3 = 0;
use_PC = 0;
use_imm = 0;
rd_fp = 0;
rs1_fp = 0;
rs2_fp = 0;
is_join = 0;
is_wstall = 0;
is_wstall = 0;
used_regs = 0;
rd_r = rd;
rs1_r = rs1;
rs2_r = rs2;
rs3_r = rs3;
case (opcode)
`INST_I: begin
@ -78,8 +88,9 @@ module VX_decode #(
op_mod = 0;
imm = {{20{alu_imm[11]}}, alu_imm};
use_rd = 1;
use_rs1 = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
end
`INST_R: begin
ex_type = `EX_ALU;
@ -113,18 +124,21 @@ module VX_decode #(
endcase
op_mod = 0;
end
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
use_rd = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
`INST_LUI: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_LUI);
op_mod = 0;
op_mod = 0;
rs1_r = 0;
imm = {upper_imm, 12'(0)};
use_rd = 1;
use_rs1 = 1;
use_imm = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, 5'b0);
end
`INST_AUIPC: begin
ex_type = `EX_ALU;
@ -134,6 +148,7 @@ module VX_decode #(
use_rd = 1;
use_PC = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
end
`INST_JAL: begin
ex_type = `EX_ALU;
@ -144,6 +159,7 @@ module VX_decode #(
use_PC = 1;
use_imm = 1;
is_wstall = 1;
`USED_REGS (1'b0, rd);
end
`INST_JALR: begin
ex_type = `EX_ALU;
@ -151,9 +167,10 @@ module VX_decode #(
op_mod = 1;
imm = {{20{jalr_imm[11]}}, jalr_imm};
use_rd = 1;
use_rs1 = 1;
use_imm = 1;
is_wstall = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
end
`INST_B: begin
ex_type = `EX_ALU;
@ -168,11 +185,11 @@ module VX_decode #(
endcase
op_mod = 1;
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
use_rs1 = 1;
use_rs2 = 1;
use_PC = 1;
use_imm = 1;
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
`INST_SYS : begin
if (func3 == 0) begin
@ -190,6 +207,7 @@ module VX_decode #(
use_rd = 1;
use_PC = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
end else begin
ex_type = `EX_CSR;
case (func3[1:0])
@ -201,8 +219,10 @@ module VX_decode #(
endcase
imm = 32'(u_12);
use_rd = 1;
use_rs1 = !func3[2];
use_imm = func3[2];
`USED_REGS (1'b0, rd);
if (!func3[2])
`USED_REGS (1'b0, rs1);
end
end
`ifdef EXT_F_ENABLE
@ -212,10 +232,11 @@ module VX_decode #(
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b0, func3});
imm = {{20{u_12[11]}}, u_12};
use_rd = 1;
use_rs1 = 1;
`ifdef EXT_F_ENABLE
rd_fp = (opcode == `INST_FL);
use_rd = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS ((opcode == `INST_FL), rd);
`ifdef EXT_F_ENABLE
rd_fp = (opcode == `INST_FL);
`endif
end
`ifdef EXT_F_ENABLE
@ -225,8 +246,8 @@ module VX_decode #(
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b1, func3});
imm = {{20{func7[6]}}, func7, rd};
use_rs1 = 1;
use_rs2 = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS ((opcode == `INST_FS), rs2);
`ifdef EXT_F_ENABLE
rs2_fp = (opcode == `INST_FS);
`endif
@ -240,17 +261,18 @@ module VX_decode #(
op_type = `OP_BITS'(opcode[3:0]);
op_mod = func3;
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
use_rs3 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REGS (1'b1, rs3);
end
`INST_FCI: begin
ex_type = `EX_FPU;
op_mod = func3;
use_rd = 1;
use_rd = 1;
case (func7)
7'h00, // FADD
7'h04, // FSUB
@ -258,55 +280,61 @@ module VX_decode #(
7'h0C: // FDIV
begin
op_type = `OP_BITS'(func7[3:0]);
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h2C: begin
op_type = `OP_BITS'(`FPU_SQRT);
use_rs1 = 1;
rd_fp = 1;
rs1_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
end
7'h50: begin
op_type = `OP_BITS'(`FPU_CMP);
use_rs1 = 1;
use_rs2 = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h60: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
use_rs1 = 1;
rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
end
7'h68: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
use_rs1 = 1;
rd_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b0, rs1);
end
7'h10: begin
// FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `OP_BITS'(`FPU_MISC);
op_mod = {1'b0, func3[1:0]};
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h14: begin
// FMIN=3, FMAX=4
op_type = `OP_BITS'(`FPU_MISC);
op_mod = func3[0] ? 4 : 3;
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h70: begin
if (func3[0]) begin
@ -316,15 +344,17 @@ module VX_decode #(
// FMV.X.W=5
op_type = `OP_BITS'(`FPU_MISC);
op_mod = 5;
end
use_rs1 = 1;
rs1_fp = 1;
end
rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
end
7'h78: begin
    // FMV.W.X=6
    op_type = `OP_BITS'(`FPU_MISC);
    op_mod = 6;
    rd_fp = 1;
    `USED_REGS (1'b1, rd);
    // the source operand is integer register rs1 (rs1_fp stays 0), so it
    // must be marked as used like every other arm that reads rs1
    `USED_REGS (1'b0, rs1);
end
default:;
endcase
@ -335,28 +365,28 @@ module VX_decode #(
case (func3)
3'h0: begin
op_type = `OP_BITS'(`GPU_TMC);
use_rs1 = 1;
is_wstall = 1;
`USED_REGS (1'b0, rs1);
end
3'h1: begin
op_type = `OP_BITS'(`GPU_WSPAWN);
use_rs1 = 1;
use_rs2 = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
3'h2: begin
op_type = `OP_BITS'(`GPU_SPLIT);
use_rs1 = 1;
is_wstall = 1;
`USED_REGS (1'b0, rs1);
end
3'h3: begin
op_type = `OP_BITS'(`GPU_JOIN);
is_join = 1;
end
3'h4: begin
op_type = `OP_BITS'(`GPU_BAR);
use_rs1 = 1;
use_rs2 = 1;
op_type = `OP_BITS'(`GPU_BAR);
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
default:;
endcase
@ -366,10 +396,7 @@ module VX_decode #(
end
// disable write to integer register r0
wire use_rd_qual = use_rd && (rd_fp || (rd != 0));
// EX_ALU needs rs1=0 for LUI operation
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
wire wb = use_rd && (rd_fp || (rd_r != 0));
assign decode_if.valid = ifetch_rsp_if.valid;
assign decode_if.wid = ifetch_rsp_if.wid;
@ -378,31 +405,27 @@ module VX_decode #(
assign decode_if.ex_type = ex_type;
assign decode_if.op_type = op_type;
assign decode_if.op_mod = op_mod;
assign decode_if.wb = use_rd_qual;
assign decode_if.wb = wb;
`ifdef EXT_F_ENABLE
assign decode_if.rd = {rd_fp, rd};
assign decode_if.rs1 = {rs1_fp, rs1_qual};
assign decode_if.rs2 = {rs2_fp, rs2};
assign decode_if.rs3 = {1'b1, rs3};
`else
`UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp)
`UNUSED_VAR (rs2_fp)
assign decode_if.rd = rd;
assign decode_if.rs1 = rs1_qual;
assign decode_if.rs2 = rs2;
assign decode_if.rs3 = rs3;
`endif
`ifdef EXT_F_ENABLE
assign decode_if.rd = {rd_fp, rd_r};
assign decode_if.rs1 = {rs1_fp, rs1_r};
assign decode_if.rs2 = {rs2_fp, rs2_r};
assign decode_if.rs3 = {1'b1, rs3_r};
`else
`UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp)
`UNUSED_VAR (rs2_fp)
assign decode_if.rd = rd_r;
assign decode_if.rs1 = rs1_r;
assign decode_if.rs2 = rs2_r;
assign decode_if.rs3 = rs3_r;
`endif
assign decode_if.imm = imm;
assign decode_if.use_PC = use_PC;
assign decode_if.use_imm = use_imm;
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
assign decode_if.imm = imm;
assign decode_if.use_PC = use_PC;
assign decode_if.use_imm = use_imm;
assign decode_if.used_regs = used_regs;
///////////////////////////////////////////////////////////////////////////

View file

@ -54,7 +54,8 @@ module VX_fpu_unit #(
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
.release_slot (fpuq_pop),
.full (fpuq_full)
.full (fpuq_full),
`UNUSED_PIN (empty)
);
// can accept new request?

View file

@ -82,8 +82,7 @@ module VX_ibuffer #(
if (writing && is_slot0) begin
q_data_out[i] <= q_data_in;
end
if (pop) begin
end else if (pop) begin
q_data_out[i] <= q_data_prev[i];
end
end

View file

@ -38,7 +38,8 @@ module VX_instr_demux (
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) alu_buffer (
.clk (clk),
.reset (reset),
@ -55,7 +56,8 @@ module VX_instr_demux (
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) lsu_buffer (
.clk (clk),
.reset (reset),
@ -72,7 +74,8 @@ module VX_instr_demux (
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.BUFFERED (1)
) csr_buffer (
.clk (clk),
.reset (reset),
@ -90,7 +93,8 @@ module VX_instr_demux (
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.BUFFERED (1)
) fpu_buffer (
.clk (clk),
.reset (reset),
@ -111,7 +115,8 @@ module VX_instr_demux (
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
.BUFFERED (1)
) gpu_buffer (
.clk (clk),
.reset (reset),

View file

@ -183,19 +183,44 @@ module VX_issue #(
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (alu_req_if.valid && alu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
`PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
$write("\n");
end
if (lsu_req_if.valid && lsu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
`PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
$write(", data=");
`PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
$write("\n");
end
if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr, csr_req_if.rs1_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
`PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
$write("\n");
end
if (fpu_req_if.valid && fpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
`PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
$write(", rs3_data=");
`PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
$write("\n");
end
if (gpu_req_if.valid && gpu_req_if.ready) begin
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
`PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
$write(", rs2_data=");
`PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
$write("\n");
end
end
`endif

View file

@ -75,10 +75,11 @@ module VX_lsu_unit #(
`UNUSED_VAR (rsp_type)
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
reg [`NUM_THREADS-1:0] rsp_rem_mask_n;
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
wire [`NUM_THREADS-1:0] rsp_tmask;
reg [`NUM_THREADS-1:0] req_sent_mask;
wire req_sent_all;
wire sent_all_ready;
wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full;
@ -88,18 +89,20 @@ module VX_lsu_unit #(
assign req_offset[i] = req_addr[i][1:0];
end
wire mbuf_push = (| (dcache_req_if.valid & dcache_req_if.ready))
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
wire mbuf_push = (| dcache_req_fire)
&& (0 == req_sent_mask) // first submission only
&& req_wb; // loads only
wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
wire mbuf_pop = mbuf_pop_part && (rsp_rem_mask_n == 0 || rsp_is_dup);
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0];
VX_index_buffer #(
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
.SIZE (`LSUQ_SIZE)
) req_metadata (
.clk (clk),
@ -107,26 +110,34 @@ module VX_lsu_unit #(
.write_addr (mbuf_waddr),
.acquire_slot (mbuf_push),
.read_addr (mbuf_raddr),
.write_data ({req_wid, req_pc, req_rd, req_wb, req_type, req_offset, req_is_dup}),
.read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}),
.write_data ({req_wid, req_pc, req_tmask, req_rd, req_wb, req_type, req_offset, req_is_dup}),
.read_data ({rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}),
.release_addr (mbuf_raddr),
.release_slot (mbuf_pop),
.full (mbuf_full)
.full (mbuf_full),
`UNUSED_PIN (empty)
);
assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|| (req_is_dup && dcache_req_if.ready[0]);
always @(posedge clk) begin
if (mbuf_push) begin
pending_tags[mbuf_waddr] <= req_tag;
end
end
assign sent_all_ready = &(dcache_req_if.ready | req_sent_mask);
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
always @(posedge clk) begin
if (reset) begin
req_sent_mask <= 0;
end else begin
if (req_sent_all)
if (sent_all_ready)
req_sent_mask <= 0;
else
req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready);
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
end
end
end
// need to hold the acquired tag index until the full request is submitted
reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold;
@ -136,20 +147,21 @@ module VX_lsu_unit #(
req_tag_hold <= mbuf_waddr;
end
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
always @(posedge clk) begin
if (mbuf_push) begin
rsp_rem_mask[mbuf_waddr] <= req_tmask;
pending_tags[mbuf_waddr] <= req_tag;
rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
end
if (mbuf_pop_part) begin
if (dcache_rsp_fire) begin
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
end
end
wire req_ready_dep = (req_wb && ~mbuf_full) || (~req_wb && st_commit_if.ready);
wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
wire req_ready_dep = (req_wb && ~mbuf_full)
|| (~req_wb && st_commit_if.ready);
// DCache Request
@ -181,23 +193,23 @@ module VX_lsu_unit #(
end
end
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask;
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask_dup & ~req_sent_mask;
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
assign dcache_req_if.addr = mem_req_addr;
assign dcache_req_if.byteen = mem_req_byteen;
assign dcache_req_if.data = mem_req_data;
`ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}};
assign dcache_req_if.tag = {`NUM_THREADS{req_pc, req_wid, req_tag}};
`else
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
`endif
assign ready_in = req_ready_dep && req_sent_all;
assign ready_in = req_ready_dep && sent_all_ready;
// send store commit
wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid;
@ -211,7 +223,7 @@ module VX_lsu_unit #(
// load response formatting
reg [`NUM_THREADS-1:0][31:0] rsp_data;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [`NUM_THREADS-1:0] rsp_tmask_qual;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
@ -234,7 +246,7 @@ module VX_lsu_unit #(
end
end
assign rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr] : dcache_rsp_if.valid;
assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.valid;
// send load commit
@ -247,15 +259,15 @@ module VX_lsu_unit #(
.clk (clk),
.reset (reset),
.enable (!load_rsp_stall),
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
);
// Can accept new cache response?
assign dcache_rsp_if.ready = ~load_rsp_stall;
// scope registration
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready);
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_fire);
`SCOPE_ASSIGN (dcache_req_wid, req_wid);
`SCOPE_ASSIGN (dcache_req_pc, req_pc);
`SCOPE_ASSIGN (dcache_req_addr, req_addr);
@ -269,15 +281,15 @@ module VX_lsu_unit #(
`ifdef DBG_PRINT_CORE_DCACHE
always @(posedge clk) begin
if ((| (dcache_req_if.valid & dcache_req_if.ready))) begin
if ((| dcache_req_fire)) begin
if ((| dcache_req_if.rw))
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
else
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b",
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
end
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
if (dcache_rsp_fire) begin
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup);
end
@ -291,4 +303,4 @@ module VX_lsu_unit #(
end
`endif
endmodule
endmodule

View file

@ -81,4 +81,25 @@
`define LTRIM(x,s) x[s-1:0]
// PRINT_ARRAY1D(a, m): writes the m entries of a as "{0x.., 0x.., ...}" with
// $write, highest index first, each entry in hex, and no trailing newline.
// The macro text ends without ';' or a line continuation, so it is invoked as
// an ordinary statement followed by ';' (same convention as PRINT_ARRAY2D).
`define PRINT_ARRAY1D(a, m) \
    $write("{"); \
    for (integer i = (m-1); i >= 0; --i) begin \
        if (i != (m-1)) $write(", "); \
        $write("0x%0h", a[i]); \
    end \
    $write("}")
// PRINT_ARRAY2D(a, m, n): writes a as n brace-enclosed groups of m entries,
// "{{0x.., ...}, {0x.., ...}}", using $write with no trailing newline.
// The outer index i runs from n-1 down to 0 and the inner index j from m-1
// down to 0; each entry a[i][j] is printed in hex.
// The macro text ends without ';' - the call site supplies it.
`define PRINT_ARRAY2D(a, m, n) \
    $write("{"); \
    for (integer i = (n-1); i >= 0; --i) begin \
        if (i != (n-1)) $write(", "); \
        $write("{"); \
        for (integer j = (m-1); j >= 0; --j) begin \
            if (j != (m-1)) $write(", "); \
            $write("0x%0h", a[i][j]); \
        end \
        $write("}"); \
    end \
    $write("}")
`endif

View file

@ -121,7 +121,7 @@ module Vortex (
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (32),
.ADDR_WIDTH (12),
.BUFFERED_REQ (`NUM_CLUSTERS >= 4),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) csr_arb (
.clk (clk),
@ -228,7 +228,7 @@ module Vortex (
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
.BUFFERED_RSP (1)
) dram_arb (
.clk (clk),
.reset (reset),

View file

@ -1,14 +1,13 @@
`include "VX_define.vh"
`ifndef NOPAE
import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
`else
`include "vortex_afu.vh"
`endif
/* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*;
import local_mem_cfg_pkg::*;
/* verilator lint_on IMPORTSTAR */
`endif
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2

View file

@ -487,7 +487,8 @@ module VX_bank #(
end
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS)
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.BUFFERED (NUM_BANKS == 1)
) core_rsp_req (
.clk (clk),
.reset (reset),

View file

@ -168,8 +168,7 @@ module VX_cache #(
.NUM_BANKS (NUM_BANKS)
) flush_ctrl (
.clk (clk),
.reset (reset),
.flush (flush),
.reset (reset || flush),
.addr_out (flush_addr),
.valid_out (flush_enable)
);

View file

@ -98,7 +98,8 @@ module VX_cache_core_rsp_merge #(
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.BUFFERED (1)
) pipe_reg (
.clk (clk),
.reset (reset),
@ -146,7 +147,8 @@ module VX_cache_core_rsp_merge #(
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.BUFFERED (1)
) pipe_reg (
.clk (clk),
.reset (reset),

View file

@ -9,8 +9,7 @@ module VX_flush_ctrl #(
parameter NUM_BANKS = 1
) (
input wire clk,
input wire reset,
input wire flush,
input wire reset,
output wire [`LINE_SELECT_BITS-1:0] addr_out,
output wire valid_out
);
@ -18,7 +17,7 @@ module VX_flush_ctrl #(
reg [`LINE_SELECT_BITS-1:0] flush_ctr;
always @(posedge clk) begin
if (reset || flush) begin
if (reset) begin
flush_enable <= 1;
flush_ctr <= 0;
end else begin

View file

@ -3,10 +3,6 @@
/// Modified port of cast module from fpnew Library
/// reference: https://github.com/pulp-platform/fpnew
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_cvt #(
parameter TAGW = 1,
parameter LANES = 1
@ -73,19 +69,19 @@ module VX_fp_cvt #(
);
end
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
wire [LANES-1:0] input_sign;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
wire [LANES-1:0] input_sign;
for (genvar i = 0; i < LANES; ++i) begin
wire [INT_MAN_WIDTH-1:0] int_mantissa;
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
wire fmt_sign = dataa[i][31];
wire int_sign = dataa[i][31] & is_signed;
assign int_mantissa = int_sign ? $unsigned(-dataa[i]) : dataa[i];
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
assign fmt_exponent[i] = $signed({1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]});
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
end
@ -115,7 +111,7 @@ module VX_fp_cvt #(
wire [2:0] rnd_mode_s0;
fp_type_t [LANES-1:0] in_a_type_s0;
wire [LANES-1:0] input_sign_s0;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
wire [LANES-1:0] mant_is_zero_s0;
@ -135,38 +131,93 @@ module VX_fp_cvt #(
// Normalization
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
for (genvar i = 0; i < LANES; ++i) begin
`IGNORE_WARNINGS_BEGIN
// Input mantissa needs to be normalized
wire signed [INT_EXP_WIDTH-1:0] fp_input_exp;
wire signed [INT_EXP_WIDTH-1:0] int_input_exp;
wire [LZC_RESULT_WIDTH:0] renorm_shamt_sgn;
// signed form for calculations
assign renorm_shamt_sgn = $signed({1'b0, renorm_shamt_s0[i]});
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
wire [INT_EXP_WIDTH-1:0] int_input_exp;
// Realign input mantissa, append zeroes if destination is wider
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
// Unbias exponent and compensate for shift
assign fp_input_exp = $signed(fmt_exponent_s0[i] +
(($signed({1'b0, in_a_type_s0[i].is_subnormal}) +
$signed(FMT_SHIFT_COMPENSATION - EXP_BIAS)) -
renorm_shamt_sgn));
assign fp_input_exp = fmt_exponent_s0[i] +
{1'b0, in_a_type_s0[i].is_subnormal} +
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
{1'b0, renorm_shamt_s0[i]};
assign int_input_exp = $signed(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
// Rebias the exponent
assign destination_exp[i] = input_exp[i] + $signed(EXP_BIAS);
assign destination_exp[i] = input_exp[i] + EXP_BIAS;
`IGNORE_WARNINGS_END
end
// Perform adjustments to mantissa and exponent
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
wire [LANES-1:0] of_before_round_s0;
for (genvar i = 0; i < LANES; ++i) begin
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg of_before_round;
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
denorm_shamt = 0; // right of mantissa
of_before_round = 1'b0;
// Handle INT casts
if (is_itof_s0) begin
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
// Overflow or infinities (for proper rounding)
final_exp = (2**EXP_BITS-2); // largest normal value
preshift_mant = ~0; // largest normal value and RS bits set
of_before_round = 1'b1;
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
// Limit the shift to retain sticky bits
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
end else if ($signed(destination_exp[i]) < $signed(1)) begin
// Denormalize underflowing values
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
end
end else begin
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
// overflow: when converting to unsigned the range is larger by one
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
of_before_round = 1'b1;
end else if ($signed(input_exp[i]) < $signed(-1)) begin
// underflow
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
end else begin
// By default right shift mantissa to be an integer
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
end
end
`IGNORE_WARNINGS_END
end
assign preshift_mant_s0[i] = preshift_mant;
assign denorm_shamt_s0[i] = denorm_shamt;
assign final_exp_s0[i] = final_exp;
assign of_before_round_s0[i] = of_before_round;
end
// Pipeline stage1
wire valid_in_s1;
@ -176,121 +227,68 @@ module VX_fp_cvt #(
wire [2:0] rnd_mode_s1;
fp_type_t [LANES-1:0] in_a_type_s1;
wire [LANES-1:0] mant_is_zero_s1;
wire [LANES-1:0] input_sign_s1;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp_s1;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
wire [LANES-1:0] input_sign_s1;
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
wire [LANES-1:0] of_before_round_s1;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + 2*INT_EXP_WIDTH)),
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, input_mant, input_exp, destination_exp}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1})
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
);
// Casting
reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
wire [LANES-1:0][MAN_BITS-1:0] final_mant; // mantissa after adjustments
wire [LANES-1:0][MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
reg [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
reg [LANES-1:0] of_before_round;
// Perform adjustments to mantissa and exponent
wire [LANES-1:0] rounded_sign;
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits;
// Rounding and classification
for (genvar i = 0; i < LANES; ++i) begin
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp[i] = $unsigned(destination_exp_s1[i]); // take exponent as is, only look at lower bits
preshift_mant[i] = 65'b0; // initialize mantissa container with zeroes
denorm_shamt[i] = 0; // right of mantissa
of_before_round[i] = 1'b0;
// Place mantissa to the left of the shifter
preshift_mant[i] = {input_mant_s1[i], 33'b0};
// Handle INT casts
if (is_itof_s1) begin
// Overflow or infinities (for proper rounding)
if ($signed(destination_exp_s1[i]) >= $signed(2**EXP_BITS-1)) begin
final_exp[i] = (2**EXP_BITS-2); // largest normal value
preshift_mant[i] = ~0; // largest normal value and RS bits set
of_before_round[i] = 1'b1;
// Denormalize underflowing values
end else if (($signed(destination_exp_s1[i]) < $signed(1))
&& ($signed(destination_exp_s1[i]) >= -$signed(MAN_BITS))) begin
final_exp[i] = 0; // denormal result
denorm_shamt[i] = $unsigned(denorm_shamt[i] + 1 - destination_exp_s1[i]); // adjust right shifting
// Limit the shift to retain sticky bits
end else if ($signed(destination_exp_s1[i]) < -$signed(MAN_BITS)) begin
final_exp[i] = 0; // denormal result
denorm_shamt[i] = $unsigned(denorm_shamt[i] + (2 + MAN_BITS)); // to sticky
end
end else begin
// By default right shift mantissa to be an integer
denorm_shamt[i] = (MAX_INT_WIDTH-1) - input_exp_s1[i];
// overflow: when converting to unsigned the range is larger by one
if ($signed(input_exp_s1[i]) >= $signed(MAX_INT_WIDTH -1 + unsigned_s1)) begin
denorm_shamt[i] = SHAMT_BITS'(0); // prevent shifting
of_before_round[i] = 1'b1;
// underflow
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
denorm_shamt[i] = MAX_INT_WIDTH + 1; // all bits go to the sticky
end
end
`IGNORE_WARNINGS_END
end
wire [2*INT_MAN_WIDTH:0] destination_mant;
wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
wire [1:0] round_sticky_bits;
wire [31:0] fmt_pre_round_abs;
wire [31:0] pre_round_abs;
// Mantissa adjustment shift
assign destination_mant[i] = preshift_mant[i] >> denorm_shamt[i];
assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
// Extract final mantissa and round bit, discard the normal bit (for FP)
assign {final_mant[i], fp_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int[i], int_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
// Collapse sticky bits
assign fp_round_sticky_bits[i][0] = (| destination_mant[i][NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant[i][NUM_INT_STICKY-1:0]);
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
// select RS bits for destination operation
assign round_sticky_bits[i] = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
end
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
// Rounding and classification
wire [LANES-1:0] rounded_sign;
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
for (genvar i = 0; i < LANES; ++i) begin
// Pack exponent and mantissa into proper rounding form
wire [31:0] fmt_pre_round_abs = {1'b0, final_exp[i][EXP_BITS-1:0], final_mant[i][MAN_BITS-1:0]};
// Sign-extend integer result
wire [31:0] ifmt_pre_round_abs = final_int[i];
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
// Select output with destination format and operation
wire [31:0] pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : ifmt_pre_round_abs;
assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
// Perform the rounding
VX_fp_rounding #(
.DAT_WIDTH (32)
) fp_rounding (
.abs_value_i (pre_round_abs),
.sign_i (input_sign_s1[i]),
.round_sticky_bits_i (round_sticky_bits[i]),
.rnd_mode_i (rnd_mode_s1),
.effective_subtraction_i (1'b0),
.abs_rounded_o (rounded_abs[i]),
.sign_o (rounded_sign[i]),
.abs_value_i (pre_round_abs),
.sign_i (input_sign_s1[i]),
.round_sticky_bits_i(round_sticky_bits),
.rnd_mode_i (rnd_mode_s1),
.effective_subtraction_i(1'b0),
.abs_rounded_o (rounded_abs[i]),
.sign_o (rounded_sign[i]),
`UNUSED_PIN (exact_zero_o)
);
end
@ -306,23 +304,22 @@ module VX_fp_cvt #(
wire [LANES-1:0] input_sign_s2;
wire [LANES-1:0] rounded_sign_s2;
wire [LANES-1:0][31:0] rounded_abs_s2;
wire [LANES-1:0] of_before_round_s2;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1)),
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
.RESETW (1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2})
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
);
wire [LANES-1:0] of_after_round;
wire [LANES-1:0] uf_after_round;
wire [LANES-1:0][31:0] fmt_result;
wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion
wire [LANES-1:0] rounded_int_res_zero; // after rounding
@ -335,7 +332,7 @@ module VX_fp_cvt #(
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
// Negative integer result needs to be brought into two's complement
assign rounded_int_res[i] = rounded_sign_s2[i] ? $unsigned(-rounded_abs_s2[i]) : rounded_abs_s2[i];
assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
end
@ -373,7 +370,7 @@ module VX_fp_cvt #(
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
end else begin
int_special_result[i][30:0] = 2**(31) -1; // alone yields 2**(31)-1
int_special_result[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
end
end
@ -381,7 +378,7 @@ module VX_fp_cvt #(
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
| in_a_type_s2[i].is_inf
| of_before_round[i]
| of_before_round_s2[i]
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
// All integer special cases are invalid
@ -399,11 +396,11 @@ module VX_fp_cvt #(
wire [31:0] fp_result, int_result;
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i]));
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
assign fp_regular_status.NV = is_itof_s2 & (of_before_round[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.DZ = 1'b0; // no divisions
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.UF = uf_after_round[i] & inexact;
assign fp_regular_status.NX = inexact;

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_div #(
parameter TAGW = 1,
parameter LANES = 1

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_fma #(
parameter TAGW = 1,
parameter LANES = 1

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_sqrt #(
parameter TAGW = 1,
parameter LANES = 1
@ -44,7 +48,7 @@ module VX_fp_sqrt #(
fflags_t f;
always @(*) begin
dpi_fsqrt (dataa[i], frm, r, f);
dpi_fsqrt (dataa[i], frm, r, f);
end
`UNUSED_VAR (f)

View file

@ -10,7 +10,7 @@ module VX_fp_type (
);
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
wire is_subnormal = (exp_i == 8'd0) && !is_zero;
wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0);
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
wire is_signaling = is_nan && (man_i[22] == 1'b0);

View file

@ -330,9 +330,9 @@ module VX_fpu_dpi #(
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (dataa[i], result_fsgnj[i]);
dpi_fsgnjn (dataa[i], result_fsgnjn[i]);
dpi_fsgnjx (dataa[i], result_fsgnjx[i]);
dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
result_fmv[i] = dataa[i];
end
end

View file

@ -18,11 +18,12 @@ module VX_index_buffer #(
input wire [ADDRW-1:0] release_addr,
input wire release_slot,
output wire full
output wire empty,
output wire full
);
reg [SIZE-1:0] free_slots, free_slots_n;
reg [ADDRW-1:0] write_addr_r;
reg full_r;
reg empty_r, full_r;
wire free_valid;
wire [ADDRW-1:0] free_index;
@ -51,6 +52,7 @@ module VX_index_buffer #(
if (reset) begin
write_addr_r <= ADDRW'(1'b0);
free_slots <= {SIZE{1'b1}};
empty_r <= 1'b1;
full_r <= 1'b0;
end else begin
if (release_slot) begin
@ -60,6 +62,7 @@ module VX_index_buffer #(
write_addr_r <= free_index;
end
free_slots <= free_slots_n;
empty_r <= (& free_slots_n);
full_r <= ~free_valid;
end
end
@ -81,6 +84,7 @@ module VX_index_buffer #(
);
assign write_addr = write_addr_r;
assign empty = empty_r;
assign full = full_r;
endmodule

View file

@ -67,8 +67,7 @@ module VX_skid_buffer #(
end else begin
if (ready_out) begin
use_buffer <= 0;
end
if (push && !pop) begin
end else if (push && valid_out_r) begin
assert(!use_buffer);
use_buffer <= 1;
end
@ -81,9 +80,11 @@ module VX_skid_buffer #(
always @(posedge clk) begin
if (push) begin
buffer <= data_in;
end
if (pop) begin
data_out_r <= use_buffer ? buffer : data_in;
end
if (pop && !use_buffer) begin
data_out_r <= data_in;
end else if (pop) begin
data_out_r <= buffer;
end
end

View file

@ -138,5 +138,4 @@ clean-fpga-32c:
clean-fpga-64c:
rm -rf $(FPGA_BUILD_DIR)_64c sources.txt
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
rm sources.txt
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c

View file

@ -6,7 +6,7 @@
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
vortex_afu16.json
QI:vortex_afu.qsf
C:sources.txt

View file

@ -2,6 +2,8 @@
+define+NUM_CLUSTERS=4
#+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE

View file

@ -2,6 +2,8 @@
+define+NUM_CLUSTERS=8
#+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE

View file

@ -6,7 +6,7 @@
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
vortex_afu8.json
QI:vortex_afu.qsf
C:sources.txt

View file

@ -0,0 +1,56 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-200",
"clock-frequency-low": "auto-200",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-csr-read": 4,
"cmd-csr-write": 5,
"mmio-cmd-type": 10,
"mmio-io-addr": 12,
"mmio-mem-addr": 14,
"mmio-data-size": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-csr-core": 24,
"mmio-csr-addr": 26,
"mmio-csr-data": 28,
"mmio-csr-read": 30,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

View file

@ -0,0 +1,57 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-210",
"clock-frequency-low": "auto-210",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-csr-read": 4,
"cmd-csr-write": 5,
"mmio-cmd-type": 10,
"mmio-io-addr": 12,
"mmio-mem-addr": 14,
"mmio-data-size": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-csr-core": 24,
"mmio-csr-addr": 26,
"mmio-csr-data": 28,
"mmio-csr-read": 30,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

37
hw/syn/quartus/Makefile Normal file
View file

@ -0,0 +1,37 @@
# Launch Quartus synthesis builds, one sub-directory per target.
#
# Each target below names a sub-directory of this directory (expected to hold
# its own Makefile). `make <target>` first runs `clean` in that sub-directory
# and, if that succeeds, starts a fresh build there. The whole
# `clean && build` list is sent to the background by the trailing `&`, so the
# recipe does not wait for the build to finish. stdout and stderr of the
# build step are captured in <target>/build.log.
BUILD_TARGETS := unittest pipeline cache core vortex \
                 top1 top2 top4 top8 top16 top32 top64

# The target names coincide with directory names, so they must be declared
# phony; otherwise make would see the existing directory and skip the recipe.
.PHONY: $(BUILD_TARGETS)

# One shared recipe for every target; $@ expands to the sub-directory name.
# $(MAKE) (rather than a literal `make`) keeps command-line flags propagated
# to the sub-make.
$(BUILD_TARGETS):
	$(MAKE) -C $@ clean && $(MAKE) -C $@ > $@/build.log 2>&1 &

View file

@ -41,10 +41,6 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS

View file

@ -1,13 +1,20 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.v
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
SRC_FILE = Vortex.sv
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on