mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
Merge branch 'master' of https://github.com/vortexgpgpu/vortex-dev
This commit is contained in:
commit
bb576a8f40
39 changed files with 667 additions and 362 deletions
|
@ -15,6 +15,9 @@ set -e
|
|||
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
|
||||
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
|
||||
|
||||
# disable shared memory
|
||||
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
|
||||
|
||||
# Blackbox tests
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||
|
|
|
@ -5,16 +5,19 @@ Description: Makes the build in the opae directory with the specified core
|
|||
exists, a make clean command is run before the build. With -w, the script waits
|
||||
until the inteldev script or quartus program is finished running.
|
||||
|
||||
Usage: ./build.sh -c [1|2|4|8|16] [-p [y|n]]
|
||||
Usage: ./build.sh -c [1|2|4|8|16] [-p perf] [-w wait]
|
||||
|
||||
Options:
|
||||
-c
|
||||
Core count (1, 2, 4, 8, or 16).
|
||||
|
||||
-p
|
||||
Performance profiling enable (y or n). Changes the source file in the
|
||||
Performance profiling enable. Changes the source file in the
|
||||
opae directory to include/exclude "+define+PERF_ENABLE".
|
||||
|
||||
-w
|
||||
Wait for the build to complete
|
||||
|
||||
_______________________________________________________________________________
|
||||
|
||||
|
||||
|
|
|
@ -1,10 +1,23 @@
|
|||
#!/bin/bash
|
||||
|
||||
while getopts c:p: flag
|
||||
BUILD_DIR=../../hw/syn/opae
|
||||
|
||||
perf=0
|
||||
wait=0
|
||||
|
||||
while getopts c:pwh flag
|
||||
do
|
||||
case "${flag}" in
|
||||
c) cores=${OPTARG};; #1, 2, 4, 8, 16
|
||||
p) perf=${OPTARG};; #perf counters enable (y/n)
|
||||
p) perf=1;; #perf counters enable
|
||||
w) wait=1;; # wait for build to complete
|
||||
h) echo "Usage: -c <cores> [-p perf] [-w wait] [-h help]"
|
||||
exit 0
|
||||
;;
|
||||
\?)
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
|
@ -13,25 +26,22 @@ if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
cd ../../hw/syn/opae
|
||||
cd ${BUILD_DIR}
|
||||
|
||||
sources_file="./sources_${cores}c.txt"
|
||||
|
||||
# Make the "+define+PERF_ENABLE" line of the sources file match the -p flag.
# The line is toggled by adding/removing a leading '#'. If the file has
# neither form yet, the appropriate one is inserted as the first line.
# The sed patterns are anchored so an already-commented line is never
# commented a second time ("##+define+...").
if [ "${perf}" = 1 ]; then
if grep -Fxq '#+define+PERF_ENABLE' "${sources_file}"; then
# currently disabled: strip the leading '#'
sed -i 's/^#+define+PERF_ENABLE$/+define+PERF_ENABLE/' "${sources_file}"
elif ! grep -Fxq '+define+PERF_ENABLE' "${sources_file}"; then
# not present at all: add it enabled
sed -i '1s/^/+define+PERF_ENABLE\n/' "${sources_file}"
fi
else
if grep -Fxq '+define+PERF_ENABLE' "${sources_file}"; then
# currently enabled: comment it out
sed -i 's/^+define+PERF_ENABLE$/#+define+PERF_ENABLE/' "${sources_file}"
elif ! grep -Fxq '#+define+PERF_ENABLE' "${sources_file}"; then
# not present at all: add it disabled
sed -i '1s/^/#+define+PERF_ENABLE\n/' "${sources_file}"
fi
fi
|
||||
|
||||
# Check for an existing build directory for this core count (per the usage
# notes, a make clean is run when it exists). The variable must be written
# as ${cores}; "{$cores}" expands to a literal "{4}" and never matches.
if [ -d "./build_fpga_${cores}c" ]; then
|
||||
|
@ -39,12 +49,12 @@ if [ -d "./build_fpga_{$cores}c" ]; then
|
|||
fi
|
||||
make "fpga-${cores}c"
|
||||
|
||||
sleep 30
|
||||
|
||||
pids=($(pgrep -f "${OPAE_PLATFORM_ROOT}|quartus"))
|
||||
for pid in ${pids[@]}; do
|
||||
while kill -0 ${pid} 2> /dev/null; do
|
||||
sleep 30
|
||||
if [ ${wait} = 1 ]; then
|
||||
sleep 30
|
||||
pids=($(pgrep -f "${OPAE_PLATFORM_ROOT}|quartus"))
|
||||
for pid in ${pids[@]}; do
|
||||
while kill -0 ${pid} 2> /dev/null; do
|
||||
sleep 30
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
fi
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
|
||||
for ((i=1; i <= 16; i=i*2)); do
|
||||
echo "Building ${i} core build..."
|
||||
./build.sh -c ${i} -p y
|
||||
./build.sh -c ${i} -p -w
|
||||
echo "Done ${i} core build."
|
||||
done
|
||||
|
|
|
@ -26,9 +26,9 @@ extern "C" {
|
|||
void dpi_utof(int a, int frm, int* result, int* fflags);
|
||||
|
||||
void dpi_fclss(int a, int* result);
|
||||
void dpi_fsgnj(int a, int* result);
|
||||
void dpi_fsgnjn(int a, int* result);
|
||||
void dpi_fsgnjx(int a, int* result);
|
||||
void dpi_fsgnj(int a, int b, int* result);
|
||||
void dpi_fsgnjn(int a, int b, int* result);
|
||||
void dpi_fsgnjx(int a, int b, int* result);
|
||||
|
||||
void dpi_flt(int a, int b, int* result, int* fflags);
|
||||
void dpi_fle(int a, int b, int* result, int* fflags);
|
||||
|
@ -244,21 +244,53 @@ void dpi_fmax(int a, int b, int* result, int* fflags) {
|
|||
}
|
||||
|
||||
// Classify the IEEE-754 single-precision bit pattern `a` and store a one-hot
// class mask in *result, using the RISC-V FCLASS.S bit assignment:
//   bit 0: -infinity     bit 5: +subnormal
//   bit 1: -normal       bit 6: +normal
//   bit 2: -subnormal    bit 7: +infinity
//   bit 3: -0            bit 8: signaling NaN
//   bit 4: +0            bit 9: quiet NaN
// A NaN is quiet when the most significant fraction bit (bit 22) is set,
// regardless of its sign or of the remaining payload bits.
void dpi_fclss(int a, int* result) {
  // work on the raw bits as unsigned so shifts and masks are well defined
  uint32_t bits     = (uint32_t)a;
  int      negative = (bits >> 31) != 0;
  uint32_t expo     = (bits >> 23) & 0xFF;
  uint32_t fraction = bits & 0x7FFFFF;

  int r;
  if (expo == 0) {
    if (fraction == 0) {
      r = negative ? (1 << 3) : (1 << 4); // +/- 0
    } else {
      r = negative ? (1 << 2) : (1 << 5); // +/- subnormal
    }
  } else if (expo == 0xFF) {
    if (fraction == 0) {
      r = negative ? (1 << 0) : (1 << 7); // +/- infinity
    } else if (fraction & 0x00400000) {
      r = (1 << 9); // quiet NaN (any sign, any payload)
    } else {
      r = (1 << 8); // signaling NaN
    }
  } else {
    r = negative ? (1 << 1) : (1 << 6); // +/- normal
  }

  *result = r;
}
|
||||
|
||||
void dpi_fsgnj(int a, int* result) {
|
||||
// TODO
|
||||
*result = 0;
|
||||
// Sign injection: *result takes the exponent and fraction bits of `a`
// and the sign bit (bit 31) of `b`.
void dpi_fsgnj(int a, int b, int* result) {
  const uint32_t sign_mask = 0x80000000u;
  uint32_t magnitude = (uint32_t)a & ~sign_mask; // bits 30..0 of a
  uint32_t injected  = (uint32_t)b & sign_mask;  // bit 31 of b
  *result = (int)(injected | magnitude);
}
|
||||
|
||||
void dpi_fsgnjn(int a, int* result) {
|
||||
// TODO
|
||||
*result = 0;
|
||||
// Negated sign injection: *result takes the exponent and fraction bits of
// `a` and the inverse of the sign bit (bit 31) of `b`.
void dpi_fsgnjn(int a, int b, int* result) {
  const uint32_t sign_mask = 0x80000000u;
  uint32_t magnitude = (uint32_t)a & ~sign_mask;  // bits 30..0 of a
  uint32_t injected  = ~(uint32_t)b & sign_mask;  // flipped bit 31 of b
  *result = (int)(injected | magnitude);
}
|
||||
|
||||
void dpi_fsgnjx(int a, int* result) {
|
||||
// TODO
|
||||
*result = 0;
|
||||
// XOR sign injection: *result is `a` with its sign bit (bit 31) XORed with
// the sign bit of `b`; exponent and fraction bits of `a` are kept as-is.
void dpi_fsgnjx(int a, int b, int* result) {
  const uint32_t sign_mask = 0x80000000u;
  // flipping a's sign bit exactly when b's sign bit is set is the same as
  // (sign(a) ^ sign(b)) | magnitude(a)
  uint32_t flipped = (uint32_t)a ^ ((uint32_t)b & sign_mask);
  *result = (int)flipped;
}
|
|
@ -18,9 +18,9 @@ import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, o
|
|||
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" context function void dpi_fclss(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnj(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjn(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjx(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
|
||||
|
||||
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
|
|
|
@ -120,7 +120,7 @@ module VX_cluster #(
|
|||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (`NUM_CORES >= 4)
|
||||
.BUFFERED_RSP (1)
|
||||
) csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -225,7 +225,7 @@ module VX_cluster #(
|
|||
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
|
||||
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (`NUM_CORES >= 4),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -21,7 +21,7 @@ module VX_databus_arb (
|
|||
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
|
||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
||||
localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
|
||||
//
|
||||
|
@ -30,41 +30,42 @@ module VX_databus_arb (
|
|||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
wire cache_req_ready_in;
|
||||
wire smem_req_ready_in;
|
||||
wire cache_req_valid_out, cache_req_ready_out;
|
||||
wire is_smem_addr_in, is_smem_addr_out;
|
||||
|
||||
// select shared memory bus
|
||||
wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
||||
assign is_smem_addr_in = core_req_if.valid[i] && `SM_ENABLE
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) cache_out_buffer (
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i] && !is_smem_addr),
|
||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (cache_req_ready_in),
|
||||
.valid_out (cache_req_if.valid[i]),
|
||||
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_if.ready[i])
|
||||
.valid_in (core_req_if.valid[i]),
|
||||
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (core_req_if.ready[i]),
|
||||
.valid_out (cache_req_valid_out),
|
||||
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_ready_out)
|
||||
);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) smem_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i] && is_smem_addr),
|
||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (smem_req_ready_in),
|
||||
.valid_out (smem_req_if.valid[i]),
|
||||
.data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
|
||||
.ready_out (smem_req_if.ready[i])
|
||||
);
|
||||
|
||||
assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in;
|
||||
if (`SM_ENABLE ) begin
|
||||
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
|
||||
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
|
||||
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
|
||||
|
||||
assign smem_req_if.addr[i] = cache_req_if.addr[i];
|
||||
assign smem_req_if.rw[i] = cache_req_if.rw[i];
|
||||
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
|
||||
assign smem_req_if.data[i] = cache_req_if.data[i];
|
||||
assign smem_req_if.tag[i] = cache_req_if.tag[i];
|
||||
end else begin
|
||||
`UNUSED_VAR (is_smem_addr_out)
|
||||
assign cache_req_if.valid[i] = cache_req_valid_out;
|
||||
assign cache_req_ready_out = cache_req_if.ready[i];
|
||||
end
|
||||
end
|
||||
|
||||
//
|
||||
|
|
|
@ -1,6 +1,12 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_print_instr.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_REGS(f,r) used_regs[{f,r}] = 1
|
||||
`else
|
||||
`define USED_REGS(f,r) used_regs[r] = 1
|
||||
`endif
|
||||
|
||||
module VX_decode #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
|
@ -22,10 +28,12 @@ module VX_decode #(
|
|||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`OP_BITS-1:0] op_type;
|
||||
reg [`MOD_BITS-1:0] op_mod;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg [4:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_PC, use_imm;
|
||||
reg rd_fp, rs1_fp, rs2_fp;
|
||||
reg is_join, is_wstall;
|
||||
reg [`NUM_REGS-1:0] used_regs;
|
||||
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
|
@ -45,21 +53,23 @@ module VX_decode #(
|
|||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = `EX_NOP;
|
||||
ex_type = 0;
|
||||
op_type = 'x;
|
||||
op_mod = 'x;
|
||||
imm = 'x;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
use_rs3 = 0;
|
||||
use_PC = 0;
|
||||
use_imm = 0;
|
||||
rd_fp = 0;
|
||||
rs1_fp = 0;
|
||||
rs2_fp = 0;
|
||||
is_join = 0;
|
||||
is_wstall = 0;
|
||||
is_wstall = 0;
|
||||
used_regs = 0;
|
||||
rd_r = rd;
|
||||
rs1_r = rs1;
|
||||
rs2_r = rs2;
|
||||
rs3_r = rs3;
|
||||
|
||||
case (opcode)
|
||||
`INST_I: begin
|
||||
|
@ -78,8 +88,9 @@ module VX_decode #(
|
|||
op_mod = 0;
|
||||
imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
`INST_R: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -113,18 +124,21 @@ module VX_decode #(
|
|||
endcase
|
||||
op_mod = 0;
|
||||
end
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rd = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_LUI);
|
||||
op_mod = 0;
|
||||
op_mod = 0;
|
||||
rs1_r = 0;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, 5'b0);
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -134,6 +148,7 @@ module VX_decode #(
|
|||
use_rd = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
end
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -144,6 +159,7 @@ module VX_decode #(
|
|||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
end
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -151,9 +167,10 @@ module VX_decode #(
|
|||
op_mod = 1;
|
||||
imm = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -168,11 +185,11 @@ module VX_decode #(
|
|||
endcase
|
||||
op_mod = 1;
|
||||
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3 == 0) begin
|
||||
|
@ -190,6 +207,7 @@ module VX_decode #(
|
|||
use_rd = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
end else begin
|
||||
ex_type = `EX_CSR;
|
||||
case (func3[1:0])
|
||||
|
@ -201,8 +219,10 @@ module VX_decode #(
|
|||
endcase
|
||||
imm = 32'(u_12);
|
||||
use_rd = 1;
|
||||
use_rs1 = !func3[2];
|
||||
use_imm = func3[2];
|
||||
`USED_REGS (1'b0, rd);
|
||||
if (!func3[2])
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -212,10 +232,11 @@ module VX_decode #(
|
|||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b0, func3});
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
rd_fp = (opcode == `INST_FL);
|
||||
use_rd = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS ((opcode == `INST_FL), rd);
|
||||
`ifdef EXT_F_ENABLE
|
||||
rd_fp = (opcode == `INST_FL);
|
||||
`endif
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -225,8 +246,8 @@ module VX_decode #(
|
|||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b1, func3});
|
||||
imm = {{20{func7[6]}}, func7, rd};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS ((opcode == `INST_FS), rs2);
|
||||
`ifdef EXT_F_ENABLE
|
||||
rs2_fp = (opcode == `INST_FS);
|
||||
`endif
|
||||
|
@ -240,17 +261,18 @@ module VX_decode #(
|
|||
op_type = `OP_BITS'(opcode[3:0]);
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
`USED_REGS (1'b1, rs3);
|
||||
end
|
||||
`INST_FCI: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
use_rd = 1;
|
||||
case (func7)
|
||||
7'h00, // FADD
|
||||
7'h04, // FSUB
|
||||
|
@ -258,55 +280,61 @@ module VX_decode #(
|
|||
7'h0C: // FDIV
|
||||
begin
|
||||
op_type = `OP_BITS'(func7[3:0]);
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h2C: begin
|
||||
op_type = `OP_BITS'(`FPU_SQRT);
|
||||
use_rs1 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
end
|
||||
7'h50: begin
|
||||
op_type = `OP_BITS'(`FPU_CMP);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h60: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
|
||||
use_rs1 = 1;
|
||||
rs1_fp = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
end
|
||||
7'h68: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
|
||||
use_rs1 = 1;
|
||||
rd_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
7'h10: begin
|
||||
// FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = {1'b0, func3[1:0]};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h14: begin
|
||||
// FMIN=3, FMAX=4
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = func3[0] ? 4 : 3;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h70: begin
|
||||
if (func3[0]) begin
|
||||
|
@ -316,15 +344,17 @@ module VX_decode #(
|
|||
// FMV.X.W=5
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = 5;
|
||||
end
|
||||
use_rs1 = 1;
|
||||
rs1_fp = 1;
|
||||
end
|
||||
rs1_fp = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
end
|
||||
7'h78: begin
|
||||
// FMV.W.X=6
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = 6;
|
||||
rd_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
@ -335,28 +365,28 @@ module VX_decode #(
|
|||
case (func3)
|
||||
3'h0: begin
|
||||
op_type = `OP_BITS'(`GPU_TMC);
|
||||
use_rs1 = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
3'h1: begin
|
||||
op_type = `OP_BITS'(`GPU_WSPAWN);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
3'h2: begin
|
||||
op_type = `OP_BITS'(`GPU_SPLIT);
|
||||
use_rs1 = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
3'h3: begin
|
||||
op_type = `OP_BITS'(`GPU_JOIN);
|
||||
is_join = 1;
|
||||
end
|
||||
3'h4: begin
|
||||
op_type = `OP_BITS'(`GPU_BAR);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
op_type = `OP_BITS'(`GPU_BAR);
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
@ -366,10 +396,7 @@ module VX_decode #(
|
|||
end
|
||||
|
||||
// disable write to integer register r0
|
||||
wire use_rd_qual = use_rd && (rd_fp || (rd != 0));
|
||||
|
||||
// EX_ALU needs rs1=0 for LUI operation
|
||||
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
|
||||
wire wb = use_rd && (rd_fp || (rd_r != 0));
|
||||
|
||||
assign decode_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_if.wid = ifetch_rsp_if.wid;
|
||||
|
@ -378,31 +405,27 @@ module VX_decode #(
|
|||
assign decode_if.ex_type = ex_type;
|
||||
assign decode_if.op_type = op_type;
|
||||
assign decode_if.op_mod = op_mod;
|
||||
assign decode_if.wb = use_rd_qual;
|
||||
assign decode_if.wb = wb;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign decode_if.rd = {rd_fp, rd};
|
||||
assign decode_if.rs1 = {rs1_fp, rs1_qual};
|
||||
assign decode_if.rs2 = {rs2_fp, rs2};
|
||||
assign decode_if.rs3 = {1'b1, rs3};
|
||||
`else
|
||||
`UNUSED_VAR (rd_fp)
|
||||
`UNUSED_VAR (rs1_fp)
|
||||
`UNUSED_VAR (rs2_fp)
|
||||
assign decode_if.rd = rd;
|
||||
assign decode_if.rs1 = rs1_qual;
|
||||
assign decode_if.rs2 = rs2;
|
||||
assign decode_if.rs3 = rs3;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign decode_if.rd = {rd_fp, rd_r};
|
||||
assign decode_if.rs1 = {rs1_fp, rs1_r};
|
||||
assign decode_if.rs2 = {rs2_fp, rs2_r};
|
||||
assign decode_if.rs3 = {1'b1, rs3_r};
|
||||
`else
|
||||
`UNUSED_VAR (rd_fp)
|
||||
`UNUSED_VAR (rs1_fp)
|
||||
`UNUSED_VAR (rs2_fp)
|
||||
assign decode_if.rd = rd_r;
|
||||
assign decode_if.rs1 = rs1_r;
|
||||
assign decode_if.rs2 = rs2_r;
|
||||
assign decode_if.rs3 = rs3_r;
|
||||
`endif
|
||||
|
||||
assign decode_if.imm = imm;
|
||||
assign decode_if.use_PC = use_PC;
|
||||
assign decode_if.use_imm = use_imm;
|
||||
|
||||
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
|
||||
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
||||
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
||||
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
|
||||
assign decode_if.imm = imm;
|
||||
assign decode_if.use_PC = use_PC;
|
||||
assign decode_if.use_imm = use_imm;
|
||||
assign decode_if.used_regs = used_regs;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -54,7 +54,8 @@ module VX_fpu_unit #(
|
|||
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (fpuq_pop),
|
||||
.full (fpuq_full)
|
||||
.full (fpuq_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
|
|
|
@ -82,8 +82,7 @@ module VX_ibuffer #(
|
|||
|
||||
if (writing && is_slot0) begin
|
||||
q_data_out[i] <= q_data_in;
|
||||
end
|
||||
if (pop) begin
|
||||
end else if (pop) begin
|
||||
q_data_out[i] <= q_data_prev[i];
|
||||
end
|
||||
end
|
||||
|
|
|
@ -38,7 +38,8 @@ module VX_instr_demux (
|
|||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -55,7 +56,8 @@ module VX_instr_demux (
|
|||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -72,7 +74,8 @@ module VX_instr_demux (
|
|||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.BUFFERED (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -90,7 +93,8 @@ module VX_instr_demux (
|
|||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -111,7 +115,8 @@ module VX_instr_demux (
|
|||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||
.BUFFERED (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -183,19 +183,44 @@ module VX_issue #(
|
|||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
|
||||
`PRINT_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
|
||||
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
|
||||
`PRINT_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
|
||||
$write(", data=");
|
||||
`PRINT_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr, csr_req_if.rs1_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
|
||||
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
|
||||
`PRINT_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write(", rs3_data=");
|
||||
`PRINT_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
$write("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
|
||||
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
|
||||
$write(", rs2_data=");
|
||||
`PRINT_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -75,10 +75,11 @@ module VX_lsu_unit #(
|
|||
`UNUSED_VAR (rsp_type)
|
||||
|
||||
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
|
||||
reg [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
||||
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
reg [`NUM_THREADS-1:0] req_sent_mask;
|
||||
wire req_sent_all;
|
||||
wire sent_all_ready;
|
||||
|
||||
wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire mbuf_full;
|
||||
|
@ -88,18 +89,20 @@ module VX_lsu_unit #(
|
|||
assign req_offset[i] = req_addr[i][1:0];
|
||||
end
|
||||
|
||||
wire mbuf_push = (| (dcache_req_if.valid & dcache_req_if.ready))
|
||||
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
|
||||
|
||||
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
wire mbuf_push = (| dcache_req_fire)
|
||||
&& (0 == req_sent_mask) // first submission only
|
||||
&& req_wb; // loads only
|
||||
|
||||
wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
wire mbuf_pop = mbuf_pop_part && (rsp_rem_mask_n == 0 || rsp_is_dup);
|
||||
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
|
||||
|
||||
assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0];
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
|
@ -107,26 +110,34 @@ module VX_lsu_unit #(
|
|||
.write_addr (mbuf_waddr),
|
||||
.acquire_slot (mbuf_push),
|
||||
.read_addr (mbuf_raddr),
|
||||
.write_data ({req_wid, req_pc, req_rd, req_wb, req_type, req_offset, req_is_dup}),
|
||||
.read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}),
|
||||
.write_data ({req_wid, req_pc, req_tmask, req_rd, req_wb, req_type, req_offset, req_is_dup}),
|
||||
.read_data ({rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup}),
|
||||
.release_addr (mbuf_raddr),
|
||||
.release_slot (mbuf_pop),
|
||||
.full (mbuf_full)
|
||||
.full (mbuf_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
assign req_sent_all = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|
||||
|| (req_is_dup && dcache_req_if.ready[0]);
|
||||
always @(posedge clk) begin
|
||||
if (mbuf_push) begin
|
||||
pending_tags[mbuf_waddr] <= req_tag;
|
||||
end
|
||||
end
|
||||
|
||||
assign sent_all_ready = &(dcache_req_if.ready | req_sent_mask);
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
req_sent_mask <= 0;
|
||||
end else begin
|
||||
if (req_sent_all)
|
||||
if (sent_all_ready)
|
||||
req_sent_mask <= 0;
|
||||
else
|
||||
req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready);
|
||||
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// need to hold the acquired tag index until the full request is submitted
|
||||
reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold;
|
||||
|
@ -136,20 +147,21 @@ module VX_lsu_unit #(
|
|||
req_tag_hold <= mbuf_waddr;
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
|
||||
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mbuf_push) begin
|
||||
rsp_rem_mask[mbuf_waddr] <= req_tmask;
|
||||
pending_tags[mbuf_waddr] <= req_tag;
|
||||
rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
|
||||
end
|
||||
if (mbuf_pop_part) begin
|
||||
if (dcache_rsp_fire) begin
|
||||
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
|
||||
end
|
||||
end
|
||||
|
||||
wire req_ready_dep = (req_wb && ~mbuf_full) || (~req_wb && st_commit_if.ready);
|
||||
|
||||
wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
wire req_ready_dep = (req_wb && ~mbuf_full)
|
||||
|| (~req_wb && st_commit_if.ready);
|
||||
|
||||
// DCache Request
|
||||
|
||||
|
@ -181,23 +193,23 @@ module VX_lsu_unit #(
|
|||
end
|
||||
end
|
||||
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask;
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask_dup & ~req_sent_mask;
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
|
||||
assign dcache_req_if.addr = mem_req_addr;
|
||||
assign dcache_req_if.byteen = mem_req_byteen;
|
||||
assign dcache_req_if.data = mem_req_data;
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}};
|
||||
assign dcache_req_if.tag = {`NUM_THREADS{req_pc, req_wid, req_tag}};
|
||||
`else
|
||||
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
|
||||
`endif
|
||||
|
||||
assign ready_in = req_ready_dep && req_sent_all;
|
||||
assign ready_in = req_ready_dep && sent_all_ready;
|
||||
|
||||
// send store commit
|
||||
|
||||
wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
|
||||
wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
|
||||
|
||||
assign st_commit_if.valid = is_store_rsp;
|
||||
assign st_commit_if.wid = req_wid;
|
||||
|
@ -211,7 +223,7 @@ module VX_lsu_unit #(
|
|||
// load response formatting
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] rsp_data;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask_qual;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
|
||||
|
@ -234,7 +246,7 @@ module VX_lsu_unit #(
|
|||
end
|
||||
end
|
||||
|
||||
assign rsp_tmask = rsp_is_dup ? rsp_rem_mask[mbuf_raddr] : dcache_rsp_if.valid;
|
||||
assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.valid;
|
||||
|
||||
// send load commit
|
||||
|
||||
|
@ -247,15 +259,15 @@ module VX_lsu_unit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!load_rsp_stall),
|
||||
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
assign dcache_rsp_if.ready = ~load_rsp_stall;
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_fire);
|
||||
`SCOPE_ASSIGN (dcache_req_wid, req_wid);
|
||||
`SCOPE_ASSIGN (dcache_req_pc, req_pc);
|
||||
`SCOPE_ASSIGN (dcache_req_addr, req_addr);
|
||||
|
@ -269,15 +281,15 @@ module VX_lsu_unit #(
|
|||
|
||||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if ((| (dcache_req_if.valid & dcache_req_if.ready))) begin
|
||||
if ((| dcache_req_fire)) begin
|
||||
if ((| dcache_req_if.rw))
|
||||
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
|
||||
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
|
||||
else
|
||||
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b",
|
||||
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
|
||||
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
|
||||
end
|
||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||
if (dcache_rsp_fire) begin
|
||||
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b",
|
||||
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup);
|
||||
end
|
||||
|
@ -291,4 +303,4 @@ module VX_lsu_unit #(
|
|||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule
|
|
@ -81,4 +81,25 @@
|
|||
|
||||
`define LTRIM(x,s) x[s-1:0]
|
||||
|
||||
// Print a 1-D array as "{0x.., 0x.., ...}" using $write, highest index first.
//   a : array to print; it is indexed directly as a[i], so pass an identifier
//       or hierarchical reference rather than an arbitrary expression
//   m : number of elements (parenthesized on use, so expressions are safe)
// The expansion ends without a trailing semicolon or line continuation, the
// same convention PRINT_ARRAY2D uses; invoke it as a statement followed by
// ';' inside a procedural block. The loop variable is named 'i', so the
// argument passed for 'a' must not itself depend on a caller variable 'i'.
`define PRINT_ARRAY1D(a, m) \
    $write("{"); \
    for (integer i = ((m)-1); i >= 0; --i) begin \
        if (i != ((m)-1)) $write(", "); \
        $write("0x%0h", a[i]); \
    end \
    $write("}")
|
||||
|
||||
// Print a 2-D array as "{{0x.., ...}, {0x.., ...}, ...}" using $write,
// highest indices first in both dimensions.
//   a : array to print; it is indexed directly as a[i][j], so pass an
//       identifier or hierarchical reference rather than an expression
//   m : size of the inner (second) dimension, iterated by j
//   n : size of the outer (first) dimension, iterated by i
// Size arguments are parenthesized on every use so that expressions such as
// a conditional or a sum expand correctly. The expansion ends without a
// trailing semicolon; invoke it as a statement followed by ';' inside a
// procedural block. Loop variables are named 'i' and 'j', so the argument
// passed for 'a' must not depend on caller variables with those names.
`define PRINT_ARRAY2D(a, m, n) \
    $write("{"); \
    for (integer i = ((n)-1); i >= 0; --i) begin \
        if (i != ((n)-1)) $write(", "); \
        $write("{"); \
        for (integer j = ((m)-1); j >= 0; --j) begin \
            if (j != ((m)-1)) $write(", "); \
            $write("0x%0h", a[i][j]); \
        end \
        $write("}"); \
    end \
    $write("}")
|
||||
|
||||
`endif
|
|
@ -121,7 +121,7 @@ module Vortex (
|
|||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (`NUM_CLUSTERS >= 4),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) csr_arb (
|
||||
.clk (clk),
|
||||
|
@ -228,7 +228,7 @@ module Vortex (
|
|||
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
|
||||
.BUFFERED_RSP (1)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,14 +1,13 @@
|
|||
`include "VX_define.vh"
|
||||
`ifndef NOPAE
|
||||
import local_mem_cfg_pkg::*;
|
||||
`include "afu_json_info.vh"
|
||||
`else
|
||||
`include "vortex_afu.vh"
|
||||
`endif
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
import ccip_if_pkg::*;
|
||||
import local_mem_cfg_pkg::*;
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
`endif
|
||||
|
||||
module vortex_afu #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
|
|
3
hw/rtl/cache/VX_bank.v
vendored
3
hw/rtl/cache/VX_bank.v
vendored
|
@ -487,7 +487,8 @@ module VX_bank #(
|
|||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS)
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.BUFFERED (NUM_BANKS == 1)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
3
hw/rtl/cache/VX_cache.v
vendored
3
hw/rtl/cache/VX_cache.v
vendored
|
@ -168,8 +168,7 @@ module VX_cache #(
|
|||
.NUM_BANKS (NUM_BANKS)
|
||||
) flush_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush (flush),
|
||||
.reset (reset || flush),
|
||||
.addr_out (flush_addr),
|
||||
.valid_out (flush_enable)
|
||||
);
|
||||
|
|
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -98,7 +98,8 @@ module VX_cache_core_rsp_merge #(
|
|||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
|
||||
.BUFFERED (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -146,7 +147,8 @@ module VX_cache_core_rsp_merge #(
|
|||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.BUFFERED (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
5
hw/rtl/cache/VX_flush_ctrl.v
vendored
5
hw/rtl/cache/VX_flush_ctrl.v
vendored
|
@ -9,8 +9,7 @@ module VX_flush_ctrl #(
|
|||
parameter NUM_BANKS = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire flush,
|
||||
input wire reset,
|
||||
output wire [`LINE_SELECT_BITS-1:0] addr_out,
|
||||
output wire valid_out
|
||||
);
|
||||
|
@ -18,7 +17,7 @@ module VX_flush_ctrl #(
|
|||
reg [`LINE_SELECT_BITS-1:0] flush_ctr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || flush) begin
|
||||
if (reset) begin
|
||||
flush_enable <= 1;
|
||||
flush_ctr <= 0;
|
||||
end else begin
|
||||
|
|
|
@ -3,10 +3,6 @@
|
|||
/// Modified port of cast module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_cvt #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
@ -73,19 +69,19 @@ module VX_fp_cvt #(
|
|||
);
|
||||
end
|
||||
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
|
||||
wire [LANES-1:0] input_sign;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
|
||||
wire [LANES-1:0] input_sign;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire [INT_MAN_WIDTH-1:0] int_mantissa;
|
||||
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
|
||||
wire fmt_sign = dataa[i][31];
|
||||
wire int_sign = dataa[i][31] & is_signed;
|
||||
assign int_mantissa = int_sign ? $unsigned(-dataa[i]) : dataa[i];
|
||||
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
|
||||
assign fmt_exponent[i] = $signed({1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]});
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
|
||||
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
|
||||
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
||||
end
|
||||
|
@ -115,7 +111,7 @@ module VX_fp_cvt #(
|
|||
wire [2:0] rnd_mode_s0;
|
||||
fp_type_t [LANES-1:0] in_a_type_s0;
|
||||
wire [LANES-1:0] input_sign_s0;
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
|
||||
wire [LANES-1:0] mant_is_zero_s0;
|
||||
|
@ -135,38 +131,93 @@ module VX_fp_cvt #(
|
|||
|
||||
// Normalization
|
||||
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Input mantissa needs to be normalized
|
||||
wire signed [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
wire signed [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
wire [LZC_RESULT_WIDTH:0] renorm_shamt_sgn;
|
||||
|
||||
// signed form for calculations
|
||||
assign renorm_shamt_sgn = $signed({1'b0, renorm_shamt_s0[i]});
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||
|
||||
// Unbias exponent and compensate for shift
|
||||
assign fp_input_exp = $signed(fmt_exponent_s0[i] +
|
||||
(($signed({1'b0, in_a_type_s0[i].is_subnormal}) +
|
||||
$signed(FMT_SHIFT_COMPENSATION - EXP_BIAS)) -
|
||||
renorm_shamt_sgn));
|
||||
assign fp_input_exp = fmt_exponent_s0[i] +
|
||||
{1'b0, in_a_type_s0[i].is_subnormal} +
|
||||
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
|
||||
{1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign int_input_exp = $signed(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
|
||||
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp[i] = input_exp[i] + $signed(EXP_BIAS);
|
||||
assign destination_exp[i] = input_exp[i] + EXP_BIAS;
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
|
||||
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
|
||||
wire [LANES-1:0] of_before_round_s0;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
reg of_before_round;
|
||||
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
denorm_shamt = 0; // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s0) begin
|
||||
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant = ~0; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
|
||||
// Limit the shift to retain sticky bits
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(destination_exp[i]) < $signed(1)) begin
|
||||
// Denormalize underflowing values
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
|
||||
end
|
||||
end else begin
|
||||
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
|
||||
// overflow: when converting to unsigned the range is larger by one
|
||||
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(input_exp[i]) < $signed(-1)) begin
|
||||
// underflow
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
|
||||
end
|
||||
end
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
assign preshift_mant_s0[i] = preshift_mant;
|
||||
assign denorm_shamt_s0[i] = denorm_shamt;
|
||||
assign final_exp_s0[i] = final_exp;
|
||||
assign of_before_round_s0[i] = of_before_round;
|
||||
end
|
||||
|
||||
// Pipeline stage1
|
||||
|
||||
wire valid_in_s1;
|
||||
|
@ -176,121 +227,68 @@ module VX_fp_cvt #(
|
|||
wire [2:0] rnd_mode_s1;
|
||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||
wire [LANES-1:0] mant_is_zero_s1;
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp_s1;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
|
||||
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
|
||||
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||
wire [LANES-1:0] of_before_round_s1;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + 2*INT_EXP_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, input_mant, input_exp, destination_exp}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1})
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
|
||||
);
|
||||
|
||||
// Casting
|
||||
reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
|
||||
reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
|
||||
wire [LANES-1:0][MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
||||
wire [LANES-1:0][MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
||||
|
||||
reg [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
|
||||
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
|
||||
reg [LANES-1:0] of_before_round;
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
|
||||
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits;
|
||||
|
||||
// Rounding and classification
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp[i] = $unsigned(destination_exp_s1[i]); // take exponent as is, only look at lower bits
|
||||
preshift_mant[i] = 65'b0; // initialize mantissa container with zeroes
|
||||
denorm_shamt[i] = 0; // right of mantissa
|
||||
of_before_round[i] = 1'b0;
|
||||
|
||||
// Place mantissa to the left of the shifter
|
||||
preshift_mant[i] = {input_mant_s1[i], 33'b0};
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s1) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
if ($signed(destination_exp_s1[i]) >= $signed(2**EXP_BITS-1)) begin
|
||||
final_exp[i] = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant[i] = ~0; // largest normal value and RS bits set
|
||||
of_before_round[i] = 1'b1;
|
||||
// Denormalize underflowing values
|
||||
end else if (($signed(destination_exp_s1[i]) < $signed(1))
|
||||
&& ($signed(destination_exp_s1[i]) >= -$signed(MAN_BITS))) begin
|
||||
final_exp[i] = 0; // denormal result
|
||||
denorm_shamt[i] = $unsigned(denorm_shamt[i] + 1 - destination_exp_s1[i]); // adjust right shifting
|
||||
// Limit the shift to retain sticky bits
|
||||
end else if ($signed(destination_exp_s1[i]) < -$signed(MAN_BITS)) begin
|
||||
final_exp[i] = 0; // denormal result
|
||||
denorm_shamt[i] = $unsigned(denorm_shamt[i] + (2 + MAN_BITS)); // to sticky
|
||||
end
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt[i] = (MAX_INT_WIDTH-1) - input_exp_s1[i];
|
||||
// overflow: when converting to unsigned the range is larger by one
|
||||
if ($signed(input_exp_s1[i]) >= $signed(MAX_INT_WIDTH -1 + unsigned_s1)) begin
|
||||
denorm_shamt[i] = SHAMT_BITS'(0); // prevent shifting
|
||||
of_before_round[i] = 1'b1;
|
||||
// underflow
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
|
||||
denorm_shamt[i] = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
end
|
||||
end
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
wire [2*INT_MAN_WIDTH:0] destination_mant;
|
||||
wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
||||
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
||||
wire [1:0] round_sticky_bits;
|
||||
wire [31:0] fmt_pre_round_abs;
|
||||
wire [31:0] pre_round_abs;
|
||||
|
||||
// Mantissa adjustment shift
|
||||
assign destination_mant[i] = preshift_mant[i] >> denorm_shamt[i];
|
||||
|
||||
assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
|
||||
|
||||
// Extract final mantissa and round bit, discard the normal bit (for FP)
|
||||
assign {final_mant[i], fp_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
||||
assign {final_int[i], int_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
||||
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
||||
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
||||
|
||||
// Collapse sticky bits
|
||||
assign fp_round_sticky_bits[i][0] = (| destination_mant[i][NUM_FP_STICKY-1:0]);
|
||||
assign int_round_sticky_bits[i][0] = (| destination_mant[i][NUM_INT_STICKY-1:0]);
|
||||
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
|
||||
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
|
||||
|
||||
// select RS bits for destination operation
|
||||
assign round_sticky_bits[i] = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
||||
end
|
||||
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
||||
|
||||
// Rounding and classification
|
||||
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Pack exponent and mantissa into proper rounding form
|
||||
wire [31:0] fmt_pre_round_abs = {1'b0, final_exp[i][EXP_BITS-1:0], final_mant[i][MAN_BITS-1:0]};
|
||||
|
||||
// Sign-extend integer result
|
||||
wire [31:0] ifmt_pre_round_abs = final_int[i];
|
||||
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
|
||||
|
||||
// Select output with destination format and operation
|
||||
wire [31:0] pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : ifmt_pre_round_abs;
|
||||
assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
|
||||
|
||||
// Perform the rounding
|
||||
VX_fp_rounding #(
|
||||
.DAT_WIDTH (32)
|
||||
) fp_rounding (
|
||||
.abs_value_i (pre_round_abs),
|
||||
.sign_i (input_sign_s1[i]),
|
||||
.round_sticky_bits_i (round_sticky_bits[i]),
|
||||
.rnd_mode_i (rnd_mode_s1),
|
||||
.effective_subtraction_i (1'b0),
|
||||
.abs_rounded_o (rounded_abs[i]),
|
||||
.sign_o (rounded_sign[i]),
|
||||
.abs_value_i (pre_round_abs),
|
||||
.sign_i (input_sign_s1[i]),
|
||||
.round_sticky_bits_i(round_sticky_bits),
|
||||
.rnd_mode_i (rnd_mode_s1),
|
||||
.effective_subtraction_i(1'b0),
|
||||
.abs_rounded_o (rounded_abs[i]),
|
||||
.sign_o (rounded_sign[i]),
|
||||
`UNUSED_PIN (exact_zero_o)
|
||||
);
|
||||
end
|
||||
|
@ -306,23 +304,22 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] input_sign_s2;
|
||||
wire [LANES-1:0] rounded_sign_s2;
|
||||
wire [LANES-1:0][31:0] rounded_abs_s2;
|
||||
wire [LANES-1:0] of_before_round_s2;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2})
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] of_after_round;
|
||||
wire [LANES-1:0] uf_after_round;
|
||||
|
||||
wire [LANES-1:0][31:0] fmt_result;
|
||||
|
||||
wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion
|
||||
wire [LANES-1:0] rounded_int_res_zero; // after rounding
|
||||
|
||||
|
@ -335,7 +332,7 @@ module VX_fp_cvt #(
|
|||
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
|
||||
|
||||
// Negative integer result needs to be brought into two's complement
|
||||
assign rounded_int_res[i] = rounded_sign_s2[i] ? $unsigned(-rounded_abs_s2[i]) : rounded_abs_s2[i];
|
||||
assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
|
||||
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
|
||||
end
|
||||
|
||||
|
@ -373,7 +370,7 @@ module VX_fp_cvt #(
|
|||
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
|
||||
end else begin
|
||||
int_special_result[i][30:0] = 2**(31) -1; // alone yields 2**(31)-1
|
||||
int_special_result[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
|
||||
end
|
||||
end
|
||||
|
@ -381,7 +378,7 @@ module VX_fp_cvt #(
|
|||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
|
||||
| in_a_type_s2[i].is_inf
|
||||
| of_before_round[i]
|
||||
| of_before_round_s2[i]
|
||||
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
|
||||
|
||||
// All integer special cases are invalid
|
||||
|
@ -399,11 +396,11 @@ module VX_fp_cvt #(
|
|||
wire [31:0] fp_result, int_result;
|
||||
|
||||
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i]));
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
|
||||
|
||||
assign fp_regular_status.NV = is_itof_s2 & (of_before_round[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
||||
assign fp_regular_status.NX = inexact;
|
||||
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_div #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_fma #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_sqrt #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
@ -44,7 +48,7 @@ module VX_fp_sqrt #(
|
|||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fsqrt (dataa[i], frm, r, f);
|
||||
dpi_fsqrt (dataa[i], frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ module VX_fp_type (
|
|||
);
|
||||
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
|
||||
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
|
||||
wire is_subnormal = (exp_i == 8'd0) && !is_zero;
|
||||
wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0);
|
||||
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
|
||||
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
|
||||
wire is_signaling = is_nan && (man_i[22] == 1'b0);
|
||||
|
|
|
@ -330,9 +330,9 @@ module VX_fpu_dpi #(
|
|||
dpi_feq (dataa[i], datab[i], result_feq[i], fflags_feq[i]);
|
||||
dpi_fmin (dataa[i], datab[i], result_fmin[i], fflags_fmin[i]);
|
||||
dpi_fmax (dataa[i], datab[i], result_fmax[i], fflags_fmax[i]);
|
||||
dpi_fsgnj (dataa[i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (dataa[i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (dataa[i], result_fsgnjx[i]);
|
||||
dpi_fsgnj (dataa[i], datab[i], result_fsgnj[i]);
|
||||
dpi_fsgnjn (dataa[i], datab[i], result_fsgnjn[i]);
|
||||
dpi_fsgnjx (dataa[i], datab[i], result_fsgnjx[i]);
|
||||
result_fmv[i] = dataa[i];
|
||||
end
|
||||
end
|
||||
|
|
|
@ -18,11 +18,12 @@ module VX_index_buffer #(
|
|||
input wire [ADDRW-1:0] release_addr,
|
||||
input wire release_slot,
|
||||
|
||||
output wire full
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
reg [SIZE-1:0] free_slots, free_slots_n;
|
||||
reg [ADDRW-1:0] write_addr_r;
|
||||
reg full_r;
|
||||
reg empty_r, full_r;
|
||||
|
||||
wire free_valid;
|
||||
wire [ADDRW-1:0] free_index;
|
||||
|
@ -51,6 +52,7 @@ module VX_index_buffer #(
|
|||
if (reset) begin
|
||||
write_addr_r <= ADDRW'(1'b0);
|
||||
free_slots <= {SIZE{1'b1}};
|
||||
empty_r <= 1'b1;
|
||||
full_r <= 1'b0;
|
||||
end else begin
|
||||
if (release_slot) begin
|
||||
|
@ -60,6 +62,7 @@ module VX_index_buffer #(
|
|||
write_addr_r <= free_index;
|
||||
end
|
||||
free_slots <= free_slots_n;
|
||||
empty_r <= (& free_slots_n);
|
||||
full_r <= ~free_valid;
|
||||
end
|
||||
end
|
||||
|
@ -81,6 +84,7 @@ module VX_index_buffer #(
|
|||
);
|
||||
|
||||
assign write_addr = write_addr_r;
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
|
||||
|
||||
endmodule
|
|
@ -67,8 +67,7 @@ module VX_skid_buffer #(
|
|||
end else begin
|
||||
if (ready_out) begin
|
||||
use_buffer <= 0;
|
||||
end
|
||||
if (push && !pop) begin
|
||||
end else if (push && valid_out_r) begin
|
||||
assert(!use_buffer);
|
||||
use_buffer <= 1;
|
||||
end
|
||||
|
@ -81,9 +80,11 @@ module VX_skid_buffer #(
|
|||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (pop) begin
|
||||
data_out_r <= use_buffer ? buffer : data_in;
|
||||
end
|
||||
if (pop && !use_buffer) begin
|
||||
data_out_r <= data_in;
|
||||
end else if (pop) begin
|
||||
data_out_r <= buffer;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -138,5 +138,4 @@ clean-fpga-32c:
|
|||
clean-fpga-64c:
|
||||
rm -rf $(FPGA_BUILD_DIR)_64c sources.txt
|
||||
|
||||
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
|
||||
rm sources.txt
|
||||
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
|
|
@ -6,7 +6,7 @@
|
|||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
||||
vortex_afu.json
|
||||
vortex_afu16.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
|
@ -2,6 +2,8 @@
|
|||
+define+NUM_CLUSTERS=4
|
||||
#+define+L3_ENABLE=1
|
||||
|
||||
+define+GLOBAL_BLOCK_SIZE=16
|
||||
|
||||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
+define+NUM_CLUSTERS=8
|
||||
#+define+L3_ENABLE=1
|
||||
|
||||
+define+GLOBAL_BLOCK_SIZE=16
|
||||
|
||||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
||||
vortex_afu.json
|
||||
vortex_afu8.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
56
hw/syn/opae/vortex_afu16.json
Normal file
56
hw/syn/opae/vortex_afu16.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto-200",
|
||||
"clock-frequency-low": "auto-200",
|
||||
|
||||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
"cmd-run": 3,
|
||||
"cmd-csr-read": 4,
|
||||
"cmd-csr-write": 5,
|
||||
|
||||
"mmio-cmd-type": 10,
|
||||
"mmio-io-addr": 12,
|
||||
"mmio-mem-addr": 14,
|
||||
"mmio-data-size": 16,
|
||||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-csr-core": 24,
|
||||
"mmio-csr-addr": 26,
|
||||
"mmio-csr-data": 28,
|
||||
"mmio-csr-read": 30,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
"class": "ccip_std_afu_avalon_mm",
|
||||
"module-ports" :
|
||||
[
|
||||
{
|
||||
"class": "cci-p",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"class": "local-memory",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "vortex_afu",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
57
hw/syn/opae/vortex_afu8.json
Normal file
57
hw/syn/opae/vortex_afu8.json
Normal file
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto-210",
|
||||
"clock-frequency-low": "auto-210",
|
||||
|
||||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
"cmd-run": 3,
|
||||
"cmd-csr-read": 4,
|
||||
"cmd-csr-write": 5,
|
||||
|
||||
"mmio-cmd-type": 10,
|
||||
"mmio-io-addr": 12,
|
||||
"mmio-mem-addr": 14,
|
||||
"mmio-data-size": 16,
|
||||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-csr-core": 24,
|
||||
"mmio-csr-addr": 26,
|
||||
"mmio-csr-data": 28,
|
||||
"mmio-csr-read": 30,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
"class": "ccip_std_afu_avalon_mm",
|
||||
"module-ports" :
|
||||
[
|
||||
{
|
||||
"class": "cci-p",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"class": "local-memory",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "vortex_afu",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
37
hw/syn/quartus/Makefile
Normal file
37
hw/syn/quartus/Makefile
Normal file
|
@ -0,0 +1,37 @@
|
|||
.PHONY: unittest pipeline cache core vortex top1 top2 top4 top8 top16 top32 top64
|
||||
|
||||
unittest:
|
||||
$(MAKE) -C unittest clean && $(MAKE) -C unittest > unittest/build.log 2>&1 &
|
||||
|
||||
pipeline:
|
||||
$(MAKE) -C pipeline clean && $(MAKE) -C pipeline > pipeline/build.log 2>&1 &
|
||||
|
||||
cache:
|
||||
$(MAKE) -C cache clean && $(MAKE) -C cache > cache/build.log 2>&1 &
|
||||
|
||||
core:
|
||||
$(MAKE) -C core clean && $(MAKE) -C core > core/build.log 2>&1 &
|
||||
|
||||
vortex:
|
||||
$(MAKE) -C vortex clean && $(MAKE) -C vortex > vortex/build.log 2>&1 &
|
||||
|
||||
top1:
|
||||
$(MAKE) -C top1 clean && $(MAKE) -C top1 > top1/build.log 2>&1 &
|
||||
|
||||
top2:
|
||||
$(MAKE) -C top2 clean && $(MAKE) -C top2 > top2/build.log 2>&1 &
|
||||
|
||||
top4:
|
||||
$(MAKE) -C top4 clean && $(MAKE) -C top4 > top4/build.log 2>&1 &
|
||||
|
||||
top8:
|
||||
$(MAKE) -C top8 clean && $(MAKE) -C top8 > top8/build.log 2>&1 &
|
||||
|
||||
top16:
|
||||
$(MAKE) -C top16 clean && $(MAKE) -C top16 > top16/build.log 2>&1 &
|
||||
|
||||
top32:
|
||||
$(MAKE) -C top32 clean && $(MAKE) -C top32 > top32/build.log 2>&1 &
|
||||
|
||||
top64:
|
||||
$(MAKE) -C top64 clean && $(MAKE) -C top64 > top64/build.log 2>&1 &
|
|
@ -41,10 +41,6 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
|
|||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||
|
||||
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
|
||||
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
#set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
|
||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
|
|
|
@ -1,13 +1,20 @@
|
|||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
|
||||
|
||||
PROJECT = Vortex
|
||||
TOP_LEVEL_ENTITY = Vortex
|
||||
SRC_FILE = Vortex.v
|
||||
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
|
||||
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
SRC_FILE = Vortex.sv
|
||||
|
||||
# Part, Family
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
RTL_DIR=../../../rtl
|
||||
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue