mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
64-bit rtl fix
This commit is contained in:
parent
78b6e0638c
commit
5bcf24ed55
7 changed files with 166 additions and 167 deletions
|
@ -74,16 +74,20 @@ isa()
|
|||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d || true
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
|
|
|
@ -225,7 +225,7 @@ def write_csv(log_filename, csv_filename, log_type):
|
|||
|
||||
# write to CSV
|
||||
with open(csv_filename, 'w', newline='') as csv_file:
|
||||
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "operands", "destination"]
|
||||
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
|
||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for entry in entries:
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -40,7 +40,7 @@ extern "C" {
|
|||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
|
||||
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
|
@ -54,31 +54,21 @@ extern "C" {
|
|||
}
|
||||
|
||||
inline uint64_t nan_box(uint32_t value) {
|
||||
#ifdef FPU_RV64F
|
||||
return value | 0xffffffff00000000;
|
||||
#else
|
||||
return value;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool is_nan_boxed(uint64_t value) {
|
||||
#ifdef FPU_RV64F
|
||||
return (uint32_t(value >> 32) == 0xffffffff);
|
||||
#else
|
||||
__unused (value);
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (!is_nan_boxed(a)) {
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
return a;
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (is_nan_boxed(a))
|
||||
return a;
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
|
||||
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
|
||||
|
@ -88,7 +78,7 @@ void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
|
|||
}
|
||||
|
||||
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
|
||||
|
@ -98,19 +88,19 @@ void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
|
|||
}
|
||||
|
||||
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -118,9 +108,9 @@ void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -128,9 +118,9 @@ void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -138,9 +128,9 @@ void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -148,36 +138,36 @@ void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
|
@ -186,61 +176,61 @@ void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVa
|
|||
}
|
||||
|
||||
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
} else {
|
||||
*result = rv_itof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
} else {
|
||||
*result = rv_utof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
} else {
|
||||
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_ftod((int32_t)check_boxing(a));
|
||||
|
@ -250,90 +240,90 @@ void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
|||
}
|
||||
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
|
|
|
@ -40,6 +40,14 @@
|
|||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifndef FPU_DSP
|
||||
`ifndef EXT_D_DISABLE
|
||||
`define EXT_D_ENABLE
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef EXT_ZICOND_DISABLE
|
||||
`define EXT_ZICOND_ENABLE
|
||||
`endif
|
||||
|
|
|
@ -230,9 +230,9 @@
|
|||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_DIV 4'b0011
|
||||
`define INST_FPU_SQRT 4'b0100
|
||||
`define INST_FPU_CMP 4'b0101 // mod: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b0110
|
||||
`define INST_FPU_MISC 4'b0111 // mod: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_F2I 4'b1000
|
||||
`define INST_FPU_F2U 4'b1001
|
||||
`define INST_FPU_I2F 4'b1010
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,7 +16,7 @@
|
|||
`ifdef FPU_DSP
|
||||
|
||||
module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 4,
|
||||
parameter NUM_LANES = 4,
|
||||
parameter TAG_WIDTH = 4,
|
||||
parameter OUT_BUF = 0
|
||||
) (
|
||||
|
@ -29,7 +29,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_FMT_BITS-1:0] fmt,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
@ -37,7 +37,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -56,22 +56,22 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH;
|
||||
|
||||
`UNUSED_VAR (fmt)
|
||||
`UNUSED_VAR (fmt)
|
||||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
|
||||
wire div_ready_in, sqrt_ready_in;
|
||||
wire [NUM_LANES-1:0][31:0] div_result, sqrt_result;
|
||||
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
|
||||
wire div_ready_out, sqrt_ready_out;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
fflags_t div_fflags, sqrt_fflags;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
|
@ -79,7 +79,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
is_madd = 0;
|
||||
is_sub = 0;
|
||||
is_sub = 0;
|
||||
is_neg = 0;
|
||||
is_div = 0;
|
||||
is_itof = 0;
|
||||
|
@ -126,19 +126,19 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_fma (
|
||||
.clk (clk),
|
||||
.reset (fma_reset),
|
||||
.clk (clk),
|
||||
.reset (fma_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_FMA)),
|
||||
.ready_in (per_core_ready_in[FPU_FMA]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.is_madd (is_madd),
|
||||
.is_sub (is_sub),
|
||||
.is_neg (is_neg),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.datac (datac_s),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.datac (datac_s),
|
||||
.has_fflags (per_core_has_fflags[FPU_FMA]),
|
||||
.fflags (per_core_fflags[FPU_FMA]),
|
||||
.result (per_core_result[FPU_FMA]),
|
||||
|
@ -151,17 +151,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_div (
|
||||
.clk (clk),
|
||||
.reset (div_reset),
|
||||
.clk (clk),
|
||||
.reset (div_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div),
|
||||
.ready_in (div_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.has_fflags (div_has_fflags),
|
||||
.fflags (div_fflags),
|
||||
.fflags (div_fflags),
|
||||
.result (div_result),
|
||||
.tag_out (div_tag_out),
|
||||
.valid_out (div_valid_out),
|
||||
|
@ -172,14 +172,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_sqrt (
|
||||
.clk (clk),
|
||||
.reset (sqrt_reset),
|
||||
.clk (clk),
|
||||
.reset (sqrt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div),
|
||||
.ready_in (sqrt_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (sqrt_has_fflags),
|
||||
.fflags (sqrt_fflags),
|
||||
.result (sqrt_result),
|
||||
|
@ -188,57 +188,57 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.ready_out (sqrt_ready_out)
|
||||
);
|
||||
|
||||
wire cvt_rt_int_in = ~is_itof;
|
||||
wire cvt_rt_int_out;
|
||||
wire cvt_ret_int_in = ~is_itof;
|
||||
wire cvt_ret_int_out;
|
||||
|
||||
VX_fpu_cvt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+1)
|
||||
) fpu_cvt (
|
||||
.clk (clk),
|
||||
.reset (cvt_reset),
|
||||
.clk (clk),
|
||||
.reset (cvt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_CVT)),
|
||||
.ready_in (per_core_ready_in[FPU_CVT]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({cvt_rt_int_in, tag_in}),
|
||||
.tag_in ({cvt_ret_int_in, tag_in}),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa_s),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (per_core_has_fflags[FPU_CVT]),
|
||||
.fflags (per_core_fflags[FPU_CVT]),
|
||||
.result (per_core_result[FPU_CVT]),
|
||||
.tag_out ({cvt_rt_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.valid_out (per_core_valid_out[FPU_CVT]),
|
||||
.ready_out (per_core_ready_out[FPU_CVT])
|
||||
);
|
||||
|
||||
wire ncp_rt_int_in = (op_type == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(op_type, frm)
|
||||
wire ncp_ret_int_in = (op_type == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(op_type, frm)
|
||||
|| `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_rt_int_out;
|
||||
wire ncp_ret_int_out;
|
||||
|
||||
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_ret_sext_out;
|
||||
|
||||
wire ncp_rt_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_rt_sext_out;
|
||||
|
||||
VX_fpu_ncp #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+2)
|
||||
) fpu_ncp (
|
||||
.clk (clk),
|
||||
.reset (ncp_reset),
|
||||
.reset (ncp_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_NCP)),
|
||||
.ready_in (per_core_ready_in[FPU_NCP]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({ncp_rt_sext_in, ncp_rt_int_in, tag_in}),
|
||||
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}),
|
||||
.op_type (op_type),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.datab (datab_s),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.has_fflags (per_core_has_fflags[FPU_NCP]),
|
||||
.fflags (per_core_fflags[FPU_NCP]),
|
||||
.tag_out ({ncp_rt_sext_out, ncp_rt_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.valid_out (per_core_valid_out[FPU_NCP]),
|
||||
.ready_out (per_core_ready_out[FPU_NCP])
|
||||
);
|
||||
|
@ -249,20 +249,20 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (0)
|
||||
) div_sqrt_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.ready_in ({sqrt_ready_out, div_ready_out}),
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
|
||||
.data_out ({
|
||||
per_core_result[FPU_DIVSQRT],
|
||||
per_core_has_fflags[FPU_DIVSQRT],
|
||||
per_core_fflags[FPU_DIVSQRT],
|
||||
per_core_result[FPU_DIVSQRT],
|
||||
per_core_has_fflags[FPU_DIVSQRT],
|
||||
per_core_fflags[FPU_DIVSQRT],
|
||||
per_core_tag_out[FPU_DIVSQRT]
|
||||
}),
|
||||
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
|
||||
|
@ -273,50 +273,48 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out;
|
||||
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_FPC; ++i) begin
|
||||
per_core_data_out[i][RSP_DATAW+1:2] = {
|
||||
per_core_result[i],
|
||||
per_core_has_fflags[i],
|
||||
per_core_fflags[i],
|
||||
per_core_result[i],
|
||||
per_core_has_fflags[i],
|
||||
per_core_fflags[i],
|
||||
per_core_tag_out[i]
|
||||
};
|
||||
per_core_data_out[i][1:0] = '0;
|
||||
end
|
||||
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_rt_int_out};
|
||||
per_core_data_out[FPU_NCP][1:0] = {ncp_rt_sext_out, ncp_rt_int_out};
|
||||
end
|
||||
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_ret_int_out};
|
||||
per_core_data_out[FPU_NCP][1:0] = {ncp_ret_sext_out, ncp_ret_int_out};
|
||||
end
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] result_s;
|
||||
wire [1:0] op_rt_int_out;
|
||||
|
||||
wire [1:0] op_ret_int_out;
|
||||
`UNUSED_VAR (op_ret_int_out)
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.DATAW (RSP_DATAW + 2),
|
||||
.DATAW (RSP_DATAW + 2),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_out),
|
||||
.valid_in (per_core_valid_out),
|
||||
.ready_in (per_core_ready_out),
|
||||
.data_in (per_core_data_out),
|
||||
.data_out ({result_s, has_fflags, fflags, tag_out, op_rt_int_out}),
|
||||
.data_out ({result_s, has_fflags, fflags, tag_out, op_ret_int_out}),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
`ifndef FPU_RV64F
|
||||
`UNUSED_VAR (op_rt_int_out)
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef FPU_RV64F
|
||||
reg [`XLEN-1:0] result_r;
|
||||
always @(*) begin
|
||||
case (op_rt_int_out)
|
||||
case (op_ret_int_out)
|
||||
2'b11: result_r = `XLEN'($signed(result_s[i]));
|
||||
2'b01: result_r = {32'h00000000, result_s[i]};
|
||||
default: result_r = {32'hffffffff, result_s[i]};
|
||||
|
@ -333,4 +331,4 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
`endif
|
||||
|
|
|
@ -41,8 +41,7 @@ union reg_data_t {
|
|||
};
|
||||
|
||||
inline uint64_t nan_box(uint32_t value) {
|
||||
uint64_t mask = 0xffffffff00000000;
|
||||
return value | mask;
|
||||
return value | 0xffffffff00000000;
|
||||
}
|
||||
|
||||
inline bool is_nan_boxed(uint64_t value) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue