64-bit rtl fix

This commit is contained in:
Blaise Tine 2024-06-13 06:26:45 -07:00
parent 78b6e0638c
commit 5bcf24ed55
7 changed files with 166 additions and 167 deletions

View file

@ -74,16 +74,20 @@ isa()
# 64-bit only (XLEN == "64"): each step rebuilds sim/rtlsim from clean with a
# particular CONFIGS define set (build output is sent to /dev/null) and then
# invokes one of the run-rtlsim-64f / -64d / -64fx targets in tests/riscv/isa.
# The "|| true" on one 64d run makes a failure of that run non-fatal.
# NOTE(review): several build lines here are immediately followed by another
# clean + rebuild with no test run in between; this looks like before/after
# lines of a change shown together -- confirm against the real script.
if [ "$XLEN" == "64" ]
then
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64d || true
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64d
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64d
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64fx
fi

View file

@ -225,7 +225,7 @@ def write_csv(log_filename, csv_filename, log_type):
# write to CSV
with open(csv_filename, 'w', newline='') as csv_file:
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "operands", "destination"]
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for entry in entries:

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -40,7 +40,7 @@ extern "C" {
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
@ -54,31 +54,21 @@ extern "C" {
}
// Widens a 32-bit value to 64 bits. When FPU_RV64F is defined the upper
// 32 bits are forced to all ones; otherwise the value is only zero-extended.
inline uint64_t nan_box(uint32_t value) {
  uint64_t boxed = value;
#ifdef FPU_RV64F
  boxed |= 0xffffffff00000000ull;
#endif
  return boxed;
}
// Reports whether a 64-bit word has all ones in its upper 32 bits.
// Without FPU_RV64F every value is accepted and the argument is unused.
inline bool is_nan_boxed(uint64_t value) {
#ifdef FPU_RV64F
  const uint32_t upper_word = uint32_t(value >> 32);
  return upper_word == 0xffffffff;
#else
  __unused (value);
  return true;
#endif
}
// Returns `a` unchanged when is_nan_boxed(a) holds; otherwise returns
// nan_box(0x7fc00000), which the inline comments label as NaN.
// NOTE(review): the function header and both return paths appear twice in this
// span (a braced "if not boxed" form and a guard-style "if boxed" form); this
// looks like before/after lines of a change shown together -- confirm against
// the real file.
inline int64_t check_boxing(int64_t a) {
if (!is_nan_boxed(a)) {
return nan_box(0x7fc00000); // NaN
}
return a;
inline int64_t check_boxing(int64_t a) {
if (is_nan_boxed(a))
return a;
return nan_box(0x7fc00000); // NaN
}
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
@ -88,7 +78,7 @@ void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
}
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
@ -98,19 +88,19 @@ void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
}
// DPI multiply entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt calls rv_fmul_d on the raw 64-bit operands; zero routes
// both operands through check_boxing, calls rv_fmul_s, and nan_box'es the
// result (presumably double vs. single precision -- confirm against rv_*).
// The low three bits of *frm and the fflags pointer are forwarded to the helper.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -118,9 +108,9 @@ void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -128,9 +118,9 @@ void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -138,9 +128,9 @@ void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -148,36 +138,36 @@ void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
// DPI divide entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt calls rv_fdiv_d on the raw 64-bit operands; zero routes
// both operands through check_boxing, calls rv_fdiv_s, and nan_box'es the result.
// The low three bits of *frm and the fflags pointer are forwarded to the helper.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
// DPI square-root entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt calls rv_fsqrt_d on the raw 64-bit operand; zero routes
// the operand through check_boxing, calls rv_fsqrt_s, and nan_box'es the result.
// The low three bits of *frm and the fflags pointer are forwarded to the helper.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
}
}
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
} else {
if (src_fmt) {
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
@ -186,61 +176,61 @@ void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVa
}
// DPI float-to-unsigned conversion entry point. Does nothing when `enable` is false.
// dst_fmt picks the helper family: non-zero uses rv_ftolu_*, zero uses rv_ftou_*
// with the result passed through sext<uint64_t>(..., 32).
// src_fmt picks the source variant: non-zero uses the *_d helper on the raw
// operand, zero uses the *_s helper on check_boxing(a).
// The low three bits of *frm and the fflags pointer are forwarded to the helper.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
} else {
if (src_fmt) {
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
}
}
}
// DPI signed-integer-to-float conversion entry point. Does nothing when `enable` is false.
// dst_fmt picks the destination helper: non-zero uses the *_d helpers and stores
// the result directly, zero uses the *_s helpers and nan_box'es the result.
// src_fmt picks the source helper: non-zero uses rv_ltof_*, zero uses rv_itof_*.
// The low three bits of *frm and the fflags pointer are forwarded to the helper.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
} else {
} else {
*result = rv_itof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
if (src_fmt) {
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
}
}
}
// DPI unsigned-integer-to-float conversion entry point. Does nothing when `enable` is false.
// dst_fmt picks the destination helper: non-zero uses the *_d helpers and stores
// the result directly, zero uses the *_s helpers and nan_box'es the result.
// src_fmt picks the source helper: non-zero uses rv_lutof_*, zero uses rv_utof_*.
// The low three bits of *frm and the fflags pointer are forwarded to the helper.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
} else {
} else {
*result = rv_utof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
if (src_fmt) {
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
} else {
} else {
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
}
}
}
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_ftod((int32_t)check_boxing(a));
@ -250,90 +240,90 @@ void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
}
// DPI classify entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_fclss_d(a); zero stores rv_fclss_s on
// check_boxing(a). The result is stored as-is, without nan_box.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fclss_d(a);
} else {
*result = rv_fclss_s(check_boxing(a));
if (dst_fmt) {
*result = rv_fclss_d(a);
} else {
*result = rv_fclss_s(check_boxing(a));
}
}
// DPI sign-injection entry point (fsgnj). Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_fsgnj_d(a, b); zero routes both operands through
// check_boxing, calls rv_fsgnj_s, and nan_box'es the result.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnj_d(a, b);
if (dst_fmt) {
*result = rv_fsgnj_d(a, b);
} else {
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
}
}
// DPI sign-injection entry point (fsgnjn). Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_fsgnjn_d(a, b); zero routes both operands through
// check_boxing, calls rv_fsgnjn_s, and nan_box'es the result.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnjn_d(a, b);
if (dst_fmt) {
*result = rv_fsgnjn_d(a, b);
} else {
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
}
}
// DPI sign-injection entry point (fsgnjx). Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_fsgnjx_d(a, b); zero routes both operands through
// check_boxing, calls rv_fsgnjx_s, and nan_box'es the result.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnjx_d(a, b);
if (dst_fmt) {
*result = rv_fsgnjx_d(a, b);
} else {
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
}
}
// DPI less-than comparison entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_flt_d(a, b, fflags); zero routes both operands
// through check_boxing and stores rv_flt_s. The result is not nan_box'ed.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_flt_d(a, b, fflags);
*result = rv_flt_d(a, b, fflags);
} else {
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
}
}
// DPI less-or-equal comparison entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_fle_d(a, b, fflags); zero routes both operands
// through check_boxing and stores rv_fle_s. The result is not nan_box'ed.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fle_d(a, b, fflags);
if (dst_fmt) {
*result = rv_fle_d(a, b, fflags);
} else {
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
}
}
// DPI equality comparison entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_feq_d(a, b, fflags); zero routes both operands
// through check_boxing and stores rv_feq_s. The result is not nan_box'ed.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_feq_d(a, b, fflags);
if (dst_fmt) {
*result = rv_feq_d(a, b, fflags);
} else {
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
}
}
// DPI minimum entry point. Does nothing when `enable` is false.
// A non-zero dst_fmt stores rv_fmin_d(a, b, fflags); zero routes both operands
// through check_boxing, calls rv_fmin_s, and nan_box'es the result.
// NOTE(review): some statements appear twice in a row here; this looks like
// before/after lines of a change shown together -- confirm against the real file.
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmin_d(a, b, fflags);
if (dst_fmt) {
*result = rv_fmin_d(a, b, fflags);
} else {
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
}
}
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmax_d(a, b, fflags);
if (dst_fmt) {
*result = rv_fmax_d(a, b, fflags);
} else {
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
}

View file

@ -40,6 +40,14 @@
`define EXT_F_ENABLE
`endif
// On XLEN_64 builds, define EXT_D_ENABLE by default; it is left undefined when
// either FPU_DSP or EXT_D_DISABLE is defined.
`ifdef XLEN_64
`ifndef FPU_DSP
`ifndef EXT_D_DISABLE
`define EXT_D_ENABLE
`endif
`endif
`endif
// EXT_ZICOND_ENABLE is defined unless EXT_ZICOND_DISABLE is defined.
`ifndef EXT_ZICOND_DISABLE
`define EXT_ZICOND_ENABLE
`endif

View file

@ -230,9 +230,9 @@
`define INST_FPU_MUL 4'b0010
`define INST_FPU_DIV 4'b0011
`define INST_FPU_SQRT 4'b0100
`define INST_FPU_CMP 4'b0101 // mod: LE=0, LT=1, EQ=2
`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2
`define INST_FPU_F2F 4'b0110
`define INST_FPU_MISC 4'b0111 // mod: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
`define INST_FPU_F2I 4'b1000
`define INST_FPU_F2U 4'b1001
`define INST_FPU_I2F 4'b1010

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,7 +16,7 @@
`ifdef FPU_DSP
module VX_fpu_dsp import VX_fpu_pkg::*; #(
parameter NUM_LANES = 4,
parameter NUM_LANES = 4,
parameter TAG_WIDTH = 4,
parameter OUT_BUF = 0
) (
@ -29,7 +29,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FMT_BITS-1:0] fmt,
input wire [`INST_FRM_BITS-1:0] frm,
@ -37,7 +37,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -56,22 +56,22 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH;
`UNUSED_VAR (fmt)
`UNUSED_VAR (fmt)
wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result;
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
wire [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
fflags_t [NUM_FPC-1:0] per_core_fflags;
wire div_ready_in, sqrt_ready_in;
wire [NUM_LANES-1:0][31:0] div_result, sqrt_result;
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
wire div_ready_out, sqrt_ready_out;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
fflags_t div_fflags, sqrt_fflags;
reg [FPC_BITS-1:0] core_select;
@ -79,7 +79,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
always @(*) begin
is_madd = 0;
is_sub = 0;
is_sub = 0;
is_neg = 0;
is_div = 0;
is_itof = 0;
@ -126,19 +126,19 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH)
) fpu_fma (
.clk (clk),
.reset (fma_reset),
.clk (clk),
.reset (fma_reset),
.valid_in (valid_in && (core_select == FPU_FMA)),
.ready_in (per_core_ready_in[FPU_FMA]),
.mask_in (mask_in),
.tag_in (tag_in),
.tag_in (tag_in),
.frm (frm),
.is_madd (is_madd),
.is_sub (is_sub),
.is_neg (is_neg),
.dataa (dataa_s),
.datab (datab_s),
.datac (datac_s),
.dataa (dataa_s),
.datab (datab_s),
.datac (datac_s),
.has_fflags (per_core_has_fflags[FPU_FMA]),
.fflags (per_core_fflags[FPU_FMA]),
.result (per_core_result[FPU_FMA]),
@ -151,17 +151,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH)
) fpu_div (
.clk (clk),
.reset (div_reset),
.clk (clk),
.reset (div_reset),
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div),
.ready_in (div_ready_in),
.mask_in (mask_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa_s),
.datab (datab_s),
.frm (frm),
.dataa (dataa_s),
.datab (datab_s),
.has_fflags (div_has_fflags),
.fflags (div_fflags),
.fflags (div_fflags),
.result (div_result),
.tag_out (div_tag_out),
.valid_out (div_valid_out),
@ -172,14 +172,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH)
) fpu_sqrt (
.clk (clk),
.reset (sqrt_reset),
.clk (clk),
.reset (sqrt_reset),
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div),
.ready_in (sqrt_ready_in),
.mask_in (mask_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa_s),
.frm (frm),
.dataa (dataa_s),
.has_fflags (sqrt_has_fflags),
.fflags (sqrt_fflags),
.result (sqrt_result),
@ -188,57 +188,57 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.ready_out (sqrt_ready_out)
);
wire cvt_rt_int_in = ~is_itof;
wire cvt_rt_int_out;
wire cvt_ret_int_in = ~is_itof;
wire cvt_ret_int_out;
VX_fpu_cvt #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH+1)
) fpu_cvt (
.clk (clk),
.reset (cvt_reset),
.clk (clk),
.reset (cvt_reset),
.valid_in (valid_in && (core_select == FPU_CVT)),
.ready_in (per_core_ready_in[FPU_CVT]),
.mask_in (mask_in),
.tag_in ({cvt_rt_int_in, tag_in}),
.tag_in ({cvt_ret_int_in, tag_in}),
.frm (frm),
.is_itof (is_itof),
.is_signed (is_signed),
.dataa (dataa_s),
.is_itof (is_itof),
.is_signed (is_signed),
.dataa (dataa_s),
.has_fflags (per_core_has_fflags[FPU_CVT]),
.fflags (per_core_fflags[FPU_CVT]),
.result (per_core_result[FPU_CVT]),
.tag_out ({cvt_rt_int_out, per_core_tag_out[FPU_CVT]}),
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
.valid_out (per_core_valid_out[FPU_CVT]),
.ready_out (per_core_ready_out[FPU_CVT])
);
wire ncp_rt_int_in = (op_type == `INST_FPU_CMP)
|| `INST_FPU_IS_CLASS(op_type, frm)
wire ncp_ret_int_in = (op_type == `INST_FPU_CMP)
|| `INST_FPU_IS_CLASS(op_type, frm)
|| `INST_FPU_IS_MVXW(op_type, frm);
wire ncp_rt_int_out;
wire ncp_ret_int_out;
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
wire ncp_ret_sext_out;
wire ncp_rt_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
wire ncp_rt_sext_out;
VX_fpu_ncp #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH+2)
) fpu_ncp (
.clk (clk),
.reset (ncp_reset),
.reset (ncp_reset),
.valid_in (valid_in && (core_select == FPU_NCP)),
.ready_in (per_core_ready_in[FPU_NCP]),
.mask_in (mask_in),
.tag_in ({ncp_rt_sext_in, ncp_rt_int_in, tag_in}),
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}),
.op_type (op_type),
.frm (frm),
.dataa (dataa_s),
.datab (datab_s),
.result (per_core_result[FPU_NCP]),
.datab (datab_s),
.result (per_core_result[FPU_NCP]),
.has_fflags (per_core_has_fflags[FPU_NCP]),
.fflags (per_core_fflags[FPU_NCP]),
.tag_out ({ncp_rt_sext_out, ncp_rt_int_out, per_core_tag_out[FPU_NCP]}),
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
.valid_out (per_core_valid_out[FPU_NCP]),
.ready_out (per_core_ready_out[FPU_NCP])
);
@ -249,20 +249,20 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (2),
.DATAW (RSP_DATAW),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.OUT_BUF (0)
) div_sqrt_arb (
.clk (clk),
.reset (reset),
.valid_in ({sqrt_valid_out, div_valid_out}),
.valid_in ({sqrt_valid_out, div_valid_out}),
.ready_in ({sqrt_ready_out, div_ready_out}),
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
.data_out ({
per_core_result[FPU_DIVSQRT],
per_core_has_fflags[FPU_DIVSQRT],
per_core_fflags[FPU_DIVSQRT],
per_core_result[FPU_DIVSQRT],
per_core_has_fflags[FPU_DIVSQRT],
per_core_fflags[FPU_DIVSQRT],
per_core_tag_out[FPU_DIVSQRT]
}),
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
@ -273,50 +273,48 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
///////////////////////////////////////////////////////////////////////////
reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out;
always @(*) begin
for (integer i = 0; i < NUM_FPC; ++i) begin
per_core_data_out[i][RSP_DATAW+1:2] = {
per_core_result[i],
per_core_has_fflags[i],
per_core_fflags[i],
per_core_result[i],
per_core_has_fflags[i],
per_core_fflags[i],
per_core_tag_out[i]
};
per_core_data_out[i][1:0] = '0;
end
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_rt_int_out};
per_core_data_out[FPU_NCP][1:0] = {ncp_rt_sext_out, ncp_rt_int_out};
end
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_ret_int_out};
per_core_data_out[FPU_NCP][1:0] = {ncp_ret_sext_out, ncp_ret_int_out};
end
wire [NUM_LANES-1:0][31:0] result_s;
wire [1:0] op_rt_int_out;
wire [1:0] op_ret_int_out;
`UNUSED_VAR (op_ret_int_out)
VX_stream_arb #(
.NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW + 2),
.DATAW (RSP_DATAW + 2),
.ARBITER ("R"),
.OUT_BUF (OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (per_core_valid_out),
.valid_in (per_core_valid_out),
.ready_in (per_core_ready_out),
.data_in (per_core_data_out),
.data_out ({result_s, has_fflags, fflags, tag_out, op_rt_int_out}),
.data_out ({result_s, has_fflags, fflags, tag_out, op_ret_int_out}),
.valid_out (valid_out),
.ready_out (ready_out),
`UNUSED_PIN (sel_out)
);
`ifndef FPU_RV64F
`UNUSED_VAR (op_rt_int_out)
`endif
for (genvar i = 0; i < NUM_LANES; ++i) begin
for (genvar i = 0; i < NUM_LANES; ++i) begin
`ifdef FPU_RV64F
reg [`XLEN-1:0] result_r;
always @(*) begin
case (op_rt_int_out)
case (op_ret_int_out)
2'b11: result_r = `XLEN'($signed(result_s[i]));
2'b01: result_r = {32'h00000000, result_s[i]};
default: result_r = {32'hffffffff, result_s[i]};
@ -333,4 +331,4 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
endmodule
`endif
`endif

View file

@ -41,8 +41,7 @@ union reg_data_t {
};
// Boxes a 32-bit value into a 64-bit word by setting the upper 32 bits to all
// ones; the lower 32 bits carry `value` unchanged.
inline uint64_t nan_box(uint32_t value) {
  return value | 0xffffffff00000000ull;
}
inline bool is_nan_boxed(uint64_t value) {