Merge branch 'develop'

This commit is contained in:
Blaise Tine 2024-06-13 23:31:47 -07:00
commit 3f52964a94
17 changed files with 221 additions and 196 deletions

View file

@ -74,8 +74,8 @@ jobs:
- ./ci/travis_run.py ./ci/regression.sh --isa
- ./ci/travis_run.py ./ci/regression.sh --kernel
- ./ci/travis_run.py ./ci/regression.sh --synthesis
- ./ci/travis_run.py ./ci/regression.sh --opencl
- ./ci/travis_run.py ./ci/regression.sh --regression
- ./ci/travis_run.py ./ci/regression.sh --opencl
- stage: test
name: regression64
@ -85,6 +85,7 @@ jobs:
- ./ci/travis_run.py ./ci/regression.sh --kernel
- ./ci/travis_run.py ./ci/regression.sh --synthesis
- ./ci/travis_run.py ./ci/regression.sh --regression
- ./ci/travis_run.py ./ci/regression.sh --opencl
- stage: test
name: config

View file

@ -74,16 +74,20 @@ isa()
if [ "$XLEN" == "64" ]
then
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64d || true
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64d
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64d
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64fx
fi
@ -110,19 +114,14 @@ regression()
make -C tests/regression run-simx
make -C tests/regression run-rtlsim
# test FPU hardware implementations
CONFIGS="-DFPU_DPI" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
CONFIGS="-DFPU_DSP" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
CONFIGS="-DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
# test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2
# test local barrier
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar"
# test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2
echo "regression tests done!"
}

View file

@ -225,7 +225,7 @@ def write_csv(log_filename, csv_filename, log_type):
# write to CSV
with open(csv_filename, 'w', newline='') as csv_file:
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "operands", "destination"]
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for entry in entries:

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -40,7 +40,7 @@ extern "C" {
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
@ -54,15 +54,15 @@ extern "C" {
}
inline uint64_t nan_box(uint32_t value) {
#ifdef FPU_RV64F
#ifdef XLEN_64
return value | 0xffffffff00000000;
#else
#else
return value;
#endif
}
inline bool is_nan_boxed(uint64_t value) {
#ifdef FPU_RV64F
#ifdef XLEN_64
return (uint32_t(value >> 32) == 0xffffffff);
#else
__unused (value);
@ -70,15 +70,14 @@ inline bool is_nan_boxed(uint64_t value) {
#endif
}
inline int64_t check_boxing(int64_t a) {
if (!is_nan_boxed(a)) {
return nan_box(0x7fc00000); // NaN
}
return a;
inline int64_t check_boxing(int64_t a) {
if (is_nan_boxed(a))
return a;
return nan_box(0x7fc00000); // NaN
}
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
@ -88,7 +87,7 @@ void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
}
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
@ -98,19 +97,19 @@ void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
}
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -118,9 +117,9 @@ void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -128,9 +127,9 @@ void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -138,9 +137,9 @@ void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (dst_fmt) {
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
@ -148,36 +147,36 @@ void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
}
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
}
}
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
} else {
if (src_fmt) {
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
@ -186,61 +185,61 @@ void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVa
}
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
} else {
if (src_fmt) {
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
}
}
}
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
} else {
} else {
*result = rv_itof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
if (src_fmt) {
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
}
}
}
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
if (src_fmt) {
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
} else {
} else {
*result = rv_utof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
if (src_fmt) {
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
} else {
} else {
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
}
}
}
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_ftod((int32_t)check_boxing(a));
@ -250,90 +249,90 @@ void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
}
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fclss_d(a);
} else {
*result = rv_fclss_s(check_boxing(a));
if (dst_fmt) {
*result = rv_fclss_d(a);
} else {
*result = rv_fclss_s(check_boxing(a));
}
}
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnj_d(a, b);
if (dst_fmt) {
*result = rv_fsgnj_d(a, b);
} else {
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnjn_d(a, b);
if (dst_fmt) {
*result = rv_fsgnjn_d(a, b);
} else {
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnjx_d(a, b);
if (dst_fmt) {
*result = rv_fsgnjx_d(a, b);
} else {
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_flt_d(a, b, fflags);
*result = rv_flt_d(a, b, fflags);
} else {
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
}
}
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fle_d(a, b, fflags);
if (dst_fmt) {
*result = rv_fle_d(a, b, fflags);
} else {
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
}
}
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_feq_d(a, b, fflags);
if (dst_fmt) {
*result = rv_feq_d(a, b, fflags);
} else {
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
}
}
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmin_d(a, b, fflags);
if (dst_fmt) {
*result = rv_fmin_d(a, b, fflags);
} else {
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
}
}
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmax_d(a, b, fflags);
if (dst_fmt) {
*result = rv_fmax_d(a, b, fflags);
} else {
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
}

View file

@ -40,6 +40,14 @@
`define EXT_F_ENABLE
`endif
`ifdef XLEN_64
`ifndef FPU_DSP
`ifndef EXT_D_DISABLE
`define EXT_D_ENABLE
`endif
`endif
`endif
`ifndef EXT_ZICOND_DISABLE
`define EXT_ZICOND_ENABLE
`endif
@ -248,7 +256,7 @@
// Issue width
`ifndef ISSUE_WIDTH
`define ISSUE_WIDTH 1
`define ISSUE_WIDTH `UP(`NUM_WARPS / 8)
`endif
// Number of ALU units

View file

@ -230,9 +230,9 @@
`define INST_FPU_MUL 4'b0010
`define INST_FPU_DIV 4'b0011
`define INST_FPU_SQRT 4'b0100
`define INST_FPU_CMP 4'b0101 // mod: LE=0, LT=1, EQ=2
`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2
`define INST_FPU_F2F 4'b0110
`define INST_FPU_MISC 4'b0111 // mod: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
`define INST_FPU_F2I 4'b1000
`define INST_FPU_F2U 4'b1001
`define INST_FPU_I2F 4'b1010

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,7 +16,7 @@
`ifdef FPU_DSP
module VX_fpu_dsp import VX_fpu_pkg::*; #(
parameter NUM_LANES = 4,
parameter NUM_LANES = 4,
parameter TAG_WIDTH = 4,
parameter OUT_BUF = 0
) (
@ -29,7 +29,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FMT_BITS-1:0] fmt,
input wire [`INST_FRM_BITS-1:0] frm,
@ -37,7 +37,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -56,22 +56,22 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH;
`UNUSED_VAR (fmt)
`UNUSED_VAR (fmt)
wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result;
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
wire [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
fflags_t [NUM_FPC-1:0] per_core_fflags;
wire div_ready_in, sqrt_ready_in;
wire [NUM_LANES-1:0][31:0] div_result, sqrt_result;
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
wire div_ready_out, sqrt_ready_out;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
fflags_t div_fflags, sqrt_fflags;
reg [FPC_BITS-1:0] core_select;
@ -79,7 +79,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
always @(*) begin
is_madd = 0;
is_sub = 0;
is_sub = 0;
is_neg = 0;
is_div = 0;
is_itof = 0;
@ -126,19 +126,19 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH)
) fpu_fma (
.clk (clk),
.reset (fma_reset),
.clk (clk),
.reset (fma_reset),
.valid_in (valid_in && (core_select == FPU_FMA)),
.ready_in (per_core_ready_in[FPU_FMA]),
.mask_in (mask_in),
.tag_in (tag_in),
.tag_in (tag_in),
.frm (frm),
.is_madd (is_madd),
.is_sub (is_sub),
.is_neg (is_neg),
.dataa (dataa_s),
.datab (datab_s),
.datac (datac_s),
.dataa (dataa_s),
.datab (datab_s),
.datac (datac_s),
.has_fflags (per_core_has_fflags[FPU_FMA]),
.fflags (per_core_fflags[FPU_FMA]),
.result (per_core_result[FPU_FMA]),
@ -151,17 +151,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH)
) fpu_div (
.clk (clk),
.reset (div_reset),
.clk (clk),
.reset (div_reset),
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div),
.ready_in (div_ready_in),
.mask_in (mask_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa_s),
.datab (datab_s),
.frm (frm),
.dataa (dataa_s),
.datab (datab_s),
.has_fflags (div_has_fflags),
.fflags (div_fflags),
.fflags (div_fflags),
.result (div_result),
.tag_out (div_tag_out),
.valid_out (div_valid_out),
@ -172,14 +172,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH)
) fpu_sqrt (
.clk (clk),
.reset (sqrt_reset),
.clk (clk),
.reset (sqrt_reset),
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div),
.ready_in (sqrt_ready_in),
.mask_in (mask_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa_s),
.frm (frm),
.dataa (dataa_s),
.has_fflags (sqrt_has_fflags),
.fflags (sqrt_fflags),
.result (sqrt_result),
@ -188,57 +188,57 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
.ready_out (sqrt_ready_out)
);
wire cvt_rt_int_in = ~is_itof;
wire cvt_rt_int_out;
wire cvt_ret_int_in = ~is_itof;
wire cvt_ret_int_out;
VX_fpu_cvt #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH+1)
) fpu_cvt (
.clk (clk),
.reset (cvt_reset),
.clk (clk),
.reset (cvt_reset),
.valid_in (valid_in && (core_select == FPU_CVT)),
.ready_in (per_core_ready_in[FPU_CVT]),
.mask_in (mask_in),
.tag_in ({cvt_rt_int_in, tag_in}),
.tag_in ({cvt_ret_int_in, tag_in}),
.frm (frm),
.is_itof (is_itof),
.is_signed (is_signed),
.dataa (dataa_s),
.is_itof (is_itof),
.is_signed (is_signed),
.dataa (dataa_s),
.has_fflags (per_core_has_fflags[FPU_CVT]),
.fflags (per_core_fflags[FPU_CVT]),
.result (per_core_result[FPU_CVT]),
.tag_out ({cvt_rt_int_out, per_core_tag_out[FPU_CVT]}),
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
.valid_out (per_core_valid_out[FPU_CVT]),
.ready_out (per_core_ready_out[FPU_CVT])
);
wire ncp_rt_int_in = (op_type == `INST_FPU_CMP)
|| `INST_FPU_IS_CLASS(op_type, frm)
wire ncp_ret_int_in = (op_type == `INST_FPU_CMP)
|| `INST_FPU_IS_CLASS(op_type, frm)
|| `INST_FPU_IS_MVXW(op_type, frm);
wire ncp_rt_int_out;
wire ncp_ret_int_out;
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
wire ncp_ret_sext_out;
wire ncp_rt_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
wire ncp_rt_sext_out;
VX_fpu_ncp #(
.NUM_LANES (NUM_LANES),
.TAG_WIDTH (TAG_WIDTH+2)
) fpu_ncp (
.clk (clk),
.reset (ncp_reset),
.reset (ncp_reset),
.valid_in (valid_in && (core_select == FPU_NCP)),
.ready_in (per_core_ready_in[FPU_NCP]),
.mask_in (mask_in),
.tag_in ({ncp_rt_sext_in, ncp_rt_int_in, tag_in}),
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}),
.op_type (op_type),
.frm (frm),
.dataa (dataa_s),
.datab (datab_s),
.result (per_core_result[FPU_NCP]),
.datab (datab_s),
.result (per_core_result[FPU_NCP]),
.has_fflags (per_core_has_fflags[FPU_NCP]),
.fflags (per_core_fflags[FPU_NCP]),
.tag_out ({ncp_rt_sext_out, ncp_rt_int_out, per_core_tag_out[FPU_NCP]}),
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
.valid_out (per_core_valid_out[FPU_NCP]),
.ready_out (per_core_ready_out[FPU_NCP])
);
@ -249,20 +249,20 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (2),
.DATAW (RSP_DATAW),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.OUT_BUF (0)
) div_sqrt_arb (
.clk (clk),
.reset (reset),
.valid_in ({sqrt_valid_out, div_valid_out}),
.valid_in ({sqrt_valid_out, div_valid_out}),
.ready_in ({sqrt_ready_out, div_ready_out}),
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
.data_out ({
per_core_result[FPU_DIVSQRT],
per_core_has_fflags[FPU_DIVSQRT],
per_core_fflags[FPU_DIVSQRT],
per_core_result[FPU_DIVSQRT],
per_core_has_fflags[FPU_DIVSQRT],
per_core_fflags[FPU_DIVSQRT],
per_core_tag_out[FPU_DIVSQRT]
}),
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
@ -273,50 +273,48 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
///////////////////////////////////////////////////////////////////////////
reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out;
always @(*) begin
for (integer i = 0; i < NUM_FPC; ++i) begin
per_core_data_out[i][RSP_DATAW+1:2] = {
per_core_result[i],
per_core_has_fflags[i],
per_core_fflags[i],
per_core_result[i],
per_core_has_fflags[i],
per_core_fflags[i],
per_core_tag_out[i]
};
per_core_data_out[i][1:0] = '0;
end
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_rt_int_out};
per_core_data_out[FPU_NCP][1:0] = {ncp_rt_sext_out, ncp_rt_int_out};
end
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_ret_int_out};
per_core_data_out[FPU_NCP][1:0] = {ncp_ret_sext_out, ncp_ret_int_out};
end
wire [NUM_LANES-1:0][31:0] result_s;
wire [1:0] op_rt_int_out;
wire [1:0] op_ret_int_out;
`UNUSED_VAR (op_ret_int_out)
VX_stream_arb #(
.NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW + 2),
.DATAW (RSP_DATAW + 2),
.ARBITER ("R"),
.OUT_BUF (OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (per_core_valid_out),
.valid_in (per_core_valid_out),
.ready_in (per_core_ready_out),
.data_in (per_core_data_out),
.data_out ({result_s, has_fflags, fflags, tag_out, op_rt_int_out}),
.data_out ({result_s, has_fflags, fflags, tag_out, op_ret_int_out}),
.valid_out (valid_out),
.ready_out (ready_out),
`UNUSED_PIN (sel_out)
);
`ifndef FPU_RV64F
`UNUSED_VAR (op_rt_int_out)
`endif
for (genvar i = 0; i < NUM_LANES; ++i) begin
for (genvar i = 0; i < NUM_LANES; ++i) begin
`ifdef FPU_RV64F
reg [`XLEN-1:0] result_r;
always @(*) begin
case (op_rt_int_out)
case (op_ret_int_out)
2'b11: result_r = `XLEN'($signed(result_s[i]));
2'b01: result_r = {32'h00000000, result_s[i]};
default: result_r = {32'hffffffff, result_s[i]};
@ -333,4 +331,4 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
endmodule
`endif
`endif

View file

@ -17,17 +17,9 @@
#ifndef __VX_INTRINSICS_H__
#define __VX_INTRINSICS_H__
#include <stdint.h>
#include <stddef.h>
#include <VX_types.h>
#if __riscv_xlen == 64
typedef unsigned long size_t; // 64-bit RISC-V
#elif __riscv_xlen == 32
typedef unsigned int size_t; // 32-bit RISC-V
#else
#error "Unknown RISC-V architecture"
#endif
#if defined(__clang__)
#define __UNIFORM__ __attribute__((annotate("vortex.uniform")))
#else
@ -38,12 +30,6 @@ typedef unsigned int size_t; // 32-bit RISC-V
extern "C" {
#endif
#ifdef __ASSEMBLY__
#define __ASM_STR(x) x
#else
#define __ASM_STR(x) #x
#endif
#define RISCV_CUSTOM0 0x0B
#define RISCV_CUSTOM1 0x2B
#define RISCV_CUSTOM2 0x5B
@ -110,7 +96,7 @@ extern "C" {
})
// Set thread mask
inline void vx_tmc(size_t thread_mask) {
inline void vx_tmc(int thread_mask) {
__asm__ volatile (".insn r %0, 0, 0, x0, %1, x0" :: "i"(RISCV_CUSTOM0), "r"(thread_mask));
}
@ -139,20 +125,20 @@ inline void vx_pred_n(int condition, int thread_mask) {
// Spawn warps
typedef void (*vx_wspawn_pfn)();
inline void vx_wspawn(size_t num_warps, vx_wspawn_pfn func_ptr) {
inline void vx_wspawn(int num_warps, vx_wspawn_pfn func_ptr) {
__asm__ volatile (".insn r %0, 1, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(num_warps), "r"(func_ptr));
}
// Split on a predicate
inline int vx_split(int predicate) {
size_t ret;
int ret;
__asm__ volatile (".insn r %1, 2, 0, %0, %2, x0" : "=r"(ret) : "i"(RISCV_CUSTOM0), "r"(predicate));
return ret;
}
// Split on a not predicate
inline int vx_split_n(int predicate) {
size_t ret;
int ret;
__asm__ volatile (".insn r %1, 2, 0, %0, %2, x1" : "=r"(ret) : "i"(RISCV_CUSTOM0), "r"(predicate));
return ret;
}

View file

@ -100,14 +100,14 @@ SECTIONS
PROVIDE_HIDDEN (__tdata_end = .);
}
PROVIDE (__tdata_size = SIZEOF (.tdata));
.tbss :
.tbss ALIGN(4) :
{
PROVIDE_HIDDEN (__tbss_start = .);
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
PROVIDE_HIDDEN (__tbss_end = .);
}
PROVIDE (__tbss_size = SIZEOF (.tbss));
PROVIDE(__tbss_size = ALIGN(SIZEOF(.tbss), 4));
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);

View file

@ -100,14 +100,14 @@ SECTIONS
PROVIDE_HIDDEN (__tdata_end = .);
}
PROVIDE (__tdata_size = SIZEOF (.tdata));
.tbss :
.tbss ALIGN(8) :
{
PROVIDE_HIDDEN (__tbss_start = .);
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
PROVIDE_HIDDEN (__tbss_end = .);
}
PROVIDE (__tbss_size = SIZEOF (.tbss));
PROVIDE(__tbss_size = ALIGN(SIZEOF(.tbss), 8));
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);

View file

@ -1,3 +1,16 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Use Ubuntu 20.04 as the base image
FROM ubuntu:20.04

20
miscs/docker/README.md Normal file
View file

@ -0,0 +1,20 @@
You can install Docker desktop on MAC or PC or Ubuntu.
- PC: https://docs.docker.com/desktop/install/windows-install
- MAC: https://docs.docker.com/desktop/install/mac-install
- Ubuntu: https://docs.docker.com/desktop/install/ubuntu
### 1- Create a Docker image from the Dockerfile
$ docker build -f Dockerfile.ubuntu -t vortex
### 2- Build the Docker image
$ docker docker run -it vortex /bin/bash
### 3- Build the project
One you login the Docker terminal, you will be in the build directory.
$ make -s
### 4- Run a simple test
$ ./ci/blackbox.sh --cores=2 --app=vecadd

View file

@ -41,8 +41,7 @@ union reg_data_t {
};
inline uint64_t nan_box(uint32_t value) {
uint64_t mask = 0xffffffff00000000;
return value | mask;
return value | 0xffffffff00000000;
}
inline bool is_nan_boxed(uint64_t value) {

View file

@ -8,9 +8,11 @@ XRT_DEVICE_INDEX ?= 0
ifeq ($(XLEN),64)
VX_CFLAGS += -march=rv64imafd -mabi=lp64d
STARTUP_ADDR ?= 0x180000000
POCL_CC_FLAGS += POCL_VORTEX_XLEN=64
else
VX_CFLAGS += -march=rv32imaf -mabi=ilp32f
STARTUP_ADDR ?= 0x80000000
POCL_CC_FLAGS += POCL_VORTEX_XLEN=32
endif
POCL_PATH ?= $(TOOLDIR)/pocl
@ -40,7 +42,7 @@ CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing
CXXFLAGS += -pthread
CXXFLAGS += -I$(POCL_PATH)/include
POCL_CC_FLAGS = LLVM_PREFIX=$(LLVM_VORTEX) POCL_VORTEX_BINTOOL="$(VX_BINTOOL)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)"
POCL_CC_FLAGS += LLVM_PREFIX=$(LLVM_VORTEX) POCL_VORTEX_BINTOOL="$(VX_BINTOOL)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)"
# Debugigng
ifdef DEBUG

Binary file not shown.

Binary file not shown.

View file

@ -15,6 +15,6 @@ kernel.cl: $(SRC_DIR)/kernel.cl
KERNEL_SRCS := kernel.cl
# Usage: #nx #ny #nz #iter -i input_file [-o output_file]
OPTS ?= 64 64 8 1 -i $(SRC_DIR)/64x64x8.bin
OPTS ?= 32 32 8 1 -i $(SRC_DIR)/32x32x8.bin
include ../common.mk