mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'develop'
This commit is contained in:
commit
3f52964a94
17 changed files with 221 additions and 196 deletions
|
@ -74,8 +74,8 @@ jobs:
|
|||
- ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- ./ci/travis_run.py ./ci/regression.sh --kernel
|
||||
- ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
|
||||
- stage: test
|
||||
name: regression64
|
||||
|
@ -85,6 +85,7 @@ jobs:
|
|||
- ./ci/travis_run.py ./ci/regression.sh --kernel
|
||||
- ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
|
||||
- stage: test
|
||||
name: config
|
||||
|
|
|
@ -74,16 +74,20 @@ isa()
|
|||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_D_ENABLE -DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d || true
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
|
@ -110,19 +114,14 @@ regression()
|
|||
make -C tests/regression run-simx
|
||||
make -C tests/regression run-rtlsim
|
||||
|
||||
# test FPU hardware implementations
|
||||
CONFIGS="-DFPU_DPI" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DFPU_DSP" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
CONFIGS="-DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
echo "regression tests done!"
|
||||
}
|
||||
|
||||
|
|
|
@ -225,7 +225,7 @@ def write_csv(log_filename, csv_filename, log_type):
|
|||
|
||||
# write to CSV
|
||||
with open(csv_filename, 'w', newline='') as csv_file:
|
||||
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "operands", "destination"]
|
||||
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
|
||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for entry in entries:
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -40,7 +40,7 @@ extern "C" {
|
|||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
|
||||
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
|
@ -54,15 +54,15 @@ extern "C" {
|
|||
}
|
||||
|
||||
inline uint64_t nan_box(uint32_t value) {
|
||||
#ifdef FPU_RV64F
|
||||
#ifdef XLEN_64
|
||||
return value | 0xffffffff00000000;
|
||||
#else
|
||||
#else
|
||||
return value;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool is_nan_boxed(uint64_t value) {
|
||||
#ifdef FPU_RV64F
|
||||
#ifdef XLEN_64
|
||||
return (uint32_t(value >> 32) == 0xffffffff);
|
||||
#else
|
||||
__unused (value);
|
||||
|
@ -70,15 +70,14 @@ inline bool is_nan_boxed(uint64_t value) {
|
|||
#endif
|
||||
}
|
||||
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (!is_nan_boxed(a)) {
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
return a;
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (is_nan_boxed(a))
|
||||
return a;
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
|
||||
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
|
||||
|
@ -88,7 +87,7 @@ void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
|
|||
}
|
||||
|
||||
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
|
||||
|
@ -98,19 +97,19 @@ void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal*
|
|||
}
|
||||
|
||||
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -118,9 +117,9 @@ void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -128,9 +127,9 @@ void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -138,9 +137,9 @@ void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
|
@ -148,36 +147,36 @@ void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const
|
|||
}
|
||||
|
||||
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
|
@ -186,61 +185,61 @@ void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVa
|
|||
}
|
||||
|
||||
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
} else {
|
||||
*result = rv_itof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
} else {
|
||||
*result = rv_utof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
} else {
|
||||
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_ftod((int32_t)check_boxing(a));
|
||||
|
@ -250,90 +249,90 @@ void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
|||
}
|
||||
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
|
|
|
@ -40,6 +40,14 @@
|
|||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifndef FPU_DSP
|
||||
`ifndef EXT_D_DISABLE
|
||||
`define EXT_D_ENABLE
|
||||
`endif
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef EXT_ZICOND_DISABLE
|
||||
`define EXT_ZICOND_ENABLE
|
||||
`endif
|
||||
|
@ -248,7 +256,7 @@
|
|||
|
||||
// Issue width
|
||||
`ifndef ISSUE_WIDTH
|
||||
`define ISSUE_WIDTH 1
|
||||
`define ISSUE_WIDTH `UP(`NUM_WARPS / 8)
|
||||
`endif
|
||||
|
||||
// Number of ALU units
|
||||
|
|
|
@ -230,9 +230,9 @@
|
|||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_DIV 4'b0011
|
||||
`define INST_FPU_SQRT 4'b0100
|
||||
`define INST_FPU_CMP 4'b0101 // mod: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b0110
|
||||
`define INST_FPU_MISC 4'b0111 // mod: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_F2I 4'b1000
|
||||
`define INST_FPU_F2U 4'b1001
|
||||
`define INST_FPU_I2F 4'b1010
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,7 +16,7 @@
|
|||
`ifdef FPU_DSP
|
||||
|
||||
module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 4,
|
||||
parameter NUM_LANES = 4,
|
||||
parameter TAG_WIDTH = 4,
|
||||
parameter OUT_BUF = 0
|
||||
) (
|
||||
|
@ -29,7 +29,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0] mask_in,
|
||||
|
||||
input wire [TAG_WIDTH-1:0] tag_in,
|
||||
|
||||
|
||||
input wire [`INST_FPU_BITS-1:0] op_type,
|
||||
input wire [`INST_FMT_BITS-1:0] fmt,
|
||||
input wire [`INST_FRM_BITS-1:0] frm,
|
||||
|
@ -37,7 +37,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
|
||||
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`FP_FLAGS_BITS-1:0] fflags,
|
||||
|
@ -56,22 +56,22 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH;
|
||||
|
||||
`UNUSED_VAR (fmt)
|
||||
`UNUSED_VAR (fmt)
|
||||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
wire [NUM_FPC-1:0] per_core_has_fflags;
|
||||
fflags_t [NUM_FPC-1:0] per_core_fflags;
|
||||
|
||||
wire div_ready_in, sqrt_ready_in;
|
||||
wire [NUM_LANES-1:0][31:0] div_result, sqrt_result;
|
||||
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
|
||||
wire div_ready_out, sqrt_ready_out;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
wire div_valid_out, sqrt_valid_out;
|
||||
wire div_has_fflags, sqrt_has_fflags;
|
||||
fflags_t div_fflags, sqrt_fflags;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
|
@ -79,7 +79,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
always @(*) begin
|
||||
is_madd = 0;
|
||||
is_sub = 0;
|
||||
is_sub = 0;
|
||||
is_neg = 0;
|
||||
is_div = 0;
|
||||
is_itof = 0;
|
||||
|
@ -126,19 +126,19 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_fma (
|
||||
.clk (clk),
|
||||
.reset (fma_reset),
|
||||
.clk (clk),
|
||||
.reset (fma_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_FMA)),
|
||||
.ready_in (per_core_ready_in[FPU_FMA]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.is_madd (is_madd),
|
||||
.is_sub (is_sub),
|
||||
.is_neg (is_neg),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.datac (datac_s),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.datac (datac_s),
|
||||
.has_fflags (per_core_has_fflags[FPU_FMA]),
|
||||
.fflags (per_core_fflags[FPU_FMA]),
|
||||
.result (per_core_result[FPU_FMA]),
|
||||
|
@ -151,17 +151,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_div (
|
||||
.clk (clk),
|
||||
.reset (div_reset),
|
||||
.clk (clk),
|
||||
.reset (div_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div),
|
||||
.ready_in (div_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.has_fflags (div_has_fflags),
|
||||
.fflags (div_fflags),
|
||||
.fflags (div_fflags),
|
||||
.result (div_result),
|
||||
.tag_out (div_tag_out),
|
||||
.valid_out (div_valid_out),
|
||||
|
@ -172,14 +172,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) fpu_sqrt (
|
||||
.clk (clk),
|
||||
.reset (sqrt_reset),
|
||||
.clk (clk),
|
||||
.reset (sqrt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div),
|
||||
.ready_in (sqrt_ready_in),
|
||||
.mask_in (mask_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (sqrt_has_fflags),
|
||||
.fflags (sqrt_fflags),
|
||||
.result (sqrt_result),
|
||||
|
@ -188,57 +188,57 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
.ready_out (sqrt_ready_out)
|
||||
);
|
||||
|
||||
wire cvt_rt_int_in = ~is_itof;
|
||||
wire cvt_rt_int_out;
|
||||
wire cvt_ret_int_in = ~is_itof;
|
||||
wire cvt_ret_int_out;
|
||||
|
||||
VX_fpu_cvt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+1)
|
||||
) fpu_cvt (
|
||||
.clk (clk),
|
||||
.reset (cvt_reset),
|
||||
.clk (clk),
|
||||
.reset (cvt_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_CVT)),
|
||||
.ready_in (per_core_ready_in[FPU_CVT]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({cvt_rt_int_in, tag_in}),
|
||||
.tag_in ({cvt_ret_int_in, tag_in}),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa_s),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.dataa (dataa_s),
|
||||
.has_fflags (per_core_has_fflags[FPU_CVT]),
|
||||
.fflags (per_core_fflags[FPU_CVT]),
|
||||
.result (per_core_result[FPU_CVT]),
|
||||
.tag_out ({cvt_rt_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.tag_out ({cvt_ret_int_out, per_core_tag_out[FPU_CVT]}),
|
||||
.valid_out (per_core_valid_out[FPU_CVT]),
|
||||
.ready_out (per_core_ready_out[FPU_CVT])
|
||||
);
|
||||
|
||||
wire ncp_rt_int_in = (op_type == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(op_type, frm)
|
||||
wire ncp_ret_int_in = (op_type == `INST_FPU_CMP)
|
||||
|| `INST_FPU_IS_CLASS(op_type, frm)
|
||||
|| `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_rt_int_out;
|
||||
wire ncp_ret_int_out;
|
||||
|
||||
wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_ret_sext_out;
|
||||
|
||||
wire ncp_rt_sext_in = `INST_FPU_IS_MVXW(op_type, frm);
|
||||
wire ncp_rt_sext_out;
|
||||
|
||||
VX_fpu_ncp #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAG_WIDTH (TAG_WIDTH+2)
|
||||
) fpu_ncp (
|
||||
.clk (clk),
|
||||
.reset (ncp_reset),
|
||||
.reset (ncp_reset),
|
||||
.valid_in (valid_in && (core_select == FPU_NCP)),
|
||||
.ready_in (per_core_ready_in[FPU_NCP]),
|
||||
.mask_in (mask_in),
|
||||
.tag_in ({ncp_rt_sext_in, ncp_rt_int_in, tag_in}),
|
||||
.tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}),
|
||||
.op_type (op_type),
|
||||
.frm (frm),
|
||||
.dataa (dataa_s),
|
||||
.datab (datab_s),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.datab (datab_s),
|
||||
.result (per_core_result[FPU_NCP]),
|
||||
.has_fflags (per_core_has_fflags[FPU_NCP]),
|
||||
.fflags (per_core_fflags[FPU_NCP]),
|
||||
.tag_out ({ncp_rt_sext_out, ncp_rt_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.tag_out ({ncp_ret_sext_out, ncp_ret_int_out, per_core_tag_out[FPU_NCP]}),
|
||||
.valid_out (per_core_valid_out[FPU_NCP]),
|
||||
.ready_out (per_core_ready_out[FPU_NCP])
|
||||
);
|
||||
|
@ -249,20 +249,20 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.DATAW (RSP_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (0)
|
||||
) div_sqrt_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.valid_in ({sqrt_valid_out, div_valid_out}),
|
||||
.ready_in ({sqrt_ready_out, div_ready_out}),
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
|
||||
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
|
||||
.data_out ({
|
||||
per_core_result[FPU_DIVSQRT],
|
||||
per_core_has_fflags[FPU_DIVSQRT],
|
||||
per_core_fflags[FPU_DIVSQRT],
|
||||
per_core_result[FPU_DIVSQRT],
|
||||
per_core_has_fflags[FPU_DIVSQRT],
|
||||
per_core_fflags[FPU_DIVSQRT],
|
||||
per_core_tag_out[FPU_DIVSQRT]
|
||||
}),
|
||||
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
|
||||
|
@ -273,50 +273,48 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out;
|
||||
|
||||
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_FPC; ++i) begin
|
||||
per_core_data_out[i][RSP_DATAW+1:2] = {
|
||||
per_core_result[i],
|
||||
per_core_has_fflags[i],
|
||||
per_core_fflags[i],
|
||||
per_core_result[i],
|
||||
per_core_has_fflags[i],
|
||||
per_core_fflags[i],
|
||||
per_core_tag_out[i]
|
||||
};
|
||||
per_core_data_out[i][1:0] = '0;
|
||||
end
|
||||
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_rt_int_out};
|
||||
per_core_data_out[FPU_NCP][1:0] = {ncp_rt_sext_out, ncp_rt_int_out};
|
||||
end
|
||||
per_core_data_out[FPU_CVT][1:0] = {1'b1, cvt_ret_int_out};
|
||||
per_core_data_out[FPU_NCP][1:0] = {ncp_ret_sext_out, ncp_ret_int_out};
|
||||
end
|
||||
|
||||
wire [NUM_LANES-1:0][31:0] result_s;
|
||||
wire [1:0] op_rt_int_out;
|
||||
|
||||
wire [1:0] op_ret_int_out;
|
||||
`UNUSED_VAR (op_ret_int_out)
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_FPC),
|
||||
.DATAW (RSP_DATAW + 2),
|
||||
.DATAW (RSP_DATAW + 2),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (OUT_BUF)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_core_valid_out),
|
||||
.valid_in (per_core_valid_out),
|
||||
.ready_in (per_core_ready_out),
|
||||
.data_in (per_core_data_out),
|
||||
.data_out ({result_s, has_fflags, fflags, tag_out, op_rt_int_out}),
|
||||
.data_out ({result_s, has_fflags, fflags, tag_out, op_ret_int_out}),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
`ifndef FPU_RV64F
|
||||
`UNUSED_VAR (op_rt_int_out)
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef FPU_RV64F
|
||||
reg [`XLEN-1:0] result_r;
|
||||
always @(*) begin
|
||||
case (op_rt_int_out)
|
||||
case (op_ret_int_out)
|
||||
2'b11: result_r = `XLEN'($signed(result_s[i]));
|
||||
2'b01: result_r = {32'h00000000, result_s[i]};
|
||||
default: result_r = {32'hffffffff, result_s[i]};
|
||||
|
@ -333,4 +331,4 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
|||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
`endif
|
||||
|
|
|
@ -17,17 +17,9 @@
|
|||
#ifndef __VX_INTRINSICS_H__
|
||||
#define __VX_INTRINSICS_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <VX_types.h>
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
typedef unsigned long size_t; // 64-bit RISC-V
|
||||
#elif __riscv_xlen == 32
|
||||
typedef unsigned int size_t; // 32-bit RISC-V
|
||||
#else
|
||||
#error "Unknown RISC-V architecture"
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#define __UNIFORM__ __attribute__((annotate("vortex.uniform")))
|
||||
#else
|
||||
|
@ -38,12 +30,6 @@ typedef unsigned int size_t; // 32-bit RISC-V
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#define __ASM_STR(x) x
|
||||
#else
|
||||
#define __ASM_STR(x) #x
|
||||
#endif
|
||||
|
||||
#define RISCV_CUSTOM0 0x0B
|
||||
#define RISCV_CUSTOM1 0x2B
|
||||
#define RISCV_CUSTOM2 0x5B
|
||||
|
@ -110,7 +96,7 @@ extern "C" {
|
|||
})
|
||||
|
||||
// Set thread mask
|
||||
inline void vx_tmc(size_t thread_mask) {
|
||||
inline void vx_tmc(int thread_mask) {
|
||||
__asm__ volatile (".insn r %0, 0, 0, x0, %1, x0" :: "i"(RISCV_CUSTOM0), "r"(thread_mask));
|
||||
}
|
||||
|
||||
|
@ -139,20 +125,20 @@ inline void vx_pred_n(int condition, int thread_mask) {
|
|||
|
||||
// Spawn warps
|
||||
typedef void (*vx_wspawn_pfn)();
|
||||
inline void vx_wspawn(size_t num_warps, vx_wspawn_pfn func_ptr) {
|
||||
inline void vx_wspawn(int num_warps, vx_wspawn_pfn func_ptr) {
|
||||
__asm__ volatile (".insn r %0, 1, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(num_warps), "r"(func_ptr));
|
||||
}
|
||||
|
||||
// Split on a predicate
|
||||
inline int vx_split(int predicate) {
|
||||
size_t ret;
|
||||
int ret;
|
||||
__asm__ volatile (".insn r %1, 2, 0, %0, %2, x0" : "=r"(ret) : "i"(RISCV_CUSTOM0), "r"(predicate));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Split on a not predicate
|
||||
inline int vx_split_n(int predicate) {
|
||||
size_t ret;
|
||||
int ret;
|
||||
__asm__ volatile (".insn r %1, 2, 0, %0, %2, x1" : "=r"(ret) : "i"(RISCV_CUSTOM0), "r"(predicate));
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -100,14 +100,14 @@ SECTIONS
|
|||
PROVIDE_HIDDEN (__tdata_end = .);
|
||||
}
|
||||
PROVIDE (__tdata_size = SIZEOF (.tdata));
|
||||
.tbss :
|
||||
.tbss ALIGN(4) :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tbss_start = .);
|
||||
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
|
||||
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
|
||||
PROVIDE_HIDDEN (__tbss_end = .);
|
||||
}
|
||||
PROVIDE (__tbss_size = SIZEOF (.tbss));
|
||||
PROVIDE(__tbss_size = ALIGN(SIZEOF(.tbss), 4));
|
||||
.preinit_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__preinit_array_start = .);
|
||||
|
|
|
@ -100,14 +100,14 @@ SECTIONS
|
|||
PROVIDE_HIDDEN (__tdata_end = .);
|
||||
}
|
||||
PROVIDE (__tdata_size = SIZEOF (.tdata));
|
||||
.tbss :
|
||||
.tbss ALIGN(8) :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tbss_start = .);
|
||||
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
|
||||
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
|
||||
PROVIDE_HIDDEN (__tbss_end = .);
|
||||
}
|
||||
PROVIDE (__tbss_size = SIZEOF (.tbss));
|
||||
PROVIDE(__tbss_size = ALIGN(SIZEOF(.tbss), 8));
|
||||
.preinit_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__preinit_array_start = .);
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Use Ubuntu 20.04 as the base image
|
||||
FROM ubuntu:20.04
|
||||
|
||||
|
|
20
miscs/docker/README.md
Normal file
20
miscs/docker/README.md
Normal file
|
@ -0,0 +1,20 @@
|
|||
You can install Docker desktop on MAC or PC or Ubuntu.
|
||||
|
||||
- PC: https://docs.docker.com/desktop/install/windows-install
|
||||
- MAC: https://docs.docker.com/desktop/install/mac-install
|
||||
- Ubuntu: https://docs.docker.com/desktop/install/ubuntu
|
||||
|
||||
### 1- Create a Docker image from the Dockerfile
|
||||
$ docker build -f Dockerfile.ubuntu -t vortex
|
||||
|
||||
### 2- Build the Docker image
|
||||
$ docker docker run -it vortex /bin/bash
|
||||
|
||||
### 3- Build the project
|
||||
One you login the Docker terminal, you will be in the build directory.
|
||||
|
||||
$ make -s
|
||||
|
||||
### 4- Run a simple test
|
||||
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
|
@ -41,8 +41,7 @@ union reg_data_t {
|
|||
};
|
||||
|
||||
inline uint64_t nan_box(uint32_t value) {
|
||||
uint64_t mask = 0xffffffff00000000;
|
||||
return value | mask;
|
||||
return value | 0xffffffff00000000;
|
||||
}
|
||||
|
||||
inline bool is_nan_boxed(uint64_t value) {
|
||||
|
|
|
@ -8,9 +8,11 @@ XRT_DEVICE_INDEX ?= 0
|
|||
ifeq ($(XLEN),64)
|
||||
VX_CFLAGS += -march=rv64imafd -mabi=lp64d
|
||||
STARTUP_ADDR ?= 0x180000000
|
||||
POCL_CC_FLAGS += POCL_VORTEX_XLEN=64
|
||||
else
|
||||
VX_CFLAGS += -march=rv32imaf -mabi=ilp32f
|
||||
STARTUP_ADDR ?= 0x80000000
|
||||
POCL_CC_FLAGS += POCL_VORTEX_XLEN=32
|
||||
endif
|
||||
|
||||
POCL_PATH ?= $(TOOLDIR)/pocl
|
||||
|
@ -40,7 +42,7 @@ CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing
|
|||
CXXFLAGS += -pthread
|
||||
CXXFLAGS += -I$(POCL_PATH)/include
|
||||
|
||||
POCL_CC_FLAGS = LLVM_PREFIX=$(LLVM_VORTEX) POCL_VORTEX_BINTOOL="$(VX_BINTOOL)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)"
|
||||
POCL_CC_FLAGS += LLVM_PREFIX=$(LLVM_VORTEX) POCL_VORTEX_BINTOOL="$(VX_BINTOOL)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)"
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
|
BIN
tests/opencl/stencil/32x32x8.bin
Normal file
BIN
tests/opencl/stencil/32x32x8.bin
Normal file
Binary file not shown.
BIN
tests/opencl/stencil/32x32x8.gold
Normal file
BIN
tests/opencl/stencil/32x32x8.gold
Normal file
Binary file not shown.
|
@ -15,6 +15,6 @@ kernel.cl: $(SRC_DIR)/kernel.cl
|
|||
KERNEL_SRCS := kernel.cl
|
||||
|
||||
# Usage: #nx #ny #nz #iter -i input_file [-o output_file]
|
||||
OPTS ?= 64 64 8 1 -i $(SRC_DIR)/64x64x8.bin
|
||||
OPTS ?= 32 32 8 1 -i $(SRC_DIR)/32x32x8.bin
|
||||
|
||||
include ../common.mk
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue