Fixed fp_noncomp bug, updated CI toolchain script, increased DRAM latency to 100 cycles

This commit is contained in:
Blaise Tine 2020-11-23 11:59:40 -08:00
parent e281d32138
commit 2d4fef6dd6
6 changed files with 50 additions and 28 deletions

View file

@ -5,6 +5,8 @@ set -e
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
DESTDIR="${DESTDIR:=/opt}"
riscv()
{
for x in {a..o}
@ -14,7 +16,7 @@ riscv()
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
tar -xvf riscv-gnu-toolchain.tar.bz2
rm -f riscv-gnu-toolchain.tar.bz2*
sudo cp -r riscv-gnu-toolchain /opt/
cp -r riscv-gnu-toolchain $DESTDIR
rm -rf riscv-gnu-toolchain
}
@ -27,7 +29,7 @@ llvm()
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
tar -xvf llvm-riscv.tar.bz2
rm -f llvm-riscv.tar.bz2*
sudo cp -r llvm-riscv /opt/
cp -r llvm-riscv $DESTDIR
rm -rf llvm-riscv
}
@ -36,7 +38,7 @@ pocl()
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
tar -xvf pocl.tar.bz2
rm -f pocl.tar.bz2
sudo cp -r pocl /opt/
cp -r pocl $DESTDIR
rm -rf pocl
}
@ -45,7 +47,7 @@ verilator()
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
tar -xvf verilator.tar.bz2
rm -f verilator.tar.bz2
sudo cp -r verilator /opt/
cp -r verilator $DESTDIR
rm -rf verilator
}

View file

@ -9,7 +9,7 @@
#define CCI_WQ_SIZE 16
#define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 4
#define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16
@ -261,14 +261,14 @@ void opae_sim::avs_bus() {
if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
uint32_t tag = dram_rd_it->tag;
uint32_t addr = dram_rd_it->addr;
dram_reads_.erase(dram_rd_it);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
else
printf(" %0x", req.tag);
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
}
printf("}\n");*/
}
@ -300,18 +300,27 @@ void opae_sim::avs_bus() {
}
if (vortex_afu_->avs_read) {
assert(0 == vortex_afu_->mem_bank_select);
dram_rd_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.tag = base_addr;
dram_rd_req_t dram_req;
dram_req.addr = vortex_afu_->avs_address;
ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.cycles_left = DRAM_LATENCY;
for (auto& req : dram_reads_) {
if (req.addr == dram_req.addr) {
dram_req.cycles_left = req.cycles_left;
break;
}
}
dram_reads_.emplace_back(dram_req);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
else
printf(" %0x", req.tag);
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
}
printf("}\n");*/
}

View file

@ -41,7 +41,7 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
uint32_t tag;
uint32_t addr;
} dram_rd_req_t;
typedef struct {

View file

@ -147,7 +147,7 @@ module VX_fp_noncomp #(
case (frm_r) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
default: fminmax_res[i] = 'x; // don't care value
endcase
end
end
@ -160,7 +160,7 @@ module VX_fp_noncomp #(
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
default: fsgnj_res[i] = 'x; // don't care value
endcase
end
end
@ -192,8 +192,8 @@ module VX_fp_noncomp #(
`FRM_RDN: begin
if (a_type[i].is_nan || b_type[i].is_nan) begin
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
// ** FEQS only raise NV flag when either operand is signaling NaN
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
// FEQS only raise NV flag when either operand is signaling NaN
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
end
else begin
fcmp_res[i] = {31'h0, ab_equal[i]};
@ -201,7 +201,7 @@ module VX_fp_noncomp #(
end
end
default: begin
fcmp_res[i] = 32'hdeadbeaf; // don't care value
fcmp_res[i] = 'x; // don't care value
fcmp_excp[i] = 5'h0;
end
endcase
@ -226,7 +226,7 @@ module VX_fp_noncomp #(
end
//`FPU_MISC:
default: begin
case (frm)
case (frm_r)
0,1,2: begin
tmp_result[i] = fsgnj_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;

View file

@ -4,7 +4,7 @@
#include <iomanip>
#define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 4
#define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16
@ -180,9 +180,19 @@ void Simulator::eval_dram_bus() {
}
} else {
dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.tag = vortex_->dram_req_tag;
dram_req.tag = vortex_->dram_req_tag;
dram_req.addr = vortex_->dram_req_addr;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
dram_req.cycles_left = DRAM_LATENCY;
for (auto& req : dram_rsp_vec_) {
if (req.addr == dram_req.addr) {
dram_req.cycles_left = req.cycles_left;
break;
}
}
dram_rsp_vec_.emplace_back(dram_req);
}
}

View file

@ -51,7 +51,8 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
unsigned tag;
uint32_t tag;
uint32_t addr;
} dram_req_t;
std::unordered_map<int, std::stringstream> print_bufs_;