mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
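fixed fp_noncomp bug (the default FPU_MISC case now switches on frm_r instead of frm), ci toolchain script update (installs into a configurable $DESTDIR, default /opt, without sudo), increased DRAM latency from 4 to 100 cycles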
This commit is contained in:
parent
e281d32138
commit
2d4fef6dd6
6 changed files with 50 additions and 28 deletions
|
@ -5,6 +5,8 @@ set -e
|
|||
|
||||
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
|
||||
|
||||
DESTDIR="${DESTDIR:=/opt}"
|
||||
|
||||
riscv()
|
||||
{
|
||||
for x in {a..o}
|
||||
|
@ -14,7 +16,7 @@ riscv()
|
|||
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
|
||||
tar -xvf riscv-gnu-toolchain.tar.bz2
|
||||
rm -f riscv-gnu-toolchain.tar.bz2*
|
||||
sudo cp -r riscv-gnu-toolchain /opt/
|
||||
cp -r riscv-gnu-toolchain $DESTDIR
|
||||
rm -rf riscv-gnu-toolchain
|
||||
}
|
||||
|
||||
|
@ -27,7 +29,7 @@ llvm()
|
|||
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
|
||||
tar -xvf llvm-riscv.tar.bz2
|
||||
rm -f llvm-riscv.tar.bz2*
|
||||
sudo cp -r llvm-riscv /opt/
|
||||
cp -r llvm-riscv $DESTDIR
|
||||
rm -rf llvm-riscv
|
||||
}
|
||||
|
||||
|
@ -36,7 +38,7 @@ pocl()
|
|||
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
|
||||
tar -xvf pocl.tar.bz2
|
||||
rm -f pocl.tar.bz2
|
||||
sudo cp -r pocl /opt/
|
||||
cp -r pocl $DESTDIR
|
||||
rm -rf pocl
|
||||
}
|
||||
|
||||
|
@ -45,7 +47,7 @@ verilator()
|
|||
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
|
||||
tar -xvf verilator.tar.bz2
|
||||
rm -f verilator.tar.bz2
|
||||
sudo cp -r verilator /opt/
|
||||
cp -r verilator $DESTDIR
|
||||
rm -rf verilator
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#define CCI_WQ_SIZE 16
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 4
|
||||
#define DRAM_LATENCY 100
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
|
@ -261,14 +261,14 @@ void opae_sim::avs_bus() {
|
|||
if (dram_rd_it != dram_reads_.end()) {
|
||||
vortex_afu_->avs_readdatavalid = 1;
|
||||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
uint32_t tag = dram_rd_it->tag;
|
||||
uint32_t addr = dram_rd_it->addr;
|
||||
dram_reads_.erase(dram_rd_it);
|
||||
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag);
|
||||
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.tag);
|
||||
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.tag);
|
||||
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
@ -300,18 +300,27 @@ void opae_sim::avs_bus() {
|
|||
}
|
||||
if (vortex_afu_->avs_read) {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
dram_rd_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
|
||||
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
|
||||
dram_req.tag = base_addr;
|
||||
dram_rd_req_t dram_req;
|
||||
|
||||
dram_req.addr = vortex_afu_->avs_address;
|
||||
|
||||
ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.block.data());
|
||||
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.addr == dram_req.addr) {
|
||||
dram_req.cycles_left = req.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dram_reads_.emplace_back(dram_req);
|
||||
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr);
|
||||
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE);
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.tag);
|
||||
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.tag);
|
||||
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ private:
|
|||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
|
||||
uint32_t tag;
|
||||
uint32_t addr;
|
||||
} dram_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -147,7 +147,7 @@ module VX_fp_noncomp #(
|
|||
case (frm_r) // use LSB to distinguish MIN and MAX
|
||||
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
|
||||
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
|
||||
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
|
||||
default: fminmax_res[i] = 'x; // don't care value
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
@ -160,7 +160,7 @@ module VX_fp_noncomp #(
|
|||
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
|
||||
default: fsgnj_res[i] = 'x; // don't care value
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
@ -192,8 +192,8 @@ module VX_fp_noncomp #(
|
|||
`FRM_RDN: begin
|
||||
if (a_type[i].is_nan || b_type[i].is_nan) begin
|
||||
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
|
||||
// ** FEQS only raise NV flag when either operand is signaling NaN
|
||||
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
|
||||
// FEQS only raise NV flag when either operand is signaling NaN
|
||||
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
|
||||
end
|
||||
else begin
|
||||
fcmp_res[i] = {31'h0, ab_equal[i]};
|
||||
|
@ -201,7 +201,7 @@ module VX_fp_noncomp #(
|
|||
end
|
||||
end
|
||||
default: begin
|
||||
fcmp_res[i] = 32'hdeadbeaf; // don't care value
|
||||
fcmp_res[i] = 'x; // don't care value
|
||||
fcmp_excp[i] = 5'h0;
|
||||
end
|
||||
endcase
|
||||
|
@ -226,7 +226,7 @@ module VX_fp_noncomp #(
|
|||
end
|
||||
//`FPU_MISC:
|
||||
default: begin
|
||||
case (frm)
|
||||
case (frm_r)
|
||||
0,1,2: begin
|
||||
tmp_result[i] = fsgnj_res[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#include <iomanip>
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 4
|
||||
#define DRAM_LATENCY 100
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
|
@ -180,9 +180,19 @@ void Simulator::eval_dram_bus() {
|
|||
}
|
||||
} else {
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.tag = vortex_->dram_req_tag;
|
||||
|
||||
dram_req.tag = vortex_->dram_req_tag;
|
||||
dram_req.addr = vortex_->dram_req_addr;
|
||||
|
||||
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
|
||||
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
for (auto& req : dram_rsp_vec_) {
|
||||
if (req.addr == dram_req.addr) {
|
||||
dram_req.cycles_left = req.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dram_rsp_vec_.emplace_back(dram_req);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,7 +51,8 @@ private:
|
|||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
|
||||
unsigned tag;
|
||||
uint32_t tag;
|
||||
uint32_t addr;
|
||||
} dram_req_t;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue