master merge fixes

This commit is contained in:
Blaise Tine 2021-04-04 21:12:12 -07:00
commit 87888a9a93
27 changed files with 399 additions and 281 deletions

View file

@ -15,6 +15,9 @@ set -e
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
# disable shared memory
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
# Blackbox tests
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"

View file

@ -120,7 +120,7 @@ module VX_cluster #(
.DATA_WIDTH (32),
.ADDR_WIDTH (12),
.BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CORES >= 4)
.BUFFERED_RSP (1)
) csr_arb (
.clk (clk),
.reset (reset),
@ -225,7 +225,7 @@ module VX_cluster #(
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
.BUFFERED_REQ (`NUM_CORES >= 4),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) dram_arb (
.clk (clk),

View file

@ -1,6 +1,12 @@
`include "VX_define.vh"
`include "VX_print_instr.vh"
`ifdef EXT_F_ENABLE
`define USED_REGS(f,r) used_regs[{f,r}] = 1
`else
`define USED_REGS(f,r) used_regs[r] = 1
`endif
module VX_decode #(
parameter CORE_ID = 0
) (
@ -22,10 +28,12 @@ module VX_decode #(
reg [`EX_BITS-1:0] ex_type;
reg [`OP_BITS-1:0] op_type;
reg [`MOD_BITS-1:0] op_mod;
reg [31:0] imm;
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
reg [4:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [31:0] imm;
reg use_rd, use_PC, use_imm;
reg rd_fp, rs1_fp, rs2_fp, rs3_fp;
reg is_join, is_wstall;
reg [`NUM_REGS-1:0] used_regs;
wire [31:0] instr = ifetch_rsp_if.instr;
wire [6:0] opcode = instr[6:0];
@ -46,14 +54,11 @@ module VX_decode #(
always @(*) begin
ex_type = `EX_NOP;
ex_type = 0;
op_type = 'x;
op_mod = 'x;
imm = 'x;
use_rd = 0;
use_rs1 = 0;
use_rs2 = 0;
use_rs3 = 0;
use_PC = 0;
use_imm = 0;
rd_fp = 0;
@ -61,7 +66,12 @@ module VX_decode #(
rs2_fp = 0;
rs3_fp = 1;
is_join = 0;
is_wstall = 0;
is_wstall = 0;
used_regs = 0;
rd_r = rd;
rs1_r = rs1;
rs2_r = rs2;
rs3_r = rs3;
case (opcode)
`INST_I: begin
@ -80,8 +90,9 @@ module VX_decode #(
op_mod = 0;
imm = {{20{alu_imm[11]}}, alu_imm};
use_rd = 1;
use_rs1 = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
end
`INST_R: begin
ex_type = `EX_ALU;
@ -115,18 +126,21 @@ module VX_decode #(
endcase
op_mod = 0;
end
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
use_rd = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
`INST_LUI: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_LUI);
op_mod = 0;
op_mod = 0;
rs1_r = 0;
imm = {upper_imm, 12'(0)};
use_rd = 1;
use_rs1 = 1;
use_imm = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, 5'b0);
end
`INST_AUIPC: begin
ex_type = `EX_ALU;
@ -136,6 +150,7 @@ module VX_decode #(
use_rd = 1;
use_PC = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
end
`INST_JAL: begin
ex_type = `EX_ALU;
@ -146,6 +161,7 @@ module VX_decode #(
use_PC = 1;
use_imm = 1;
is_wstall = 1;
`USED_REGS (1'b0, rd);
end
`INST_JALR: begin
ex_type = `EX_ALU;
@ -153,9 +169,10 @@ module VX_decode #(
op_mod = 1;
imm = {{20{jalr_imm[11]}}, jalr_imm};
use_rd = 1;
use_rs1 = 1;
use_imm = 1;
is_wstall = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
end
`INST_B: begin
ex_type = `EX_ALU;
@ -170,11 +187,11 @@ module VX_decode #(
endcase
op_mod = 1;
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
use_rs1 = 1;
use_rs2 = 1;
use_PC = 1;
use_imm = 1;
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
`INST_SYS : begin
if (func3 == 0) begin
@ -192,6 +209,7 @@ module VX_decode #(
use_rd = 1;
use_PC = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
end else begin
ex_type = `EX_CSR;
case (func3[1:0])
@ -203,8 +221,10 @@ module VX_decode #(
endcase
imm = 32'(u_12);
use_rd = 1;
use_rs1 = !func3[2];
use_imm = func3[2];
`USED_REGS (1'b0, rd);
if (!func3[2])
`USED_REGS (1'b0, rs1);
end
end
`ifdef EXT_F_ENABLE
@ -214,10 +234,11 @@ module VX_decode #(
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b0, func3});
imm = {{20{u_12[11]}}, u_12};
use_rd = 1;
use_rs1 = 1;
`ifdef EXT_F_ENABLE
rd_fp = (opcode == `INST_FL);
use_rd = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS ((opcode == `INST_FL), rd);
`ifdef EXT_F_ENABLE
rd_fp = (opcode == `INST_FL);
`endif
end
`ifdef EXT_F_ENABLE
@ -227,8 +248,8 @@ module VX_decode #(
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b1, func3});
imm = {{20{func7[6]}}, func7, rd};
use_rs1 = 1;
use_rs2 = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS ((opcode == `INST_FS), rs2);
`ifdef EXT_F_ENABLE
rs2_fp = (opcode == `INST_FS);
`endif
@ -242,17 +263,18 @@ module VX_decode #(
op_type = `OP_BITS'(opcode[3:0]);
op_mod = func3;
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
use_rs3 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REGS (1'b1, rs3);
end
`INST_FCI: begin
ex_type = `EX_FPU;
op_mod = func3;
use_rd = 1;
use_rd = 1;
case (func7)
7'h00, // FADD
7'h04, // FSUB
@ -260,55 +282,61 @@ module VX_decode #(
7'h0C: // FDIV
begin
op_type = `OP_BITS'(func7[3:0]);
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h2C: begin
op_type = `OP_BITS'(`FPU_SQRT);
use_rs1 = 1;
rd_fp = 1;
rs1_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
end
7'h50: begin
op_type = `OP_BITS'(`FPU_CMP);
use_rs1 = 1;
use_rs2 = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h60: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
use_rs1 = 1;
rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
end
7'h68: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
use_rs1 = 1;
rd_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b0, rs1);
end
7'h10: begin
// FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `OP_BITS'(`FPU_MISC);
op_mod = {1'b0, func3[1:0]};
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h14: begin
// FMIN=3, FMAX=4
op_type = `OP_BITS'(`FPU_MISC);
op_mod = func3[0] ? 4 : 3;
use_rs1 = 1;
use_rs2 = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
end
7'h70: begin
if (func3[0]) begin
@ -318,15 +346,17 @@ module VX_decode #(
// FMV.X.W=5
op_type = `OP_BITS'(`FPU_MISC);
op_mod = 5;
end
use_rs1 = 1;
rs1_fp = 1;
end
rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
end
7'h78: begin
// FMV.W.X=6
op_type = `OP_BITS'(`FPU_MISC);
op_mod = 6;
rd_fp = 1;
`USED_REGS (1'b1, rd);
end
default:;
endcase
@ -337,38 +367,38 @@ module VX_decode #(
case (func3)
3'h0: begin
op_type = `OP_BITS'(`GPU_TMC);
use_rs1 = 1;
is_wstall = 1;
`USED_REGS (1'b0, rs1);
end
3'h1: begin
op_type = `OP_BITS'(`GPU_WSPAWN);
use_rs1 = 1;
use_rs2 = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
3'h2: begin
op_type = `OP_BITS'(`GPU_SPLIT);
use_rs1 = 1;
is_wstall = 1;
`USED_REGS (1'b0, rs1);
end
3'h3: begin
op_type = `OP_BITS'(`GPU_JOIN);
is_join = 1;
end
3'h4: begin
op_type = `OP_BITS'(`GPU_BAR);
use_rs1 = 1;
use_rs2 = 1;
op_type = `OP_BITS'(`GPU_BAR);
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
`ifdef EXT_TEX_ENABLE
3'h5: begin
op_type = `OP_BITS'(`GPU_TEX);
op_mod = `MOD_BITS'(func2);
use_rd = 1;
use_rs1 = 1;
use_rs2 = 1;
use_rs3 = 1;
rs3_fp = 0;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
`USED_REGS (1'b0, rs3);
end
`endif
default:;
@ -379,10 +409,7 @@ module VX_decode #(
end
// disable write to integer register r0
wire wb = use_rd && (rd_fp || (rd != 0));
// EX_ALU needs rs1=0 for LUI operation
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
wire wb = use_rd && (rd_fp || (rd_r != 0));
assign decode_if.valid = ifetch_rsp_if.valid;
assign decode_if.wid = ifetch_rsp_if.wid;
@ -393,29 +420,25 @@ module VX_decode #(
assign decode_if.op_mod = op_mod;
assign decode_if.wb = wb;
`ifdef EXT_F_ENABLE
assign decode_if.rd = {rd_fp, rd};
assign decode_if.rs1 = {rs1_fp, rs1_qual};
assign decode_if.rs2 = {rs2_fp, rs2};
assign decode_if.rs3 = {rs3_fp, rs3};
`else
`UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp)
`UNUSED_VAR (rs2_fp)
assign decode_if.rd = rd;
assign decode_if.rs1 = rs1_qual;
assign decode_if.rs2 = rs2;
assign decode_if.rs3 = rs3;
`endif
`ifdef EXT_F_ENABLE
assign decode_if.rd = {rd_fp, rd_r};
assign decode_if.rs1 = {rs1_fp, rs1_r};
assign decode_if.rs2 = {rs2_fp, rs2_r};
assign decode_if.rs3 = {rs3_fp, rs3_r};
`else
`UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp)
`UNUSED_VAR (rs2_fp)
assign decode_if.rd = rd_r;
assign decode_if.rs1 = rs1_r;
assign decode_if.rs2 = rs2_r;
assign decode_if.rs3 = rs3_r;
`endif
assign decode_if.imm = imm;
assign decode_if.use_PC = use_PC;
assign decode_if.use_imm = use_imm;
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
assign decode_if.imm = imm;
assign decode_if.use_PC = use_PC;
assign decode_if.use_imm = use_imm;
assign decode_if.used_regs = used_regs;
///////////////////////////////////////////////////////////////////////////

View file

@ -82,8 +82,7 @@ module VX_ibuffer #(
if (writing && is_slot0) begin
q_data_out[i] <= q_data_in;
end
if (pop) begin
end else if (pop) begin
q_data_out[i] <= q_data_prev[i];
end
end

View file

@ -38,7 +38,8 @@ module VX_instr_demux (
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) alu_buffer (
.clk (clk),
.reset (reset),
@ -55,7 +56,8 @@ module VX_instr_demux (
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
) lsu_buffer (
.clk (clk),
.reset (reset),
@ -72,7 +74,8 @@ module VX_instr_demux (
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.BUFFERED (1)
) csr_buffer (
.clk (clk),
.reset (reset),
@ -90,7 +93,8 @@ module VX_instr_demux (
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.BUFFERED (1)
) fpu_buffer (
.clk (clk),
.reset (reset),
@ -111,7 +115,8 @@ module VX_instr_demux (
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.BUFFERED (1)
) gpu_buffer (
.clk (clk),
.reset (reset),

View file

@ -97,7 +97,7 @@ module VX_lsu_unit #(
&& (0 == req_sent_mask) // first submission only
&& req_wb; // loads only
wire mbuf_pop = dcache_rsp_fire && ~(|rsp_rem_mask_n);
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
assign mbuf_raddr = dcache_rsp_if.tag[`LSUQ_ADDR_BITS-1:0];
@ -124,8 +124,9 @@ module VX_lsu_unit #(
end
end
assign sent_all_ready = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|| (req_is_dup & dcache_req_if.ready[0]);
assign sent_all_ready = &(dcache_req_if.ready | req_sent_mask);
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
always @(posedge clk) begin
if (reset) begin
@ -134,7 +135,7 @@ module VX_lsu_unit #(
if (sent_all_ready)
req_sent_mask <= 0;
else
req_sent_mask <= req_sent_mask | dcache_req_fire;
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
end
end
@ -146,10 +147,13 @@ module VX_lsu_unit #(
req_tag_hold <= mbuf_waddr;
end
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
always @(posedge clk) begin
if (mbuf_push) begin
rsp_rem_mask[mbuf_waddr] <= req_is_dup ? (`NUM_THREADS)'(1) : req_tmask;
rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
end
if (dcache_rsp_fire) begin
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
@ -159,8 +163,6 @@ module VX_lsu_unit #(
wire req_ready_dep = (req_wb && ~mbuf_full)
|| (~req_wb && st_commit_if.ready);
wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
// DCache Request
reg [`NUM_THREADS-1:0][29:0] mem_req_addr;
@ -191,7 +193,7 @@ module VX_lsu_unit #(
end
end
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask;
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask_dup & ~req_sent_mask;
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
assign dcache_req_if.addr = mem_req_addr;
assign dcache_req_if.byteen = mem_req_byteen;
@ -257,8 +259,8 @@ module VX_lsu_unit #(
.clk (clk),
.reset (reset),
.enable (!load_rsp_stall),
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
);
// Can accept new cache response?

View file

@ -81,6 +81,27 @@
`define LTRIM(x, s) x[s-1:0]
`define PRINT_ARRAY1D(a, m) \
$write("{"); \
for (integer i = (m-1); i >= 0; --i) begin \
if (i != (m-1)) $write(", "); \
$write("0x%0h", a[i]); \
end \
$write("}"); \
`define PRINT_ARRAY2D(a, m, n) \
$write("{"); \
for (integer i = n-1; i >= 0; --i) begin \
if (i != (n-1)) $write(", "); \
$write("{"); \
for (integer j = (m-1); j >= 0; --j) begin \
if (j != (m-1)) $write(", "); \
$write("0x%0h", a[i][j]); \
end \
$write("}"); \
end \
$write("}")
`define PRINT_ARRAY1D(a, m) \
$write("{"); \
for (integer i = (m-1); i >= 0; --i) begin \

View file

@ -21,7 +21,7 @@ module VX_smem_arb (
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
//
@ -30,41 +30,42 @@ module VX_smem_arb (
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire cache_req_ready_in;
wire smem_req_ready_in;
wire cache_req_valid_out, cache_req_ready_out;
wire is_smem_addr_in, is_smem_addr_out;
// select shared memory bus
wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
assign is_smem_addr_in = core_req_if.valid[i] && `SM_ENABLE
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) cache_out_buffer (
) out_buffer (
.clk (clk),
.reset (reset),
.valid_in (core_req_if.valid[i] && !is_smem_addr),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (cache_req_ready_in),
.valid_out (cache_req_if.valid[i]),
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (cache_req_if.ready[i])
.valid_in (core_req_if.valid[i]),
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (core_req_if.ready[i]),
.valid_out (cache_req_valid_out),
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (cache_req_ready_out)
);
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) smem_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (core_req_if.valid[i] && is_smem_addr),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (smem_req_ready_in),
.valid_out (smem_req_if.valid[i]),
.data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
.ready_out (smem_req_if.ready[i])
);
assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in;
if (`SM_ENABLE ) begin
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
assign smem_req_if.addr[i] = cache_req_if.addr[i];
assign smem_req_if.rw[i] = cache_req_if.rw[i];
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
assign smem_req_if.data[i] = cache_req_if.data[i];
assign smem_req_if.tag[i] = cache_req_if.tag[i];
end else begin
`UNUSED_VAR (is_smem_addr_out)
assign cache_req_if.valid[i] = cache_req_valid_out;
assign cache_req_ready_out = cache_req_if.ready[i];
end
end
//

View file

@ -121,7 +121,7 @@ module Vortex (
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (32),
.ADDR_WIDTH (12),
.BUFFERED_REQ (`NUM_CLUSTERS >= 4),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) csr_arb (
.clk (clk),
@ -228,7 +228,7 @@ module Vortex (
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
.BUFFERED_RSP (1)
) dram_arb (
.clk (clk),
.reset (reset),

View file

@ -1,14 +1,13 @@
`include "VX_define.vh"
`ifndef NOPAE
import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
`else
`include "vortex_afu.vh"
`endif
/* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*;
import local_mem_cfg_pkg::*;
/* verilator lint_on IMPORTSTAR */
`endif
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2

View file

@ -168,8 +168,7 @@ module VX_cache #(
.NUM_BANKS (NUM_BANKS)
) flush_ctrl (
.clk (clk),
.reset (reset),
.flush (flush),
.reset (reset || flush),
.addr_out (flush_addr),
.valid_out (flush_enable)
);

View file

@ -98,7 +98,8 @@ module VX_cache_core_rsp_merge #(
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.BUFFERED (1)
) pipe_reg (
.clk (clk),
.reset (reset),
@ -146,7 +147,8 @@ module VX_cache_core_rsp_merge #(
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.BUFFERED (1)
) pipe_reg (
.clk (clk),
.reset (reset),

View file

@ -9,8 +9,7 @@ module VX_flush_ctrl #(
parameter NUM_BANKS = 1
) (
input wire clk,
input wire reset,
input wire flush,
input wire reset,
output wire [`LINE_SELECT_BITS-1:0] addr_out,
output wire valid_out
);
@ -18,7 +17,7 @@ module VX_flush_ctrl #(
reg [`LINE_SELECT_BITS-1:0] flush_ctr;
always @(posedge clk) begin
if (reset || flush) begin
if (reset) begin
flush_enable <= 1;
flush_ctr <= 0;
end else begin

View file

@ -3,10 +3,6 @@
/// Modified port of cast module from fpnew Library
/// reference: https://github.com/pulp-platform/fpnew
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_cvt #(
parameter TAGW = 1,
parameter LANES = 1
@ -73,19 +69,19 @@ module VX_fp_cvt #(
);
end
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
wire [LANES-1:0] input_sign;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
wire [LANES-1:0] input_sign;
for (genvar i = 0; i < LANES; ++i) begin
wire [INT_MAN_WIDTH-1:0] int_mantissa;
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
wire fmt_sign = dataa[i][31];
wire int_sign = dataa[i][31] & is_signed;
assign int_mantissa = int_sign ? $unsigned(-dataa[i]) : dataa[i];
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
assign fmt_exponent[i] = $signed({1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]});
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
end
@ -115,7 +111,7 @@ module VX_fp_cvt #(
wire [2:0] rnd_mode_s0;
fp_type_t [LANES-1:0] in_a_type_s0;
wire [LANES-1:0] input_sign_s0;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
wire [LANES-1:0] mant_is_zero_s0;
@ -135,38 +131,93 @@ module VX_fp_cvt #(
// Normalization
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
for (genvar i = 0; i < LANES; ++i) begin
`IGNORE_WARNINGS_BEGIN
// Input mantissa needs to be normalized
wire signed [INT_EXP_WIDTH-1:0] fp_input_exp;
wire signed [INT_EXP_WIDTH-1:0] int_input_exp;
wire [LZC_RESULT_WIDTH:0] renorm_shamt_sgn;
// signed form for calculations
assign renorm_shamt_sgn = $signed({1'b0, renorm_shamt_s0[i]});
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
wire [INT_EXP_WIDTH-1:0] int_input_exp;
// Realign input mantissa, append zeroes if destination is wider
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
// Unbias exponent and compensate for shift
assign fp_input_exp = $signed(fmt_exponent_s0[i] +
(($signed({1'b0, in_a_type_s0[i].is_subnormal}) +
$signed(FMT_SHIFT_COMPENSATION - EXP_BIAS)) -
renorm_shamt_sgn));
assign fp_input_exp = fmt_exponent_s0[i] +
{1'b0, in_a_type_s0[i].is_subnormal} +
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
{1'b0, renorm_shamt_s0[i]};
assign int_input_exp = $signed(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
// Rebias the exponent
assign destination_exp[i] = input_exp[i] + $signed(EXP_BIAS);
assign destination_exp[i] = input_exp[i] + EXP_BIAS;
`IGNORE_WARNINGS_END
end
// Perform adjustments to mantissa and exponent
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
wire [LANES-1:0] of_before_round_s0;
for (genvar i = 0; i < LANES; ++i) begin
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg of_before_round;
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
denorm_shamt = 0; // right of mantissa
of_before_round = 1'b0;
// Handle INT casts
if (is_itof_s0) begin
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
// Overflow or infinities (for proper rounding)
final_exp = (2**EXP_BITS-2); // largest normal value
preshift_mant = ~0; // largest normal value and RS bits set
of_before_round = 1'b1;
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
// Limit the shift to retain sticky bits
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
end else if ($signed(destination_exp[i]) < $signed(1)) begin
// Denormalize underflowing values
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
end
end else begin
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
// overflow: when converting to unsigned the range is larger by one
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
of_before_round = 1'b1;
end else if ($signed(input_exp[i]) < $signed(-1)) begin
// underflow
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
end else begin
// By default right shift mantissa to be an integer
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
end
end
`IGNORE_WARNINGS_END
end
assign preshift_mant_s0[i] = preshift_mant;
assign denorm_shamt_s0[i] = denorm_shamt;
assign final_exp_s0[i] = final_exp;
assign of_before_round_s0[i] = of_before_round;
end
// Pipeline stage1
wire valid_in_s1;
@ -176,121 +227,68 @@ module VX_fp_cvt #(
wire [2:0] rnd_mode_s1;
fp_type_t [LANES-1:0] in_a_type_s1;
wire [LANES-1:0] mant_is_zero_s1;
wire [LANES-1:0] input_sign_s1;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp_s1;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
wire [LANES-1:0] input_sign_s1;
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
wire [LANES-1:0] of_before_round_s1;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + 2*INT_EXP_WIDTH)),
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, input_mant, input_exp, destination_exp}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1})
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
);
// Casting
reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
wire [LANES-1:0][MAN_BITS-1:0] final_mant; // mantissa after adjustments
wire [LANES-1:0][MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
reg [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
reg [LANES-1:0] of_before_round;
// Perform adjustments to mantissa and exponent
wire [LANES-1:0] rounded_sign;
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits;
// Rounding and classification
for (genvar i = 0; i < LANES; ++i) begin
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp[i] = $unsigned(destination_exp_s1[i]); // take exponent as is, only look at lower bits
preshift_mant[i] = 65'b0; // initialize mantissa container with zeroes
denorm_shamt[i] = 0; // right of mantissa
of_before_round[i] = 1'b0;
// Place mantissa to the left of the shifter
preshift_mant[i] = {input_mant_s1[i], 33'b0};
// Handle INT casts
if (is_itof_s1) begin
// Overflow or infinities (for proper rounding)
if ($signed(destination_exp_s1[i]) >= $signed(2**EXP_BITS-1)) begin
final_exp[i] = (2**EXP_BITS-2); // largest normal value
preshift_mant[i] = ~0; // largest normal value and RS bits set
of_before_round[i] = 1'b1;
// Denormalize underflowing values
end else if (($signed(destination_exp_s1[i]) < $signed(1))
&& ($signed(destination_exp_s1[i]) >= -$signed(MAN_BITS))) begin
final_exp[i] = 0; // denormal result
denorm_shamt[i] = $unsigned(denorm_shamt[i] + 1 - destination_exp_s1[i]); // adjust right shifting
// Limit the shift to retain sticky bits
end else if ($signed(destination_exp_s1[i]) < -$signed(MAN_BITS)) begin
final_exp[i] = 0; // denormal result
denorm_shamt[i] = $unsigned(denorm_shamt[i] + (2 + MAN_BITS)); // to sticky
end
end else begin
// By default right shift mantissa to be an integer
denorm_shamt[i] = (MAX_INT_WIDTH-1) - input_exp_s1[i];
// overflow: when converting to unsigned the range is larger by one
if ($signed(input_exp_s1[i]) >= $signed(MAX_INT_WIDTH -1 + unsigned_s1)) begin
denorm_shamt[i] = SHAMT_BITS'(0); // prevent shifting
of_before_round[i] = 1'b1;
// underflow
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
denorm_shamt[i] = MAX_INT_WIDTH + 1; // all bits go to the sticky
end
end
`IGNORE_WARNINGS_END
end
wire [2*INT_MAN_WIDTH:0] destination_mant;
wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
wire [1:0] round_sticky_bits;
wire [31:0] fmt_pre_round_abs;
wire [31:0] pre_round_abs;
// Mantissa adjustment shift
assign destination_mant[i] = preshift_mant[i] >> denorm_shamt[i];
assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
// Extract final mantissa and round bit, discard the normal bit (for FP)
assign {final_mant[i], fp_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int[i], int_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
// Collapse sticky bits
assign fp_round_sticky_bits[i][0] = (| destination_mant[i][NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant[i][NUM_INT_STICKY-1:0]);
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
// select RS bits for destination operation
assign round_sticky_bits[i] = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
end
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
// Rounding and classification
wire [LANES-1:0] rounded_sign;
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
for (genvar i = 0; i < LANES; ++i) begin
// Pack exponent and mantissa into proper rounding form
wire [31:0] fmt_pre_round_abs = {1'b0, final_exp[i][EXP_BITS-1:0], final_mant[i][MAN_BITS-1:0]};
// Sign-extend integer result
wire [31:0] ifmt_pre_round_abs = final_int[i];
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
// Select output with destination format and operation
wire [31:0] pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : ifmt_pre_round_abs;
assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
// Perform the rounding
VX_fp_rounding #(
.DAT_WIDTH (32)
) fp_rounding (
.abs_value_i (pre_round_abs),
.sign_i (input_sign_s1[i]),
.round_sticky_bits_i (round_sticky_bits[i]),
.rnd_mode_i (rnd_mode_s1),
.effective_subtraction_i (1'b0),
.abs_rounded_o (rounded_abs[i]),
.sign_o (rounded_sign[i]),
.abs_value_i (pre_round_abs),
.sign_i (input_sign_s1[i]),
.round_sticky_bits_i(round_sticky_bits),
.rnd_mode_i (rnd_mode_s1),
.effective_subtraction_i(1'b0),
.abs_rounded_o (rounded_abs[i]),
.sign_o (rounded_sign[i]),
`UNUSED_PIN (exact_zero_o)
);
end
@ -306,23 +304,22 @@ module VX_fp_cvt #(
wire [LANES-1:0] input_sign_s2;
wire [LANES-1:0] rounded_sign_s2;
wire [LANES-1:0][31:0] rounded_abs_s2;
wire [LANES-1:0] of_before_round_s2;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1)),
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
.RESETW (1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2})
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
);
wire [LANES-1:0] of_after_round;
wire [LANES-1:0] uf_after_round;
wire [LANES-1:0][31:0] fmt_result;
wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion
wire [LANES-1:0] rounded_int_res_zero; // after rounding
@ -335,7 +332,7 @@ module VX_fp_cvt #(
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
// Negative integer result needs to be brought into two's complement
assign rounded_int_res[i] = rounded_sign_s2[i] ? $unsigned(-rounded_abs_s2[i]) : rounded_abs_s2[i];
assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
end
@ -373,7 +370,7 @@ module VX_fp_cvt #(
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
end else begin
int_special_result[i][30:0] = 2**(31) -1; // alone yields 2**(31)-1
int_special_result[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
end
end
@ -381,7 +378,7 @@ module VX_fp_cvt #(
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
| in_a_type_s2[i].is_inf
| of_before_round[i]
| of_before_round_s2[i]
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
// All integer special cases are invalid
@ -399,11 +396,11 @@ module VX_fp_cvt #(
wire [31:0] fp_result, int_result;
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i]));
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
assign fp_regular_status.NV = is_itof_s2 & (of_before_round[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.DZ = 1'b0; // no divisions
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.UF = uf_after_round[i] & inexact;
assign fp_regular_status.NX = inexact;

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_div #(
parameter TAGW = 1,
parameter LANES = 1

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_fma #(
parameter TAGW = 1,
parameter LANES = 1

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "float_dpi.vh"
`endif
module VX_fp_sqrt #(
parameter TAGW = 1,
parameter LANES = 1
@ -44,7 +48,7 @@ module VX_fp_sqrt #(
fflags_t f;
always @(*) begin
dpi_fsqrt (dataa[i], frm, r, f);
dpi_fsqrt (dataa[i], frm, r, f);
end
`UNUSED_VAR (f)

View file

@ -10,7 +10,7 @@ module VX_fp_type (
);
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
wire is_subnormal = (exp_i == 8'd0) && !is_zero;
wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0);
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
wire is_signaling = is_nan && (man_i[22] == 1'b0);

View file

@ -67,8 +67,7 @@ module VX_skid_buffer #(
end else begin
if (ready_out) begin
use_buffer <= 0;
end
if (push && !pop) begin
end else if (push && valid_out_r) begin
assert(!use_buffer);
use_buffer <= 1;
end
@ -81,9 +80,11 @@ module VX_skid_buffer #(
always @(posedge clk) begin
if (push) begin
buffer <= data_in;
end
if (pop) begin
data_out_r <= use_buffer ? buffer : data_in;
end
if (pop && !use_buffer) begin
data_out_r <= data_in;
end else if (pop) begin
data_out_r <= buffer;
end
end

View file

@ -138,5 +138,4 @@ clean-fpga-32c:
clean-fpga-64c:
rm -rf $(FPGA_BUILD_DIR)_64c sources.txt
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
rm sources.txt
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c

View file

@ -6,7 +6,7 @@
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
vortex_afu16.json
QI:vortex_afu.qsf
C:sources.txt

View file

@ -2,6 +2,8 @@
+define+NUM_CLUSTERS=4
#+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE

View file

@ -2,6 +2,8 @@
+define+NUM_CLUSTERS=8
#+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE

View file

@ -6,7 +6,7 @@
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
vortex_afu16.json
QI:vortex_afu.qsf
C:sources.txt

View file

@ -0,0 +1,56 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-210",
"clock-frequency-low": "auto-210",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-csr-read": 4,
"cmd-csr-write": 5,
"mmio-cmd-type": 10,
"mmio-io-addr": 12,
"mmio-mem-addr": 14,
"mmio-data-size": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-csr-core": 24,
"mmio-csr-addr": 26,
"mmio-csr-data": 28,
"mmio-csr-read": 30,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

View file

@ -41,11 +41,7 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE AREA
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0

Binary file not shown.