mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
master merge fixes
This commit is contained in:
commit
87888a9a93
27 changed files with 399 additions and 281 deletions
|
@ -15,6 +15,9 @@ set -e
|
|||
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
|
||||
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
|
||||
|
||||
# disable shared memory
|
||||
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
|
||||
|
||||
# Blackbox tests
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||
|
|
|
@ -120,7 +120,7 @@ module VX_cluster #(
|
|||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (`NUM_CORES >= 4)
|
||||
.BUFFERED_RSP (1)
|
||||
) csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -225,7 +225,7 @@ module VX_cluster #(
|
|||
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
|
||||
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (`NUM_CORES >= 4),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
|
|
|
@ -1,6 +1,12 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_print_instr.vh"
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_REGS(f,r) used_regs[{f,r}] = 1
|
||||
`else
|
||||
`define USED_REGS(f,r) used_regs[r] = 1
|
||||
`endif
|
||||
|
||||
module VX_decode #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
|
@ -22,10 +28,12 @@ module VX_decode #(
|
|||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`OP_BITS-1:0] op_type;
|
||||
reg [`MOD_BITS-1:0] op_mod;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg [4:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_PC, use_imm;
|
||||
reg rd_fp, rs1_fp, rs2_fp, rs3_fp;
|
||||
reg is_join, is_wstall;
|
||||
reg [`NUM_REGS-1:0] used_regs;
|
||||
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
|
@ -46,14 +54,11 @@ module VX_decode #(
|
|||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = `EX_NOP;
|
||||
ex_type = 0;
|
||||
op_type = 'x;
|
||||
op_mod = 'x;
|
||||
imm = 'x;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
use_rs3 = 0;
|
||||
use_PC = 0;
|
||||
use_imm = 0;
|
||||
rd_fp = 0;
|
||||
|
@ -61,7 +66,12 @@ module VX_decode #(
|
|||
rs2_fp = 0;
|
||||
rs3_fp = 1;
|
||||
is_join = 0;
|
||||
is_wstall = 0;
|
||||
is_wstall = 0;
|
||||
used_regs = 0;
|
||||
rd_r = rd;
|
||||
rs1_r = rs1;
|
||||
rs2_r = rs2;
|
||||
rs3_r = rs3;
|
||||
|
||||
case (opcode)
|
||||
`INST_I: begin
|
||||
|
@ -80,8 +90,9 @@ module VX_decode #(
|
|||
op_mod = 0;
|
||||
imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
`INST_R: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -115,18 +126,21 @@ module VX_decode #(
|
|||
endcase
|
||||
op_mod = 0;
|
||||
end
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rd = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_LUI);
|
||||
op_mod = 0;
|
||||
op_mod = 0;
|
||||
rs1_r = 0;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, 5'b0);
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -136,6 +150,7 @@ module VX_decode #(
|
|||
use_rd = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
end
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -146,6 +161,7 @@ module VX_decode #(
|
|||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
end
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -153,9 +169,10 @@ module VX_decode #(
|
|||
op_mod = 1;
|
||||
imm = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
|
@ -170,11 +187,11 @@ module VX_decode #(
|
|||
endcase
|
||||
op_mod = 1;
|
||||
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3 == 0) begin
|
||||
|
@ -192,6 +209,7 @@ module VX_decode #(
|
|||
use_rd = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
end else begin
|
||||
ex_type = `EX_CSR;
|
||||
case (func3[1:0])
|
||||
|
@ -203,8 +221,10 @@ module VX_decode #(
|
|||
endcase
|
||||
imm = 32'(u_12);
|
||||
use_rd = 1;
|
||||
use_rs1 = !func3[2];
|
||||
use_imm = func3[2];
|
||||
`USED_REGS (1'b0, rd);
|
||||
if (!func3[2])
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -214,10 +234,11 @@ module VX_decode #(
|
|||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b0, func3});
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
rd_fp = (opcode == `INST_FL);
|
||||
use_rd = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS ((opcode == `INST_FL), rd);
|
||||
`ifdef EXT_F_ENABLE
|
||||
rd_fp = (opcode == `INST_FL);
|
||||
`endif
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -227,8 +248,8 @@ module VX_decode #(
|
|||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b1, func3});
|
||||
imm = {{20{func7[6]}}, func7, rd};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS ((opcode == `INST_FS), rs2);
|
||||
`ifdef EXT_F_ENABLE
|
||||
rs2_fp = (opcode == `INST_FS);
|
||||
`endif
|
||||
|
@ -242,17 +263,18 @@ module VX_decode #(
|
|||
op_type = `OP_BITS'(opcode[3:0]);
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
`USED_REGS (1'b1, rs3);
|
||||
end
|
||||
`INST_FCI: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
use_rd = 1;
|
||||
case (func7)
|
||||
7'h00, // FADD
|
||||
7'h04, // FSUB
|
||||
|
@ -260,55 +282,61 @@ module VX_decode #(
|
|||
7'h0C: // FDIV
|
||||
begin
|
||||
op_type = `OP_BITS'(func7[3:0]);
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h2C: begin
|
||||
op_type = `OP_BITS'(`FPU_SQRT);
|
||||
use_rs1 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
end
|
||||
7'h50: begin
|
||||
op_type = `OP_BITS'(`FPU_CMP);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h60: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
|
||||
use_rs1 = 1;
|
||||
rs1_fp = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
end
|
||||
7'h68: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
|
||||
use_rs1 = 1;
|
||||
rd_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
7'h10: begin
|
||||
// FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = {1'b0, func3[1:0]};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h14: begin
|
||||
// FMIN=3, FMAX=4
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = func3[0] ? 4 : 3;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
`USED_REGS (1'b1, rs2);
|
||||
end
|
||||
7'h70: begin
|
||||
if (func3[0]) begin
|
||||
|
@ -318,15 +346,17 @@ module VX_decode #(
|
|||
// FMV.X.W=5
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = 5;
|
||||
end
|
||||
use_rs1 = 1;
|
||||
rs1_fp = 1;
|
||||
end
|
||||
rs1_fp = 1;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b1, rs1);
|
||||
end
|
||||
7'h78: begin
|
||||
// FMV.W.X=6
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = 6;
|
||||
rd_fp = 1;
|
||||
`USED_REGS (1'b1, rd);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
@ -337,38 +367,38 @@ module VX_decode #(
|
|||
case (func3)
|
||||
3'h0: begin
|
||||
op_type = `OP_BITS'(`GPU_TMC);
|
||||
use_rs1 = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
3'h1: begin
|
||||
op_type = `OP_BITS'(`GPU_WSPAWN);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
3'h2: begin
|
||||
op_type = `OP_BITS'(`GPU_SPLIT);
|
||||
use_rs1 = 1;
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
end
|
||||
3'h3: begin
|
||||
op_type = `OP_BITS'(`GPU_JOIN);
|
||||
is_join = 1;
|
||||
end
|
||||
3'h4: begin
|
||||
op_type = `OP_BITS'(`GPU_BAR);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
op_type = `OP_BITS'(`GPU_BAR);
|
||||
is_wstall = 1;
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
end
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
3'h5: begin
|
||||
op_type = `OP_BITS'(`GPU_TEX);
|
||||
op_mod = `MOD_BITS'(func2);
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
rs3_fp = 0;
|
||||
`USED_REGS (1'b0, rd);
|
||||
`USED_REGS (1'b0, rs1);
|
||||
`USED_REGS (1'b0, rs2);
|
||||
`USED_REGS (1'b0, rs3);
|
||||
end
|
||||
`endif
|
||||
default:;
|
||||
|
@ -379,10 +409,7 @@ module VX_decode #(
|
|||
end
|
||||
|
||||
// disable write to integer register r0
|
||||
wire wb = use_rd && (rd_fp || (rd != 0));
|
||||
|
||||
// EX_ALU needs rs1=0 for LUI operation
|
||||
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
|
||||
wire wb = use_rd && (rd_fp || (rd_r != 0));
|
||||
|
||||
assign decode_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_if.wid = ifetch_rsp_if.wid;
|
||||
|
@ -393,29 +420,25 @@ module VX_decode #(
|
|||
assign decode_if.op_mod = op_mod;
|
||||
assign decode_if.wb = wb;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign decode_if.rd = {rd_fp, rd};
|
||||
assign decode_if.rs1 = {rs1_fp, rs1_qual};
|
||||
assign decode_if.rs2 = {rs2_fp, rs2};
|
||||
assign decode_if.rs3 = {rs3_fp, rs3};
|
||||
`else
|
||||
`UNUSED_VAR (rd_fp)
|
||||
`UNUSED_VAR (rs1_fp)
|
||||
`UNUSED_VAR (rs2_fp)
|
||||
assign decode_if.rd = rd;
|
||||
assign decode_if.rs1 = rs1_qual;
|
||||
assign decode_if.rs2 = rs2;
|
||||
assign decode_if.rs3 = rs3;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign decode_if.rd = {rd_fp, rd_r};
|
||||
assign decode_if.rs1 = {rs1_fp, rs1_r};
|
||||
assign decode_if.rs2 = {rs2_fp, rs2_r};
|
||||
assign decode_if.rs3 = {rs3_fp, rs3_r};
|
||||
`else
|
||||
`UNUSED_VAR (rd_fp)
|
||||
`UNUSED_VAR (rs1_fp)
|
||||
`UNUSED_VAR (rs2_fp)
|
||||
assign decode_if.rd = rd_r;
|
||||
assign decode_if.rs1 = rs1_r;
|
||||
assign decode_if.rs2 = rs2_r;
|
||||
assign decode_if.rs3 = rs3_r;
|
||||
`endif
|
||||
|
||||
assign decode_if.imm = imm;
|
||||
assign decode_if.use_PC = use_PC;
|
||||
assign decode_if.use_imm = use_imm;
|
||||
|
||||
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
|
||||
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
||||
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
||||
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
|
||||
assign decode_if.imm = imm;
|
||||
assign decode_if.use_PC = use_PC;
|
||||
assign decode_if.use_imm = use_imm;
|
||||
assign decode_if.used_regs = used_regs;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -82,8 +82,7 @@ module VX_ibuffer #(
|
|||
|
||||
if (writing && is_slot0) begin
|
||||
q_data_out[i] <= q_data_in;
|
||||
end
|
||||
if (pop) begin
|
||||
end else if (pop) begin
|
||||
q_data_out[i] <= q_data_prev[i];
|
||||
end
|
||||
end
|
||||
|
|
|
@ -38,7 +38,8 @@ module VX_instr_demux (
|
|||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -55,7 +56,8 @@ module VX_instr_demux (
|
|||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -72,7 +74,8 @@ module VX_instr_demux (
|
|||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.BUFFERED (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -90,7 +93,8 @@ module VX_instr_demux (
|
|||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -111,7 +115,8 @@ module VX_instr_demux (
|
|||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -97,7 +97,7 @@ module VX_lsu_unit #(
|
|||
&& (0 == req_sent_mask) // first submission only
|
||||
&& req_wb; // loads only
|
||||
|
||||
wire mbuf_pop = dcache_rsp_fire && ~(|rsp_rem_mask_n);
|
||||
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
|
||||
|
||||
assign mbuf_raddr = dcache_rsp_if.tag[`LSUQ_ADDR_BITS-1:0];
|
||||
|
||||
|
@ -124,8 +124,9 @@ module VX_lsu_unit #(
|
|||
end
|
||||
end
|
||||
|
||||
assign sent_all_ready = (&(dcache_req_if.ready | req_sent_mask | ~req_tmask))
|
||||
|| (req_is_dup & dcache_req_if.ready[0]);
|
||||
assign sent_all_ready = &(dcache_req_if.ready | req_sent_mask);
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -134,7 +135,7 @@ module VX_lsu_unit #(
|
|||
if (sent_all_ready)
|
||||
req_sent_mask <= 0;
|
||||
else
|
||||
req_sent_mask <= req_sent_mask | dcache_req_fire;
|
||||
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -146,10 +147,13 @@ module VX_lsu_unit #(
|
|||
req_tag_hold <= mbuf_waddr;
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
|
||||
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mbuf_push) begin
|
||||
rsp_rem_mask[mbuf_waddr] <= req_is_dup ? (`NUM_THREADS)'(1) : req_tmask;
|
||||
rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
|
||||
|
@ -159,8 +163,6 @@ module VX_lsu_unit #(
|
|||
wire req_ready_dep = (req_wb && ~mbuf_full)
|
||||
|| (~req_wb && st_commit_if.ready);
|
||||
|
||||
wire [`NUM_THREADS-1:0] dup_mask = {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
|
||||
// DCache Request
|
||||
|
||||
reg [`NUM_THREADS-1:0][29:0] mem_req_addr;
|
||||
|
@ -191,7 +193,7 @@ module VX_lsu_unit #(
|
|||
end
|
||||
end
|
||||
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & dup_mask & ~req_sent_mask;
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask_dup & ~req_sent_mask;
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
|
||||
assign dcache_req_if.addr = mem_req_addr;
|
||||
assign dcache_req_if.byteen = mem_req_byteen;
|
||||
|
@ -257,8 +259,8 @@ module VX_lsu_unit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!load_rsp_stall),
|
||||
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
|
|
|
@ -81,6 +81,27 @@
|
|||
|
||||
`define LTRIM(x, s) x[s-1:0]
|
||||
|
||||
`define PRINT_ARRAY1D(a, m) \
|
||||
$write("{"); \
|
||||
for (integer i = (m-1); i >= 0; --i) begin \
|
||||
if (i != (m-1)) $write(", "); \
|
||||
$write("0x%0h", a[i]); \
|
||||
end \
|
||||
$write("}"); \
|
||||
|
||||
`define PRINT_ARRAY2D(a, m, n) \
|
||||
$write("{"); \
|
||||
for (integer i = n-1; i >= 0; --i) begin \
|
||||
if (i != (n-1)) $write(", "); \
|
||||
$write("{"); \
|
||||
for (integer j = (m-1); j >= 0; --j) begin \
|
||||
if (j != (m-1)) $write(", "); \
|
||||
$write("0x%0h", a[i][j]); \
|
||||
end \
|
||||
$write("}"); \
|
||||
end \
|
||||
$write("}")
|
||||
|
||||
`define PRINT_ARRAY1D(a, m) \
|
||||
$write("{"); \
|
||||
for (integer i = (m-1); i >= 0; --i) begin \
|
||||
|
|
|
@ -21,7 +21,7 @@ module VX_smem_arb (
|
|||
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
|
||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
||||
localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
|
||||
//
|
||||
|
@ -30,41 +30,42 @@ module VX_smem_arb (
|
|||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
wire cache_req_ready_in;
|
||||
wire smem_req_ready_in;
|
||||
wire cache_req_valid_out, cache_req_ready_out;
|
||||
wire is_smem_addr_in, is_smem_addr_out;
|
||||
|
||||
// select shared memory bus
|
||||
wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
||||
assign is_smem_addr_in = core_req_if.valid[i] && `SM_ENABLE
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) cache_out_buffer (
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i] && !is_smem_addr),
|
||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (cache_req_ready_in),
|
||||
.valid_out (cache_req_if.valid[i]),
|
||||
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_if.ready[i])
|
||||
.valid_in (core_req_if.valid[i]),
|
||||
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (core_req_if.ready[i]),
|
||||
.valid_out (cache_req_valid_out),
|
||||
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_ready_out)
|
||||
);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) smem_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i] && is_smem_addr),
|
||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (smem_req_ready_in),
|
||||
.valid_out (smem_req_if.valid[i]),
|
||||
.data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
|
||||
.ready_out (smem_req_if.ready[i])
|
||||
);
|
||||
|
||||
assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in;
|
||||
if (`SM_ENABLE ) begin
|
||||
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
|
||||
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
|
||||
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
|
||||
|
||||
assign smem_req_if.addr[i] = cache_req_if.addr[i];
|
||||
assign smem_req_if.rw[i] = cache_req_if.rw[i];
|
||||
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
|
||||
assign smem_req_if.data[i] = cache_req_if.data[i];
|
||||
assign smem_req_if.tag[i] = cache_req_if.tag[i];
|
||||
end else begin
|
||||
`UNUSED_VAR (is_smem_addr_out)
|
||||
assign cache_req_if.valid[i] = cache_req_valid_out;
|
||||
assign cache_req_ready_out = cache_req_if.ready[i];
|
||||
end
|
||||
end
|
||||
|
||||
//
|
||||
|
|
|
@ -121,7 +121,7 @@ module Vortex (
|
|||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (`NUM_CLUSTERS >= 4),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) csr_arb (
|
||||
.clk (clk),
|
||||
|
@ -228,7 +228,7 @@ module Vortex (
|
|||
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
|
||||
.BUFFERED_RSP (1)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,14 +1,13 @@
|
|||
`include "VX_define.vh"
|
||||
`ifndef NOPAE
|
||||
import local_mem_cfg_pkg::*;
|
||||
`include "afu_json_info.vh"
|
||||
`else
|
||||
`include "vortex_afu.vh"
|
||||
`endif
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
import ccip_if_pkg::*;
|
||||
import local_mem_cfg_pkg::*;
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
`endif
|
||||
|
||||
module vortex_afu #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
|
|
3
hw/rtl/cache/VX_cache.v
vendored
3
hw/rtl/cache/VX_cache.v
vendored
|
@ -168,8 +168,7 @@ module VX_cache #(
|
|||
.NUM_BANKS (NUM_BANKS)
|
||||
) flush_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush (flush),
|
||||
.reset (reset || flush),
|
||||
.addr_out (flush_addr),
|
||||
.valid_out (flush_enable)
|
||||
);
|
||||
|
|
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -98,7 +98,8 @@ module VX_cache_core_rsp_merge #(
|
|||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
|
||||
.BUFFERED (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -146,7 +147,8 @@ module VX_cache_core_rsp_merge #(
|
|||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.BUFFERED (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
5
hw/rtl/cache/VX_flush_ctrl.v
vendored
5
hw/rtl/cache/VX_flush_ctrl.v
vendored
|
@ -9,8 +9,7 @@ module VX_flush_ctrl #(
|
|||
parameter NUM_BANKS = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire flush,
|
||||
input wire reset,
|
||||
output wire [`LINE_SELECT_BITS-1:0] addr_out,
|
||||
output wire valid_out
|
||||
);
|
||||
|
@ -18,7 +17,7 @@ module VX_flush_ctrl #(
|
|||
reg [`LINE_SELECT_BITS-1:0] flush_ctr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || flush) begin
|
||||
if (reset) begin
|
||||
flush_enable <= 1;
|
||||
flush_ctr <= 0;
|
||||
end else begin
|
||||
|
|
|
@ -3,10 +3,6 @@
|
|||
/// Modified port of cast module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_cvt #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
@ -73,19 +69,19 @@ module VX_fp_cvt #(
|
|||
);
|
||||
end
|
||||
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
|
||||
wire [LANES-1:0] input_sign;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent;
|
||||
wire [LANES-1:0] input_sign;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire [INT_MAN_WIDTH-1:0] int_mantissa;
|
||||
wire [INT_MAN_WIDTH-1:0] fmt_mantissa;
|
||||
wire fmt_sign = dataa[i][31];
|
||||
wire int_sign = dataa[i][31] & is_signed;
|
||||
assign int_mantissa = int_sign ? $unsigned(-dataa[i]) : dataa[i];
|
||||
assign int_mantissa = int_sign ? (-dataa[i]) : dataa[i];
|
||||
assign fmt_mantissa = INT_MAN_WIDTH'({in_a_type[i].is_normal, dataa[i][MAN_BITS-1:0]});
|
||||
|
||||
assign fmt_exponent[i] = $signed({1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]});
|
||||
assign fmt_exponent[i] = {1'b0, dataa[i][MAN_BITS+EXP_BITS-1:MAN_BITS]};
|
||||
assign encoded_mant[i] = is_itof ? int_mantissa : fmt_mantissa;
|
||||
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
|
||||
end
|
||||
|
@ -115,7 +111,7 @@ module VX_fp_cvt #(
|
|||
wire [2:0] rnd_mode_s0;
|
||||
fp_type_t [LANES-1:0] in_a_type_s0;
|
||||
wire [LANES-1:0] input_sign_s0;
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
|
||||
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
|
||||
wire [LANES-1:0] mant_is_zero_s0;
|
||||
|
@ -135,38 +131,93 @@ module VX_fp_cvt #(
|
|||
|
||||
// Normalization
|
||||
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Input mantissa needs to be normalized
|
||||
wire signed [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
wire signed [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
wire [LZC_RESULT_WIDTH:0] renorm_shamt_sgn;
|
||||
|
||||
// signed form for calculations
|
||||
assign renorm_shamt_sgn = $signed({1'b0, renorm_shamt_s0[i]});
|
||||
wire [INT_EXP_WIDTH-1:0] fp_input_exp;
|
||||
wire [INT_EXP_WIDTH-1:0] int_input_exp;
|
||||
|
||||
// Realign input mantissa, append zeroes if destination is wider
|
||||
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
|
||||
|
||||
// Unbias exponent and compensate for shift
|
||||
assign fp_input_exp = $signed(fmt_exponent_s0[i] +
|
||||
(($signed({1'b0, in_a_type_s0[i].is_subnormal}) +
|
||||
$signed(FMT_SHIFT_COMPENSATION - EXP_BIAS)) -
|
||||
renorm_shamt_sgn));
|
||||
assign fp_input_exp = fmt_exponent_s0[i] +
|
||||
{1'b0, in_a_type_s0[i].is_subnormal} +
|
||||
(FMT_SHIFT_COMPENSATION - EXP_BIAS) -
|
||||
{1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign int_input_exp = $signed(INT_MAN_WIDTH - 1 - renorm_shamt_sgn);
|
||||
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
|
||||
|
||||
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
|
||||
|
||||
// Rebias the exponent
|
||||
assign destination_exp[i] = input_exp[i] + $signed(EXP_BIAS);
|
||||
assign destination_exp[i] = input_exp[i] + EXP_BIAS;
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
|
||||
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
|
||||
wire [LANES-1:0] of_before_round_s0;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
reg of_before_round;
|
||||
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
|
||||
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
|
||||
denorm_shamt = 0; // right of mantissa
|
||||
of_before_round = 1'b0;
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s0) begin
|
||||
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
final_exp = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant = ~0; // largest normal value and RS bits set
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
|
||||
// Limit the shift to retain sticky bits
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
|
||||
end else if ($signed(destination_exp[i]) < $signed(1)) begin
|
||||
// Denormalize underflowing values
|
||||
final_exp = 0; // denormal result
|
||||
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
|
||||
end
|
||||
end else begin
|
||||
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
|
||||
// overflow: when converting to unsigned the range is larger by one
|
||||
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
|
||||
of_before_round = 1'b1;
|
||||
end else if ($signed(input_exp[i]) < $signed(-1)) begin
|
||||
// underflow
|
||||
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
|
||||
end
|
||||
end
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
assign preshift_mant_s0[i] = preshift_mant;
|
||||
assign denorm_shamt_s0[i] = denorm_shamt;
|
||||
assign final_exp_s0[i] = final_exp;
|
||||
assign of_before_round_s0[i] = of_before_round;
|
||||
end
|
||||
|
||||
// Pipeline stage1
|
||||
|
||||
wire valid_in_s1;
|
||||
|
@ -176,121 +227,68 @@ module VX_fp_cvt #(
|
|||
wire [2:0] rnd_mode_s1;
|
||||
fp_type_t [LANES-1:0] in_a_type_s1;
|
||||
wire [LANES-1:0] mant_is_zero_s1;
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
|
||||
wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp_s1;
|
||||
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
|
||||
|
||||
wire [LANES-1:0] input_sign_s1;
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
|
||||
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
|
||||
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
|
||||
wire [LANES-1:0] of_before_round_s1;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + 2*INT_EXP_WIDTH)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, input_mant, input_exp, destination_exp}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, input_mant_s1, input_exp_s1, destination_exp_s1})
|
||||
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
|
||||
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
|
||||
);
|
||||
|
||||
// Casting
|
||||
reg [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
|
||||
|
||||
reg [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
|
||||
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit
|
||||
wire [LANES-1:0][MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
||||
wire [LANES-1:0][MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
||||
|
||||
reg [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
|
||||
|
||||
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits, round_sticky_bits;
|
||||
reg [LANES-1:0] of_before_round;
|
||||
|
||||
// Perform adjustments to mantissa and exponent
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
|
||||
wire [LANES-1:0][1:0] fp_round_sticky_bits, int_round_sticky_bits;
|
||||
|
||||
// Rouding and classification
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
always @(*) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// Default assignment
|
||||
final_exp[i] = $unsigned(destination_exp_s1[i]); // take exponent as is, only look at lower bits
|
||||
preshift_mant[i] = 65'b0; // initialize mantissa container with zeroes
|
||||
denorm_shamt[i] = 0; // right of mantissa
|
||||
of_before_round[i] = 1'b0;
|
||||
|
||||
// Place mantissa to the left of the shifter
|
||||
preshift_mant[i] = {input_mant_s1[i], 33'b0};
|
||||
|
||||
// Handle INT casts
|
||||
if (is_itof_s1) begin
|
||||
// Overflow or infinities (for proper rounding)
|
||||
if ($signed(destination_exp_s1[i]) >= $signed(2**EXP_BITS-1)) begin
|
||||
final_exp[i] = (2**EXP_BITS-2); // largest normal value
|
||||
preshift_mant[i] = ~0; // largest normal value and RS bits set
|
||||
of_before_round[i] = 1'b1;
|
||||
// Denormalize underflowing values
|
||||
end else if (($signed(destination_exp_s1[i]) < $signed(1))
|
||||
&& ($signed(destination_exp_s1[i]) >= -$signed(MAN_BITS))) begin
|
||||
final_exp[i] = 0; // denormal result
|
||||
denorm_shamt[i] = $unsigned(denorm_shamt[i] + 1 - destination_exp_s1[i]); // adjust right shifting
|
||||
// Limit the shift to retain sticky bits
|
||||
end else if ($signed(destination_exp_s1[i]) < -$signed(MAN_BITS)) begin
|
||||
final_exp[i] = 0; // denormal result
|
||||
denorm_shamt[i] = $unsigned(denorm_shamt[i] + (2 + MAN_BITS)); // to sticky
|
||||
end
|
||||
end else begin
|
||||
// By default right shift mantissa to be an integer
|
||||
denorm_shamt[i] = (MAX_INT_WIDTH-1) - input_exp_s1[i];
|
||||
// overflow: when converting to unsigned the range is larger by one
|
||||
if ($signed(input_exp_s1[i]) >= $signed(MAX_INT_WIDTH -1 + unsigned_s1)) begin
|
||||
denorm_shamt[i] = SHAMT_BITS'(0); // prevent shifting
|
||||
of_before_round[i] = 1'b1;
|
||||
// underflow
|
||||
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
|
||||
denorm_shamt[i] = MAX_INT_WIDTH + 1; // all bits go to the sticky
|
||||
end
|
||||
end
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
wire [2*INT_MAN_WIDTH:0] destination_mant;
|
||||
wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
|
||||
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
|
||||
wire [1:0] round_sticky_bits;
|
||||
wire [31:0] fmt_pre_round_abs;
|
||||
wire [31:0] pre_round_abs;
|
||||
|
||||
// Mantissa adjustment shift
|
||||
assign destination_mant[i] = preshift_mant[i] >> denorm_shamt[i];
|
||||
|
||||
assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
|
||||
|
||||
// Extract final mantissa and round bit, discard the normal bit (for FP)
|
||||
assign {final_mant[i], fp_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
||||
assign {final_int[i], int_round_sticky_bits[i][1]} = destination_mant[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
||||
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
|
||||
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
|
||||
|
||||
// Collapse sticky bits
|
||||
assign fp_round_sticky_bits[i][0] = (| destination_mant[i][NUM_FP_STICKY-1:0]);
|
||||
assign int_round_sticky_bits[i][0] = (| destination_mant[i][NUM_INT_STICKY-1:0]);
|
||||
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
|
||||
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
|
||||
|
||||
// select RS bits for destination operation
|
||||
assign round_sticky_bits[i] = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
||||
end
|
||||
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
|
||||
|
||||
// Rouding and classification
|
||||
|
||||
wire [LANES-1:0] rounded_sign;
|
||||
wire [LANES-1:0][31:0] rounded_abs; // absolute value of result after rounding
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
// Pack exponent and mantissa into proper rounding form
|
||||
wire [31:0] fmt_pre_round_abs = {1'b0, final_exp[i][EXP_BITS-1:0], final_mant[i][MAN_BITS-1:0]};
|
||||
|
||||
// Sign-extend integer result
|
||||
wire [31:0] ifmt_pre_round_abs = final_int[i];
|
||||
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
|
||||
|
||||
// Select output with destination format and operation
|
||||
wire [31:0] pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : ifmt_pre_round_abs;
|
||||
assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
|
||||
|
||||
// Perform the rounding
|
||||
VX_fp_rounding #(
|
||||
.DAT_WIDTH (32)
|
||||
) fp_rounding (
|
||||
.abs_value_i (pre_round_abs),
|
||||
.sign_i (input_sign_s1[i]),
|
||||
.round_sticky_bits_i (round_sticky_bits[i]),
|
||||
.rnd_mode_i (rnd_mode_s1),
|
||||
.effective_subtraction_i (1'b0),
|
||||
.abs_rounded_o (rounded_abs[i]),
|
||||
.sign_o (rounded_sign[i]),
|
||||
.abs_value_i (pre_round_abs),
|
||||
.sign_i (input_sign_s1[i]),
|
||||
.round_sticky_bits_i(round_sticky_bits),
|
||||
.rnd_mode_i (rnd_mode_s1),
|
||||
.effective_subtraction_i(1'b0),
|
||||
.abs_rounded_o (rounded_abs[i]),
|
||||
.sign_o (rounded_sign[i]),
|
||||
`UNUSED_PIN (exact_zero_o)
|
||||
);
|
||||
end
|
||||
|
@ -306,23 +304,22 @@ module VX_fp_cvt #(
|
|||
wire [LANES-1:0] input_sign_s2;
|
||||
wire [LANES-1:0] rounded_sign_s2;
|
||||
wire [LANES-1:0][31:0] rounded_abs_s2;
|
||||
wire [LANES-1:0] of_before_round_s2;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1)),
|
||||
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall),
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2})
|
||||
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
|
||||
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
|
||||
);
|
||||
|
||||
wire [LANES-1:0] of_after_round;
|
||||
wire [LANES-1:0] uf_after_round;
|
||||
|
||||
wire [LANES-1:0][31:0] fmt_result;
|
||||
|
||||
wire [LANES-1:0][31:0] rounded_int_res; // after possible inversion
|
||||
wire [LANES-1:0] rounded_int_res_zero; // after rounding
|
||||
|
||||
|
@ -335,7 +332,7 @@ module VX_fp_cvt #(
|
|||
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
|
||||
|
||||
// Negative integer result needs to be brought into two's complement
|
||||
assign rounded_int_res[i] = rounded_sign_s2[i] ? $unsigned(-rounded_abs_s2[i]) : rounded_abs_s2[i];
|
||||
assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
|
||||
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
|
||||
end
|
||||
|
||||
|
@ -373,7 +370,7 @@ module VX_fp_cvt #(
|
|||
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
|
||||
end else begin
|
||||
int_special_result[i][30:0] = 2**(31) -1; // alone yields 2**(31)-1
|
||||
int_special_result[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1
|
||||
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
|
||||
end
|
||||
end
|
||||
|
@ -381,7 +378,7 @@ module VX_fp_cvt #(
|
|||
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
|
||||
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
|
||||
| in_a_type_s2[i].is_inf
|
||||
| of_before_round[i]
|
||||
| of_before_round_s2[i]
|
||||
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
|
||||
|
||||
// All integer special cases are invalid
|
||||
|
@ -399,11 +396,11 @@ module VX_fp_cvt #(
|
|||
wire [31:0] fp_result, int_result;
|
||||
|
||||
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i]));
|
||||
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
|
||||
|
||||
assign fp_regular_status.NV = is_itof_s2 & (of_before_round[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
|
||||
assign fp_regular_status.DZ = 1'b0; // no divisions
|
||||
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
|
||||
assign fp_regular_status.UF = uf_after_round[i] & inexact;
|
||||
assign fp_regular_status.NX = inexact;
|
||||
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_div #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_fma #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
module VX_fp_sqrt #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
|
@ -44,7 +48,7 @@ module VX_fp_sqrt #(
|
|||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fsqrt (dataa[i], frm, r, f);
|
||||
dpi_fsqrt (dataa[i], frm, r, f);
|
||||
end
|
||||
`UNUSED_VAR (f)
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ module VX_fp_type (
|
|||
);
|
||||
wire is_normal = (exp_i != 8'd0) && (exp_i != 8'hff);
|
||||
wire is_zero = (exp_i == 8'd0) && (man_i == 23'd0);
|
||||
wire is_subnormal = (exp_i == 8'd0) && !is_zero;
|
||||
wire is_subnormal = (exp_i == 8'd0) && (man_i != 23'd0);
|
||||
wire is_inf = (exp_i == 8'hff) && (man_i == 23'd0);
|
||||
wire is_nan = (exp_i == 8'hff) && (man_i != 23'd0);
|
||||
wire is_signaling = is_nan && (man_i[22] == 1'b0);
|
||||
|
|
|
@ -67,8 +67,7 @@ module VX_skid_buffer #(
|
|||
end else begin
|
||||
if (ready_out) begin
|
||||
use_buffer <= 0;
|
||||
end
|
||||
if (push && !pop) begin
|
||||
end else if (push && valid_out_r) begin
|
||||
assert(!use_buffer);
|
||||
use_buffer <= 1;
|
||||
end
|
||||
|
@ -81,9 +80,11 @@ module VX_skid_buffer #(
|
|||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (pop) begin
|
||||
data_out_r <= use_buffer ? buffer : data_in;
|
||||
end
|
||||
if (pop && !use_buffer) begin
|
||||
data_out_r <= data_in;
|
||||
end else if (pop) begin
|
||||
data_out_r <= buffer;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -138,5 +138,4 @@ clean-fpga-32c:
|
|||
clean-fpga-64c:
|
||||
rm -rf $(FPGA_BUILD_DIR)_64c sources.txt
|
||||
|
||||
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
|
||||
rm sources.txt
|
||||
clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c
|
|
@ -6,7 +6,7 @@
|
|||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
||||
vortex_afu.json
|
||||
vortex_afu16.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
|
@ -2,6 +2,8 @@
|
|||
+define+NUM_CLUSTERS=4
|
||||
#+define+L3_ENABLE=1
|
||||
|
||||
+define+GLOBAL_BLOCK_SIZE=16
|
||||
|
||||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
+define+NUM_CLUSTERS=8
|
||||
#+define+L3_ENABLE=1
|
||||
|
||||
+define+GLOBAL_BLOCK_SIZE=16
|
||||
|
||||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
+define+QUARTUS
|
||||
#+define+PERF_ENABLE
|
||||
|
||||
vortex_afu.json
|
||||
vortex_afu16.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
56
hw/syn/opae/vortex_afu16.json
Normal file
56
hw/syn/opae/vortex_afu16.json
Normal file
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto-210",
|
||||
"clock-frequency-low": "auto-210",
|
||||
|
||||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
"cmd-run": 3,
|
||||
"cmd-csr-read": 4,
|
||||
"cmd-csr-write": 5,
|
||||
|
||||
"mmio-cmd-type": 10,
|
||||
"mmio-io-addr": 12,
|
||||
"mmio-mem-addr": 14,
|
||||
"mmio-data-size": 16,
|
||||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-csr-core": 24,
|
||||
"mmio-csr-addr": 26,
|
||||
"mmio-csr-data": 28,
|
||||
"mmio-csr-read": 30,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
"class": "ccip_std_afu_avalon_mm",
|
||||
"module-ports" :
|
||||
[
|
||||
{
|
||||
"class": "cci-p",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"class": "local-memory",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "vortex_afu",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
|
@ -41,11 +41,7 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
|
|||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||
|
||||
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
|
||||
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
#set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE AREA
|
||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
|
|
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue