mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
merge update
This commit is contained in:
parent
e380ded5e1
commit
58a2140b92
31 changed files with 169 additions and 98 deletions
|
@ -22,6 +22,15 @@ make -C tests/opencl run-simx
|
|||
echo "coverage tests done!"
|
||||
}
|
||||
|
||||
tex()
|
||||
{
|
||||
echo "begin texture tests..."
|
||||
|
||||
CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --app=tex
|
||||
|
||||
echo "coverage texture done!"
|
||||
}
|
||||
|
||||
cluster()
|
||||
{
|
||||
echo "begin clustering tests..."
|
||||
|
@ -137,13 +146,15 @@ echo "stress1 tests done!"
|
|||
|
||||
usage()
|
||||
{
|
||||
echo "usage: regression [-coverage] [-cluster] [-debug] [-config] [-stress[#n]] [-all] [-h|--help]"
|
||||
echo "usage: regression [-coverage] [-tex] [-cluster] [-debug] [-config] [-stress[#n]] [-all] [-h|--help]"
|
||||
}
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-coverage ) coverage
|
||||
;;
|
||||
-tex ) tex
|
||||
;;
|
||||
-cluster ) cluster
|
||||
;;
|
||||
-debug ) debug
|
||||
|
|
|
@ -78,12 +78,12 @@ module VX_commit #(
|
|||
`ifdef EXT_F_ENABLE
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
`endif
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
// store and gpu commits don't writeback
|
||||
assign st_commit_if.ready = 1'b1;
|
||||
assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -77,10 +77,6 @@
|
|||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
`ifndef EXT_TEX_DISABLE
|
||||
`define EXT_TEX_ENABLE
|
||||
`endif
|
||||
|
||||
// Device identification
|
||||
`define VENDOR_ID 0
|
||||
`define ARCHITECTURE_ID 0
|
||||
|
|
|
@ -18,7 +18,7 @@ module VX_csr_data #(
|
|||
VX_fpu_to_csr_if.slave fpu_to_csr_if,
|
||||
`endif
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if.slave tex_csr_if,
|
||||
VX_tex_csr_if.master tex_csr_if,
|
||||
`endif
|
||||
|
||||
input wire read_enable,
|
||||
|
@ -49,13 +49,13 @@ module VX_csr_data #(
|
|||
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (reset) begin
|
||||
fcsr <= '0;
|
||||
end
|
||||
end
|
||||
if (fpu_to_csr_if.write_enable) begin
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
end
|
||||
`endif
|
||||
|
@ -64,25 +64,21 @@ module VX_csr_data #(
|
|||
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
|
||||
`CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
|
||||
|
||||
`CSR_SATP: csr_satp <= write_data;
|
||||
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data;
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data;
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data;
|
||||
`CSR_MIE: csr_mie <= write_data;
|
||||
`CSR_MTVEC: csr_mtvec <= write_data;
|
||||
|
||||
`CSR_MEPC: csr_mepc <= write_data;
|
||||
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
|
||||
|
||||
default: begin
|
||||
assert (write_addr >= `CSR_TEX_BEGIN(0) && write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))
|
||||
else `ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
`CSR_SATP: csr_satp <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIE: csr_mie <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MTVEC: csr_mtvec <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEPC: csr_mepc <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
default: begin
|
||||
`ASSERT(write_addr >= `CSR_TEX_BEGIN(0)
|
||||
&& write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES),
|
||||
("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
end
|
||||
endcase
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ module VX_csr_unit #(
|
|||
input wire[`NUM_WARPS-1:0] fpu_pending,
|
||||
`endif
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_tex_csr_if.slave tex_csr_if,
|
||||
VX_tex_csr_if.master tex_csr_if,
|
||||
`endif
|
||||
|
||||
output wire[`NUM_WARPS-1:0] pending,
|
||||
|
@ -49,6 +49,7 @@ module VX_csr_unit #(
|
|||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`endif
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
|
|
|
@ -378,8 +378,8 @@ module VX_decode #(
|
|||
end
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
3'h5: begin
|
||||
op_type = `OP_BITS'(`GPU_TEX);
|
||||
op_mod = `MOD_BITS'(func2);
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_TEX);
|
||||
op_mod = `INST_MOD_BITS'(func2);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
|
|
|
@ -285,17 +285,17 @@
|
|||
// Core request tag bits
|
||||
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
|
||||
`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE)
|
||||
`define TEX_TAG_ID_BITS (2)
|
||||
`define LSU_TEX_TAG_ID_BITS `MAX(`LSU_TAG_ID_BITS, `TEX_TAG_ID_BITS)
|
||||
`define DCACHE_DCORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `NC_FLAG_BITS)
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `NC_FLAG_BITS)
|
||||
`define LSU_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TAG_ID_BITS)
|
||||
`define TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `TEX_TAG_ID_BITS)
|
||||
`define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TEX_TAG_ID_BITS)
|
||||
`else
|
||||
`define DCACHE_DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE)
|
||||
`endif
|
||||
`define DCACHE_DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS)
|
||||
`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
|
|
|
@ -103,7 +103,7 @@ module VX_execute #(
|
|||
.LANES (`NUM_THREADS),
|
||||
.DATA_SIZE (4),
|
||||
.TAG_IN_WIDTH (`LSU_TEX_DCACHE_TAG_BITS),
|
||||
.TAG_SEL_IDX (`NC_ADDR_BITS + `SM_ENABLE)
|
||||
.TAG_SEL_IDX (`NC_FLAG_BITS + `SM_ENABLE)
|
||||
) tex_lsu_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -189,10 +189,7 @@ module VX_execute #(
|
|||
.perf_pipeline_if(perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -202,6 +199,9 @@ module VX_execute #(
|
|||
`else
|
||||
`UNUSED_PIN (pending),
|
||||
`endif
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
|
|
|
@ -52,8 +52,13 @@ module VX_gpu_unit #(
|
|||
wire is_tmc = (gpu_req_if.op_type == `INST_GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `INST_GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR);
|
||||
wire is_pred = (gpu_req_if.op_type == `INST_GPU_PRED);
|
||||
|
||||
wire [31:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid];
|
||||
wire [31:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid];
|
||||
|
||||
// tmc
|
||||
wire [`NUM_THREADS-1:0] taken_tmask;
|
||||
wire [`NUM_THREADS-1:0] not_taken_tmask;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire taken = (gpu_req_if.rs1_data[i] != 0);
|
||||
|
@ -70,7 +75,7 @@ module VX_gpu_unit #(
|
|||
|
||||
// wspawn
|
||||
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data[0];
|
||||
wire [31:0] wspawn_pc = rs2_data;
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < rs1_data);
|
||||
|
@ -90,8 +95,8 @@ module VX_gpu_unit #(
|
|||
// barrier
|
||||
|
||||
assign barrier.valid = is_bar;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
|
||||
assign barrier.id = rs1_data[`NB_BITS-1:0];
|
||||
assign barrier.size_m1 = (`NW_BITS)'(rs2_data - 1);
|
||||
|
||||
// pack warp ctl result
|
||||
assign warp_ctl_data = {tmc, wspawn, split, barrier};
|
||||
|
@ -105,7 +110,7 @@ module VX_gpu_unit #(
|
|||
VX_tex_req_if tex_req_if();
|
||||
VX_tex_rsp_if tex_rsp_if();
|
||||
|
||||
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
|
||||
wire is_tex = (gpu_req_if.op_type == `INST_GPU_TEX);
|
||||
|
||||
assign tex_req_if.valid = gpu_req_if.valid && is_tex;
|
||||
assign tex_req_if.wid = gpu_req_if.wid;
|
||||
|
@ -114,19 +119,19 @@ module VX_gpu_unit #(
|
|||
assign tex_req_if.rd = gpu_req_if.rd;
|
||||
assign tex_req_if.wb = gpu_req_if.wb;
|
||||
|
||||
assign tex_req_if.unit = gpu_req_if.op_mod[`NTEX_BITS-1:0];
|
||||
assign tex_req_if.unit = gpu_req_if.op_mod[`NTEX_BITS-1:0];
|
||||
assign tex_req_if.coords[0] = gpu_req_if.rs1_data;
|
||||
assign tex_req_if.coords[1] = gpu_req_if.rs2_data;
|
||||
assign tex_req_if.lod = gpu_req_if.rs3_data;
|
||||
assign tex_req_if.lod = gpu_req_if.rs3_data;
|
||||
|
||||
VX_tex_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) tex_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.tex_req_if (tex_req_if),
|
||||
.tex_csr_if (tex_csr_if),
|
||||
.tex_rsp_if (tex_rsp_if),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.tex_req_if (tex_req_if),
|
||||
.tex_csr_if (tex_csr_if),
|
||||
.tex_rsp_if (tex_rsp_if),
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if)
|
||||
);
|
||||
|
@ -149,7 +154,6 @@ module VX_gpu_unit #(
|
|||
`else
|
||||
|
||||
`UNUSED_VAR (gpu_req_if.op_mod)
|
||||
`UNUSED_VAR (gpu_req_if.rs2_data)
|
||||
`UNUSED_VAR (gpu_req_if.rs3_data)
|
||||
`UNUSED_VAR (gpu_req_if.wb)
|
||||
`UNUSED_VAR (gpu_req_if.rd)
|
||||
|
|
|
@ -124,18 +124,17 @@ module VX_instr_demux (
|
|||
|
||||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type);
|
||||
wire [31:0] gpu_rs2_data = gpr_rsp_if.rs2_data[tid];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpu_rs2_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
|
|
@ -10,7 +10,6 @@ module VX_writeback #(
|
|||
VX_commit_if.slave alu_commit_if,
|
||||
VX_commit_if.slave ld_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
VX_commit_if.slave csr_commit_if,
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_commit_if.slave fpu_commit_if,
|
||||
`endif
|
||||
|
@ -50,28 +49,28 @@ module VX_writeback #(
|
|||
wire [NUM_RSPS-1:0] rsp_ready;
|
||||
wire stall;
|
||||
|
||||
assign rsp_valid = {
|
||||
assign rsp_valid = {
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
gpu_commit_if.valid && gpu_commit_if.wb,
|
||||
`endif
|
||||
csr_commit_if.valid && csr_commit_if.wb,
|
||||
alu_commit_if.valid && alu_commit_if.wb,
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_if.valid && fpu_commit_if.wb,
|
||||
`endif
|
||||
ld_commit_if.valid && ld_commit_if.wb,
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
gpu_commit_if.valid && gpu_commit_if.wb,
|
||||
`ifend
|
||||
ld_commit_if.valid && ld_commit_if.wb
|
||||
};
|
||||
|
||||
assign rsp_data = {
|
||||
assign rsp_data = {
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
{gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop},
|
||||
`endif
|
||||
{csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.rd, csr_commit_if.data, csr_commit_if.eop},
|
||||
{alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.rd, alu_commit_if.data, alu_commit_if.eop},
|
||||
`ifdef EXT_F_ENABLE
|
||||
{fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.rd, fpu_commit_if.data, fpu_commit_if.eop},
|
||||
`endif
|
||||
{ ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.rd, ld_commit_if.data, ld_commit_if.eop},
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
{gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop},
|
||||
`endif
|
||||
{ ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.rd, ld_commit_if.data, ld_commit_if.eop}
|
||||
};
|
||||
|
||||
VX_stream_arbiter #(
|
||||
|
@ -103,8 +102,7 @@ module VX_writeback #(
|
|||
`ifdef EXT_TEX_ENABLE
|
||||
assign gpu_commit_if.ready = rsp_ready[3] || ~gpu_commit_if.wb;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
assign stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
|
|
|
@ -47,11 +47,12 @@ localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_DATA_SIZE);
|
|||
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 4;
|
||||
localparam AVS_REQ_TAGW_VX_ = `VX_MEM_TAG_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(`VX_MEM_DATA_WIDTH);
|
||||
localparam AVS_REQ_TAGW_VX = `MAX(`VX_MEM_TAG_WIDTH, AVS_REQ_TAGW_VX_);
|
||||
localparam AVS_REQ_TAGW_CCI_ = CCI_ADDR_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(CCI_DATA_WIDTH);
|
||||
localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, AVS_REQ_TAGW_CCI_);
|
||||
localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI);
|
||||
localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH;
|
||||
localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(`VX_MEM_DATA_WIDTH);
|
||||
localparam _AVS_REQ_TAGW_VX2 = `MAX(_VX_MEM_TAG_WIDTH, _AVS_REQ_TAGW_VX);
|
||||
localparam _AVS_REQ_TAGW_CCI = CCI_ADDR_WIDTH + $clog2(LMEM_DATA_WIDTH) - $clog2(CCI_DATA_WIDTH);
|
||||
localparam _AVS_REQ_TAGW_CCI2 = `MAX(CCI_ADDR_WIDTH, _AVS_REQ_TAGW_CCI);
|
||||
localparam AVS_REQ_TAGW = `MAX(_AVS_REQ_TAGW_VX2, _AVS_REQ_TAGW_CCI2);
|
||||
|
||||
localparam CCI_RD_WINDOW_SIZE = 8;
|
||||
localparam CCI_RW_PENDING_SIZE= 256;
|
||||
|
|
|
@ -12,9 +12,11 @@ interface VX_gpu_req_if();
|
|||
wire [31:0] PC;
|
||||
wire [31:0] next_PC;
|
||||
wire [`INST_GPU_BITS-1:0] op_type;
|
||||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
|
@ -27,9 +29,11 @@ interface VX_gpu_req_if();
|
|||
output PC,
|
||||
output next_PC,
|
||||
output op_type,
|
||||
output op_mod,
|
||||
output tid,
|
||||
output rs1_data,
|
||||
output rs2_data,
|
||||
output rs3_data,
|
||||
output rd,
|
||||
output wb,
|
||||
input ready
|
||||
|
@ -42,9 +46,11 @@ interface VX_gpu_req_if();
|
|||
input PC,
|
||||
input next_PC,
|
||||
input op_type,
|
||||
input op_mod,
|
||||
input tid,
|
||||
input rs1_data,
|
||||
input rs2_data,
|
||||
input rs3_data,
|
||||
input rd,
|
||||
input wb,
|
||||
output ready
|
||||
|
|
|
@ -7,7 +7,19 @@ interface VX_tex_csr_if ();
|
|||
|
||||
wire write_enable;
|
||||
wire [`CSR_ADDR_BITS-1:0] write_addr;
|
||||
wire [31:0] write_data;
|
||||
wire [31:0] write_data;
|
||||
|
||||
modport master (
|
||||
output write_enable,
|
||||
output write_addr,
|
||||
output write_data
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input write_enable,
|
||||
input write_addr,
|
||||
input write_data
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -18,6 +18,32 @@ interface VX_tex_req_if ();
|
|||
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output rd,
|
||||
output wb,
|
||||
output unit,
|
||||
output coords,
|
||||
output lod,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input rd,
|
||||
input wb,
|
||||
input unit,
|
||||
input coords,
|
||||
input lod,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
`endif
|
||||
|
||||
|
|
|
@ -14,6 +14,28 @@ interface VX_tex_rsp_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
output rd,
|
||||
output wb,
|
||||
output data,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
input rd,
|
||||
input wb,
|
||||
input data,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
|
|
|
@ -70,9 +70,9 @@ module VX_tex_memory #(
|
|||
assign reqq_push = req_valid && req_ready;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4),
|
||||
.SIZE (`LSUQ_SIZE),
|
||||
.OUTPUT_REG (1)
|
||||
.DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4),
|
||||
.SIZE (`LSUQ_SIZE),
|
||||
.OUT_REG (1)
|
||||
) req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
|
@ -7,15 +7,15 @@ module VX_tex_unit #(
|
|||
input wire reset,
|
||||
|
||||
// Texture unit <-> Memory Unit
|
||||
VX_dcache_req_if dcache_req_if,
|
||||
VX_dcache_rsp_if dcache_rsp_if,
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
|
||||
// Inputs
|
||||
VX_tex_req_if tex_req_if,
|
||||
VX_tex_csr_if tex_csr_if,
|
||||
VX_tex_req_if.slave tex_req_if,
|
||||
VX_tex_csr_if.slave tex_csr_if,
|
||||
|
||||
// Outputs
|
||||
VX_tex_rsp_if tex_rsp_if
|
||||
VX_tex_rsp_if.master tex_rsp_if
|
||||
);
|
||||
|
||||
localparam REQ_INFOW_S = `NR_BITS + 1 + `NW_BITS + 32;
|
|
@ -20,6 +20,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
|
|
@ -20,13 +20,15 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
DBG_FLAGS += -DVCD_OUTPUT
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
|
|
@ -21,6 +21,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
@ -30,7 +31,8 @@ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
|||
SRCS += fpga.cpp opae_sim.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
|
|
@ -22,7 +22,7 @@ CXXFLAGS += -I$(POCL_RT_PATH)/include
|
|||
|
||||
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex
|
||||
|
||||
PROJECT = printf
|
||||
PROJECT = oclprintf
|
||||
|
||||
SRCS = main.cc
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@ all:
|
|||
$(MAKE) -C diverge
|
||||
$(MAKE) -C sort
|
||||
$(MAKE) -C fence
|
||||
$(MAKE) -C tex
|
||||
$(MAKE) -C no_mf_ext
|
||||
$(MAKE) -C no_smem
|
||||
|
||||
|
@ -20,9 +19,8 @@ run-simx:
|
|||
$(MAKE) -C io_addr run-simx
|
||||
$(MAKE) -C printf run-simx
|
||||
$(MAKE) -C diverge run-simx
|
||||
#$(MAKE) -C sort run-simx
|
||||
$(MAKE) -C sort run-simx
|
||||
$(MAKE) -C fence run-simx
|
||||
#$(MAKE) -C tex run-simx
|
||||
$(MAKE) -C no_mf_ext run-simx
|
||||
$(MAKE) -C no_smem run-simx
|
||||
|
||||
|
@ -34,7 +32,7 @@ run-rtlsim:
|
|||
$(MAKE) -C io_addr run-rtlsim
|
||||
$(MAKE) -C printf run-rtlsim
|
||||
$(MAKE) -C diverge run-rtlsim
|
||||
#$(MAKE) -C sort run-rtlsim
|
||||
$(MAKE) -C sort run-rtlsim
|
||||
$(MAKE) -C fence run-rtlsim
|
||||
$(MAKE) -C no_mf_ext run-rtlsim
|
||||
$(MAKE) -C no_smem run-rtlsim
|
||||
|
@ -47,9 +45,8 @@ run-vlsim:
|
|||
$(MAKE) -C io_addr run-vlsim
|
||||
$(MAKE) -C printf run-vlsim
|
||||
$(MAKE) -C diverge run-vlsim
|
||||
#$(MAKE) -C sort run-vlsim
|
||||
$(MAKE) -C sort run-vlsim
|
||||
$(MAKE) -C fence run-vlsim
|
||||
$(MAKE) -C tex run-vlsim
|
||||
$(MAKE) -C no_mf_ext run-vlsim
|
||||
$(MAKE) -C no_smem run-vlsim
|
||||
|
||||
|
@ -63,7 +60,6 @@ clean:
|
|||
$(MAKE) -C diverge clean
|
||||
$(MAKE) -C sort clean
|
||||
$(MAKE) -C fence clean
|
||||
$(MAKE) -C tex clean
|
||||
$(MAKE) -C no_mf_ext clean
|
||||
$(MAKE) -C no_smem clean
|
||||
|
||||
|
@ -77,7 +73,5 @@ clean-all:
|
|||
$(MAKE) -C diverge clean-all
|
||||
$(MAKE) -C sort clean-all
|
||||
$(MAKE) -C fence clean-all
|
||||
$(MAKE) -C tex clean-all
|
||||
$(MAKE) -C no_mf_ext clean-all
|
||||
$(MAKE) -C no_smem clean-all
|
||||
|
||||
$(MAKE) -C no_smem clean-all
|
Loading…
Add table
Add a link
Reference in a new issue