mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
commit
d3b9c43dd8
123 changed files with 222 additions and 152 deletions
|
@ -3,9 +3,6 @@
|
|||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# build sources
|
||||
make -s
|
||||
|
||||
coverage()
|
||||
{
|
||||
echo "begin coverage tests..."
|
||||
|
|
|
@ -12,16 +12,16 @@ module VX_cluster #(
|
|||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// Status
|
||||
|
@ -34,12 +34,12 @@ module VX_cluster #(
|
|||
wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data;
|
||||
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
|
||||
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
|
||||
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
|
||||
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
|
||||
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_busy;
|
||||
|
@ -69,7 +69,7 @@ module VX_cluster #(
|
|||
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
|
||||
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
|
||||
|
||||
.busy (per_core_busy [i])
|
||||
.busy (per_core_busy [i])
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -96,7 +96,7 @@ module VX_cluster #(
|
|||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.CORE_TAG_WIDTH (`L1_MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
|
@ -150,7 +150,7 @@ module VX_cluster #(
|
|||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.TAG_IN_WIDTH (`L1_MEM_TAG_WIDTH),
|
||||
.TYPE ("R"),
|
||||
.TAG_SEL_IDX (1), // Skip 0 for NC flag
|
||||
.BUFFERED_REQ (1),
|
|
@ -12,16 +12,16 @@ module VX_core #(
|
|||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
output wire mem_req_rw,
|
||||
output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`DCACHE_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`XMEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
|
||||
output wire [`DCACHE_MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_req_data,
|
||||
output wire [`L1_MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
input wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [`L1_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// Status
|
||||
|
@ -34,12 +34,12 @@ module VX_core #(
|
|||
VX_mem_req_if #(
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
|
||||
.TAG_WIDTH (`XMEM_TAG_WIDTH)
|
||||
.TAG_WIDTH (`L1_MEM_TAG_WIDTH)
|
||||
) mem_req_if();
|
||||
|
||||
VX_mem_rsp_if #(
|
||||
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
|
||||
.TAG_WIDTH (`XMEM_TAG_WIDTH)
|
||||
.TAG_WIDTH (`L1_MEM_TAG_WIDTH)
|
||||
) mem_rsp_if();
|
||||
|
||||
assign mem_req_valid = mem_req_if.valid;
|
|
@ -30,6 +30,8 @@ module VX_csr_data #(
|
|||
|
||||
input wire busy
|
||||
);
|
||||
import fpu_types::*;
|
||||
|
||||
reg [`CSR_WIDTH-1:0] csr_satp;
|
||||
reg [`CSR_WIDTH-1:0] csr_mstatus;
|
||||
reg [`CSR_WIDTH-1:0] csr_medeleg;
|
|
@ -1,5 +1,7 @@
|
|||
`include "VX_define.vh"
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
`include "VX_print_instr.vh"
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(r) \
|
|
@ -301,6 +301,9 @@
|
|||
`define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH)
|
||||
`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_FLAG_BITS), `_DNC_MEM_TAG_WIDTH)
|
||||
|
||||
// Merged D-cache/I-cache memory tag
|
||||
`define L1_MEM_TAG_WIDTH (`MAX(`ICACHE_MEM_TAG_WIDTH, `DCACHE_MEM_TAG_WIDTH) + `CLOG2(2))
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
// Cache ID
|
||||
|
@ -343,9 +346,9 @@
|
|||
|
||||
// Memory request tag bits
|
||||
`define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE)
|
||||
`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
|
||||
`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `L1_MEM_TAG_WIDTH)
|
||||
`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_FLAG_BITS), `_L2_NC_MEM_TAG_WIDTH)
|
||||
`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS)))
|
||||
`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`L1_MEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS)))
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////
|
||||
|
||||
|
@ -390,9 +393,9 @@
|
|||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
// Merged D-cache/I-cache memory tag
|
||||
`define XMEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH + `CLOG2(2))
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "VX_types.vh"
|
||||
`include "VX_fpu_types.vh"
|
||||
`include "VX_gpu_types.vh"
|
||||
|
||||
`endif
|
||||
|
|
|
@ -13,7 +13,8 @@ module VX_fpu_unit #(
|
|||
input wire[`NUM_WARPS-1:0] csr_pending,
|
||||
output wire[`NUM_WARPS-1:0] pending
|
||||
);
|
||||
|
||||
import fpu_types::*;
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE);
|
||||
|
|
@ -1,34 +1,16 @@
|
|||
`ifndef VX_TYPES
|
||||
`define VX_TYPES
|
||||
`ifndef VX_GPU_TYPES
|
||||
`define VX_GPU_TYPES
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
typedef struct packed {
|
||||
logic is_normal;
|
||||
logic is_zero;
|
||||
logic is_subnormal;
|
||||
logic is_inf;
|
||||
logic is_nan;
|
||||
logic is_quiet;
|
||||
logic is_signaling;
|
||||
} fp_class_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic NV; // 4-Invalid
|
||||
logic DZ; // 3-Divide by zero
|
||||
logic OF; // 2-Overflow
|
||||
logic UF; // 1-Underflow
|
||||
logic NX; // 0-Inexact
|
||||
} fflags_t;
|
||||
|
||||
`define FFLAGS_BITS $bits(fflags_t)
|
||||
package gpu_types;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
} gpu_tmc_t;
|
||||
|
||||
`define GPU_TMC_BITS (1+`NUM_THREADS)
|
||||
`define GPU_TMC_BITS $bits(gpu_tmc_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
|
@ -36,7 +18,7 @@ typedef struct packed {
|
|||
logic [31:0] pc;
|
||||
} gpu_wspawn_t;
|
||||
|
||||
`define GPU_WSPAWN_BITS (1+`NUM_WARPS+32)
|
||||
`define GPU_WSPAWN_BITS $bits(gpu_wspawn_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
|
@ -46,7 +28,7 @@ typedef struct packed {
|
|||
logic [31:0] pc;
|
||||
} gpu_split_t;
|
||||
|
||||
`define GPU_SPLIT_BITS (1+1+`NUM_THREADS+`NUM_THREADS+32)
|
||||
`define GPU_SPLIT_BITS $bits(gpu_split_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
|
@ -54,6 +36,8 @@ typedef struct packed {
|
|||
logic [`NW_BITS-1:0] size_m1;
|
||||
} gpu_barrier_t;
|
||||
|
||||
`define GPU_BARRIER_BITS (1+`NB_BITS+`NW_BITS)
|
||||
`define GPU_BARRIER_BITS $bits(gpu_barrier_t)
|
||||
|
||||
endpackage
|
||||
|
||||
`endif
|
|
@ -15,6 +15,7 @@ module VX_gpu_unit #(
|
|||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master gpu_commit_if
|
||||
);
|
||||
import gpu_types::*;
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
|
@ -24,11 +24,51 @@ module VX_issue #(
|
|||
VX_gpu_req_if.master gpu_req_if
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if();
|
||||
VX_ibuffer_if execute_if();
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
VX_gpr_rsp_if gpr_rsp_if();
|
||||
|
||||
wire scoreboard_delay;
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
assign gpr_req_if.wid = ibuffer_if.wid;
|
||||
assign gpr_req_if.rs1 = ibuffer_if.rs1;
|
||||
assign gpr_req_if.rs2 = ibuffer_if.rs2;
|
||||
assign gpr_req_if.rs3 = ibuffer_if.rs3;
|
||||
|
||||
VX_writeback_if sboard_wb_if();
|
||||
assign sboard_wb_if.valid = writeback_if.valid;
|
||||
assign sboard_wb_if.wid = writeback_if.wid;
|
||||
assign sboard_wb_if.PC = writeback_if.PC;
|
||||
assign sboard_wb_if.rd = writeback_if.rd;
|
||||
assign sboard_wb_if.eop = writeback_if.eop;
|
||||
assign sboard_wb_if.ready = writeback_if.ready;
|
||||
|
||||
VX_ibuffer_if sboard_ib_if();
|
||||
assign sboard_ib_if.valid = ibuffer_if.valid && idmux_ib_if.ready;
|
||||
assign sboard_ib_if.wid = ibuffer_if.wid;
|
||||
assign sboard_ib_if.PC = ibuffer_if.PC;
|
||||
assign sboard_ib_if.wb = ibuffer_if.wb;
|
||||
assign sboard_ib_if.rd = ibuffer_if.rd;
|
||||
assign sboard_ib_if.rd_n = ibuffer_if.rd_n;
|
||||
assign sboard_ib_if.rs1_n = ibuffer_if.rs1_n;
|
||||
assign sboard_ib_if.rs2_n = ibuffer_if.rs2_n;
|
||||
assign sboard_ib_if.rs3_n = ibuffer_if.rs3_n;
|
||||
assign sboard_ib_if.wid_n = ibuffer_if.wid_n;
|
||||
|
||||
VX_ibuffer_if idmux_ib_if();
|
||||
assign idmux_ib_if.valid = ibuffer_if.valid && sboard_ib_if.ready;
|
||||
assign idmux_ib_if.wid = ibuffer_if.wid;
|
||||
assign idmux_ib_if.tmask = ibuffer_if.tmask;
|
||||
assign idmux_ib_if.PC = ibuffer_if.PC;
|
||||
assign idmux_ib_if.ex_type = ibuffer_if.ex_type;
|
||||
assign idmux_ib_if.op_type = ibuffer_if.op_type;
|
||||
assign idmux_ib_if.op_mod = ibuffer_if.op_mod;
|
||||
assign idmux_ib_if.wb = ibuffer_if.wb;
|
||||
assign idmux_ib_if.rd = ibuffer_if.rd;
|
||||
assign idmux_ib_if.rs1 = ibuffer_if.rs1;
|
||||
assign idmux_ib_if.imm = ibuffer_if.imm;
|
||||
assign idmux_ib_if.use_PC = ibuffer_if.use_PC;
|
||||
assign idmux_ib_if.use_imm = ibuffer_if.use_imm;
|
||||
|
||||
// issue the instruction
|
||||
assign ibuffer_if.ready = sboard_ib_if.ready && idmux_ib_if.ready;
|
||||
|
||||
`RESET_RELAY (ibuf_reset);
|
||||
`RESET_RELAY (gpr_reset);
|
||||
|
@ -48,15 +88,9 @@ module VX_issue #(
|
|||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.writeback_if(writeback_if),
|
||||
.delay (scoreboard_delay)
|
||||
.ibuffer_if (sboard_ib_if),
|
||||
.writeback_if(sboard_wb_if)
|
||||
);
|
||||
|
||||
assign gpr_req_if.wid = ibuffer_if.wid;
|
||||
assign gpr_req_if.rs1 = ibuffer_if.rs1;
|
||||
assign gpr_req_if.rs2 = ibuffer_if.rs2;
|
||||
assign gpr_req_if.rs3 = ibuffer_if.rs3;
|
||||
|
||||
VX_gpr_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -68,24 +102,10 @@ module VX_issue #(
|
|||
.gpr_rsp_if (gpr_rsp_if)
|
||||
);
|
||||
|
||||
assign execute_if.valid = ibuffer_if.valid && ~scoreboard_delay;
|
||||
assign execute_if.wid = ibuffer_if.wid;
|
||||
assign execute_if.tmask = ibuffer_if.tmask;
|
||||
assign execute_if.PC = ibuffer_if.PC;
|
||||
assign execute_if.ex_type = ibuffer_if.ex_type;
|
||||
assign execute_if.op_type = ibuffer_if.op_type;
|
||||
assign execute_if.op_mod = ibuffer_if.op_mod;
|
||||
assign execute_if.wb = ibuffer_if.wb;
|
||||
assign execute_if.rd = ibuffer_if.rd;
|
||||
assign execute_if.rs1 = ibuffer_if.rs1;
|
||||
assign execute_if.imm = ibuffer_if.imm;
|
||||
assign execute_if.use_PC = ibuffer_if.use_PC;
|
||||
assign execute_if.use_imm = ibuffer_if.use_imm;
|
||||
|
||||
VX_instr_demux instr_demux (
|
||||
.clk (clk),
|
||||
.reset (demux_reset),
|
||||
.ibuffer_if (execute_if),
|
||||
.ibuffer_if (idmux_ib_if),
|
||||
.gpr_rsp_if (gpr_rsp_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
|
@ -94,10 +114,7 @@ module VX_issue #(
|
|||
.fpu_req_if (fpu_req_if),
|
||||
`endif
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
// issue the instruction
|
||||
assign ibuffer_if.ready = !scoreboard_delay && execute_if.ready;
|
||||
);
|
||||
|
||||
`SCOPE_ASSIGN (issue_fire, ibuffer_if.valid && ibuffer_if.ready);
|
||||
`SCOPE_ASSIGN (issue_wid, ibuffer_if.wid);
|
||||
|
@ -115,7 +132,7 @@ module VX_issue #(
|
|||
`SCOPE_ASSIGN (issue_use_pc, ibuffer_if.use_PC);
|
||||
`SCOPE_ASSIGN (issue_use_imm, ibuffer_if.use_imm);
|
||||
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
|
||||
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
|
||||
`SCOPE_ASSIGN (execute_delay, ~idmux_ib_if.ready);
|
||||
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_b, gpr_rsp_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpr_rsp_c, gpr_rsp_if.rs3_data);
|
|
@ -22,19 +22,19 @@ module VX_pipeline #(
|
|||
input wire dcache_rsp_valid,
|
||||
input wire [`NUM_THREADS-1:0] dcache_rsp_tmask,
|
||||
input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data,
|
||||
input wire [`DCACHE_CORE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
input wire [`DCACHE_CORE_TAG_WIDTH-1:0] dcache_rsp_tag,
|
||||
output wire dcache_rsp_ready,
|
||||
|
||||
// Icache core request
|
||||
output wire icache_req_valid,
|
||||
output wire [29:0] icache_req_addr,
|
||||
output wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
output wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_req_tag,
|
||||
input wire icache_req_ready,
|
||||
|
||||
// Icache core response
|
||||
input wire icache_rsp_valid,
|
||||
input wire [31:0] icache_rsp_data,
|
||||
input wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
input wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
output wire icache_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
|
@ -29,7 +29,8 @@
|
|||
/* verilator lint_off UNOPTFLAT */ \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
/* verilator lint_off DECLFILENAME */ \
|
||||
/* verilator lint_off IMPLICIT */
|
||||
/* verilator lint_off IMPLICIT */ \
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
|
||||
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
||||
/* verilator lint_on PINCONNECTEMPTY */ \
|
||||
|
@ -37,7 +38,8 @@
|
|||
/* verilator lint_on UNOPTFLAT */ \
|
||||
/* verilator lint_on UNDRIVEN */ \
|
||||
/* verilator lint_on DECLFILENAME */ \
|
||||
/* verilator lint_on IMPLICIT */
|
||||
/* verilator lint_on IMPLICIT */ \
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
|
||||
`define UNUSED_PARAM(x) /* verilator lint_off UNUSED */ \
|
||||
localparam __``x = x; \
|
||||
|
@ -49,6 +51,9 @@
|
|||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
`define ERROR(msg) \
|
||||
$error msg
|
||||
|
||||
`define ASSERT(cond, msg) \
|
||||
assert(cond) else $error msg
|
||||
|
||||
|
@ -75,6 +80,7 @@
|
|||
`define UNUSED_PARAM(x)
|
||||
`define UNUSED_VAR(x)
|
||||
`define UNUSED_PIN(x) . x ()
|
||||
`define ERROR(msg)
|
||||
`define ASSERT(cond, msg) if (cond);
|
||||
`define STATIC_ASSERT(cond, msg)
|
||||
`define RUNTIME_ASSERT(cond, msg)
|
||||
|
@ -106,14 +112,12 @@
|
|||
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
|
||||
|
||||
`define ABS(x) (($signed(x) < 0) ? (-$signed(x)) : x);
|
||||
`define ABS(x) (($signed(x) < 0) ? (-$signed(x)) : (x));
|
||||
|
||||
`define MIN(x, y) ((x < y) ? (x) : (y))
|
||||
`define MAX(x, y) ((x > y) ? (x) : (y))
|
||||
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
`define UP(x) (((x) > 0) ? x : 1)
|
||||
|
||||
`define SAFE_RNG(h,l) `MAX(h,l) : l
|
||||
`define UP(x) (((x) > 0) ? (x) : 1)
|
||||
|
||||
`define RTRIM(x,s) x[$bits(x)-1:($bits(x)-s)]
|
||||
|
||||
|
|
|
@ -3,12 +3,11 @@
|
|||
module VX_scoreboard #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_ibuffer_if.slave ibuffer_if,
|
||||
VX_writeback_if.slave writeback_if,
|
||||
output wire delay
|
||||
VX_ibuffer_if.scoreboard ibuffer_if,
|
||||
VX_writeback_if.scoreboard writeback_if
|
||||
);
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
|
||||
|
@ -43,7 +42,12 @@ module VX_scoreboard #(
|
|||
deq_inuse_rs3 <= inuse_regs_n[ibuffer_if.wid_n][ibuffer_if.rs3_n];
|
||||
end
|
||||
|
||||
assign delay = deq_inuse_rd | deq_inuse_rs1 | deq_inuse_rs2 | deq_inuse_rs3;
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
assign ibuffer_if.ready = ~(deq_inuse_rd
|
||||
| deq_inuse_rs1
|
||||
| deq_inuse_rs2
|
||||
| deq_inuse_rs3);
|
||||
|
||||
`UNUSED_VAR (writeback_if.PC)
|
||||
|
|
@ -30,14 +30,14 @@ module Vortex (
|
|||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
|
@ -94,7 +94,7 @@ localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
|
|||
|
||||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
wire [63:0] dev_caps = {16'(`NUM_THREADS), 16'(`NUM_WARPS), 16'(`NUM_CORES), 16'(`IMPLEMENTATION_ID)};
|
||||
wire [63:0] dev_caps = {16'(`NUM_THREADS), 16'(`NUM_WARPS), 16'(`NUM_CORES * `NUM_CLUSTERS), 16'(`IMPLEMENTATION_ID)};
|
||||
|
||||
reg [STATE_WIDTH-1:0] state;
|
||||
|
||||
|
|
|
@ -157,7 +157,8 @@ module VX_shared_mem #(
|
|||
.valid_out (creq_out_valid)
|
||||
);
|
||||
|
||||
wire crsq_last_read;
|
||||
wire crsq_in_valid, crsq_in_ready;
|
||||
wire crsq_last_read;
|
||||
|
||||
assign creq_out_ready = core_req_writeonly
|
||||
|| (crsq_in_ready && crsq_last_read);
|
||||
|
@ -195,8 +196,6 @@ module VX_shared_mem #(
|
|||
wire [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
|
||||
reg [NUM_BANKS-1:0] bank_rsp_sel_r, bank_rsp_sel_n;
|
||||
|
||||
wire crsq_in_valid, crsq_in_ready;
|
||||
|
||||
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
|
||||
|
||||
assign crsq_last_read = (bank_rsp_sel_n == core_req_read_mask);
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_class # (
|
||||
parameter MAN_BITS = 23,
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of cast module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
|
@ -1,8 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_div #(
|
||||
parameter TAGW = 1,
|
|
@ -1,8 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_fma #(
|
||||
parameter TAGW = 1,
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of noncomp module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
/// Modified port of rounding module from fpnew Library
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
|
@ -1,8 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_sqrt #(
|
||||
parameter TAGW = 1,
|
14
hw/rtl/fp_cores/VX_fpu_define.vh
Normal file
14
hw/rtl/fp_cores/VX_fpu_define.vh
Normal file
|
@ -0,0 +1,14 @@
|
|||
`ifndef VX_FPU_DEFINE
|
||||
`define VX_FPU_DEFINE
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`include "float_dpi.vh"
|
||||
`endif
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
import fpu_types::*;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
`endif
|
|
@ -1,7 +1,4 @@
|
|||
`ifndef SYNTHESIS
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "float_dpi.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_dpi #(
|
||||
parameter TAGW = 1
|
||||
|
@ -410,6 +407,4 @@ module VX_fpu_dpi #(
|
|||
|
||||
assign ready_in = per_core_ready_in[core_select];
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
endmodule
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_fpga #(
|
||||
parameter TAGW = 4
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_fpu_define.vh"
|
||||
`include "fpnew_pkg.sv"
|
||||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
32
hw/rtl/fp_cores/VX_fpu_types.vh
Normal file
32
hw/rtl/fp_cores/VX_fpu_types.vh
Normal file
|
@ -0,0 +1,32 @@
|
|||
`ifndef VX_FPU_TYPES
|
||||
`define VX_FPU_TYPES
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
package fpu_types;
|
||||
|
||||
typedef struct packed {
|
||||
logic is_normal;
|
||||
logic is_zero;
|
||||
logic is_subnormal;
|
||||
logic is_inf;
|
||||
logic is_nan;
|
||||
logic is_quiet;
|
||||
logic is_signaling;
|
||||
} fp_class_t;
|
||||
|
||||
`define FP_CLASS_BITS $bits(fp_class_t)
|
||||
|
||||
typedef struct packed {
|
||||
logic NV; // 4-Invalid
|
||||
logic DZ; // 3-Divide by zero
|
||||
logic OF; // 2-Overflow
|
||||
logic UF; // 1-Underflow
|
||||
logic NX; // 0-Inexact
|
||||
} fflags_t;
|
||||
|
||||
`define FFLAGS_BITS $bits(fflags_t)
|
||||
|
||||
endpackage
|
||||
|
||||
`endif
|
|
@ -7,7 +7,7 @@ interface VX_fpu_to_csr_if ();
|
|||
|
||||
wire write_enable;
|
||||
wire [`NW_BITS-1:0] write_wid;
|
||||
fflags_t write_fflags;
|
||||
fpu_types::fflags_t write_fflags;
|
||||
|
||||
wire [`NW_BITS-1:0] read_wid;
|
||||
wire [`INST_FRM_BITS-1:0] read_frm;
|
|
@ -76,6 +76,20 @@ interface VX_ibuffer_if ();
|
|||
input wid_n,
|
||||
output ready
|
||||
);
|
||||
|
||||
modport scoreboard (
|
||||
input valid,
|
||||
input wid,
|
||||
input PC,
|
||||
input wb,
|
||||
input rd,
|
||||
input rd_n,
|
||||
input rs1_n,
|
||||
input rs2_n,
|
||||
input rs3_n,
|
||||
input wid_n,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
|
@ -5,12 +5,12 @@
|
|||
|
||||
interface VX_warp_ctl_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
gpu_types::gpu_tmc_t tmc;
|
||||
gpu_types::gpu_wspawn_t wspawn;
|
||||
gpu_types::gpu_barrier_t barrier;
|
||||
gpu_types::gpu_split_t split;
|
||||
|
||||
modport master (
|
||||
output valid,
|
|
@ -36,6 +36,15 @@ interface VX_writeback_if ();
|
|||
output ready
|
||||
);
|
||||
|
||||
modport scoreboard (
|
||||
input valid,
|
||||
input wid,
|
||||
input PC,
|
||||
input rd,
|
||||
input eop,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -31,7 +31,7 @@ module VX_bypass_buffer #(
|
|||
buffer_valid <= 0;
|
||||
end
|
||||
if (valid_in && ~ready_out) begin
|
||||
`ASSERT(!buffer_valid, "runtime error");
|
||||
`ASSERT(!buffer_valid, ("runtime error"));
|
||||
buffer_valid <= 1;
|
||||
end
|
||||
end
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue