mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
313 lines
10 KiB
Systemverilog
313 lines
10 KiB
Systemverilog
// Copyright © 2019-2023
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
`ifndef VX_GPU_PKG_VH
|
|
`define VX_GPU_PKG_VH
|
|
|
|
`include "VX_define.vh"
|
|
|
|
package VX_gpu_pkg;
|
|
|
|
// Packed record: a valid flag followed by a per-thread mask.
// NOTE(review): presumably the payload of a thread-mask-control operation;
// confirm against the modules that consume tmc_t.
typedef struct packed {
    logic                    valid;
    logic [`NUM_THREADS-1:0] tmask;   // NUM_THREADS bits wide
} tmc_t;
|
|
|
|
// Packed record: valid flag, a NUM_WARPS-bit warp mask and a PC_BITS-bit
// program counter.
// NOTE(review): presumably a warp-spawn request (which warps to start and
// the PC they start at); confirm against the users of wspawn_t.
typedef struct packed {
    logic                  valid;
    logic [`NUM_WARPS-1:0] wmask;   // NUM_WARPS bits wide
    logic [`PC_BITS-1:0]   pc;
} wspawn_t;
|
|
|
|
// Packed record carrying two NUM_THREADS-bit thread masks ("then" and
// "else") together with a PC_BITS-bit next PC.
// NOTE(review): looks like the outcome of a divergent-branch split, with
// is_dvg flagging whether the threads actually diverged; confirm.
typedef struct packed {
    logic                    valid;
    logic                    is_dvg;
    logic [`NUM_THREADS-1:0] then_tmask;
    logic [`NUM_THREADS-1:0] else_tmask;
    logic [`PC_BITS-1:0]     next_pc;
} split_t;
|
|
|
|
// Packed record: valid flag plus a DV_STACK_SIZEW-bit stack pointer.
// NOTE(review): stack_ptr presumably indexes the divergence stack used by
// the matching split; confirm.
typedef struct packed {
    logic                       valid;
    logic [`DV_STACK_SIZEW-1:0] stack_ptr;
} join_t;
|
|
|
|
// Packed barrier record.
// The width of size_m1 depends on GBAR_ENABLE: with it defined the field is
// MAX(NW_WIDTH, NC_WIDTH) bits wide, otherwise it is NW_WIDTH bits wide.
// NOTE(review): size_m1 presumably holds the participant count minus one;
// confirm against the barrier logic.
typedef struct packed {
    logic                 valid;
    logic [`NB_WIDTH-1:0] id;
    logic                 is_global;
`ifndef GBAR_ENABLE
    logic [`NW_WIDTH-1:0] size_m1;
`else
    logic [`MAX(`NW_WIDTH, `NC_WIDTH)-1:0] size_m1;
`endif
    logic                 is_noop;
} barrier_t;
|
|
|
|
// Packed record with two XLEN-bit startup values and an 8-bit mpm_class.
// NOTE(review): "dcrs" presumably stands for device configuration
// registers; confirm against the module that writes these fields.
typedef struct packed {
    logic [`XLEN-1:0] startup_addr;
    logic [`XLEN-1:0] startup_arg;
    logic [7:0]       mpm_class;
} base_dcrs_t;
|
|
|
|
//////////////////////////// Perf counter types ///////////////////////////
|
|
|
|
// Cache performance counters; every field is PERF_CTR_BITS wide.
typedef struct packed {
    logic [`PERF_CTR_BITS-1:0] reads;
    logic [`PERF_CTR_BITS-1:0] writes;
    logic [`PERF_CTR_BITS-1:0] read_misses;
    logic [`PERF_CTR_BITS-1:0] write_misses;
    logic [`PERF_CTR_BITS-1:0] bank_stalls;
    logic [`PERF_CTR_BITS-1:0] mshr_stalls;
    logic [`PERF_CTR_BITS-1:0] mem_stalls;
    logic [`PERF_CTR_BITS-1:0] crsp_stalls;
} cache_perf_t;
|
|
|
|
// Memory performance counters; every field is PERF_CTR_BITS wide.
typedef struct packed {
    logic [`PERF_CTR_BITS-1:0] reads;
    logic [`PERF_CTR_BITS-1:0] writes;
    logic [`PERF_CTR_BITS-1:0] latency;
} mem_perf_t;
|
|
|
|
// Scheduler performance counters; both fields are PERF_CTR_BITS wide.
typedef struct packed {
    logic [`PERF_CTR_BITS-1:0] idles;
    logic [`PERF_CTR_BITS-1:0] stalls;
} sched_perf_t;
|
|
|
|
// Issue-stage performance counters.
// The three stall counters are PERF_CTR_BITS wide each. units_uses and
// sfu_uses are packed two-dimensional arrays holding one PERF_CTR_BITS-wide
// counter per entry (NUM_EX_UNITS and NUM_SFU_UNITS entries respectively).
typedef struct packed {
    logic [`PERF_CTR_BITS-1:0] ibf_stalls;
    logic [`PERF_CTR_BITS-1:0] scb_stalls;
    logic [`PERF_CTR_BITS-1:0] opd_stalls;
    logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0]  units_uses;
    logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses;
} issue_perf_t;
|
|
|
|
//////////////////////// instruction arguments ////////////////////////////
|
|
|
|
// ALU instruction arguments.
// The other *_args_t records in this package size their padding from
// $bits(alu_args_t), so this struct sets the common width of all of them.
typedef struct packed {
    logic                      use_PC;
    logic                      use_imm;
    logic                      is_w;
    logic [`ALU_TYPE_BITS-1:0] xtype;
    logic [`IMM_BITS-1:0]      imm;
} alu_args_t;
|
|
|
|
// FPU instruction arguments.
// __padding is declared first, so it occupies the most significant bits; its
// width is chosen so that $bits(fpu_args_t) equals $bits(alu_args_t).
typedef struct packed {
    logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
    logic [`INST_FRM_BITS-1:0] frm;
    logic [`INST_FMT_BITS-1:0] fmt;
} fpu_args_t;
|
|
|
|
// LSU instruction arguments.
// __padding fills the most significant bits so that $bits(lsu_args_t)
// equals $bits(alu_args_t); the two "-1" terms account for the is_store and
// is_float flags.
typedef struct packed {
    logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding;
    logic                    is_store;
    logic                    is_float;
    logic [`OFFSET_BITS-1:0] offset;
} lsu_args_t;
|
|
|
|
// CSR instruction arguments.
// __padding fills the most significant bits so that $bits(csr_args_t)
// equals $bits(alu_args_t); the "-1" term accounts for use_imm and the
// "-5" term for the 5-bit imm field.
typedef struct packed {
    logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
    logic                         use_imm;
    logic [`VX_CSR_ADDR_BITS-1:0] addr;
    logic [4:0]                   imm;
} csr_args_t;
|
|
|
|
// Warp-control instruction arguments.
// Only the least significant bit (is_neg) carries information; __padding
// widens the record to $bits(alu_args_t).
typedef struct packed {
    logic [($bits(alu_args_t)-1)-1:0] __padding;
    logic                             is_neg;
} wctl_args_t;
|
|
|
|
// Overlay of the per-unit argument records on the same bits.
// The fpu/lsu/csr/wctl members each carry a __padding field sized from
// $bits(alu_args_t), which keeps all five members the same width.
typedef union packed {
    alu_args_t  alu;
    fpu_args_t  fpu;
    lsu_args_t  lsu;
    csr_args_t  csr;
    wctl_args_t wctl;
} op_args_t;
|
|
|
|
`IGNORE_UNUSED_BEGIN
|
|
|
|
///////////////////////// LSU memory Parameters ///////////////////////////
|
|
|
|
// Word size in bytes (XLEN bits / 8)
localparam LSU_WORD_SIZE   = `XLEN / 8;

// MEM_ADDR_WIDTH minus log2 of the word size
localparam LSU_ADDR_WIDTH  = (`MEM_ADDR_WIDTH - `CLOG2(LSU_WORD_SIZE));

// Request tag: UUID bits followed by the tag Id bits
localparam LSU_MEM_BATCHES = 1;
localparam LSU_TAG_ID_BITS = (`CLOG2(`LSUQ_IN_SIZE) + `CLOG2(LSU_MEM_BATCHES));
localparam LSU_TAG_WIDTH   = (`UUID_WIDTH + LSU_TAG_ID_BITS);

// Total request count: NUM_LSU_BLOCKS times NUM_LSU_LANES
localparam LSU_NUM_REQS    = `NUM_LSU_BLOCKS * `NUM_LSU_LANES;
|
|
|
|
////////////////////////// Icache Parameters //////////////////////////////
|
|
|
|
// Word size in bytes
localparam ICACHE_WORD_SIZE      = 4;

// MEM_ADDR_WIDTH minus log2 of the word size
localparam ICACHE_ADDR_WIDTH     = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));

// Block size in bytes
localparam ICACHE_LINE_SIZE      = `L1_LINE_SIZE;

// Core request tag Id bits
localparam ICACHE_TAG_ID_BITS    = `NW_WIDTH;

// Core request tag bits (UUID bits + tag Id bits)
localparam ICACHE_TAG_WIDTH      = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);

// Memory request data bits (one full line)
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);

// Memory request tag bits: computed by the cache-cluster macro when
// ICACHE_ENABLE is defined, otherwise by the bypass macro
`ifdef ICACHE_ENABLE
localparam ICACHE_MEM_TAG_WIDTH  = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
`else
localparam ICACHE_MEM_TAG_WIDTH  = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
`endif
|
|
|
|
////////////////////////// Dcache Parameters //////////////////////////////
|
|
|
|
// Word size in bytes
localparam DCACHE_WORD_SIZE      = `LSU_LINE_SIZE;

// MEM_ADDR_WIDTH minus log2 of the word size
localparam DCACHE_ADDR_WIDTH     = (`MEM_ADDR_WIDTH - `CLOG2(DCACHE_WORD_SIZE));

// Block size in bytes
localparam DCACHE_LINE_SIZE      = `L1_LINE_SIZE;

// Number of cache words covered by the bytes of all LSU lanes
// (integer division, so the result may be zero)
localparam DCACHE_MERGED_REQS    = (`NUM_LSU_LANES * LSU_WORD_SIZE) / DCACHE_WORD_SIZE;

// Input request size: the same quotient passed through the UP macro
localparam DCACHE_CHANNELS       = `UP(DCACHE_MERGED_REQS);
localparam DCACHE_NUM_REQS       = `NUM_LSU_BLOCKS * DCACHE_CHANNELS;

// Core request tag Id bits
localparam DCACHE_MEM_BATCHES    = `CDIV(DCACHE_MERGED_REQS, DCACHE_CHANNELS);
localparam DCACHE_TAG_ID_BITS    = (`CLOG2(`LSUQ_OUT_SIZE) + `CLOG2(DCACHE_MEM_BATCHES));

// Core request tag bits (UUID bits + tag Id bits)
localparam DCACHE_TAG_WIDTH      = (`UUID_WIDTH + DCACHE_TAG_ID_BITS);

// Memory request data bits (one full line)
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);

// Memory request tag bits: computed by the non-cacheable-aware cluster macro
// when DCACHE_ENABLE is defined, otherwise by the bypass macro
`ifdef DCACHE_ENABLE
localparam DCACHE_MEM_TAG_WIDTH  = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
`else
localparam DCACHE_MEM_TAG_WIDTH  = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
`endif
|
|
|
|
/////////////////////////////// L1 Parameters /////////////////////////////
|
|
|
|
// Wider of the Icache and Dcache memory tags
localparam L1_MEM_TAG_WIDTH     = `MAX(ICACHE_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);

// L1 tag extended by CLOG2(2) bits.
// NOTE(review): presumably the select bit of a two-input (Icache/Dcache)
// memory arbiter; confirm against the arbiter instantiation.
localparam L1_MEM_ARB_TAG_WIDTH = (L1_MEM_TAG_WIDTH + `CLOG2(2));
|
|
|
|
/////////////////////////////// L2 Parameters /////////////////////////////
|
|
|
|
// Indices of the Icache and Dcache inputs (Icache first, Dcache next)
localparam ICACHE_MEM_ARB_IDX = 0;
localparam DCACHE_MEM_ARB_IDX = ICACHE_MEM_ARB_IDX + 1;

// Word size in bytes (one L1 line)
localparam L2_WORD_SIZE       = `L1_LINE_SIZE;

// Input request size
localparam L2_NUM_REQS        = `NUM_SOCKETS;

// Core request tag bits (the arbitrated L1 memory tag)
localparam L2_TAG_WIDTH       = L1_MEM_ARB_TAG_WIDTH;

// Memory request data bits (one full L2 line)
localparam L2_MEM_DATA_WIDTH  = (`L2_LINE_SIZE * 8);

// Memory request tag bits: computed by the cache macro when L2_ENABLE is
// defined, otherwise by the bypass macro
`ifdef L2_ENABLE
localparam L2_MEM_TAG_WIDTH   = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
`else
localparam L2_MEM_TAG_WIDTH   = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
`endif
|
|
|
|
/////////////////////////////// L3 Parameters /////////////////////////////
|
|
|
|
// Word size in bytes (one L2 line)
localparam L3_WORD_SIZE      = `L2_LINE_SIZE;

// Input request size
localparam L3_NUM_REQS       = `NUM_CLUSTERS;

// Core request tag bits (the L2 memory tag)
localparam L3_TAG_WIDTH      = L2_MEM_TAG_WIDTH;

// Memory request data bits (one full L3 line)
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);

// Memory request tag bits: computed by the cache macro when L3_ENABLE is
// defined, otherwise by the bypass macro
`ifdef L3_ENABLE
localparam L3_MEM_TAG_WIDTH  = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
`else
localparam L3_MEM_TAG_WIDTH  = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
`endif
|
|
|
|
/////////////////////////////// Issue parameters //////////////////////////
|
|
|
|
// Bits needed to select one of ISSUE_WIDTH issue slices; the _W variant is
// passed through the UP macro and is the width used for port declarations.
localparam ISSUE_ISW       = `CLOG2(`ISSUE_WIDTH);
localparam ISSUE_ISW_W     = `UP(ISSUE_ISW);

// Warps handled per issue slice (integer division)
localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH;

// Bits needed to select a warp inside one issue slice, plus its UP variant
localparam ISSUE_WIS       = `CLOG2(PER_ISSUE_WARPS);
localparam ISSUE_WIS_W     = `UP(ISSUE_WIS);
|
|
|
|
// Combines a per-slice warp index (wis) and an issue-slice index (isw) into
// a warp id of NW_WIDTH bits.
//   - both ISSUE_WIS and ISSUE_ISW non-zero: wid = {wis, isw} (isw in the LSBs)
//   - only ISSUE_WIS non-zero:               wid = wis
//   - ISSUE_WIS zero:                        wid = isw
// The result is always cast to NW_WIDTH bits.
function logic [`NW_WIDTH-1:0] wis_to_wid(
    input logic [ISSUE_WIS_W-1:0] wis,
    input logic [ISSUE_ISW_W-1:0] isw
);
    if ((ISSUE_WIS != 0) && (ISSUE_ISW != 0)) begin
        wis_to_wid = `NW_WIDTH'({wis, isw});
    end else if (ISSUE_WIS != 0) begin
        wis_to_wid = `NW_WIDTH'(wis);
    end else begin
        wis_to_wid = `NW_WIDTH'(isw);
    end
endfunction
|
|
|
|
// Extracts the issue-slice index from a warp id: the low ISSUE_ISW_W bits of
// wid, or zero when ISSUE_ISW is zero.
function logic [ISSUE_ISW_W-1:0] wid_to_isw(
    input logic [`NW_WIDTH-1:0] wid
);
    if (ISSUE_ISW == 0) begin
        wid_to_isw = '0;
    end else begin
        wid_to_isw = wid[ISSUE_ISW_W-1:0];
    end
endfunction
|
|
|
|
// Extracts the per-slice warp index from a warp id: wid shifted right by
// ISSUE_ISW bits and cast to ISSUE_WIS_W bits, or zero when ISSUE_WIS is zero.
function logic [ISSUE_WIS_W-1:0] wid_to_wis(
    input logic [`NW_WIDTH-1:0] wid
);
    if (ISSUE_WIS == 0) begin
        wid_to_wis = '0;
    end else begin
        wid_to_wis = ISSUE_WIS_W'(wid >> ISSUE_ISW);
    end
endfunction
|
|
|
|
///////////////////////// Miscellaneous functions /////////////////////////
|
|
|
|
// Maps an SFU opcode to its SFU sub-unit selector: the three CSR opcodes
// (CSRRW, CSRRS, CSRRC) select SFU_CSRS, every other opcode selects SFU_WCTL.
function logic [`SFU_WIDTH-1:0] op_to_sfu_type(
    input logic [`INST_OP_BITS-1:0] op_type
);
    // Start from the fallback value; only the CSR opcodes override it.
    op_to_sfu_type = `SFU_WCTL;
    case (op_type)
        `INST_SFU_CSRRW,
        `INST_SFU_CSRRS,
        `INST_SFU_CSRRC: op_to_sfu_type = `SFU_CSRS;
        default: ;
    endcase
endfunction
|
|
|
|
`IGNORE_UNUSED_END
|
|
|
|
endpackage
|
|
|
|
`endif // VX_GPU_PKG_VH
|