mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
SIMT Stack compression
This commit is contained in:
parent
9df25ff48f
commit
ca79e69355
13 changed files with 289 additions and 235 deletions
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -44,6 +44,9 @@
|
|||
|
||||
`define NR_BITS `CLOG2(`NUM_REGS)
|
||||
|
||||
`define DV_STACK_SIZE `UP(`NUM_THREADS-1)
|
||||
`define DV_STACK_SIZEW `UP(`CLOG2(`DV_STACK_SIZE))
|
||||
|
||||
`define PERF_CTR_BITS 44
|
||||
|
||||
`ifndef NDEBUG
|
||||
|
@ -90,10 +93,10 @@
|
|||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
// Custom extension opcodes
|
||||
|
@ -143,8 +146,8 @@
|
|||
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LT 4'b0101
|
||||
`define INST_BR_GE 4'b0111
|
||||
`define INST_BR_JAL 4'b1000
|
||||
|
@ -184,14 +187,14 @@
|
|||
`define INST_FMT_HU 3'b101
|
||||
`define INST_FMT_WU 3'b110
|
||||
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LD 4'b0011 // new for RV64I LD
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_SD 4'b1011 // new for RV64I SD
|
||||
|
@ -205,9 +208,9 @@
|
|||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define INST_FPU_ADD 4'b0000
|
||||
`define INST_FPU_SUB 4'b0001
|
||||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_ADD 4'b0000
|
||||
`define INST_FPU_SUB 4'b0001
|
||||
`define INST_FPU_MUL 4'b0010
|
||||
`define INST_FPU_DIV 4'b0011
|
||||
`define INST_FPU_SQRT 4'b0100
|
||||
`define INST_FPU_CMP 4'b0101 // mod: LE=0, LT=1, EQ=2
|
||||
|
@ -217,9 +220,9 @@
|
|||
`define INST_FPU_F2U 4'b1001
|
||||
`define INST_FPU_I2F 4'b1010
|
||||
`define INST_FPU_U2F 4'b1011
|
||||
`define INST_FPU_MADD 4'b1100
|
||||
`define INST_FPU_MSUB 4'b1101
|
||||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_MADD 4'b1100
|
||||
`define INST_FPU_MSUB 4'b1101
|
||||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_NMADD 4'b1111
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_IS_W(mod) (mod[4])
|
||||
|
@ -227,7 +230,7 @@
|
|||
`define INST_FPU_IS_MVXW(op, mod) (op == `INST_FPU_MISC && mod == 4)
|
||||
|
||||
`define INST_SFU_TMC 4'h0
|
||||
`define INST_SFU_WSPAWN 4'h1
|
||||
`define INST_SFU_WSPAWN 4'h1
|
||||
`define INST_SFU_SPLIT 4'h2
|
||||
`define INST_SFU_JOIN 4'h3
|
||||
`define INST_SFU_BAR 4'h4
|
||||
|
@ -249,7 +252,7 @@
|
|||
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \
|
||||
(`CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
|
||||
|
||||
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width)
|
||||
|
||||
|
@ -259,7 +262,7 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
|
||||
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
|
||||
|
@ -286,7 +289,7 @@
|
|||
`define ADDR_TYPE_LOCAL 1
|
||||
`define ADDR_TYPE_WIDTH (`LMEM_ENABLED + 1)
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
|
||||
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -39,7 +39,7 @@ package VX_gpu_pkg;
|
|||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic is_dvg;
|
||||
logic [`DV_STACK_SIZEW-1:0] stack_ptr;
|
||||
} join_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -100,14 +100,14 @@ package VX_gpu_pkg;
|
|||
localparam DCACHE_CHANNELS = `UP((`NUM_LSU_LANES * LSU_WORD_SIZE) / DCACHE_WORD_SIZE);
|
||||
localparam DCACHE_NUM_REQS = `NUM_LSU_BLOCKS * DCACHE_CHANNELS;
|
||||
|
||||
// Core request tag Id bits
|
||||
// Core request tag Id bits
|
||||
localparam DCACHE_MERGED_REQS = (`NUM_LSU_LANES * LSU_WORD_SIZE) / DCACHE_WORD_SIZE;
|
||||
localparam DCACHE_MEM_BATCHES = `CDIV(DCACHE_MERGED_REQS, DCACHE_CHANNELS);
|
||||
localparam DCACHE_TAG_ID_BITS = (`CLOG2(`LSUQ_OUT_SIZE) + `CLOG2(DCACHE_MEM_BATCHES));
|
||||
|
||||
// Core request tag bits
|
||||
localparam DCACHE_TAG_WIDTH = (`UUID_WIDTH + DCACHE_TAG_ID_BITS);
|
||||
|
||||
|
||||
// Memory request data bits
|
||||
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
|
||||
|
||||
|
@ -127,7 +127,7 @@ package VX_gpu_pkg;
|
|||
// Block size in bytes
|
||||
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
|
||||
|
||||
// Core request tag Id bits
|
||||
// Core request tag Id bits
|
||||
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
|
||||
|
||||
// Core request tag bits
|
||||
|
@ -147,7 +147,7 @@ package VX_gpu_pkg;
|
|||
|
||||
localparam L1_MEM_TAG_WIDTH = `MAX(ICACHE_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
|
||||
localparam L1_MEM_ARB_TAG_WIDTH = (L1_MEM_TAG_WIDTH + `CLOG2(2));
|
||||
|
||||
|
||||
/////////////////////////////// L2 Parameters /////////////////////////////
|
||||
|
||||
localparam ICACHE_MEM_ARB_IDX = 0;
|
||||
|
@ -198,21 +198,21 @@ package VX_gpu_pkg;
|
|||
/////////////////////////////// Issue parameters //////////////////////////
|
||||
|
||||
localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH);
|
||||
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
|
||||
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
|
||||
localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH;
|
||||
localparam ISSUE_WIS = `CLOG2(ISSUE_RATIO);
|
||||
localparam ISSUE_WIS_W = `UP(ISSUE_WIS);
|
||||
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
function logic [`NW_WIDTH-1:0] wis_to_wid(
|
||||
input logic [ISSUE_WIS_W-1:0] wis,
|
||||
input logic [ISSUE_WIS_W-1:0] wis,
|
||||
input logic [ISSUE_ISW_W-1:0] isw
|
||||
);
|
||||
if (ISSUE_WIS == 0) begin
|
||||
wis_to_wid = `NW_WIDTH'(isw);
|
||||
end else if (ISSUE_ISW == 0) begin
|
||||
wis_to_wid = `NW_WIDTH'(wis);
|
||||
end else begin
|
||||
end else begin
|
||||
wis_to_wid = `NW_WIDTH'({wis, isw});
|
||||
end
|
||||
endfunction
|
||||
|
@ -220,7 +220,7 @@ package VX_gpu_pkg;
|
|||
function logic [ISSUE_ISW_W-1:0] wid_to_isw(
|
||||
input logic [`NW_WIDTH-1:0] wid
|
||||
);
|
||||
if (ISSUE_ISW != 0) begin
|
||||
if (ISSUE_ISW != 0) begin
|
||||
wid_to_isw = wid[ISSUE_ISW_W-1:0];
|
||||
end else begin
|
||||
wid_to_isw = 0;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -37,7 +37,7 @@ module VX_decode #(
|
|||
// inputs
|
||||
VX_fetch_if.slave fetch_if,
|
||||
|
||||
// outputs
|
||||
// outputs
|
||||
VX_decode_if.master decode_if,
|
||||
VX_decode_sched_if.master decode_sched_if
|
||||
);
|
||||
|
@ -47,17 +47,17 @@ module VX_decode #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
reg [`INST_MOD_BITS-1:0] op_mod;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [`XLEN-1:0] imm;
|
||||
reg [`XLEN-1:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg is_wstall;
|
||||
|
||||
wire [31:0] instr = fetch_if.data.instr;
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
wire [1:0] func2 = instr[26:25];
|
||||
wire [2:0] func3 = instr[14:12];
|
||||
wire [4:0] func5 = instr[31:27];
|
||||
|
@ -85,7 +85,7 @@ module VX_decode #(
|
|||
wire [11:0] iw_imm = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
|
||||
`else
|
||||
wire [11:0] i_imm = is_itype_sh ? {7'b0, instr[24:20]} : u_12;
|
||||
`endif
|
||||
`endif
|
||||
wire [11:0] s_imm = {func7, rd};
|
||||
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
|
@ -121,9 +121,9 @@ module VX_decode #(
|
|||
always @(*) begin
|
||||
case (u_12)
|
||||
12'h000: s_type = `INST_OP_BITS'(`INST_BR_ECALL);
|
||||
12'h001: s_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h002: s_type = `INST_OP_BITS'(`INST_BR_URET);
|
||||
12'h102: s_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h001: s_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h002: s_type = `INST_OP_BITS'(`INST_BR_URET);
|
||||
12'h102: s_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h302: s_type = `INST_OP_BITS'(`INST_BR_MRET);
|
||||
default: s_type = 'x;
|
||||
endcase
|
||||
|
@ -163,7 +163,7 @@ module VX_decode #(
|
|||
use_rs3 = 0;
|
||||
is_wstall = 0;
|
||||
|
||||
case (opcode)
|
||||
case (opcode)
|
||||
`INST_I: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
|
@ -173,17 +173,17 @@ module VX_decode #(
|
|||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R: begin
|
||||
`INST_R: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
end
|
||||
end
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
|
@ -203,12 +203,12 @@ module VX_decode #(
|
|||
end
|
||||
`INST_R_W: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
// MULW, DIVW, DIVUW, REMW, REMUW
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
// ADDW, SUBW, SLLW, SRLW, SRAW
|
||||
|
@ -221,7 +221,7 @@ module VX_decode #(
|
|||
`USED_IREG (rs2);
|
||||
end
|
||||
`endif
|
||||
`INST_LUI: begin
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
|
||||
use_rd = 1;
|
||||
|
@ -229,7 +229,7 @@ module VX_decode #(
|
|||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
|
||||
use_rd = 1;
|
||||
|
@ -238,7 +238,7 @@ module VX_decode #(
|
|||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JAL: begin
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JAL);
|
||||
op_mod[0] = 1;
|
||||
|
@ -249,7 +249,7 @@ module VX_decode #(
|
|||
imm = {{(`XLEN-21){jal_imm[20]}}, jal_imm};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JALR: begin
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JALR);
|
||||
op_mod[0] = 1;
|
||||
|
@ -260,7 +260,7 @@ module VX_decode #(
|
|||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_B: begin
|
||||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(b_type);
|
||||
op_mod[0] = 1;
|
||||
|
@ -275,8 +275,8 @@ module VX_decode #(
|
|||
ex_type = `EX_LSU;
|
||||
op_type = `INST_LSU_FENCE;
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3[1:0] != 0) begin
|
||||
`INST_SYS : begin
|
||||
if (func3[1:0] != 0) begin
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CSR(func3[1:0]));
|
||||
use_rd = 1;
|
||||
|
@ -288,7 +288,7 @@ module VX_decode #(
|
|||
imm[`VX_CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
|
||||
end else begin
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(s_type);
|
||||
|
@ -302,9 +302,9 @@ module VX_decode #(
|
|||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FL,
|
||||
`INST_FL,
|
||||
`endif
|
||||
`INST_L: begin
|
||||
`INST_L: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b0, func3});
|
||||
use_rd = 1;
|
||||
|
@ -319,9 +319,9 @@ module VX_decode #(
|
|||
`USED_IREG (rs1);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FS,
|
||||
`INST_FS,
|
||||
`endif
|
||||
`INST_S: begin
|
||||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b1, func3});
|
||||
imm = {{(`XLEN-12){s_imm[11]}}, s_imm};
|
||||
|
@ -338,24 +338,24 @@ module VX_decode #(
|
|||
`INST_FMADD,
|
||||
`INST_FMSUB,
|
||||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
`INST_FNMADD: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `INST_OP_BITS'({2'b11, opcode[3:2]});
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
`USED_FREG (rs3);
|
||||
end
|
||||
`INST_FCI: begin
|
||||
`INST_FCI: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
`ifdef FLEN_64
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
`endif
|
||||
use_rd = 1;
|
||||
use_rd = 1;
|
||||
case (func5)
|
||||
5'b00000, // FADD
|
||||
5'b00001, // FSUB
|
||||
|
@ -381,28 +381,28 @@ module VX_decode #(
|
|||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
end
|
||||
`ifdef FLEN_64
|
||||
5'b01000: begin
|
||||
5'b01000: begin
|
||||
// CVT.S.D, CVT.D.S
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_F2F);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
`endif
|
||||
5'b01011: begin
|
||||
5'b01011: begin
|
||||
// SQRT
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
end
|
||||
5'b10100: begin
|
||||
// CMP
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
end
|
||||
end
|
||||
5'b11000: begin
|
||||
// CVT.W.X, CVT.WU.X
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_F2U) : `INST_OP_BITS'(`INST_FPU_F2I);
|
||||
|
@ -421,10 +421,10 @@ module VX_decode #(
|
|||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
5'b11100: begin
|
||||
5'b11100: begin
|
||||
if (func3[0]) begin
|
||||
// NCP: FCLASS=3
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 3;
|
||||
end else begin
|
||||
// NCP: FMV.X.W=4
|
||||
|
@ -432,11 +432,11 @@ module VX_decode #(
|
|||
op_mod = 4;
|
||||
end
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11110: begin
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11110: begin
|
||||
// NCP: FMV.W.X=5
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 5;
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
|
@ -445,7 +445,7 @@ module VX_decode #(
|
|||
endcase
|
||||
end
|
||||
`endif
|
||||
`INST_EXT1: begin
|
||||
`INST_EXT1: begin
|
||||
case (func7)
|
||||
7'h00: begin
|
||||
ex_type = `EX_SFU;
|
||||
|
@ -463,8 +463,9 @@ module VX_decode #(
|
|||
3'h2: begin // SPLIT
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_SPLIT);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rd);
|
||||
op_mod[0] = rs2[0]; // not?
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
3'h3: begin // JOIN
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_JOIN);
|
||||
|
@ -477,6 +478,7 @@ module VX_decode #(
|
|||
end
|
||||
3'h5: begin // PRED
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_PRED);
|
||||
op_mod[0] = rd[0]; // not?
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
|
@ -486,10 +488,10 @@ module VX_decode #(
|
|||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_EXT2: begin
|
||||
`INST_EXT2: begin
|
||||
case (func3)
|
||||
3'h1: begin
|
||||
case (func2)
|
||||
case (func2)
|
||||
2'h0: begin // CMOV
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CMOV);
|
||||
|
@ -533,7 +535,7 @@ module VX_decode #(
|
|||
assign decode_sched_if.valid = fetch_fire;
|
||||
assign decode_sched_if.wid = fetch_if.data.wid;
|
||||
assign decode_sched_if.is_wstall = is_wstall;
|
||||
`ifndef L1_ENABLE
|
||||
`ifndef L1_ENABLE
|
||||
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
|
||||
`endif
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -24,14 +24,15 @@ module VX_ipdom_stack #(
|
|||
input wire [WIDTH-1:0] q0,
|
||||
input wire [WIDTH-1:0] q1,
|
||||
output wire [WIDTH-1:0] d,
|
||||
output wire d_set,
|
||||
output wire d_set,
|
||||
output wire [ADDRW-1:0] q_ptr,
|
||||
input wire push,
|
||||
input wire pop,
|
||||
input wire pop,
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
reg slot_set [DEPTH-1:0];
|
||||
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr, wr_ptr;
|
||||
|
||||
reg empty_r, full_r;
|
||||
|
@ -41,28 +42,28 @@ module VX_ipdom_stack #(
|
|||
wire d_set_n = slot_set[rd_ptr];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
rd_ptr <= '0;
|
||||
wr_ptr <= '0;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("runtime error: writing to a full stack!"));
|
||||
`ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!"));
|
||||
`ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!"));
|
||||
if (push) begin
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
empty_r <= 0;
|
||||
full_r <= (ADDRW'(DEPTH-1) == wr_ptr);
|
||||
end else if (pop) begin
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - ADDRW'(d_set_n);
|
||||
rd_ptr <= rd_ptr - ADDRW'(d_set_n);
|
||||
empty_r <= (rd_ptr == 0) && (d_set_n == 1);
|
||||
full_r <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (WIDTH * 2),
|
||||
|
@ -72,23 +73,24 @@ module VX_ipdom_stack #(
|
|||
) store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.write (push),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
.rdata ({d1, d0})
|
||||
);
|
||||
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
slot_set[wr_ptr] <= 0;
|
||||
end else if (pop) begin
|
||||
slot_set[wr_ptr] <= 0;
|
||||
end else if (pop) begin
|
||||
slot_set[rd_ptr] <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire d_set_r;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
|
@ -102,6 +104,7 @@ module VX_ipdom_stack #(
|
|||
|
||||
assign d = d_set_r ? d0 : d1;
|
||||
assign d_set = ~d_set_r;
|
||||
assign q_ptr = wr_ptr;
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
|
||||
|
|
|
@ -296,11 +296,13 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
.split (warp_ctl_if.split),
|
||||
.sjoin (warp_ctl_if.sjoin),
|
||||
.join_valid (join_valid),
|
||||
.join_is_dvg (join_is_dvg),
|
||||
.join_is_else (join_is_else),
|
||||
.join_is_dvg(join_is_dvg),
|
||||
.join_is_else(join_is_else),
|
||||
.join_wid (join_wid),
|
||||
.join_tmask (join_tmask),
|
||||
.join_pc (join_pc)
|
||||
.join_pc (join_pc),
|
||||
.stack_wid (warp_ctl_if.dvstack_wid),
|
||||
.stack_ptr (warp_ctl_if.dvstack_ptr)
|
||||
);
|
||||
|
||||
// schedule the next ready warp
|
||||
|
@ -308,7 +310,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~(stalled_warps | barrier_stalls);
|
||||
|
||||
VX_lzc #(
|
||||
.N (`NUM_WARPS),
|
||||
.N (`NUM_WARPS),
|
||||
.REVERSE (1)
|
||||
) wid_select (
|
||||
.data_in (ready_warps),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,7 +15,7 @@
|
|||
|
||||
module VX_sfu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -28,7 +28,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_fpu_csr_if.slave fpu_csr_if [`NUM_FPU_BLOCKS],
|
||||
`endif
|
||||
|
@ -37,13 +37,13 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + 1;
|
||||
|
@ -67,29 +67,29 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in;
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in;
|
||||
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
|
||||
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
|
||||
|
||||
// Warp control block
|
||||
// Warp control block
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_execute_if();
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_commit_if();
|
||||
|
||||
|
||||
assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type);
|
||||
assign wctl_execute_if.data = per_block_execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (wctl_reset, reset);
|
||||
|
||||
|
||||
VX_wctl_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
.reset (wctl_reset),
|
||||
.execute_if (wctl_execute_if),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.execute_if (wctl_execute_if),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.commit_if (wctl_commit_if)
|
||||
);
|
||||
|
||||
|
@ -119,20 +119,20 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
.base_dcrs (base_dcrs),
|
||||
.execute_if (csr_execute_if),
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
.pipeline_perf_if(pipeline_perf_if),
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_csr_if (fpu_csr_if),
|
||||
`endif
|
||||
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_if (csr_commit_if)
|
||||
);
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data;
|
||||
|
@ -164,7 +164,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -27,50 +27,58 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
output wire join_is_else,
|
||||
output wire [`NW_WIDTH-1:0] join_wid,
|
||||
output wire [`NUM_THREADS-1:0] join_tmask,
|
||||
output wire [`XLEN-1:0] join_pc
|
||||
output wire [`XLEN-1:0] join_pc,
|
||||
input wire [`NW_WIDTH-1:0] stack_wid,
|
||||
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];
|
||||
wire ipdom_set [`NUM_WARPS-1:0];
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q0 = {split.then_tmask | split.else_tmask, `XLEN'(0)};
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q1 = {split.else_tmask, split.next_pc};
|
||||
|
||||
wire sjoin_is_dvg = (sjoin.stack_ptr != ipdom_q_ptr[wid]);
|
||||
|
||||
wire ipdom_push = valid && split.valid && split.is_dvg;
|
||||
wire ipdom_pop = valid && sjoin.valid && sjoin.is_dvg;
|
||||
wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg;
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
`RESET_RELAY (ipdom_reset, reset);
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`XLEN+`NUM_THREADS),
|
||||
.DEPTH (`UP(`NUM_THREADS-1))
|
||||
.WIDTH (`XLEN+`NUM_THREADS),
|
||||
.DEPTH (`DV_STACK_SIZE)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (ipdom_reset),
|
||||
.push (ipdom_push && (i == wid)),
|
||||
.pop (ipdom_pop && (i == wid)),
|
||||
.q0 (ipdom_q0),
|
||||
.q1 (ipdom_q1),
|
||||
.d (ipdom_data[i]),
|
||||
.d_set (ipdom_set[i]),
|
||||
.q_ptr (ipdom_q_ptr[i]),
|
||||
.push (ipdom_push && (i == wid)),
|
||||
.pop (ipdom_pop && (i == wid)),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full)
|
||||
);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `NW_WIDTH + 1 + `XLEN + `NUM_THREADS),
|
||||
.DATAW (1 + 1 + 1 + `NW_WIDTH + `NUM_THREADS + `XLEN),
|
||||
.DEPTH (1),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({valid && sjoin.valid, sjoin.is_dvg, ipdom_set[wid], wid, ipdom_data[wid]}),
|
||||
.data_out ({join_valid, join_is_dvg, join_is_else, join_wid, join_tmask, join_pc})
|
||||
.data_in ({valid && sjoin.valid, sjoin_is_dvg, ipdom_set[wid], wid, ipdom_data[wid]}),
|
||||
.data_out ({join_valid, join_is_dvg, join_is_else, join_wid, {join_tmask, join_pc}})
|
||||
);
|
||||
|
||||
assign stack_ptr = ipdom_q_ptr[stack_wid];
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -26,17 +26,17 @@ task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
|||
endcase
|
||||
endtask
|
||||
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input [`INST_MOD_BITS-1:0] op_mod,
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input [`INST_MOD_BITS-1:0] op_mod,
|
||||
input use_imm,
|
||||
input fdst_d,
|
||||
input fcvt_l,
|
||||
input rd_float
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
`EX_ALU: begin
|
||||
if (`INST_ALU_IS_BR(op_mod)) begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"));
|
||||
|
@ -131,7 +131,7 @@ task trace_ex_op(input int level,
|
|||
end
|
||||
end
|
||||
end
|
||||
`EX_LSU: begin
|
||||
`EX_LSU: begin
|
||||
if (rd_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"));
|
||||
|
@ -161,55 +161,55 @@ task trace_ex_op(input int level,
|
|||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FADD.S"));
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FSUB.S"));
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMUL.D"));
|
||||
else
|
||||
`TRACE(level, ("FMUL.S"));
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FDIV.D"));
|
||||
else
|
||||
`TRACE(level, ("FDIV.S"));
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FSQRT.D"));
|
||||
else
|
||||
`TRACE(level, ("FSQRT.S"));
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FMADD.S"));
|
||||
end
|
||||
`INST_FPU_MSUB: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FMSUB.S"));
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FNMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMADD.S"));
|
||||
end
|
||||
`INST_FPU_NMSUB: begin
|
||||
if (fdst_d)
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FNMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMSUB.S"));
|
||||
|
@ -330,10 +330,10 @@ task trace_ex_op(input int level,
|
|||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"));
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
|
||||
`INST_SFU_SPLIT: `TRACE(level, ("SPLIT"));
|
||||
`INST_SFU_SPLIT: begin if (op_mod[0]) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"));
|
||||
`INST_SFU_PRED: `TRACE(level, ("PRED"));
|
||||
`INST_SFU_PRED: begin if (op_mod[0]) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end
|
||||
`INST_SFU_CSRRW: begin if (use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
|
||||
`INST_SFU_CSRRS: begin if (use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
|
||||
`INST_SFU_CSRRC: begin if (use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -22,7 +22,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
|
||||
// Outputs
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master commit_if
|
||||
|
@ -32,12 +32,12 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam WCTL_WIDTH = $bits(tmc_t) + $bits(wspawn_t) + $bits(split_t) + $bits(join_t) + $bits(barrier_t);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1 + `DV_STACK_SIZEW;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
|
||||
tmc_t tmc, tmc_r;
|
||||
wspawn_t wspawn, wspawn_r;
|
||||
wspawn_t wspawn, wspawn_r;
|
||||
split_t split, split_r;
|
||||
join_t sjoin, sjoin_r;
|
||||
barrier_t barrier, barrier_r;
|
||||
|
@ -55,14 +55,16 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
end else begin
|
||||
assign tid = 0;
|
||||
end
|
||||
|
||||
|
||||
wire [`XLEN-1:0] rs1_data = execute_if.data.rs1_data[tid];
|
||||
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[tid];
|
||||
`UNUSED_VAR (rs1_data)
|
||||
|
||||
|
||||
wire not_pred = execute_if.data.op_mod[0];
|
||||
|
||||
wire [NUM_LANES-1:0] taken;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign taken[i] = execute_if.data.rs1_data[i][0];
|
||||
assign taken[i] = (execute_if.data.rs1_data[i][0] ^ not_pred);
|
||||
end
|
||||
|
||||
reg [`NUM_THREADS-1:0] then_tmask_r, then_tmask_n;
|
||||
|
@ -93,17 +95,20 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign tmc.tmask = is_pred ? pred_mask : rs1_data[`NUM_THREADS-1:0];
|
||||
|
||||
// split
|
||||
|
||||
|
||||
assign split.valid = is_split;
|
||||
assign split.is_dvg = has_then && has_else;
|
||||
assign split.then_tmask = then_tmask_n;
|
||||
assign split.else_tmask = else_tmask_n;
|
||||
assign split.next_pc = execute_if.data.PC + 4;
|
||||
|
||||
assign warp_ctl_if.dvstack_wid = execute_if.data.wid;
|
||||
wire [`DV_STACK_SIZEW-1:0] dvstack_ptr;
|
||||
|
||||
// join
|
||||
|
||||
assign sjoin.valid = is_join;
|
||||
assign sjoin.is_dvg = rs1_data[0];
|
||||
assign sjoin.valid = is_join;
|
||||
assign sjoin.stack_ptr = rs1_data[`DV_STACK_SIZEW-1:0];
|
||||
|
||||
// barrier
|
||||
assign barrier.valid = is_bar;
|
||||
|
@ -126,7 +131,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign wspawn.pc = rs2_data;
|
||||
|
||||
// response
|
||||
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2)
|
||||
|
@ -135,8 +140,8 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (execute_if.valid),
|
||||
.ready_in (execute_if.ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}}),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}, warp_ctl_if.dvstack_ptr}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}, dvstack_ptr}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
@ -148,9 +153,9 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign warp_ctl_if.split = split_r;
|
||||
assign warp_ctl_if.sjoin = sjoin_r;
|
||||
assign warp_ctl_if.barrier = barrier_r;
|
||||
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = `XLEN'(split_r.is_dvg);
|
||||
assign commit_if.data.data[i] = `XLEN'(dvstack_ptr);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,6 +23,9 @@ interface VX_warp_ctl_if import VX_gpu_pkg::*; ();
|
|||
join_t sjoin;
|
||||
barrier_t barrier;
|
||||
|
||||
wire [`NW_WIDTH-1:0] dvstack_wid;
|
||||
wire [`DV_STACK_SIZEW-1:0] dvstack_ptr;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output wid,
|
||||
|
@ -30,7 +33,10 @@ interface VX_warp_ctl_if import VX_gpu_pkg::*; ();
|
|||
output tmc,
|
||||
output split,
|
||||
output sjoin,
|
||||
output barrier
|
||||
output barrier,
|
||||
|
||||
output dvstack_wid,
|
||||
input dvstack_ptr
|
||||
);
|
||||
|
||||
modport slave (
|
||||
|
@ -40,7 +46,10 @@ interface VX_warp_ctl_if import VX_gpu_pkg::*; ();
|
|||
input tmc,
|
||||
input split,
|
||||
input sjoin,
|
||||
input barrier
|
||||
input barrier,
|
||||
|
||||
input dvstack_wid,
|
||||
output dvstack_ptr
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -121,6 +121,11 @@ inline void vx_pred(int condition, int thread_mask) {
|
|||
asm volatile (".insn r %0, 5, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(condition), "r"(thread_mask));
|
||||
}
|
||||
|
||||
// Set thread not predicate
|
||||
inline void vx_pred_n(int condition, int thread_mask) {
|
||||
asm volatile (".insn r %0, 5, 0, x1, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(condition), "r"(thread_mask));
|
||||
}
|
||||
|
||||
typedef void (*vx_wspawn_pfn)();
|
||||
|
||||
// Spawn warps
|
||||
|
@ -135,6 +140,13 @@ inline int vx_split(int predicate) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
// Split on a not predicate
|
||||
inline int vx_split_n(int predicate) {
|
||||
size_t ret;
|
||||
asm volatile (".insn r %1, 2, 0, %0, %2, x1" : "=r"(ret) : "i"(RISCV_CUSTOM0), "r"(predicate));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Join
|
||||
inline void vx_join(int stack_ptr) {
|
||||
asm volatile (".insn r %0, 3, 0, x0, %1, x0" :: "i"(RISCV_CUSTOM0), "r"(stack_ptr));
|
||||
|
@ -191,7 +203,7 @@ inline int vx_num_threads() {
|
|||
inline int vx_num_warps() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_WARPS));
|
||||
return ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return the number of cores per cluster
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -42,7 +42,7 @@ static const std::unordered_map<Opcode, InstType> sc_instTable = {
|
|||
{Opcode::AMO, InstType::R},
|
||||
{Opcode::FL, InstType::I},
|
||||
{Opcode::FS, InstType::S},
|
||||
{Opcode::FCI, InstType::R},
|
||||
{Opcode::FCI, InstType::R},
|
||||
{Opcode::FMADD, InstType::R4},
|
||||
{Opcode::FMSUB, InstType::R4},
|
||||
{Opcode::FMNMADD, InstType::R4},
|
||||
|
@ -71,7 +71,7 @@ enum Constants {
|
|||
shift_func7 = shift_rs2 + width_reg,
|
||||
shift_rs3 = shift_func7 + width_func2,
|
||||
|
||||
mask_opcode = (1 << width_opcode) - 1,
|
||||
mask_opcode = (1 << width_opcode) - 1,
|
||||
mask_reg = (1 << width_reg) - 1,
|
||||
mask_func2 = (1 << width_func2) - 1,
|
||||
mask_func3 = (1 << width_func3) - 1,
|
||||
|
@ -131,7 +131,7 @@ static const char* op_string(const Instr &instr) {
|
|||
case 7: return "ANDI";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
case Opcode::B:
|
||||
switch (func3) {
|
||||
case 0: return "BEQ";
|
||||
|
@ -181,7 +181,7 @@ static const char* op_string(const Instr &instr) {
|
|||
switch (func3) {
|
||||
case 0: return (func7 & 0x20) ? "SUBW" : "ADDW";
|
||||
case 1: return "SLLW";
|
||||
case 5: return (func7 & 0x20) ? "SRAW" : "SRLW";
|
||||
case 5: return (func7 & 0x20) ? "SRAW" : "SRLW";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
@ -194,7 +194,7 @@ static const char* op_string(const Instr &instr) {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::SYS:
|
||||
case Opcode::SYS:
|
||||
switch (func3) {
|
||||
case 0:
|
||||
switch (imm) {
|
||||
|
@ -204,7 +204,7 @@ static const char* op_string(const Instr &instr) {
|
|||
case 0x102: return "SRET";
|
||||
case 0x302: return "MRET";
|
||||
default:
|
||||
std::abort();
|
||||
std::abort();
|
||||
}
|
||||
case 1: return "CSRRW";
|
||||
case 2: return "CSRRS";
|
||||
|
@ -216,20 +216,20 @@ static const char* op_string(const Instr &instr) {
|
|||
std::abort();
|
||||
}
|
||||
case Opcode::FENCE: return "FENCE";
|
||||
case Opcode::FL:
|
||||
case Opcode::FL:
|
||||
switch (func3) {
|
||||
case 0x1: return "VL";
|
||||
case 0x2: return "FLW";
|
||||
case 0x3: return "FLD";
|
||||
default:
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FS:
|
||||
case Opcode::FS:
|
||||
switch (func3) {
|
||||
case 0x1: return "VS";
|
||||
case 0x2: return "FSW";
|
||||
case 0x3: return "FSD";
|
||||
default:
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::AMO: {
|
||||
|
@ -267,11 +267,11 @@ static const char* op_string(const Instr &instr) {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
default:
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
case Opcode::FCI:
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
case 0x00: return "FADD.S";
|
||||
case 0x01: return "FADD.D";
|
||||
|
@ -284,7 +284,7 @@ static const char* op_string(const Instr &instr) {
|
|||
case 0x2c: return "FSQRT.S";
|
||||
case 0x2d: return "FSQRT.D";
|
||||
case 0x10:
|
||||
switch (func3) {
|
||||
switch (func3) {
|
||||
case 0: return "FSGNJ.S";
|
||||
case 1: return "FSGNJN.S";
|
||||
case 2: return "FSGNJX.S";
|
||||
|
@ -292,7 +292,7 @@ static const char* op_string(const Instr &instr) {
|
|||
std::abort();
|
||||
}
|
||||
case 0x11:
|
||||
switch (func3) {
|
||||
switch (func3) {
|
||||
case 0: return "FSGNJ.D";
|
||||
case 1: return "FSGNJN.D";
|
||||
case 2: return "FSGNJX.D";
|
||||
|
@ -300,14 +300,14 @@ static const char* op_string(const Instr &instr) {
|
|||
std::abort();
|
||||
}
|
||||
case 0x14:
|
||||
switch (func3) {
|
||||
switch (func3) {
|
||||
case 0: return "FMIN.S";
|
||||
case 1: return "FMAX.S";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x15:
|
||||
switch (func3) {
|
||||
switch (func3) {
|
||||
case 0: return "FMIN.D";
|
||||
case 1: return "FMAX.D";
|
||||
default:
|
||||
|
@ -315,23 +315,23 @@ static const char* op_string(const Instr &instr) {
|
|||
}
|
||||
case 0x20: return "FCVT.S.D";
|
||||
case 0x21: return "FCVT.D.S";
|
||||
case 0x50:
|
||||
switch (func3) {
|
||||
case 0: return "FLE.S";
|
||||
case 1: return "FLT.S";
|
||||
case 0x50:
|
||||
switch (func3) {
|
||||
case 0: return "FLE.S";
|
||||
case 1: return "FLT.S";
|
||||
case 2: return "FEQ.S";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x51:
|
||||
switch (func3) {
|
||||
case 0: return "FLE.D";
|
||||
case 1: return "FLT.D";
|
||||
case 0x51:
|
||||
switch (func3) {
|
||||
case 0: return "FLE.D";
|
||||
case 1: return "FLT.D";
|
||||
case 2: return "FEQ.D";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x60:
|
||||
case 0x60:
|
||||
switch (rs2) {
|
||||
case 0: return "FCVT.W.S";
|
||||
case 1: return "FCVT.WU.S";
|
||||
|
@ -349,7 +349,7 @@ static const char* op_string(const Instr &instr) {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x68:
|
||||
case 0x68:
|
||||
switch (rs2) {
|
||||
case 0: return "FCVT.S.W";
|
||||
case 1: return "FCVT.S.WU";
|
||||
|
@ -381,13 +381,13 @@ static const char* op_string(const Instr &instr) {
|
|||
case Opcode::EXT1:
|
||||
switch (func7) {
|
||||
case 0:
|
||||
switch (func3) {
|
||||
switch (func3) {
|
||||
case 0: return "TMC";
|
||||
case 1: return "WSPAWN";
|
||||
case 2: return "SPLIT";
|
||||
case 2: return imm ? "SPLIT.N" : "SPLIT";
|
||||
case 3: return "JOIN";
|
||||
case 4: return "BAR";
|
||||
case 5: return "PRED";
|
||||
case 5: return imm ? "PRED.N" : "PRED";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
@ -398,7 +398,7 @@ static const char* op_string(const Instr &instr) {
|
|||
switch (func3) {
|
||||
case 1: {
|
||||
switch (func2) {
|
||||
case 0: return "CMOV";
|
||||
case 0: return "CMOV";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
@ -412,16 +412,16 @@ static const char* op_string(const Instr &instr) {
|
|||
}
|
||||
|
||||
namespace vortex {
|
||||
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
os << op_string(instr);
|
||||
int sep = 0;
|
||||
if (instr.getRDType() != RegType::None) {
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << instr.getRDType() << std::dec << instr.getRDest();
|
||||
}
|
||||
for (uint32_t i = 0; i < instr.getNRSrc(); ++i) {
|
||||
for (uint32_t i = 0; i < instr.getNRSrc(); ++i) {
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
if (instr.getRSType(i) != RegType::None) {
|
||||
if (instr.getRSType(i) != RegType::None) {
|
||||
os << instr.getRSType(i) << std::dec << instr.getRSrc(i);
|
||||
} else {
|
||||
os << "0x" << std::hex << instr.getRSrc(0);
|
||||
|
@ -435,7 +435,7 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
||||
std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
||||
auto instr = std::make_shared<Instr>();
|
||||
auto op = Opcode((code >> shift_opcode) & mask_opcode);
|
||||
instr->setOpcode(op);
|
||||
|
@ -460,12 +460,12 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
case InstType::R:
|
||||
switch (op) {
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
switch (func7) {
|
||||
case 0x2c: // FSQRT.S
|
||||
case 0x2d: // FSQRT.D
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
break;
|
||||
break;
|
||||
case 0x50: // FLE.S, FLT.S, FEQ.S
|
||||
case 0x51: // FLE.D, FLT.D, FEQ.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
|
@ -485,39 +485,44 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
instr->addSrcReg(rs2, RegType::None);
|
||||
break;
|
||||
case 0x70: // FCLASS.S, FMV.X.S
|
||||
case 0x71: // FCLASS.D, FMV.X.D
|
||||
case 0x71: // FCLASS.D, FMV.X.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
break;
|
||||
case 0x78: // FMV.S.X
|
||||
case 0x79: // FMV.D.X
|
||||
case 0x79: // FMV.D.X
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Opcode::EXT1:
|
||||
switch (func7) {
|
||||
case 0:
|
||||
switch (func3) {
|
||||
switch (func3) {
|
||||
case 0: // TMC
|
||||
case 3: // JOIN
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
case 1: // WSPAWN
|
||||
case 1: // WSPAWN
|
||||
case 4: // BAR
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
case 5: // PRED
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
instr->setImm(rd);
|
||||
break;
|
||||
case 2: // SPLIT
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->setImm(rs2);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
|
@ -542,7 +547,7 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
case Opcode::I:
|
||||
case Opcode::I_W:
|
||||
case Opcode::JALR:
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->setFunc3(func3);
|
||||
if (func3 == 0x1 || func3 == 0x5) {
|
||||
|
@ -560,7 +565,7 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
auto imm = code >> shift_rs2;
|
||||
instr->setImm(sext(imm, width_i_imm));
|
||||
}
|
||||
break;
|
||||
break;
|
||||
case Opcode::L:
|
||||
case Opcode::FL: {
|
||||
instr->setDestReg(rd, (op == Opcode::FL) ? RegType::Float : RegType::Integer);
|
||||
|
@ -578,24 +583,24 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
// CSR instructions
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setFunc3(func3);
|
||||
if (func3 < 5) {
|
||||
if (func3 < 5) {
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
} else {
|
||||
// zimm
|
||||
} else {
|
||||
// zimm
|
||||
instr->addSrcReg(rs1, RegType::None);
|
||||
}
|
||||
instr->setImm(code >> shift_rs2);
|
||||
} else {
|
||||
// ECALL/EBREAK instructions
|
||||
instr->setImm(code >> shift_rs2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
} break;
|
||||
case InstType::S: {
|
||||
case InstType::S: {
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, (op == Opcode::FS) ? RegType::Float : RegType::Integer);
|
||||
instr->setFunc3(func3);
|
||||
|
|
|
@ -1309,17 +1309,20 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->used_iregs.set(rsrc0);
|
||||
trace->fetch_stall = true;
|
||||
|
||||
auto stack_size = warp.ipdom_stack.size();
|
||||
|
||||
ThreadMask then_tmask, else_tmask;
|
||||
auto not_pred = immsrc & 0x1;
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
auto cond = warp.ireg_file.at(t).at(rsrc0);
|
||||
auto cond = (warp.ireg_file.at(t).at(rsrc0) & 0x1) ^ not_pred;
|
||||
then_tmask[t] = warp.tmask.test(t) && cond;
|
||||
else_tmask[t] = warp.tmask.test(t) && !cond;
|
||||
}
|
||||
|
||||
bool is_divergent = then_tmask.any() && else_tmask.any();
|
||||
if (is_divergent) {
|
||||
if (warp.ipdom_stack.size() == arch_.ipdom_size()) {
|
||||
std::cout << "IPDOM stack is full! size=" << std::dec << warp.ipdom_stack.size() << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << trace->uuid << ")\n" << std::flush;
|
||||
if (stack_size == arch_.ipdom_size()) {
|
||||
std::cout << "IPDOM stack is full! size=" << std::dec << stack_size << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << trace->uuid << ")\n" << std::flush;
|
||||
std::abort();
|
||||
}
|
||||
// set new thread mask
|
||||
|
@ -1331,7 +1334,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
}
|
||||
// return the split result to rd (the IPDOM stack size sampled on entry, which JOIN later compares against)
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
rddata[t].i = is_divergent;
|
||||
rddata[t].i = stack_size;
|
||||
}
|
||||
rd_write = true;
|
||||
} break;
|
||||
|
@ -1342,8 +1345,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->used_iregs.set(rsrc0);
|
||||
trace->fetch_stall = true;
|
||||
|
||||
int is_divergent = warp.ireg_file.at(thread_start).at(rsrc0);
|
||||
if (is_divergent != 0) {
|
||||
auto stack_ptr = warp.ireg_file.at(thread_start).at(rsrc0);
|
||||
if (stack_ptr != warp.ipdom_stack.size()) {
|
||||
if (warp.ipdom_stack.empty()) {
|
||||
std::cout << "IPDOM stack is empty!\n" << std::flush;
|
||||
std::abort();
|
||||
|
@ -1372,8 +1375,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->used_iregs.set(rsrc1);
|
||||
trace->fetch_stall = true;
|
||||
ThreadMask pred;
|
||||
auto not_pred = immsrc & 0x1;
|
||||
for (uint32_t t = 0; t < num_threads; ++t) {
|
||||
pred[t] = warp.tmask.test(t) && (warp.ireg_file.at(t).at(rsrc0) & 0x1);
|
||||
auto cond = (warp.ireg_file.at(t).at(rsrc0) & 0x1) ^ not_pred;
|
||||
pred[t] = warp.tmask.test(t) && cond;
|
||||
}
|
||||
if (pred.any()) {
|
||||
next_tmask &= pred;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue