mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
+ support for ZICOND RISC-V extension
+ RTL decode refactoring
This commit is contained in:
parent
8d97d2c998
commit
b3f96e288a
48 changed files with 1079 additions and 1018 deletions
|
@ -1,12 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,7 +17,7 @@ import sys
|
|||
import argparse
|
||||
import csv
|
||||
import re
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
|
||||
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
|
||||
|
@ -25,7 +25,7 @@ def parse_args():
|
|||
parser.add_argument('log', help='Input log file')
|
||||
return parser.parse_args()
|
||||
|
||||
def parse_simx(log_filename):
|
||||
def parse_simx(log_filename):
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
|
||||
opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
|
||||
|
@ -42,12 +42,12 @@ def parse_simx(log_filename):
|
|||
if line.startswith("DEBUG Fetch:"):
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
instr_data = {}
|
||||
instr_data = {}
|
||||
instr_data["lineno"] = lineno
|
||||
instr_data["PC"] = re.search(pc_pattern, line).group(1)
|
||||
instr_data["core_id"] = re.search(core_id_pattern, line).group(1)
|
||||
instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1)
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
|
||||
instr_data["uuid"] = re.search(uuid_pattern, line).group(1)
|
||||
elif line.startswith("DEBUG Instr"):
|
||||
instr_data["instr"] = re.search(instr_pattern, line).group(1)
|
||||
|
@ -60,13 +60,13 @@ def parse_simx(log_filename):
|
|||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
return entries
|
||||
|
||||
|
||||
def reverse_binary(bin_str):
|
||||
return bin_str[::-1]
|
||||
|
||||
def bin_to_array(bin_str):
|
||||
return [int(bit) for bit in bin_str]
|
||||
|
||||
|
||||
def append_reg(text, value, sep):
|
||||
if sep:
|
||||
text += ", "
|
||||
|
@ -77,14 +77,7 @@ def append_reg(text, value, sep):
|
|||
text += "x" + value
|
||||
sep = True
|
||||
return text, sep
|
||||
|
||||
def append_imm(text, value, sep):
|
||||
if sep:
|
||||
text += ", "
|
||||
text += value
|
||||
sep = True
|
||||
return text, sep
|
||||
|
||||
|
||||
def append_value(text, reg, value, tmask_arr, sep):
|
||||
text, sep = append_reg(text, reg, sep)
|
||||
text += "={"
|
||||
|
@ -97,8 +90,8 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
text +="-"
|
||||
text += "}"
|
||||
return text, sep
|
||||
|
||||
def parse_rtlsim(log_filename):
|
||||
|
||||
def parse_rtlsim(log_filename):
|
||||
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
|
@ -108,8 +101,6 @@ def parse_rtlsim(log_filename):
|
|||
tmask_pattern = r"tmask=(\d+)"
|
||||
wb_pattern = r"wb=(\d)"
|
||||
opds_pattern = r"opds=(\d+)"
|
||||
use_imm_pattern = r"use_imm=(\d)"
|
||||
imm_pattern = r"imm=(0x[0-9a-fA-F]+)"
|
||||
rd_pattern = r"rd=(\d+)"
|
||||
rs1_pattern = r"rs1=(\d+)"
|
||||
rs2_pattern = r"rs2=(\d+)"
|
||||
|
@ -120,24 +111,24 @@ def parse_rtlsim(log_filename):
|
|||
rd_data_pattern = r"data=\{(.+?)\}"
|
||||
eop_pattern = r"eop=(\d)"
|
||||
uuid_pattern = r"#(\d+)"
|
||||
entries = []
|
||||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = {}
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
core_id = line_match.group(1)
|
||||
stage = line_match.group(2)
|
||||
if stage == "decode":
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
trace["uuid"] = uuid
|
||||
trace["PC"] = PC
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = core_id
|
||||
trace["warp_id"] = warp_id
|
||||
trace["warp_id"] = warp_id
|
||||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
trace["opcode"] = re.search(op_pattern, line).group(1)
|
||||
|
@ -146,8 +137,6 @@ def parse_rtlsim(log_filename):
|
|||
trace["rs1"] = re.search(rs1_pattern, line).group(1)
|
||||
trace["rs2"] = re.search(rs2_pattern, line).group(1)
|
||||
trace["rs3"] = re.search(rs3_pattern, line).group(1)
|
||||
trace["use_imm"] = re.search(use_imm_pattern, line).group(1) == "1"
|
||||
trace["imm"] = re.search(imm_pattern, line).group(1)
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "issue":
|
||||
if uuid in instr_data:
|
||||
|
@ -162,7 +151,7 @@ def parse_rtlsim(log_filename):
|
|||
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
|
||||
trace["issued"] = True
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "commit":
|
||||
elif stage == "commit":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
if "issued" in trace:
|
||||
|
@ -205,16 +194,14 @@ def parse_rtlsim(log_filename):
|
|||
del trace["rs1"]
|
||||
del trace["rs2"]
|
||||
del trace["rs3"]
|
||||
del trace["use_imm"]
|
||||
del trace["imm"]
|
||||
del trace["issued"]
|
||||
del trace["issued"]
|
||||
del instr_data[uuid]
|
||||
entries.append(trace)
|
||||
return entries
|
||||
return entries
|
||||
|
||||
def write_csv(log_filename, csv_filename, log_type):
|
||||
entries = None
|
||||
|
||||
|
||||
# parse log file
|
||||
if log_type == "rtlsim":
|
||||
entries = parse_rtlsim(log_filename)
|
||||
|
@ -223,19 +210,19 @@ def write_csv(log_filename, csv_filename, log_type):
|
|||
else:
|
||||
print('Error: invalid log type')
|
||||
sys.exit()
|
||||
|
||||
|
||||
# sort entries by uuid
|
||||
entries.sort(key=lambda x: (int(x['uuid'])))
|
||||
for entry in entries:
|
||||
del entry['lineno']
|
||||
|
||||
|
||||
# write to CSV
|
||||
with open(csv_filename, 'w', newline='') as csv_file:
|
||||
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "operands", "destination"]
|
||||
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
for entry in entries:
|
||||
writer.writerow(entry)
|
||||
writer.writerow(entry)
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
|
|
|
@ -40,6 +40,10 @@
|
|||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
`ifndef EXT_ZICOND_ENABLE
|
||||
`define EXT_ZICOND_ENABLE
|
||||
`endif
|
||||
|
||||
`ifndef XLEN_32
|
||||
`ifndef XLEN_64
|
||||
`define XLEN_32
|
||||
|
@ -637,6 +641,12 @@
|
|||
`define EXT_M_ENABLED 0
|
||||
`endif
|
||||
|
||||
`ifdef EXT_ZICOND_ENABLE
|
||||
`define EXT_ZICOND_ENABLED 1
|
||||
`else
|
||||
`define EXT_ZICOND_ENABLED 0
|
||||
`endif
|
||||
|
||||
`define ISA_STD_A 0
|
||||
`define ISA_STD_C 2
|
||||
`define ISA_STD_D 3
|
||||
|
@ -654,12 +664,14 @@
|
|||
`define ISA_EXT_L2CACHE 2
|
||||
`define ISA_EXT_L3CACHE 3
|
||||
`define ISA_EXT_LMEM 4
|
||||
`define ISA_EXT_ZICOND 5
|
||||
|
||||
`define MISA_EXT (`ICACHE_ENABLED << `ISA_EXT_ICACHE) \
|
||||
| (`DCACHE_ENABLED << `ISA_EXT_DCACHE) \
|
||||
| (`L2_ENABLED << `ISA_EXT_L2CACHE) \
|
||||
| (`L3_ENABLED << `ISA_EXT_L3CACHE) \
|
||||
| (`LMEM_ENABLED << `ISA_EXT_LMEM)
|
||||
| (`LMEM_ENABLED << `ISA_EXT_LMEM) \
|
||||
| (`EXT_ZICOND_ENABLED << `ISA_EXT_ZICOND)
|
||||
|
||||
`define MISA_STD (`EXT_A_ENABLED << 0) /* A - Atomic Instructions extension */ \
|
||||
| (0 << 1) /* B - Tentatively reserved for Bit operations extension */ \
|
||||
|
|
|
@ -55,6 +55,10 @@
|
|||
`define UUID_WIDTH 1
|
||||
`endif
|
||||
|
||||
`define PC_BITS (`XLEN-1)
|
||||
`define OFFSET_BITS 12
|
||||
`define IMM_BITS `XLEN
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_ALU 0
|
||||
|
@ -105,6 +109,10 @@
|
|||
`define INST_EXT3 7'b1011011 // 0x5B
|
||||
`define INST_EXT4 7'b1111011 // 0x7B
|
||||
|
||||
// Opcode extensions
|
||||
`define INST_R_F7_MUL 7'b0000001
|
||||
`define INST_R_F7_ZICOND 7'b0000111
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_FRM_RNE 3'b000 // round to nearest even
|
||||
|
@ -118,7 +126,7 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_MOD_BITS 3
|
||||
`define INST_MOD_BITS $bits(op_mod_t)
|
||||
`define INST_FMT_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -135,14 +143,23 @@
|
|||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
`define INST_ALU_OTHER 4'b0111
|
||||
`define INST_ALU_CZEQ 4'b1010
|
||||
`define INST_ALU_CZNE 4'b1011
|
||||
//`define INST_ALU_UNUSED 4'b0001
|
||||
//`define INST_ALU_UNUSED 4'b0110
|
||||
|
||||
|
||||
`define ALU_TYPE_BITS 2
|
||||
`define ALU_TYPE_ARITH 0
|
||||
`define ALU_TYPE_BRANCH 1
|
||||
`define ALU_TYPE_MULDIV 2
|
||||
`define ALU_TYPE_OTHER 3
|
||||
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_CLASS(op) op[3:2]
|
||||
`define INST_ALU_SIGNED(op) op[0]
|
||||
`define INST_ALU_IS_SUB(op) op[1]
|
||||
`define INST_ALU_IS_BR(mod) mod[0]
|
||||
`define INST_ALU_IS_M(mod) mod[1]
|
||||
`define INST_ALU_IS_W(mod) mod[2]
|
||||
`define INST_ALU_IS_CZERO(op) (op[3:1] == 3'b101)
|
||||
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
|
@ -225,9 +242,8 @@
|
|||
`define INST_FPU_NMSUB 4'b1110
|
||||
`define INST_FPU_NMADD 4'b1111
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_IS_W(mod) (mod[4])
|
||||
`define INST_FPU_IS_CLASS(op, mod) (op == `INST_FPU_MISC && mod == 3)
|
||||
`define INST_FPU_IS_MVXW(op, mod) (op == `INST_FPU_MISC && mod == 4)
|
||||
`define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3)
|
||||
`define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4)
|
||||
|
||||
`define INST_SFU_TMC 4'h0
|
||||
`define INST_SFU_WSPAWN 4'h1
|
||||
|
@ -238,7 +254,6 @@
|
|||
`define INST_SFU_CSRRW 4'h6
|
||||
`define INST_SFU_CSRRS 4'h7
|
||||
`define INST_SFU_CSRRC 4'h8
|
||||
`define INST_SFU_CMOV 4'h9
|
||||
`define INST_SFU_BITS 4
|
||||
`define INST_SFU_CSR(f3) (4'h6 + 4'(f3) - 4'h1)
|
||||
`define INST_SFU_IS_WCTL(op) (op <= 5)
|
||||
|
@ -414,13 +429,10 @@
|
|||
data.uuid, \
|
||||
data.wis, \
|
||||
data.tmask, \
|
||||
data.PC, \
|
||||
data.op_type, \
|
||||
data.op_mod, \
|
||||
data.wb, \
|
||||
data.use_PC, \
|
||||
data.use_imm, \
|
||||
data.PC, \
|
||||
data.imm, \
|
||||
data.rd, \
|
||||
tid, \
|
||||
data.rs1_data, \
|
||||
|
|
|
@ -26,7 +26,7 @@ package VX_gpu_pkg;
|
|||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NUM_WARPS-1:0] wmask;
|
||||
logic [`XLEN-1:0] pc;
|
||||
logic [`PC_BITS-1:0] pc;
|
||||
} wspawn_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -34,7 +34,7 @@ package VX_gpu_pkg;
|
|||
logic is_dvg;
|
||||
logic [`NUM_THREADS-1:0] then_tmask;
|
||||
logic [`NUM_THREADS-1:0] else_tmask;
|
||||
logic [`XLEN-1:0] next_pc;
|
||||
logic [`PC_BITS-1:0] next_pc;
|
||||
} split_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -55,9 +55,9 @@ package VX_gpu_pkg;
|
|||
} barrier_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [`XLEN-1:0] startup_addr;
|
||||
logic [`XLEN-1:0] startup_arg;
|
||||
logic [7:0] mpm_class;
|
||||
logic [`XLEN-1:0] startup_addr;
|
||||
logic [`XLEN-1:0] startup_arg;
|
||||
logic [7:0] mpm_class;
|
||||
} base_dcrs_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -77,6 +77,42 @@ package VX_gpu_pkg;
|
|||
logic [`PERF_CTR_BITS-1:0] latency;
|
||||
} mem_perf_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic is_w;
|
||||
logic [`ALU_TYPE_BITS-1:0] xtype;
|
||||
logic [`IMM_BITS-1:0] imm;
|
||||
} alu_mod_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [`INST_FRM_BITS-1:0] frm;
|
||||
logic [`INST_FMT_BITS-1:0] fmt;
|
||||
} fpu_mod_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic is_float;
|
||||
logic [`OFFSET_BITS-1:0] offset;
|
||||
} lsu_mod_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic use_imm;
|
||||
logic [`VX_CSR_ADDR_BITS-1:0] addr;
|
||||
logic [4:0] imm;
|
||||
} csr_mod_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic is_neg;
|
||||
} wctl_mod_t;
|
||||
|
||||
typedef union packed {
|
||||
alu_mod_t alu;
|
||||
fpu_mod_t fpu;
|
||||
lsu_mod_t lsu;
|
||||
csr_mod_t csr;
|
||||
wctl_mod_t wctl;
|
||||
} op_mod_t;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
|
||||
///////////////////////// LSU memory Parameters ///////////////////////////
|
||||
|
|
|
@ -195,6 +195,8 @@
|
|||
|
||||
`define LTRIM(x, s) x[s-1:0]
|
||||
|
||||
`define SEXT(len, x) {{(len-$bits(x)+1){x[$bits(x)-1]}}, x[$bits(x)-2:0]}
|
||||
|
||||
`define TRACE_ARRAY1D(lvl, fmt, arr, n) \
|
||||
`TRACE(lvl, ("{")); \
|
||||
for (integer __i = (n-1); __i >= 0; --__i) begin \
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -20,14 +20,14 @@ module VX_alu_int #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if,
|
||||
VX_branch_ctl_if.master branch_ctl_if
|
||||
);
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
|
@ -40,9 +40,9 @@ module VX_alu_int #(
|
|||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] add_result;
|
||||
wire [NUM_LANES-1:0][`XLEN:0] sub_result; // +1 bit for branch compare
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] shr_result;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] shr_zic_result;
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] msc_result;
|
||||
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] add_result_w;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] sub_result_w;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] shr_result_w;
|
||||
|
@ -52,7 +52,7 @@ module VX_alu_int #(
|
|||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_result_r;
|
||||
|
||||
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
|
||||
wire is_alu_w = execute_if.data.op_mod.alu.is_w;
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
@ -61,17 +61,17 @@ module VX_alu_int #(
|
|||
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(execute_if.data.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(execute_if.data.op_type);
|
||||
wire is_br_op = `INST_ALU_IS_BR(execute_if.data.op_mod);
|
||||
wire is_br_op = (execute_if.data.op_mod.alu.xtype == `ALU_TYPE_BRANCH);
|
||||
wire is_sub_op = `INST_ALU_IS_SUB(alu_op);
|
||||
wire is_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire is_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire [1:0] op_class = is_br_op ? `INST_BR_CLASS(alu_op) : `INST_ALU_CLASS(alu_op);
|
||||
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1 = execute_if.data.rs1_data;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2 = execute_if.data.rs2_data;
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1_PC = execute_if.data.use_PC ? {NUM_LANES{execute_if.data.PC}} : alu_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.use_imm ? {NUM_LANES{execute_if.data.imm}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.use_imm && ~is_br_op) ? {NUM_LANES{execute_if.data.imm}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in1_PC = execute_if.data.op_mod.alu.use_PC ? {NUM_LANES{execute_if.data.PC, 1'd0}} : alu_in1;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.op_mod.alu.use_imm ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_mod.alu.imm)}} : alu_in2;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.op_mod.alu.use_imm && ~is_br_op) ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_mod.alu.imm)}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
|
@ -85,9 +85,20 @@ module VX_alu_int #(
|
|||
assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
assign shr_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
always @(*) begin
|
||||
case (alu_op[1:0])
|
||||
`ifdef EXT_ZICOND_ENABLE
|
||||
2'b10, 2'b11: begin // CZERO
|
||||
shr_zic_result[i] = alu_in1[i] & {`XLEN{alu_op[0] ^ (| alu_in2[i])}};
|
||||
end
|
||||
`endif
|
||||
default: begin // SRL, SRA, SRLI, SRAI
|
||||
shr_zic_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
wire [32:0] shr_in1_w = {is_signed && alu_in1[i][31], alu_in1[i][31:0]};
|
||||
wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
|
||||
assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
|
||||
|
@ -102,48 +113,51 @@ module VX_alu_int #(
|
|||
2'b11: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
|
||||
endcase
|
||||
end
|
||||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0]));
|
||||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]});
|
||||
wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? sub_result[i][`XLEN-1:0] : slt_br_result;
|
||||
always @(*) begin
|
||||
case ({is_alu_w, op_class})
|
||||
case ({is_alu_w, op_class})
|
||||
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
|
||||
3'b010: alu_result[i] = shr_result[i]; // SRL, SRA, SRLI, SRAI
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
|
||||
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
|
||||
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
|
||||
3'b110: alu_result[i] = shr_result_w[i]; // SRLW, SRAW, SRLIW, SRAIW
|
||||
3'b111: alu_result[i] = msc_result_w[i]; // SLLW
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// branch
|
||||
|
||||
wire [`XLEN-1:0] PC_r, imm_r;
|
||||
wire [`PC_BITS-1:0] PC_r;
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [`PC_BITS-1:0] cbr_dest, cbr_dest_r;
|
||||
wire [LANE_WIDTH-1:0] tid, tid_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
assign cbr_dest = add_result[0][1 +: `PC_BITS];
|
||||
|
||||
if (LANE_BITS != 0) begin
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
end else begin
|
||||
assign tid = 0;
|
||||
end
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH)
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `PC_BITS + `PC_BITS + 1 + `INST_BR_BITS + LANE_WIDTH)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (execute_if.valid),
|
||||
.ready_in (execute_if.ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, cbr_dest, is_br_op, br_op, tid}),
|
||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, cbr_dest_r, is_br_op_r, br_op_r, tid_r}),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready)
|
||||
);
|
||||
|
@ -152,19 +166,19 @@ module VX_alu_int #(
|
|||
wire is_br_neg = `INST_BR_IS_NEG(br_op_r);
|
||||
wire is_br_less = `INST_BR_IS_LESS(br_op_r);
|
||||
wire is_br_static = `INST_BR_IS_STATIC(br_op_r);
|
||||
wire [`XLEN-1:0] br_result = alu_result_r[tid_r];
|
||||
|
||||
wire [`XLEN-1:0] br_result = alu_result_r[tid_r];
|
||||
wire is_less = br_result[0];
|
||||
wire is_equal = br_result[1];
|
||||
|
||||
wire br_enable = is_br_op_r && commit_if.valid && commit_if.ready && commit_if.data.eop;
|
||||
wire br_taken = ((is_br_less ? is_less : is_equal) ^ is_br_neg) | is_br_static;
|
||||
wire [`XLEN-1:0] br_dest = is_br_static ? br_result : (PC_r + imm_r);
|
||||
wire [`PC_BITS-1:0] br_dest = is_br_static ? br_result[1 +: `PC_BITS] : cbr_dest_r;
|
||||
wire [`NW_WIDTH-1:0] br_wid;
|
||||
`ASSIGN_BLOCKED_WID (br_wid, commit_if.data.wid, BLOCK_IDX, `NUM_ALU_BLOCKS)
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_WIDTH + 1 + `XLEN)
|
||||
.DATAW (1 + `NW_WIDTH + 1 + `PC_BITS)
|
||||
) branch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -173,17 +187,17 @@ module VX_alu_int #(
|
|||
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? (PC_r + 4) : alu_result_r[i];
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? {(PC_r + `PC_BITS'(2)), 1'd0} : alu_result_r[i];
|
||||
end
|
||||
|
||||
assign commit_if.data.PC = PC_r;
|
||||
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, commit_if.data.PC, branch_ctl_if.taken, branch_ctl_if.dest, commit_if.data.uuid));
|
||||
$time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -29,7 +29,7 @@ module VX_alu_muldiv #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
|
@ -38,7 +38,7 @@ module VX_alu_muldiv #(
|
|||
wire is_mulx_op = `INST_M_IS_MULX(muldiv_op);
|
||||
wire is_signed_op = `INST_M_SIGNED(muldiv_op);
|
||||
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
|
||||
wire is_alu_w = execute_if.data.op_mod.alu.is_w;
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
@ -47,7 +47,7 @@ module VX_alu_muldiv #(
|
|||
wire [`UUID_WIDTH-1:0] mul_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] mul_wid_out;
|
||||
wire [NUM_LANES-1:0] mul_tmask_out;
|
||||
wire [`XLEN-1:0] mul_PC_out;
|
||||
wire [`PC_BITS-1:0] mul_PC_out;
|
||||
wire [`NR_BITS-1:0] mul_rd_out;
|
||||
wire mul_wb_out;
|
||||
wire [PID_WIDTH-1:0] mul_pid_out;
|
||||
|
@ -203,7 +203,7 @@ module VX_alu_muldiv #(
|
|||
wire [`UUID_WIDTH-1:0] div_uuid_out;
|
||||
wire [`NW_WIDTH-1:0] div_wid_out;
|
||||
wire [NUM_LANES-1:0] div_tmask_out;
|
||||
wire [`XLEN-1:0] div_PC_out;
|
||||
wire [`PC_BITS-1:0] div_PC_out;
|
||||
wire [`NR_BITS-1:0] div_rd_out;
|
||||
wire div_wb_out;
|
||||
wire [PID_WIDTH-1:0] div_pid_out;
|
||||
|
|
|
@ -32,7 +32,7 @@ module VX_alu_unit #(
|
|||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||
|
||||
|
@ -59,7 +59,7 @@ module VX_alu_unit #(
|
|||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
|
||||
wire is_muldiv_op = `EXT_M_ENABLED && `INST_ALU_IS_M(per_block_execute_if[block_idx].data.op_mod);
|
||||
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_mod.alu.xtype == `ALU_TYPE_MULDIV);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -31,7 +31,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||
|
||||
|
@ -47,7 +47,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
|
||||
wire [`NUM_EX_UNITS-1:0] valid_in;
|
||||
wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in;
|
||||
wire [`NUM_EX_UNITS-1:0] ready_in;
|
||||
|
@ -75,14 +75,14 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
assign commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
|
||||
assign commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready;
|
||||
assign commit_tmask[i]= {`NUM_THREADS{commit_fire[i]}} & commit_arb_if[i].data.tmask;
|
||||
assign commit_wid[i] = commit_arb_if[i].data.wid;
|
||||
assign commit_eop[i] = commit_arb_if[i].data.eop;
|
||||
end
|
||||
|
||||
// CSRs update
|
||||
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0][COMMIT_SIZEW-1:0] commit_size, commit_size_r;
|
||||
wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr;
|
||||
wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr;
|
||||
|
@ -158,17 +158,17 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
assign writeback_if[i].valid = commit_arb_if[i].valid && commit_arb_if[i].data.wb;
|
||||
assign writeback_if[i].data.uuid = commit_arb_if[i].data.uuid;
|
||||
assign writeback_if[i].data.uuid = commit_arb_if[i].data.uuid;
|
||||
assign writeback_if[i].data.wis = wid_to_wis(commit_arb_if[i].data.wid);
|
||||
assign writeback_if[i].data.PC = commit_arb_if[i].data.PC;
|
||||
assign writeback_if[i].data.tmask= commit_arb_if[i].data.tmask;
|
||||
assign writeback_if[i].data.rd = commit_arb_if[i].data.rd;
|
||||
assign writeback_if[i].data.data = commit_arb_if[i].data.data;
|
||||
assign writeback_if[i].data.sop = commit_arb_if[i].data.sop;
|
||||
assign writeback_if[i].data.PC = commit_arb_if[i].data.PC;
|
||||
assign writeback_if[i].data.tmask= commit_arb_if[i].data.tmask;
|
||||
assign writeback_if[i].data.rd = commit_arb_if[i].data.rd;
|
||||
assign writeback_if[i].data.data = commit_arb_if[i].data.data;
|
||||
assign writeback_if[i].data.sop = commit_arb_if[i].data.sop;
|
||||
assign writeback_if[i].data.eop = commit_arb_if[i].data.eop;
|
||||
assign commit_arb_if[i].ready = 1'b1; // writeback has no backpressure
|
||||
end
|
||||
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
reg [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value_r;
|
||||
always @(posedge clk) begin
|
||||
|
@ -177,13 +177,13 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
assign sim_wb_value = sim_wb_value_r;
|
||||
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
always @(posedge clk) begin
|
||||
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, commit_if[j * `ISSUE_WIDTH + i].data.PC));
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
trace_ex_type(1, j);
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);
|
||||
|
|
|
@ -39,7 +39,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
|
@ -51,8 +51,8 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
wire csr_wr_enable;
|
||||
wire csr_req_ready;
|
||||
|
||||
wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = execute_if.data.imm[`VX_CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = execute_if.data.imm[`VX_CSR_ADDR_BITS +: `NRI_BITS];
|
||||
wire [`VX_CSR_ADDR_BITS-1:0] csr_addr = execute_if.data.op_mod.csr.addr;
|
||||
wire [`NRI_BITS-1:0] csr_imm = execute_if.data.op_mod.csr.imm;
|
||||
|
||||
wire is_fpu_csr = (csr_addr <= `VX_CSR_FCSR);
|
||||
|
||||
|
@ -134,7 +134,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
// CSR write
|
||||
|
||||
assign csr_req_data = execute_if.data.use_imm ? `XLEN'(csr_imm) : rs1_data[0];
|
||||
assign csr_req_data = execute_if.data.op_mod.csr.use_imm ? `XLEN'(csr_imm) : rs1_data[0];
|
||||
assign csr_wr_enable = (csr_write_enable || (| csr_req_data));
|
||||
|
||||
always @(*) begin
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
use_``x = 1
|
||||
`endif
|
||||
|
||||
module VX_decode #(
|
||||
module VX_decode import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -42,7 +42,7 @@ module VX_decode #(
|
|||
VX_decode_sched_if.master decode_sched_if
|
||||
);
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
|
@ -50,10 +50,9 @@ module VX_decode #(
|
|||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
reg [`INST_MOD_BITS-1:0] op_mod;
|
||||
op_mod_t op_mod;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [`XLEN-1:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3;
|
||||
reg is_wstall;
|
||||
|
||||
wire [31:0] instr = fetch_if.data.instr;
|
||||
|
@ -150,14 +149,11 @@ module VX_decode #(
|
|||
|
||||
ex_type = '0;
|
||||
op_type = 'x;
|
||||
op_mod = '0;
|
||||
op_mod = 'x;
|
||||
rd_r = '0;
|
||||
rs1_r = '0;
|
||||
rs2_r = '0;
|
||||
rs3_r = '0;
|
||||
imm = 'x;
|
||||
use_imm = 0;
|
||||
use_PC = 0;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
|
@ -168,107 +164,140 @@ module VX_decode #(
|
|||
`INST_I: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{(`XLEN-12){i_imm[11]}}, i_imm};
|
||||
op_mod.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_PC = 0;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.imm = `SEXT(`IMM_BITS, i_imm);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
end
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_PC = 0;
|
||||
op_mod.alu.use_imm = 0;
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
case (func7)
|
||||
`ifdef EXT_M_ENABLE
|
||||
`INST_R_F7_MUL: begin
|
||||
// MUL, MULH, MULHSU, MULHU
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod.alu.xtype = `ALU_TYPE_MULDIV;
|
||||
end
|
||||
`endif
|
||||
`ifdef EXT_ZICOND_ENABLE
|
||||
`INST_R_F7_ZICOND: begin
|
||||
// CZERO-EQZ, CZERO-NEZ
|
||||
op_type = func3[1] ? `INST_OP_BITS'(`INST_ALU_CZNE) : `INST_OP_BITS'(`INST_ALU_CZEQ);
|
||||
op_mod.alu.xtype = `ALU_TYPE_ARITH;
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
op_mod.alu.xtype = `ALU_TYPE_ARITH;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
`ifdef XLEN_64
|
||||
`INST_I_W: begin
|
||||
// ADDIW, SLLIW, SRLIW, SRAIW
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
op_mod[2] = 1;
|
||||
op_mod.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_mod.alu.is_w = 1;
|
||||
op_mod.alu.use_PC = 0;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.imm = `SEXT(`IMM_BITS, iw_imm);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{(`XLEN-12){iw_imm[11]}}, iw_imm};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R_W: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_M_ENABLE
|
||||
if (func7[0]) begin
|
||||
// MULW, DIVW, DIVUW, REMW, REMUW
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod[1] = 1;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
// ADDW, SUBW, SLLW, SRLW, SRAW
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
end
|
||||
op_mod[2] = 1;
|
||||
op_mod.alu.is_w = 1;
|
||||
op_mod.alu.use_PC = 0;
|
||||
op_mod.alu.use_imm = 0;
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
case (func7)
|
||||
`ifdef EXT_M_ENABLE
|
||||
`INST_R_F7_MUL: begin
|
||||
// MULW, DIVW, DIVUW, REMW, REMUW
|
||||
op_type = `INST_OP_BITS'(m_type);
|
||||
op_mod.alu.xtype = `ALU_TYPE_MULDIV;
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
// ADDW, SUBW, SLLW, SRLW, SRAW
|
||||
op_type = `INST_OP_BITS'(r_type);
|
||||
op_mod.alu.xtype = `ALU_TYPE_ARITH;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
|
||||
op_mod.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_PC = 0;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.imm = {{`IMM_BITS-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
imm = {{`XLEN-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
op_mod.alu.xtype = `ALU_TYPE_ARITH;
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_PC = 1;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.imm = {{`IMM_BITS-31{ui_imm[19]}}, ui_imm[18:0], 12'(0)};
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JAL);
|
||||
op_mod[0] = 1;
|
||||
op_mod.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_PC = 1;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.imm = `SEXT(`IMM_BITS, jal_imm);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-21){jal_imm[20]}}, jal_imm};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_BR_JALR);
|
||||
op_mod[0] = 1;
|
||||
op_mod.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_PC = 0;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.imm = `SEXT(`IMM_BITS, u_12);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(b_type);
|
||||
op_mod[0] = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
op_mod.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_PC = 1;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.imm = `SEXT(`IMM_BITS, b_imm);
|
||||
is_wstall = 1;
|
||||
imm = {{(`XLEN-13){b_imm[12]}}, b_imm};
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
|
@ -280,25 +309,26 @@ module VX_decode #(
|
|||
if (func3[1:0] != 0) begin
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CSR(func3[1:0]));
|
||||
op_mod.csr.addr = u_12;
|
||||
op_mod.csr.use_imm = func3[2];
|
||||
use_rd = 1;
|
||||
is_wstall = is_fpu_csr; // only stall for FPU CSRs
|
||||
use_imm = func3[2];
|
||||
imm[`VX_CSR_ADDR_BITS-1:0] = u_12; // addr
|
||||
`USED_IREG (rd);
|
||||
if (func3[2]) begin
|
||||
imm[`VX_CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
|
||||
op_mod.csr.imm = rs1;
|
||||
end else begin
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
end else begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(s_type);
|
||||
op_mod[0] = 1;
|
||||
op_mod.alu.xtype = `ALU_TYPE_BRANCH;
|
||||
op_mod.alu.is_w = 0;
|
||||
op_mod.alu.use_imm = 1;
|
||||
op_mod.alu.use_PC = 1;
|
||||
op_mod.alu.imm = `IMM_BITS'd4;
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = `XLEN'd4;
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
end
|
||||
|
@ -308,9 +338,9 @@ module VX_decode #(
|
|||
`INST_L: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b0, func3});
|
||||
op_mod.lsu.is_float = opcode[2];
|
||||
op_mod.lsu.offset = u_12;
|
||||
use_rd = 1;
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
use_imm = 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
`USED_FREG (rd);
|
||||
|
@ -325,8 +355,8 @@ module VX_decode #(
|
|||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b1, func3});
|
||||
imm = {{(`XLEN-12){s_imm[11]}}, s_imm};
|
||||
use_imm = 1;
|
||||
op_mod.lsu.is_float = opcode[2];
|
||||
op_mod.lsu.offset = s_imm;
|
||||
`USED_IREG (rs1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
|
@ -342,8 +372,8 @@ module VX_decode #(
|
|||
`INST_FNMADD: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `INST_OP_BITS'({2'b11, opcode[3:2]});
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
op_mod.fpu.frm = func3;
|
||||
op_mod.fpu.fmt[0] = func2[0]; // float / double
|
||||
use_rd = 1;
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -352,10 +382,9 @@ module VX_decode #(
|
|||
end
|
||||
`INST_FCI: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_mod = `INST_MOD_BITS'(func3);
|
||||
`ifdef FLEN_64
|
||||
imm[0] = func2[0]; // destination is double?
|
||||
`endif
|
||||
op_mod.fpu.frm = func3;
|
||||
op_mod.fpu.fmt[0] = func2[0]; // float / double
|
||||
op_mod.fpu.fmt[1] = rs2[1]; // int32 / int64
|
||||
use_rd = 1;
|
||||
case (func5)
|
||||
5'b00000, // FADD
|
||||
|
@ -370,7 +399,7 @@ module VX_decode #(
|
|||
5'b00100: begin
|
||||
// NCP: FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = `INST_MOD_BITS'(func3[1:0]);
|
||||
op_mod.fpu.frm = `INST_FRM_BITS'(func3[1:0]);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
|
@ -378,7 +407,7 @@ module VX_decode #(
|
|||
5'b00101: begin
|
||||
// NCP: FMIN=6, FMAX=7
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = func3[0] ? 7 : 6;
|
||||
op_mod.fpu.frm = `INST_FRM_BITS'(func3[0] ? 7 : 6);
|
||||
`USED_FREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
`USED_FREG (rs2);
|
||||
|
@ -407,18 +436,12 @@ module VX_decode #(
|
|||
5'b11000: begin
|
||||
// FCVT.W.X, FCVT.WU.X
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_F2U) : `INST_OP_BITS'(`INST_FPU_F2I);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
`endif
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
end
|
||||
5'b11010: begin
|
||||
// FCVT.X.W, FCVT.X.WU
|
||||
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_U2F) : `INST_OP_BITS'(`INST_FPU_I2F);
|
||||
`ifdef XLEN_64
|
||||
imm[1] = rs2[1]; // is 64-bit integer
|
||||
`endif
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
|
@ -426,11 +449,11 @@ module VX_decode #(
|
|||
if (func3[0]) begin
|
||||
// NCP: FCLASS=3
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 3;
|
||||
op_mod.fpu.frm = `INST_FRM_BITS'(3);
|
||||
end else begin
|
||||
// NCP: FMV.X.W=4
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 4;
|
||||
op_mod.fpu.frm = `INST_FRM_BITS'(4);
|
||||
end
|
||||
`USED_IREG (rd);
|
||||
`USED_FREG (rs1);
|
||||
|
@ -438,7 +461,7 @@ module VX_decode #(
|
|||
5'b11110: begin
|
||||
// NCP: FMV.W.X=5
|
||||
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
|
||||
op_mod = 5;
|
||||
op_mod.fpu.frm = `INST_FRM_BITS'(5);
|
||||
`USED_FREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
|
@ -464,7 +487,7 @@ module VX_decode #(
|
|||
3'h2: begin // SPLIT
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_SPLIT);
|
||||
use_rd = 1;
|
||||
op_mod[0] = rs2[0]; // not?
|
||||
op_mod.wctl.is_neg = rs2[0];
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
|
@ -479,7 +502,7 @@ module VX_decode #(
|
|||
end
|
||||
3'h5: begin // PRED
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_PRED);
|
||||
op_mod[0] = rd[0]; // not?
|
||||
op_mod.wctl.is_neg = rd[0];
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
|
@ -489,25 +512,6 @@ module VX_decode #(
|
|||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_EXT2: begin
|
||||
case (func3)
|
||||
3'h1: begin
|
||||
case (func2)
|
||||
2'h0: begin // CMOV
|
||||
ex_type = `EX_SFU;
|
||||
op_type = `INST_OP_BITS'(`INST_SFU_CMOV);
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
`USED_IREG (rs3);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -523,8 +527,8 @@ module VX_decode #(
|
|||
.reset (reset),
|
||||
.valid_in (fetch_if.valid),
|
||||
.ready_in (fetch_if.ready),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_mod, use_PC, imm, use_imm, wb, rd_r, rs1_r, rs2_r, rs3_r}),
|
||||
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.use_PC, decode_if.data.imm, decode_if.data.use_imm, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
|
||||
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_mod, wb, rd_r, rs1_r, rs2_r, rs3_r}),
|
||||
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
|
||||
.valid_out (decode_if.valid),
|
||||
.ready_out (decode_if.ready)
|
||||
);
|
||||
|
@ -541,29 +545,16 @@ module VX_decode #(
|
|||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`ifdef FLEN_64
|
||||
wire fdst_d = decode_if.data.imm[0];
|
||||
`else
|
||||
wire fdst_d = 0;
|
||||
`endif
|
||||
`ifdef XLEN_64
|
||||
wire fcvt_l = decode_if.data.imm[1];
|
||||
`else
|
||||
wire fcvt_l = 0;
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire rd_float = 1'(decode_if.data.rd >> 5) || 1'(decode_if.data.rs2 >> 5);
|
||||
`else
|
||||
wire rd_float = 0;
|
||||
`endif
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, decode_if.data.PC, instr));
|
||||
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod, decode_if.data.use_imm, fdst_d, fcvt_l, rd_float);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=0x%0h, opds=%b%b%b%b, use_pc=%b, use_imm=%b (#%0d)\n",
|
||||
decode_if.data.op_mod, decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, decode_if.data.imm, use_rd, use_rs1, use_rs2, use_rs3, decode_if.data.use_PC, decode_if.data.use_imm, decode_if.data.uuid));
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b",
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3));
|
||||
trace_op_mod(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_mod);
|
||||
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -31,15 +31,15 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign tids[i] = `NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
|
||||
|
||||
wire [`NT_WIDTH-1:0] last_active_tid;
|
||||
|
||||
VX_find_first #(
|
||||
|
@ -77,13 +77,13 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
assign operands_if[i].ready = operands_reset[operands_if[i].data.ex_type];
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r;
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
|
||||
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
|
||||
|
||||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
perf_issue_unit_stalls_per_cycle[i] = '0;
|
||||
if (operands_if[i].valid && ~operands_if[i].ready) begin
|
||||
perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1;
|
||||
|
@ -111,7 +111,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
|
||||
assign perf_stalls[i] = perf_stalls_r[i];
|
||||
end
|
||||
|
@ -121,14 +121,17 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (operands_if[i].valid && operands_if[i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), operands_if[i].data.PC));
|
||||
`TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), {operands_if[i].data.PC, 1'b0}));
|
||||
trace_ex_type(1, operands_if[i].data.ex_type);
|
||||
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.op_mod, operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_mod);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs1_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs2_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs2_data, `NUM_THREADS);
|
||||
`TRACE(1, (", rs3_data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs3_data, `NUM_THREADS);
|
||||
trace_op_mod(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_mod);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,16 +13,16 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||
module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
||||
parameter BLOCK_SIZE = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter OUT_BUF = 0,
|
||||
parameter MAX_FANOUT = `MAX_FANOUT
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
// inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// outputs
|
||||
|
@ -30,7 +30,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
);
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(`ISSUE_WIDTH, BLOCK_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(`NUM_THREADS, NUM_LANES), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(`NUM_THREADS, NUM_LANES), ("invalid parameter"))
|
||||
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
||||
localparam NUM_PACKETS = `NUM_THREADS / NUM_LANES;
|
||||
localparam PID_BITS = `CLOG2(NUM_PACKETS);
|
||||
|
@ -38,8 +38,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
localparam BATCH_COUNT = `ISSUE_WIDTH / BLOCK_SIZE;
|
||||
localparam BATCH_COUNT_W= `LOG2UP(BATCH_COUNT);
|
||||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_MOD_BITS + 1 + `PC_BITS + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > MAX_FANOUT);
|
||||
|
||||
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
||||
|
@ -54,7 +54,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign dispatch_data[i] = dispatch_if[i].data;
|
||||
assign dispatch_if[i].ready = dispatch_ready[i];
|
||||
end
|
||||
|
||||
|
||||
wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices;
|
||||
wire [BLOCK_SIZE-1:0] block_ready;
|
||||
wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask;
|
||||
|
@ -65,7 +65,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [BLOCK_SIZE-1:0] block_done;
|
||||
|
||||
wire batch_done = (& block_done);
|
||||
|
||||
|
||||
logic [BATCH_COUNT_W-1:0] batch_idx;
|
||||
if (BATCH_COUNT != 1) begin
|
||||
always @(posedge clk) begin
|
||||
|
@ -79,7 +79,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
assign batch_idx = 0;
|
||||
`UNUSED_VAR (batch_done)
|
||||
end
|
||||
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx);
|
||||
|
@ -115,7 +115,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
wire [NUM_PACKETS-1:0][NUM_LANES-1:0] per_packet_tmask;
|
||||
wire [NUM_PACKETS-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] per_packet_regs;
|
||||
wire [NUM_PACKETS-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] per_packet_regs;
|
||||
|
||||
wire [`NUM_THREADS-1:0] dispatch_tmask = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS];
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN];
|
||||
|
@ -135,7 +135,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [NUM_PACKETS-1:0] packet_valids;
|
||||
wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids;
|
||||
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
for (genvar i = 0; i < NUM_PACKETS; ++i) begin
|
||||
assign packet_valids[i] = (| per_packet_tmask[i]);
|
||||
assign packet_ids[i] = PID_WIDTH'(i);
|
||||
end
|
||||
|
@ -144,7 +144,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.N (NUM_PACKETS),
|
||||
.DATAW (PID_WIDTH),
|
||||
.REVERSE (0)
|
||||
) find_first (
|
||||
) find_first (
|
||||
.valid_in (packet_valids & ~sent_mask_p),
|
||||
.data_in (packet_ids),
|
||||
.data_out (start_p_n),
|
||||
|
@ -155,12 +155,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.N (NUM_PACKETS),
|
||||
.DATAW (PID_WIDTH),
|
||||
.REVERSE (1)
|
||||
) find_last (
|
||||
) find_last (
|
||||
.valid_in (packet_valids),
|
||||
.data_in (packet_ids),
|
||||
.data_out (end_p),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + PID_WIDTH),
|
||||
|
@ -172,14 +172,14 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.enable (1'b1),
|
||||
.data_in ({dispatch_valid[issue_idx], start_p_n}),
|
||||
.data_out ({dispatch_valid_r, start_p})
|
||||
);
|
||||
);
|
||||
|
||||
wire [NUM_LANES-1:0] tmask_p = per_packet_tmask[start_p];
|
||||
wire [2:0][NUM_LANES-1:0][`XLEN-1:0] regs_p = per_packet_regs[start_p];
|
||||
|
||||
wire block_enable = (BATCH_COUNT == 1 || ~(& sent_mask_p));
|
||||
|
||||
assign valid_p = dispatch_valid_r && block_enable;
|
||||
|
||||
assign valid_p = dispatch_valid_r && block_enable;
|
||||
assign block_tmask[block_idx] = tmask_p;
|
||||
assign block_regs[block_idx] = regs_p;
|
||||
assign block_pid[block_idx] = start_p;
|
||||
|
@ -228,14 +228,14 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
.reset (buf_out_reset),
|
||||
.valid_in (valid_p),
|
||||
.ready_in (ready_p),
|
||||
.data_in ({
|
||||
.data_in ({
|
||||
dispatch_data[issue_idx][IN_DATAW-1 : DATA_TMASK_OFF+`NUM_THREADS+ISSUE_WIS_W],
|
||||
block_wid,
|
||||
block_tmask[block_idx],
|
||||
dispatch_data[issue_idx][DATA_TMASK_OFF-1 : DATA_REGS_OFF + 3 * `NUM_THREADS * `XLEN],
|
||||
block_regs[block_idx][0],
|
||||
block_regs[block_idx][1],
|
||||
block_regs[block_idx][2],
|
||||
block_regs[block_idx][2],
|
||||
block_pid[block_idx],
|
||||
block_sop[block_idx],
|
||||
block_eop[block_idx]}),
|
||||
|
@ -252,6 +252,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
ready_in[issue_indices[i]] = block_ready[i] && block_eop[i];
|
||||
end
|
||||
end
|
||||
assign dispatch_ready = ready_in;
|
||||
assign dispatch_ready = ready_in;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,32 +18,32 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if.slave mem_perf_if,
|
||||
VX_pipeline_perf_if.slave pipeline_perf_if,
|
||||
`endif
|
||||
`endif
|
||||
|
||||
input base_dcrs_t base_dcrs,
|
||||
|
||||
// Dcache interface
|
||||
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS],
|
||||
|
||||
|
||||
// dispatch interface
|
||||
VX_dispatch_if.slave dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
|
||||
// commit interface
|
||||
VX_commit_if.master commit_if [`NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
|
||||
|
||||
// scheduler interfaces
|
||||
VX_sched_csr_if.slave sched_csr_if,
|
||||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS],
|
||||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
|
||||
// commit interface
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
VX_commit_csr_if.slave commit_csr_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire sim_ebreak
|
||||
|
@ -56,13 +56,13 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (alu_reset, reset);
|
||||
`RESET_RELAY (lsu_reset, reset);
|
||||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
.dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.branch_ctl_if (branch_ctl_if)
|
||||
);
|
||||
|
@ -76,7 +76,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (lsu_reset),
|
||||
.dispatch_if (dispatch_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.lsu_mem_if (lsu_mem_if)
|
||||
);
|
||||
|
||||
|
@ -87,8 +87,8 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
.CORE_ID (CORE_ID)
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.reset (fpu_reset),
|
||||
.dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.fpu_csr_if (fpu_csr_if)
|
||||
);
|
||||
|
@ -104,20 +104,20 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
.pipeline_perf_if (pipeline_perf_if),
|
||||
`endif
|
||||
.base_dcrs (base_dcrs),
|
||||
.dispatch_if (dispatch_if[`EX_SFU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.dispatch_if (dispatch_if[`EX_SFU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
.commit_if (commit_if[`EX_SFU * `ISSUE_WIDTH +: `ISSUE_WIDTH]),
|
||||
`ifdef EXT_F_ENABLE
|
||||
.fpu_csr_if (fpu_csr_if),
|
||||
`endif
|
||||
`endif
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.warp_ctl_if (warp_ctl_if)
|
||||
);
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
assign sim_ebreak = dispatch_if[0].valid && dispatch_if[0].ready
|
||||
assign sim_ebreak = dispatch_if[0].valid && dispatch_if[0].ready
|
||||
&& dispatch_if[0].data.wis == 0
|
||||
&& `INST_ALU_IS_BR(dispatch_if[0].data.op_mod)
|
||||
&& (dispatch_if[0].data.op_mod.alu.xtype == `ALU_TYPE_BRANCH)
|
||||
&& (`INST_BR_BITS'(dispatch_if[0].data.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_BITS'(dispatch_if[0].data.op_type) == `INST_BR_ECALL);
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,7 +23,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
|
||||
// Icache interface
|
||||
VX_mem_bus_if.master icache_bus_if,
|
||||
|
||||
|
||||
// inputs
|
||||
VX_schedule_if.slave schedule_if,
|
||||
|
||||
|
@ -39,25 +39,25 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire icache_req_ready;
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] req_tag, rsp_tag;
|
||||
wire [`NW_WIDTH-1:0] req_tag, rsp_tag;
|
||||
|
||||
wire icache_req_fire = icache_req_valid && icache_req_ready;
|
||||
|
||||
assign req_tag = schedule_if.data.wid;
|
||||
|
||||
|
||||
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
|
||||
|
||||
wire [`XLEN-1:0] rsp_PC;
|
||||
wire [`PC_BITS-1:0] rsp_PC;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN + `NUM_THREADS),
|
||||
.DATAW (`PC_BITS + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (icache_req_fire),
|
||||
.write (icache_req_fire),
|
||||
`UNUSED_PIN (wren),
|
||||
.waddr (req_tag),
|
||||
.wdata ({schedule_if.data.PC, schedule_if.data.tmask}),
|
||||
|
@ -71,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||
wire [`NUM_WARPS-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
VX_pending_size #(
|
||||
VX_pending_size #(
|
||||
.SIZE (`IBUF_SIZE)
|
||||
) pending_reads (
|
||||
.clk (clk),
|
||||
|
@ -88,13 +88,13 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire ibuf_ready = 1'b1;
|
||||
`endif
|
||||
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
|
||||
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
|
||||
|
||||
// Icache Request
|
||||
|
||||
|
||||
assign icache_req_valid = schedule_if.valid && ibuf_ready;
|
||||
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
|
||||
assign icache_req_addr = schedule_if.data.PC[1 +: ICACHE_ADDR_WIDTH];
|
||||
assign icache_req_tag = {schedule_if.data.uuid, req_tag};
|
||||
assign schedule_if.ready = icache_req_ready && ibuf_ready;
|
||||
|
||||
|
@ -116,7 +116,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
assign icache_bus_if.req_data.atype = '0;
|
||||
assign icache_bus_if.req_data.rw = 0;
|
||||
assign icache_bus_if.req_data.byteen = 4'b1111;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
|
||||
// Icache Response
|
||||
|
||||
|
@ -136,7 +136,9 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (3*`UUID_WIDTH + 108)
|
||||
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
|
@ -160,7 +162,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
|
||||
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
|
||||
);
|
||||
|
@ -175,10 +177,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, schedule_if.data.PC, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, fetch_if.data.PC, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,7 +23,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
@ -56,14 +56,12 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.use_PC)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.use_imm)
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
|
||||
// Store request info
|
||||
wire fpu_req_valid, fpu_req_ready;
|
||||
wire fpu_rsp_valid, fpu_rsp_ready;
|
||||
wire fpu_rsp_valid, fpu_rsp_ready;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] fpu_rsp_result;
|
||||
fflags_t fpu_rsp_fflags;
|
||||
wire fpu_rsp_has_fflags;
|
||||
|
@ -71,41 +69,41 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
wire [`UUID_WIDTH-1:0] fpu_rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] fpu_rsp_wid;
|
||||
wire [NUM_LANES-1:0] fpu_rsp_tmask;
|
||||
wire [`XLEN-1:0] fpu_rsp_PC;
|
||||
wire [`PC_BITS-1:0] fpu_rsp_PC;
|
||||
wire [`NR_BITS-1:0] fpu_rsp_rd;
|
||||
wire [PID_WIDTH-1:0] fpu_rsp_pid;
|
||||
wire fpu_rsp_sop;
|
||||
wire fpu_rsp_eop;
|
||||
|
||||
wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag;
|
||||
wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag;
|
||||
wire mdata_full;
|
||||
|
||||
wire [`INST_FMT_BITS-1:0] fpu_fmt = per_block_execute_if[block_idx].data.imm[`INST_FMT_BITS-1:0];
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = per_block_execute_if[block_idx].data.op_mod[`INST_FRM_BITS-1:0];
|
||||
wire [`INST_FMT_BITS-1:0] fpu_fmt = per_block_execute_if[block_idx].data.op_mod.fpu.fmt;
|
||||
wire [`INST_FRM_BITS-1:0] fpu_frm = per_block_execute_if[block_idx].data.op_mod.fpu.frm;
|
||||
|
||||
wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready;
|
||||
wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready;
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + PID_WIDTH + 1 + 1),
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.acquire_en (execute_fire),
|
||||
.write_addr (fpu_req_tag),
|
||||
.write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}),
|
||||
.read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}),
|
||||
.read_addr (fpu_rsp_tag),
|
||||
.release_en (fpu_rsp_fire),
|
||||
.release_en (fpu_rsp_fire),
|
||||
.full (mdata_full),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
// resolve dynamic FRM from CSR
|
||||
wire [`INST_FRM_BITS-1:0] fpu_req_frm;
|
||||
// resolve dynamic FRM from CSR
|
||||
wire [`INST_FRM_BITS-1:0] fpu_req_frm;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].read_wid, per_block_execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_req_frm = (per_block_execute_if[block_idx].data.op_type != `INST_FPU_MISC
|
||||
assign fpu_req_frm = (per_block_execute_if[block_idx].data.op_type != `INST_FPU_MISC
|
||||
&& fpu_frm == `INST_FRM_DYN) ? fpu_csr_if[block_idx].read_frm : fpu_frm;
|
||||
|
||||
// submit FPU request
|
||||
|
@ -113,7 +111,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
assign fpu_req_valid = per_block_execute_if[block_idx].valid && ~mdata_full;
|
||||
assign per_block_execute_if[block_idx].ready = fpu_req_ready && ~mdata_full;
|
||||
|
||||
`RESET_RELAY (fpu_reset, block_reset);
|
||||
`RESET_RELAY (fpu_reset, block_reset);
|
||||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
|
@ -141,8 +139,8 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.has_fflags (fpu_rsp_has_fflags),
|
||||
.fflags (fpu_rsp_fflags),
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
|
||||
`elsif FPU_FPNEW
|
||||
|
||||
|
@ -152,7 +150,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_fpnew (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (fpu_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -161,16 +159,16 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.frm (fpu_req_frm),
|
||||
.dataa (per_block_execute_if[block_idx].data.rs1_data),
|
||||
.datab (per_block_execute_if[block_idx].data.rs2_data),
|
||||
.datac (per_block_execute_if[block_idx].data.rs3_data),
|
||||
.datac (per_block_execute_if[block_idx].data.rs3_data),
|
||||
.tag_in (fpu_req_tag),
|
||||
.ready_in (fpu_req_ready),
|
||||
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.result (fpu_rsp_result),
|
||||
.has_fflags (fpu_rsp_has_fflags),
|
||||
.fflags (fpu_rsp_fflags),
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
|
||||
`elsif FPU_DSP
|
||||
|
@ -181,7 +179,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) fpu_dsp (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
.reset (fpu_reset),
|
||||
|
||||
.valid_in (fpu_req_valid),
|
||||
.mask_in (per_block_execute_if[block_idx].data.tmask),
|
||||
|
@ -190,18 +188,18 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
.frm (fpu_req_frm),
|
||||
.dataa (per_block_execute_if[block_idx].data.rs1_data),
|
||||
.datab (per_block_execute_if[block_idx].data.rs2_data),
|
||||
.datac (per_block_execute_if[block_idx].data.rs3_data),
|
||||
.datac (per_block_execute_if[block_idx].data.rs3_data),
|
||||
.tag_in (fpu_req_tag),
|
||||
.ready_in (fpu_req_ready),
|
||||
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.result (fpu_rsp_result),
|
||||
.valid_out (fpu_rsp_valid),
|
||||
.result (fpu_rsp_result),
|
||||
.has_fflags (fpu_rsp_has_fflags),
|
||||
.fflags (fpu_rsp_fflags),
|
||||
.tag_out (fpu_rsp_tag),
|
||||
.ready_out (fpu_rsp_ready)
|
||||
);
|
||||
|
||||
|
||||
`endif
|
||||
|
||||
// handle FPU response
|
||||
|
@ -221,7 +219,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
end else begin
|
||||
assign fpu_rsp_fflags_q = fpu_rsp_fflags;
|
||||
end
|
||||
|
||||
|
||||
assign fpu_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags;
|
||||
`ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS)
|
||||
assign fpu_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q;
|
||||
|
@ -229,7 +227,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
// send response
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (0)
|
||||
) rsp_buf (
|
||||
.clk (clk),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,13 +17,13 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
parameter BLOCK_SIZE = 1,
|
||||
parameter NUM_LANES = 1,
|
||||
parameter OUT_BUF = 0
|
||||
) (
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_commit_if.slave commit_in_if [BLOCK_SIZE],
|
||||
|
||||
|
||||
// outputs
|
||||
VX_commit_if.master commit_out_if [`ISSUE_WIDTH]
|
||||
|
||||
|
@ -33,7 +33,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam DATA_WIS_OFF = DATAW - (`UUID_WIDTH + `NW_WIDTH);
|
||||
|
||||
wire [BLOCK_SIZE-1:0] commit_in_valid;
|
||||
|
@ -73,7 +73,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
for (genvar i = 0; i < BLOCK_SIZE; ++i) begin
|
||||
assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]];
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
|
@ -90,7 +90,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||
.reset (commit_out_reset),
|
||||
.valid_in (commit_out_valid[i]),
|
||||
.ready_in (commit_out_ready[i]),
|
||||
.data_in (commit_out_data[i]),
|
||||
.data_in (commit_out_data[i]),
|
||||
.data_out (commit_tmp_if.data),
|
||||
.valid_out (commit_tmp_if.valid),
|
||||
.ready_out (commit_tmp_if.ready)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -25,7 +25,7 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + `NR_BITS;
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + `NR_BITS;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO);
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
|
@ -58,7 +58,7 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
reg data_ready, data_ready_n;
|
||||
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if.data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if.data.rs2 == 0);
|
||||
wire is_rs3_zero = (scoreboard_if.data.rs3 == 0);
|
||||
|
@ -87,8 +87,8 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
end
|
||||
if (scoreboard_if.valid && data_ready_n == 0) begin
|
||||
data_ready_n = 1;
|
||||
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] &&
|
||||
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
rs3_ready_n = 1;
|
||||
|
@ -98,8 +98,8 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH3;
|
||||
end
|
||||
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] &&
|
||||
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
rs2_ready_n = 1;
|
||||
|
@ -109,8 +109,8 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH2;
|
||||
end
|
||||
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] &&
|
||||
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
end else begin
|
||||
|
@ -152,9 +152,9 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
|
||||
|
||||
if (CACHE_ENABLE != 0 && writeback_if.valid) begin
|
||||
if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd)
|
||||
if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd)
|
||||
|| (cache_eop[writeback_if.data.wis] && writeback_if.data.sop)) begin
|
||||
for (integer j = 0; j < `NUM_THREADS; ++j) begin
|
||||
if (writeback_if.data.tmask[j]) begin
|
||||
|
@ -163,14 +163,14 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
end
|
||||
cache_reg_n[writeback_if.data.wis] = writeback_if.data.rd;
|
||||
cache_eop_n[writeback_if.data.wis] = writeback_if.data.eop;
|
||||
cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? writeback_if.data.tmask :
|
||||
cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? writeback_if.data.tmask :
|
||||
(cache_tmask_n[writeback_if.data.wis] | writeback_if.data.tmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
data_ready <= 0;
|
||||
|
@ -187,14 +187,14 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
rs3 <= rs3_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
end
|
||||
|
||||
assign stg_valid_in = scoreboard_if.valid && data_ready;
|
||||
assign scoreboard_if.ready = stg_ready_in && data_ready;
|
||||
assign scoreboard_if.ready = stg_ready_in && data_ready;
|
||||
|
||||
VX_toggle_buffer #(
|
||||
.DATAW (DATAW)
|
||||
|
@ -206,14 +206,11 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
scoreboard_if.data.uuid,
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_mod,
|
||||
scoreboard_if.data.use_PC,
|
||||
scoreboard_if.data.use_imm,
|
||||
scoreboard_if.data.imm,
|
||||
scoreboard_if.data.rd
|
||||
}),
|
||||
.ready_in (stg_ready_in),
|
||||
|
@ -222,14 +219,11 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_mod,
|
||||
operands_if.data.use_PC,
|
||||
operands_if.data.use_imm,
|
||||
operands_if.data.imm,
|
||||
operands_if.data.rd
|
||||
}),
|
||||
.ready_out (operands_if.ready)
|
||||
|
@ -241,7 +235,7 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
// GPR banks
|
||||
|
||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||
wire [RAM_ADDRW-1:0] gpr_wr_addr;
|
||||
if (ISSUE_WIS != 0) begin
|
||||
assign gpr_wr_addr = {writeback_if.data.wis, writeback_if.data.rd};
|
||||
|
@ -254,7 +248,7 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
gpr_rd_addr <= gpr_rd_rid_n;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
|
@ -281,7 +275,7 @@ module VX_gpr_slice import VX_gpu_pkg::*; #(
|
|||
.write (wr_enabled && writeback_if.valid && writeback_if.data.tmask[j]),
|
||||
`else
|
||||
.write (writeback_if.valid && writeback_if.data.tmask[j]),
|
||||
`endif
|
||||
`endif
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data[j]),
|
||||
.raddr (gpr_rd_addr),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -26,10 +26,10 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
VX_ibuffer_if.master ibuffer_if [`NUM_WARPS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4);
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + (`NR_BITS * 4);
|
||||
|
||||
wire [`NUM_WARPS-1:0] ibuf_ready_in;
|
||||
|
||||
|
||||
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
@ -40,21 +40,18 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
) instr_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == i),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == i),
|
||||
.data_in ({
|
||||
decode_if.data.uuid,
|
||||
decode_if.data.tmask,
|
||||
decode_if.data.PC,
|
||||
decode_if.data.ex_type,
|
||||
decode_if.data.op_type,
|
||||
decode_if.data.op_mod,
|
||||
decode_if.data.wb,
|
||||
decode_if.data.use_PC,
|
||||
decode_if.data.use_imm,
|
||||
decode_if.data.PC,
|
||||
decode_if.data.imm,
|
||||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rd,
|
||||
decode_if.data.rs1,
|
||||
decode_if.data.rs2,
|
||||
decode_if.data.rs3}),
|
||||
.ready_in (ibuf_ready_in[i]),
|
||||
.valid_out(ibuffer_if[i].valid),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -43,7 +43,7 @@ module VX_issue #(
|
|||
.CORE_ID (CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (ibuf_reset),
|
||||
.reset (ibuf_reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuffer_if (ibuffer_if)
|
||||
);
|
||||
|
@ -66,8 +66,8 @@ module VX_issue #(
|
|||
VX_operands #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
.clk (clk),
|
||||
.reset (operands_reset),
|
||||
.writeback_if (writeback_if),
|
||||
.scoreboard_if (scoreboard_if),
|
||||
.operands_if (operands_if)
|
||||
|
@ -76,14 +76,14 @@ module VX_issue #(
|
|||
VX_dispatch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.clk (clk),
|
||||
.reset (dispatch_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`UNUSED_PIN (perf_stalls),
|
||||
`endif
|
||||
.operands_if (operands_if),
|
||||
.dispatch_if (dispatch_if)
|
||||
);
|
||||
);
|
||||
|
||||
`ifdef DBG_SCOPE_ISSUE
|
||||
if (CORE_ID == 0) begin
|
||||
|
@ -94,8 +94,8 @@ module VX_issue #(
|
|||
VX_scope_tap #(
|
||||
.SCOPE_ID (2),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS +
|
||||
1 + `NR_BITS + `XLEN + 1 + 1 + (`NUM_THREADS * 3 * `XLEN) +
|
||||
.PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
|
||||
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
|
||||
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
|
@ -103,9 +103,9 @@ module VX_issue #(
|
|||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
reset,
|
||||
operands_if_fire,
|
||||
operands_if_not_ready,
|
||||
operands_if_not_ready,
|
||||
writeback_if_valid
|
||||
}),
|
||||
.probes({
|
||||
|
@ -113,12 +113,8 @@ module VX_issue #(
|
|||
operands_if[0].data.tmask,
|
||||
operands_if[0].data.ex_type,
|
||||
operands_if[0].data.op_type,
|
||||
operands_if[0].data.op_mod,
|
||||
operands_if[0].data.wb,
|
||||
operands_if[0].data.rd,
|
||||
operands_if[0].data.imm,
|
||||
operands_if[0].data.use_PC,
|
||||
operands_if[0].data.use_imm,
|
||||
operands_if[0].data.rs1_data,
|
||||
operands_if[0].data.rs2_data,
|
||||
operands_if[0].data.rs3_data,
|
||||
|
@ -131,7 +127,7 @@ module VX_issue #(
|
|||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_issue ila_issue_inst (
|
||||
.clk (clk),
|
||||
|
@ -146,7 +142,7 @@ module VX_issue #(
|
|||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
|
||||
|
||||
|
||||
wire decode_stall = decode_if.valid && ~decode_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,7 +16,7 @@
|
|||
module VX_lsu_slice import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter BLOCK_ID = 0
|
||||
) (
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
|
@ -25,59 +25,57 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
// Inputs
|
||||
VX_execute_if.slave execute_if,
|
||||
|
||||
// Outputs
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if,
|
||||
VX_lsu_mem_if.master lsu_mem_if
|
||||
);
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_IN_SIZE);
|
||||
localparam REQ_ASHIFT = `CLOG2(LSU_WORD_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `MEM_ADDR_WIDTH - MEM_ASHIFT;
|
||||
|
||||
// tag_id = wid + PC + rd + op_type + align + pid + pkt_addr
|
||||
localparam TAG_ID_WIDTH = `NW_WIDTH + `XLEN + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * REQ_ASHIFT) + PID_WIDTH + LSUQ_SIZEW;
|
||||
// tag_id = wid + PC + rd + op_type + align + pid + pkt_addr
|
||||
localparam TAG_ID_WIDTH = `NW_WIDTH + `PC_BITS + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * REQ_ASHIFT) + PID_WIDTH + LSUQ_SIZEW;
|
||||
|
||||
// tag = uuid + tag_id
|
||||
// tag = uuid + tag_id
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + TAG_ID_WIDTH;
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_st_if();
|
||||
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) commit_ld_if();
|
||||
|
||||
`UNUSED_VAR (execute_if.data.op_mod)
|
||||
`UNUSED_VAR (execute_if.data.use_PC)
|
||||
`UNUSED_VAR (execute_if.data.use_imm)
|
||||
`UNUSED_VAR (execute_if.data.op_mod)
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
`UNUSED_VAR (execute_if.data.tid)
|
||||
|
||||
// full address calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign full_addr[i] = execute_if.data.rs1_data[i][`XLEN-1:0] + execute_if.data.imm;
|
||||
assign full_addr[i] = execute_if.data.rs1_data[i] + `SEXT(`XLEN, execute_if.data.op_mod.lsu.offset);
|
||||
end
|
||||
|
||||
// address type calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is I/O address
|
||||
wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start);
|
||||
`ifdef LMEM_ENABLE
|
||||
// is local memory address
|
||||
// is local memory address
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -88,18 +86,18 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
// fence: stall the pipeline until all pending requests are sent
|
||||
wire is_fence = `INST_LSU_IS_FENCE(execute_if.data.op_type);
|
||||
wire fence_wait = is_fence && ~mem_req_empty;
|
||||
|
||||
|
||||
assign lsu_valid = execute_if.valid && ~fence_wait;
|
||||
assign execute_if.ready = lsu_ready && ~fence_wait;
|
||||
|
||||
// schedule memory request
|
||||
// schedule memory request
|
||||
|
||||
wire mem_req_valid;
|
||||
wire [NUM_LANES-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire mem_req_rw;
|
||||
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data;
|
||||
reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
|
@ -112,11 +110,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire mem_rsp_ready;
|
||||
|
||||
assign mem_req_valid = lsu_valid;
|
||||
assign lsu_ready = mem_req_ready
|
||||
assign lsu_ready = mem_req_ready
|
||||
&& (~mem_req_rw || st_rsp_ready); // writes commit directly
|
||||
|
||||
assign mem_req_mask = execute_if.data.tmask;
|
||||
assign mem_req_rw = ~execute_if.data.wb;
|
||||
assign mem_req_rw = ~execute_if.data.wb;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
|
@ -127,7 +125,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT];
|
||||
end
|
||||
|
@ -137,7 +135,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
always @(*) begin
|
||||
mem_req_byteen[i] = '0;
|
||||
case (`INST_LSU_WSIZE(execute_if.data.op_type))
|
||||
0: begin // 8-bit
|
||||
0: begin // 8-bit
|
||||
mem_req_byteen[i][req_align[i]] = 1'b1;
|
||||
end
|
||||
1: begin // 16 bit
|
||||
|
@ -159,10 +157,10 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
// memory misalignment not supported!
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire lsu_req_fire = execute_if.valid && execute_if.ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if.data.wid, execute_if.data.PC, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid));
|
||||
wire lsu_req_fire = execute_if.valid && execute_if.ready;
|
||||
`RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0),
|
||||
("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)",
|
||||
execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid));
|
||||
end
|
||||
|
||||
// store data formatting
|
||||
|
@ -184,7 +182,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
// track SOP/EOP for out-of-order memory responses
|
||||
// track SOP/EOP for out-of-order memory responses
|
||||
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
|
@ -198,7 +196,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire mem_req_rd_eop_fire = mem_req_rd_fire && execute_if.data.eop;
|
||||
wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
|
||||
wire full;
|
||||
|
||||
|
||||
VX_allocator #(
|
||||
.SIZE (`LSUQ_IN_SIZE)
|
||||
) pkt_allocator (
|
||||
|
@ -215,7 +213,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire rd_during_wr = mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
if (reset) begin
|
||||
pkt_ctr <= '0;
|
||||
pkt_sop <= '0;
|
||||
pkt_eop <= '0;
|
||||
|
@ -250,19 +248,19 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
`UNUSED_VAR (pkt_raddr)
|
||||
end
|
||||
|
||||
// pack memory request tag
|
||||
assign mem_req_tag = {
|
||||
execute_if.data.uuid,
|
||||
execute_if.data.uuid,
|
||||
execute_if.data.wid,
|
||||
execute_if.data.PC,
|
||||
execute_if.data.rd,
|
||||
execute_if.data.op_type,
|
||||
req_align, execute_if.data.pid,
|
||||
execute_if.data.PC,
|
||||
execute_if.data.rd,
|
||||
execute_if.data.op_type,
|
||||
req_align, execute_if.data.pid,
|
||||
pkt_waddr
|
||||
};
|
||||
|
||||
|
@ -287,12 +285,12 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)),
|
||||
.CORE_REQS (NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.LINE_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LSU_ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_QUEUE_SIZE (`LSUQ_IN_SIZE),
|
||||
.MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
|
@ -310,12 +308,12 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.core_req_byteen(mem_req_byteen),
|
||||
.core_req_addr (mem_req_addr),
|
||||
.core_req_atype (mem_req_atype),
|
||||
.core_req_data (mem_req_data),
|
||||
.core_req_data (mem_req_data),
|
||||
.core_req_tag (mem_req_tag),
|
||||
.core_req_ready (mem_req_ready),
|
||||
.core_req_ready (mem_req_ready),
|
||||
.core_req_empty (mem_req_empty),
|
||||
`UNUSED_PIN (core_req_sent),
|
||||
|
||||
`UNUSED_PIN (core_req_sent),
|
||||
|
||||
// Output response
|
||||
.core_rsp_valid (mem_rsp_valid),
|
||||
.core_rsp_mask (mem_rsp_mask),
|
||||
|
@ -359,10 +357,10 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
assign lsu_mem_rsp_data = lsu_mem_if.rsp_data.data;
|
||||
assign lsu_mem_rsp_tag = lsu_mem_if.rsp_data.tag;
|
||||
assign lsu_mem_if.rsp_ready = lsu_mem_rsp_ready;
|
||||
|
||||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] rsp_wid;
|
||||
wire [`XLEN-1:0] rsp_pc;
|
||||
wire [`PC_BITS-1:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_op_type;
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] rsp_align;
|
||||
|
@ -371,12 +369,12 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
// unpack memory response tag
|
||||
assign {
|
||||
rsp_uuid,
|
||||
rsp_uuid,
|
||||
rsp_wid,
|
||||
rsp_pc, rsp_rd,
|
||||
rsp_op_type,
|
||||
rsp_align,
|
||||
rsp_pid,
|
||||
rsp_pc, rsp_rd,
|
||||
rsp_op_type,
|
||||
rsp_align,
|
||||
rsp_pid,
|
||||
pkt_raddr
|
||||
} = mem_rsp_tag;
|
||||
|
||||
|
@ -399,7 +397,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire [31:0] rsp_data32 = (rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
`else
|
||||
wire [31:0] rsp_data32 = mem_rsp_data[i];
|
||||
`endif
|
||||
`endif
|
||||
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
||||
|
@ -409,7 +407,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`INST_FMT_H: rsp_data[i] = `XLEN'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = `XLEN'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = `XLEN'(unsigned'(rsp_data16));
|
||||
`ifdef XLEN_64
|
||||
`ifdef XLEN_64
|
||||
`INST_FMT_W: rsp_data[i] = rsp_is_float ? (`XLEN'(rsp_data32) | 64'hffffffff00000000) : `XLEN'(signed'(rsp_data32));
|
||||
`INST_FMT_WU: rsp_data[i] = `XLEN'(unsigned'(rsp_data32));
|
||||
`INST_FMT_D: rsp_data[i] = `XLEN'(signed'(rsp_data64));
|
||||
|
@ -418,13 +416,13 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// load commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) ld_rsp_buf (
|
||||
.clk (clk),
|
||||
|
@ -442,7 +440,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
// store commit
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + PID_WIDTH + 1 + 1),
|
||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + PID_WIDTH + 1 + 1),
|
||||
.SIZE (2)
|
||||
) st_rsp_buf (
|
||||
.clk (clk),
|
||||
|
@ -471,19 +469,19 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.ready_in ({commit_st_if.ready, commit_ld_if.ready}),
|
||||
.data_in ({commit_st_if.data, commit_ld_if.data}),
|
||||
.data_out (commit_if.data),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready),
|
||||
.valid_out (commit_if.valid),
|
||||
.ready_out (commit_if.ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid && fence_wait) begin
|
||||
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, execute_if.data.PC, mem_req_mask));
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
|
@ -491,7 +489,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, execute_if.data.PC, mem_req_mask));
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
|
@ -500,7 +498,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
$time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
|
||||
end
|
||||
|
@ -525,7 +523,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
`ifdef CHIPSCOPE
|
||||
wire [31:0] full_addr_0 = full_addr[0];
|
||||
wire [31:0] mem_req_data_0 = mem_req_data[0];
|
||||
wire [31:0] rsp_data_0 = rsp_data[0];
|
||||
|
@ -541,5 +539,5 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -48,11 +48,11 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
reg [`NUM_WARPS-1:0] stalled_warps, stalled_warps_n; // set when branch/gpgpu instructions are issued
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks, thread_masks_n;
|
||||
reg [`NUM_WARPS-1:0][`XLEN-1:0] warp_pcs, warp_pcs_n;
|
||||
reg [`NUM_WARPS-1:0][`PC_BITS-1:0] warp_pcs, warp_pcs_n;
|
||||
|
||||
wire [`NW_WIDTH-1:0] schedule_wid;
|
||||
wire [`NUM_THREADS-1:0] schedule_tmask;
|
||||
wire [`XLEN-1:0] schedule_pc;
|
||||
wire [`PC_BITS-1:0] schedule_pc;
|
||||
wire schedule_valid;
|
||||
wire schedule_ready;
|
||||
|
||||
|
@ -62,7 +62,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire join_is_else;
|
||||
wire [`NW_WIDTH-1:0] join_wid;
|
||||
wire [`NUM_THREADS-1:0] join_tmask;
|
||||
wire [`XLEN-1:0] join_pc;
|
||||
wire [`PC_BITS-1:0] join_pc;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] cycles;
|
||||
|
||||
|
@ -75,7 +75,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_ALU_BLOCKS-1:0] branch_valid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid;
|
||||
wire [`NUM_ALU_BLOCKS-1:0] branch_taken;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`XLEN-1:0] branch_dest;
|
||||
wire [`NUM_ALU_BLOCKS-1:0][`PC_BITS-1:0] branch_dest;
|
||||
for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin
|
||||
assign branch_valid[i] = branch_ctl_if[i].valid;
|
||||
assign branch_wid[i] = branch_ctl_if[i].wid;
|
||||
|
@ -203,7 +203,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
// advance PC
|
||||
if (schedule_if_fire) begin
|
||||
warp_pcs_n[schedule_if.data.wid] = schedule_if.data.PC + 4;
|
||||
warp_pcs_n[schedule_if.data.wid] = schedule_if.data.PC + 2;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -226,7 +226,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
wspawn.valid <= 0;
|
||||
|
||||
// activate first warp
|
||||
warp_pcs[0] <= base_dcrs.startup_addr;
|
||||
warp_pcs[0] <= base_dcrs.startup_addr[1 +: `PC_BITS];
|
||||
active_warps[0] <= 1;
|
||||
thread_masks[0][0] <= 1;
|
||||
is_single_warp <= 1;
|
||||
|
@ -321,14 +321,14 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
.valid_out (schedule_valid)
|
||||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + `XLEN)-1:0] schedule_data;
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + `PC_BITS)-1:0] schedule_data;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign schedule_data[i] = {thread_masks[i], warp_pcs[i]};
|
||||
end
|
||||
|
||||
assign {schedule_tmask, schedule_pc} = {
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-1:(`NUM_THREADS + `XLEN)-4],
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `XLEN)-5:0]
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-1:(`NUM_THREADS + `PC_BITS)-4],
|
||||
schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-5:0]
|
||||
};
|
||||
|
||||
`ifndef NDEBUG
|
||||
|
@ -354,7 +354,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NUM_THREADS + `XLEN + `NW_WIDTH)
|
||||
.DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH)
|
||||
) out_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -30,19 +30,19 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
VX_scoreboard_if.master scoreboard_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4) + 1;
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + (`NR_BITS * 4) + 1;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
|
||||
|
||||
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||
|
||||
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
||||
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
|
@ -51,7 +51,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
) perf_units_reduce (
|
||||
.data_in (perf_inuse_units_per_cycle),
|
||||
.data_out (perf_units_per_cycle)
|
||||
);
|
||||
);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_SFU_UNITS),
|
||||
|
@ -68,7 +68,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_scb_stalls <= '0;
|
||||
perf_scb_stalls <= '0;
|
||||
end else begin
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r);
|
||||
end
|
||||
|
@ -83,7 +83,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -106,13 +106,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (ibuffer_if[i].valid),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.ready_in (ibuffer_if[i].ready),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.ready_in (ibuffer_if[i].ready),
|
||||
.valid_out(staging_if[i].valid),
|
||||
.data_out (staging_if[i].data),
|
||||
.ready_out(staging_if[i].ready)
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
reg [`NUM_REGS-1:0] inuse_regs;
|
||||
|
@ -126,13 +126,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
wire staging_fire = staging_if[i].valid && staging_if[i].ready;
|
||||
|
||||
wire writeback_fire = writeback_if[iw].valid
|
||||
wire writeback_fire = writeback_if[iw].valid
|
||||
&& (writeback_if[iw].data.wis == ISSUE_WIS_W'(wis))
|
||||
&& writeback_if[iw].data.eop;
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||
always @(*) begin
|
||||
|
@ -249,7 +249,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
inuse_units[staging_if[i].data.rd] <= staging_if[i].data.ex_type;
|
||||
if (staging_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[staging_if[i].data.rd] <= sfu_type;
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
@ -257,16 +257,16 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
assign staging_opds_busy[i] = operands_busy_r;
|
||||
|
||||
`ifdef SIMULATION
|
||||
reg [31:0] timeout_ctr;
|
||||
reg [31:0] timeout_ctr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
timeout_ctr <= '0;
|
||||
end else begin
|
||||
end else begin
|
||||
if (staging_if[i].valid && ~staging_if[i].ready) begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, i, staging_if[i].data.PC, staging_if[i].data.tmask, timeout_ctr,
|
||||
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy_r, staging_if[i].data.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
|
@ -275,18 +275,18 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, i, staging_if[i].data.PC, staging_if[i].data.tmask, timeout_ctr,
|
||||
$time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy_r, staging_if[i].data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[iw].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, i, writeback_if[iw].data.PC, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid));
|
||||
$time, CORE_ID, i, {writeback_if[iw].data.PC, 1'b0}, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid));
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
|
@ -301,7 +301,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
assign data_in[j] = staging_if[j * `ISSUE_WIDTH + i].data;
|
||||
assign staging_if[j * `ISSUE_WIDTH + i].ready = ready_in[j] && operands_ready;
|
||||
end
|
||||
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (ISSUE_RATIO),
|
||||
.DATAW (DATAW),
|
||||
|
@ -316,14 +316,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
.data_out ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd,
|
||||
scoreboard_if[i].data.rs1,
|
||||
scoreboard_if[i].data.rs2,
|
||||
|
|
|
@ -45,7 +45,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + 1;
|
||||
localparam RSP_ARB_IDX_WCTL = 0;
|
||||
localparam RSP_ARB_IDX_CSRS = 1;
|
||||
|
|
|
@ -27,18 +27,18 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
output wire join_is_else,
|
||||
output wire [`NW_WIDTH-1:0] join_wid,
|
||||
output wire [`NUM_THREADS-1:0] join_tmask,
|
||||
output wire [`XLEN-1:0] join_pc,
|
||||
output wire [`PC_BITS-1:0] join_pc,
|
||||
input wire [`NW_WIDTH-1:0] stack_wid,
|
||||
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];
|
||||
wire ipdom_set [`NUM_WARPS-1:0];
|
||||
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q0 = {split.then_tmask | split.else_tmask, `XLEN'(0)};
|
||||
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_q1 = {split.else_tmask, split.next_pc};
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_q0 = {split.then_tmask | split.else_tmask, `PC_BITS'(0)};
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_q1 = {split.else_tmask, split.next_pc};
|
||||
|
||||
wire sjoin_is_dvg = (sjoin.stack_ptr != ipdom_q_ptr[wid]);
|
||||
|
||||
|
@ -50,7 +50,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (ipdom_reset, reset);
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`XLEN+`NUM_THREADS),
|
||||
.WIDTH (`NUM_THREADS+`PC_BITS),
|
||||
.DEPTH (`DV_STACK_SIZE)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
|
@ -68,7 +68,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + `NW_WIDTH + `NUM_THREADS + `XLEN),
|
||||
.DATAW (1 + 1 + 1 + `NW_WIDTH + `NUM_THREADS + `PC_BITS),
|
||||
.DEPTH (1),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
|
|
|
@ -16,344 +16,370 @@
|
|||
|
||||
`ifdef SIMULATION
|
||||
|
||||
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"));
|
||||
`EX_LSU: `TRACE(level, ("LSU"));
|
||||
`EX_FPU: `TRACE(level, ("FPU"));
|
||||
`EX_SFU: `TRACE(level, ("SFU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"));
|
||||
`EX_LSU: `TRACE(level, ("LSU"));
|
||||
`EX_FPU: `TRACE(level, ("FPU"));
|
||||
`EX_SFU: `TRACE(level, ("SFU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input [`INST_MOD_BITS-1:0] op_mod,
|
||||
input use_imm,
|
||||
input fdst_d,
|
||||
input fcvt_l,
|
||||
input rd_float
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
if (`INST_ALU_IS_BR(op_mod)) begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"));
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"));
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"));
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"));
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"));
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"));
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"));
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"));
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"));
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
|
||||
`INST_BR_URET: `TRACE(level, ("URET"));
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"));
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"));
|
||||
default: `TRACE(level, ("?"));
|
||||
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_mod_t op_mod
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
case (op_mod.alu.xtype)
|
||||
`ALU_TYPE_ARITH: begin
|
||||
if (op_mod.alu.is_w) begin
|
||||
if (op_mod.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (op_mod.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"));
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"));
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"));
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"));
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"));
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"));
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"));
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZEQ"));
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZNE"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
`ALU_TYPE_BRANCH: begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"));
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"));
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"));
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"));
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"));
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"));
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"));
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"));
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"));
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
|
||||
`INST_BR_URET: `TRACE(level, ("URET"));
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"));
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`ALU_TYPE_MULDIV: begin
|
||||
if (op_mod.alu.is_w) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"));
|
||||
`INST_M_REM: `TRACE(level, ("REMW"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"));
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"));
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"));
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"));
|
||||
`INST_M_REM: `TRACE(level, ("REM"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end else if (`INST_ALU_IS_M(op_mod)) begin
|
||||
if (`INST_ALU_IS_W(op_mod)) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"));
|
||||
`INST_M_REM: `TRACE(level, ("REMW"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"));
|
||||
end
|
||||
`EX_LSU: begin
|
||||
if (op_mod.lsu.is_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"));
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"));
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"));
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"));
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"));
|
||||
`INST_M_REM: `TRACE(level, ("REM"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"));
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"));
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"));
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"));
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"));
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"));
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"));
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"));
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"));
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (`INST_ALU_IS_W(op_mod)) begin
|
||||
if (use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"));
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"));
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"));
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"));
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"));
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"));
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`EX_LSU: begin
|
||||
if (rd_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"));
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"));
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"));
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"));
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"));
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"));
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"));
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"));
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"));
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"));
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"));
|
||||
default: `TRACE(level, ("?"));
|
||||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FADD.S"));
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FSUB.S"));
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FMUL.D"));
|
||||
else
|
||||
`TRACE(level, ("FMUL.S"));
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FDIV.D"));
|
||||
else
|
||||
`TRACE(level, ("FDIV.S"));
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FSQRT.D"));
|
||||
else
|
||||
`TRACE(level, ("FSQRT.S"));
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FMADD.S"));
|
||||
end
|
||||
`INST_FPU_MSUB: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FMSUB.S"));
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMADD.S"));
|
||||
end
|
||||
`INST_FPU_NMSUB: begin
|
||||
if (op_mod.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMSUB.S"));
|
||||
end
|
||||
`INST_FPU_CMP: begin
|
||||
if (op_mod.fpu.fmt[0]) begin
|
||||
case (op_mod.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.D"));
|
||||
1: `TRACE(level, ("FLT.D"));
|
||||
2: `TRACE(level, ("FEQ.D"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_mod.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.S"));
|
||||
1: `TRACE(level, ("FLT.S"));
|
||||
2: `TRACE(level, ("FEQ.S"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (op_mod.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FCVT.D.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"));
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (op_mod.fpu.fmt[0]) begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (op_mod.fpu.fmt[0]) begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (op_mod.fpu.fmt[0]) begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"));
|
||||
end
|
||||
end else begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (op_mod.fpu.fmt[0]) begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"));
|
||||
end
|
||||
end else begin
|
||||
if (op_mod.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MISC: begin
|
||||
if (op_mod.fpu.fmt[0]) begin
|
||||
case (op_mod)
|
||||
0: `TRACE(level, ("FSGNJ.D"));
|
||||
1: `TRACE(level, ("FSGNJN.D"));
|
||||
2: `TRACE(level, ("FSGNJX.D"));
|
||||
3: `TRACE(level, ("FCLASS.D"));
|
||||
4: `TRACE(level, ("FMV.X.D"));
|
||||
5: `TRACE(level, ("FMV.D.X"));
|
||||
6: `TRACE(level, ("FMIN.D"));
|
||||
7: `TRACE(level, ("FMAX.D"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_mod)
|
||||
0: `TRACE(level, ("FSGNJ.S"));
|
||||
1: `TRACE(level, ("FSGNJN.S"));
|
||||
2: `TRACE(level, ("FSGNJX.S"));
|
||||
3: `TRACE(level, ("FCLASS.S"));
|
||||
4: `TRACE(level, ("FMV.X.S"));
|
||||
5: `TRACE(level, ("FMV.S.X"));
|
||||
6: `TRACE(level, ("FMIN.S"));
|
||||
7: `TRACE(level, ("FMAX.S"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FADD.S"));
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FSUB.S"));
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMUL.D"));
|
||||
else
|
||||
`TRACE(level, ("FMUL.S"));
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FDIV.D"));
|
||||
else
|
||||
`TRACE(level, ("FDIV.S"));
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FSQRT.D"));
|
||||
else
|
||||
`TRACE(level, ("FSQRT.S"));
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FMADD.S"));
|
||||
end
|
||||
`INST_FPU_MSUB: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FMSUB.S"));
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FNMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMADD.S"));
|
||||
end
|
||||
`INST_FPU_NMSUB: begin
|
||||
if (fdst_d)
|
||||
`TRACE(level, ("FNMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMSUB.S"));
|
||||
end
|
||||
`INST_FPU_CMP: begin
|
||||
if (fdst_d) begin
|
||||
case (op_mod[1:0])
|
||||
0: `TRACE(level, ("FLE.D"));
|
||||
1: `TRACE(level, ("FLT.D"));
|
||||
2: `TRACE(level, ("FEQ.D"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_mod[1:0])
|
||||
0: `TRACE(level, ("FLE.S"));
|
||||
1: `TRACE(level, ("FLT.S"));
|
||||
2: `TRACE(level, ("FEQ.S"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (fdst_d) begin
|
||||
`TRACE(level, ("FCVT.D.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"));
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.L.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.L.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.LU.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.LU.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.D.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.S.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (fdst_d) begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.D.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"));
|
||||
end
|
||||
end else begin
|
||||
if (fcvt_l) begin
|
||||
`TRACE(level, ("FCVT.S.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_MISC: begin
|
||||
if (fdst_d) begin
|
||||
case (op_mod)
|
||||
0: `TRACE(level, ("FSGNJ.D"));
|
||||
1: `TRACE(level, ("FSGNJN.D"));
|
||||
2: `TRACE(level, ("FSGNJX.D"));
|
||||
3: `TRACE(level, ("FCLASS.D"));
|
||||
4: `TRACE(level, ("FMV.X.D"));
|
||||
5: `TRACE(level, ("FMV.D.X"));
|
||||
6: `TRACE(level, ("FMIN.D"));
|
||||
7: `TRACE(level, ("FMAX.D"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_mod)
|
||||
0: `TRACE(level, ("FSGNJ.S"));
|
||||
1: `TRACE(level, ("FSGNJN.S"));
|
||||
2: `TRACE(level, ("FSGNJX.S"));
|
||||
3: `TRACE(level, ("FCLASS.S"));
|
||||
4: `TRACE(level, ("FMV.X.S"));
|
||||
5: `TRACE(level, ("FMV.S.X"));
|
||||
6: `TRACE(level, ("FMIN.S"));
|
||||
7: `TRACE(level, ("FMAX.S"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
`EX_SFU: begin
|
||||
`EX_SFU: begin
|
||||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"));
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
|
||||
`INST_SFU_SPLIT: begin if (op_mod[0]) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"));
|
||||
`INST_SFU_PRED: begin if (op_mod[0]) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end
|
||||
`INST_SFU_CSRRW: begin if (use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
|
||||
`INST_SFU_CSRRS: begin if (use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
|
||||
`INST_SFU_CSRRC: begin if (use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
|
||||
default: `TRACE(level, ("?"));
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"));
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
|
||||
`INST_SFU_SPLIT: begin if (op_mod.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"));
|
||||
`INST_SFU_PRED: begin if (op_mod.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end
|
||||
`INST_SFU_CSRRW: begin if (op_mod.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
|
||||
`INST_SFU_CSRRS: begin if (op_mod.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
|
||||
`INST_SFU_CSRRC: begin if (op_mod.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
endtask
|
||||
|
||||
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"));
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"));
|
||||
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"));
|
||||
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"));
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
task trace_op_mod(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_mod_t op_mod
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_mod.alu.use_PC, op_mod.alu.use_imm, op_mod.alu.imm));
|
||||
end
|
||||
`EX_LSU: begin
|
||||
`TRACE(level, (", offset=0x%0h", op_mod.lsu.offset));
|
||||
end
|
||||
`EX_FPU: begin
|
||||
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_mod.fpu.fmt, op_mod.fpu.frm));
|
||||
end
|
||||
`EX_SFU: begin
|
||||
if (`INST_SFU_IS_CSR(op_type)) begin
|
||||
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_mod.csr.addr, op_mod.csr.use_imm, op_mod.csr.imm));
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
endtask
|
||||
|
||||
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"));
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"));
|
||||
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"));
|
||||
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"));
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam WCTL_WIDTH = $bits(tmc_t) + $bits(wspawn_t) + $bits(split_t) + $bits(join_t) + $bits(barrier_t);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1 + `DV_STACK_SIZEW;
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1 + `DV_STACK_SIZEW;
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
|
||||
|
@ -60,7 +60,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[tid];
|
||||
`UNUSED_VAR (rs1_data)
|
||||
|
||||
wire not_pred = execute_if.data.op_mod[0];
|
||||
wire not_pred = execute_if.data.op_mod.wctl.is_neg;
|
||||
|
||||
wire [NUM_LANES-1:0] taken;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
|
@ -107,7 +107,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
assign split.is_dvg = has_then && has_else;
|
||||
assign split.then_tmask = taken_tmask;
|
||||
assign split.else_tmask = ntaken_tmask;
|
||||
assign split.next_pc = execute_if.data.PC + 4;
|
||||
assign split.next_pc = execute_if.data.PC + 2;
|
||||
|
||||
assign warp_ctl_if.dvstack_wid = execute_if.data.wid;
|
||||
wire [`DV_STACK_SIZEW-1:0] dvstack_ptr;
|
||||
|
@ -136,7 +136,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
assign wspawn.valid = is_wspawn;
|
||||
assign wspawn.wmask = wspawn_wmask;
|
||||
assign wspawn.pc = rs2_data;
|
||||
assign wspawn.pc = rs2_data[1 +: `PC_BITS];
|
||||
|
||||
// response
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -15,20 +15,20 @@
|
|||
|
||||
interface VX_branch_ctl_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NW_WIDTH-1:0] wid;
|
||||
wire valid;
|
||||
wire [`NW_WIDTH-1:0] wid;
|
||||
wire taken;
|
||||
wire [`XLEN-1:0] dest;
|
||||
wire [`PC_BITS-1:0] dest;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output valid,
|
||||
output wid,
|
||||
output taken,
|
||||
output dest
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input valid,
|
||||
input wid,
|
||||
input taken,
|
||||
input dest
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,12 +17,12 @@ interface VX_commit_if #(
|
|||
parameter NUM_LANES = `NUM_THREADS,
|
||||
parameter PID_WIDTH = `LOG2UP(`NUM_THREADS / NUM_LANES)
|
||||
) ();
|
||||
|
||||
|
||||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`NW_WIDTH-1:0] wid;
|
||||
logic [NUM_LANES-1:0] tmask;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic wb;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [NUM_LANES-1:0][`XLEN-1:0] data;
|
||||
|
@ -33,7 +33,7 @@ interface VX_commit_if #(
|
|||
|
||||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
logic ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,20 +13,17 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_decode_if ();
|
||||
interface VX_decode_if import VX_gpu_pkg::*; ();
|
||||
|
||||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`NW_WIDTH-1:0] wid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`INST_OP_BITS-1:0] op_type;
|
||||
logic [`INST_MOD_BITS-1:0] op_mod;
|
||||
op_mod_t op_mod;
|
||||
logic wb;
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`XLEN-1:0] imm;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NR_BITS-1:0] rs1;
|
||||
logic [`NR_BITS-1:0] rs2;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,13 +19,10 @@ interface VX_dispatch_if import VX_gpu_pkg::*; ();
|
|||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [ISSUE_WIS_W-1:0] wis;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`INST_ALU_BITS-1:0] op_type;
|
||||
logic [`INST_MOD_BITS-1:0] op_mod;
|
||||
op_mod_t op_mod;
|
||||
logic wb;
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`XLEN-1:0] imm;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NT_WIDTH-1:0] tid;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,21 +13,18 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_execute_if #(
|
||||
interface VX_execute_if import VX_gpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter PID_WIDTH = `LOG2UP(`NUM_THREADS / NUM_LANES)
|
||||
) ();
|
||||
);
|
||||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`NW_WIDTH-1:0] wid;
|
||||
logic [NUM_LANES-1:0] tmask;
|
||||
logic [NUM_LANES-1:0] tmask;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`INST_ALU_BITS-1:0] op_type;
|
||||
logic [`INST_MOD_BITS-1:0] op_mod;
|
||||
op_mod_t op_mod;
|
||||
logic wb;
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`XLEN-1:0] imm;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NT_WIDTH-1:0] tid;
|
||||
logic [NUM_LANES-1:0][`XLEN-1:0] rs1_data;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,7 +19,7 @@ interface VX_fetch_if ();
|
|||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`NW_WIDTH-1:0] wid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [31:0] instr;
|
||||
} data_t;
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,14 +18,11 @@ interface VX_ibuffer_if import VX_gpu_pkg::*; ();
|
|||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`INST_OP_BITS-1:0] op_type;
|
||||
logic [`INST_MOD_BITS-1:0] op_mod;
|
||||
op_mod_t op_mod;
|
||||
logic wb;
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`XLEN-1:0] imm;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NR_BITS-1:0] rs1;
|
||||
logic [`NR_BITS-1:0] rs2;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,14 +19,11 @@ interface VX_operands_if import VX_gpu_pkg::*; ();
|
|||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [ISSUE_WIS_W-1:0] wis;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`INST_OP_BITS-1:0] op_type;
|
||||
logic [`INST_MOD_BITS-1:0] op_mod;
|
||||
op_mod_t op_mod;
|
||||
logic wb;
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic [`XLEN-1:0] imm;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
|
||||
|
@ -36,7 +33,7 @@ interface VX_operands_if import VX_gpu_pkg::*; ();
|
|||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output data,
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,8 +18,8 @@ interface VX_schedule_if ();
|
|||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [`NW_WIDTH-1:0] wid;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
} data_t;
|
||||
|
||||
logic valid;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,14 +19,11 @@ interface VX_scoreboard_if import VX_gpu_pkg::*; ();
|
|||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [ISSUE_WIS_W-1:0] wis;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`INST_OP_BITS-1:0] op_type;
|
||||
logic [`INST_MOD_BITS-1:0] op_mod;
|
||||
op_mod_t op_mod;
|
||||
logic wb;
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`XLEN-1:0] imm;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NR_BITS-1:0] rs1;
|
||||
logic [`NR_BITS-1:0] rs2;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -19,7 +19,7 @@ interface VX_writeback_if import VX_gpu_pkg::*; ();
|
|||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [ISSUE_WIS_W-1:0] wis;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`PC_BITS-1:0] PC;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] data;
|
||||
logic sop;
|
||||
|
|
|
@ -93,6 +93,16 @@ static const char* op_string(const Instr &instr) {
|
|||
case Opcode::LUI: return "LUI";
|
||||
case Opcode::AUIPC: return "AUIPC";
|
||||
case Opcode::R:
|
||||
if (func7 == 0x7) {
|
||||
if (func3 == 0x5) {
|
||||
return "CZERO.EQZ";
|
||||
} else
|
||||
if (func3 == 0x7) {
|
||||
return "CZERO.NEZ";
|
||||
} else {
|
||||
std::abort();
|
||||
}
|
||||
} else
|
||||
if (func7 & 0x1) {
|
||||
switch (func3) {
|
||||
case 0: return "MUL";
|
||||
|
@ -395,18 +405,6 @@ static const char* op_string(const Instr &instr) {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::EXT2:
|
||||
switch (func3) {
|
||||
case 1: {
|
||||
switch (func2) {
|
||||
case 0: return "CMOV";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
@ -639,29 +637,10 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
} break;
|
||||
|
||||
case InstType::R4:
|
||||
if (op == Opcode::EXT2) {
|
||||
switch (func3) {
|
||||
case 1:
|
||||
switch (func2) {
|
||||
case 0: // CMOV
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs3, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
} else {
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs3, RegType::Float);
|
||||
}
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs3, RegType::Float);
|
||||
instr->setFunc2(func2);
|
||||
instr->setFunc3(func3);
|
||||
break;
|
||||
|
|
|
@ -164,6 +164,22 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
if (func7 == 0x7) {
|
||||
auto value = rsdata[t][0].i;
|
||||
auto cond = rsdata[t][1].i;
|
||||
if (func3 == 0x5) {
|
||||
// CZERO.EQZ
|
||||
rddata[t].i = (cond == 0) ? 0 : value;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
} else
|
||||
if (func3 == 0x7) {
|
||||
// CZERO.NEZ
|
||||
rddata[t].i = (cond != 0) ? 0 : value;
|
||||
trace->alu_type = AluType::ARITH;
|
||||
} else {
|
||||
std::abort();
|
||||
}
|
||||
} else
|
||||
if (func7 & 0x1) {
|
||||
switch (func3) {
|
||||
case 0: {
|
||||
|
@ -1399,31 +1415,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
std::abort();
|
||||
}
|
||||
} break;
|
||||
case Opcode::EXT2: {
|
||||
switch (func3) {
|
||||
case 1:
|
||||
switch (func2) {
|
||||
case 0: { // CMOV
|
||||
trace->fu_type = FUType::SFU;
|
||||
trace->sfu_type = SfuType::CMOV;
|
||||
trace->used_iregs.set(rsrc0);
|
||||
trace->used_iregs.set(rsrc1);
|
||||
trace->used_iregs.set(rsrc2);
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
rddata[t].i = rsdata[t][0].i ? rsdata[t][1].i : rsdata[t][2].i;
|
||||
}
|
||||
rd_write = true;
|
||||
} break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -279,9 +279,6 @@ void SfuUnit::tick() {
|
|||
release_warp = core_->barrier(trace_data->arg1, trace_data->arg2, trace->wid);
|
||||
}
|
||||
} break;
|
||||
case SfuType::CMOV:
|
||||
output.push(trace, 3);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -203,8 +203,7 @@ enum class SfuType {
|
|||
PRED,
|
||||
CSRRW,
|
||||
CSRRS,
|
||||
CSRRC,
|
||||
CMOV
|
||||
CSRRC
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const SfuType& type) {
|
||||
|
@ -218,7 +217,6 @@ inline std::ostream &operator<<(std::ostream &os, const SfuType& type) {
|
|||
case SfuType::CSRRW: os << "CSRRW"; break;
|
||||
case SfuType::CSRRS: os << "CSRRS"; break;
|
||||
case SfuType::CSRRC: os << "CSRRC"; break;
|
||||
case SfuType::CMOV: os << "CMOV"; break;
|
||||
default: assert(false);
|
||||
}
|
||||
return os;
|
||||
|
|
|
@ -9,10 +9,6 @@ endif
|
|||
LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT)
|
||||
LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH)
|
||||
LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-divergence=0
|
||||
#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0/$(RISCV_PREFIX)
|
||||
#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0
|
||||
#LLVM_CFLAGS += -Wl,-L$(RISCV_TOOLCHAIN_PATH)/lib/gcc/$(RISCV_PREFIX)/9.2.0
|
||||
#LLVM_CFLAGS += --rtlib=libgcc
|
||||
|
||||
#CC = $(LLVM_VORTEX)/bin/clang $(LLVM_CFLAGS)
|
||||
#CXX = $(LLVM_VORTEX)/bin/clang++ $(LLVM_CFLAGS)
|
||||
|
@ -55,4 +51,4 @@ run-simx: $(PROJECT).bin
|
|||
$(CC) $(CFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf *.elf *.bin *.dump *.log .depend
|
||||
rm -rf *.elf *.bin *.dump *.log .depend
|
||||
|
|
|
@ -29,6 +29,7 @@ VX_CFLAGS += -O3 -mcmodel=medany --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(R
|
|||
VX_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections
|
||||
VX_CFLAGS += -I$(ROOT_DIR)/hw -I$(VORTEX_KN_PATH)/include -DXLEN_$(XLEN) -DNDEBUG
|
||||
VX_CFLAGS += -Xclang -target-feature -Xclang +vortex
|
||||
VX_CFLAGS += -Xclang -target-feature -Xclang +zicond
|
||||
VX_CFLAGS += -mllvm -disable-loop-idiom-all # disable memset/memcpy loop idiom
|
||||
#VX_CFLAGS += -mllvm -vortex-branch-divergence=0
|
||||
#VX_CFLAGS += -mllvm -print-after-all
|
||||
|
@ -46,6 +47,7 @@ CXXFLAGS += -I$(POCL_RT_PATH)/include
|
|||
# Debugigng
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0
|
||||
POCL_CC_FLAGS += POCL_DEBUG=all
|
||||
POCL_RT_FLAGS += POCL_DEBUG=all
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
|
@ -73,7 +75,7 @@ kernel.cl: $(SRC_DIR)/kernel.cl
|
|||
cp $< $@
|
||||
|
||||
kernel.pocl: $(SRC_DIR)/kernel.cl
|
||||
LD_LIBRARY_PATH=$(LLVM_POCL)/lib:$(POCL_CC_PATH)/lib:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) LLVM_PREFIX=$(LLVM_VORTEX) POCL_KERNEL_CACHE=0 POCL_VORTEX_BINTOOL="OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py" POCL_VORTEX_LLCFLAGS="$(VX_LLCFLAGS)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)" $(POCL_CC_PATH)/bin/poclcc -o $@ $<
|
||||
LD_LIBRARY_PATH=$(LLVM_POCL)/lib:$(POCL_CC_PATH)/lib:$(LD_LIBRARY_PATH) LLVM_PREFIX=$(LLVM_VORTEX) $(POCL_CC_FLAGS) POCL_VORTEX_BINTOOL="OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py" POCL_VORTEX_LLCFLAGS="$(VX_LLCFLAGS)" POCL_VORTEX_CFLAGS="$(VX_CFLAGS)" POCL_VORTEX_LDFLAGS="$(VX_LDFLAGS)" $(POCL_CC_PATH)/bin/poclcc -o $@ $<
|
||||
|
||||
%.cc.o: $(SRC_DIR)/%.cc
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
|
|
@ -16,6 +16,7 @@ endif
|
|||
LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT)
|
||||
LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH)
|
||||
LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex
|
||||
LLVM_CFLAGS += -Xclang -target-feature -Xclang +zicond
|
||||
LLVM_CFLAGS += -mllvm -disable-loop-idiom-all # disable memset/memcpy loop idiom
|
||||
#LLVM_CFLAGS += -mllvm -vortex-branch-divergence=0
|
||||
#LLVM_CFLAGS += -mllvm -print-after-all
|
||||
|
@ -107,7 +108,7 @@ clean:
|
|||
rm -rf $(PROJECT) *.o *.log .depend
|
||||
|
||||
clean-all: clean
|
||||
rm -rf *.elf *.bin *.dump
|
||||
rm -rf *.elf *.vxbin *.dump
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
|
|
|
@ -307,8 +307,8 @@ void kernel_utof(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
|||
}
|
||||
}
|
||||
|
||||
float fclamp(float a, float b, float c) {
|
||||
return fmin(fmax(a, b), c);
|
||||
inline float fclamp(float a, float b, float c) {
|
||||
return fmin(fmax(a, b), c);
|
||||
}
|
||||
|
||||
void kernel_fclamp(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
||||
|
@ -325,6 +325,24 @@ void kernel_fclamp(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
|||
}
|
||||
}
|
||||
|
||||
inline int iclamp(int a, int b, int c) {
|
||||
return std::min(std::max(a, b), c);
|
||||
}
|
||||
|
||||
void kernel_iclamp(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
||||
auto count = arg->task_size;
|
||||
auto src0_ptr = (int*)arg->src0_addr;
|
||||
auto src1_ptr = (int*)arg->src1_addr;
|
||||
auto dst_ptr = (int*)arg->dst_addr;
|
||||
auto offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
auto a = src0_ptr[offset+i];
|
||||
auto b = src1_ptr[offset+i];
|
||||
dst_ptr[offset+i] = iclamp(1, a, b);
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_trigo(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
||||
auto count = arg->task_size;
|
||||
auto src0_ptr = (float*)arg->src0_addr;
|
||||
|
@ -403,31 +421,32 @@ void kernel_gbar(int task_id, kernel_arg_t* __UNIFORM__ arg) {
|
|||
dst_ptr[task_id] += 1;
|
||||
}
|
||||
|
||||
static PFN_Kernel sc_tests[23];
|
||||
static PFN_Kernel sc_tests[24];
|
||||
void register_tests() {
|
||||
sc_tests[0] = kernel_iadd;
|
||||
sc_tests[1] = kernel_imul;
|
||||
sc_tests[2] = kernel_idiv;
|
||||
sc_tests[3] = kernel_idiv_mul;
|
||||
sc_tests[4] = kernel_fadd;
|
||||
sc_tests[5] = kernel_fsub;
|
||||
sc_tests[6] = kernel_fmul;
|
||||
sc_tests[7] = kernel_fmadd;
|
||||
sc_tests[8] = kernel_fmsub;
|
||||
sc_tests[9] = kernel_fnmadd;
|
||||
sc_tests[10] = kernel_fnmsub;
|
||||
sc_tests[11] = kernel_fnmadd_madd;
|
||||
sc_tests[12] = kernel_fdiv;
|
||||
sc_tests[13] = kernel_fdiv2;
|
||||
sc_tests[14] = kernel_fsqrt;
|
||||
sc_tests[15] = kernel_ftoi;
|
||||
sc_tests[16] = kernel_ftou;
|
||||
sc_tests[17] = kernel_itof;
|
||||
sc_tests[18] = kernel_utof;
|
||||
sc_tests[19] = kernel_fclamp;
|
||||
sc_tests[20] = kernel_trigo;
|
||||
sc_tests[21] = kernel_bar;
|
||||
sc_tests[22] = kernel_gbar;
|
||||
sc_tests[0] = kernel_iadd;
|
||||
sc_tests[1] = kernel_imul;
|
||||
sc_tests[2] = kernel_idiv;
|
||||
sc_tests[3] = kernel_idiv_mul;
|
||||
sc_tests[4] = kernel_fadd;
|
||||
sc_tests[5] = kernel_fsub;
|
||||
sc_tests[6] = kernel_fmul;
|
||||
sc_tests[7] = kernel_fmadd;
|
||||
sc_tests[8] = kernel_fmsub;
|
||||
sc_tests[9] = kernel_fnmadd;
|
||||
sc_tests[10] = kernel_fnmsub;
|
||||
sc_tests[11] = kernel_fnmadd_madd;
|
||||
sc_tests[12] = kernel_fdiv;
|
||||
sc_tests[13] = kernel_fdiv2;
|
||||
sc_tests[14] = kernel_fsqrt;
|
||||
sc_tests[15] = kernel_ftoi;
|
||||
sc_tests[16] = kernel_ftou;
|
||||
sc_tests[17] = kernel_itof;
|
||||
sc_tests[18] = kernel_utof;
|
||||
sc_tests[19] = kernel_fclamp;
|
||||
sc_tests[20] = kernel_iclamp;
|
||||
sc_tests[21] = kernel_trigo;
|
||||
sc_tests[22] = kernel_bar;
|
||||
sc_tests[23] = kernel_gbar;
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
|
|
@ -732,6 +732,36 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class Test_ICLAMP : public ITestCase {
|
||||
public:
|
||||
Test_ICLAMP(TestSuite* suite) : ITestCase(suite, "iclamp") {}
|
||||
|
||||
int setup(uint32_t n, void* src1, void* src2) override {
|
||||
auto a = (int*)src1;
|
||||
auto b = (int*)src2;
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
a[i] = n/2 - i;
|
||||
b[i] = n/2 - i;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int verify(uint32_t n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (int*)src1;
|
||||
auto b = (int*)src2;
|
||||
auto c = (int*)dst;
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
auto ref = std::min(std::max(1, a[i]), b[i]);
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_TRIGO : public ITestCase {
|
||||
public:
|
||||
Test_TRIGO(TestSuite* suite) : ITestCase(suite, "trig") {}
|
||||
|
@ -863,6 +893,7 @@ TestSuite::TestSuite(vx_device_h device)
|
|||
this->add_test(new Test_ITOF(this));
|
||||
this->add_test(new Test_UTOF(this));
|
||||
this->add_test(new Test_FCLAMP(this));
|
||||
this->add_test(new Test_ICLAMP(this));
|
||||
this->add_test(new Test_TRIGO(this));
|
||||
this->add_test(new Test_BAR(this));
|
||||
this->add_test(new Test_GBAR(this));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue