simx instruction decode refactoring
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (cupbop, 32) (push) Blocked by required conditions
CI / tests (cupbop, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (tensor, 32) (push) Blocked by required conditions
CI / tests (tensor, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2025-06-15 14:24:53 -07:00
parent 8e16302da2
commit 2f2a2ed886
18 changed files with 3049 additions and 2601 deletions

View file

@ -211,12 +211,12 @@ package VX_gpu_pkg;
///////////////////////////////////////////////////////////////////////////
localparam INST_BR_EQ = 4'b0000;
localparam INST_BR_NE = 4'b0010;
localparam INST_BR_LTU = 4'b0100;
localparam INST_BR_GEU = 4'b0110;
localparam INST_BR_LT = 4'b0101;
localparam INST_BR_GE = 4'b0111;
localparam INST_BR_BEQ = 4'b0000;
localparam INST_BR_BNE = 4'b0010;
localparam INST_BR_BLTU = 4'b0100;
localparam INST_BR_BGEU = 4'b0110;
localparam INST_BR_BLT = 4'b0101;
localparam INST_BR_BGE = 4'b0111;
localparam INST_BR_JAL = 4'b1000;
localparam INST_BR_JALR = 4'b1001;
localparam INST_BR_ECALL = 4'b1010;
@ -923,12 +923,12 @@ package VX_gpu_pkg;
end
ALU_TYPE_BRANCH: begin
case (INST_BR_BITS'(op_type))
INST_BR_EQ: `TRACE(level, ("BEQ"))
INST_BR_NE: `TRACE(level, ("BNE"))
INST_BR_LT: `TRACE(level, ("BLT"))
INST_BR_GE: `TRACE(level, ("BGE"))
INST_BR_LTU: `TRACE(level, ("BLTU"))
INST_BR_GEU: `TRACE(level, ("BGEU"))
INST_BR_BEQ: `TRACE(level, ("BEQ"))
INST_BR_BNE: `TRACE(level, ("BNE"))
INST_BR_BLT: `TRACE(level, ("BLT"))
INST_BR_BGE: `TRACE(level, ("BGE"))
INST_BR_BLTU: `TRACE(level, ("BLTU"))
INST_BR_BGEU: `TRACE(level, ("BGEU"))
INST_BR_JAL: `TRACE(level, ("JAL"))
INST_BR_JALR: `TRACE(level, ("JALR"))
INST_BR_ECALL: `TRACE(level, ("ECALL"))

View file

@ -102,12 +102,12 @@ module VX_decode import VX_gpu_pkg::*; #(
reg [INST_BR_BITS-1:0] b_type;
always @(*) begin
case (funct3)
3'h0: b_type = INST_BR_EQ;
3'h1: b_type = INST_BR_NE;
3'h4: b_type = INST_BR_LT;
3'h5: b_type = INST_BR_GE;
3'h6: b_type = INST_BR_LTU;
3'h7: b_type = INST_BR_GEU;
3'h0: b_type = INST_BR_BEQ;
3'h1: b_type = INST_BR_BNE;
3'h4: b_type = INST_BR_BLT;
3'h5: b_type = INST_BR_BGE;
3'h6: b_type = INST_BR_BLTU;
3'h7: b_type = INST_BR_BGEU;
default: b_type = 'x;
endcase
end
@ -375,8 +375,7 @@ module VX_decode import VX_gpu_pkg::*; #(
INST_FCI: begin
ex_type = EX_FPU;
op_args.fpu.frm = funct3;
op_args.fpu.fmt[0] = funct2[0]; // float / double
op_args.fpu.fmt[1] = rs2[1]; // int32 / int64
op_args.fpu.fmt = funct2;
case (funct5)
5'b00000, // FADD
5'b00001, // FSUB

View file

@ -13,32 +13,33 @@
#pragma once
#include <cstdint>
#include <algorithm>
#include <array>
#include <utility>
#include <string>
#include <assert.h>
#include <bitmanip.h>
#include <cstdint>
#include <string>
#include <utility>
#include <variant>
namespace vortex {
template <typename... Args>
void unused(Args&&...) {}
void unused(Args &&...) {}
#define __unused(...) unused(__VA_ARGS__)
#define __assert(cond, msg) \
if (!(cond)) { \
#define __assert(cond, msg) \
if (!(cond)) { \
std::cerr << "Assertion failed: " << msg << "\n"; \
std::cerr << "File: " << __FILE__ << "\n"; \
std::cerr << "Line: " << __LINE__ << "\n"; \
std::cerr << "Function: " << __func__ << "\n"; \
std::abort(); \
std::cerr << "File: " << __FILE__ << "\n"; \
std::cerr << "Line: " << __LINE__ << "\n"; \
std::cerr << "Function: " << __func__ << "\n"; \
std::abort(); \
}
// return file extension
const char* fileExtension(const char* filepath);
const char *fileExtension(const char *filepath);
#if defined(_MSC_VER)
#define DISABLE_WARNING_PUSH __pragma(warning(push))
@ -98,7 +99,7 @@ template <typename R, size_t W>
class VDataCast<R, W, typename std::enable_if<(W > 8)>::type> {
public:
template <typename T>
static R get(T& obj) {
static R get(T &obj) {
return reinterpret_cast<R>(obj.data());
}
};
@ -106,21 +107,40 @@ template <typename R, size_t W>
class VDataCast<R, W, typename std::enable_if<(W <= 8)>::type> {
public:
template <typename T>
static R get(T& obj) {
static R get(T &obj) {
return reinterpret_cast<R>(&obj);
}
};
template <typename T, std::size_t N, typename... Args, std::size_t... Is>
constexpr std::array<T, N> make_array_impl(std::index_sequence<Is...>, Args&&... args) {
return { { (static_cast<void>(Is), T(std::forward<Args>(args)...))... } };
constexpr std::array<T, N> make_array_impl(std::index_sequence<Is...>, Args &&...args) {
return {{(static_cast<void>(Is), T(std::forward<Args>(args)...))...}};
}
template <typename T, std::size_t N, typename... Args>
constexpr std::array<T, N> make_array(Args&&... args) {
constexpr std::array<T, N> make_array(Args &&...args) {
return make_array_impl<T, N>(std::make_index_sequence<N>{}, std::forward<Args>(args)...);
}
std::string resolve_file_path(const std::string& filename, const std::string& searchPaths);
// visit_var(variant, f1, f2, f3, ...)
// - builds a local visitor type that inherits from every callable passed in
// - forwards that visitor together with the variant into std::visit
// - works in C++17 without any extra namespace-scope helper templates
//
// var : the variant to dispatch on (forwarded to std::visit unchanged)
// fs  : callables, typically lambdas; std::visit requires that together they
//       accept every alternative of the variant
// returns whatever the selected callable returns
template <typename Variant, typename... Fs>
auto visit_var(Variant &&var, Fs &&...fs) {
  // local overload set: one base class per callable
  struct Visitor : std::decay_t<Fs>... {
    // copy/move each callable into its own base sub-object
    Visitor(Fs &&...f) : std::decay_t<Fs>(std::forward<Fs>(f))... {}
    // pull every base's operator() into this scope so they overload together
    using std::decay_t<Fs>::operator()...;
  };

  // the dispatch must stay inside the function body, after Visitor is complete
  return std::visit(
    Visitor{std::forward<Fs>(fs)...},
    std::forward<Variant>(var));
}
std::string resolve_file_path(const std::string &filename, const std::string &searchPaths);
} // namespace vortex

View file

@ -100,7 +100,7 @@ PROJECT := libopae-c-sim.so
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/vortex.xml:
$(DESTDIR)/vortex.xml: $(CONFIG_FILE)
verilator --xml-only -O0 $(VL_FLAGS) $(TOP) --xml-output $@
$(DESTDIR)/scope.json: $(DESTDIR)/vortex.xml

View file

@ -337,17 +337,10 @@ void Core::issue() {
case FUType::LSU: ++perf_stats_.scrb_lsu; break;
case FUType::SFU: {
++perf_stats_.scrb_sfu;
switch (use.sfu_type) {
case SfuType::TMC:
case SfuType::WSPAWN:
case SfuType::SPLIT:
case SfuType::JOIN:
case SfuType::BAR:
case SfuType::PRED: ++perf_stats_.scrb_wctl; break;
case SfuType::CSRRW:
case SfuType::CSRRS:
case SfuType::CSRRC: ++perf_stats_.scrb_csrs; break;
default: assert(false);
if (std::get_if<WctlType>(&use.op_type)) {
++perf_stats_.scrb_wctl;
} else if (std::get_if<CsrType>(&use.op_type)) {
++perf_stats_.scrb_csrs;
}
} break;
#ifdef EXT_TPU_ENABLE
@ -432,8 +425,9 @@ void Core::commit() {
if (pending_instrs_.size() != orig_size) {
perf_stats_.instrs += trace->tmask.count();
#ifdef EXT_V_ENABLE
if (trace->fu_type == FUType::VPU
|| (trace->fu_type == FUType::LSU && (trace->lsu_type == LsuType::VLOAD || trace->lsu_type == LsuType::VSTORE))) {
if (std::get_if<VsetType>(&trace->op_type)
|| std::get_if<VlsType>(&trace->op_type)
|| std::get_if<VopType>(&trace->op_type)) {
perf_stats_.vinstrs += trace->tmask.count();
}
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -34,23 +34,65 @@ void AluUnit::tick() {
continue;
auto& output = Outputs.at(iw);
auto trace = input.front();
int delay = 2;
switch (trace->alu_type) {
case AluType::ARITH:
case AluType::BRANCH:
case AluType::SYSCALL:
output.push(trace, 2+delay);
break;
case AluType::IMUL:
output.push(trace, LATENCY_IMUL+delay);
break;
case AluType::IDIV:
output.push(trace, XLEN+delay);
break;
default:
int delay = 0;
if (std::get_if<AluType>(&trace->op_type)) {
auto alu_type = std::get<AluType>(trace->op_type);
switch (alu_type) {
case AluType::LUI:
case AluType::AUIPC:
case AluType::ADD:
case AluType::SUB:
case AluType::SLL:
case AluType::SRL:
case AluType::SRA:
case AluType::SLT:
case AluType::SLTU:
case AluType::XOR:
case AluType::AND:
case AluType::OR:
case AluType::CZERO:
delay = 2;
break;
default:
std::abort();
}
DT(3, this->name() << ": op=" << alu_type << ", " << *trace);
} else if (std:: get_if<BrType>(&trace->op_type)) {
auto br_type = std::get<BrType>(trace->op_type);
switch (br_type) {
case BrType::BR:
case BrType::JAL:
case BrType::JALR:
case BrType::SYS:
delay = 2;
break;
default:
std::abort();
}
DT(3, this->name() << ": op=" << br_type << ", " << *trace);
} else if (std::get_if<MdvType>(&trace->op_type)) {
auto mdv_type = std::get<MdvType>(trace->op_type);
switch (mdv_type) {
case MdvType::MUL:
case MdvType::MULHU:
case MdvType::MULH:
case MdvType::MULHSU:
delay = LATENCY_IMUL+2;
break;
case MdvType::DIV:
case MdvType::DIVU:
case MdvType::REM:
case MdvType::REMU:
delay = XLEN+2;
break;
default:
std::abort();
}
DT(3, this->name() << ": op=" << mdv_type << ", " << *trace);
} else {
std::abort();
}
DT(3, this->name() << ": op=" << trace->alu_type << ", " << *trace);
output.push(trace, delay);
if (trace->eop && trace->fetch_stall) {
core_->resume(trace->wid);
}
@ -69,12 +111,23 @@ void FpuUnit::tick() {
continue;
auto& output = Outputs.at(iw);
auto trace = input.front();
auto fpu_type = std::get<FpuType>(trace->op_type);
int delay = 2;
switch (trace->fpu_type) {
case FpuType::FNCP:
switch (fpu_type) {
case FpuType::FCMP:
case FpuType::FSGNJ:
case FpuType::FCLASS:
case FpuType::FMV:
case FpuType::FMINMAX:
output.push(trace, 2+delay);
break;
case FpuType::FMA:
case FpuType::FADD:
case FpuType::FSUB:
case FpuType::FMUL:
case FpuType::FMADD:
case FpuType::FMSUB:
case FpuType::FNMADD:
case FpuType::FNMSUB:
output.push(trace, LATENCY_FMA+delay);
break;
case FpuType::FDIV:
@ -83,13 +136,15 @@ void FpuUnit::tick() {
case FpuType::FSQRT:
output.push(trace, LATENCY_FSQRT+delay);
break;
case FpuType::FCVT:
case FpuType::F2I:
case FpuType::I2F:
case FpuType::F2F:
output.push(trace, LATENCY_FCVT+delay);
break;
default:
std::abort();
}
DT(3,this->name() << ": op=" << trace->fpu_type << ", " << *trace);
DT(3,this->name() << ": op=" << fpu_type << ", " << *trace);
input.pop();
}
}
@ -158,9 +213,29 @@ void LsuUnit::tick() {
if (input.empty())
continue;
auto trace = input.front();
bool is_fence = false;
bool is_write = false;
if (trace->lsu_type == LsuType::FENCE) {
auto trace = input.front();
if (std::get_if<LsuType>(&trace->op_type)) {
auto lsu_type = std::get<LsuType>(trace->op_type);
is_fence = (lsu_type == LsuType::FENCE);
is_write = (lsu_type == LsuType::STORE);
} else if (std::get_if<AmoType>(&trace->op_type)) {
auto amp_type = std::get<AmoType>(trace->op_type);
is_write = (amp_type != AmoType::LR);
}
#ifdef EXT_V_ENABLE
else if (std::get_if<VlsType>(&trace->op_type)) {
auto vls_type = std::get<VlsType>(trace->op_type);
is_write = (vls_type == VlsType::STORE);
}
#endif // EXT_V_ENABLE
else {
std::abort();
}
if (is_fence) {
// schedule fence lock
state.fence_trace = trace;
state.fence_lock = true;
@ -170,12 +245,6 @@ void LsuUnit::tick() {
continue;
}
bool is_write = (trace->lsu_type == LsuType::STORE)
#ifdef EXT_V_ENABLE
|| (trace->lsu_type == LsuType::VSTORE)
#endif
;
// check pending queue capacity
if (!is_write && state.pending_rd_reqs.full()) {
if (!trace->log_once(true)) {
@ -190,7 +259,7 @@ void LsuUnit::tick() {
pending_addrs_.clear();
if (trace->data) {
#ifdef EXT_V_ENABLE
if (trace->lsu_type == LsuType::VLOAD || trace->lsu_type == LsuType::VSTORE) {
if (std::get_if<VlsType>(&trace->op_type)) {
auto trace_data = std::dynamic_pointer_cast<VecUnit::MemTraceData>(trace->data);
for (uint32_t t = 0; t < trace_data->mem_addrs.size(); ++t) {
if (!trace->tmask.test(t))
@ -275,38 +344,52 @@ void SfuUnit::tick() {
continue;
auto& output = Outputs.at(iw);
auto trace = input.front();
auto sfu_type = trace->sfu_type;
bool release_warp = trace->fetch_stall;
int delay = 2;
switch (sfu_type) {
case SfuType::WSPAWN:
output.push(trace, 2+delay);
if (trace->eop) {
auto trace_data = std::dynamic_pointer_cast<SfuTraceData>(trace->data);
release_warp = core_->wspawn(trace_data->arg1, trace_data->arg2);
if (std::get_if<WctlType>(&trace->op_type)) {
auto wctl_type = std::get<WctlType>(trace->op_type);
switch (wctl_type) {
case WctlType::WSPAWN:
output.push(trace, 2+delay);
if (trace->eop) {
auto trace_data = std::dynamic_pointer_cast<SfuTraceData>(trace->data);
release_warp = core_->wspawn(trace_data->arg1, trace_data->arg2);
}
break;
case WctlType::TMC:
case WctlType::SPLIT:
case WctlType::JOIN:
case WctlType::PRED:
output.push(trace, 2+delay);
break;
case WctlType::BAR: {
output.push(trace, 2+delay);
if (trace->eop) {
auto trace_data = std::dynamic_pointer_cast<SfuTraceData>(trace->data);
release_warp = core_->barrier(trace_data->arg1, trace_data->arg2, trace->wid);
}
} break;
default:
std::abort();
}
break;
case SfuType::TMC:
case SfuType::SPLIT:
case SfuType::JOIN:
case SfuType::PRED:
case SfuType::CSRRW:
case SfuType::CSRRS:
case SfuType::CSRRC:
output.push(trace, 2+delay);
break;
case SfuType::BAR: {
output.push(trace, 2+delay);
if (trace->eop) {
auto trace_data = std::dynamic_pointer_cast<SfuTraceData>(trace->data);
release_warp = core_->barrier(trace_data->arg1, trace_data->arg2, trace->wid);
DT(3, this->name() << ": op=" << wctl_type << ", " << *trace);
} else if (std::get_if<CsrType>(&trace->op_type)) {
auto csr_type = std::get<CsrType>(trace->op_type);
switch (csr_type) {
case CsrType::CSRRW:
case CsrType::CSRRS:
case CsrType::CSRRC:
output.push(trace, 2+delay);
break;
default:
std::abort();
}
} break;
default:
DT(3, this->name() << ": op=" << csr_type << ", " << *trace);
} else {
std::abort();
}
DT(3, this->name() << ": op=" << trace->sfu_type << ", " << *trace);
if (trace->eop && release_warp) {
core_->resume(trace->wid);
}
@ -317,25 +400,6 @@ void SfuUnit::tick() {
///////////////////////////////////////////////////////////////////////////////
#ifdef EXT_TPU_ENABLE
TpuUnit::TpuUnit(const SimContext& ctx, Core* core)
: FuncUnit(ctx, core, "tpu-unit")
{
// bind tensor unit
for (uint32_t iw = 0; iw < ISSUE_WIDTH; ++iw) {
this->Inputs.at(iw).bind(&core_->tensor_unit()->Inputs.at(iw));
core_->tensor_unit()->Outputs.at(iw).bind(&this->Outputs.at(iw));
}
}
void TpuUnit::tick() {
// use tensor_unit
}
#endif
///////////////////////////////////////////////////////////////////////////////
#ifdef EXT_V_ENABLE
VpuUnit::VpuUnit(const SimContext& ctx, Core* core)
@ -352,3 +416,22 @@ void VpuUnit::tick() {
// use vec_unit
}
#endif
///////////////////////////////////////////////////////////////////////////////
#ifdef EXT_TPU_ENABLE
TpuUnit::TpuUnit(const SimContext& ctx, Core* core)
: FuncUnit(ctx, core, "tpu-unit")
{
// bind tensor unit
for (uint32_t iw = 0; iw < ISSUE_WIDTH; ++iw) {
this->Inputs.at(iw).bind(&core_->tensor_unit()->Inputs.at(iw));
core_->tensor_unit()->Outputs.at(iw).bind(&this->Outputs.at(iw));
}
}
void TpuUnit::tick() {
// use tensor_unit
}
#endif

View file

@ -17,39 +17,39 @@
namespace vortex {
enum class Opcode {
NONE = 0,
R = 0x33,
L = 0x3,
I = 0x13,
S = 0x23,
B = 0x63,
LUI = 0x37,
AUIPC = 0x17,
JAL = 0x6f,
JALR = 0x67,
SYS = 0x73,
FENCE = 0x0f,
AMO = 0x2f,
enum class Opcode : uint8_t {
NONE = 0b0000000,
R = 0b0110011,
L = 0b0000011,
I = 0b0010011,
S = 0b0100011,
B = 0b1100011,
LUI = 0b0110111,
AUIPC = 0b0010111,
JAL = 0b1101111,
JALR = 0b1100111,
SYS = 0b1110011,
FENCE = 0b0001111,
AMO = 0b0101111,
// F Extension
FL = 0x7,
FS = 0x27,
FCI = 0x53,
FMADD = 0x43,
FMSUB = 0x47,
FMNMSUB = 0x4b,
FMNMADD = 0x4f,
FL = 0b0000111,
FS = 0b0100111,
FCI = 0b1010011,
FMADD = 0b1000011,
FMSUB = 0b1000111,
FNMSUB = 0b1001011,
FNMADD = 0b1001111,
// RV64 Standard Extension
R_W = 0x3b,
I_W = 0x1b,
R_W = 0b0111011,
I_W = 0b0011011,
// Vector Extension
VSET = 0x57,
VSET = 0b1010111,
// Custom Extensions
EXT1 = 0x0b,
EXT2 = 0x2b,
EXT3 = 0x5b,
EXT4 = 0x7b
};
EXT1 = 0b0001011,
EXT2 = 0b0101011,
EXT3 = 0b1011011,
EXT4 = 0b1111011
};;
enum class InstType {
R,
@ -67,17 +67,20 @@ enum DecodeConstants {
width_reg = 5,
width_funct2= 2,
width_funct3= 3,
width_funct5= 5,
width_funct6= 6,
width_funct7= 7,
width_mop = 3,
width_vmask = 1,
width_i_imm = 12,
width_j_imm = 20,
width_v_zimm= 11,
width_v_ma = 1,
width_v_ta = 1,
width_v_sew = 3,
width_v_lmul= 3,
width_vmop = 2,
width_vmew = 1,
width_vnf = 3,
width_vm = 1,
width_vzimm = 11,
width_vma = 1,
width_vta = 1,
width_vsew = 3,
width_vlmul = 3,
width_aq = 1,
width_rl = 1,
@ -87,59 +90,66 @@ enum DecodeConstants {
shift_rs1 = shift_funct3 + width_funct3,
shift_rs2 = shift_rs1 + width_reg,
shift_funct2= shift_rs2 + width_reg,
shift_funct7= shift_rs2 + width_reg,
shift_funct5= shift_funct2 + width_funct2,
shift_funct7= shift_funct2,
shift_rl = shift_funct2,
shift_aq = shift_rl + width_rl,
shift_rs3 = shift_funct7 + width_funct2,
shift_vmop = shift_funct7 + width_vmask,
shift_vnf = shift_vmop + width_mop,
shift_funct6= shift_funct7 + width_vmask,
shift_vm = shift_funct7,
shift_vmop = shift_funct7 + width_vm,
shift_vmew = shift_vmop + width_vmop,
shift_vnf = shift_vmew + width_vmew,
shift_funct6= shift_funct7 + width_vm,
shift_vset = shift_funct7 + width_funct6,
shift_v_sew = width_v_lmul,
shift_v_ta = shift_v_sew + width_v_sew,
shift_v_ma = shift_v_ta + width_v_ta,
shift_vsew = width_vlmul,
shift_vta = shift_vsew + width_vsew,
shift_vma = shift_vta + width_vta,
shift_vzimm = shift_rs2,
mask_opcode = (1 << width_opcode) - 1,
mask_reg = (1 << width_reg) - 1,
mask_funct2 = (1 << width_funct2) - 1,
mask_funct3 = (1 << width_funct3) - 1,
mask_funct5 = (1 << width_funct5) - 1,
mask_funct6 = (1 << width_funct6) - 1,
mask_funct7 = (1 << width_funct7) - 1,
mask_aq = (1 << width_aq) - 1,
mask_rl = (1 << width_rl) - 1,
mask_i_imm = (1 << width_i_imm) - 1,
mask_j_imm = (1 << width_j_imm) - 1,
mask_v_zimm = (1 << width_v_zimm) - 1,
mask_v_ma = (1 << width_v_ma) - 1,
mask_v_ta = (1 << width_v_ta) - 1,
mask_v_sew = (1 << width_v_sew) - 1,
mask_v_lmul = (1 << width_v_lmul) - 1,
};
enum VectorAttrMask {
vattr_vlswidth = (1 << 0),
vattr_vmop = (1 << 1),
vattr_vumop = (1 << 2),
vattr_vnf = (1 << 3),
vattr_vmask = (1 << 4),
vattr_vs3 = (1 << 5),
vattr_zimm = (1 << 6),
vattr_vediv = (1 << 7)
mask_vmop = (1 << width_vmop) - 1,
mask_vmew = (1 << width_vmew) - 1,
mask_vnf = (1 << width_vnf) - 1,
mask_vm = (1 << width_vm) - 1,
mask_vzimm = (1 << width_vzimm) - 1,
mask_vma = (1 << width_vma) - 1,
mask_vta = (1 << width_vta) - 1,
mask_vsew = (1 << width_vsew) - 1,
mask_vlmul = (1 << width_vlmul) - 1,
};
class Instr {
public:
using Ptr = std::shared_ptr<Instr>;
Instr(Opcode opcode = Opcode::NONE)
: opcode_(opcode)
, num_rsrcs_(0)
, has_imm_(false)
, imm_(0)
, funct2_(0)
, funct3_(0)
, funct6_(0)
, funct7_(0)
enum {
MAX_REG_SOURCES = 3
};
Instr(FUType fu_type = FUType::ALU)
: fu_type_(fu_type)
{}
void setOpcode(Opcode opcode) {
opcode_ = opcode;
void setFUType(FUType fu_type) {
fu_type_ = fu_type;
}
template <typename T> void setOpType(T op_type) {
op_type_ = static_cast<T>(op_type);
}
template <typename T> void setArgs(T args) {
args_ = static_cast<T>(args);
}
void setDestReg(uint32_t destReg, RegType type) {
@ -148,83 +158,25 @@ public:
void setSrcReg(uint32_t index, uint32_t srcReg, RegType type) {
rsrc_[index] = { type, srcReg};
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
}
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; }
FUType getFUType() const { return fu_type_; }
void setfunct2(uint32_t funct2) { funct2_ = funct2; }
void setfunct3(uint32_t funct3) { funct3_ = funct3; }
void setfunct6(uint32_t funct6) { funct6_ = funct6; }
void setfunct7(uint32_t funct7) { funct7_ = funct7; }
OpType getOpType() const { return op_type_; }
Opcode getOpcode() const { return opcode_; }
const IntrArgs& getArgs() const { return args_; }
uint32_t getNumSrcRegs() const { return num_rsrcs_; }
RegOpd getSrcReg(uint32_t i) const { return rsrc_[i]; }
RegOpd getSrcReg(uint32_t i) const { return rsrc_[i]; }
RegOpd getDestReg() const { return rdest_; }
bool hasImm() const { return has_imm_; }
uint32_t getImm() const { return imm_; }
uint32_t getFunct2() const { return funct2_; }
uint32_t getFunct3() const { return funct3_; }
uint32_t getFunct6() const { return funct6_; }
uint32_t getFunct7() const { return funct7_; }
#ifdef EXT_V_ENABLE
// Attributes for Vector instructions
void setVlsWidth(uint32_t width) { vlsWidth_ = width; vattr_mask_ |= vattr_vlswidth; }
void setVmop(uint32_t mop) { vmop_ = mop; vattr_mask_ |= vattr_vmop; }
void setVumop(uint32_t umop) { vumop_ = umop; vattr_mask_ |= vattr_vumop; }
void setVnf(uint32_t nf) { vnf_ = nf; vattr_mask_ |= vattr_vnf; }
void setVmask(uint32_t vmask) { vmask_ = vmask; vattr_mask_ |= vattr_vmask; }
void setVs3(uint32_t vs) { vs3_ = vs; vattr_mask_ |= vattr_vs3; }
void setZimm(uint32_t zimm) { zimm_ = zimm; vattr_mask_ |= vattr_zimm; }
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; vattr_mask_ |= vattr_vediv; }
uint32_t getVlsWidth() const { return vlsWidth_; }
uint32_t getVmop() const { return vmop_; }
uint32_t getVumop() const { return vumop_; }
uint32_t getVnf() const { return vnf_; }
uint32_t getVmask() const { return vmask_; }
uint32_t getVs3() const { return vs3_; }
uint32_t getZimm() const { return zimm_; }
uint32_t getVediv() const { return vediv_; }
uint32_t getVattrMask() const { return vattr_mask_; }
bool hasVattrMask(VectorAttrMask mask) const { return vattr_mask_ & mask; }
#endif
RegOpd getDestReg() const { return rdest_; }
private:
enum {
MAX_REG_SOURCES = 3
};
Opcode opcode_;
uint32_t num_rsrcs_;
bool has_imm_;
FUType fu_type_;
OpType op_type_;
IntrArgs args_;
RegOpd rsrc_[MAX_REG_SOURCES];
RegOpd rdest_;
uint32_t imm_;
uint32_t funct2_;
uint32_t funct3_;
uint32_t funct6_;
uint32_t funct7_;
#ifdef EXT_V_ENABLE
// Vector
uint32_t vmask_ = 0;
uint32_t vlsWidth_ = 0;
uint32_t vmop_ = 0;
uint32_t vumop_ = 0;
uint32_t vnf_ = 0;
uint32_t vs3_ = 0;
uint32_t zimm_ = 0;
uint32_t vediv_ = 0;
uint32_t vattr_mask_ = 0;
#endif
friend std::ostream &operator<<(std::ostream &, const Instr&);
};

View file

@ -66,19 +66,7 @@ public:
FUType fu_type;
//--
union {
uint32_t unit_type;
LsuType lsu_type;
AluType alu_type;
FpuType fpu_type;
SfuType sfu_type;
#ifdef EXT_TPU_ENABLE
TpuType tpu_type;
#endif
#ifdef EXT_V_ENABLE
VpuType vpu_type;
#endif
};
OpType op_type;
ITraceData::Ptr data;
@ -101,7 +89,7 @@ public:
, dst_reg({RegType::None, 0})
, src_regs(NUM_SRC_REGS, {RegType::None, 0})
, fu_type(FUType::ALU)
, unit_type(0)
, op_type({})
, data(nullptr)
, pid(-1)
, sop(true)
@ -122,7 +110,7 @@ public:
, dst_reg(rhs.dst_reg)
, src_regs(rhs.src_regs)
, fu_type(rhs.fu_type)
, unit_type(rhs.unit_type)
, op_type(rhs.op_type)
, data(rhs.data)
, pid(rhs.pid)
, sop(rhs.sop)

View file

@ -26,7 +26,7 @@ public:
RegType reg_type;
uint32_t reg_id;
FUType fu_type;
SfuType sfu_type;
OpType op_type;
uint64_t uuid;
};
@ -71,7 +71,7 @@ public:
if (in_use_regs_.at(trace->wid).at((int)trace->dst_reg.type).test(trace->dst_reg.idx)) {
uint32_t reg_id = get_reg_id(trace->dst_reg, trace->wid);
auto owner = owners_.at(reg_id);
out.push_back({trace->dst_reg.type, trace->dst_reg.idx, owner->fu_type, owner->sfu_type, owner->uuid});
out.push_back({trace->dst_reg.type, trace->dst_reg.idx, owner->fu_type, owner->op_type, owner->uuid});
}
}
for (uint32_t i = 0; i < trace->src_regs.size(); ++i) {
@ -79,7 +79,7 @@ public:
if (in_use_regs_.at(trace->wid).at((int)trace->src_regs[i].type).test(trace->src_regs[i].idx)) {
uint32_t reg_id = get_reg_id(trace->src_regs[i], trace->wid);
auto owner = owners_.at(reg_id);
out.push_back({trace->src_regs[i].type, trace->src_regs[i].idx, owner->fu_type, owner->sfu_type, owner->uuid});
out.push_back({trace->src_regs[i].type, trace->src_regs[i].idx, owner->fu_type, owner->op_type, owner->uuid});
}
}
}

View file

@ -201,8 +201,9 @@ public:
if (input.empty())
return;
auto trace = input.front();
auto tpu_type = std::get<TpuType>(trace->op_type);
int delay = 0;
switch (trace->tpu_type) {
switch (tpu_type) {
case TpuType::WMMA:
delay = 4;
break;
@ -210,7 +211,7 @@ public:
std::abort();
}
simobject_->Outputs.at(iw).push(trace, 2 + delay);
DT(3, simobject_->name() << ": op=" << trace->tpu_type << ", " << *trace);
DT(3, simobject_->name() << ": op=" << tpu_type << ", " << *trace);
input.pop();
}
}
@ -226,12 +227,12 @@ public:
__unused(wid);
__unused(trace_data);
uint32_t fmt_d = fmt >> 4;
uint32_t fmt_s = fmt & 0xf;
uint32_t fmt_d = fmt >> 4;
auto fedp = select_FEDP(fmt_s, fmt_d);
uint32_t m = step >> 4;
uint32_t n = step & 0xf;
uint32_t m = step & 0xf;
uint32_t n = step >> 4;
uint32_t a_off = (m % cfg::a_sub_blocks) * cfg::a_block_size;
uint32_t b_off = (n % cfg::b_sub_blocks) * cfg::b_block_size;

View file

@ -18,6 +18,7 @@
#include <queue>
#include <vector>
#include <unordered_map>
#include <variant>
#include <util.h>
#include <stringutil.h>
#include <VX_config.h>
@ -31,6 +32,7 @@
namespace vortex {
typedef uint8_t Byte;
#if (XLEN == 32)
typedef uint32_t Word;
typedef int32_t WordI;
@ -118,11 +120,11 @@ enum class FUType {
LSU,
FPU,
SFU,
#ifdef EXT_TPU_ENABLE
TPU,
#endif
#ifdef EXT_V_ENABLE
VPU,
#endif
#ifdef EXT_TPU_ENABLE
TPU,
#endif
Count
};
@ -133,13 +135,14 @@ inline std::ostream &operator<<(std::ostream &os, const FUType& type) {
case FUType::LSU: os << "LSU"; break;
case FUType::FPU: os << "FPU"; break;
case FUType::SFU: os << "SFU"; break;
#ifdef EXT_TPU_ENABLE
case FUType::TPU: os << "TPU"; break;
#endif
#ifdef EXT_V_ENABLE
case FUType::VPU: os << "VPU"; break;
#endif
default: assert(false);
#ifdef EXT_TPU_ENABLE
case FUType::TPU: os << "TPU"; break;
#endif
default:
assert(false);
}
return os;
}
@ -147,21 +150,103 @@ inline std::ostream &operator<<(std::ostream &os, const FUType& type) {
///////////////////////////////////////////////////////////////////////////////
enum class AluType {
ARITH,
BRANCH,
SYSCALL,
IMUL,
IDIV
LUI,
AUIPC,
ADD,
SUB,
SLL,
SRL,
SRA,
SLT,
SLTU,
AND,
OR,
XOR,
CZERO
};
struct IntrAluArgs {
uint32_t is_imm : 1;
uint32_t is_w : 1;
uint32_t imm;
};
inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
switch (type) {
case AluType::ARITH: os << "ARITH"; break;
case AluType::BRANCH: os << "BRANCH"; break;
case AluType::SYSCALL: os << "SYSCALL"; break;
case AluType::IMUL: os << "IMUL"; break;
case AluType::IDIV: os << "IDIV"; break;
default: assert(false);
case AluType::LUI: os << "LUI"; break;
case AluType::AUIPC: os << "AUIPC"; break;
case AluType::ADD: os << "ADD"; break;
case AluType::SUB: os << "SUB"; break;
case AluType::SLL: os << "SLL"; break;
case AluType::SRL: os << "SRL"; break;
case AluType::SRA: os << "SRA"; break;
case AluType::SLT: os << "SLT"; break;
case AluType::SLTU: os << "SLTU"; break;
case AluType::AND: os << "AND"; break;
case AluType::OR: os << "OR"; break;
case AluType::XOR: os << "XOR"; break;
case AluType::CZERO: os << "CZERO"; break;
default:
assert(false);
}
return os;
}
///////////////////////////////////////////////////////////////////////////////
// Operation kinds tagged as BrType.
enum class BrType {
  BR,
  JAL,
  JALR,
  SYS,
};

// Argument record that accompanies a BrType operation.
struct IntrBrArgs {
  uint32_t cmp : 3;  // 3-bit selector; presumably the branch comparison kind -- confirm against the decoder
  uint32_t offset;   // presumably the branch/jump displacement -- confirm against the decoder
};

// Writes the enumerator's name to `os`. A value outside the enum trips
// assert(false) and leaves the stream untouched.
inline std::ostream &operator<<(std::ostream &os, const BrType& type) {
  const char* label = nullptr;
  switch (type) {
  case BrType::BR:   label = "BR";   break;
  case BrType::JAL:  label = "JAL";  break;
  case BrType::JALR: label = "JALR"; break;
  case BrType::SYS:  label = "SYS";  break;
  }
  if (label == nullptr) {
    assert(false);
    return os;
  }
  return os << label;
}
///////////////////////////////////////////////////////////////////////////////
// Operation kinds tagged as MdvType (multiply / divide / remainder names).
enum class MdvType {
  MUL,
  MULHU,
  MULH,
  MULHSU,
  DIV,
  DIVU,
  REM,
  REMU
};

// Argument record that accompanies an MdvType operation.
struct IntrMdvArgs {
  uint32_t is_w : 1;  // presumably marks the RV64 32-bit "W" form -- confirm against the decoder
};

// Writes the enumerator's name to `os`. A value outside the enum trips
// assert(false) and leaves the stream untouched.
inline std::ostream &operator<<(std::ostream &os, const MdvType& type) {
  const char* label = nullptr;
  switch (type) {
  case MdvType::MUL:    label = "MUL";    break;
  case MdvType::MULHU:  label = "MULHU";  break;
  case MdvType::MULH:   label = "MULH";   break;
  case MdvType::MULHSU: label = "MULHSU"; break;
  case MdvType::DIV:    label = "DIV";    break;
  case MdvType::DIVU:   label = "DIVU";   break;
  case MdvType::REM:    label = "REM";    break;
  case MdvType::REMU:   label = "REMU";   break;
  }
  if (label == nullptr) {
    assert(false);
    return os;
  }
  return os << label;
}
@ -171,29 +256,360 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
enum class LsuType {
LOAD,
STORE,
#ifdef EXT_V_ENABLE
VLOAD,
VSTORE,
#endif
FENCE
};
struct IntrLsuArgs {
uint32_t width : 3;
uint32_t is_float : 1;
uint32_t offset;
};
inline std::ostream &operator<<(std::ostream &os, const LsuType& type) {
switch (type) {
case LsuType::LOAD: os << "LOAD"; break;
case LsuType::STORE: os << "STORE"; break;
#ifdef EXT_V_ENABLE
case LsuType::VLOAD: os << "VLOAD"; break;
case LsuType::VSTORE:os << "VSTORE"; break;
#endif
case LsuType::FENCE: os << "FENCE"; break;
default: assert(false);
case LsuType::LOAD: os << "LOAD"; break;
case LsuType::STORE: os << "STORE"; break;
case LsuType::FENCE: os << "FENCE"; break;
default:
assert(false);
}
return os;
}
///////////////////////////////////////////////////////////////////////////////
// Operation kinds tagged as AmoType (LR/SC and AMO* names).
enum class AmoType {
  LR,
  SC,
  AMOADD,
  AMOSWAP,
  AMOAND,
  AMOOR,
  AMOXOR,
  AMOMIN,
  AMOMAX,
  AMOMINU,
  AMOMAXU
};

// Argument record that accompanies an AmoType operation.
struct IntrAmoArgs {
  uint32_t width : 3;  // 3-bit access-width selector; encoding not visible here
  uint32_t aq    : 1;  // presumably the RISC-V acquire bit -- confirm against the decoder
  uint32_t rl    : 1;  // presumably the RISC-V release bit -- confirm against the decoder
};

// Writes the enumerator's name to `os`. A value outside the enum trips
// assert(false) and leaves the stream untouched.
inline std::ostream &operator<<(std::ostream &os, const AmoType& type) {
  const char* label = nullptr;
  switch (type) {
  case AmoType::LR:      label = "LR";      break;
  case AmoType::SC:      label = "SC";      break;
  case AmoType::AMOADD:  label = "AMOADD";  break;
  case AmoType::AMOSWAP: label = "AMOSWAP"; break;
  case AmoType::AMOAND:  label = "AMOAND";  break;
  case AmoType::AMOOR:   label = "AMOOR";   break;
  case AmoType::AMOXOR:  label = "AMOXOR";  break;
  case AmoType::AMOMIN:  label = "AMOMIN";  break;
  case AmoType::AMOMAX:  label = "AMOMAX";  break;
  case AmoType::AMOMINU: label = "AMOMINU"; break;
  case AmoType::AMOMAXU: label = "AMOMAXU"; break;
  }
  if (label == nullptr) {
    assert(false);
    return os;
  }
  return os << label;
}
///////////////////////////////////////////////////////////////////////////////
// Operation kinds tagged as FpuType.
enum class FpuType {
  FADD,
  FSUB,
  FMUL,
  FDIV,
  FSQRT,
  FMADD,
  FMSUB,
  FNMADD,
  FNMSUB,
  F2I,
  I2F,
  F2F,
  FCMP,
  FSGNJ,
  FCLASS,
  FMV,
  FMINMAX,
};

// Argument record that accompanies an FpuType operation.
struct IntrFpuArgs {
  uint32_t frm    : 3;  // 3-bit field; presumably the rounding mode -- confirm against the decoder
  uint32_t fmt    : 2;  // 2-bit format selector; encoding not visible here
  uint32_t is_f64 : 1;  // presumably set for double-precision operands -- confirm against the decoder
};

// Writes a name for the enumerator to `os` (note FMINMAX is printed as
// "FMIN_MAX"). A value outside the enum trips assert(false) and leaves the
// stream untouched.
inline std::ostream &operator<<(std::ostream &os, const FpuType& type) {
  const char* label = nullptr;
  switch (type) {
  case FpuType::FADD:    label = "FADD";     break;
  case FpuType::FSUB:    label = "FSUB";     break;
  case FpuType::FMUL:    label = "FMUL";     break;
  case FpuType::FDIV:    label = "FDIV";     break;
  case FpuType::FSQRT:   label = "FSQRT";    break;
  case FpuType::FMADD:   label = "FMADD";    break;
  case FpuType::FMSUB:   label = "FMSUB";    break;
  case FpuType::FNMADD:  label = "FNMADD";   break;
  case FpuType::FNMSUB:  label = "FNMSUB";   break;
  case FpuType::F2I:     label = "F2I";      break;
  case FpuType::I2F:     label = "I2F";      break;
  case FpuType::F2F:     label = "F2F";      break;
  case FpuType::FCMP:    label = "FCMP";     break;
  case FpuType::FSGNJ:   label = "FSGNJ";    break;
  case FpuType::FCLASS:  label = "FCLASS";   break;
  case FpuType::FMV:     label = "FMV";      break;
  case FpuType::FMINMAX: label = "FMIN_MAX"; break;
  }
  if (label == nullptr) {
    assert(false);
    return os;
  }
  return os << label;
}
///////////////////////////////////////////////////////////////////////////////
// Operations of the warp-control instruction group. Enumerators take the
// implicit values 0..5 in declaration order.
enum class WctlType {
  TMC,
  WSPAWN,
  SPLIT,
  JOIN,
  BAR,
  PRED
};

// Bit-packed arguments that accompany a WctlType operation.
struct IntrWctlArgs {
  uint32_t is_neg : 1;
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const WctlType& type) {
  switch (type) {
  case WctlType::TMC:    return os << "TMC";
  case WctlType::WSPAWN: return os << "WSPAWN";
  case WctlType::SPLIT:  return os << "SPLIT";
  case WctlType::JOIN:   return os << "JOIN";
  case WctlType::BAR:    return os << "BAR";
  case WctlType::PRED:   return os << "PRED";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Operations of the CSR instruction group. Enumerators take the implicit
// values 0..2 in declaration order.
enum class CsrType {
  CSRRW,
  CSRRS,
  CSRRC
};

// Bit-packed arguments that accompany a CsrType operation.
struct IntrCsrArgs {
  uint32_t is_imm : 1;
  uint32_t imm    : 5;
  uint32_t csr    : 12;
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const CsrType& type) {
  switch (type) {
  case CsrType::CSRRW: return os << "CSRRW";
  case CsrType::CSRRS: return os << "CSRRS";
  case CsrType::CSRRC: return os << "CSRRC";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Operations of the vector-configuration (vset*) instruction group.
// Enumerators take the implicit values 0..2 in declaration order.
enum class VsetType {
  VSETVLI,
  VSETIVLI,
  VSETVL
};

// Bit-packed arguments that accompany a VsetType operation.
struct IntrVsetArgs {
  uint32_t zimm : 11;
  uint32_t uimm : 5;
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const VsetType& type) {
  switch (type) {
  case VsetType::VSETVLI:  return os << "VSETVLI";
  case VsetType::VSETIVLI: return os << "VSETIVLI";
  case VsetType::VSETVL:   return os << "VSETVL";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Operations of the vector load/store instruction group.
// LOAD == 0 and STORE == 1 (implicit values).
enum class VlsType {
  LOAD,
  STORE
};

// Bit-packed arguments that accompany a VlsType operation.
struct IntrVlsArgs {
  uint32_t width : 2;
  uint32_t umop  : 5;
  uint32_t vm    : 1;
  uint32_t mop   : 2;
  uint32_t mew   : 1;
  uint32_t nf    : 3;
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const VlsType& type) {
  switch (type) {
  case VlsType::LOAD:  return os << "LOAD";
  case VlsType::STORE: return os << "STORE";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Operand-form categories of the vector arithmetic instruction group.
// Enumerators take the implicit values 0..6 in declaration order.
enum class VopType {
  OPIVV,
  OPFVV,
  OPMVV,
  OPIVI,
  OPIVX,
  OPFVF,
  OPMVX
};

// Bit-packed arguments that accompany a VopType operation.
struct IntrVopArgs {
  uint32_t vm     : 1;
  uint32_t funct6 : 6;
  uint32_t imm    : 5;
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const VopType& type) {
  switch (type) {
  case VopType::OPIVV: return os << "OPIVV";
  case VopType::OPFVV: return os << "OPFVV";
  case VopType::OPMVV: return os << "OPMVV";
  case VopType::OPIVI: return os << "OPIVI";
  case VopType::OPIVX: return os << "OPIVX";
  case VopType::OPFVF: return os << "OPFVF";
  case VopType::OPMVX: return os << "OPMVX";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Execution classes for vector-unit operations, with explicit numeric values.
// The *_R (reduction) variants are deliberately numbered last: VOpcUnit::tick
// classifies an operation as a reduction with `vpu_op >= VpuOpType::ARITH_R`,
// so any new non-reduction class must be given a value below ARITH_R.
enum class VpuOpType {
  VSET    = 0,
  ARITH   = 1,
  IMUL    = 2,
  IDIV    = 3,
  FMA     = 4,
  FDIV    = 5,
  FSQRT   = 6,
  FCVT    = 7,
  FNCP    = 8,
  // reduction
  ARITH_R = 9,
  FMA_R   = 10,
  FNCP_R  = 11
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const VpuOpType& type) {
  switch (type) {
  case VpuOpType::VSET:    return os << "VSET";
  case VpuOpType::ARITH:   return os << "ARITH";
  case VpuOpType::IMUL:    return os << "IMUL";
  case VpuOpType::IDIV:    return os << "IDIV";
  case VpuOpType::FMA:     return os << "FMA";
  case VpuOpType::FDIV:    return os << "FDIV";
  case VpuOpType::FSQRT:   return os << "FSQRT";
  case VpuOpType::FCVT:    return os << "FCVT";
  case VpuOpType::FNCP:    return os << "FNCP";
  case VpuOpType::ARITH_R: return os << "ARITH_R";
  case VpuOpType::FMA_R:   return os << "FMA_R";
  case VpuOpType::FNCP_R:  return os << "FNCP_R";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Operations of the TPU instruction group; WMMA (value 0) is the only one.
enum class TpuType {
  WMMA,
};

// Bit-packed arguments that accompany a TpuType operation.
struct IntrTpuArgs {
  uint32_t fmt  : 8;
  uint32_t step : 8;
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const TpuType& type) {
  switch (type) {
  case TpuType::WMMA: return os << "WMMA";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Variant holding exactly one of the per-group operation enums.
// The alternatives are listed in the same order as the matching argument
// structs in IntrArgs (AluType <-> IntrAluArgs, BrType <-> IntrBrArgs, ...).
// The vector alternatives exist only when EXT_V_ENABLE is defined, and TpuType
// only when EXT_TPU_ENABLE is defined.
// NOTE(review): std::get_if<T> / std::get<T> are ill-formed when T is not an
// alternative, so code that queries VsetType/VlsType/VopType/TpuType must be
// guarded by the same macros - confirm at the call sites.
// VpuOpType is intentionally not an alternative here.
using OpType = std::variant<
AluType
, BrType
, MdvType
, LsuType
, AmoType
, FpuType
, CsrType
, WctlType
#ifdef EXT_V_ENABLE
, VsetType
, VlsType
, VopType
#endif
#ifdef EXT_TPU_ENABLE
, TpuType
#endif
>;
// Variant holding exactly one of the per-group instruction-argument structs.
// The alternatives are listed in the same order as the operation enums in
// OpType (IntrAluArgs <-> AluType, IntrBrArgs <-> BrType, ...); keep the two
// lists in step when adding a group.
// The vector argument structs exist as alternatives only when EXT_V_ENABLE is
// defined, and IntrTpuArgs only when EXT_TPU_ENABLE is defined.
using IntrArgs = std::variant<
IntrAluArgs
, IntrBrArgs
, IntrMdvArgs
, IntrLsuArgs
, IntrAmoArgs
, IntrFpuArgs
, IntrCsrArgs
, IntrWctlArgs
#ifdef EXT_V_ENABLE
, IntrVsetArgs
, IntrVlsArgs
, IntrVopArgs
#endif
#ifdef EXT_TPU_ENABLE
, IntrTpuArgs
#endif
>;
///////////////////////////////////////////////////////////////////////////////
enum class AddrType {
Global,
Shared,
@ -231,112 +647,6 @@ struct mem_addr_size_t {
///////////////////////////////////////////////////////////////////////////////
// Pre-refactor FPU operation classes. Enumerators take the implicit values
// 0..4 in declaration order.
enum class FpuType {
  FNCP,
  FMA,
  FDIV,
  FSQRT,
  FCVT
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const FpuType& type) {
  switch (type) {
  case FpuType::FNCP:  return os << "FNCP";
  case FpuType::FMA:   return os << "FMA";
  case FpuType::FDIV:  return os << "FDIV";
  case FpuType::FSQRT: return os << "FSQRT";
  case FpuType::FCVT:  return os << "FCVT";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Pre-refactor SFU operation kinds: the warp-control operations followed by
// the CSR operations. Enumerators take the implicit values 0..8.
enum class SfuType {
  TMC,
  WSPAWN,
  SPLIT,
  JOIN,
  BAR,
  PRED,
  CSRRW,
  CSRRS,
  CSRRC
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const SfuType& type) {
  switch (type) {
  case SfuType::TMC:    return os << "TMC";
  case SfuType::WSPAWN: return os << "WSPAWN";
  case SfuType::SPLIT:  return os << "SPLIT";
  case SfuType::JOIN:   return os << "JOIN";
  case SfuType::BAR:    return os << "BAR";
  case SfuType::PRED:   return os << "PRED";
  case SfuType::CSRRW:  return os << "CSRRW";
  case SfuType::CSRRS:  return os << "CSRRS";
  case SfuType::CSRRC:  return os << "CSRRC";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Pre-refactor TPU operation kinds; WMMA (explicitly 0) is the only one.
enum class TpuType {
  WMMA = 0,
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const TpuType& type) {
  switch (type) {
  case TpuType::WMMA: return os << "WMMA";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
// Pre-refactor vector-unit execution classes, with explicit numeric values.
// The *_R (reduction) variants are numbered last; the pre-refactor
// VOpcUnit::tick detects reductions with `vpu_type >= VpuType::ARITH_R`.
enum class VpuType {
  VSET    = 0,
  ARITH   = 1,
  IMUL    = 2,
  IDIV    = 3,
  FMA     = 4,
  FDIV    = 5,
  FSQRT   = 6,
  FCVT    = 7,
  FNCP    = 8,
  // reduction
  ARITH_R = 9,
  FMA_R   = 10,
  FNCP_R  = 11
};

// Writes the enumerator's name to `os` and returns `os`.
// A value matching no enumerator trips assert(false); when asserts are
// compiled out, nothing is written and `os` is returned untouched.
inline std::ostream &operator<<(std::ostream &os, const VpuType& type) {
  switch (type) {
  case VpuType::VSET:    return os << "VSET";
  case VpuType::ARITH:   return os << "ARITH";
  case VpuType::IMUL:    return os << "IMUL";
  case VpuType::IDIV:    return os << "IDIV";
  case VpuType::FMA:     return os << "FMA";
  case VpuType::FDIV:    return os << "FDIV";
  case VpuType::FSQRT:   return os << "FSQRT";
  case VpuType::FCVT:    return os << "FCVT";
  case VpuType::FNCP:    return os << "FNCP";
  case VpuType::ARITH_R: return os << "ARITH_R";
  case VpuType::FMA_R:   return os << "FMA_R";
  case VpuType::FNCP_R:  return os << "FNCP_R";
  }
  // Only reachable for a value outside the enumerator set.
  assert(false);
  return os;
}
///////////////////////////////////////////////////////////////////////////////
enum class ArbiterType {
Priority,
RoundRobin,

File diff suppressed because it is too large Load diff

View file

@ -22,15 +22,11 @@ public:
// Trace payload attached to vector-unit execute operations. VOpcUnit obtains
// it by casting trace->data and reads vpu_op, vl and vlmul from it.
struct ExeTraceData : public ITraceData {
using Ptr = std::shared_ptr<ExeTraceData>;
// Execution class of the vector operation.
// NOTE(review): unlike vl/vlmul this member has no default initializer, so a
// default-constructed ExeTraceData holds an indeterminate vpu_op - confirm
// every producer assigns it before the trace is consumed.
VpuOpType vpu_op;
// Loaded into VOpcUnit's vl_counter_ / vlmul_counter_.
uint32_t vl = 0;
uint32_t vlmul = 0;
};
// Result type of the execute() overload that takes both rs1_data and rs2_data.
struct ExeRet {
// Execution class reported for the executed vector operation.
VpuType vpu_type;
// presumably true when the operation wrote its destination register - confirm against execute()
bool rd_write;
};
struct PerfStats {
uint64_t reads;
uint64_t writes;
@ -71,7 +67,9 @@ public:
void store(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, const std::vector<reg_data_t>& rs2_data, MemTraceData* trace_data);
ExeRet execute(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, const std::vector<reg_data_t>& rs2_data, std::vector<reg_data_t>& rd_data, ExeTraceData* trace_data);
void configure(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, const std::vector<reg_data_t>& rs2_data, std::vector<reg_data_t>& rd_data, ExeTraceData* trace_data);
void execute(const Instr &instr, uint32_t wid, uint32_t tid, const std::vector<reg_data_t>& rs1_data, std::vector<reg_data_t>& rd_data, ExeTraceData* trace_data);
const PerfStats& perf_stats() const;

View file

@ -59,14 +59,14 @@ void VOpcUnit::tick() {
if (trace->fu_type == FUType::VPU) {
auto trace_data = std::dynamic_pointer_cast<VecUnit::ExeTraceData>(trace->data);
active_PC_ = trace->PC;
if (trace->vpu_type != VpuType::VSET) {
if (trace_data->vpu_op == VpuOpType::VSET) {
vl_counter_ = trace_data->vl;
vlmul_counter_ = trace_data->vlmul;
} else {
vl_counter_ = 1;
vlmul_counter_ = 1;
}
is_reduction_ = (trace->vpu_type >= VpuType::ARITH_R);
is_reduction_ = (trace_data->vpu_op >= VpuOpType::ARITH_R);
if (is_reduction_) {
red_counter_ = (vlmul_counter_ * vl_counter_) - 1;
wb_counter_ = (red_counter_ > 1) ? (red_counter_ - 1) : 0;
@ -83,7 +83,7 @@ void VOpcUnit::tick() {
if (vl_counter_ == 0) {
// Convert to Nop
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
trace->op_type = AluType::ADD;
this->Output.push(trace);
Input.pop();
return;
@ -309,51 +309,53 @@ void VOpcUnit::decode(instr_trace_t* trace) {
case FUType::LSU:
// no conversion
break;
case FUType::VPU:
case FUType::VPU: {
// decode VPU instructions
switch (trace->vpu_type) {
case VpuType::VSET:
auto trace_data = std::dynamic_pointer_cast<VecUnit::ExeTraceData>(trace->data);
auto vpu_op = trace_data->vpu_op;
switch (vpu_op) {
case VpuOpType::VSET:
// no conversion
break;
case VpuType::ARITH:
case VpuType::ARITH_R:
case VpuOpType::ARITH:
case VpuOpType::ARITH_R:
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::ARITH;
trace->op_type = AluType::ADD;
break;
case VpuType::IMUL:
case VpuOpType::IMUL:
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::IMUL;
trace->op_type = MdvType::MUL;
break;
case VpuType::IDIV:
case VpuOpType::IDIV:
trace->fu_type = FUType::ALU;
trace->alu_type = AluType::IDIV;
trace->op_type = MdvType::DIV;
break;
case VpuType::FMA:
case VpuType::FMA_R:
case VpuOpType::FMA:
case VpuOpType::FMA_R:
trace->fu_type = FUType::FPU;
trace->fpu_type = FpuType::FMA;
trace->op_type = FpuType::FADD;
break;
case VpuType::FDIV:
case VpuOpType::FDIV:
trace->fu_type = FUType::FPU;
trace->fpu_type = FpuType::FDIV;
trace->op_type = FpuType::FDIV;
break;
case VpuType::FSQRT:
case VpuOpType::FSQRT:
trace->fu_type = FUType::FPU;
trace->fpu_type = FpuType::FSQRT;
trace->op_type = FpuType::FSQRT;
break;
case VpuType::FCVT:
case VpuOpType::FCVT:
trace->fu_type = FUType::FPU;
trace->fpu_type = FpuType::FCVT;
trace->op_type = FpuType::F2I;
break;
case VpuType::FNCP:
case VpuType::FNCP_R:
case VpuOpType::FNCP:
case VpuOpType::FNCP_R:
trace->fu_type = FUType::FPU;
trace->fpu_type = FpuType::FNCP;
trace->op_type = FpuType::FCMP;
break;
default:
assert(false);
}
break;
} break;
default:
assert(false);
}

View file

@ -91,8 +91,9 @@ void Operands::tick() {
if (Input.empty())
return;
auto trace = this->Input.front();
if (trace->fu_type == FUType::VPU
|| (trace->fu_type == FUType::LSU && (trace->lsu_type == LsuType::VLOAD || trace->lsu_type == LsuType::VSTORE))) {
if (std::get_if<VsetType>(&trace->op_type)
|| std::get_if<VlsType>(&trace->op_type)
|| std::get_if<VopType>(&trace->op_type)) {
for (uint32_t i = 0; i < NUM_VOPCS; i++) {
// skip if busy
if (vopc_units_.at(i)->Input.full())

View file

@ -99,7 +99,7 @@ PROJECT := libxrtsim.so
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/vortex.xml:
$(DESTDIR)/vortex.xml: $(CONFIG_FILE)
verilator --xml-only -O0 $(VL_FLAGS) $(TOP) --xml-output $@
$(DESTDIR)/scope.json: $(DESTDIR)/vortex.xml