From 1b6d9bd3a5ac7f2910c1acbb801e02fc4bf0f0e2 Mon Sep 17 00:00:00 2001 From: Nicholas Ade <90573287+Nadir-Lafayette@users.noreply.github.com> Date: Wed, 12 Apr 2023 15:01:09 -0400 Subject: [PATCH 001/488] Making the bfloat files --- sim/common/bfloat.cpp | 0 sim/common/bfloat.hpp | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 sim/common/bfloat.cpp create mode 100644 sim/common/bfloat.hpp diff --git a/sim/common/bfloat.cpp b/sim/common/bfloat.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/sim/common/bfloat.hpp b/sim/common/bfloat.hpp new file mode 100644 index 000000000..e69de29bb From afa9e4003c6800d3c9a842d40ca10bcf2fd3d8ad Mon Sep 17 00:00:00 2001 From: Nicholas Ade <90573287+Nadir-Lafayette@users.noreply.github.com> Date: Thu, 13 Apr 2023 04:20:23 -0400 Subject: [PATCH 002/488] adding mul and divide to bfloat --- sim/common/bfloat.cpp | 221 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) diff --git a/sim/common/bfloat.cpp b/sim/common/bfloat.cpp index e69de29bb..e44f81b8b 100644 --- a/sim/common/bfloat.cpp +++ b/sim/common/bfloat.cpp @@ -0,0 +1,221 @@ +#include +#include + +#include +#include + +// get float "in-memory" to exploit iee754 binary representation of floating point values +// use a u to trick compiler into letting you access float's bits directly +// bitwise operations cannot be done directly on iee754 representations per compiler settings +// ordering of the fields is important here +class MyFloat +{ +private: + void printBinary(int n, int i) + { + // Prints the binary representation + // of a number n up to i-bits. 
+ int k; + for (k = i - 1; k >= 0; k--) + { + + if ((n >> k) & 1) + std::cout << "1"; + else + std::cout << "0"; + } + } + +public: + union BFloat_t + { + float f; + int i; + struct + { + uint32_t dead : 16; // don't use these, just place-holders + uint32_t mantissa : 7; // Mantissa (fractional part) of the number + uint32_t exponent : 8; // Exponent (power of 2) of the number + uint32_t sign : 1; + } parts; + }; + + void printBFloat(BFloat_t b) + { + std::cout << b.parts.sign << " | "; + printBinary(b.parts.exponent, 8); + std::cout << " | "; + printBinary(b.parts.mantissa, 7); + std::cout << std::endl; + } + + BFloat_t in_mem; + + MyFloat(float x) + { + in_mem.f = x; + printBFloat(in_mem); + } + + MyFloat(uint8_t mantissa, uint8_t exponent, bool sign) + { + in_mem.parts.mantissa = mantissa & 0x7F; + in_mem.parts.exponent = exponent; + in_mem.parts.sign = (int)sign; + + std::cout << "inside constructor" << std::endl; + std::cout << "bfloat:" << in_mem.f << std::endl; + printBFloat(in_mem); + } + + friend MyFloat operator+(const MyFloat &a, const MyFloat &b) + { + // get fields + bool a_sign = (bool)a.in_mem.parts.sign; + uint8_t a_exp = a.in_mem.parts.exponent - 127; + uint8_t a_mantissa = a.in_mem.parts.mantissa | 0x80; // add in the implicit bit + + bool b_sign = (bool)b.in_mem.parts.sign; + uint8_t b_exp = b.in_mem.parts.exponent - 127; + uint8_t b_mantissa = b.in_mem.parts.mantissa | 0x80; // add in the implicit bit + + // align mantissas by shifting the smaller exponent to the larger exponent + if (a_exp < b_exp) + { + a_mantissa >>= (b_exp - a_exp); + a_exp = b_exp; + } + else + { + b_mantissa >>= (a_exp - b_exp); + b_exp = a_exp; + } + + // add mantissas and adjust exponent if necessary + int sum_mantissa = a_mantissa + b_mantissa; + if (sum_mantissa & 0x100) + { // this val check might be wrong + sum_mantissa >>= 1; + a_exp++; + } + + // build binary representation of result + return MyFloat(sum_mantissa, a_exp, a_sign); + } + + friend MyFloat 
operator*(const MyFloat &a, const MyFloat &b) + { + uint16_t a_exp = a.in_mem.parts.exponent; + uint16_t b_exp = b.in_mem.parts.exponent; + uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit + uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bi + + std::bitset<8> bits(a_exp); + std::cout << "Binary a exp: " << bits << std::endl; + + bool product_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; + + if (a_exp == 0xFF || b_exp == 0xff) + { + return MyFloat(0, 0xFF, product_sign); + } + // Multiply mantissas + uint32_t product_mantissa = static_cast(a_mantissa) * static_cast(b_mantissa); + + // Add exponents + int product_exp = a_exp + b_exp - 127; + + product_mantissa = (product_mantissa + 0x40) >> 7; + + // Round to nearest even (round half to even) + if ((product_mantissa & 0x7F) == 0x40 && (product_mantissa & 0x1) != 0) + { + product_mantissa++; + } + if (product_mantissa & 0x0100) + { // Check if the implicit bit shifted to the left + product_mantissa >>= 1; + product_exp++; + } + else + { + product_mantissa &= 0x7F; // Remove the implicit bit + } + return MyFloat(product_mantissa, product_exp, product_sign); + } + + friend MyFloat operator/(const MyFloat &a, const MyFloat &b) + { + uint16_t a_exp = a.in_mem.parts.exponent; + uint16_t b_exp = b.in_mem.parts.exponent; + std::bitset<8> bits(b_exp); + std::cout << "Binary b exp: " << bits << std::endl; + uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit + uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bit + + bool quotient_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; + + // Check if divisor is zero + if (b_exp == 0 && b_mantissa == 0) + { + std::cout << "HERE" << std::endl; + return MyFloat(0, 0xFF, quotient_sign); // Return infinity with the appropriate sign + } + + // Check for infinity or zero in dividend + if (a_exp == 0xFF || a_exp == 0) + { + return MyFloat(0, a_exp, quotient_sign); + } + + // Subtract 
exponents + int quotient_exp = a_exp - b_exp + 127; + + // Divide mantissas + uint32_t quotient_mantissa = (static_cast(a_mantissa) << 8) / static_cast(b_mantissa); + + quotient_mantissa = (quotient_mantissa + 0x40) >> 8; + + // Round to nearest even (round half to even) + if ((quotient_mantissa & 0x1) != 0 && (quotient_mantissa & 0x7F) == 0x40) + { + quotient_mantissa--; + } + else if ((quotient_mantissa & 0x7F) == 0x40) + { + quotient_mantissa++; + } + + if (quotient_mantissa & 0x0100) + { // Check if the implicit bit shifted to the left + quotient_mantissa >>= 1; + quotient_exp++; + } + else + { + quotient_mantissa &= 0x7F; // Remove the implicit bit + } + return MyFloat(quotient_mantissa, quotient_exp, quotient_sign); + } +}; + +int main() +{ + float a = 8; + float b = 0; + std::cout << a << std::endl; + + std::bitset bits(*reinterpret_cast(&a)); + std::cout << "Binary representation of " << a << " is \n" + << bits << std::endl; + std::cout << "Binary representation of " << b << " is \n" + << bits << std::endl; + + MyFloat bfloat_version_of_a(a); + MyFloat bfloat_version_of_b(b); + MyFloat c = bfloat_version_of_a / bfloat_version_of_b; + + // You can now print the result stored in c or perform other operations with it. 
+ + return 0; +} From b08c7403f6471f7587a027b85287cb40cafc0ff5 Mon Sep 17 00:00:00 2001 From: dhy2000 <46858361+dhy2000@users.noreply.github.com> Date: Fri, 26 Jan 2024 19:56:50 +0800 Subject: [PATCH 003/488] fix #100: change return type to float --- tests/regression/vecaddx/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/regression/vecaddx/main.cpp b/tests/regression/vecaddx/main.cpp index 117f34709..15d58e013 100644 --- a/tests/regression/vecaddx/main.cpp +++ b/tests/regression/vecaddx/main.cpp @@ -50,7 +50,7 @@ public: static const char* type_str() { return "float"; } - static int generate() { + static float generate() { return static_cast(rand()) / RAND_MAX; } static bool compare(float a, float b, int index, int errors) { From 99c6a1af5a58cbd915f2a0ed9fe279d77d99498a Mon Sep 17 00:00:00 2001 From: Varsha Singhania Date: Mon, 17 Jun 2024 04:28:51 -0400 Subject: [PATCH 004/488] Tensor cores in Vortex --- ci/blackbox.sh | 12 +- hw/rtl/VX_config.vh | 18 ++ hw/rtl/VX_types.vh | 3 + kernel/include/vx_intrinsics.h | 20 ++ runtime/include/vortex.h | 2 + runtime/simx/vortex.cpp | 8 +- sim/simx/arch.h | 15 +- sim/simx/core.cpp | 4 +- sim/simx/core.h | 1 + sim/simx/decode.cpp | 19 ++ sim/simx/emulator.cpp | 18 +- sim/simx/emulator.h | 4 + sim/simx/execute.cpp | 179 +++++++++++++++ sim/simx/func_unit.cpp | 89 ++++++-- sim/simx/func_unit.h | 9 + sim/simx/instr.h | 2 +- sim/simx/instr_trace.h | 1 + sim/simx/main.cpp | 4 +- sim/simx/types.h | 23 +- tests/regression/matmul/Makefile | 14 ++ tests/regression/matmul/common.h | 17 ++ tests/regression/matmul/kernel.cpp | 124 +++++++++++ tests/regression/matmul/main.cpp | 343 +++++++++++++++++++++++++++++ 23 files changed, 898 insertions(+), 31 deletions(-) create mode 100644 tests/regression/matmul/Makefile create mode 100644 tests/regression/matmul/common.h create mode 100644 tests/regression/matmul/kernel.cpp create mode 100644 tests/regression/matmul/main.cpp diff --git a/ci/blackbox.sh 
b/ci/blackbox.sh index fe94677aa..8a04133f9 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -48,6 +48,8 @@ PERF_CLASS=0 REBUILD=2 TEMPBUILD=0 LOGFILE=run.log +TC_SIZE=567 +TC_NUM=123 for i in "$@" do @@ -112,6 +114,14 @@ case $i in LOGFILE=${i#*=} shift ;; + --tc_size=*) + TC_SIZE=${i#*=} + shift + ;; + --tc_num=*) + TC_NUM=${i#*=} + shift + ;; --help) show_help exit 0 @@ -180,7 +190,7 @@ then fi CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" - +CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" echo "CONFIGS=$CONFIGS" if [ $REBUILD -ne 0 ] diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 7fc8d1464..651234768 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -111,6 +111,24 @@ `endif `define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE) +`ifndef TC_SIZE +`define TC_SIZE 4 +`endif + +`ifndef TC_NUM +`define TC_NUM 1 +`endif + +// Number of TCU units +`ifndef NUM_TCU_LANES +`define NUM_TCU_LANES `TC_NUM +`endif + +// Number of TCU units +`ifndef NUM_TCU_BLOCKS +`define NUM_TCU_BLOCKS `ISSUE_WIDTH +`endif + `ifdef L2_ENABLE `define L2_ENABLED 1 `else diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index e744a26f9..06929b058 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -196,4 +196,7 @@ `define VX_CSR_NUM_CORES 12'hFC2 `define VX_CSR_LOCAL_MEM_BASE 12'hFC3 +`define VX_MAT_MUL_SIZE 12'hFC4 + + `endif // VX_TYPES_VH diff --git a/kernel/include/vx_intrinsics.h b/kernel/include/vx_intrinsics.h index 6000065e9..b67a770da 100644 --- a/kernel/include/vx_intrinsics.h +++ b/kernel/include/vx_intrinsics.h @@ -221,6 +221,26 @@ inline void vx_fence() { __asm__ volatile ("fence iorw, iorw"); } +//Matrix load +//Converted instruction type cause destination registers were not getiing blocked otherwise +inline void mload(unsigned dest, unsigned addr) +{ + asm 
volatile (".insn i 0x7b, 0, x0, %0(%1)" :: "i"(dest), "r"(addr)); +} + +//mat store +inline void ms(unsigned addr) +{ + asm volatile (".insn i 0x7b, 1, x0, 0(%0)" :: "r"(addr)); +} + +//mat mul +//num tiles along reduced K dimension of matmul as imm value (can use rd,rs field to expand range of n_tiles from 12 bits) +inline void mm() +{ + asm volatile (".insn i 0x7b, 2, x0, 0(x0)"); +} + #ifdef __cplusplus } #endif diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index c9dd6ec36..f1a412b81 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -34,6 +34,8 @@ typedef void* vx_buffer_h; #define VX_CAPS_GLOBAL_MEM_SIZE 0x5 #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_ISA_FLAGS 0x7 +#define VX_CAPS_TC_SIZE 0x8 +#define VX_CAPS_TC_NUM 0x9 // device isa flags #define VX_ISA_STD_A (1ull << 0) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 89856f3a0..f65d7b385 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -32,7 +32,7 @@ using namespace vortex; class vx_device { public: vx_device() - : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) + : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES, TC_SIZE, TC_NUM) , ram_(0, RAM_PAGE_SIZE) , processor_(arch_) , global_mem_(ALLOC_BASE_ADDR, @@ -69,6 +69,12 @@ public: case VX_CAPS_NUM_CORES: _value = NUM_CORES * NUM_CLUSTERS; break; + case VX_CAPS_TC_SIZE: + _value = TC_SIZE; + break; + case VX_CAPS_TC_NUM: + _value = TC_NUM; + break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; diff --git a/sim/simx/arch.h b/sim/simx/arch.h index 2507bf28f..e35687dbd 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -35,9 +35,11 @@ private: uint16_t num_barriers_; uint16_t ipdom_size_; uint64_t local_mem_base_; + uint16_t tc_size_; + uint16_t tc_num_; public: - Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores) + Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores, uint64_t tc_size, uint64_t tc_num) : num_threads_(num_threads) , 
num_warps_(num_warps) , num_cores_(num_cores) @@ -49,6 +51,8 @@ public: , num_barriers_(NUM_BARRIERS) , ipdom_size_((num_threads-1) * 2) , local_mem_base_(LMEM_BASE_ADDR) + , tc_size_ (tc_size) + , tc_num_ (tc_num) {} uint16_t vsize() const { @@ -94,6 +98,15 @@ public: uint16_t socket_size() const { return socket_size_; } + + uint16_t tc_size() const { + return tc_size_; + } + + uint16_t tc_num() const { + return tc_num_; + } + }; } \ No newline at end of file diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 0bd72524d..7020cf8ff 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -105,12 +105,14 @@ Core::Core(const SimContext& ctx, dispatchers_.at((int)FUType::FPU) = SimPlatform::instance().create_object(arch, 2, NUM_FPU_BLOCKS, NUM_FPU_LANES); dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object(arch, 2, NUM_LSU_BLOCKS, NUM_LSU_LANES); dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object(arch, 2, NUM_SFU_BLOCKS, NUM_SFU_LANES); - + dispatchers_.at((int)FUType::TCU) = SimPlatform::instance().create_object(arch, 2, NUM_TCU_BLOCKS, NUM_TCU_LANES); + // initialize execute units func_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::LSU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::SFU) = SimPlatform::instance().create_object(this); + func_units_.at((int)FUType::TCU) = SimPlatform::instance().create_object(this); // bind commit arbiters for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { diff --git a/sim/simx/core.h b/sim/simx/core.h index cc0e46c8c..0b82de84a 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -170,6 +170,7 @@ private: friend class AluUnit; friend class FpuUnit; friend class SfuUnit; + friend class TcuUnit; }; } // namespace vortex diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index f934524c3..4d8d0a105 100644 --- 
a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -51,6 +51,7 @@ static const std::unordered_map sc_instTable = { {Opcode::EXT2, InstType::R4}, {Opcode::R_W, InstType::R}, {Opcode::I_W, InstType::I}, + {Opcode::TCU, InstType::I}, }; enum Constants { @@ -405,6 +406,16 @@ static const char* op_string(const Instr &instr) { default: std::abort(); } + + case Opcode::TCU: + switch(func3) + { + case 0: return "ML"; // + case 1: return "MS"; // + case 2: return "MATMUL"; + default: + std::abort(); + } default: std::abort(); } @@ -543,6 +554,14 @@ std::shared_ptr Emulator::decode(uint32_t code) const { case InstType::I: { switch (op) { + case Opcode::TCU: { + instr->setDestReg(rs1, RegType::Integer); + instr->addSrcReg(rs1, RegType::Integer); + instr->setFunc3(func3); + instr->setFunc7(func7); + auto imm = code >> shift_rs2; + instr->setImm(sext(imm, width_i_imm)); + } break; case Opcode::I: case Opcode::I_W: case Opcode::JALR: diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 5850bfd56..ea5f72c42 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,6 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) + , scratchpad(std::vector(core->arch().tc_size() * core->arch().tc_size() * 32768)) //Fix this { this->clear(); } @@ -110,6 +111,11 @@ void Emulator::clear() { active_warps_.set(0); warps_[0].tmask.set(0); wspawn_.valid = false; + + for (auto& reg : scratchpad) + { + reg = 0; + } } void Emulator::attach_ram(RAM* ram) { @@ -344,6 +350,11 @@ void Emulator::cout_flush() { case (addr + (VX_CSR_MPM_BASE_H-VX_CSR_MPM_BASE)) : return ((value >> 32) & 0xFFFFFFFF) #endif +Word Emulator::get_tiles() +{ + return mat_size; +} + Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { auto core_perf = core_->perf_stats(); switch (addr) { @@ -375,6 +386,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case 
VX_CSR_NUM_CORES: return uint32_t(arch_.num_cores()) * arch_.num_clusters(); case VX_CSR_LOCAL_MEM_BASE: return arch_.local_mem_base(); case VX_CSR_MSCRATCH: return csr_mscratch_; + case VX_MAT_MUL_SIZE: return mat_size; + CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles); CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs); default: @@ -484,6 +497,9 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { case VX_CSR_MNSTATUS: case VX_CSR_MCAUSE: break; + case VX_MAT_MUL_SIZE: + mat_size = value; + break; default: { std::cout << std::hex << "Error: invalid CSR write addr=0x" << addr << ", value=0x" << value << std::endl; std::abort(); @@ -500,4 +516,4 @@ void Emulator::update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid) { this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, tid, wid) | fflags, tid, wid); this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, tid, wid) | fflags, tid, wid); } -} \ No newline at end of file +} diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 81dcecd83..82b5bc98b 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -53,6 +53,8 @@ public: bool wspawn(uint32_t num_warps, Word nextPC); int get_exitcode() const; + + Word get_tiles(); private: @@ -121,6 +123,8 @@ private: MemoryUnit mmu_; Word csr_mscratch_; wspawn_t wspawn_; + std::vector scratchpad; + uint32_t mat_size; }; } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index e0fc2b94a..d522145db 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -25,6 +25,7 @@ #include "emulator.h" #include "instr.h" #include "core.h" +#include "VX_types.h" using namespace vortex; @@ -1414,6 +1415,184 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::abort(); } } break; + case Opcode::TCU: + { //TODO - make it data-type flexible + uint32_t mem_bytes = 1; + DP(3, "mem_bytes=" << mem_bytes << std::endl); + uint16_t tc_size = core_->arch().tc_size(); + uint32_t TC_per_warp = core_->arch().tc_num(); 
+ + //Number of loads - dependant on the thread config + uint32_t n_tiles = this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value + int num_data_per_thread; + int num_data_per_thread_st; + int num_threads_actv; + int num_threads_actv_st; + uint32_t data_bytes_load; + uint32_t data_bytes_store; + uint32_t num_threads_per_tc = MAX (1, num_threads/TC_per_warp); + + //int num_warps = MIN() + //int active_tcs = MIN (TC_per_warp, num_output_tiles/num_warps) + //LOAD + if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp) + { + num_threads_actv = tc_size*tc_size*n_tiles*TC_per_warp; + num_data_per_thread = 1; + } + else + { + num_threads_actv = num_threads; + num_data_per_thread = (tc_size*tc_size*n_tiles)/num_threads_per_tc; + } + data_bytes_load = mem_bytes*num_data_per_thread; + + //STORE + + // DP(3, "DEBUG :: num_threads = " << num_threads); + // DP(3, "DEBUG :: tc_size*tc_size = " << tc_size*tc_size); + //DP(3, "imm = " << immsrc); + + if(num_threads > tc_size*tc_size*TC_per_warp) + { + num_threads_actv_st = tc_size*tc_size*TC_per_warp; + num_data_per_thread_st = 1; + } + else + { + num_threads_actv_st = num_threads; + num_data_per_thread_st = (tc_size*tc_size)/num_threads_per_tc; + } + data_bytes_store = mem_bytes*num_data_per_thread_st; + + DP(3, "Num Tiles=" << n_tiles << std::endl); + + switch (func3) { + case 0: + { //Matrix Load + + DP (4, "TCU LOAD"); + trace->fu_type = FUType::LSU; + trace->lsu_type = LsuType::TCU_LOAD; + + trace->used_iregs.set(rsrc0); + auto trace_data = std::make_shared(num_threads); + trace->data = trace_data; + + for (uint32_t t = thread_start; t < num_threads_actv; ++t) + { + if (!warp.tmask.test(t)) + continue; + DP(3, "Thread ID" << t); + + uint32_t base_addr = rsdata[t][0].i ; + trace_data->mem_addrs.at(t) = {base_addr, data_bytes_load}; + + //Load A or B (depends on immsrc) + int loop_offset = 0; + DP(3, "n_tiles = " << n_tiles << "; num_data_per_thread = " << num_data_per_thread 
<dcache_read(temp_ref, (base_addr+(n*mem_bytes)+(loop_offset*mem_bytes)), mem_bytes); + + scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n] = *temp_ref; + DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]); + } + //loop_offset += tc_size*tc_size; + //} + } + rd_write = true; + } break; + case 1: + { + DP(4, "TCU STORE"); + trace->fu_type = FUType::LSU; + trace->lsu_type = LsuType::TCU_STORE; + + auto trace_data = std::make_shared(num_threads); + trace->data = trace_data; + uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; + + for (uint32_t t = thread_start; t < num_threads_actv_st; ++t) + { + if (!warp.tmask.test(t)) + continue; + + DP(3, "Thread ID" << t); + uint32_t base_addr = rsdata[t][0].i ; + + trace_data->mem_addrs.at(t) = {base_addr, data_bytes_store}; + + //Store C + for (int n=0; n csr (TODO :: can intermediate step of moving to CSR be skipped?) 
+ //core_->set_csr(csr_addr[(2*num_data_per_thread) + n], scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread) + n], t, warp_id_); + Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0)); + *temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n]; + + this->dcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes); + } + } + //Clear the scratchpad + for(int i =0 ; i < scratchpad.size(); i++) + { + scratchpad[i] = 0; + } + } + break; + case 2: + { //Matrix Multiply + DP(4, "TCU MULTIPLY MAT"); + trace->fu_type = FUType::TCU; + trace->tcu_type = TCUType::TCU_MUL; + uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; + uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp); + for (uint32_t t = thread_start; t < num_threads_actv; ++t) + { + if (!warp.tmask.test(t)) + continue; + + DP(3, "Thread ID" << t); + //TC operation [only 1 thread in 1 warp needs to do this] + if (t%threads_per_tc == 0) + { + //TODO - change to systolic array implementation + uint32_t thread_offset = t*(tc_size*tc_size); + int loop_offset = 0; + int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size; + // Loop over all tiles - output stationary + //for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? 
+ //{ + /* + for (int i = 0; i < tc_size; i++) { //ROW-1 + for (int j = 0; j < tc_size; j++) { //COL-2 + int sum = 0; + for (int k = 0; k < tc_size; k++) + { //COL-1 + sum = sum + scratchpad[loop_offset + thread_offset*n_tiles + i * tc_size + k] *scratchpad[loop_offset + thread_offset*n_tiles + offset_b + (k * tc_size + j)]; + } + scratchpad[accu_offset + thread_offset +(i * tc_size + j)] += sum; //[i * col2 + j] = sum + DP(3, "Scratchpad Index: " << accu_offset + (i * tc_size + j) << " , Value=" << scratchpad[accu_offset + (i * tc_size + j)]); + + } + } + */ + //loop_offset += tc_size*tc_size; //Move to the next tiled matmul fragment + //} + } + } + + }break; + default: + std::abort(); + } + } break; default: std::abort(); } diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index c9a3f0fc7..3991a17e7 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -21,6 +21,7 @@ #include "core.h" #include "constants.h" #include "cache_sim.h" +#include "VX_types.h" using namespace vortex; @@ -162,7 +163,7 @@ void LsuUnit::tick() { continue; } - bool is_write = (trace->lsu_type == LsuType::STORE); + bool is_write = ((trace->lsu_type == LsuType::STORE) || (trace->lsu_type == LsuType::TCU_STORE)); // check pending queue capacity if (!is_write && state.pending_rd_reqs.full()) { @@ -175,13 +176,14 @@ void LsuUnit::tick() { } uint32_t tag = 0; + if (!is_write) { tag = state.pending_rd_reqs.allocate({trace, 0}); } // send memory request auto num_reqs = this->send_requests(trace, block_idx, tag); - + if (!is_write) { state.pending_rd_reqs.at(tag).count = num_reqs; } @@ -200,7 +202,14 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { int count = 0; auto trace_data = std::dynamic_pointer_cast(trace->data); - bool is_write = (trace->lsu_type == LsuType::STORE); + bool is_write = ((trace->lsu_type == LsuType::STORE) || (trace->lsu_type == LsuType::TCU_STORE)); + + uint16_t req_per_thread = 1; + if ((trace->lsu_type == 
LsuType::TCU_LOAD) || (trace->lsu_type == LsuType::TCU_STORE)) + { + req_per_thread= (1>(trace_data->mem_addrs.at(0).size)/4)? 1: ((trace_data->mem_addrs.at(0).size)/4); + } + auto t0 = trace->pid * NUM_LSU_LANES; for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) { @@ -213,33 +222,69 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { auto mem_addr = trace_data->mem_addrs.at(t); auto type = get_addr_type(mem_addr.addr); + // DT(3, "addr_type = " << type << ", " << *trace); + uint32_t mem_bytes = 1; + for (int i = 0; i < req_per_thread; i++) + { + MemReq mem_req; + mem_req.addr = mem_addr.addr + (i*mem_bytes); + mem_req.write = is_write; + mem_req.type = type; + mem_req.tag = tag; + mem_req.cid = trace->cid; + mem_req.uuid = trace->uuid; + + dcache_req_port.push(mem_req, 1); + DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag + << ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace); - MemReq mem_req; - mem_req.addr = mem_addr.addr; - mem_req.write = is_write; - mem_req.type = type; - mem_req.tag = tag; - mem_req.cid = trace->cid; - mem_req.uuid = trace->uuid; - - dcache_req_port.push(mem_req, 1); - DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag - << ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace); - - if (is_write) { - ++core_->perf_stats_.stores; - } else { - ++core_->perf_stats_.loads; - ++pending_loads_; + if (is_write) { + ++core_->perf_stats_.stores; + } else { + ++core_->perf_stats_.loads; + ++pending_loads_; + } + + ++count; } - - ++count; } return count; } /////////////////////////////////////////////////////////////////////////////// +TcuUnit::TcuUnit(const SimContext& ctx, Core* core) + : FuncUnit(ctx, core, "TCU") + , tc_size (core_->arch().tc_size()) + {} + +void TcuUnit::tick() { + + for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { + auto& input = Inputs.at(i); + if 
(input.empty()) + continue; + auto& output = Outputs.at(i); + auto trace = input.front(); + uint32_t n_tiles = core_->emulator_.get_tiles(); + switch (trace->tcu_type) { + case TCUType::TCU_MUL: + { //mat size = n_tiles * tc_size + int matmul_latency = (n_tiles * tc_size) + tc_size + tc_size; + output.push(trace, matmul_latency); + DT(3, "matmul_latency = " << matmul_latency << ", " << *trace); + break; + } + default: + std::abort(); + } + DT(3, "pipeline-execute: op=" << trace->tcu_type << ", " << *trace); + input.pop(); + } +} + +/////////////////////////////////////////////////////////////////////////////// + SfuUnit::SfuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "SFU") {} diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index 45f0152ff..5fc922991 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -100,6 +100,15 @@ private: /////////////////////////////////////////////////////////////////////////////// +class TcuUnit : public FuncUnit { +public: + TcuUnit(const SimContext& ctx, Core*); + uint64_t tc_size; + void tick(); +}; + +/////////////////////////////////////////////////////////////////////////////// + class SfuUnit : public FuncUnit { public: SfuUnit(const SimContext& ctx, Core*); diff --git a/sim/simx/instr.h b/sim/simx/instr.h index f97a19eac..061b4deb0 100644 --- a/sim/simx/instr.h +++ b/sim/simx/instr.h @@ -46,7 +46,7 @@ enum class Opcode { EXT1 = 0x0b, EXT2 = 0x2b, EXT3 = 0x5b, - EXT4 = 0x7b + TCU = 0x7b }; enum class InstType { diff --git a/sim/simx/instr_trace.h b/sim/simx/instr_trace.h index 532b736f5..9d6859fb7 100644 --- a/sim/simx/instr_trace.h +++ b/sim/simx/instr_trace.h @@ -75,6 +75,7 @@ public: AluType alu_type; FpuType fpu_type; SfuType sfu_type; + TCUType tcu_type; }; ITraceData::Ptr data; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 0f61de6f4..58eb96d61 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -35,6 +35,8 @@ static void show_usage() { uint32_t num_threads = NUM_THREADS; 
uint32_t num_warps = NUM_WARPS; uint32_t num_cores = NUM_CORES; +uint32_t tc_size = TC_SIZE; +uint32_t tc_num = TC_NUM; bool showStats = false; const char* program = nullptr; @@ -81,7 +83,7 @@ int main(int argc, char **argv) { { // create processor configuation - Arch arch(num_threads, num_warps, num_cores); + Arch arch(num_threads, num_warps, num_cores, tc_size, tc_num); // create memory module RAM ram(0, RAM_PAGE_SIZE); diff --git a/sim/simx/types.h b/sim/simx/types.h index a84216ae1..15623ce39 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -23,6 +23,7 @@ #include #include #include "debug.h" +#include namespace vortex { @@ -78,6 +79,7 @@ enum class FUType { LSU, FPU, SFU, + TCU, Count }; @@ -87,6 +89,7 @@ inline std::ostream &operator<<(std::ostream &os, const FUType& type) { case FUType::LSU: os << "LSU"; break; case FUType::FPU: os << "FPU"; break; case FUType::SFU: os << "SFU"; break; + case FUType::TCU: os << "TCU"; break; default: assert(false); } return os; @@ -118,14 +121,30 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) { enum class LsuType { LOAD, + TCU_LOAD, STORE, + TCU_STORE, FENCE }; +enum class TCUType { + TCU_MUL +}; + +inline std::ostream &operator<<(std::ostream &os, const TCUType& type) { + switch (type) { + case TCUType::TCU_MUL: os << "TCU MUL"; break; + default: assert(false); + } + return os; +} + inline std::ostream &operator<<(std::ostream &os, const LsuType& type) { switch (type) { case LsuType::LOAD: os << "LOAD"; break; + case LsuType::TCU_LOAD: os << "TCU_LOAD"; break; case LsuType::STORE: os << "STORE"; break; + case LsuType::TCU_STORE: os << "TCU_STORE"; break; case LsuType::FENCE: os << "FENCE"; break; default: assert(false); } @@ -383,7 +402,7 @@ public: , type_(type) , delay_(delay) , cursors_(num_outputs, 0) - , num_reqs_(num_inputs / num_outputs) + , num_reqs_(log2ceil(num_inputs / num_outputs)) { assert(delay != 0); assert(num_inputs <= 32); @@ -407,7 +426,7 @@ public: void tick() { uint32_t 
I = Inputs.size(); uint32_t O = Outputs.size(); - uint32_t R = num_reqs_; + uint32_t R = 1 << num_reqs_; // skip bypass mode if (I == O) diff --git a/tests/regression/matmul/Makefile b/tests/regression/matmul/Makefile new file mode 100644 index 000000000..7f1c48523 --- /dev/null +++ b/tests/regression/matmul/Makefile @@ -0,0 +1,14 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := matmul + +SRC_DIR := $(VORTEX_HOME)/tests/regression/$(PROJECT) + +SRCS := $(SRC_DIR)/main.cpp + +VX_SRCS := $(SRC_DIR)/kernel.cpp + +OPTS ?= -n128 -d1 + +include ../common.mk diff --git a/tests/regression/matmul/common.h b/tests/regression/matmul/common.h new file mode 100644 index 000000000..a9aa5de6c --- /dev/null +++ b/tests/regression/matmul/common.h @@ -0,0 +1,17 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +typedef struct { + uint32_t num_tasks; + uint32_t num_warps; + uint32_t num_threads; + uint32_t TC_per_warp; + uint32_t matrix_size; + uint32_t data_size; + uint64_t tc_size; + uint64_t src0_addr; + uint64_t src1_addr; + uint64_t dst_addr; +} kernel_arg_t; + +#endif \ No newline at end of file diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp new file mode 100644 index 000000000..eeb902acb --- /dev/null +++ b/tests/regression/matmul/kernel.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include "common.h" + +void kernel_body(kernel_arg_t* __UNIFORM__ arg) { + uint32_t task_id = blockIdx.x; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; + unsigned a_addr = reinterpret_cast(src0_ptr); + unsigned b_addr = reinterpret_cast(src1_ptr); + unsigned c_addr = reinterpret_cast(dst_ptr); + + uint32_t tc_size = arg->tc_size; + int TC_per_warp = arg->TC_per_warp; + unsigned num_threads = arg->num_threads; + int num_warps = arg->num_warps; + uint32_t matrix_size = arg->matrix_size; + + int n_tiles = matrix_size/tc_size; + int 
num_output_tiles = (matrix_size*matrix_size)/(tc_size*tc_size); + + int num_tasks = arg->num_tasks; + + //Assuming matrix size always > tensor core size + int warps_actual; + if (TC_per_warp > num_output_tiles) + warps_actual = 1; + else + warps_actual = num_output_tiles/TC_per_warp; + + int num_warps_actual = (warps_actual < num_warps)? warps_actual: num_warps; + int num_threads_per_tc = (1> num_threads/TC_per_warp)? 1: num_threads/TC_per_warp; + + int num_tasks_per_thread = (1> (num_tasks/(num_threads*num_warps_actual)))? 1: (num_tasks/(num_threads*num_warps_actual)); + int num_tasks_per_warp = (1 > num_tasks/num_warps_actual)? 1:num_tasks/num_warps_actual; + int task_id_first_warp = task_id%num_tasks_per_warp; + + //A&B + int num_data_per_op_tile = tc_size*tc_size*n_tiles; + int num_data_per_warp = num_data_per_op_tile*((1> (num_output_tiles/num_warps_actual))?1:(num_output_tiles/num_warps_actual)); + + int addr_shift; + if (((tc_size*tc_size*n_tiles)/(num_threads)) > 1) + addr_shift = (tc_size*tc_size*n_tiles)/(num_threads); + else + addr_shift = 1; + //Offset for 1st warp + int offset = ((task_id_first_warp/num_tasks_per_thread)*addr_shift) + ((task_id_first_warp%num_tasks_per_thread)*num_data_per_op_tile); + offset = offset + (num_data_per_warp*(task_id/num_tasks_per_warp)); + + //C + int num_data_per_op_tile_c = tc_size*tc_size; + int num_data_per_warp_c = num_data_per_warp/n_tiles; + + int addr_shift_c; + if (((tc_size*tc_size)/(num_threads)) > 1) + addr_shift_c = tc_size; + else + addr_shift_c = 1; + //Offset for 1st warp + int offset_c = ((task_id_first_warp/num_tasks_per_thread)*addr_shift_c) + ((task_id_first_warp%num_tasks_per_thread)*num_data_per_op_tile_c); + offset_c = offset_c + (num_data_per_warp_c*(task_id/num_tasks_per_warp)); + + int thread_limit = (num_threads < tc_size*tc_size*n_tiles*TC_per_warp)? 
num_threads : tc_size*tc_size*n_tiles*TC_per_warp; + int thread_limit_c = (num_threads 64 tasks => 32 tasks/warp => 8 tasks/thread + /*task0->thread0, warp0 + task1->thread0 , warp0 + task2->thread0 , warp0 + . + task7->thread0 + task8->thread1 + task9->thread1 + . + . + ------ + task32 -> thread0, warp1 + task33 -> thread1, warp1 + . + */ + + //NEW TASK DISTRIBUTION // For 8x8 matrix, 2x2 tc_size, 1 tc_num, 4threads, 2warps => 64 tasks => 32 tasks/warp => 8 tasks/thread + /*task0->thread0, warp0 + task1->thread1 , warp0 + task2->thread2 , warp0 + task3->thread3 ,... + task4->thread0 + task5->thread1 + . + . + ------ + task32 -> thread0, warp1 + task33 -> thread1, warp1 + . + .*/ + + //TODO :: change this for new task->thread distribution + if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit) + { + unsigned a_addr_base = a_addr + offset*arg->data_size; + unsigned b_addr_base = b_addr + offset*arg->data_size; + unsigned c_addr_base = c_addr + offset_c*arg->data_size; + csr_write(VX_MAT_MUL_SIZE,n_tiles); + mload (0, a_addr_base); + mload (1, b_addr_base); + //In case of multiple threads - sync load + vx_fence(); + + mm(); //Assuming padding to ensure matrix size is a multiple of tc_size + vx_fence(); + if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit_c) + ms(c_addr_base); + //In case of multiple threads - sync store + vx_fence(); + } +} + +int main() { + kernel_arg_t* arg = (kernel_arg_t*)csr_read(VX_CSR_MSCRATCH); + return vx_spawn_threads(1, &arg->num_tasks, nullptr, (vx_kernel_func_cb)kernel_body, arg); +} diff --git a/tests/regression/matmul/main.cpp b/tests/regression/matmul/main.cpp new file mode 100644 index 000000000..6a86712ae --- /dev/null +++ b/tests/regression/matmul/main.cpp @@ -0,0 +1,343 @@ +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", 
#_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +const char* kernel_file = "kernel.vxbin"; +uint32_t matrix_size = 0; +vx_device_h device = nullptr; +vx_buffer_h A_buffer = nullptr; +vx_buffer_h B_buffer = nullptr; +vx_buffer_h C_buffer = nullptr; +vx_buffer_h krnl_buffer = nullptr; +vx_buffer_h args_buffer = nullptr; + +std::vector staging_buf; +kernel_arg_t kernel_arg = {}; + +static void show_usage() { + std::cout << "Vortex Test." << std::endl; + std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv, uint32_t &data_size) { + int c; + while ((c = getopt(argc, argv, "n:k:d:h?")) != -1) { + switch (c) { + case 'n': + matrix_size = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'd': + data_size = atoi(optarg); + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() { + if (device) { + vx_mem_free(A_buffer); + vx_mem_free(B_buffer); + vx_mem_free(C_buffer); + vx_mem_free(krnl_buffer); + vx_mem_free(args_buffer); + vx_dev_close(device); + } +} + +template +class mainVariables +{ + public: + // Constructor + mainVariables(uint32_t bufSize, uint32_t dataSize, uint32_t matrixSize) + : buf_size(bufSize), data_size(dataSize), matrix_size(matrixSize) + { + // Resize vectors to specified sizes + src_A.resize(buf_size/data_size); + src_B.resize(buf_size/data_size); + refs.resize(buf_size/data_size); + } + + void init_inputs () + { + std::cout << "inside init" << std::endl; + for (uint32_t i = 0; i < matrix_size*matrix_size; ++i) + { + auto a = static_cast(std::rand()) / RAND_MAX; + auto b = static_cast(std::rand()) / RAND_MAX; + src_A[i] = static_cast(a * matrix_size); + src_B[i] = static_cast(b * matrix_size); + } + } + + void matmul_cpu() + { + for (uint32_t row = 0; row < matrix_size; 
++row) + { + for (uint32_t col = 0; col < matrix_size; ++col) + { + TYPE sum(0); + for (uint32_t e = 0; e < matrix_size; ++e) { + sum += src_A[row * matrix_size + e] * src_B[e * matrix_size + col]; + } + refs[row * matrix_size + col] = sum; + } + } + } + + //Public variables + std::vector src_A; + std::vector src_B; + std::vector refs; + + std::vector A_mat; + std::vector B_mat; + + private: + uint32_t buf_size; + uint32_t data_size; + uint32_t matrix_size; +}; + + + +int main(int argc, char *argv[]) { + // parse command arguments + uint32_t data_size = 0; + parse_args(argc, argv, data_size); + if (matrix_size == 0) { + matrix_size = 2; + } + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + uint64_t num_cores, num_warps, num_threads, tc_size, TC_per_warp; + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); + + std::cout << "Debug :: tc_size = " << tc_size << std::endl; + std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl; + + int threads_per_tc; + //TODO - can be changed + //Number of output tiles * number of threads + if (TC_per_warp > num_threads) + threads_per_tc = 1; + else + threads_per_tc = num_threads/TC_per_warp; + + uint32_t num_tasks = ((matrix_size*matrix_size)/(tc_size*tc_size))*threads_per_tc; + + //size of each operand + uint32_t buf_size = ((matrix_size*matrix_size)/(tc_size*tc_size))*(matrix_size/(tc_size))*(tc_size*tc_size)*data_size; + + //256 + std::cout << "Debug :: buf_size: " << buf_size << " bytes" << std::endl; + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_READ, &A_buffer)); + RT_CHECK(vx_mem_address(A_buffer, 
&kernel_arg.src0_addr)); + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_READ, &B_buffer)); + RT_CHECK(vx_mem_address(B_buffer, &kernel_arg.src1_addr)); + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_WRITE, &C_buffer)); + RT_CHECK(vx_mem_address(C_buffer, &kernel_arg.dst_addr)); + + std::cout << "A_addr=0x" << std::hex << kernel_arg.src0_addr << std::endl; + std::cout << "B_addr=0x" << std::hex << kernel_arg.src1_addr << std::endl; + std::cout << "C_addr=0x" << std::hex << kernel_arg.dst_addr << std::endl; + + mainVariables variables (buf_size, data_size, matrix_size); + variables.init_inputs(); + + ////////////////////////////////////////////////// + // generate source data + ////////////////////////////////////////////////// + variables.matmul_cpu(); + + uint32_t tc_size_f = tc_size*tc_size; + uint32_t n_tiles = matrix_size/tc_size; + + variables.A_mat.resize(buf_size); + variables.B_mat.resize(buf_size); + + //Demand matrix creation for A / traverse through the rows + for(uint32_t k=0; k(time_end - time_start).count(); + printf("Elapsed time: %lg ms\n", elapsed); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev((int8_t*)variables.B_mat.data(), C_buffer, 0, buf_size)); + + // verify result (TODO : needs to be fixed for for functional correctness) + /* + std::cout << "verify result" << std::endl; + { + int errors = 0; + auto buf_ptr = (int8_t*)staging_buf.data(); + uint64_t tc_size = kernel_arg.tc_size; + std::cout << "tc_size = " << tc_size << std::endl; + int Result[matrix_size*matrix_size]; + int n_tiles = (matrix_size/tc_size); + int tc_size_f = tc_size*tc_size; + + //converting buf ptr (tile by tile) to CPU style linear (row by row) + for(int k = 0; k < matrix_size/tc_size; k+= 1) + { + for(int j = 0; j < matrix_size; j+= tc_size) + { + for(int i =0; i < tc_size*tc_size; i++) + { + Result[ tc_size*matrix_size*k +j+ (i/tc_size)*matrix_size +i%(tc_size)] = 
buf_ptr[matrix_size*tc_size*k+tc_size*j+i]; + } + } + } + + for (uint32_t i = 0; i < matrix_size*matrix_size; ++i) { + //int ref = i + i; + int cur = Result[i]; + if (cur != refs[i]) { + ++errors; + } + } + if (errors != 0) { + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return 1; + } + else + { + std::cout << "CONDITIONALLY PASSED!" << std::endl; + } + } + */ + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + std::cout << "PASSED!" << std::endl; + + return 0; +} \ No newline at end of file From 0e3badf723c9e2d03013237433aa0838b21c7f99 Mon Sep 17 00:00:00 2001 From: Varsha Singhania Date: Tue, 18 Jun 2024 02:19:57 -0400 Subject: [PATCH 005/488] Script checkin and code cleanup --- run_final.sh | 22 ++++++++++++++++++++++ sim/simx/execute.cpp | 28 +++++++++------------------- 2 files changed, 31 insertions(+), 19 deletions(-) create mode 100755 run_final.sh diff --git a/run_final.sh b/run_final.sh new file mode 100755 index 000000000..5f618dc64 --- /dev/null +++ b/run_final.sh @@ -0,0 +1,22 @@ +# Define arrays for threads, warps, and matrix sizes +matrix_sizes=(16 32 64 128 256 512) +tcsizes=(8 16 32) +tcnums=(4 8 16 32) +#lsulanes=(4 16) +#cores=(32) + + +# Loop through each combination of threads and warps +for size in "${matrix_sizes[@]}"; do + sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" ../tests/regression/matmul/Makefile + sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" tests/regression/matmul/Makefile + echo "Matrix size changed to ${size} in Makefile" + for tcsize in "${tcsizes[@]}"; do + for tcnum in "${tcnums[@]}"; do + log_name="sim_final/mat${size}/tcsize${tcsize}_tcnum${tcnum}_32w32t" + command="./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --tc_size=${tcsize} --tc_num=${tcnum} --rebuild=1 --perf=1 > ${log_name} 2>&1" + echo "$command" + eval "$command" + done + done +done diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp 
index d522145db..e13df18b9 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1432,8 +1432,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { uint32_t data_bytes_store; uint32_t num_threads_per_tc = MAX (1, num_threads/TC_per_warp); - //int num_warps = MIN() - //int active_tcs = MIN (TC_per_warp, num_output_tiles/num_warps) //LOAD if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp) { @@ -1448,11 +1446,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { data_bytes_load = mem_bytes*num_data_per_thread; //STORE - - // DP(3, "DEBUG :: num_threads = " << num_threads); - // DP(3, "DEBUG :: tc_size*tc_size = " << tc_size*tc_size); - //DP(3, "imm = " << immsrc); - if(num_threads > tc_size*tc_size*TC_per_warp) { num_threads_actv_st = tc_size*tc_size*TC_per_warp; @@ -1499,8 +1492,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n] = *temp_ref; DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]); } - //loop_offset += tc_size*tc_size; - //} } rd_write = true; } break; @@ -1531,7 +1522,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { uint32_t csr_index = (2*num_data_per_thread_st) + n; uint32_t scratchpad_index = (tc_size*tc_size*2) + (t*num_data_per_thread) + n; - //scratchpad -> csr (TODO :: can intermediate step of moving to CSR be skipped?) 
+ //scratchpad -> csr (TODO :: removed intermediate CSR stage ; incorporate limited scratchmad implementation) //core_->set_csr(csr_addr[(2*num_data_per_thread) + n], scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread) + n], t, warp_id_); Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0)); *temp_ref = scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n]; @@ -1562,14 +1553,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { //TC operation [only 1 thread in 1 warp needs to do this] if (t%threads_per_tc == 0) { - //TODO - change to systolic array implementation + //TODO : change to systolic array implementation uint32_t thread_offset = t*(tc_size*tc_size); int loop_offset = 0; int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size; - // Loop over all tiles - output stationary - //for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? - //{ - /* + /* + // TODO : Fix needed for functional correctness + for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? 
+ { for (int i = 0; i < tc_size; i++) { //ROW-1 for (int j = 0; j < tc_size; j++) { //COL-2 int sum = 0; @@ -1579,12 +1570,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } scratchpad[accu_offset + thread_offset +(i * tc_size + j)] += sum; //[i * col2 + j] = sum DP(3, "Scratchpad Index: " << accu_offset + (i * tc_size + j) << " , Value=" << scratchpad[accu_offset + (i * tc_size + j)]); - } } - */ - //loop_offset += tc_size*tc_size; //Move to the next tiled matmul fragment - //} + loop_offset += tc_size*tc_size; //Move to the next tiled matmul fragment + } + */ } } From efe12ca6bfe090cc73110ace1e8c24ab92b394a7 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Wed, 5 Jun 2024 11:19:06 -0400 Subject: [PATCH 006/488] Update README.md --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6eeb1ccfa..f0cfe45fe 100644 --- a/README.md +++ b/README.md @@ -57,9 +57,13 @@ More detailed build instructions can be found [here](docs/install_vortex.md). $ cd Vortex ### Configure your build folder # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. 
- $ mkdir build - $ cd build - $ ../configure --xlen=32 --tooldir=$HOME/tools + # This is the example for volvo server +``` + mkdir build + cd build + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 + source ./ci/toolchain_env.sh +``` ### Install prebuilt toolchain $ ./ci/toolchain_install.sh --all ### set environment variables From 54af5eb1861a6d636c89313816245bde4a22cd05 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Wed, 5 Jun 2024 11:23:47 -0400 Subject: [PATCH 007/488] Update README.md --- README.md | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index f0cfe45fe..135dab466 100644 --- a/README.md +++ b/README.md @@ -47,14 +47,18 @@ More detailed build instructions can be found [here](docs/install_vortex.md). - [Yosys](https://github.com/YosysHQ/yosys) - [Sv2v](https://github.com/zachjs/sv2v) ### Install development tools - $ sudo apt-get install build-essential - $ sudo apt-get install binutils - $ sudo apt-get install python - $ sudo apt-get install uuid-dev - $ sudo apt-get install git +``` + sudo apt-get install build-essential + sudo apt-get install binutils + sudo apt-get install python + sudo apt-get install uuid-dev + sudo apt-get install git +``` ### Install Vortex codebase - $ git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git - $ cd Vortex +``` + git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git + cd Vortex +``` ### Configure your build folder # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. # This is the example for volvo server @@ -62,16 +66,17 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
mkdir build cd build ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 - source ./ci/toolchain_env.sh + source ./ci/toolchain_env.sh -all ``` ### Install prebuilt toolchain - $ ./ci/toolchain_install.sh --all + # We will use the precomipled tools in volvo toolchanin directory ### set environment variables # should always run before using the toolchain! - $ source ./ci/toolchain_env.sh + source ./ci/toolchain_env.sh ### Building Vortex - $ make -s + make -s ### Quick demo running vecadd OpenCL kernel on 2 cores +<<<<<<< HEAD $ ./ci/blackbox.sh --cores=2 --app=vecadd ### Common Developer Tips From cf3f2d4f6fdde32fa114b6c006da91ab6e88f471 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Wed, 5 Jun 2024 11:24:49 -0400 Subject: [PATCH 008/488] Update README.md --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 135dab466..0618b0c89 100644 --- a/README.md +++ b/README.md @@ -62,12 +62,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md). ### Configure your build folder # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. 
# This is the example for volvo server -``` mkdir build cd build ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 - source ./ci/toolchain_env.sh -all -``` ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory ### set environment variables From 2b426693f525bd53a60336885e0a0326a7ffd19f Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Wed, 5 Jun 2024 16:11:51 -0400 Subject: [PATCH 009/488] expand MemoryUnit class defs and add some tlb-related functions --- hw/rtl/VX_config.vh | 16 ++++++ sim/common/mem.cpp | 41 +++++++++++++- sim/common/mem.h | 131 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 181 insertions(+), 7 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 7fc8d1464..5bb5720e8 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -14,6 +14,22 @@ `ifndef VX_CONFIG_VH `define VX_CONFIG_VH +`ifndef VM_ADDR_MODE +`define VM_ADDR_MODE SV32 +`endif + +`ifndef PTE_SIZE +`define PTE_SIZE 8 +`endif + +`ifndef TLB_SIZE +`define TLB_SIZE 32 +`endif + +`ifndef SUPER_PAGING +`define SUPER_PAGING false +`endif + `ifndef MIN `define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) `endif diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index ed4bcc522..92a983410 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -17,9 +17,22 @@ #include #include #include "util.h" +#include +#include using namespace vortex; +uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) +{ + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); +} + +bool bit(uint64_t addr, uint8_t idx) +{ + return (addr) & (1 << idx); +} + + RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize) : wordSize_(wordSize) { std::ifstream input(filename); @@ -158,12 +171,12 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { return pAddr; } -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1); return decoder_.read(data, pAddr, size); } -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 
16 : 1); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; @@ -179,10 +192,34 @@ bool MemoryUnit::amo_check(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } + void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) { tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags); } +void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits) { + // HW: evict TLB by Most Recently Used + if (tlb_.size() == TLB_SIZE - 1) { + for (auto& entry : tlb_) + { + entry.second.mru_bit = false; + } + + } else if (tlb_.size() == TLB_SIZE) { + uint64_t del; + for (auto entry : tlb_) { + if (!entry.second.mru_bit) + { + del = entry.first; + break; + } + } + tlb_.erase(tlb_.find(del)); + TLB_EVICT++; + } + tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags, size_bits); +} + void MemoryUnit::tlbRm(uint64_t va) { if (tlb_.find(va / pageSize_) != tlb_.end()) tlb_.erase(tlb_.find(va / pageSize_)); diff --git a/sim/common/mem.h b/sim/common/mem.h index 1f5196113..76e2f2ae5 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -18,8 +18,22 @@ #include #include #include +#include +#include namespace vortex { + +enum VA_MODE { + BARE, + SV32 +}; + +enum ACCESS_TYPE { + LOAD, + STORE, + FETCH +}; + struct BadAddress {}; struct OutOfRange {}; @@ -73,31 +87,39 @@ public: class MemoryUnit { public: +// HW: Expand PageFault struct to contain access_type info for debug purposes struct PageFault { PageFault(uint64_t a, bool nf) : faultAddr(a) , notFound(nf) + , access_type(ACCESS_TYPE::LOAD) {} - uint64_t faultAddr; - bool notFound; + uint64_t faultAddr; + bool notFound; + ACCESS_TYPE access_type; }; MemoryUnit(uint64_t pageSize = 0); void attach(MemDevice &m, uint64_t start, uint64_t end); - void read(void* data, uint64_t addr, uint64_t size, bool sup); - void write(const void* data, uint64_t addr, uint64_t size, bool sup); + void read(void* data, 
uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + void write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); void amo_reserve(uint64_t addr); bool amo_check(uint64_t addr); void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); + void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); + void tlbRm(uint64_t vaddr); void tlbFlush() { tlb_.clear(); } + uint32_t get_satp(); + void set_satp(uint32_t satp); + private: struct amo_reservation_t { @@ -137,11 +159,41 @@ private: TLBEntry(uint32_t pfn, uint32_t flags) : pfn(pfn) , flags(flags) - {} + , mru_bit(true) + {}; + TLBEntry(uint32_t pfn, uint32_t flags, uint64_t size_bits) + : pfn(pfn) + , flags(flags) + , mru_bit(true) + , size_bits (size_bits) + { + d = bit(7); + a = bit(6); + g = bit(5); + u = bit(4); + x = bit(3); + w = bit(2); + r = bit(1); + v = bit(0); + } + bool bit(uint8_t idx) + { + return (flags) & (1 << idx); + } + uint32_t pfn; + bool d, a, g, u, x, w, r, v; + bool mru_bit; + uint64_t size_bits; uint32_t flags; }; + std::pair tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits); + + uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); + + std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); + TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); @@ -151,6 +203,13 @@ private: ADecoder decoder_; bool enableVM_; + uint32_t satp; + VA_MODE mode; + uint32_t ptbr; + + std::unordered_set unique_translations; + uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; + amo_reservation_t amo_reservation_; }; @@ -219,4 +278,66 @@ private: bool check_acl_; }; +class PTE_SV32_t +{ + + private: + uint64_t address; + uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + { + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + } + bool bit(uint8_t idx) + { + return (address) & (1 << idx); 
+ } + + public: + uint64_t ppn[2]; + uint32_t rsw; + uint32_t flags; + bool d, a, g, u, x, w, r, v; + PTE_SV32_t(uint64_t address) : address(address) + { + flags = bits(address,0,7); + rsw = bits(address,8,9); + ppn[0] = bits(address,10,19); + ppn[1] = bits(address,20,31); + + d = bit(7); + a = bit(6); + g = bit(5); + u = bit(4); + x = bit(3); + w = bit(2); + r = bit(1); + v = bit(0); + } +}; + +class vAddr_SV32_t +{ + + private: + uint64_t address; + uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + { + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + } + bool bit(uint64_t addr, uint8_t idx) + { + return (addr) & (1 << idx); + } + + public: + uint64_t vpn[2]; + uint64_t pgoff; + vAddr_SV32_t(uint64_t address) : address(address) + { + vpn[0] = bits(address,12,21); + vpn[1] = bits(address,22,31); + pgoff = bits(address,0,11); + } +}; + } // namespace vortex From 6f0af066e8e982dce5358a72acfd03d99477fc01 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 7 Jun 2024 10:38:41 -0400 Subject: [PATCH 010/488] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0618b0c89..4141ec8fb 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
``` ### Install Vortex codebase ``` - git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git + git clone --depth=1 --recursive git@github.com:gthparch/vortex_vm.git cd Vortex ``` ### Configure your build folder From 2f2974ee721afa88baaba5979254f336bf70435f Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 7 Jun 2024 10:53:25 -0400 Subject: [PATCH 011/488] Ignore the changed on ramulator --- .gitmodules | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitmodules b/.gitmodules index af1d1a476..1a002355f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,4 @@ [submodule "third_party/ramulator"] path = third_party/ramulator url = https://github.com/CMU-SAFARI/ramulator.git + ignore = dirty From d8a6ac748a83b6258548b506c89d0c08d8528f52 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 7 Jun 2024 10:52:43 -0400 Subject: [PATCH 012/488] Update README.md --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4141ec8fb..3994bf942 100644 --- a/README.md +++ b/README.md @@ -57,14 +57,24 @@ More detailed build instructions can be found [here](docs/install_vortex.md). ### Install Vortex codebase ``` git clone --depth=1 --recursive git@github.com:gthparch/vortex_vm.git - cd Vortex + cd vortex_vm ``` + ### Configure your build folder + # # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. 
# This is the example for volvo server mkdir build + mkdir out + export OUT_DIR=`pwd`/out cd build - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 --prefix=$OUT_DIR +### Ignore the commit for ramulator when it is compiled + # Please add ignore = dirty entry on .gitmodules + [submodule "third_party/ramulator"] + path = third_party/ramulator + url = https://github.com/CMU-SAFARI/ramulator.git + ignore = dirty ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory ### set environment variables From cfcece940ed28eed574eeeecbe2d1a6d08bbe8bb Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Tue, 11 Jun 2024 23:06:48 -0400 Subject: [PATCH 013/488] Merge Austin's code (Preliminary) --- hw/rtl/VX_config.vh | 33 ++-- runtime/simx/vortex.cpp | 372 ++++++++++++++++++++++++++++++++++++-- sim/common/mem.cpp | 250 ++++++++++++++++++++++++- sim/common/mem.h | 68 +++++-- sim/simx/cluster.cpp | 8 + sim/simx/cluster.h | 4 + sim/simx/core.cpp | 7 + sim/simx/core.h | 4 + sim/simx/emulator.cpp | 82 ++++++++- sim/simx/emulator.h | 6 + sim/simx/processor.cpp | 22 ++- sim/simx/processor.h | 10 + sim/simx/processor_impl.h | 5 + sim/simx/socket.cpp | 8 + sim/simx/socket.h | 4 + 15 files changed, 830 insertions(+), 53 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 5bb5720e8..98dcdd16e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -14,21 +14,28 @@ `ifndef VX_CONFIG_VH `define VX_CONFIG_VH -`ifndef VM_ADDR_MODE -`define VM_ADDR_MODE SV32 +`ifndef VM_DISABLE +`define VM_ENABLE +`endif +`ifdef VM_ENABLE + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV32 + `endif + + `ifndef PTE_SIZE + `define PTE_SIZE 8 + `endif + + `ifndef TLB_SIZE + `define TLB_SIZE 32 + `endif + + `ifndef SUPER_PAGING + `define SUPER_PAGING 0 + `endif + `endif -`ifndef PTE_SIZE -`define PTE_SIZE 8 -`endif - -`ifndef TLB_SIZE -`define TLB_SIZE 32 
-`endif - -`ifndef SUPER_PAGING -`define SUPER_PAGING false -`endif `ifndef MIN `define MIN(x, y) (((x) < (y)) ? (x) : (y)) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 89856f3a0..6e5cafc38 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -29,6 +29,38 @@ using namespace vortex; +#ifdef VM_ENABLE + +#ifndef NDEBUG +#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) +#else +#define DBGPRINT(format, ...) ((void)0) +#endif + +#define CHECK_ERR(_expr, _cleanup) \ + do { \ + auto err = _expr; \ + if (err == 0) \ + break; \ + printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \ + _cleanup \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// +// +#include +#include + +uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) +{ + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); +} +bool bit(uint64_t addr, uint8_t idx) +{ + return (addr) & (1 << idx); +} +#endif + class vx_device { public: vx_device() @@ -42,6 +74,10 @@ public: { // attach memory module processor_.attach_ram(&ram_); +#ifdef VM_ENABLE + //Set + processor_.set_processor_satp(VM_ADDR_MODE); +#endif } ~vx_device() { @@ -90,18 +126,75 @@ public: return 0; } - int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) { - uint64_t addr; - CHECK_ERR(global_mem_.allocate(size, &addr), { - return err; - }); - CHECK_ERR(this->mem_access(addr, size, flags), { - global_mem_.release(addr); - return err; - }); - *dev_addr = addr; - return 0; - } +#ifdef VM_ENABLE + // VM SUPPORT + uint64_t map_local_mem(uint64_t size, uint64_t* dev_maddr) + { + bool is_pc = false; + std::cout << "startup addr: " << std::hex << STARTUP_ADDR << std::endl; + std::cout << "bit mode: " << std::dec << XLEN << std::endl; + if (*dev_maddr == STARTUP_ADDR || *dev_maddr == 0x7FFFF000) { + is_pc = true; + } + + if (get_mode() == VA_MODE::BARE) + return 0; + + uint64_t ppn = *dev_maddr >> 
12; + uint64_t init_pAddr = *dev_maddr; + uint64_t init_vAddr = *dev_maddr + 0xf0000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation + init_vAddr = (init_vAddr >> 12) << 12; + uint64_t vpn; + + //dev_maddr can be of size greater than a page, but we have to map and update + //page tables on a page table granularity. So divide the allocation into pages. + for (ppn = (*dev_maddr) >> 12; ppn < ((*dev_maddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) + { + //Currently a 1-1 mapping is used, this can be changed here to support different + //mapping schemes + vpn = is_pc ? ppn : ppn + 0xf0000; + //vpn = ppn; + + //If ppn to vpn mapping doesnt exist. + if (addr_mapping.find(vpn) == addr_mapping.end()) + { + //Create mapping. + update_page_table(ppn, vpn); + addr_mapping[vpn] = ppn; + } + } + + std::cout << "mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; + uint64_t size_bits; + if (is_pc) { + std::cout << "not returning virtual address because it is PC or stack" << std::endl; + std::pair ptw_access = page_table_walk(init_vAddr - 0xf0000000, &size_bits); + return 0; + } else { + std::pair ptw_access = page_table_walk(init_vAddr, &size_bits); + } + *dev_maddr = init_vAddr; // commit vpn to be returned to host + return 0; + } +#endif + + int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) { + uint64_t addr; + CHECK_ERR(global_mem_.allocate(size, &addr), { + return err; + }); + CHECK_ERR(this->mem_access(addr, size, flags), { + global_mem_.release(addr); + return err; + }); +#ifdef VM_ENABLE + // VM address translation + std::cout << "physical addr: " << std::hex << *dev_addr << std::endl; + map_local_mem(size, dev_addr); +#endif + *dev_addr = addr; + return 0; + } int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { CHECK_ERR(global_mem_.reserve(dev_addr, size), { @@ -140,6 +233,18 @@ public: if (dest_addr + asize > GLOBAL_MEM_SIZE) return -1; +#ifdef VM_ENABLE + 
uint64_t pAddr = dest_addr; // map_local_mem overwrites the provided dest_addr, so store away physical destination address + if (dest_addr >= STARTUP_ADDR) { + map_local_mem(asize,&dest_addr); + } else if (dest_addr >= 0x7fff0000) + { + map_local_mem(asize,&dest_addr); + } + std::cout << "uploading to 0x" << pAddr << "(VA)" << std::endl; + dest_addr = pAddr; +#endif + ram_.enable_acl(false); ram_.write((const uint8_t*)src, dest_addr, size); ram_.enable_acl(true); @@ -235,6 +340,244 @@ public: return 0; } +#ifdef VM_ENABLE + /* VM Management */ + void set_processor_satp(VA_MODE mode) + { + uint32_t satp; + if (mode == VA_MODE::BARE) + satp = 0; + else if (mode == VA_MODE::SV32) + { + satp = (alloc_page_table() >> 10) | 0x80000000; + // satp = 0xFEBFE000 ; + } + processor_.set_satp(satp); + } + + uint32_t get_ptbr() + { + // return processor_.get_satp(); + return processor_.get_satp() & 0x003fffff; + } + + VA_MODE get_mode() + { + return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; + // return VA_MODE::SV32; + } + + void update_page_table(uint64_t pAddr, uint64_t vAddr) { + std::cout << "mapping vpn: " << vAddr << " to ppn:" << pAddr << std::endl; + //Updating page table with the following mapping of (vAddr) to (pAddr). + uint64_t ppn_1, pte_addr, pte_bytes; + uint64_t vpn_1 = bits(vAddr, 10, 19); + uint64_t vpn_0 = bits(vAddr, 0, 9); + + //Read first level PTE. + pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); + pte_bytes = read_pte(pte_addr); + std::cout << "[PTE] addr 0x" << std::hex << pte_addr << ", PTE 0x" << std::hex << pte_bytes << std::endl; + + + if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + //If valid bit set, proceed to next level using new ppn form PTE. + std::cout << "PTE valid, continuing the walk..." << std::endl; + ppn_1 = (pte_bytes >> 10); + } + else + { + //If valid bit not set, allocate a second level page table + // in device memory and store ppn in PTE. 
Set rwx = 000 in PTE + //to indicate this is a pointer to the next level of the page table. + ppn_1 = (alloc_page_table() >> 12); + pte_bytes = ( (ppn_1 << 10) | 0b0000000001) ; + write_pte(pte_addr, pte_bytes); + } + + //Read second level PTE. + pte_addr = (ppn_1 << 12) + (vpn_0 * PTE_SIZE); + pte_bytes = read_pte(pte_addr); + std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + + if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + std::cout << "ERROR, shouldn't be here" << std::endl; + //If valid bit is set, then the page is already allocated. + //Should not reach this point, a sanity check. + } + else + { + //If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE + //to indicate this is a leaf PTE and has the stated permissions. + pte_bytes = ( (pAddr << 10) | 0b0000001111) ; + write_pte(pte_addr, pte_bytes); + + //If super paging is enabled. + if (SUPER_PAGING) + { + //Check if this second level Page Table can be promoted to a super page. Brute force + //method is used to iterate over all PTE entries of the table and check if they have + //their valid bit set. + bool superpage = true; + for(int i = 0; i < 1024; i++) + { + pte_addr = (ppn_1 << 12) + (i * PTE_SIZE); + pte_bytes = read_pte(pte_addr); + + if (!bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + superpage = false; + break; + } + } + if (superpage) + { + //This can be promoted to a super page. Set root PTE to the first PTE of the + //second level. This is because the first PTE of the second level already has the + //correct PPN1, PPN0 set to zero and correct access bits. 
+ pte_addr = (ppn_1 << 12); + pte_bytes = read_pte(pte_addr); + pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); + write_pte(pte_addr, pte_bytes); + } + } + } + } + + std::pair page_table_walk(uint64_t vAddr_bits, uint64_t* size_bits) + { + uint64_t LEVELS = 2; + vAddr_SV32_t vAddr(vAddr_bits); + uint64_t pte_bytes; + + std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; + + //Get base page table. + uint64_t a = this->processor_.get_satp() << 12; + std::cout << "PTW SATP: 0x" << a << std::endl; + int i = LEVELS - 1; + + while(true) + { + + //Read PTE. + std::cout << "reading PTE from RAM addr 0x" << std::hex << (a+vAddr.vpn[i]*PTE_SIZE) << std::endl; + ram_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); + //pte_bytes &= 0x00000000FFFFFFFF; + PTE_SV32_t pte(pte_bytes); + std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + + //Check if it has invalid flag bits. + if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) + { + std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; + throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); + } + + if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + //Not a leaf node as rwx == 000 + i--; + if (i < 0) + { + throw Page_Fault_Exception("Page Fault : No leaf node found."); + } + else + { + //Continue on to next level. + a = (pte_bytes >> 10 ) << 12; + std::cout << "next a: " << a << std::endl; + } + } + else + { + //Leaf node found, finished walking. + a = (pte_bytes >> 10 ) << 12; + break; + } + } + + PTE_SV32_t pte(pte_bytes); + + //Check RWX permissions according to access type. + if (pte.r == 0) + { + throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + } + + uint64_t pfn; + if (i > 0) + { + //It is a super page. + if (pte.ppn[0] != 0) + { + //Misss aligned super page. + throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); + + } + else + { + //Valid super page. 
+ pfn = pte.ppn[1]; + *size_bits = 22; + } + } + else + { + //Regular page. + *size_bits = 12; + pfn = a >> 12; + } + return std::make_pair(pfn, pte_bytes & 0xff); + } + + uint64_t alloc_page_table() { + uint64_t addr; + global_mem_.allocate(RAM_PAGE_SIZE, &addr); + std::cout << "address of page table 0x" << std::hex << addr << std::endl; + init_page_table(addr); + return addr; + } + + + void init_page_table(uint64_t addr) { + uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); + uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; + for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { + src[i] = (0x00000000 >> ((i & 0x3) * 8)) & 0xff; + } + ram_.write((const uint8_t*)src, addr, asize); + } + + void read_page_table(uint64_t addr) { + uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; + download(dest, addr, RAM_PAGE_SIZE); + printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); + for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { + printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + } + } + + void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { + std::cout << "writing pte " << std::hex << value << " to pAddr: " << std::hex << addr << std::endl; + uint8_t *src = new uint8_t[PTE_SIZE]; + for (uint64_t i = 0; i < PTE_SIZE; ++i) { + src[i] = (value >> ((i & 0x3) * 8)) & 0xff; + } + //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; + ram_.write((const uint8_t*)src, addr, PTE_SIZE); + } + + uint64_t read_pte(uint64_t addr) { + uint8_t *dest = new uint8_t[PTE_SIZE]; + std::cout << "[read_pte] reading PTE from RAM addr 0x" << std::hex << addr << std::endl; + ram_.read((uint8_t*)dest, addr, PTE_SIZE); + return *(uint64_t*)((uint8_t*)dest); + } +#endif // JAEWON + private: Arch arch_; RAM ram_; @@ -243,6 +586,9 @@ private: DeviceConfig dcrs_; std::future future_; std::unordered_map> mpm_cache_; +#ifdef VM_ENABLE + std::unordered_map addr_mapping; +#endif }; -#include \ No newline at end of file +#include diff 
--git a/sim/common/mem.cpp b/sim/common/mem.cpp index 92a983410..b55d0de9a 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -137,16 +137,90 @@ void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) MemoryUnit::MemoryUnit(uint64_t pageSize) : pageSize_(pageSize) , enableVM_(pageSize != 0) - , amo_reservation_({0x0, false}) { - if (pageSize != 0) { - tlb_[0] = TLBEntry(0, 077); + , amo_reservation_({0x0, false}) +#ifdef VM_ENABLE + , TLB_HIT(0) + , TLB_MISS(0) + , TLB_EVICT(0) + , PTW(0) {}; +#else + { + if (pageSize != 0) + { + tlb_[0] = TLBEntry(0, 077); + } } -} +#endif void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { decoder_.map(start, end, m); } +#ifdef VM_ENABLE +std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) { + + //Find entry while accounting for different sizes. + for (auto entry : tlb_) + { + if(entry.first == vAddr >> entry.second.size_bits) + { + *size_bits = entry.second.size_bits; + vAddr = vAddr >> (*size_bits); + } + } + + + auto iter = tlb_.find(vAddr); + if (iter != tlb_.end()) { + TLBEntry e = iter->second; + + //Set mru bit if it is a hit. + iter->second.mru_bit = true; + + //If at full capacity and no other unset bits. + // Clear all bits except the one we just looked up. + if (tlb_.size() == TLB_SIZE) + { + // bool no_cleared = true; + // for (auto& entry : tlb_) + // { + // no_cleared = no_cleared & entry.second.mru_bit; + // } + + // if(no_cleared) + // { + for (auto& entry : tlb_) + { + entry.second.mru_bit = false; + } + iter->second.mru_bit = true; + //} + + } + //Check access permissions. 
+ if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) ) + { + throw Page_Fault_Exception("Page Fault : Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::LOAD) & (e.r == 0) ) + { + throw Page_Fault_Exception("Page Fault : Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::STORE) & (e.w == 0) ) + { + throw Page_Fault_Exception("Page Fault : Incorrect permissions."); + } + else + { + //TLB Hit + return std::make_pair(true, iter->second.pfn); + } + } else { + //TLB Miss + return std::make_pair(false, 0); + } +} +#endif //JAEWON MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) { auto iter = tlb_.find(vAddr / pageSize_); if (iter != tlb_.end()) { @@ -171,16 +245,40 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { return pAddr; } -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { +#ifdef VM_ENABLE +void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { + uint64_t pAddr; + if (this->mode == VA_MODE::BARE) { + pAddr = addr; + } else { + pAddr = vAddr_to_pAddr(addr, type); + } + return decoder_.read(data, pAddr, size); +} +#else +void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1); return decoder_.read(data, pAddr, size); } - -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { +#endif +#ifdef VM_ENABLE +void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { + uint64_t pAddr; + if ( (this->mode == VA_MODE::BARE) | (addr >= IO_BASE_ADDR) ) { + pAddr = addr; + } else { + pAddr = vAddr_to_pAddr(addr, type); + } + decoder_.write(data, pAddr, size); + amo_reservation_.valid = false; +} +#else +void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 
16 : 1); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; } +#endif void MemoryUnit::amo_reserve(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); @@ -193,9 +291,8 @@ bool MemoryUnit::amo_check(uint64_t addr) { return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } -void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) { - tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags); -} + +#ifdef VM_ENABLE void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits) { // HW: evict TLB by Most Recently Used @@ -219,6 +316,12 @@ void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t s } tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags, size_bits); } +#else + +void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) { + tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags); +} +#endif void MemoryUnit::tlbRm(uint64_t va) { if (tlb_.find(va / pageSize_) != tlb_.end()) @@ -472,3 +575,130 @@ void RAM::loadHexImage(const char* filename) { --size; } } + +#ifdef VM_ENABLE +uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) +{ + uint64_t pfn; + uint64_t size_bits; + + //First lookup TLB. + std::pair tlb_access = tlbLookup(vAddr, type, &size_bits); + if (tlb_access.first) + { + pfn = tlb_access.second; + TLB_HIT++; + } + else //Else walk the PT. + { + std::pair ptw_access = page_table_walk(vAddr, type, &size_bits); + tlbAdd(vAddr>>size_bits, ptw_access.first, ptw_access.second,size_bits); + pfn = ptw_access.first; TLB_MISS++; PTW++; + unique_translations.insert(vAddr>>size_bits); + PERF_UNIQUE_PTW = unique_translations.size(); + } + + //Construct final address using pfn and offset. 
+ std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; + return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); +} + +std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) +{ + uint64_t LEVELS = 2; + vAddr_SV32_t vAddr(vAddr_bits); + uint64_t pte_bytes; + + //Get base page table. + uint64_t a = this->ptbr << 12; + int i = LEVELS - 1; + + while(true) + { + + //Read PTE. + decoder_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); + PTE_SV32_t pte(pte_bytes); + + //Check if it has invalid flag bits. + if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) + { + throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry."); + } + + if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + //Not a leaf node as rwx == 000 + i--; + if (i < 0) + { + throw Page_Fault_Exception("Page Fault : No leaf node found."); + } + else + { + //Continue on to next level. + a = (pte_bytes >> 10 ) << 12; + } + } + else + { + //Leaf node found, finished walking. + a = (pte_bytes >> 10 ) << 12; + break; + } + } + + PTE_SV32_t pte(pte_bytes); + + //Check RWX permissions according to access type. + if ( (type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)) ) + { + throw Page_Fault_Exception("Page Fault : TYPE FETCH, Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) + { + throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) + { + throw Page_Fault_Exception("Page Fault : TYPE STORE, Incorrect permissions."); + } + + uint64_t pfn; + if (i > 0) + { + //It is a super page. + if (pte.ppn[0] != 0) + { + //Misss aligned super page. + throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); + + } + else + { + //Valid super page. 
+ pfn = pte.ppn[1]; + *size_bits = 22; + } + } + else + { + //Regular page. + *size_bits = 12; + pfn = a >> 12; + } + return std::make_pair(pfn, pte_bytes & 0xff); +} + + +uint32_t MemoryUnit::get_satp() +{ + return satp; +} +void MemoryUnit::set_satp(uint32_t satp) +{ + this->satp = satp; + this->ptbr = satp & 0x003fffff; //22 bits + this->mode = satp & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; +} +#endif \ No newline at end of file diff --git a/sim/common/mem.h b/sim/common/mem.h index 76e2f2ae5..8477fb800 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -20,9 +20,18 @@ #include #include #include +#include "VX_config.h" +#ifdef VM_ENABLE +#include +#include +#include +#endif + namespace vortex { + +#ifdef VM_ENABLE enum VA_MODE { BARE, SV32 @@ -34,6 +43,14 @@ enum ACCESS_TYPE { FETCH }; +class Page_Fault_Exception : public std::runtime_error /* or logic_error */ +{ +public: + Page_Fault_Exception(const std::string& what = "") : std::runtime_error(what) {} + uint64_t addr; + ACCESS_TYPE type; +}; +#endif struct BadAddress {}; struct OutOfRange {}; @@ -92,34 +109,42 @@ public: PageFault(uint64_t a, bool nf) : faultAddr(a) , notFound(nf) - , access_type(ACCESS_TYPE::LOAD) + // , access_type(ACCESS_TYPE::LOAD) {} uint64_t faultAddr; bool notFound; - ACCESS_TYPE access_type; + // ACCESS_TYPE access_type; }; MemoryUnit(uint64_t pageSize = 0); void attach(MemDevice &m, uint64_t start, uint64_t end); - void read(void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); - void write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + +#ifdef VM_ENABLE + void read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + void write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE); +#else + void read(void* data, uint64_t addr, uint64_t size, bool sup); + void write(const void* data, uint64_t addr, uint64_t size, bool sup); 
+#endif void amo_reserve(uint64_t addr); bool amo_check(uint64_t addr); - void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); +#ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); + uint32_t get_satp(); + void set_satp(uint32_t satp); +#else + void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); +#endif void tlbRm(uint64_t vaddr); void tlbFlush() { tlb_.clear(); } - uint32_t get_satp(); - void set_satp(uint32_t satp); - private: struct amo_reservation_t { @@ -156,11 +181,7 @@ private: struct TLBEntry { TLBEntry() {} - TLBEntry(uint32_t pfn, uint32_t flags) - : pfn(pfn) - , flags(flags) - , mru_bit(true) - {}; + #ifdef VM_ENABLE TLBEntry(uint32_t pfn, uint32_t flags, uint64_t size_bits) : pfn(pfn) , flags(flags) @@ -182,17 +203,27 @@ private: } uint32_t pfn; - bool d, a, g, u, x, w, r, v; + uint32_t flags; bool mru_bit; uint64_t size_bits; + bool d, a, g, u, x, w, r, v; + #else + TLBEntry(uint32_t pfn, uint32_t flags) + : pfn(pfn) + , flags(flags) + {} + uint32_t pfn; uint32_t flags; + #endif }; +#ifdef VM_ENABLE std::pair tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits); uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); +#endif TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); @@ -203,14 +234,17 @@ private: ADecoder decoder_; bool enableVM_; + amo_reservation_t amo_reservation_; +#ifdef VM_ENABLE + uint32_t satp; VA_MODE mode; uint32_t ptbr; std::unordered_set unique_translations; uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; +#endif - amo_reservation_t amo_reservation_; }; /////////////////////////////////////////////////////////////////////////////// @@ -278,6 +312,7 @@ private: bool check_acl_; }; +#ifdef VM_ENABLE class PTE_SV32_t { @@ -299,6 +334,7 @@ class PTE_SV32_t bool d, a, g, u, x, w, r, v; PTE_SV32_t(uint64_t address) : address(address) { + assert((address>> 32) == 0 
&& "Upper 32 bits are not zero!"); flags = bits(address,0,7); rsw = bits(address,8,9); ppn[0] = bits(address,10,19); @@ -334,10 +370,12 @@ class vAddr_SV32_t uint64_t pgoff; vAddr_SV32_t(uint64_t address) : address(address) { + assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); vpn[0] = bits(address,12,21); vpn[1] = bits(address,22,31); pgoff = bits(address,0,11); } }; +#endif } // namespace vortex diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index 4b9048867..2ca12f411 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -106,6 +106,14 @@ void Cluster::attach_ram(RAM* ram) { } } +#ifdef VM_ENABLE +void Cluster::set_satp(uint32_t satp) { + for (auto& socket : sockets_) { + socket->set_satp(satp); + } +} +#endif + bool Cluster::running() const { for (auto& socket : sockets_) { if (socket->running()) diff --git a/sim/simx/cluster.h b/sim/simx/cluster.h index 253c54fb4..113ac04f7 100644 --- a/sim/simx/cluster.h +++ b/sim/simx/cluster.h @@ -57,6 +57,10 @@ public: void attach_ram(RAM* ram); + #ifdef VM_ENABLE + void set_satp(uint32_t satp); + #endif + bool running() const; int get_exitcode() const; diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 0bd72524d..29d77f5df 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -396,3 +396,10 @@ bool Core::wspawn(uint32_t num_warps, Word nextPC) { void Core::attach_ram(RAM* ram) { emulator_.attach_ram(ram); } + +#ifdef VM_ENABLE +void Core::set_satp(uint32_t satp) { + emulator_.set_satp(satp); //JAEWON wit, tid??? + // emulator_.set_csr(VX_CSR_SATP,satp,0,0); //JAEWON wit, tid??? 
+} +#endif \ No newline at end of file diff --git a/sim/simx/core.h b/sim/simx/core.h index cc0e46c8c..6d305f7e2 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -26,6 +26,7 @@ #include "dispatcher.h" #include "func_unit.h" #include "mem_coalescer.h" +#include "VX_config.h" namespace vortex { @@ -96,6 +97,9 @@ public: void tick(); void attach_ram(RAM* ram); +#ifdef VM_ENABLE + void set_satp(uint32_t satp); +#endif bool running() const; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 5850bfd56..417ef83aa 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -268,10 +268,51 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { return false; } +#ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { - mmu_.read(data, addr, size, 0); + try + { + mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + } + catch (Page_Fault_Exception& page_fault) + { + std::cout<local_mem()->read(data, addr, size); + } else { + try + { + // mmu_.read(data, addr, size, 0); + mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + } + catch (Page_Fault_Exception& page_fault) + { + std::cout<= uint64_t(IO_COUT_ADDR) + && addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + this->writeToStdOut(data, addr, size); + } else { + if (type == AddrType::Shared) { + core_->local_mem()->write(data, addr, size); + } else { + try + { + // mmu_.write(data, addr, size, 0); + mmu_.write(data, addr, size, ACCESS_TYPE::STORE); + } + catch (Page_Fault_Exception& page_fault) + { + std::cout<= uint64_t(IO_COUT_ADDR) @@ -297,6 +364,7 @@ void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) { } DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl); } +#endif void Emulator::dcache_amo_reserve(uint64_t addr) { auto type = get_addr_type(addr); @@ -348,6 +416,10 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t 
wid) { auto core_perf = core_->perf_stats(); switch (addr) { case VX_CSR_SATP: +#ifdef VM_ENABLE + // return csrs_.at(wid).at(tid)[addr]; + return mmu_.get_satp(); +#endif case VX_CSR_PMPCFG0: case VX_CSR_PMPADDR0: case VX_CSR_MSTATUS: @@ -473,6 +545,12 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { csr_mscratch_ = value; break; case VX_CSR_SATP: + #ifdef VM_ENABLE + // warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F); + // csrs_.at(wid).at(tid)[addr] = value; //what is wid and tid? + mmu_.set_satp(value); + break; + #endif case VX_CSR_MSTATUS: case VX_CSR_MEDELEG: case VX_CSR_MIDELEG: @@ -491,6 +569,8 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { } } + + uint32_t Emulator::get_fpu_rm(uint32_t func3, uint32_t tid, uint32_t wid) { return (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, tid, wid) : func3; } diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 81dcecd83..15708f3c4 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -39,6 +39,9 @@ public: void clear(); void attach_ram(RAM* ram); +#ifdef VM_ENABLE + void set_satp(uint32_t satp) ; +#endif instr_trace_t* step(); @@ -121,6 +124,9 @@ private: MemoryUnit mmu_; Word csr_mscratch_; wspawn_t wspawn_; +#ifdef VM_ENABLE + Word ptbr_; +#endif }; } diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 2627de0b3..f6deaeec8 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -82,6 +82,13 @@ void ProcessorImpl::attach_ram(RAM* ram) { cluster->attach_ram(ram); } } +#ifdef VM_ENABLE +void ProcessorImpl::set_satp(uint32_t satp) { + for (auto cluster : clusters_) { + cluster->set_satp(satp); + } +} +#endif void ProcessorImpl::run() { SimPlatform::instance().reset(); @@ -141,4 +148,17 @@ void Processor::run() { void Processor::dcr_write(uint32_t addr, uint32_t value) { return impl_->dcr_write(addr, value); -} \ No newline at end of file +} + +#ifdef VM_ENABLE +uint32_t Processor::get_satp() 
{ + std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; + return this->satp; +} + +void Processor::set_satp(uint32_t satp) { + std::cout << "set SATP: 0x" << std::hex << this->satp << std::endl; + impl_->set_satp(satp); + this->satp = satp; +} +#endif diff --git a/sim/simx/processor.h b/sim/simx/processor.h index 003af6b0a..17340cf2c 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -14,6 +14,8 @@ #pragma once #include +#include +#include namespace vortex { @@ -31,9 +33,17 @@ public: void run(); void dcr_write(uint32_t addr, uint32_t value); +#ifdef VM_ENABLE + void set_processor_satp(VA_MODE mode); + uint32_t get_satp(); + void set_satp(uint32_t satp); +#endif private: ProcessorImpl* impl_; +#ifdef VM_ENABLE + uint32_t satp; +#endif }; } diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h index dcfba84d7..e6e9a4cf1 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -39,6 +39,11 @@ public: void dcr_write(uint32_t addr, uint32_t value); +#ifdef VM_ENABLE + // 32bit satp + void set_satp(uint32_t satp); +#endif + PerfStats perf_stats() const; private: diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp index 1ef4b1689..4fa3636e1 100644 --- a/sim/simx/socket.cpp +++ b/sim/simx/socket.cpp @@ -107,6 +107,14 @@ void Socket::attach_ram(RAM* ram) { } } +#ifdef VM_ENABLE +void Socket::set_satp(uint32_t satp) { + for (auto core : cores_) { + core->set_satp(satp); + } +} +#endif + bool Socket::running() const { for (auto& core : cores_) { if (core->running()) diff --git a/sim/simx/socket.h b/sim/simx/socket.h index ed38dce67..a09f73e8b 100644 --- a/sim/simx/socket.h +++ b/sim/simx/socket.h @@ -60,6 +60,10 @@ public: void attach_ram(RAM* ram); +#ifdef VM_ENABLE + void set_satp(uint32_t satp); +#endif + bool running() const; int get_exitcode() const; From 01c7b5e3840987b0ea1f1d1b33fd502830051e00 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Thu, 13 Jun 2024 11:30:54 -0400 Subject: [PATCH 014/488] Change the 
declaration of set_processor_satp function --- runtime/simx/vortex.cpp | 21 +++++++++++++-------- sim/simx/processor.h | 1 - 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 6e5cafc38..2d1168179 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -75,8 +75,8 @@ public: // attach memory module processor_.attach_ram(&ram_); #ifdef VM_ENABLE - //Set - processor_.set_processor_satp(VM_ADDR_MODE); + //Set + set_processor_satp(VM_ADDR_MODE); #endif } @@ -133,13 +133,13 @@ public: bool is_pc = false; std::cout << "startup addr: " << std::hex << STARTUP_ADDR << std::endl; std::cout << "bit mode: " << std::dec << XLEN << std::endl; + if (get_mode() == VA_MODE::BARE) + return 0; + if (*dev_maddr == STARTUP_ADDR || *dev_maddr == 0x7FFFF000) { is_pc = true; } - if (get_mode() == VA_MODE::BARE) - return 0; - uint64_t ppn = *dev_maddr >> 12; uint64_t init_pAddr = *dev_maddr; uint64_t init_vAddr = *dev_maddr + 0xf0000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation @@ -188,9 +188,10 @@ public: return err; }); #ifdef VM_ENABLE - // VM address translation std::cout << "physical addr: " << std::hex << *dev_addr << std::endl; + // VM address translation map_local_mem(size, dev_addr); + std::cout << "virtual addr: " << std::hex << *dev_addr << std::endl; #endif *dev_addr = addr; return 0; @@ -342,7 +343,7 @@ public: #ifdef VM_ENABLE /* VM Management */ - void set_processor_satp(VA_MODE mode) + void set_processor_satp(VA_MODE mode) { uint32_t satp; if (mode == VA_MODE::BARE) @@ -546,9 +547,11 @@ public: uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - src[i] = (0x00000000 >> ((i & 0x3) * 8)) & 0xff; + src[i] = (0x00000000 >> ((i & 0x3) << 3)) & 0xff; } + ram_.enable_acl(false); ram_.write((const uint8_t*)src, addr, asize); + 
ram_.enable_acl(true); } void read_page_table(uint64_t addr) { @@ -567,7 +570,9 @@ public: src[i] = (value >> ((i & 0x3) * 8)) & 0xff; } //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; + ram_.enable_acl(false); ram_.write((const uint8_t*)src, addr, PTE_SIZE); + ram_.enable_acl(true); } uint64_t read_pte(uint64_t addr) { diff --git a/sim/simx/processor.h b/sim/simx/processor.h index 17340cf2c..e22f11569 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -34,7 +34,6 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - void set_processor_satp(VA_MODE mode); uint32_t get_satp(); void set_satp(uint32_t satp); #endif From 62673b4b720d47681518de36631879ad85d44e61 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 14 Jun 2024 17:03:43 -0400 Subject: [PATCH 015/488] Update upload and download function in simx runtime --- hw/rtl/VX_config.vh | 52 ++++--- runtime/simx/vortex.cpp | 299 ++++++++++++++++++++++------------------ sim/simx/processor.cpp | 2 +- 3 files changed, 194 insertions(+), 159 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 98dcdd16e..e0fab021d 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -14,27 +14,6 @@ `ifndef VX_CONFIG_VH `define VX_CONFIG_VH -`ifndef VM_DISABLE -`define VM_ENABLE -`endif -`ifdef VM_ENABLE - `ifndef VM_ADDR_MODE - `define VM_ADDR_MODE SV32 - `endif - - `ifndef PTE_SIZE - `define PTE_SIZE 8 - `endif - - `ifndef TLB_SIZE - `define TLB_SIZE 32 - `endif - - `ifndef SUPER_PAGING - `define SUPER_PAGING 0 - `endif - -`endif `ifndef MIN @@ -275,6 +254,37 @@ `define DEBUG_LEVEL 3 `endif +// Virtual Memory Configuration /////////////////////////////////////////////////////// +`ifndef VM_DISABLE +`define VM_ENABLE +`endif +`ifdef VM_ENABLE + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV32 + `endif + + `ifndef PTE_SIZE + `ifdef XLEN_32 + `define PTE_SIZE 4 + `else + `ifdef XLEN_64 + `define PTE_SIZE 8 + `else + `define PTE_SIZE 8 + `endif + 
`endif + `endif + + `ifndef TLB_SIZE + `define TLB_SIZE 32 + `endif + + `ifndef SUPER_PAGING + `define SUPER_PAGING 0 + `endif + +`endif + // Pipeline Configuration ///////////////////////////////////////////////////// // Issue width diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 2d1168179..64ba1653d 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -27,6 +27,26 @@ #include #include +#ifdef VM_ENABLE +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#endif + using namespace vortex; #ifdef VM_ENABLE @@ -128,32 +148,37 @@ public: #ifdef VM_ENABLE // VM SUPPORT - uint64_t map_local_mem(uint64_t size, uint64_t* dev_maddr) + uint64_t map_local_mem(uint64_t size, uint64_t* dev_pAddr) { - bool is_pc = false; - std::cout << "startup addr: " << std::hex << STARTUP_ADDR << std::endl; + bool no_trans = false; + std::cout << __PRETTY_FUNCTION__ << std::endl; + // std::cout << "startup addr: 0x" << std::hex << STARTUP_ADDR << std::endl; + std::cout << "Input device physical addr: 0x" << std::hex << *dev_pAddr<< std::endl; std::cout << "bit mode: " << std::dec << XLEN << std::endl; - if (get_mode() == VA_MODE::BARE) - return 0; - if (*dev_maddr == STARTUP_ADDR || *dev_maddr == 0x7FFFF000) { - is_pc = true; + // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { + if (*dev_pAddr >= 0xF0000000 ) + no_trans = true; } - uint64_t ppn = *dev_maddr >> 12; - uint64_t init_pAddr = *dev_maddr; - uint64_t init_vAddr = *dev_maddr + 0xf0000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation - init_vAddr = (init_vAddr >> 12) << 12; - uint64_t vpn; + if (get_mode() == VA_MODE::BARE || no_trans == true) + { + std::cout << "No Translation is needed." 
<< std::endl; + return 0; + } - //dev_maddr can be of size greater than a page, but we have to map and update + uint64_t init_pAddr = *dev_pAddr; + uint64_t init_vAddr = *dev_pAddr + 0xf000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation + uint64_t ppn = 0, vpn = 0 ; + + + //dev_pAddr can be of size greater than a page, but we have to map and update //page tables on a page table granularity. So divide the allocation into pages. - for (ppn = (*dev_maddr) >> 12; ppn < ((*dev_maddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) + for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) { //Currently a 1-1 mapping is used, this can be changed here to support different //mapping schemes - vpn = is_pc ? ppn : ppn + 0xf0000; - //vpn = ppn; + vpn = ppn + (0xf000000 >> 12); //If ppn to vpn mapping doesnt exist. if (addr_mapping.find(vpn) == addr_mapping.end()) @@ -164,21 +189,23 @@ public: } } - std::cout << "mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; - uint64_t size_bits; - if (is_pc) { - std::cout << "not returning virtual address because it is PC or stack" << std::endl; - std::pair ptw_access = page_table_walk(init_vAddr - 0xf0000000, &size_bits); - return 0; - } else { - std::pair ptw_access = page_table_walk(init_vAddr, &size_bits); + std::cout << "Mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; + // sanity check + uint64_t pAddr = page_table_walk(init_vAddr); + if (pAddr != init_pAddr) + { + std::cout << "ERROR" << pAddr << "and" << init_pAddr << " is not the same" < GLOBAL_MEM_SIZE) - return -1; - + int upload(uint64_t dest_addr, const void* src, uint64_t size) { + std::cout << __PRETTY_FUNCTION__ << std::endl; + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); + if (dest_addr + asize > GLOBAL_MEM_SIZE) + return -1; #ifdef VM_ENABLE - uint64_t pAddr = dest_addr; // map_local_mem overwrites the 
provided dest_addr, so store away physical destination address - if (dest_addr >= STARTUP_ADDR) { - map_local_mem(asize,&dest_addr); - } else if (dest_addr >= 0x7fff0000) - { - map_local_mem(asize,&dest_addr); - } - std::cout << "uploading to 0x" << pAddr << "(VA)" << std::endl; - dest_addr = pAddr; + uint64_t pAddr = page_table_walk(dest_addr); + std::cout << "== Upload data to vAddr = 0x" << std::hex < GLOBAL_MEM_SIZE) return -1; +#ifdef VM_ENABLE + uint64_t pAddr = page_table_walk(src_addr); + std::cout << "== Download data to vAddr = 0x" << std::hex <> 10) | 0x80000000; - // satp = 0xFEBFE000 ; + satp = (alloc_first_level_page_table() >> 12) | 0x80000000; } processor_.set_satp(satp); } @@ -365,22 +391,23 @@ public: VA_MODE get_mode() { return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; - // return VA_MODE::SV32; } - void update_page_table(uint64_t pAddr, uint64_t vAddr) { - std::cout << "mapping vpn: " << vAddr << " to ppn:" << pAddr << std::endl; + void update_page_table(uint64_t ppn, uint64_t vpn) { + std::cout << __PRETTY_FUNCTION__ << std::endl; + std::cout << "mapping vpn: " << std::hex << vpn << " to ppn:" << ppn << std::endl; //Updating page table with the following mapping of (vAddr) to (pAddr). + // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); + uint32_t page_bit_shift = 12; uint64_t ppn_1, pte_addr, pte_bytes; - uint64_t vpn_1 = bits(vAddr, 10, 19); - uint64_t vpn_0 = bits(vAddr, 0, 9); + uint64_t vpn_1 = bits(vpn, 10, 19); + uint64_t vpn_0 = bits(vpn, 0, 9); //Read first level PTE. pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); pte_bytes = read_pte(pte_addr); std::cout << "[PTE] addr 0x" << std::hex << pte_addr << ", PTE 0x" << std::hex << pte_bytes << std::endl; - if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) { //If valid bit set, proceed to next level using new ppn form PTE. 
@@ -392,13 +419,14 @@ public: //If valid bit not set, allocate a second level page table // in device memory and store ppn in PTE. Set rwx = 000 in PTE //to indicate this is a pointer to the next level of the page table. - ppn_1 = (alloc_page_table() >> 12); - pte_bytes = ( (ppn_1 << 10) | 0b0000000001) ; + std::cout << "PTE invalid, get second page table..." << std::endl; + ppn_1 = (alloc_second_level_page_table() >> 12); + pte_bytes = ((ppn_1 << 10) | 0b0000000001) ; write_pte(pte_addr, pte_bytes); } //Read second level PTE. - pte_addr = (ppn_1 << 12) + (vpn_0 * PTE_SIZE); + pte_addr = (ppn_1 << page_bit_shift) + (vpn_0 * PTE_SIZE); pte_bytes = read_pte(pte_addr); std::cout << "got pte: " << std::hex << pte_bytes << std::endl; @@ -412,10 +440,11 @@ public: { //If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE //to indicate this is a leaf PTE and has the stated permissions. - pte_bytes = ( (pAddr << 10) | 0b0000001111) ; + pte_bytes = ( (ppn << 10) | 0b0000001111) ; write_pte(pte_addr, pte_bytes); //If super paging is enabled. + /* if (SUPER_PAGING) { //Check if this second level Page Table can be promoted to a super page. Brute force @@ -444,130 +473,118 @@ public: write_pte(pte_addr, pte_bytes); } } + */ } } - std::pair page_table_walk(uint64_t vAddr_bits, uint64_t* size_bits) + uint64_t page_table_walk(uint64_t vAddr_bits) { + + std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_bytes; + uint64_t pte_addr, pte_bytes; + uint64_t pt_ba = get_ptbr() << 12; - std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; //Get base page table. - uint64_t a = this->processor_.get_satp() << 12; - std::cout << "PTW SATP: 0x" << a << std::endl; - int i = LEVELS - 1; - while(true) + for ( i = LEVELS-1 ; i >= 0 ; i--) { - - //Read PTE. 
- std::cout << "reading PTE from RAM addr 0x" << std::hex << (a+vAddr.vpn[i]*PTE_SIZE) << std::endl; - ram_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); - //pte_bytes &= 0x00000000FFFFFFFF; - PTE_SV32_t pte(pte_bytes); - std::cout << "got pte: " << std::hex << pte_bytes << std::endl; - - //Check if it has invalid flag bits. - if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) - { - std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; - throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); - } - - if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - //Not a leaf node as rwx == 000 - i--; - if (i < 0) + //Read PTE. + pte_addr = pt_ba+vAddr.vpn[i]*PTE_SIZE; + std::cout << "reading PTE from RAM addr 0x" << std::hex << (pte_addr) << std::endl; + pte_bytes = read_pte(pte_addr); + pte_bytes &= 0x00000000FFFFFFFF; // Only for 32 bit + PTE_SV32_t pte(pte_bytes); + std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + + //Check if it has invalid flag bits. + if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { - throw Page_Fault_Exception("Page Fault : No leaf node found."); + std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; + throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); + } + + if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + //Not a leaf node as rwx == 000 + if (i == 0) + throw Page_Fault_Exception("Page Fault : No leaf node found."); + else + { + //Continue on to next level. + pt_ba = (pte_bytes >> 10 ) << 12; + std::cout << "next pt_ba: " << pt_ba << std::endl; + } } else { - //Continue on to next level. - a = (pte_bytes >> 10 ) << 12; - std::cout << "next a: " << a << std::endl; + //Leaf node found, finished walking. + pt_ba = (pte_bytes >> 10 ) << 12; + std::cout << "Found PPN 0 = 0x" << pt_ba << std::endl; + break; } - } - else - { - //Leaf node found, finished walking. 
- a = (pte_bytes >> 10 ) << 12; - break; - } + } + // pte_bytes is final leaf PTE_SV32_t pte(pte_bytes); - //Check RWX permissions according to access type. if (pte.r == 0) { - throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); } - - uint64_t pfn; - if (i > 0) - { - //It is a super page. - if (pte.ppn[0] != 0) - { - //Misss aligned super page. - throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); - - } - else - { - //Valid super page. - pfn = pte.ppn[1]; - *size_bits = 22; - } - } - else - { //Regular page. - *size_bits = 12; - pfn = a >> 12; - } - return std::make_pair(pfn, pte_bytes & 0xff); + + uint64_t paddr = pt_ba << 12 + vAddr.pgoff; + return paddr } - uint64_t alloc_page_table() { - uint64_t addr; - global_mem_.allocate(RAM_PAGE_SIZE, &addr); + uint64_t alloc_first_level_page_table() { + uint64_t addr=0xF0000000; + uint64_t size=1<<23; + CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { + return err; + }); + // global_mem_.allocate(RAM_PAGE_SIZE, &addr); + std::cout << "address of page table 0x" << std::hex << addr << std::endl; + init_page_table(addr,size); + return addr; + } + uint64_t alloc_second_level_page_table(uint64_t vpn_1) { + uint64_t addr = 0xF0000000 + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1) std::cout << "address of page table 0x" << std::hex << addr << std::endl; - init_page_table(addr); return addr; } - - void init_page_table(uint64_t addr) { - uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); + void init_page_table(uint64_t addr, uint64_t size) { + // uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - src[i] = (0x00000000 >> ((i & 0x3) << 3)) & 0xff; + // src[i] = (value >> (i << 3)) & 0xff; + src[i] = 0; } ram_.enable_acl(false); 
ram_.write((const uint8_t*)src, addr, asize); ram_.enable_acl(true); } - void read_page_table(uint64_t addr) { - uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; - download(dest, addr, RAM_PAGE_SIZE); - printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); - for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { - printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); - } - } + // void read_page_table(uint64_t addr) { + // uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; + // download(dest, addr, RAM_PAGE_SIZE); + // printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); + // for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { + // printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + // } + // } void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { - std::cout << "writing pte " << std::hex << value << " to pAddr: " << std::hex << addr << std::endl; + std::cout << "writing pte 0x" << std::hex << value << " to pAddr: 0x" << std::hex << addr << std::endl; uint8_t *src = new uint8_t[PTE_SIZE]; for (uint64_t i = 0; i < PTE_SIZE; ++i) { - src[i] = (value >> ((i & 0x3) * 8)) & 0xff; + src[i] = (value >> (i << 3)) & 0xff; } //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; ram_.enable_acl(false); @@ -577,9 +594,17 @@ public: uint64_t read_pte(uint64_t addr) { uint8_t *dest = new uint8_t[PTE_SIZE]; + uint64_t mask = 0; + if (XLEN == 32) + mask = 0xFFFFFFFF; + else if (XLEN == 64) + mask = 0xFFFFFFFFFFFFFFFF; + else + assert(0, "XLEN is not either 32 or 64") + std::cout << "[read_pte] reading PTE from RAM addr 0x" << std::hex << addr << std::endl; ram_.read((uint8_t*)dest, addr, PTE_SIZE); - return *(uint64_t*)((uint8_t*)dest); + return (*(uint64_t*)((uint8_t*)dest)) & mask; } #endif // JAEWON diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index f6deaeec8..c3241a207 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -152,7 +152,7 @@ void 
Processor::dcr_write(uint32_t addr, uint32_t value) { #ifdef VM_ENABLE uint32_t Processor::get_satp() { - std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; + // std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; return this->satp; } From 862997fc9456b647dc224c4ada2c65a7a701d87c Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 16 Jun 2024 19:05:38 -0400 Subject: [PATCH 016/488] Virtual Memory Support --- hw/rtl/VX_config.vh | 22 ++- runtime/simx/vortex.cpp | 299 ++++++++++++++++++++++------------------ sim/common/mem.cpp | 116 ++++++++++------ sim/common/mem.h | 25 ++-- sim/simx/emulator.cpp | 4 + sim/simx/processor.cpp | 1 - 6 files changed, 275 insertions(+), 192 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index e0fab021d..3a8242379 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -172,7 +172,15 @@ `define IO_BASE_ADDR 64'h000000040 `endif -`else +`ifndef PAGE_TABLE_BASE_ADDR +`define PAGE_TABLE_BASE_ADDR 64'h1F0000000 +`endif + +`ifndef PAGE_TABLE_SIZE +`define PAGE_TABLE_SIZE 4096 +`endif + +`else # XLEN_32 `ifndef STACK_BASE_ADDR `define STACK_BASE_ADDR 32'hFFFF0000 @@ -190,6 +198,14 @@ `define IO_BASE_ADDR 32'h00000040 `endif +`ifndef PAGE_TABLE_BASE_ADDR +`define PAGE_TABLE_BASE_ADDR 32'hF0000000 +`endif + +`ifndef PAGE_TABLE_SIZE +`define PAGE_TABLE_SIZE 4096 +`endif + `endif `define IO_END_ADDR `USER_BASE_ADDR @@ -266,13 +282,17 @@ `ifndef PTE_SIZE `ifdef XLEN_32 `define PTE_SIZE 4 + `define NUM_PTE_ENTRY 1024 `else `ifdef XLEN_64 `define PTE_SIZE 8 + `define NUM_PTE_ENTRY 1024 `else `define PTE_SIZE 8 + `define NUM_PTE_ENTRY 1024 `endif `endif + `define PT_SIZE (PTE_SIZE * NUM_PTE_ENTRY) `endif `ifndef TLB_SIZE diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 64ba1653d..816ca3081 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -83,17 +83,18 @@ bool bit(uint64_t addr, uint8_t idx) class vx_device { public: - vx_device() - : 
arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) - , ram_(0, RAM_PAGE_SIZE) - , processor_(arch_) - , global_mem_(ALLOC_BASE_ADDR, - GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, - RAM_PAGE_SIZE, - CACHE_BLOCK_SIZE) - { - // attach memory module - processor_.attach_ram(&ram_); + vx_device() + : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) +#ifdef VM_ENABLE + , ram_(0, RAM_PAGE_SIZE<<11) +#else + , ram_(0, RAM_PAGE_SIZE) +#endif + , processor_(arch_) + , global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE) + { + // attach memory module + processor_.attach_ram(&ram_); #ifdef VM_ENABLE //Set set_processor_satp(VM_ADDR_MODE); @@ -101,6 +102,9 @@ public: } ~vx_device() { +#ifdef VM_ENABLE + this->mem_free(PAGE_TABLE_BASE_ADDR); // Right position? +#endif if (future_.valid()) { future_.wait(); } @@ -147,66 +151,90 @@ public: } #ifdef VM_ENABLE - // VM SUPPORT - uint64_t map_local_mem(uint64_t size, uint64_t* dev_pAddr) + // virtual to phycial mapping + uint64_t map_p2v(uint64_t pAddr) { - bool no_trans = false; - std::cout << __PRETTY_FUNCTION__ << std::endl; - // std::cout << "startup addr: 0x" << std::hex << STARTUP_ADDR << std::endl; - std::cout << "Input device physical addr: 0x" << std::hex << *dev_pAddr<< std::endl; - std::cout << "bit mode: " << std::dec << XLEN << std::endl; + return pAddr + 0xf000000; + } + bool need_trans(uint64_t dev_pAddr) + { + // Check if the this is the BARE mode + bool isBAREMode = (get_mode() == VA_MODE::BARE); + // Check if the address is reserved + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && 
!isReserved && !isStartAddress); + } + + uint64_t phy_to_virt_map(uint64_t size, uint64_t* dev_pAddr, uint32_t flags) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("(size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x)\n", size, *dev_pAddr, flags); + DBGPRINT("bit mode: %d\n", XLEN); // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { - if (*dev_pAddr >= 0xF0000000 ) - no_trans = true; - } - if (get_mode() == VA_MODE::BARE || no_trans == true) + if (!need_trans(*dev_pAddr)) { - std::cout << "No Translation is needed." << std::endl; + DBGPRINT("Translation is not needed.\n"); return 0; } uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = *dev_pAddr + 0xf000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation + uint64_t init_vAddr = map_p2v(init_pAddr); uint64_t ppn = 0, vpn = 0 ; - //dev_pAddr can be of size greater than a page, but we have to map and update //page tables on a page table granularity. So divide the allocation into pages. + bool is_start = false; for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) { + vpn = map_p2v(ppn << 12) >> 12; + if (is_start == false) { + DBGPRINT("**Search vpn in page table:0x%lx\n", vpn); + is_start = true; + } + else { + DBGPRINT("Next vpn: 0x%lx\n",vpn); + } + //Currently a 1-1 mapping is used, this can be changed here to support different //mapping schemes - vpn = ppn + (0xf000000 >> 12); //If ppn to vpn mapping doesnt exist. if (addr_mapping.find(vpn) == addr_mapping.end()) { //Create mapping. 
- update_page_table(ppn, vpn); + update_page_table(ppn, vpn, flags); addr_mapping[vpn] = ppn; } } + DBGPRINT("Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); - std::cout << "Mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; - // sanity check + // Sanity check uint64_t pAddr = page_table_walk(init_vAddr); if (pAddr != init_pAddr) { - std::cout << "ERROR" << pAddr << "and" << init_pAddr << " is not the same" <mem_access(dev_addr, size, flags), { - global_mem_.release(dev_addr); - return err; - }); - return 0; - } + int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { + CHECK_ERR(global_mem_.reserve(dev_addr, size), { + return err; + }); + DBGPRINT("mem_reserve: addr: 0x%lx, size: 0x%lx\n",dev_addr, size); + CHECK_ERR(this->mem_access(dev_addr, size, flags), { + global_mem_.release(dev_addr); + return err; + }); +#ifdef VM_ENABLE + uint64_t paddr = dev_addr; + phy_to_virt_map(size, &paddr, flags); +#endif + return 0; + } - int mem_free(uint64_t dev_addr) { - return global_mem_.release(dev_addr); - } + int mem_free(uint64_t dev_addr) { +#ifdef VM_ENABLE + uint64_t pAddr = page_table_walk(dev_addr); + // VM address translation + return global_mem_.release(pAddr); +#else + return global_mem_.release(dev_addr); +#endif + } int mem_access(uint64_t dev_addr, uint64_t size, int flags) { uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); @@ -255,17 +294,13 @@ public: } int upload(uint64_t dest_addr, const void* src, uint64_t size) { - std::cout << __PRETTY_FUNCTION__ << std::endl; + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (dest_addr + asize > GLOBAL_MEM_SIZE) return -1; #ifdef VM_ENABLE uint64_t pAddr = page_table_walk(dest_addr); - std::cout << "== Upload data to vAddr = 0x" << std::hex <> 12) | 0x80000000; + satp = (alloc_2nd_level_page_table() >> 12) | 0x80000000; + DBGPRINT("VA_MODE = SV32 MODE(satp = 0x%x)\n",satp); } 
processor_.set_satp(satp); } @@ -387,52 +427,62 @@ public: // return processor_.get_satp(); return processor_.get_satp() & 0x003fffff; } + uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) + { + return (base_page << 12) + (vpn * PTE_SIZE); + } VA_MODE get_mode() { return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; } - void update_page_table(uint64_t ppn, uint64_t vpn) { - std::cout << __PRETTY_FUNCTION__ << std::endl; - std::cout << "mapping vpn: " << std::hex << vpn << " to ppn:" << ppn << std::endl; + void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn,flag); + assert((((ppn>> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); //Updating page table with the following mapping of (vAddr) to (pAddr). // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); - uint32_t page_bit_shift = 12; - uint64_t ppn_1, pte_addr, pte_bytes; + uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; uint64_t vpn_1 = bits(vpn, 10, 19); uint64_t vpn_0 = bits(vpn, 0, 9); //Read first level PTE. - pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); + DBGPRINT("Start second-level page table\n"); + pte_addr = get_pte_address(get_ptbr(), vpn_1); pte_bytes = read_pte(pte_addr); - std::cout << "[PTE] addr 0x" << std::hex << pte_addr << ", PTE 0x" << std::hex << pte_bytes << std::endl; + DBGPRINT("[PTE] addr 0x%lx, PTE 0x%lx\n", pte_addr, pte_bytes); + ppn_1 = (pte_bytes >> 10); if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) { //If valid bit set, proceed to next level using new ppn form PTE. - std::cout << "PTE valid, continuing the walk..." << std::endl; - ppn_1 = (pte_bytes >> 10); + DBGPRINT("PTE valid (ppn 0x%lx), continuing the walk...\n",ppn_1); } else { //If valid bit not set, allocate a second level page table // in device memory and store ppn in PTE. 
Set rwx = 000 in PTE //to indicate this is a pointer to the next level of the page table. - std::cout << "PTE invalid, get second page table..." << std::endl; - ppn_1 = (alloc_second_level_page_table() >> 12); + DBGPRINT("PTE Invalid (ppn 0x%lx), continuing the walk...\n",ppn_1); + ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); pte_bytes = ((ppn_1 << 10) | 0b0000000001) ; + assert((pte_addr>> 32) == 0 && "Upper 32 bits are not zero!"); write_pte(pte_addr, pte_bytes); + // if (pte_bytes != read_pte(pte_addr)) + // DBGPRINT("Read/write values are different!\n"); } + + DBGPRINT("Move to first-level page table\n"); //Read second level PTE. - pte_addr = (ppn_1 << page_bit_shift) + (vpn_0 * PTE_SIZE); + pte_addr = get_pte_address(ppn_1, vpn_0); pte_bytes = read_pte(pte_addr); - std::cout << "got pte: " << std::hex << pte_bytes << std::endl; if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) { - std::cout << "ERROR, shouldn't be here" << std::endl; + DBGPRINT("ERROR, shouldn't be here\n"); + exit(1); //If valid bit is set, then the page is already allocated. //Should not reach this point, a sanity check. } @@ -442,87 +492,62 @@ public: //to indicate this is a leaf PTE and has the stated permissions. pte_bytes = ( (ppn << 10) | 0b0000001111) ; write_pte(pte_addr, pte_bytes); - - //If super paging is enabled. - /* - if (SUPER_PAGING) - { - //Check if this second level Page Table can be promoted to a super page. Brute force - //method is used to iterate over all PTE entries of the table and check if they have - //their valid bit set. - bool superpage = true; - for(int i = 0; i < 1024; i++) - { - pte_addr = (ppn_1 << 12) + (i * PTE_SIZE); - pte_bytes = read_pte(pte_addr); - - if (!bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - superpage = false; - break; - } - } - if (superpage) - { - //This can be promoted to a super page. Set root PTE to the first PTE of the - //second level. 
This is because the first PTE of the second level already has the - //correct PPN1, PPN0 set to zero and correct access bits. - pte_addr = (ppn_1 << 12); - pte_bytes = read_pte(pte_addr); - pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); - write_pte(pte_addr, pte_bytes); - } - } - */ + if (pte_bytes != read_pte(pte_addr)) + DBGPRINT("Read/write values are different!\n"); } } uint64_t page_table_walk(uint64_t vAddr_bits) { - - std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("PTW on vAddr: 0x%lx\n", vAddr_bits); + if (!need_trans(vAddr_bits)) + { + DBGPRINT("Translation is not needed.\n"); + return vAddr_bits; + } uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); uint64_t pte_addr, pte_bytes; uint64_t pt_ba = get_ptbr() << 12; - //Get base page table. - for ( i = LEVELS-1 ; i >= 0 ; i--) + for ( int i = LEVELS-1 ; i >= 0 ; i--) { //Read PTE. pte_addr = pt_ba+vAddr.vpn[i]*PTE_SIZE; - std::cout << "reading PTE from RAM addr 0x" << std::hex << (pte_addr) << std::endl; pte_bytes = read_pte(pte_addr); - pte_bytes &= 0x00000000FFFFFFFF; // Only for 32 bit PTE_SV32_t pte(pte_bytes); - std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + DBGPRINT("pte_bytes = 0x%lx, pte flags = %u)\n", pte.ppn , pte.flags); //Check if it has invalid flag bits. if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { - std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; - throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); + std::string msg= "Page Fault : Attempted to access invalid entry. Entry: 0x"; + throw Page_Fault_Exception(msg); } if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) { //Not a leaf node as rwx == 000 if (i == 0) + { throw Page_Fault_Exception("Page Fault : No leaf node found."); + } else { //Continue on to next level. 
- pt_ba = (pte_bytes >> 10 ) << 12; - std::cout << "next pt_ba: " << pt_ba << std::endl; + pt_ba = pte.ppn << 12; + DBGPRINT("next pt_ba: %p\n", (void *)pt_ba); + } } else { //Leaf node found, finished walking. - pt_ba = (pte_bytes >> 10 ) << 12; - std::cout << "Found PPN 0 = 0x" << pt_ba << std::endl; + pt_ba = pte.ppn << 12; + DBGPRINT("Found PT_Base_Address [%d] = %lx\n", i, pt_ba); break; } @@ -535,35 +560,35 @@ public: { throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); } - //Regular page. - uint64_t paddr = pt_ba << 12 + vAddr.pgoff; - return paddr + uint64_t paddr = pt_ba + vAddr.pgoff; + return paddr; } - uint64_t alloc_first_level_page_table() { - uint64_t addr=0xF0000000; - uint64_t size=1<<23; + uint64_t alloc_2nd_level_page_table() { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t addr=PAGE_TABLE_BASE_ADDR; + uint64_t size=1<<23; // 8MB !!!FIXME!!! CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { return err; }); - // global_mem_.allocate(RAM_PAGE_SIZE, &addr); - std::cout << "address of page table 0x" << std::hex << addr << std::endl; - init_page_table(addr,size); + init_page_table(addr); return addr; } - uint64_t alloc_second_level_page_table(uint64_t vpn_1) { - uint64_t addr = 0xF0000000 + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1) - std::cout << "address of page table 0x" << std::hex << addr << std::endl; + uint64_t alloc_1st_level_page_table(uint64_t vpn_1) { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t addr = PAGE_TABLE_BASE_ADDR + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1); + init_page_table(addr); return addr; } - void init_page_table(uint64_t addr, uint64_t size) { - // uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); - uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); + void init_page_table(uint64_t addr) { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("int_page_table (addr=0x%lx)\n", addr); + uint64_t asize = aligned_size(RAM_PAGE_SIZE, 
CACHE_BLOCK_SIZE); + // uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - // src[i] = (value >> (i << 3)) & 0xff; src[i] = 0; } ram_.enable_acl(false); @@ -574,14 +599,14 @@ public: // void read_page_table(uint64_t addr) { // uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; // download(dest, addr, RAM_PAGE_SIZE); - // printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); + // DBGPRINT("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); // for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { - // printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + // DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); // } // } void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { - std::cout << "writing pte 0x" << std::hex << value << " to pAddr: 0x" << std::hex << addr << std::endl; + DBGPRINT("[Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr); uint8_t *src = new uint8_t[PTE_SIZE]; for (uint64_t i = 0; i < PTE_SIZE; ++i) { src[i] = (value >> (i << 3)) & 0xff; @@ -596,15 +621,17 @@ public: uint8_t *dest = new uint8_t[PTE_SIZE]; uint64_t mask = 0; if (XLEN == 32) - mask = 0xFFFFFFFF; + mask = 0x00000000FFFFFFFF; else if (XLEN == 64) mask = 0xFFFFFFFFFFFFFFFF; else - assert(0, "XLEN is not either 32 or 64") + assert(0 && "XLEN is not either 32 or 64"); - std::cout << "[read_pte] reading PTE from RAM addr 0x" << std::hex << addr << std::endl; ram_.read((uint8_t*)dest, addr, PTE_SIZE); - return (*(uint64_t*)((uint8_t*)dest)) & mask; + uint64_t ret = (*(uint64_t*)((uint8_t*)dest)) & mask; + DBGPRINT("[read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr); + + return ret; } #endif // JAEWON diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index b55d0de9a..98eefdaf2 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -115,6 +115,7 @@ void MemoryUnit::ADecoder::map(uint64_t start, 
uint64_t end, MemDevice &md) { } void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { + // printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -124,6 +125,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { } void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { + // printf("====%s====\n", __PRETTY_FUNCTION__); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -158,6 +160,7 @@ void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { #ifdef VM_ENABLE std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) { + // printf("====%s====\n", __PRETTY_FUNCTION__); //Find entry while accounting for different sizes. for (auto entry : tlb_) @@ -220,7 +223,7 @@ std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type return std::make_pair(false, 0); } } -#endif //JAEWON +#else MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) { auto iter = tlb_.find(vAddr / pageSize_); if (iter != tlb_.end()) { @@ -244,52 +247,62 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { } return pAddr; } +#endif #ifdef VM_ENABLE -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { +void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { + // printf("====%s====\n", __PRETTY_FUNCTION__); uint64_t pAddr; - if (this->mode == VA_MODE::BARE) { - pAddr = addr; - } else { - pAddr = vAddr_to_pAddr(addr, type); - } + pAddr = vAddr_to_pAddr(addr, type); return decoder_.read(data, pAddr, size); } #else -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, bool sup) 
{ uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1); return decoder_.read(data, pAddr, size); } #endif #ifdef VM_ENABLE -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { +void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { + // printf("====%s====\n", __PRETTY_FUNCTION__); uint64_t pAddr; - if ( (this->mode == VA_MODE::BARE) | (addr >= IO_BASE_ADDR) ) { - pAddr = addr; - } else { - pAddr = vAddr_to_pAddr(addr, type); - } + pAddr = vAddr_to_pAddr(addr, type); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; } #else -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, bool sup) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 16 : 1); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; } #endif +#ifdef VM_ENABLE +void MemoryUnit::amo_reserve(uint64_t addr) { + uint64_t pAddr = this->vAddr_to_pAddr(addr,ACCESS_TYPE::LOAD); + amo_reservation_.addr = pAddr; + amo_reservation_.valid = true; +} +#else void MemoryUnit::amo_reserve(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); amo_reservation_.addr = pAddr; amo_reservation_.valid = true; } +#endif +#ifdef VM_ENABLE +bool MemoryUnit::amo_check(uint64_t addr) { + uint64_t pAddr = this->vAddr_to_pAddr(addr, ACCESS_TYPE::LOAD); + return amo_reservation_.valid && (amo_reservation_.addr == pAddr); +} +#else bool MemoryUnit::amo_check(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } +#endif #ifdef VM_ENABLE @@ -465,6 +478,7 @@ uint8_t *RAM::get(uint64_t address) const { } void RAM::read(void* data, uint64_t addr, uint64_t size) { + // printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size); if (check_acl_ && acl_mngr_.check(addr, size, 0x1) == false) { throw BadAddress(); } @@ -577,15 +591,41 @@ void 
RAM::loadHexImage(const char* filename) { } #ifdef VM_ENABLE + +bool MemoryUnit::need_trans(uint64_t dev_pAddr) +{ + // Check if the this is the BARE mode + bool isBAREMode = (this->mode == VA_MODE::BARE); + // Check if the address is reserved + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr < (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("0x%lx, %u, %u, %u \n", dev_pAddr,isBAREMode, isReserved, isStartAddress); + + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && !isReserved && !isStartAddress); +} + uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) { uint64_t pfn; uint64_t size_bits; + // printf("====%s====\n", __PRETTY_FUNCTION__); + // printf("vaddr = 0x%lx, type = 0x%u\n",vAddr,type); + if (!need_trans(vAddr)) + { + // printf("Translation is not needed.\n"); + return vAddr; + } //First lookup TLB. std::pair tlb_access = tlbLookup(vAddr, type, &size_bits); if (tlb_access.first) { + + // printf("Found pfn %lx in TLB\n",tlb_access.second); pfn = tlb_access.second; TLB_HIT++; } @@ -596,33 +636,37 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) pfn = ptw_access.first; TLB_MISS++; PTW++; unique_translations.insert(vAddr>>size_bits); PERF_UNIQUE_PTW = unique_translations.size(); + } //Construct final address using pfn and offset. 
- std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; + // std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); } std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) { + // printf("====%s====\n", __PRETTY_FUNCTION__); + // printf("vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_bytes; + uint64_t pte_bytes = 0; //Get base page table. - uint64_t a = this->ptbr << 12; + uint64_t pt_ba = this->ptbr << 12; int i = LEVELS - 1; while(true) { //Read PTE. - decoder_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); + decoder_.read(&pte_bytes, pt_ba+vAddr.vpn[i]*PTE_SIZE, PTE_SIZE); PTE_SV32_t pte(pte_bytes); //Check if it has invalid flag bits. if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry."); } @@ -632,18 +676,19 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC i--; if (i < 0) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : No leaf node found."); } else { //Continue on to next level. - a = (pte_bytes >> 10 ) << 12; + pt_ba = (pte_bytes >> 10 ) << 12; } } else { //Leaf node found, finished walking. - a = (pte_bytes >> 10 ) << 12; + pt_ba = (pte_bytes >> 10 ) << 12; break; } } @@ -653,40 +698,21 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC //Check RWX permissions according to access type. 
if ( (type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : TYPE FETCH, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : TYPE STORE, Incorrect permissions."); } - - uint64_t pfn; - if (i > 0) - { - //It is a super page. - if (pte.ppn[0] != 0) - { - //Misss aligned super page. - throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); - - } - else - { - //Valid super page. - pfn = pte.ppn[1]; - *size_bits = 22; - } - } - else - { - //Regular page. - *size_bits = 12; - pfn = a >> 12; - } + *size_bits = 12; + uint64_t pfn = pt_ba >> *size_bits; return std::make_pair(pfn, pte_bytes & 0xff); } diff --git a/sim/common/mem.h b/sim/common/mem.h index 8477fb800..a655a6d3c 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -116,17 +116,21 @@ public: // ACCESS_TYPE access_type; }; +#ifdef VM_ENABLE + MemoryUnit(uint64_t pageSize = PAGE_TABLE_SIZE); +#else MemoryUnit(uint64_t pageSize = 0); +#endif void attach(MemDevice &m, uint64_t start, uint64_t end); #ifdef VM_ENABLE - void read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD); - void write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE); + void read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + void write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE); #else - void read(void* data, uint64_t addr, uint64_t size, bool sup); - void write(const void* data, uint64_t addr, uint64_t size, bool sup); + void read(void* data, uint64_t addr, uint32_t size, bool sup); + 
void write(const void* data, uint64_t addr, uint32_t size, bool sup); #endif void amo_reserve(uint64_t addr); @@ -220,14 +224,16 @@ private: #ifdef VM_ENABLE std::pair tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits); + bool need_trans(uint64_t dev_pAddr); uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); +#else + uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); + TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); #endif - TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); - uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); std::unordered_map tlb_; uint64_t pageSize_; @@ -328,7 +334,7 @@ class PTE_SV32_t } public: - uint64_t ppn[2]; + uint64_t ppn; uint32_t rsw; uint32_t flags; bool d, a, g, u, x, w, r, v; @@ -337,8 +343,7 @@ class PTE_SV32_t assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); flags = bits(address,0,7); rsw = bits(address,8,9); - ppn[0] = bits(address,10,19); - ppn[1] = bits(address,20,31); + ppn = bits(address,10,31); d = bit(7); a = bit(6); @@ -348,6 +353,7 @@ class PTE_SV32_t w = bit(2); r = bit(1); v = bit(0); + // printf("ppn = 0x%lx, flags= 0x%x, rsw= 0x%x\n",ppn,flags,rsw); } }; @@ -374,6 +380,7 @@ class vAddr_SV32_t vpn[0] = bits(address,12,21); vpn[1] = bits(address,22,31); pgoff = bits(address,0,11); + // printf("vpn[0] = 0x%lx, vpn[1] = 0x%lx, pgoff = 0x%lx\n",vpn[0],vpn[1],pgoff); } }; #endif diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 417ef83aa..63473cfd8 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -270,6 +270,8 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { #ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { + DPH(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); + try { mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); @@ -288,6 +290,7 @@ void Emulator::icache_read(void 
*data, uint64_t addr, uint32_t size) { #ifdef VM_ENABLE void Emulator::set_satp(uint32_t satp) { + DPH(3, "set satp 0x" << std::hex << satp << " in emulator module\n"); set_csr(VX_CSR_SATP,satp,0,0); } #endif @@ -327,6 +330,7 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) { #ifdef VM_ENABLE void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) { + DP(1, "*** dcache_write 0x" << std::hex << addr << ", size = 0x " << size); auto type = get_addr_type(addr); if (addr >= uint64_t(IO_COUT_ADDR) && addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index c3241a207..3ae99fa4e 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -157,7 +157,6 @@ uint32_t Processor::get_satp() { } void Processor::set_satp(uint32_t satp) { - std::cout << "set SATP: 0x" << std::hex << this->satp << std::endl; impl_->set_satp(satp); this->satp = satp; } From 2271d2b286f13b18519e162a8ca12ee0b4f3cc46 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Wed, 19 Jun 2024 02:04:24 -0400 Subject: [PATCH 017/488] remove # --- hw/rtl/VX_config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 3a8242379..2d01f2bf2 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -180,7 +180,7 @@ `define PAGE_TABLE_SIZE 4096 `endif -`else # XLEN_32 +`else // XLEN_32 `ifndef STACK_BASE_ADDR `define STACK_BASE_ADDR 32'hFFFF0000 From a378aed67cc7d891201124eb5b51059ec3989252 Mon Sep 17 00:00:00 2001 From: Nayan Sivakumar Nair Date: Fri, 21 Jun 2024 22:23:24 -0400 Subject: [PATCH 018/488] Moved tc_num, tc_size param to makefile args --- ci/blackbox.sh | 10 +--------- ci/regression.sh.in | 1 + hw/rtl/VX_types.vh | 3 +++ runtime/simx/vortex.cpp | 14 +++++++------- sim/simx/arch.h | 14 +------------- sim/simx/emulator.cpp | 16 +++++++++++++++- sim/simx/emulator.h | 3 +++ sim/simx/execute.cpp | 7 +++++-- sim/simx/func_unit.cpp | 4 +++- 
sim/simx/func_unit.h | 2 +- sim/simx/main.cpp | 2 +- tests/regression/matmul/Makefile | 2 +- tests/regression/matmul/kernel.cpp | 5 ++++- tests/regression/matmul/main.cpp | 29 ++++++++++++++++++++++++----- 14 files changed, 70 insertions(+), 42 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 8a04133f9..defad4c05 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -114,14 +114,6 @@ case $i in LOGFILE=${i#*=} shift ;; - --tc_size=*) - TC_SIZE=${i#*=} - shift - ;; - --tc_num=*) - TC_NUM=${i#*=} - shift - ;; --help) show_help exit 0 @@ -190,7 +182,7 @@ then fi CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" -CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" +# CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" echo "CONFIGS=$CONFIGS" if [ $REBUILD -ne 0 ] diff --git a/ci/regression.sh.in b/ci/regression.sh.in index a5f1bffdb..50d309af6 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -124,6 +124,7 @@ regression() # test local barrier ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar" + echo "regression tests done!" 
} diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 06929b058..9a8f93234 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -197,6 +197,9 @@ `define VX_CSR_LOCAL_MEM_BASE 12'hFC3 `define VX_MAT_MUL_SIZE 12'hFC4 +`define VX_TC_NUM 12'hFC5 +`define VX_TC_SIZE 12'hFC6 + `endif // VX_TYPES_VH diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index f65d7b385..4210ab0b6 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -32,7 +32,7 @@ using namespace vortex; class vx_device { public: vx_device() - : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES, TC_SIZE, TC_NUM) + : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) , ram_(0, RAM_PAGE_SIZE) , processor_(arch_) , global_mem_(ALLOC_BASE_ADDR, @@ -69,12 +69,12 @@ public: case VX_CAPS_NUM_CORES: _value = NUM_CORES * NUM_CLUSTERS; break; - case VX_CAPS_TC_SIZE: - _value = TC_SIZE; - break; - case VX_CAPS_TC_NUM: - _value = TC_NUM; - break; + // case VX_CAPS_TC_SIZE: + // _value = TC_SIZE; + // break; + // case VX_CAPS_TC_NUM: + // _value = TC_NUM; + // break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; diff --git a/sim/simx/arch.h b/sim/simx/arch.h index e35687dbd..9af266d7a 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -35,11 +35,9 @@ private: uint16_t num_barriers_; uint16_t ipdom_size_; uint64_t local_mem_base_; - uint16_t tc_size_; - uint16_t tc_num_; public: - Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores, uint64_t tc_size, uint64_t tc_num) + Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores) : num_threads_(num_threads) , num_warps_(num_warps) , num_cores_(num_cores) @@ -51,8 +49,6 @@ public: , num_barriers_(NUM_BARRIERS) , ipdom_size_((num_threads-1) * 2) , local_mem_base_(LMEM_BASE_ADDR) - , tc_size_ (tc_size) - , tc_num_ (tc_num) {} uint16_t vsize() const { @@ -98,14 +94,6 @@ public: uint16_t socket_size() const { return socket_size_; } - - uint16_t tc_size() const { - return tc_size_; - } - - uint16_t tc_num() const { - 
return tc_num_; - } }; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index ea5f72c42..d2faf7f98 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,7 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) - , scratchpad(std::vector(core->arch().tc_size() * core->arch().tc_size() * 32768)) //Fix this + , scratchpad(std::vector(32 * 32 * 32768)) //Fix this : Max TC_SIZE = 32 { this->clear(); } @@ -355,6 +355,11 @@ Word Emulator::get_tiles() return mat_size; } +Word Emulator::get_tc_size() +{ + return tc_size; +} + Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { auto core_perf = core_->perf_stats(); switch (addr) { @@ -387,6 +392,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case VX_CSR_LOCAL_MEM_BASE: return arch_.local_mem_base(); case VX_CSR_MSCRATCH: return csr_mscratch_; case VX_MAT_MUL_SIZE: return mat_size; + case VX_TC_NUM: return tc_num; + case VX_TC_SIZE: return tc_size; CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles); CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs); @@ -500,6 +507,13 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { case VX_MAT_MUL_SIZE: mat_size = value; break; + case VX_TC_NUM: + tc_num = value; + break; + case VX_TC_SIZE: + tc_size = value; + break; + default: { std::cout << std::hex << "Error: invalid CSR write addr=0x" << addr << ", value=0x" << value << std::endl; std::abort(); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 82b5bc98b..743c2786e 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -55,6 +55,7 @@ public: int get_exitcode() const; Word get_tiles(); + Word get_tc_size(); private: @@ -125,6 +126,8 @@ private: wspawn_t wspawn_; std::vector scratchpad; uint32_t mat_size; + uint32_t tc_size; + uint32_t tc_num; }; } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 
e13df18b9..0dfd72a0f 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1419,8 +1419,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { { //TODO - make it data-type flexible uint32_t mem_bytes = 1; DP(3, "mem_bytes=" << mem_bytes << std::endl); - uint16_t tc_size = core_->arch().tc_size(); - uint32_t TC_per_warp = core_->arch().tc_num(); + uint16_t tc_size = this->get_csr(VX_TC_SIZE, 0, wid); + uint32_t TC_per_warp = this->get_csr(VX_TC_NUM, 0, wid); + + DP(3, "tc_size=" << tc_size << std::endl); + DP(3, "TC_per_warp=" << TC_per_warp << std::endl); //Number of loads - dependant on the thread config uint32_t n_tiles = this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index 3991a17e7..f53a1fb22 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -255,7 +255,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { TcuUnit::TcuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "TCU") - , tc_size (core_->arch().tc_size()) + // , tc_size (core_->arch().tc_size()) {} void TcuUnit::tick() { @@ -267,6 +267,8 @@ void TcuUnit::tick() { auto& output = Outputs.at(i); auto trace = input.front(); uint32_t n_tiles = core_->emulator_.get_tiles(); + uint32_t tc_size = core_->emulator_.get_tc_size(); + switch (trace->tcu_type) { case TCUType::TCU_MUL: { //mat size = n_tiles * tc_size diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index 5fc922991..a7f182efe 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -103,7 +103,7 @@ private: class TcuUnit : public FuncUnit { public: TcuUnit(const SimContext& ctx, Core*); - uint64_t tc_size; + // uint64_t tc_size; void tick(); }; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 58eb96d61..9031a0a02 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -83,7 +83,7 @@ int main(int argc, char 
**argv) { { // create processor configuation - Arch arch(num_threads, num_warps, num_cores, tc_size, tc_num); + Arch arch(num_threads, num_warps, num_cores); // create memory module RAM ram(0, RAM_PAGE_SIZE); diff --git a/tests/regression/matmul/Makefile b/tests/regression/matmul/Makefile index 7f1c48523..0ef207194 100644 --- a/tests/regression/matmul/Makefile +++ b/tests/regression/matmul/Makefile @@ -9,6 +9,6 @@ SRCS := $(SRC_DIR)/main.cpp VX_SRCS := $(SRC_DIR)/kernel.cpp -OPTS ?= -n128 -d1 +OPTS ?= -n512 -d1 -s4 -t4 include ../common.mk diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp index eeb902acb..a4585fb53 100644 --- a/tests/regression/matmul/kernel.cpp +++ b/tests/regression/matmul/kernel.cpp @@ -13,7 +13,7 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { unsigned c_addr = reinterpret_cast(dst_ptr); uint32_t tc_size = arg->tc_size; - int TC_per_warp = arg->TC_per_warp; + uint32_t TC_per_warp = arg->TC_per_warp; unsigned num_threads = arg->num_threads; int num_warps = arg->num_warps; uint32_t matrix_size = arg->matrix_size; @@ -104,6 +104,9 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { unsigned b_addr_base = b_addr + offset*arg->data_size; unsigned c_addr_base = c_addr + offset_c*arg->data_size; csr_write(VX_MAT_MUL_SIZE,n_tiles); + csr_write(VX_TC_NUM,TC_per_warp); + csr_write(VX_TC_SIZE,tc_size); + mload (0, a_addr_base); mload (1, b_addr_base); //In case of multiple threads - sync load diff --git a/tests/regression/matmul/main.cpp b/tests/regression/matmul/main.cpp index 6a86712ae..b2238bf5a 100644 --- a/tests/regression/matmul/main.cpp +++ b/tests/regression/matmul/main.cpp @@ -21,6 +21,9 @@ const char* kernel_file = "kernel.vxbin"; uint32_t matrix_size = 0; +uint32_t tc_num = 4; +uint32_t TC_size = 8; + vx_device_h device = nullptr; vx_buffer_h A_buffer = nullptr; vx_buffer_h B_buffer = nullptr; @@ -38,7 +41,7 @@ static void show_usage() { static void parse_args(int argc, char **argv, uint32_t &data_size) 
{ int c; - while ((c = getopt(argc, argv, "n:k:d:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:d:t:s:h?")) != -1) { switch (c) { case 'n': matrix_size = atoi(optarg); @@ -48,7 +51,13 @@ static void parse_args(int argc, char **argv, uint32_t &data_size) { break; case 'd': data_size = atoi(optarg); - break; + break; + case 't': + tc_num = atoi(optarg); + break; + case 's': + TC_size = atoi(optarg); + break; case 'h': case '?': { show_usage(); @@ -141,12 +150,22 @@ int main(int argc, char *argv[]) { std::cout << "open device connection" << std::endl; RT_CHECK(vx_dev_open(&device)); - uint64_t num_cores, num_warps, num_threads, tc_size, TC_per_warp; + uint64_t num_cores, num_warps, num_threads; + uint32_t tc_size, TC_per_warp; + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); - RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); + + std::cout << "Debug :: tc_size (optarg) = " << TC_size << std::endl; + std::cout << "Debug :: tc_num (optarg) = " << tc_num << std::endl; + + //Add assert/knob + tc_size = TC_size; + TC_per_warp = tc_num; + + // RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); + // RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); std::cout << "Debug :: tc_size = " << tc_size << std::endl; std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl; From 02091f3d4436cb17c09cc5d9a8ab306298ddd997 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 22 Jun 2024 23:55:01 -0400 Subject: [PATCH 019/488] Merge Vortex 2.2 --- hw/rtl/VX_config.vh | 77 ++-- runtime/simx/vortex.cpp | 749 +++++++++++++++++++------------------- sim/common/mem.cpp | 95 ++--- sim/common/mem.h | 19 +- sim/simx/cluster.cpp | 2 +- sim/simx/cluster.h | 2 +- sim/simx/core.cpp | 2 +- sim/simx/core.h | 2 +- sim/simx/emulator.cpp | 5 +- sim/simx/emulator.h | 2 
+- sim/simx/processor.cpp | 6 +- sim/simx/processor.h | 6 +- sim/simx/processor_impl.h | 3 +- sim/simx/socket.cpp | 2 +- sim/simx/socket.h | 2 +- 15 files changed, 512 insertions(+), 462 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 2d01f2bf2..4ff4dc9eb 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -33,6 +33,9 @@ `endif /////////////////////////////////////////////////////////////////////////////// +`ifndef VM_DISABLE +`define VM_ENABLE +`endif `ifndef EXT_M_DISABLE `define EXT_M_ENABLE @@ -172,12 +175,11 @@ `define IO_BASE_ADDR 64'h000000040 `endif +`ifdef VM_ENABLE `ifndef PAGE_TABLE_BASE_ADDR `define PAGE_TABLE_BASE_ADDR 64'h1F0000000 `endif -`ifndef PAGE_TABLE_SIZE -`define PAGE_TABLE_SIZE 4096 `endif `else // XLEN_32 @@ -198,12 +200,11 @@ `define IO_BASE_ADDR 32'h00000040 `endif +`ifdef VM_ENABLE `ifndef PAGE_TABLE_BASE_ADDR `define PAGE_TABLE_BASE_ADDR 32'hF0000000 `endif -`ifndef PAGE_TABLE_SIZE -`define PAGE_TABLE_SIZE 4096 `endif `endif @@ -271,40 +272,58 @@ `endif // Virtual Memory Configuration /////////////////////////////////////////////////////// -`ifndef VM_DISABLE -`define VM_ENABLE -`endif `ifdef VM_ENABLE - `ifndef VM_ADDR_MODE - `define VM_ADDR_MODE SV32 - `endif - - `ifndef PTE_SIZE - `ifdef XLEN_32 - `define PTE_SIZE 4 - `define NUM_PTE_ENTRY 1024 - `else - `ifdef XLEN_64 - `define PTE_SIZE 8 - `define NUM_PTE_ENTRY 1024 - `else - `define PTE_SIZE 8 - `define NUM_PTE_ENTRY 1024 - `endif + `ifdef XLEN_32 + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV32 //or BARE + `endif + `ifndef PTE_SIZE + `define PTE_SIZE (4) + `endif + `ifndef SATP_MODE_IDX + `define SATP_MODE_IDX (31) + `endif + `ifndef SATP_PPN_WIDTH + `define SATP_PPN_WIDTH (22) + `endif + `else + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV64 //or BARE + `endif + `ifndef PTE_SIZE + `define PTE_SIZE (8) + `endif + `ifndef SATP_MODE_IDX + `define SATP_MODE_IDX (63) + `endif + `ifndef SATP_PPN_WIDTH + `define SATP_PPN_WIDTH (44) `endif - 
`define PT_SIZE (PTE_SIZE * NUM_PTE_ENTRY) `endif + `ifndef NUM_PTE_ENTRY + `define NUM_PTE_ENTRY (1024) + `endif + + `ifndef PT_SIZE + `define PT_SIZE (PTE_SIZE * NUM_PTE_ENTRY) + `endif + + `ifndef PT_TOTAL_SIZE + `define PT_TOTAL_SIZE (PT_SIZE*(1+NUM_PTE_ENTRY)) + `endif + + `ifndef TLB_SIZE - `define TLB_SIZE 32 - `endif - - `ifndef SUPER_PAGING - `define SUPER_PAGING 0 + `define TLB_SIZE (32) `endif `endif +`ifndef MEM_PAGE_SIZE +`define MEM_PAGE_SIZE (4096) +`endif + // Pipeline Configuration ///////////////////////////////////////////////////// // Issue width diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 816ca3081..1a5da088a 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -28,11 +28,11 @@ #include #ifdef VM_ENABLE -#include -#include +#include +// #include +//#include #include -#include #include #include @@ -50,7 +50,6 @@ using namespace vortex; #ifdef VM_ENABLE - #ifndef NDEBUG #define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) #else @@ -85,13 +84,9 @@ class vx_device { public: vx_device() : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) -#ifdef VM_ENABLE - , ram_(0, RAM_PAGE_SIZE<<11) -#else - , ram_(0, RAM_PAGE_SIZE) -#endif + , ram_(0, MEM_PAGE_SIZE) , processor_(arch_) - , global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE) + , global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE) { // attach memory module processor_.attach_ram(&ram_); @@ -150,133 +145,141 @@ public: return 0; } -#ifdef VM_ENABLE - // virtual to phycial mapping - uint64_t map_p2v(uint64_t pAddr) - { - return pAddr + 0xf000000; - } - bool need_trans(uint64_t dev_pAddr) - { - // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == VA_MODE::BARE); - // Check if the address is reserved - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); - // Check if the address falls within the startup address range - 
bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); - - // Print the boolean results for debugging purposes - // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); - - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isStartAddress); - } - - uint64_t phy_to_virt_map(uint64_t size, uint64_t* dev_pAddr, uint32_t flags) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("(size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x)\n", size, *dev_pAddr, flags); - DBGPRINT("bit mode: %d\n", XLEN); - - // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { - - if (!need_trans(*dev_pAddr)) - { - DBGPRINT("Translation is not needed.\n"); - return 0; - } - - uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = map_p2v(init_pAddr); - uint64_t ppn = 0, vpn = 0 ; - - //dev_pAddr can be of size greater than a page, but we have to map and update - //page tables on a page table granularity. So divide the allocation into pages. - bool is_start = false; - for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) - { - vpn = map_p2v(ppn << 12) >> 12; - if (is_start == false) { - DBGPRINT("**Search vpn in page table:0x%lx\n", vpn); - is_start = true; - } - else { - DBGPRINT("Next vpn: 0x%lx\n",vpn); - } - - //Currently a 1-1 mapping is used, this can be changed here to support different - //mapping schemes - - //If ppn to vpn mapping doesnt exist. - if (addr_mapping.find(vpn) == addr_mapping.end()) - { - //Create mapping. 
- update_page_table(ppn, vpn, flags); - addr_mapping[vpn] = ppn; - } - } - DBGPRINT("Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); - - // Sanity check - uint64_t pAddr = page_table_walk(init_vAddr); - if (pAddr != init_pAddr) - { - assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address"); - } - - *dev_pAddr = init_vAddr; // commit vpn to be returned to host - DBGPRINT("Translated device virtual addr: 0x%lx\n", *dev_pAddr); - - return 0; - } -#endif - - int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) { - - uint64_t addr; - DBGPRINT("mem_alloc size: 0x%lx\n",size); - CHECK_ERR(global_mem_.allocate(size, &addr), { - return err; - }); - CHECK_ERR(this->mem_access(addr, size, flags), { - global_mem_.release(addr); - return err; - }); - *dev_addr = addr; #ifdef VM_ENABLE - // VM address translation - phy_to_virt_map(size, dev_addr,flags); + // virtual to phycial mapping + uint64_t map_p2v(uint64_t pAddr) + { + return pAddr + 0xf000000; + } + bool need_trans(uint64_t dev_pAddr) + { + // Check if the this is the BARE mode + bool isBAREMode = (get_mode() == VA_MODE::BARE); + // Check if the address is reserved for system usage + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address is reserved for IO usage + bool isIO = (dev_pAddr < USER_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + } + + uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" 
[RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); + DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); + + // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { + + if (!need_trans(*dev_pAddr)) + { + DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n"); + return 0; + } + + uint64_t init_pAddr = *dev_pAddr; + uint64_t init_vAddr = map_p2v(init_pAddr); + uint64_t ppn = 0, vpn = 0; + + // dev_pAddr can be of size greater than a page, but we have to map and update + // page tables on a page table granularity. So divide the allocation into pages. + bool is_start = false; + for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size / MEM_PAGE_SIZE) + 1; ppn++) + { + vpn = map_p2v(ppn << 12) >> 12; + if (is_start == false) + { + DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); + is_start = true; + } + else + { + DBGPRINT(" [RT:PTV_MAP] Next vpn: 0x%lx\n", vpn); + } + + // Currently a 1-1 mapping is used, this can be changed here to support different + // mapping schemes + + // If ppn to vpn mapping doesnt exist. + if (addr_mapping.find(vpn) == addr_mapping.end()) + { + // Create mapping. 
+ update_page_table(ppn, vpn, flags); + addr_mapping[vpn] = ppn; + } + } + DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); + + // Sanity check + uint64_t pAddr = page_table_walk(init_vAddr); + if (pAddr != init_pAddr) + { + assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address"); + } + + *dev_pAddr = init_vAddr; // commit vpn to be returned to host + DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); + + return 0; + } #endif - return 0; - } - int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { - CHECK_ERR(global_mem_.reserve(dev_addr, size), { - return err; - }); - DBGPRINT("mem_reserve: addr: 0x%lx, size: 0x%lx\n",dev_addr, size); - CHECK_ERR(this->mem_access(dev_addr, size, flags), { - global_mem_.release(dev_addr); - return err; - }); + int mem_alloc(uint64_t size, int flags, uint64_t *dev_addr) + { + + uint64_t addr; + DBGPRINT(" [RT:mem_alloc] mem_alloc size: 0x%lx\n", size); + CHECK_ERR(global_mem_.allocate(size, &addr), { + return err; + }); + CHECK_ERR(this->mem_access(addr, size, flags), { + global_mem_.release(addr); + return err; + }); + *dev_addr = addr; #ifdef VM_ENABLE - uint64_t paddr = dev_addr; - phy_to_virt_map(size, &paddr, flags); + // VM address translation + phy_to_virt_map(size, dev_addr, flags); #endif - return 0; - } + return 0; + } - int mem_free(uint64_t dev_addr) { + int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) + { + CHECK_ERR(global_mem_.reserve(dev_addr, size), { + return err; + }); + DBGPRINT(" [RT:mem_reserve] mem_reserve: addr: 0x%lx, size: 0x%lx\n", dev_addr, size); + CHECK_ERR(this->mem_access(dev_addr, size, flags), { + global_mem_.release(dev_addr); + return err; + }); #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(dev_addr); - // VM address translation - return global_mem_.release(pAddr); + uint64_t paddr = dev_addr; + phy_to_virt_map(size, &paddr, flags); +#endif 
+ return 0; + } + + int mem_free(uint64_t dev_addr) + { +#ifdef VM_ENABLE + uint64_t pAddr = page_table_walk(dev_addr); + // VM address translation + return global_mem_.release(pAddr); #else - return global_mem_.release(dev_addr); + return global_mem_.release(dev_addr); #endif - } + } - int mem_access(uint64_t dev_addr, uint64_t size, int flags) { + int mem_access(uint64_t dev_addr, uint64_t size, int flags) + { uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (dev_addr + asize > GLOBAL_MEM_SIZE) return -1; @@ -285,7 +288,8 @@ public: return 0; } - int mem_info(uint64_t* mem_free, uint64_t* mem_used) const { + int mem_info(uint64_t *mem_free, uint64_t *mem_used) const + { if (mem_free) *mem_free = global_mem_.free(); if (mem_used) @@ -293,21 +297,23 @@ public: return 0; } - int upload(uint64_t dest_addr, const void* src, uint64_t size) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); - if (dest_addr + asize > GLOBAL_MEM_SIZE) - return -1; + int upload(uint64_t dest_addr, const void *src, uint64_t size) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); + if (dest_addr + asize > GLOBAL_MEM_SIZE) + return -1; #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(dest_addr); - DBGPRINT("Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr); - + uint64_t pAddr = page_table_walk(dest_addr); + DBGPRINT(" [RT:upload] Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr); + dest_addr = pAddr; //Overwirte #endif ram_.enable_acl(false); - ram_.write((const uint8_t*)src, dest_addr, size); + ram_.write((const uint8_t *)src, dest_addr, size); ram_.enable_acl(true); + /*DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr); for (uint64_t i = 0; i < size && i < 1024; i += 4) { DBGPRINT(" 0x%lx <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + i)); @@ -316,17 +322,19 @@ public: return 0; } - int download(void* dest, 
uint64_t src_addr, uint64_t size) { + int download(void *dest, uint64_t src_addr, uint64_t size) + { uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (src_addr + asize > GLOBAL_MEM_SIZE) return -1; #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(src_addr); - DBGPRINT("Download data to vAddr = 0x%lx (pAddr=0x%lx)\n", src_addr, pAddr); + uint64_t pAddr = page_table_walk(src_addr); + DBGPRINT(" [RT:download] Download data to vAddr = 0x%lx (pAddr=0x%lx)\n", src_addr, pAddr); + src_addr = pAddr; //Overwirte #endif ram_.enable_acl(false); - ram_.read((uint8_t*)dest, src_addr, size); + ram_.read((uint8_t *)dest, src_addr, size); ram_.enable_acl(true); /*DBGPRINT("download %ld bytes from 0x%lx\n", size, src_addr); @@ -337,9 +345,11 @@ public: return 0; } - int start(uint64_t krnl_addr, uint64_t args_addr) { + int start(uint64_t krnl_addr, uint64_t args_addr) + { // ensure prior run completed - if (future_.valid()) { + if (future_.valid()) + { future_.wait(); } @@ -350,9 +360,8 @@ public: this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32); // start new run - future_ = std::async(std::launch::async, [&]{ - processor_.run(); - }); + future_ = std::async(std::launch::async, [&] + { processor_.run(); }); // clear mpm cache mpm_cache_.clear(); @@ -360,12 +369,14 @@ public: return 0; } - int ready_wait(uint64_t timeout) { + int ready_wait(uint64_t timeout) + { if (!future_.valid()) return 0; uint64_t timeout_sec = timeout / 1000; std::chrono::seconds wait_time(1); - for (;;) { + for (;;) + { // wait for 1 sec and check status auto status = future_.wait_for(wait_time); if (status == std::future_status::ready) @@ -376,8 +387,10 @@ public: return 0; } - int dcr_write(uint32_t addr, uint32_t value) { - if (future_.valid()) { + int dcr_write(uint32_t addr, uint32_t value) + { + if (future_.valid()) + { future_.wait(); // ensure prior run completed } processor_.dcr_write(addr, value); @@ -385,15 +398,18 @@ public: return 0; } - int dcr_read(uint32_t addr, uint32_t* 
value) const { + int dcr_read(uint32_t addr, uint32_t *value) const + { return dcrs_.read(addr, value); } - int mpm_query(uint32_t addr, uint32_t core_id, uint64_t* value) { + int mpm_query(uint32_t addr, uint32_t core_id, uint64_t *value) + { uint32_t offset = addr - VX_CSR_MPM_BASE; if (offset > 31) return -1; - if (mpm_cache_.count(core_id) == 0) { + if (mpm_cache_.count(core_id) == 0) + { uint64_t mpm_mem_addr = IO_MPM_ADDR + core_id * 32 * sizeof(uint64_t); CHECK_ERR(this->download(mpm_cache_[core_id].data(), mpm_mem_addr, 32 * sizeof(uint64_t)), { return err; @@ -404,247 +420,250 @@ public: } #ifdef VM_ENABLE - /* VM Management */ - void set_processor_satp(VA_MODE mode) + /* VM Management */ + void set_processor_satp(VA_MODE mode) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t satp = 0; + if (mode == VA_MODE::BARE) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint32_t satp; - if (mode == VA_MODE::BARE) - { - DBGPRINT("VA_MODE = BARE MODE"); - satp = 0; - } - else if (mode == VA_MODE::SV32) - { - satp = (alloc_2nd_level_page_table() >> 12) | 0x80000000; - DBGPRINT("VA_MODE = SV32 MODE(satp = 0x%x)\n",satp); - } - processor_.set_satp(satp); + DBGPRINT(" [RT:set_satp] VA_MODE = BARE MODE"); + } + else + { + satp = (alloc_2nd_level_page_table() / MEM_PAGE_SIZE) | (1 << SATP_MODE_IDX); + DBGPRINT(" [RT:set_satp] VA_MODE = SV mode (satp = 0x%lx)\n", satp); + } + processor_.set_satp(satp); + } + + uint64_t get_ptbr() + { + // return processor_.get_satp(); + return processor_.get_satp() & ((1 << SATP_PPN_WIDTH) - 1); + } + uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) + { + return (base_page * MEM_PAGE_SIZE) + (vpn * PTE_SIZE); + } + + VA_MODE get_mode() + { +#ifdef XLEN_32 + return processor_.get_satp() & (1 << SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; +#else // 64 bit + return processor_.get_satp() & (1 << SATP_MODE_IDX) ? 
VA_MODE::SV64 : VA_MODE::BARE; +#endif + } + + void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag); + assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); + // Updating page table with the following mapping of (vAddr) to (pAddr). + // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); + uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; + uint64_t vpn_1 = bits(vpn, 10, 19); + uint64_t vpn_0 = bits(vpn, 0, 9); + + // Read first level PTE. + DBGPRINT(" [RT:Update PT]Start second-level page table\n"); + pte_addr = get_pte_address(get_ptbr(), vpn_1); + pte_bytes = read_pte(pte_addr); + DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes); + ppn_1 = (pte_bytes >> 10); + + if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + // If valid bit set, proceed to next level using new ppn form PTE. + DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", ppn_1); + } + else + { + // If valid bit not set, allocate a second level page table + // in device memory and store ppn in PTE. Set rwx = 000 in PTE + // to indicate this is a pointer to the next level of the page table. + DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx), continuing the walk...\n", ppn_1); + ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); + pte_bytes = ((ppn_1 << 10) | 0b0000000001); + assert((pte_addr >> 32) == 0 && "Upper 32 bits are not zero!"); + write_pte(pte_addr, pte_bytes); + // if (pte_bytes != read_pte(pte_addr)) + // DBGPRINT("Read/write values are different!\n"); } - uint32_t get_ptbr() - { - // return processor_.get_satp(); - return processor_.get_satp() & 0x003fffff; + DBGPRINT(" [RT:Update PT] Move to first-level page table\n"); + // Read second level PTE. 
+ pte_addr = get_pte_address(ppn_1, vpn_0); + pte_bytes = read_pte(pte_addr); + + if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + DBGPRINT(" [RT:Update PT] ERROR, shouldn't be here\n"); + exit(1); + // If valid bit is set, then the page is already allocated. + // Should not reach this point, a sanity check. } - uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) + else { - return (base_page << 12) + (vpn * PTE_SIZE); - } + // If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE + // to indicate this is a leaf PTE and has the stated permissions. + pte_bytes = ((ppn << 10) | 0b0000001111); + write_pte(pte_addr, pte_bytes); + if (pte_bytes != read_pte(pte_addr)) + DBGPRINT(" [RT:Update PT] PTE write value and read value are not matched!\n"); + } + } - VA_MODE get_mode() + uint64_t page_table_walk(uint64_t vAddr_bits) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [RT:PTW] start vAddr: 0x%lx\n", vAddr_bits); + if (!need_trans(vAddr_bits)) { - return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; - } + DBGPRINT(" [RT:PTW] Translation is not needed.\n"); + return vAddr_bits; + } + uint64_t LEVELS = 2; + vAddr_SV32_t vAddr(vAddr_bits); + uint64_t pte_addr, pte_bytes; + uint64_t pt_ba = get_ptbr() << 12; - void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn,flag); - assert((((ppn>> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); - //Updating page table with the following mapping of (vAddr) to (pAddr). - // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); - uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; - uint64_t vpn_1 = bits(vpn, 10, 19); - uint64_t vpn_0 = bits(vpn, 0, 9); + // Get base page table. - //Read first level PTE. 
- DBGPRINT("Start second-level page table\n"); - pte_addr = get_pte_address(get_ptbr(), vpn_1); - pte_bytes = read_pte(pte_addr); - DBGPRINT("[PTE] addr 0x%lx, PTE 0x%lx\n", pte_addr, pte_bytes); - ppn_1 = (pte_bytes >> 10); + for (int i = LEVELS - 1; i >= 0; i--) + { + // Read PTE. + pte_addr = pt_ba + vAddr.vpn[i] * PTE_SIZE; + pte_bytes = read_pte(pte_addr); + PTE_SV32_t pte(pte_bytes); + DBGPRINT(" [RT:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn, pte.flags); - if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + // Check if it has invalid flag bits. + if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1))) + { + std::string msg = " [RT:PTW] Page Fault : Attempted to access invalid entry. Entry: 0x"; + throw Page_Fault_Exception(msg); + } + + if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + // Not a leaf node as rwx == 000 + if (i == 0) { - //If valid bit set, proceed to next level using new ppn form PTE. - DBGPRINT("PTE valid (ppn 0x%lx), continuing the walk...\n",ppn_1); + throw Page_Fault_Exception(" [RT:PTW] Page Fault : No leaf node found."); } else { - //If valid bit not set, allocate a second level page table - // in device memory and store ppn in PTE. Set rwx = 000 in PTE - //to indicate this is a pointer to the next level of the page table. - DBGPRINT("PTE Invalid (ppn 0x%lx), continuing the walk...\n",ppn_1); - ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); - pte_bytes = ((ppn_1 << 10) | 0b0000000001) ; - assert((pte_addr>> 32) == 0 && "Upper 32 bits are not zero!"); - write_pte(pte_addr, pte_bytes); - // if (pte_bytes != read_pte(pte_addr)) - // DBGPRINT("Read/write values are different!\n"); - } - - - DBGPRINT("Move to first-level page table\n"); - //Read second level PTE. 
- pte_addr = get_pte_address(ppn_1, vpn_0); - pte_bytes = read_pte(pte_addr); - - if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - DBGPRINT("ERROR, shouldn't be here\n"); - exit(1); - //If valid bit is set, then the page is already allocated. - //Should not reach this point, a sanity check. - } - else - { - //If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE - //to indicate this is a leaf PTE and has the stated permissions. - pte_bytes = ( (ppn << 10) | 0b0000001111) ; - write_pte(pte_addr, pte_bytes); - if (pte_bytes != read_pte(pte_addr)) - DBGPRINT("Read/write values are different!\n"); + // Continue on to next level. + pt_ba = pte.ppn << 12; + DBGPRINT(" [RT:PTW] next pt_ba: %p\n", (void *)pt_ba); } + } + else + { + // Leaf node found, finished walking. + pt_ba = pte.ppn << 12; + DBGPRINT(" [RT:PTW] Found PT_Base_Address [%d] = %lx\n", i, pt_ba); + break; + } } - uint64_t page_table_walk(uint64_t vAddr_bits) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("PTW on vAddr: 0x%lx\n", vAddr_bits); - if (!need_trans(vAddr_bits)) - { - DBGPRINT("Translation is not needed.\n"); - return vAddr_bits; - } - uint64_t LEVELS = 2; - vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_addr, pte_bytes; - uint64_t pt_ba = get_ptbr() << 12; - - //Get base page table. - - for ( int i = LEVELS-1 ; i >= 0 ; i--) - { - //Read PTE. - pte_addr = pt_ba+vAddr.vpn[i]*PTE_SIZE; - pte_bytes = read_pte(pte_addr); - PTE_SV32_t pte(pte_bytes); - DBGPRINT("pte_bytes = 0x%lx, pte flags = %u)\n", pte.ppn , pte.flags); - - //Check if it has invalid flag bits. - if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) - { - std::string msg= "Page Fault : Attempted to access invalid entry. 
Entry: 0x"; - throw Page_Fault_Exception(msg); - } - - if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - //Not a leaf node as rwx == 000 - if (i == 0) - { - throw Page_Fault_Exception("Page Fault : No leaf node found."); - } - else - { - //Continue on to next level. - pt_ba = pte.ppn << 12; - DBGPRINT("next pt_ba: %p\n", (void *)pt_ba); - - } - } - else - { - //Leaf node found, finished walking. - pt_ba = pte.ppn << 12; - DBGPRINT("Found PT_Base_Address [%d] = %lx\n", i, pt_ba); - break; - } - - } - - // pte_bytes is final leaf - PTE_SV32_t pte(pte_bytes); - //Check RWX permissions according to access type. - if (pte.r == 0) - { - throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); - } - - uint64_t paddr = pt_ba + vAddr.pgoff; - return paddr; + // pte_bytes is final leaf + PTE_SV32_t pte(pte_bytes); + // Check RWX permissions according to access type. + if (pte.r == 0) + { + throw Page_Fault_Exception(" [RT:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); } - uint64_t alloc_2nd_level_page_table() { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t addr=PAGE_TABLE_BASE_ADDR; - uint64_t size=1<<23; // 8MB !!!FIXME!!! - CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { - return err; - }); - init_page_table(addr); - return addr; + uint64_t paddr = pt_ba + vAddr.pgoff; + return paddr; + } + + uint64_t alloc_2nd_level_page_table() + { + uint64_t addr = PAGE_TABLE_BASE_ADDR; + uint64_t size = PT_TOTAL_SIZE; + CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { + return err; + }); + init_page_table(addr); + return addr; + } + uint64_t alloc_1st_level_page_table(uint64_t vpn_1) + { + uint64_t addr = PAGE_TABLE_BASE_ADDR + PT_SIZE * (1 + vpn_1); + init_page_table(addr); + return addr; + } + + // Initialize to zero the target page table area. 
32bit 4K, 64bit 8K + void init_page_table(uint64_t addr) + { + uint64_t asize = aligned_size(PT_SIZE, CACHE_BLOCK_SIZE); + DBGPRINT(" [RT:init_page_table] (addr=0x%lx, size=0x%lx)\n", addr, asize); + uint8_t *src = new uint8_t[asize]; + for (uint64_t i = 0; i < PT_SIZE; ++i) + { + src[i] = 0; } - uint64_t alloc_1st_level_page_table(uint64_t vpn_1) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t addr = PAGE_TABLE_BASE_ADDR + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1); - init_page_table(addr); - return addr; + ram_.enable_acl(false); + ram_.write((const uint8_t *)src, addr, asize); + ram_.enable_acl(true); + } + + // void read_page_table(uint64_t addr) { + // uint8_t *dest = new uint8_t[MEM_PAGE_SIZE]; + // download(dest, addr, MEM_PAGE_SIZE); + // DBGPRINT("VXDRV: download %d bytes from 0x%x\n", MEM_PAGE_SIZE, addr); + // for (int i = 0; i < MEM_PAGE_SIZE; i += 4) { + // DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + // } + // } + + void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) + { + DBGPRINT(" [RT:Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr); + uint8_t *src = new uint8_t[PTE_SIZE]; + for (uint64_t i = 0; i < PTE_SIZE; ++i) + { + src[i] = (value >> (i << 3)) & 0xff; } + // std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; + ram_.enable_acl(false); + ram_.write((const uint8_t *)src, addr, PTE_SIZE); + ram_.enable_acl(true); + } - void init_page_table(uint64_t addr) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("int_page_table (addr=0x%lx)\n", addr); - uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); - // uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); - uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; - for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - src[i] = 0; - } - ram_.enable_acl(false); - ram_.write((const uint8_t*)src, addr, asize); - ram_.enable_acl(true); - } + uint64_t read_pte(uint64_t addr) + { + uint8_t *dest = new 
uint8_t[PTE_SIZE]; +#ifdef XLEN_32 + uint64_t mask = 0x00000000FFFFFFFF; +#else // 64bit + uint64_t mask = 0xFFFFFFFFFFFFFFFF; +#endif - // void read_page_table(uint64_t addr) { - // uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; - // download(dest, addr, RAM_PAGE_SIZE); - // DBGPRINT("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); - // for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { - // DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); - // } - // } + ram_.read((uint8_t *)dest, addr, PTE_SIZE); + uint64_t ret = (*(uint64_t *)((uint8_t *)dest)) & mask; + DBGPRINT(" [RT:read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr); - void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { - DBGPRINT("[Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr); - uint8_t *src = new uint8_t[PTE_SIZE]; - for (uint64_t i = 0; i < PTE_SIZE; ++i) { - src[i] = (value >> (i << 3)) & 0xff; - } - //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; - ram_.enable_acl(false); - ram_.write((const uint8_t*)src, addr, PTE_SIZE); - ram_.enable_acl(true); - } - - uint64_t read_pte(uint64_t addr) { - uint8_t *dest = new uint8_t[PTE_SIZE]; - uint64_t mask = 0; - if (XLEN == 32) - mask = 0x00000000FFFFFFFF; - else if (XLEN == 64) - mask = 0xFFFFFFFFFFFFFFFF; - else - assert(0 && "XLEN is not either 32 or 64"); - - ram_.read((uint8_t*)dest, addr, PTE_SIZE); - uint64_t ret = (*(uint64_t*)((uint8_t*)dest)) & mask; - DBGPRINT("[read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr); - - return ret; - } + return ret; + } #endif // JAEWON private: - Arch arch_; - RAM ram_; - Processor processor_; - MemoryAllocator global_mem_; - DeviceConfig dcrs_; - std::future future_; + Arch arch_; + RAM ram_; + Processor processor_; + MemoryAllocator global_mem_; + DeviceConfig dcrs_; + std::future future_; std::unordered_map> mpm_cache_; #ifdef VM_ENABLE - std::unordered_map addr_mapping; + std::unordered_map 
addr_mapping; #endif }; diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index 98eefdaf2..eebd2cde3 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -21,6 +21,13 @@ #include using namespace vortex; +#ifdef VM_ENABLE +#ifndef NDEBUG +#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) +#else +#define DBGPRINT(format, ...) ((void)0) +#endif +#endif uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) { @@ -115,7 +122,6 @@ void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) { } void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { - // printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -125,7 +131,6 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { } void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { - // printf("====%s====\n", __PRETTY_FUNCTION__); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -138,7 +143,9 @@ void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) MemoryUnit::MemoryUnit(uint64_t pageSize) : pageSize_(pageSize) +#ifndef VM_ENABLE , enableVM_(pageSize != 0) +#endif , amo_reservation_({0x0, false}) #ifdef VM_ENABLE , TLB_HIT(0) @@ -158,9 +165,9 @@ void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { decoder_.map(start, end, m); } + #ifdef VM_ENABLE std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) { - // printf("====%s====\n", __PRETTY_FUNCTION__); //Find entry while accounting for different sizes. for (auto entry : tlb_) @@ -201,7 +208,7 @@ std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type } //Check access permissions. 
- if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) ) + if ( (type == ACCESS_TYPE::FENCE) & ((e.r == 0) | (e.x == 0)) ) { throw Page_Fault_Exception("Page Fault : Incorrect permissions."); } @@ -251,7 +258,7 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { #ifdef VM_ENABLE void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { - // printf("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [MMU:read] 0x%lx, 0x%x, %u\n",addr,size,type); uint64_t pAddr; pAddr = vAddr_to_pAddr(addr, type); return decoder_.read(data, pAddr, size); @@ -264,7 +271,7 @@ void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, bool sup) { #endif #ifdef VM_ENABLE void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { - // printf("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [MMU:Write] 0x%lx, 0x%x, %u\n",addr,size,type); uint64_t pAddr; pAddr = vAddr_to_pAddr(addr, type); decoder_.write(data, pAddr, size); @@ -280,6 +287,7 @@ void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, bool sup) #ifdef VM_ENABLE void MemoryUnit::amo_reserve(uint64_t addr) { + DBGPRINT(" [MMU:amo_reserve] 0x%lx\n",addr); uint64_t pAddr = this->vAddr_to_pAddr(addr,ACCESS_TYPE::LOAD); amo_reservation_.addr = pAddr; amo_reservation_.valid = true; @@ -294,6 +302,7 @@ void MemoryUnit::amo_reserve(uint64_t addr) { #ifdef VM_ENABLE bool MemoryUnit::amo_check(uint64_t addr) { + DBGPRINT(" [MMU:amo_check] 0x%lx\n",addr); uint64_t pAddr = this->vAddr_to_pAddr(addr, ACCESS_TYPE::LOAD); return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } @@ -593,30 +602,30 @@ void RAM::loadHexImage(const char* filename) { #ifdef VM_ENABLE bool MemoryUnit::need_trans(uint64_t dev_pAddr) -{ - // Check if the this is the BARE mode - bool isBAREMode = (this->mode == VA_MODE::BARE); - // Check if the address is reserved - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); - // Check if the address falls 
within the startup address range - bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr < (STARTUP_ADDR + 0x40000)); - - // Print the boolean results for debugging purposes - // printf("0x%lx, %u, %u, %u \n", dev_pAddr,isBAREMode, isReserved, isStartAddress); - - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isStartAddress); -} + { + // Check if the this is the BARE mode + bool isBAREMode = (this->mode == VA_MODE::BARE); + // Check if the address is reserved for system usage + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address is reserved for IO usage + bool isIO= (dev_pAddr < USER_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + } uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) { uint64_t pfn; uint64_t size_bits; - // printf("====%s====\n", __PRETTY_FUNCTION__); - // printf("vaddr = 0x%lx, type = 0x%u\n",vAddr,type); + DBGPRINT(" [MMU: V2P] vaddr = 0x%lx, type = 0x%u\n",vAddr,type); if (!need_trans(vAddr)) { - // printf("Translation is not needed.\n"); + DBGPRINT(" [MMU: V2P] Translation is not needed.\n"); return vAddr; } @@ -640,18 +649,18 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) } //Construct final address using pfn and offset. 
- // std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; + DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)))); return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); } std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) { - // printf("====%s====\n", __PRETTY_FUNCTION__); - // printf("vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); + DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); uint64_t pte_bytes = 0; + uint64_t pte_addr =0; //Get base page table. uint64_t pt_ba = this->ptbr << 12; int i = LEVELS - 1; @@ -660,14 +669,15 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC { //Read PTE. - decoder_.read(&pte_bytes, pt_ba+vAddr.vpn[i]*PTE_SIZE, PTE_SIZE); + pte_addr = pt_ba+vAddr.vpn[i] * PTE_SIZE; + decoder_.read(&pte_bytes, pte_addr, PTE_SIZE); PTE_SV32_t pte(pte_bytes); + DBGPRINT(" [MMU:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn , pte.flags); //Check if it has invalid flag bits. 
if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry."); } if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) @@ -676,8 +686,7 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC i--; if (i < 0) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : No leaf node found."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found."); } else { @@ -696,35 +705,35 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC PTE_SV32_t pte(pte_bytes); //Check RWX permissions according to access type. - if ( (type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)) ) + if ( (type == ACCESS_TYPE::FENCE) & ((pte.r == 0) | (pte.x == 0)) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : TYPE FETCH, Incorrect permissions."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FENCE, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : TYPE STORE, Incorrect permissions."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions."); } *size_bits = 12; uint64_t pfn = pt_ba >> *size_bits; return std::make_pair(pfn, pte_bytes & 0xff); } - -uint32_t MemoryUnit::get_satp() +uint64_t MemoryUnit::get_satp() { return satp; } -void MemoryUnit::set_satp(uint32_t satp) +void MemoryUnit::set_satp(uint64_t satp) { this->satp = satp; 
- this->ptbr = satp & 0x003fffff; //22 bits - this->mode = satp & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; + this->ptbr = satp & ( (1<< SATP_PPN_WIDTH) - 1); +#ifdef XLEN_32 + this->mode = satp & (1<< SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; +#else // 64 bit + this->mode = satp & (1<< SATP_MODE_IDX) ? VA_MODE::SV64 : VA_MODE::BARE; +#endif } #endif \ No newline at end of file diff --git a/sim/common/mem.h b/sim/common/mem.h index a655a6d3c..4b7744c2b 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -34,13 +34,14 @@ namespace vortex { #ifdef VM_ENABLE enum VA_MODE { BARE, - SV32 + SV32, + SV64 }; enum ACCESS_TYPE { LOAD, STORE, - FETCH + FENCE }; class Page_Fault_Exception : public std::runtime_error /* or logic_error */ @@ -117,7 +118,7 @@ public: }; #ifdef VM_ENABLE - MemoryUnit(uint64_t pageSize = PAGE_TABLE_SIZE); + MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); #else MemoryUnit(uint64_t pageSize = 0); #endif @@ -138,8 +139,8 @@ public: #ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); - uint32_t get_satp(); - void set_satp(uint32_t satp); + uint64_t get_satp(); + void set_satp(uint64_t satp); #else void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); #endif @@ -238,14 +239,16 @@ private: std::unordered_map tlb_; uint64_t pageSize_; ADecoder decoder_; +#ifndef VM_ENABLE bool enableVM_; +#endif amo_reservation_t amo_reservation_; #ifdef VM_ENABLE - uint32_t satp; + uint64_t satp; VA_MODE mode; - uint32_t ptbr; + uint64_t ptbr; std::unordered_set unique_translations; uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; @@ -380,7 +383,7 @@ class vAddr_SV32_t vpn[0] = bits(address,12,21); vpn[1] = bits(address,22,31); pgoff = bits(address,0,11); - // printf("vpn[0] = 0x%lx, vpn[1] = 0x%lx, pgoff = 0x%lx\n",vpn[0],vpn[1],pgoff); + // printf("vpn[1] = 0x%lx, vpn[0] = 0x%lx, pgoff = 0x%lx\n",vpn[1],vpn[0],pgoff); } }; #endif diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index 
2ca12f411..cb6c3c9d6 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -107,7 +107,7 @@ void Cluster::attach_ram(RAM* ram) { } #ifdef VM_ENABLE -void Cluster::set_satp(uint32_t satp) { +void Cluster::set_satp(uint64_t satp) { for (auto& socket : sockets_) { socket->set_satp(satp); } diff --git a/sim/simx/cluster.h b/sim/simx/cluster.h index 113ac04f7..df96031c3 100644 --- a/sim/simx/cluster.h +++ b/sim/simx/cluster.h @@ -58,7 +58,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif bool running() const; diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 29d77f5df..efaa19133 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -398,7 +398,7 @@ void Core::attach_ram(RAM* ram) { } #ifdef VM_ENABLE -void Core::set_satp(uint32_t satp) { +void Core::set_satp(uint64_t satp) { emulator_.set_satp(satp); //JAEWON wit, tid??? // emulator_.set_csr(VX_CSR_SATP,satp,0,0); //JAEWON wit, tid??? } diff --git a/sim/simx/core.h b/sim/simx/core.h index 6d305f7e2..42f72e552 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -98,7 +98,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif bool running() const; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 63473cfd8..a15bdae43 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -270,7 +270,7 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { #ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { - DPH(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); + // DP(1, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); try { @@ -289,7 +289,7 @@ void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { #endif #ifdef VM_ENABLE -void Emulator::set_satp(uint32_t satp) { +void Emulator::set_satp(uint64_t satp) { DPH(3, "set satp 0x" << 
std::hex << satp << " in emulator module\n"); set_csr(VX_CSR_SATP,satp,0,0); } @@ -298,6 +298,7 @@ void Emulator::set_satp(uint32_t satp) { #ifdef VM_ENABLE void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) { + DP(1, "*** dcache_read 0x" << std::hex << addr << ", size = 0x " << size); auto type = get_addr_type(addr); if (type == AddrType::Shared) { core_->local_mem()->read(data, addr, size); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 15708f3c4..f9e250768 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -40,7 +40,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp) ; + void set_satp(uint64_t satp) ; #endif instr_trace_t* step(); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 3ae99fa4e..305fb410b 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -83,7 +83,7 @@ void ProcessorImpl::attach_ram(RAM* ram) { } } #ifdef VM_ENABLE -void ProcessorImpl::set_satp(uint32_t satp) { +void ProcessorImpl::set_satp(uint64_t satp) { for (auto cluster : clusters_) { cluster->set_satp(satp); } @@ -151,12 +151,12 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) { } #ifdef VM_ENABLE -uint32_t Processor::get_satp() { +uint64_t Processor::get_satp() { // std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; return this->satp; } -void Processor::set_satp(uint32_t satp) { +void Processor::set_satp(uint64_t satp) { impl_->set_satp(satp); this->satp = satp; } diff --git a/sim/simx/processor.h b/sim/simx/processor.h index e22f11569..d2b575421 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -34,14 +34,14 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - uint32_t get_satp(); - void set_satp(uint32_t satp); + uint64_t get_satp(); + void set_satp(uint64_t satp); #endif private: ProcessorImpl* impl_; #ifdef VM_ENABLE - uint32_t satp; + uint64_t satp; #endif }; diff --git a/sim/simx/processor_impl.h 
b/sim/simx/processor_impl.h index e6e9a4cf1..511c0cad6 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -40,8 +40,7 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - // 32bit satp - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif PerfStats perf_stats() const; diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp index 4fa3636e1..afda924d8 100644 --- a/sim/simx/socket.cpp +++ b/sim/simx/socket.cpp @@ -108,7 +108,7 @@ void Socket::attach_ram(RAM* ram) { } #ifdef VM_ENABLE -void Socket::set_satp(uint32_t satp) { +void Socket::set_satp(uint64_t satp) { for (auto core : cores_) { core->set_satp(satp); } diff --git a/sim/simx/socket.h b/sim/simx/socket.h index a09f73e8b..104d53292 100644 --- a/sim/simx/socket.h +++ b/sim/simx/socket.h @@ -61,7 +61,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif bool running() const; From 3d98121ab6d2e6e430a1877469a98a4a15335162 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 23 Jun 2024 11:24:10 -0400 Subject: [PATCH 020/488] Update README.md --- README.md | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 3994bf942..bab81ddcb 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,8 @@ More detailed build instructions can be found [here](docs/install_vortex.md). ``` ### Install Vortex codebase ``` - git clone --depth=1 --recursive git@github.com:gthparch/vortex_vm.git - cd vortex_vm + git clone --depth=1 --recursive git@github.com:vortexgpgpu/vortex.git -b vortex_vm + cd vortex ``` ### Configure your build folder @@ -69,21 +69,18 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
export OUT_DIR=`pwd`/out cd build ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 --prefix=$OUT_DIR -### Ignore the commit for ramulator when it is compiled - # Please add ignore = dirty entry on .gitmodules - [submodule "third_party/ramulator"] - path = third_party/ramulator - url = https://github.com/CMU-SAFARI/ramulator.git - ignore = dirty + ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory + ### set environment variables # should always run before using the toolchain! source ./ci/toolchain_env.sh + ### Building Vortex make -s + ### Quick demo running vecadd OpenCL kernel on 2 cores -<<<<<<< HEAD $ ./ci/blackbox.sh --cores=2 --app=vecadd ### Common Developer Tips From 5b0fc8cbd43c813ced9790b287e8444ab619606b Mon Sep 17 00:00:00 2001 From: Nayan Sivakumar Nair Date: Tue, 25 Jun 2024 03:18:50 -0400 Subject: [PATCH 021/488] Fixes for PR --- ci/blackbox.sh | 3 --- ci/regression.sh.in | 4 ++- hw/rtl/VX_config.vh | 8 +++--- hw/rtl/VX_types.vh | 2 +- kernel/include/vx_intrinsics.h | 12 ++++----- run_final.sh | 22 ----------------- runtime/simx/vortex.cpp | 12 ++++----- sim/simx/decode.cpp | 6 ++--- sim/simx/emulator.cpp | 10 +++++++- sim/simx/emulator.h | 3 ++- sim/simx/execute.cpp | 24 +++++++----------- sim/simx/func_unit.cpp | 1 - sim/simx/func_unit.h | 1 - sim/simx/main.cpp | 2 -- tests/regression/matmul/Makefile | 2 +- tests/regression/matmul/kernel.cpp | 8 +++--- tests/regression/matmul/main.cpp | 22 +++-------------- tests/regression/matmul/matmul_regression.sh | 26 ++++++++++++++++++++ 18 files changed, 77 insertions(+), 91 deletions(-) delete mode 100755 run_final.sh create mode 100755 tests/regression/matmul/matmul_regression.sh diff --git a/ci/blackbox.sh b/ci/blackbox.sh index defad4c05..8bcb120f3 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -48,8 +48,6 @@ PERF_CLASS=0 REBUILD=2 TEMPBUILD=0 LOGFILE=run.log -TC_SIZE=567 -TC_NUM=123 for i in "$@" do @@ -182,7 +180,6 @@ then fi 
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" -# CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DTC_NUM=$TC_NUM -DTC_SIZE=$TC_SIZE $L2 $L3 $PERF_FLAG $CONFIGS" echo "CONFIGS=$CONFIGS" if [ $REBUILD -ne 0 ] diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 50d309af6..3c89ac996 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -124,7 +124,9 @@ regression() # test local barrier ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-n1 -tbar" - + + # test for matmul + CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1" echo "regression tests done!" } diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 651234768..ef9306503 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -111,20 +111,20 @@ `endif `define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE) +// Size of Tensor Core `ifndef TC_SIZE -`define TC_SIZE 4 +`define TC_SIZE 8 `endif +// Number of TCs per Warp `ifndef TC_NUM -`define TC_NUM 1 +`define TC_NUM 4 `endif -// Number of TCU units `ifndef NUM_TCU_LANES `define NUM_TCU_LANES `TC_NUM `endif -// Number of TCU units `ifndef NUM_TCU_BLOCKS `define NUM_TCU_BLOCKS `ISSUE_WIDTH `endif diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 9a8f93234..23fb16904 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -196,7 +196,7 @@ `define VX_CSR_NUM_CORES 12'hFC2 `define VX_CSR_LOCAL_MEM_BASE 12'hFC3 -`define VX_MAT_MUL_SIZE 12'hFC4 +`define VX_MAT_MUL_SIZE 12'hFC4 // VX_MAT_MUL_SIZE = Matrix Size / TC Size `define VX_TC_NUM 12'hFC5 `define VX_TC_SIZE 12'hFC6 diff --git a/kernel/include/vx_intrinsics.h b/kernel/include/vx_intrinsics.h index b67a770da..5d16d44da 100644 --- a/kernel/include/vx_intrinsics.h +++ b/kernel/include/vx_intrinsics.h @@ -222,21 
+222,19 @@ inline void vx_fence() { } //Matrix load -//Converted instruction type cause destination registers were not getiing blocked otherwise -inline void mload(unsigned dest, unsigned addr) +inline void vx_matrix_load(unsigned dest, unsigned addr) { asm volatile (".insn i 0x7b, 0, x0, %0(%1)" :: "i"(dest), "r"(addr)); } -//mat store -inline void ms(unsigned addr) +//Matrix Store +inline void vx_matrix_store(unsigned addr) { asm volatile (".insn i 0x7b, 1, x0, 0(%0)" :: "r"(addr)); } -//mat mul -//num tiles along reduced K dimension of matmul as imm value (can use rd,rs field to expand range of n_tiles from 12 bits) -inline void mm() +//Matrix Mul +inline void vx_matrix_mul() { asm volatile (".insn i 0x7b, 2, x0, 0(x0)"); } diff --git a/run_final.sh b/run_final.sh deleted file mode 100755 index 5f618dc64..000000000 --- a/run_final.sh +++ /dev/null @@ -1,22 +0,0 @@ -# Define arrays for threads, warps, and matrix sizes -matrix_sizes=(16 32 64 128 256 512) -tcsizes=(8 16 32) -tcnums=(4 8 16 32) -#lsulanes=(4 16) -#cores=(32) - - -# Loop through each combination of threads and warps -for size in "${matrix_sizes[@]}"; do - sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" ../tests/regression/matmul/Makefile - sed -i "s/OPTS ?= -n[0-9]\+/OPTS ?= -n${size}/" tests/regression/matmul/Makefile - echo "Matrix size changed to ${size} in Makefile" - for tcsize in "${tcsizes[@]}"; do - for tcnum in "${tcnums[@]}"; do - log_name="sim_final/mat${size}/tcsize${tcsize}_tcnum${tcnum}_32w32t" - command="./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --tc_size=${tcsize} --tc_num=${tcnum} --rebuild=1 --perf=1 > ${log_name} 2>&1" - echo "$command" - eval "$command" - done - done -done diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 4210ab0b6..5ab5e14f5 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -69,12 +69,12 @@ public: case VX_CAPS_NUM_CORES: _value = NUM_CORES * NUM_CLUSTERS; break; - // case 
VX_CAPS_TC_SIZE: - // _value = TC_SIZE; - // break; - // case VX_CAPS_TC_NUM: - // _value = TC_NUM; - // break; + case VX_CAPS_TC_SIZE: + _value = TC_SIZE; + break; + case VX_CAPS_TC_NUM: + _value = TC_NUM; + break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index 4d8d0a105..21d0e61dd 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -410,9 +410,9 @@ static const char* op_string(const Instr &instr) { case Opcode::TCU: switch(func3) { - case 0: return "ML"; // - case 1: return "MS"; // - case 2: return "MATMUL"; + case 0: return "ML"; // Matrix Load + case 1: return "MS"; // Matrix Store + case 2: return "MATMUL"; // Matrix Multiply default: std::abort(); } diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index d2faf7f98..0dc8a06c4 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,7 +74,10 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) - , scratchpad(std::vector(32 * 32 * 32768)) //Fix this : Max TC_SIZE = 32 + // Currently, tradeoff between scratchpad size & performance has not been evaluated. Scratchpad is + // considered to be big enough to hold input tiles for one output tile. + // In future versions, scratchpad size should be fixed to an appropriate value. 
+ , scratchpad(std::vector(32 * 32 * 32768)) { this->clear(); } @@ -360,6 +363,11 @@ Word Emulator::get_tc_size() return tc_size; } +Word Emulator::get_tc_num() +{ + return tc_num; +} + Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { auto core_perf = core_->perf_stats(); switch (addr) { diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 743c2786e..fe3aadf81 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -56,7 +56,8 @@ public: Word get_tiles(); Word get_tc_size(); - + Word get_tc_num(); + private: struct ipdom_entry_t { diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 0dfd72a0f..20025f40b 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1429,8 +1429,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { uint32_t n_tiles = this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value int num_data_per_thread; int num_data_per_thread_st; - int num_threads_actv; - int num_threads_actv_st; + uint32_t num_threads_actv; + uint32_t num_threads_actv_st; uint32_t data_bytes_load; uint32_t data_bytes_store; uint32_t num_threads_per_tc = MAX (1, num_threads/TC_per_warp); @@ -1506,7 +1506,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { auto trace_data = std::make_shared(num_threads); trace->data = trace_data; - uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; for (uint32_t t = thread_start; t < num_threads_actv_st; ++t) { @@ -1521,12 +1520,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { //Store C for (int n=0; n csr (TODO :: removed intermediate CSR stage ; incorporate limited scratchmad implementation) - //core_->set_csr(csr_addr[(2*num_data_per_thread) + n], scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread) + n], t, warp_id_); Word* temp_ref = &(warp.ireg_file.at(t).at(rsrc0)); *temp_ref = 
scratchpad[(n_tiles*tc_size*tc_size*2) + (t*num_data_per_thread_st) + n]; @@ -1534,7 +1527,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } } //Clear the scratchpad - for(int i =0 ; i < scratchpad.size(); i++) + for(long unsigned int i=0 ; i < scratchpad.size(); i++) { scratchpad[i] = 0; } @@ -1545,7 +1538,6 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { DP(4, "TCU MULTIPLY MAT"); trace->fu_type = FUType::TCU; trace->tcu_type = TCUType::TCU_MUL; - uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp); for (uint32_t t = thread_start; t < num_threads_actv; ++t) { @@ -1556,12 +1548,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { //TC operation [only 1 thread in 1 warp needs to do this] if (t%threads_per_tc == 0) { - //TODO : change to systolic array implementation - uint32_t thread_offset = t*(tc_size*tc_size); - int loop_offset = 0; - int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size; /* // TODO : Fix needed for functional correctness + // TODO : change to systolic array implementation + uint32_t thread_offset = t*(tc_size*tc_size); + + int loop_offset = 0; + int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size; + uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? 
{ for (int i = 0; i < tc_size; i++) { //ROW-1 diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index f53a1fb22..8acbfddeb 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -255,7 +255,6 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { TcuUnit::TcuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "TCU") - // , tc_size (core_->arch().tc_size()) {} void TcuUnit::tick() { diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index a7f182efe..cf119a5c3 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -103,7 +103,6 @@ private: class TcuUnit : public FuncUnit { public: TcuUnit(const SimContext& ctx, Core*); - // uint64_t tc_size; void tick(); }; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 9031a0a02..0f61de6f4 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -35,8 +35,6 @@ static void show_usage() { uint32_t num_threads = NUM_THREADS; uint32_t num_warps = NUM_WARPS; uint32_t num_cores = NUM_CORES; -uint32_t tc_size = TC_SIZE; -uint32_t tc_num = TC_NUM; bool showStats = false; const char* program = nullptr; diff --git a/tests/regression/matmul/Makefile b/tests/regression/matmul/Makefile index 0ef207194..7f1c48523 100644 --- a/tests/regression/matmul/Makefile +++ b/tests/regression/matmul/Makefile @@ -9,6 +9,6 @@ SRCS := $(SRC_DIR)/main.cpp VX_SRCS := $(SRC_DIR)/kernel.cpp -OPTS ?= -n512 -d1 -s4 -t4 +OPTS ?= -n128 -d1 include ../common.mk diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp index a4585fb53..b0b4753c7 100644 --- a/tests/regression/matmul/kernel.cpp +++ b/tests/regression/matmul/kernel.cpp @@ -107,15 +107,15 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { csr_write(VX_TC_NUM,TC_per_warp); csr_write(VX_TC_SIZE,tc_size); - mload (0, a_addr_base); - mload (1, b_addr_base); + vx_matrix_load (0, a_addr_base); + vx_matrix_load (1, b_addr_base); //In case of multiple threads - sync load vx_fence(); - mm(); //Assuming 
padding to ensure matrix size is a multiple of tc_size + vx_matrix_mul(); //Assuming padding to ensure matrix size is a multiple of tc_size vx_fence(); if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit_c) - ms(c_addr_base); + vx_matrix_store(c_addr_base); //In case of multiple threads - sync store vx_fence(); } diff --git a/tests/regression/matmul/main.cpp b/tests/regression/matmul/main.cpp index b2238bf5a..9b3465c52 100644 --- a/tests/regression/matmul/main.cpp +++ b/tests/regression/matmul/main.cpp @@ -21,8 +21,6 @@ const char* kernel_file = "kernel.vxbin"; uint32_t matrix_size = 0; -uint32_t tc_num = 4; -uint32_t TC_size = 8; vx_device_h device = nullptr; vx_buffer_h A_buffer = nullptr; @@ -41,7 +39,7 @@ static void show_usage() { static void parse_args(int argc, char **argv, uint32_t &data_size) { int c; - while ((c = getopt(argc, argv, "n:k:d:t:s:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:d:h?")) != -1) { switch (c) { case 'n': matrix_size = atoi(optarg); @@ -52,12 +50,6 @@ static void parse_args(int argc, char **argv, uint32_t &data_size) { case 'd': data_size = atoi(optarg); break; - case 't': - tc_num = atoi(optarg); - break; - case 's': - TC_size = atoi(optarg); - break; case 'h': case '?': { show_usage(); @@ -151,21 +143,15 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_dev_open(&device)); uint64_t num_cores, num_warps, num_threads; - uint32_t tc_size, TC_per_warp; + uint64_t tc_size, TC_per_warp; RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); - std::cout << "Debug :: tc_size (optarg) = " << TC_size << std::endl; - std::cout << "Debug :: tc_num (optarg) = " << tc_num << std::endl; - //Add assert/knob - tc_size = TC_size; - TC_per_warp = tc_num; - - // RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); - // RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); + 
RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); std::cout << "Debug :: tc_size = " << tc_size << std::endl; std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl; diff --git a/tests/regression/matmul/matmul_regression.sh b/tests/regression/matmul/matmul_regression.sh new file mode 100755 index 000000000..8d35fcfd3 --- /dev/null +++ b/tests/regression/matmul/matmul_regression.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# README: +# This script launches a sweep of TC_SIZE, TC_NUM and MATRIX SIZES +# default values of NUM_WARPS=32, NUM_THREADS=32, NUM_CORES=4, DATA_SIZE=1 +# Edit matrix_sizes, tcsizes & tcnums variables to vary the sweep limits + +# Define arrays for tc_size,tc_num and matrix sizes +matrix_sizes=(16 32 64 128 256 512) +tcsizes=(8 16 32) +tcnums=(4 8 16 32) + +cd ../../../build/ + +# Loop through each combination of above configs +for size in "${matrix_sizes[@]}"; do + for tcsize in "${tcsizes[@]}"; do + for tcnum in "${tcnums[@]}"; do + mkdir -p sim_final/mat${size} + log_name="sim_final/mat${size}/tcsize${tcsize}_tcnum${tcnum}_32w32t" + cmd="CONFIGS=\"-DTC_NUM=${tcnum} -DTC_SIZE=${tcsize}\" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args=\"-n${size} -d1\" --rebuild=1 --perf=1 > ${log_name} 2>&1" + echo $cmd + eval $cmd + done + done +done From 5e63b8f35ac3b695d574fde4ad0a280ecbe1b83a Mon Sep 17 00:00:00 2001 From: Nayan Sivakumar Nair Date: Tue, 25 Jun 2024 23:27:18 -0400 Subject: [PATCH 022/488] dummy commit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6eeb1ccfa..a1593a67a 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Vortex is a full-stack open-source RISC-V GPGPU. ## Specifications - Support RISC-V RV32IMAF and RV64IMAFD + - Microarchitecture: - configurable number of cores, warps, and threads. - configurable number of ALU, FPU, LSU, and SFU units per core. 
From 4ab015ddd9b921a2ada6f7d53b95ae49c2c0ac99 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 28 Jun 2024 09:48:04 -0400 Subject: [PATCH 023/488] Update README.md Update TOOLDIR to vortex-toolchain-2024-6-14/ --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bab81ddcb..704883e30 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md). mkdir out export OUT_DIR=`pwd`/out cd build - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 --prefix=$OUT_DIR + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-6-14 --prefix=$OUT_DIR ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory From d531fa6b26816e53066ccff6e7145bed35f8766d Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 29 Jun 2024 17:43:20 -0400 Subject: [PATCH 024/488] 64bit support --- .gitignore | 3 +- hw/rtl/VX_config.vh | 48 +-- runtime/common/common.h | 2 +- runtime/common/malloc.h | 26 +- runtime/simx/vortex.cpp | 482 +++++++++++++--------------- sim/common/mem.cpp | 176 +++++----- sim/common/mem.h | 232 ++++++++++--- sim/simx/emulator.cpp | 9 +- sim/simx/main.cpp | 2 +- sim/simx/processor.cpp | 23 +- sim/simx/processor.h | 10 +- tests/regression/diverge/kernel.cpp | 2 +- 12 files changed, 572 insertions(+), 443 deletions(-) diff --git a/.gitignore b/.gitignore index d1571b535..ca68f0eb2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /build* -/.vscode \ No newline at end of file +/.vscode +*.code-workspace diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 4ff4dc9eb..5dbcb96b4 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -164,7 +164,7 @@ `endif `ifndef STARTUP_ADDR -`define STARTUP_ADDR 64'h080000000 +`define STARTUP_ADDR 64'h180000000 `endif `ifndef USER_BASE_ADDR @@ -271,59 +271,59 @@ `define DEBUG_LEVEL 3 `endif +`ifndef MEM_PAGE_SIZE +`define 
MEM_PAGE_SIZE (4096) +`endif +`ifndef MEM_PAGE_LOG2_SIZE +`define MEM_PAGE_LOG2_SIZE (12) +`endif + // Virtual Memory Configuration /////////////////////////////////////////////////////// `ifdef VM_ENABLE `ifdef XLEN_32 `ifndef VM_ADDR_MODE `define VM_ADDR_MODE SV32 //or BARE `endif + `ifndef PT_LEVEL + `define PT_LEVEL (2) + `endif `ifndef PTE_SIZE `define PTE_SIZE (4) `endif - `ifndef SATP_MODE_IDX - `define SATP_MODE_IDX (31) + `ifndef NUM_PTE_ENTRY + `define NUM_PTE_ENTRY (1024) `endif - `ifndef SATP_PPN_WIDTH - `define SATP_PPN_WIDTH (22) + `ifndef PT_SIZE_LIMIT + `define PT_SIZE_LIMIT (1<<23) `endif `else `ifndef VM_ADDR_MODE - `define VM_ADDR_MODE SV64 //or BARE + `define VM_ADDR_MODE SV39 //or BARE + `endif + `ifndef PT_LEVEL + `define PT_LEVEL (3) `endif `ifndef PTE_SIZE `define PTE_SIZE (8) `endif - `ifndef SATP_MODE_IDX - `define SATP_MODE_IDX (63) + `ifndef NUM_PTE_ENTRY + `define NUM_PTE_ENTRY (512) `endif - `ifndef SATP_PPN_WIDTH - `define SATP_PPN_WIDTH (44) + `ifndef PT_SIZE_LIMIT + `define PT_SIZE_LIMIT (1<<25) `endif `endif - `ifndef NUM_PTE_ENTRY - `define NUM_PTE_ENTRY (1024) - `endif - `ifndef PT_SIZE - `define PT_SIZE (PTE_SIZE * NUM_PTE_ENTRY) + `define PT_SIZE MEM_PAGE_SIZE `endif - `ifndef PT_TOTAL_SIZE - `define PT_TOTAL_SIZE (PT_SIZE*(1+NUM_PTE_ENTRY)) - `endif - - `ifndef TLB_SIZE `define TLB_SIZE (32) `endif `endif -`ifndef MEM_PAGE_SIZE -`define MEM_PAGE_SIZE (4096) -`endif - // Pipeline Configuration ///////////////////////////////////////////////////// // Issue width diff --git a/runtime/common/common.h b/runtime/common/common.h index f7125064e..37fec4846 100644 --- a/runtime/common/common.h +++ b/runtime/common/common.h @@ -24,7 +24,7 @@ #define CACHE_BLOCK_SIZE 64 -#define RAM_PAGE_SIZE 4096 +#define RAM_PAGE_SIZE 4096 // Please use MEM_PAGE_SIZE in VX_config.h #define ALLOC_BASE_ADDR USER_BASE_ADDR diff --git a/runtime/common/malloc.h b/runtime/common/malloc.h index 480c198a6..ca386031a 100644 --- a/runtime/common/malloc.h +++ 
b/runtime/common/malloc.h @@ -39,6 +39,15 @@ public: page_t* currPage = pages_; while (currPage) { auto nextPage = currPage->next; + #ifdef VM_ENABLE + block_t* currblock = currPage->findfirstUsedBlock(); + block_t* nextblock; + while (currblock) { + nextblock= currblock->nextUsed; + currPage->release(currblock); + currblock = nextblock; + } + #endif delete currPage; currPage = nextPage; } @@ -70,7 +79,7 @@ public: size = alignSize(size, pageAlign_); // Check if the reservation is within memory capacity bounds - if (addr + size > capacity_) { + if (addr + size > baseAddress_ + capacity_) { printf("error: address range out of bounds\n"); return -1; } @@ -118,12 +127,12 @@ public: auto pageSize = alignSize(size, pageAlign_); uint64_t pageAddr; if (!this->findNextAddress(pageSize, &pageAddr)) { - printf("error: out of memory\n"); + printf("error: out of memory (Can't find next address)\n"); return -1; } currPage = this->createPage(pageAddr, pageSize); if (nullptr == currPage) { - printf("error: out of memory\n"); + printf("error: out of memory (Can't create a page)\n"); return -1; } freeBlock = currPage->findFreeBlock(size); @@ -335,6 +344,11 @@ private: } return nullptr; } +#ifdef VM_ENABLE + block_t* findfirstUsedBlock() { + return usedList_; + } +#endif private: @@ -480,7 +494,7 @@ private: bool findNextAddress(uint64_t size, uint64_t* addr) { if (pages_ == nullptr) { - *addr = baseAddress_; + *addr = baseAddress_; return true; } @@ -498,10 +512,10 @@ private: endOfLastPage = current->addr + current->size; current = current->next; } - + // If no suitable gap is found, place the new page at the end of the last page // Check if the allocator has enough capacity - if ((endOfLastPage + size) <= capacity_) { + if ((endOfLastPage + size) <= (baseAddress_ + capacity_)) { *addr = endOfLastPage; return true; } diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 1a5da088a..ae9fe5bb5 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -27,10 
+27,8 @@ #include #include -#ifdef VM_ENABLE #include -// #include -//#include +#ifdef VM_ENABLE #include #include @@ -44,42 +42,10 @@ #include #include #include -#include #endif using namespace vortex; -#ifdef VM_ENABLE -#ifndef NDEBUG -#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) -#else -#define DBGPRINT(format, ...) ((void)0) -#endif - -#define CHECK_ERR(_expr, _cleanup) \ - do { \ - auto err = _expr; \ - if (err == 0) \ - break; \ - printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \ - _cleanup \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// -// -#include -#include - -uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) -{ - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); -} -bool bit(uint64_t addr, uint8_t idx) -{ - return (addr) & (1 << idx); -} -#endif - class vx_device { public: vx_device() @@ -91,14 +57,16 @@ public: // attach memory module processor_.attach_ram(&ram_); #ifdef VM_ENABLE - //Set - set_processor_satp(VM_ADDR_MODE); + CHECK_ERR(init_VM(), ); #endif - } + } ~vx_device() { #ifdef VM_ENABLE - this->mem_free(PAGE_TABLE_BASE_ADDR); // Right position? 
+ global_mem_.release(PAGE_TABLE_BASE_ADDR); + // for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++) + // page_table_mem_->release(i->second << MEM_PAGE_SIZE); + delete page_table_mem_; #endif if (future_.valid()) { future_.wait(); @@ -154,9 +122,10 @@ public: bool need_trans(uint64_t dev_pAddr) { // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == VA_MODE::BARE); + bool isBAREMode = (get_mode() == BARE); // Check if the address is reserved for system usage - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); + bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); // Check if the address is reserved for IO usage bool isIO = (dev_pAddr < USER_BASE_ADDR); // Check if the address falls within the startup address range @@ -172,14 +141,12 @@ public: uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) { // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); - DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); - - // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { + DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); + DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); if (!need_trans(*dev_pAddr)) { - DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n"); + DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n"); return 0; } @@ -189,42 +156,30 @@ public: // dev_pAddr can be of size greater than a page, but we have to map and update // page tables on a page table granularity. So divide the allocation into pages. 
- bool is_start = false; - for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size / MEM_PAGE_SIZE) + 1; ppn++) + // FUTURE Work: Super Page + for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++) { - vpn = map_p2v(ppn << 12) >> 12; - if (is_start == false) - { - DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); - is_start = true; - } - else - { - DBGPRINT(" [RT:PTV_MAP] Next vpn: 0x%lx\n", vpn); - } - + vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE) >> MEM_PAGE_LOG2_SIZE; + DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); // Currently a 1-1 mapping is used, this can be changed here to support different // mapping schemes - // If ppn to vpn mapping doesnt exist. if (addr_mapping.find(vpn) == addr_mapping.end()) { // Create mapping. - update_page_table(ppn, vpn, flags); + DBGPRINT(" [RT:PTV_MAP] Not found. Allocate new page table or update a PTE.\n"); + CHECK_ERR(update_page_table(ppn, vpn, flags),); addr_mapping[vpn] = ppn; } } - DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); - + DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr); // Sanity check uint64_t pAddr = page_table_walk(init_vAddr); - if (pAddr != init_pAddr) - { - assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address"); - } + DBGPRINT(" [RT:PTV_MAP] physical addr from PTW: 0x%lx\n", pAddr); + assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address\n"); *dev_pAddr = init_vAddr; // commit vpn to be returned to host - DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); + DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); return 0; } @@ -232,47 +187,44 @@ public: int mem_alloc(uint64_t size, int flags, uint64_t *dev_addr) { + uint64_t asize 
= aligned_size(size, MEM_PAGE_SIZE); + uint64_t addr = 0; - uint64_t addr; - DBGPRINT(" [RT:mem_alloc] mem_alloc size: 0x%lx\n", size); - CHECK_ERR(global_mem_.allocate(size, &addr), { + DBGPRINT("[RT:mem_alloc] size: 0x%lx, asize, 0x%lx,flag : 0x%d\n", size, asize, flags); + CHECK_ERR(global_mem_.allocate(asize, &addr), { return err; }); - CHECK_ERR(this->mem_access(addr, size, flags), { + CHECK_ERR(this->mem_access(addr, asize, flags), { global_mem_.release(addr); return err; }); *dev_addr = addr; #ifdef VM_ENABLE // VM address translation - phy_to_virt_map(size, dev_addr, flags); + phy_to_virt_map(asize, dev_addr, flags); #endif return 0; } int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { - CHECK_ERR(global_mem_.reserve(dev_addr, size), { + uint64_t asize = aligned_size(size, MEM_PAGE_SIZE); + CHECK_ERR(global_mem_.reserve(dev_addr, asize), { return err; }); - DBGPRINT(" [RT:mem_reserve] mem_reserve: addr: 0x%lx, size: 0x%lx\n", dev_addr, size); - CHECK_ERR(this->mem_access(dev_addr, size, flags), { + DBGPRINT("[RT:mem_reserve] addr: 0x%lx, asize:0x%lx, size: 0x%lx\n", dev_addr, asize, size); + CHECK_ERR(this->mem_access(dev_addr, asize, flags), { global_mem_.release(dev_addr); return err; }); -#ifdef VM_ENABLE - uint64_t paddr = dev_addr; - phy_to_virt_map(size, &paddr, flags); -#endif return 0; } int mem_free(uint64_t dev_addr) { #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(dev_addr); - // VM address translation - return global_mem_.release(pAddr); + uint64_t paddr= page_table_walk(dev_addr); + return global_mem_.release(paddr); #else return global_mem_.release(dev_addr); #endif @@ -313,8 +265,8 @@ public: ram_.write((const uint8_t *)src, dest_addr, size); ram_.enable_acl(true); - - /*DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr); + /* + DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr); for (uint64_t i = 0; i < size && i < 1024; i += 4) { DBGPRINT(" 0x%lx <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + i)); }*/ 
@@ -418,200 +370,195 @@ public: *value = mpm_cache_.at(core_id).at(offset); return 0; } - #ifdef VM_ENABLE /* VM Management */ - void set_processor_satp(VA_MODE mode) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t satp = 0; - if (mode == VA_MODE::BARE) - { - DBGPRINT(" [RT:set_satp] VA_MODE = BARE MODE"); - } - else - { - satp = (alloc_2nd_level_page_table() / MEM_PAGE_SIZE) | (1 << SATP_MODE_IDX); - DBGPRINT(" [RT:set_satp] VA_MODE = SV mode (satp = 0x%lx)\n", satp); - } - processor_.set_satp(satp); - } - - uint64_t get_ptbr() - { - // return processor_.get_satp(); - return processor_.get_satp() & ((1 << SATP_PPN_WIDTH) - 1); - } - uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) - { - return (base_page * MEM_PAGE_SIZE) + (vpn * PTE_SIZE); - } - - VA_MODE get_mode() - { -#ifdef XLEN_32 - return processor_.get_satp() & (1 << SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; -#else // 64 bit - return processor_.get_satp() & (1 << SATP_MODE_IDX) ? VA_MODE::SV64 : VA_MODE::BARE; -#endif - } - - void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag); - assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); - // Updating page table with the following mapping of (vAddr) to (pAddr). - // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); - uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; - uint64_t vpn_1 = bits(vpn, 10, 19); - uint64_t vpn_0 = bits(vpn, 0, 9); - - // Read first level PTE. 
- DBGPRINT(" [RT:Update PT]Start second-level page table\n"); - pte_addr = get_pte_address(get_ptbr(), vpn_1); - pte_bytes = read_pte(pte_addr); - DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes); - ppn_1 = (pte_bytes >> 10); - - if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - // If valid bit set, proceed to next level using new ppn form PTE. - DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", ppn_1); - } - else - { - // If valid bit not set, allocate a second level page table - // in device memory and store ppn in PTE. Set rwx = 000 in PTE - // to indicate this is a pointer to the next level of the page table. - DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx), continuing the walk...\n", ppn_1); - ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); - pte_bytes = ((ppn_1 << 10) | 0b0000000001); - assert((pte_addr >> 32) == 0 && "Upper 32 bits are not zero!"); - write_pte(pte_addr, pte_bytes); - // if (pte_bytes != read_pte(pte_addr)) - // DBGPRINT("Read/write values are different!\n"); - } - - DBGPRINT(" [RT:Update PT] Move to first-level page table\n"); - // Read second level PTE. - pte_addr = get_pte_address(ppn_1, vpn_0); - pte_bytes = read_pte(pte_addr); - - if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - DBGPRINT(" [RT:Update PT] ERROR, shouldn't be here\n"); - exit(1); - // If valid bit is set, then the page is already allocated. - // Should not reach this point, a sanity check. - } - else - { - // If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE - // to indicate this is a leaf PTE and has the stated permissions. 
- pte_bytes = ((ppn << 10) | 0b0000001111); - write_pte(pte_addr, pte_bytes); - if (pte_bytes != read_pte(pte_addr)) - DBGPRINT(" [RT:Update PT] PTE write value and read value are not matched!\n"); - } - } - - uint64_t page_table_walk(uint64_t vAddr_bits) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT(" [RT:PTW] start vAddr: 0x%lx\n", vAddr_bits); - if (!need_trans(vAddr_bits)) - { - DBGPRINT(" [RT:PTW] Translation is not needed.\n"); - return vAddr_bits; - } - uint64_t LEVELS = 2; - vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_addr, pte_bytes; - uint64_t pt_ba = get_ptbr() << 12; - - // Get base page table. - - for (int i = LEVELS - 1; i >= 0; i--) - { - // Read PTE. - pte_addr = pt_ba + vAddr.vpn[i] * PTE_SIZE; - pte_bytes = read_pte(pte_addr); - PTE_SV32_t pte(pte_bytes); - DBGPRINT(" [RT:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn, pte.flags); - - // Check if it has invalid flag bits. - if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1))) - { - std::string msg = " [RT:PTW] Page Fault : Attempted to access invalid entry. Entry: 0x"; - throw Page_Fault_Exception(msg); - } - - if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - // Not a leaf node as rwx == 000 - if (i == 0) - { - throw Page_Fault_Exception(" [RT:PTW] Page Fault : No leaf node found."); - } - else - { - // Continue on to next level. - pt_ba = pte.ppn << 12; - DBGPRINT(" [RT:PTW] next pt_ba: %p\n", (void *)pt_ba); - } - } - else - { - // Leaf node found, finished walking. - pt_ba = pte.ppn << 12; - DBGPRINT(" [RT:PTW] Found PT_Base_Address [%d] = %lx\n", i, pt_ba); - break; - } - } - - // pte_bytes is final leaf - PTE_SV32_t pte(pte_bytes); - // Check RWX permissions according to access type. 
- if (pte.r == 0) - { - throw Page_Fault_Exception(" [RT:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); - } - - uint64_t paddr = pt_ba + vAddr.pgoff; - return paddr; - } - - uint64_t alloc_2nd_level_page_table() - { - uint64_t addr = PAGE_TABLE_BASE_ADDR; - uint64_t size = PT_TOTAL_SIZE; - CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { - return err; - }); - init_page_table(addr); - return addr; - } - uint64_t alloc_1st_level_page_table(uint64_t vpn_1) - { - uint64_t addr = PAGE_TABLE_BASE_ADDR + PT_SIZE * (1 + vpn_1); - init_page_table(addr); - return addr; - } // Initialize to zero the target page table area. 32bit 4K, 64bit 8K - void init_page_table(uint64_t addr) + uint16_t init_page_table(uint64_t addr, uint64_t size) { - uint64_t asize = aligned_size(PT_SIZE, CACHE_BLOCK_SIZE); + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); DBGPRINT(" [RT:init_page_table] (addr=0x%lx, size=0x%lx)\n", addr, asize); uint8_t *src = new uint8_t[asize]; - for (uint64_t i = 0; i < PT_SIZE; ++i) + if (src == NULL) + return 1; + + for (uint64_t i = 0; i < asize; ++i) { src[i] = 0; } ram_.enable_acl(false); ram_.write((const uint8_t *)src, addr, asize); ram_.enable_acl(true); + return 0; + } + + uint8_t alloc_page_table (uint64_t * pt_addr) + { + CHECK_ERR(page_table_mem_->allocate(PT_SIZE, pt_addr), { return err; }); + CHECK_ERR(init_page_table(*pt_addr, PT_SIZE), { return err; }); + DBGPRINT(" [RT:alloc_page_table] addr= 0x%lx\n", *pt_addr); + return 0; + } + + int16_t init_VM() + { + uint64_t pt_addr = 0; + // Reserve space for PT + DBGPRINT("[RT:init_VM] Initialize VM\n"); + CHECK_ERR(mem_reserve(PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, VX_MEM_READ_WRITE), { + return err; + }); + page_table_mem_ = new MemoryAllocator (PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + if (page_table_mem_ == NULL) + { + CHECK_ERR(this->mem_free(PAGE_TABLE_BASE_ADDR),); + return 1; + } + + if (VM_ADDR_MODE == BARE) + DBGPRINT("[RT:init_VM] VA_MODE = 
BARE MODE(addr= 0x0)"); + else + CHECK_ERR(alloc_page_table(&pt_addr),{return err;}); + + CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;}); + return 0; + } + + // Return value in in ptbr + uint64_t get_base_ppn() + { + return processor_.get_base_ppn(); + } + uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn) + { + return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE); + } + + uint8_t get_mode() + { + return processor_.get_satp_mode(); + } + + int16_t update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) + { + DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag); + // sanity check +#if VM_ADDR_MODE == SV39 + assert((((ppn >> 44) == 0) && ((vpn >> 27) == 0)) && "Upper bits are not zero!"); + uint8_t level = 3; +#else // Default is SV32, BARE will not reach this point. + assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); + uint8_t level = 2; +#endif + int i = level - 1; + vAddr_t vaddr(vpn << MEM_PAGE_LOG2_SIZE); + uint64_t pte_addr = 0, pte_bytes = 0; + uint64_t pt_addr = 0; + uint64_t cur_base_ppn = get_base_ppn(); + + while (i >= 0) + { + DBGPRINT(" [RT:Update PT]Start %u-level page table\n", i); + pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]); + pte_bytes = read_pte(pte_addr); + PTE_t pte_chk(pte_bytes); + DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes); + if (pte_chk.v == 1 && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", pte_chk.ppn); + cur_base_ppn = pte_chk.ppn; + } + else + { + // If valid bit not set, allocate a next level page table + DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx) ...\n", pte_chk.ppn); + if (i == 0) + { + // Reach to leaf + DBGPRINT(" [RT:Update PT] Reached to level 0. 
This should be a leaf node(flag = %x) \n",flag); + uint32_t pte_flag = (flag << 1) | 0x3; + PTE_t new_pte(ppn <> mpm_cache_; #ifdef VM_ENABLE std::unordered_map addr_mapping; + MemoryAllocator* page_table_mem_; #endif }; diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index eebd2cde3..f3c1025a2 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -21,6 +21,7 @@ #include using namespace vortex; + #ifdef VM_ENABLE #ifndef NDEBUG #define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) @@ -29,16 +30,6 @@ using namespace vortex; #endif #endif -uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) -{ - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); -} - -bool bit(uint64_t addr, uint8_t idx) -{ - return (addr) & (1 << idx); -} - RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize) : wordSize_(wordSize) { @@ -124,6 +115,7 @@ void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) { void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { + assert(0); std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; throw BadAddress(); } @@ -133,6 +125,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { + assert(0); std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; throw BadAddress(); } @@ -208,7 +201,7 @@ std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type } //Check access permissions. 
- if ( (type == ACCESS_TYPE::FENCE) & ((e.r == 0) | (e.x == 0)) ) + if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) ) { throw Page_Fault_Exception("Page Fault : Incorrect permissions."); } @@ -601,12 +594,33 @@ void RAM::loadHexImage(const char* filename) { #ifdef VM_ENABLE +uint64_t MemoryUnit::get_base_ppn() +{ + return satp_->get_base_ppn(); +} + +uint64_t MemoryUnit::get_satp() +{ + return satp_->get_satp(); +} + +uint8_t MemoryUnit::get_mode() +{ + return satp_->get_mode(); +} +void MemoryUnit::set_satp(uint64_t satp) +{ + // uint16_t asid = 0; // set asid for different process + satp_ = new SATP_t (satp ); +} + bool MemoryUnit::need_trans(uint64_t dev_pAddr) { // Check if the this is the BARE mode - bool isBAREMode = (this->mode == VA_MODE::BARE); + bool isBAREMode = (get_mode() == BARE); // Check if the address is reserved for system usage - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); + bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); // Check if the address is reserved for IO usage bool isIO= (dev_pAddr < USER_BASE_ADDR); // Check if the address falls within the startup address range @@ -634,7 +648,6 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) if (tlb_access.first) { - // printf("Found pfn %lx in TLB\n",tlb_access.second); pfn = tlb_access.second; TLB_HIT++; } @@ -649,91 +662,86 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) } //Construct final address using pfn and offset. 
- DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)))); + DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx\n",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)))); return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); } -std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) -{ - DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); - uint64_t LEVELS = 2; - vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_bytes = 0; +uint64_t MemoryUnit::get_pte_address(uint64_t base_ppn, uint64_t vpn) +{ + return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE); +} - uint64_t pte_addr =0; - //Get base page table. - uint64_t pt_ba = this->ptbr << 12; - int i = LEVELS - 1; +std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t *size_bits) +{ + DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u.\n", vAddr_bits, type); + uint8_t level = PT_LEVEL; + int i = level-1; + vAddr_t vaddr(vAddr_bits); + uint32_t flags =0; + uint64_t pte_addr = 0, pte_bytes = 0; + uint64_t cur_base_ppn = get_base_ppn(); + // Need to fix for super page + *size_bits = 12; - while(true) + while (true) + { + // Read PTE. + pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]); + decoder_.read(&pte_bytes, pte_addr, PTE_SIZE); + PTE_t pte(pte_bytes); + DBGPRINT(" [MMU:PTW] Level[%u] pte_addr=0x%lx, pte_bytes =0x%lx, pte.ppn= 0x%lx, pte.flags = %u)\n", i, pte_addr, pte_bytes, pte.ppn, pte.flags); + + assert(((pte.pte_bytes & 0xFFFFFFFF) != 0xbaadf00d) && "ERROR: uninitialzed PTE\n" ); + + // Check if it has invalid flag bits. + if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1))) { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry."); + } - //Read PTE. 
- pte_addr = pt_ba+vAddr.vpn[i] * PTE_SIZE; - decoder_.read(&pte_bytes, pte_addr, PTE_SIZE); - PTE_SV32_t pte(pte_bytes); - DBGPRINT(" [MMU:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn , pte.flags); - - //Check if it has invalid flag bits. - if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) + if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + // Not a leaf node as rwx == 000 + i--; + if (i < 0) { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry."); - } - - if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - //Not a leaf node as rwx == 000 - i--; - if (i < 0) - { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found."); - } - else - { - //Continue on to next level. - pt_ba = (pte_bytes >> 10 ) << 12; - } + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found."); } else { - //Leaf node found, finished walking. - pt_ba = (pte_bytes >> 10 ) << 12; - break; + // Continue on to next level. + cur_base_ppn= pte.ppn; + DBGPRINT(" [MMU:PTW] next base_ppn: 0x%lx\n", cur_base_ppn); + continue; } } - - PTE_SV32_t pte(pte_bytes); - - //Check RWX permissions according to access type. - if ( (type == ACCESS_TYPE::FENCE) & ((pte.r == 0) | (pte.x == 0)) ) + else { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FENCE, Incorrect permissions."); + // Leaf node found, finished walking. + // Check RWX permissions according to access type. 
+ if ((type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0))) + { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FETCH, Incorrect permissions."); + } + else if ((type == ACCESS_TYPE::LOAD) & (pte.r == 0)) + { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); + } + else if ((type == ACCESS_TYPE::STORE) & (pte.w == 0)) + { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions."); + } + cur_base_ppn = pte.ppn; + flags = pte.flags; + break; } - else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) - { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); - } - else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) - { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions."); - } - *size_bits = 12; - uint64_t pfn = pt_ba >> *size_bits; - return std::make_pair(pfn, pte_bytes & 0xff); + } + return std::make_pair(cur_base_ppn, flags); } -uint64_t MemoryUnit::get_satp() -{ - return satp; -} -void MemoryUnit::set_satp(uint64_t satp) -{ - this->satp = satp; - this->ptbr = satp & ( (1<< SATP_PPN_WIDTH) - 1); -#ifdef XLEN_32 - this->mode = satp & (1<< SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; -#else // 64 bit - this->mode = satp & (1<< SATP_MODE_IDX) ? 
VA_MODE::SV64 : VA_MODE::BARE; -#endif -} #endif \ No newline at end of file diff --git a/sim/common/mem.h b/sim/common/mem.h index 4b7744c2b..9f212e184 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -32,17 +32,85 @@ namespace vortex { #ifdef VM_ENABLE -enum VA_MODE { - BARE, - SV32, - SV64 -}; + +// VA MODE +#define BARE 0x0 +#define SV32 0x1 +#define SV39 0x8 enum ACCESS_TYPE { LOAD, STORE, - FENCE + FETCH }; +class SATP_t +{ + private: + uint64_t address; + uint16_t asid; + uint8_t mode; + uint64_t ppn; + uint64_t satp; + + uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx) + { + return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1); + } + bool bit(uint64_t input , uint8_t idx) + { + return (input ) & ((uint64_t)1 << idx); + } + + public: + SATP_t(uint64_t satp) : satp(satp) + { +#ifdef XLEN_32 + mode = bit(satp, 31); + asid = bits(satp, 22, 30); + ppn = bits(satp, 0,21); +#else + mode = bits(satp, 60,63); + asid = bits(satp, 44, 59); + ppn = bits(satp, 0,43); +#endif + address = ppn << MEM_PAGE_LOG2_SIZE; + } + + SATP_t(uint64_t address, uint16_t asid) : address(address), asid(asid) + { +#ifdef XLEN_32 + assert((address >> 32) == 0 && "Upper 32 bits are not zero!"); +#endif + mode= VM_ADDR_MODE; + // asid = 0 ; + ppn = address >> MEM_PAGE_LOG2_SIZE; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshift-count-overflow" +#ifdef XLEN_32 + satp = (((uint64_t)mode << 31) | ((uint64_t)asid << 22) | ppn); +#else + satp = (((uint64_t)mode << 60) | ((uint64_t)asid << 44) | ppn); +#endif +#pragma GCC diagnostic pop + } + uint8_t get_mode() + { + return mode; + } + uint16_t get_asid() + { + return asid; + } + uint64_t get_base_ppn() + { + return ppn; + } + uint64_t get_satp() + { + return satp; + } +}; + class Page_Fault_Exception : public std::runtime_error /* or logic_error */ { @@ -119,6 +187,7 @@ public: #ifdef VM_ENABLE MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); + ~MemoryUnit(){delete this->satp_;}; #else 
MemoryUnit(uint64_t pageSize = 0); #endif @@ -139,7 +208,9 @@ public: #ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); - uint64_t get_satp(); + uint64_t get_satp(); + uint8_t get_mode(); + uint64_t get_base_ppn(); void set_satp(uint64_t satp); #else void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); @@ -228,6 +299,7 @@ private: bool need_trans(uint64_t dev_pAddr); uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); + uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn); std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); #else uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); @@ -245,13 +317,9 @@ private: amo_reservation_t amo_reservation_; #ifdef VM_ENABLE - - uint64_t satp; - VA_MODE mode; - uint64_t ptbr; - std::unordered_set unique_translations; uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; + SATP_t *satp_; #endif }; @@ -322,68 +390,146 @@ private: }; #ifdef VM_ENABLE -class PTE_SV32_t +class PTE_t { private: uint64_t address; - uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx) { - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1); } - bool bit(uint8_t idx) + bool bit(uint64_t input, uint8_t idx) { - return (address) & (1 << idx); + return (input) & ((uint64_t)1 << idx); } public: +#if VM_ADDR_MODE == SV39 + bool N; + uint8_t PBMT; +#endif uint64_t ppn; uint32_t rsw; uint32_t flags; + uint8_t level; bool d, a, g, u, x, w, r, v; - PTE_SV32_t(uint64_t address) : address(address) - { - assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); - flags = bits(address,0,7); - rsw = bits(address,8,9); - ppn = bits(address,10,31); + uint64_t pte_bytes; - d = bit(7); - a = bit(6); - g = bit(5); - u = bit(4); - x = bit(3); - w = bit(2); - r = bit(1); - v = bit(0); - // printf("ppn = 0x%lx, flags= 
0x%x, rsw= 0x%x\n",ppn,flags,rsw); + void set_flags (uint32_t flag) + { + this->flags = flag; + d = bit(flags,7); + a = bit(flags,6); + g = bit(flags,5); + u = bit(flags,4); + x = bit(flags,3); + w = bit(flags,2); + r = bit(flags,1); + v = bit(flags,0); + } + + PTE_t(uint64_t address, uint32_t flags) : address(address) + { +#if VM_ADDR_MODE == SV39 + N = 0; + PBMT = 0; + level = 3; + ppn = address >> MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t [level]; + // ppn[2]=bits(address,28,53); + // ppn[1]=bits(address,19,27); + // ppn[0]=bits(address,10,18); + set_flags(flags); + // pte_bytes = (N << 63) | (PBMT << 61) | (ppn <<10) | flags ; + pte_bytes = (ppn <<10) | flags ; +#else // if VM_ADDR_MODE == SV32 + assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); + level = 2; + ppn = address >> MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t[level]; + // ppn[1]=bits(address,20,31); + // ppn[0]=bits(address,10,19); + set_flags(flags); + pte_bytes = ppn <<10 | flags ; +#endif + } + + PTE_t(uint64_t pte_bytes) : pte_bytes(pte_bytes) + { +#if VM_ADDR_MODE == SV39 + N = bit(pte_bytes,63); + PBMT = bits(pte_bytes,61,62); + level = 3; + ppn=bits(pte_bytes,10,53); + address = ppn << MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t [level]; + // ppn[2]=bits(pte_bytes,28,53); + // ppn[1]=bits(pte_bytes,19,27); + // ppn[0]=bits(pte_bytes,10,18); +#else //#if VM_ADDR_MODE == SV32 + assert((pte_bytes >> 32) == 0 && "Upper 32 bits are not zero!"); + level = 2; + ppn=bits(pte_bytes,10, 31); + address = ppn << MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t[level]; + // ppn[1]=bits(address, 20,31); + // ppn[0]=bits(address, 10,19); +#endif + rsw = bits(pte_bytes,8,9); + set_flags((uint32_t)(bits(pte_bytes,0,7))); + } + ~PTE_t() + { + // Reserve for Super page support + // delete ppn; } }; -class vAddr_SV32_t +class vAddr_t { private: uint64_t address; 
- uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + uint64_t bits(uint8_t s_idx, uint8_t e_idx) { - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + return (address>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1); } - bool bit(uint64_t addr, uint8_t idx) + bool bit( uint8_t idx) { - return (addr) & (1 << idx); + return (address) & ((uint64_t)1 << idx); } public: - uint64_t vpn[2]; + uint64_t *vpn; uint64_t pgoff; - vAddr_SV32_t(uint64_t address) : address(address) + uint8_t level; + vAddr_t(uint64_t address) : address(address) { +#if VM_ADDR_MODE == SV39 + level = 3; + vpn = new uint64_t [level]; + vpn[2] = bits(30,38); + vpn[1] = bits(21,29); + vpn[0] = bits(12,20); + pgoff = bits(0,11); +#else //#if VM_ADDR_MODE == SV32 assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); - vpn[0] = bits(address,12,21); - vpn[1] = bits(address,22,31); - pgoff = bits(address,0,11); - // printf("vpn[1] = 0x%lx, vpn[0] = 0x%lx, pgoff = 0x%lx\n",vpn[1],vpn[0],pgoff); + level = 2; + vpn = new uint64_t [level]; + vpn[1] = bits(22,31); + vpn[0] = bits(12,21); + pgoff = bits(0,11); +#endif + } + + ~vAddr_t() + { + delete vpn; } }; #endif diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index a15bdae43..9e96bef2f 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -115,7 +115,7 @@ void Emulator::clear() { void Emulator::attach_ram(RAM* ram) { // bind RAM to memory unit #if (XLEN == 64) - mmu_.attach(*ram, 0, 0xFFFFFFFFFFFFFFFF); + mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39 #else mmu_.attach(*ram, 0, 0xFFFFFFFF); #endif @@ -270,11 +270,11 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { #ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { - // DP(1, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); + DP(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); try { - mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + 
mmu_.read(data, addr, size, ACCESS_TYPE::FETCH); } catch (Page_Fault_Exception& page_fault) { @@ -305,8 +305,7 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) { } else { try { - // mmu_.read(data, addr, size, 0); - mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); } catch (Page_Fault_Exception& page_fault) { diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index 0f61de6f4..be1505610 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -84,7 +84,7 @@ int main(int argc, char **argv) { Arch arch(num_threads, num_warps, num_cores); // create memory module - RAM ram(0, RAM_PAGE_SIZE); + RAM ram(0, MEM_PAGE_SIZE); // create processor Processor processor(arch); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 305fb410b..23406be98 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -136,6 +136,9 @@ Processor::Processor(const Arch& arch) Processor::~Processor() { delete impl_; +#ifdef VM_ENABLE + delete satp_; +#endif } void Processor::attach_ram(RAM* mem) { @@ -151,13 +154,19 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) { } #ifdef VM_ENABLE -uint64_t Processor::get_satp() { - // std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; - return this->satp; -} - -void Processor::set_satp(uint64_t satp) { +int16_t Processor::set_satp_by_addr(uint64_t base_addr) { + uint16_t asid = 0; + satp_ = new SATP_t (base_addr,asid); + if (satp_ == NULL) + return 1; + uint64_t satp = satp_->get_satp(); impl_->set_satp(satp); - this->satp = satp; + return 0; +} +uint8_t Processor::get_satp_mode() { + return satp_->get_mode(); +} +uint64_t Processor::get_base_ppn() { + return satp_->get_base_ppn(); } #endif diff --git a/sim/simx/processor.h b/sim/simx/processor.h index d2b575421..a20cfff0b 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -22,6 +22,9 @@ namespace vortex { class Arch; class RAM; class ProcessorImpl; +#ifdef VM_ENABLE +class 
SATP_t; +#endif class Processor { public: @@ -34,14 +37,15 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - uint64_t get_satp(); - void set_satp(uint64_t satp); + uint8_t get_satp_mode(); + uint64_t get_base_ppn(); + int16_t set_satp_by_addr(uint64_t addr); #endif private: ProcessorImpl* impl_; #ifdef VM_ENABLE - uint64_t satp; + SATP_t *satp_; #endif }; diff --git a/tests/regression/diverge/kernel.cpp b/tests/regression/diverge/kernel.cpp index f0380e0e4..70b27fa79 100644 --- a/tests/regression/diverge/kernel.cpp +++ b/tests/regression/diverge/kernel.cpp @@ -62,7 +62,7 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { value *= 5; break; default: - assert(task_id < arg->num_points); + //assert(task_id < arg->num_points); break; } From 3caeeeea132bdd94237a5f5507bbbe088b9fcfd1 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 30 Jun 2024 00:35:26 -0400 Subject: [PATCH 025/488] satp_ is not set, then we skip VAT --- runtime/simx/vortex.cpp | 24 ++++++++++++---------- sim/common/mem.cpp | 44 +++++++++++++++++++++++++++-------------- sim/common/mem.h | 9 +++++++-- sim/simx/emulator.h | 3 --- sim/simx/processor.cpp | 14 +++++++++++-- sim/simx/processor.h | 1 + 6 files changed, 63 insertions(+), 32 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index ae9fe5bb5..fc686ca76 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -121,21 +121,25 @@ public: } bool need_trans(uint64_t dev_pAddr) { - // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == BARE); + + // Check if the satp is set and BARE mode + if (processor_.is_satp_unset() || get_mode() == BARE) + return 0; + // Check if the address is reserved for system usage // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); - bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); + if (PAGE_TABLE_BASE_ADDR <= dev_pAddr) + return 0; + // Check if the address is reserved for 
IO usage - bool isIO = (dev_pAddr < USER_BASE_ADDR); + if (dev_pAddr < USER_BASE_ADDR) + return 0; // Check if the address falls within the startup address range - bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000))) + return 0; - // Print the boolean results for debugging purposes - // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); - - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + // Now all conditions are not met. Return true because the address needs translation + return 1; } uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index f3c1025a2..a5339be6e 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -596,16 +596,26 @@ void RAM::loadHexImage(const char* filename) { uint64_t MemoryUnit::get_base_ppn() { + assert(satp_!= NULL); return satp_->get_base_ppn(); } uint64_t MemoryUnit::get_satp() { - return satp_->get_satp(); + if (is_satp_unset()) + return 0; + else + return satp_->get_satp(); +} + +uint8_t MemoryUnit::is_satp_unset() +{ + return (satp_==NULL); } uint8_t MemoryUnit::get_mode() { + assert(satp_!= NULL); return satp_->get_mode(); } void MemoryUnit::set_satp(uint64_t satp) @@ -616,22 +626,26 @@ void MemoryUnit::set_satp(uint64_t satp) bool MemoryUnit::need_trans(uint64_t dev_pAddr) { - // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == BARE); - // Check if the address is reserved for system usage - // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); - bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); - // Check if the address is reserved for IO usage - bool isIO= (dev_pAddr < USER_BASE_ADDR); - // Check if the address falls 
within the startup address range - bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); - - // Print the boolean results for debugging purposes - // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + // Check if the satp is set and BARE mode + if ( is_satp_unset() || (get_mode() == BARE)) + return 0; - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + // Check if the address is reserved for system usage + // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); + if (PAGE_TABLE_BASE_ADDR <= dev_pAddr) + return 0; + + // Check if the address is reserved for IO usage + if (dev_pAddr < USER_BASE_ADDR) + return 0; + // Check if the address falls within the startup address range + if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000))) + return 0; + + // Now all conditions are not met. 
Return true because the address needs translation + return 1; } + uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) { uint64_t pfn; diff --git a/sim/common/mem.h b/sim/common/mem.h index 9f212e184..7ef13393a 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -186,8 +186,12 @@ public: }; #ifdef VM_ENABLE - MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); - ~MemoryUnit(){delete this->satp_;}; + MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE) :satp_(NULL) + {}; + ~MemoryUnit(){ + if ( this->satp_ != NULL) + delete this->satp_; + }; #else MemoryUnit(uint64_t pageSize = 0); #endif @@ -208,6 +212,7 @@ public: #ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); + uint8_t is_satp_unset(); uint64_t get_satp(); uint8_t get_mode(); uint64_t get_base_ppn(); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index f9e250768..47744c6d5 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -124,9 +124,6 @@ private: MemoryUnit mmu_; Word csr_mscratch_; wspawn_t wspawn_; -#ifdef VM_ENABLE - Word ptbr_; -#endif }; } diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 23406be98..7c78218ff 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -132,12 +132,17 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const { Processor::Processor(const Arch& arch) : impl_(new ProcessorImpl(arch)) -{} +{ +#ifdef VM_ENABLE + satp_ = NULL; +#endif +} Processor::~Processor() { delete impl_; #ifdef VM_ENABLE - delete satp_; + if (satp_ != NULL) + delete satp_; #endif } @@ -163,10 +168,15 @@ int16_t Processor::set_satp_by_addr(uint64_t base_addr) { impl_->set_satp(satp); return 0; } +bool Processor::is_satp_unset() { + return (satp_== NULL); +} uint8_t Processor::get_satp_mode() { + assert (satp_!=NULL); return satp_->get_mode(); } uint64_t Processor::get_base_ppn() { + assert (satp_!=NULL); return satp_->get_base_ppn(); } #endif diff --git a/sim/simx/processor.h b/sim/simx/processor.h index 
a20cfff0b..8315eedba 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -37,6 +37,7 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE + bool is_satp_unset(); uint8_t get_satp_mode(); uint64_t get_base_ppn(); int16_t set_satp_by_addr(uint64_t addr); From ccbb2243cc87de7c275cf3d061a25ac7dd296271 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 30 Jun 2024 00:54:22 -0400 Subject: [PATCH 026/488] fixed compile error --- sim/common/mem.cpp | 3 ++- sim/common/mem.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index a5339be6e..e3c9b5cc4 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -144,7 +144,8 @@ MemoryUnit::MemoryUnit(uint64_t pageSize) , TLB_HIT(0) , TLB_MISS(0) , TLB_EVICT(0) - , PTW(0) {}; + , PTW(0) + , satp_(NULL) {}; #else { if (pageSize != 0) diff --git a/sim/common/mem.h b/sim/common/mem.h index 7ef13393a..617e83d69 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -186,8 +186,7 @@ public: }; #ifdef VM_ENABLE - MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE) :satp_(NULL) - {}; + MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); ~MemoryUnit(){ if ( this->satp_ != NULL) delete this->satp_; From c13e02b19f6ad3c8e670f19555a4b7c7b46b7688 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 30 Jun 2024 03:10:36 -0400 Subject: [PATCH 027/488] Change STARTUP_ADDR from 0x40000000 to 0x80000000(32b) and 0x180000000(64b) --- ci/regression.sh.in | 6 +++++- runtime/simx/vortex.cpp | 6 +----- sim/common/mem.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index a5f1bffdb..ce3f9bb43 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -240,7 +240,11 @@ config() # custom program startup address make -C tests/regression/dogfood clean-kernel - STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood + if [ "$XLEN" == "64" ]; then + STARTUP_ADDR=0x180000000 make -C 
tests/regression/dogfood + else + STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood + fi ./ci/blackbox.sh --driver=simx --app=dogfood ./ci/blackbox.sh --driver=rtlsim --app=dogfood diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index fc686ca76..08261fcd7 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -144,7 +144,6 @@ public: uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); @@ -178,10 +177,8 @@ public: } DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr); // Sanity check - uint64_t pAddr = page_table_walk(init_vAddr); - DBGPRINT(" [RT:PTV_MAP] physical addr from PTW: 0x%lx\n", pAddr); + assert(page_table_walk(init_vAddr) == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address\n"); - assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address\n"); *dev_pAddr = init_vAddr; // commit vpn to be returned to host DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); @@ -255,7 +252,6 @@ public: int upload(uint64_t dest_addr, const void *src, uint64_t size) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (dest_addr + asize > GLOBAL_MEM_SIZE) return -1; diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index e3c9b5cc4..e6e998fce 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -23,11 +23,11 @@ using namespace vortex; #ifdef VM_ENABLE -#ifndef NDEBUG -#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) -#else +// #ifndef NDEBUG +// #define DBGPRINT(format, ...) 
do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) +// #else #define DBGPRINT(format, ...) ((void)0) -#endif +// #endif #endif From f0ea1acaa2d32210ea54eede1063ee3203ac06de Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Mon, 8 Jul 2024 17:07:30 -0400 Subject: [PATCH 028/488] vpn allocator added but doesn't pass any tests --- runtime/simx/vortex.cpp | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 08261fcd7..6f31a7ef6 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -114,10 +114,21 @@ public: } #ifdef VM_ENABLE - // virtual to phycial mapping - uint64_t map_p2v(uint64_t pAddr) + // virtual (vpn) to phycial (ppn) mapping + uint64_t map_p2v(uint64_t ppn, uint32_t flags) { - return pAddr + 0xf000000; + DBGPRINT(" [RT:MAP_P2V] ppn: %x\n", ppn); + // std::cout << std::hex << pAddr << std::endl; + // return pAddr + 0xf000000; + if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; + + // If ppn to vpn mapping doesnt exist, create mapping + DBGPRINT(" [RT:MAP_P2V] Not found. 
Allocate new page table or update a PTE.\n"); + uint64_t vpn; + virtual_mem_->allocate(MEM_PAGE_SIZE, &vpn); + CHECK_ERR(update_page_table(ppn, vpn, flags),); + addr_mapping[ppn] = vpn; + return vpn; } bool need_trans(uint64_t dev_pAddr) { @@ -154,7 +165,7 @@ public: } uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = map_p2v(init_pAddr); + uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) & ((1 << MEM_PAGE_LOG2_SIZE) - 1); uint64_t ppn = 0, vpn = 0; // dev_pAddr can be of size greater than a page, but we have to map and update @@ -162,18 +173,11 @@ public: // FUTURE Work: Super Page for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++) { - vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE) >> MEM_PAGE_LOG2_SIZE; + + vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE, flags) >> MEM_PAGE_LOG2_SIZE; DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); // Currently a 1-1 mapping is used, this can be changed here to support different // mapping schemes - // If ppn to vpn mapping doesnt exist. - if (addr_mapping.find(vpn) == addr_mapping.end()) - { - // Create mapping. - DBGPRINT(" [RT:PTV_MAP] Not found. Allocate new page table or update a PTE.\n"); - CHECK_ERR(update_page_table(ppn, vpn, flags),); - addr_mapping[vpn] = ppn; - } } DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr); // Sanity check @@ -415,6 +419,13 @@ public: return 1; } + // HW: virtual mem allocator has the same address range as global_mem. 
next step is to adjust it + virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + if (virtual_mem_ == nullptr) { + // virtual_mem_ does not intefere with physical mem, so no need to free space + return 1; + } + if (VM_ADDR_MODE == BARE) DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)"); else @@ -606,11 +617,12 @@ private: RAM ram_; Processor processor_; MemoryAllocator global_mem_; + MemoryAllocator* virtual_mem_; DeviceConfig dcrs_; std::future future_; std::unordered_map> mpm_cache_; #ifdef VM_ENABLE - std::unordered_map addr_mapping; + std::unordered_map addr_mapping; // HW: key: ppn; value: vpn MemoryAllocator* page_table_mem_; #endif }; From 31837dd7c3a59546eaef7561cba073d2c681a37c Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Mon, 8 Jul 2024 17:10:19 -0400 Subject: [PATCH 029/488] vpn allocator debug complete, now pass demo&vecadd tests --- runtime/simx/vortex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 6f31a7ef6..3f82e647d 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -165,7 +165,7 @@ public: } uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) & ((1 << MEM_PAGE_LOG2_SIZE) - 1); + uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) | (init_pAddr & ((1 << MEM_PAGE_LOG2_SIZE) - 1)); uint64_t ppn = 0, vpn = 0; // dev_pAddr can be of size greater than a page, but we have to map and update From 314ad3ff8a591cd9213cc6f84ae01a31bf69dcd6 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Tue, 9 Jul 2024 13:42:57 -0400 Subject: [PATCH 030/488] update destructor of vx_device --- runtime/simx/vortex.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 
3f82e647d..6757bffbf 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -66,6 +66,7 @@ public: global_mem_.release(PAGE_TABLE_BASE_ADDR); // for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++) // page_table_mem_->release(i->second << MEM_PAGE_SIZE); + delete virtual_mem_; delete page_table_mem_; #endif if (future_.valid()) { @@ -114,6 +115,7 @@ public: } #ifdef VM_ENABLE + // virtual (vpn) to phycial (ppn) mapping uint64_t map_p2v(uint64_t ppn, uint32_t flags) { @@ -130,6 +132,7 @@ public: addr_mapping[ppn] = vpn; return vpn; } + bool need_trans(uint64_t dev_pAddr) { @@ -423,6 +426,7 @@ public: virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); if (virtual_mem_ == nullptr) { // virtual_mem_ does not intefere with physical mem, so no need to free space + return 1; } From b8757c539ded97619d1913780418746438cf62a1 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Wed, 10 Jul 2024 22:39:00 -0400 Subject: [PATCH 031/488] add virtual mem allocator addr spacereservation --- runtime/simx/vortex.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 6757bffbf..92ae2362d 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -407,6 +407,21 @@ public: return 0; } + // reserve IO space, startup space, and local mem area + int virtual_mem_reserve(uint64_t dev_addr, uint64_t size, int flags) + { + // uint64_t asize = aligned_size(size, MEM_PAGE_SIZE); + CHECK_ERR(virtual_mem_->reserve(dev_addr, size), { + return err; + }); + DBGPRINT("[RT:mem_reserve] addr: 0x%lx, size:0x%lx, size: 0x%lx\n", dev_addr, size, size); + // CHECK_ERR(this->mem_access(dev_addr, asize, flags), { + // global_mem_.release(dev_addr); + // return err; + // }); + return 0; + } + int16_t init_VM() { uint64_t pt_addr = 0; @@ -424,6 +439,16 @@ public: // HW: virtual mem 
allocator has the same address range as global_mem. next step is to adjust it virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + return err; + }); + CHECK_ERR(virtual_mem_reserve(0, USER_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + return err; + }); + CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR >> MEM_PAGE_LOG2_SIZE, (STARTUP_ADDR + 0x40000) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + return err; + }); + if (virtual_mem_ == nullptr) { // virtual_mem_ does not intefere with physical mem, so no need to free space From 91a1f41f9932f18b073f03d0bf029bf0d5fc29e4 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Thu, 11 Jul 2024 14:49:00 -0400 Subject: [PATCH 032/488] debugged virtual memory allocator --- runtime/simx/vortex.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 92ae2362d..01c84fab6 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -119,7 +119,7 @@ public: // virtual (vpn) to phycial (ppn) mapping uint64_t map_p2v(uint64_t ppn, uint32_t flags) { - DBGPRINT(" [RT:MAP_P2V] ppn: %x\n", ppn); + DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); // std::cout << std::hex << pAddr << std::endl; // return pAddr + 0xf000000; if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; @@ -128,6 +128,7 @@ public: DBGPRINT(" [RT:MAP_P2V] Not found. 
Allocate new page table or update a PTE.\n"); uint64_t vpn; virtual_mem_->allocate(MEM_PAGE_SIZE, &vpn); + vpn = vpn >> MEM_PAGE_LOG2_SIZE; CHECK_ERR(update_page_table(ppn, vpn, flags),); addr_mapping[ppn] = vpn; return vpn; @@ -176,8 +177,7 @@ public: // FUTURE Work: Super Page for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++) { - - vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE, flags) >> MEM_PAGE_LOG2_SIZE; + vpn = map_p2v(ppn, flags) >> MEM_PAGE_LOG2_SIZE; DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); // Currently a 1-1 mapping is used, this can be changed here to support different // mapping schemes @@ -438,14 +438,11 @@ public: } // HW: virtual mem allocator has the same address range as global_mem. next step is to adjust it - virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); - CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR), MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR), VX_MEM_READ_WRITE), { return err; }); - CHECK_ERR(virtual_mem_reserve(0, USER_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { - return err; - }); - CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR >> MEM_PAGE_LOG2_SIZE, (STARTUP_ADDR + 0x40000) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), { return err; }); @@ -646,13 +643,13 @@ private: RAM ram_; Processor processor_; MemoryAllocator global_mem_; - MemoryAllocator* virtual_mem_; DeviceConfig dcrs_; std::future future_; std::unordered_map> mpm_cache_; #ifdef VM_ENABLE 
std::unordered_map addr_mapping; // HW: key: ppn; value: vpn MemoryAllocator* page_table_mem_; + MemoryAllocator* virtual_mem_; #endif }; From a4ee8dfa7fbbeac214ac54485c1f35948c68d288 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:25:33 -0400 Subject: [PATCH 033/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 01c84fab6..821fb057f 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -120,8 +120,6 @@ public: uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); - // std::cout << std::hex << pAddr << std::endl; - // return pAddr + 0xf000000; if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; // If ppn to vpn mapping doesnt exist, create mapping From a23fb26a8b2559e19b83368f587505238fcbb4c2 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:34:24 -0400 Subject: [PATCH 034/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 821fb057f..3657912da 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -413,10 +413,6 @@ public: return err; }); DBGPRINT("[RT:mem_reserve] addr: 0x%lx, size:0x%lx, size: 0x%lx\n", dev_addr, size, size); - // CHECK_ERR(this->mem_access(dev_addr, asize, flags), { - // global_mem_.release(dev_addr); - // return err; - // }); return 0; } From 0f8e5505d3cdf5ca229834fb78c9323161df76d5 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:36:58 -0400 Subject: [PATCH 035/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 3657912da..fcf9f83ec 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -120,7 +120,7 @@ public: uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); - if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; + if (addr_mapping.contains(ppn)) return addr_mapping[ppn]; // If ppn to vpn mapping doesnt exist, create mapping DBGPRINT(" [RT:MAP_P2V] Not found. Allocate new page table or update a PTE.\n"); From c3e657f201a641f02879e329617b933c2057016b Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:39:40 -0400 Subject: [PATCH 036/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index fcf9f83ec..1c22e3d36 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -116,7 +116,7 @@ public: #ifdef VM_ENABLE - // virtual (vpn) to phycial (ppn) mapping + // physical (ppn) to virtual (vpn) mapping uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); From 90b4a16c9b70762f87146eb2c71b9a21341d6889 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 11:20:27 -0400 Subject: [PATCH 037/488] Apply suggestions from code review Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 1c22e3d36..6a8457e99 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -408,7 +408,6 @@ public: // reserve IO space, startup space, and local mem area int virtual_mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { - // uint64_t asize = aligned_size(size, 
MEM_PAGE_SIZE); CHECK_ERR(virtual_mem_->reserve(dev_addr, size), { return err; }); From de66a1b86131cd74dd4303a6da4df08eb6177831 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Mon, 29 Jul 2024 14:35:11 -0400 Subject: [PATCH 038/488] save work before pull --- .travis.yml | 18 +++++++++++++++++- ci/regression.sh.in | 2 ++ runtime/include/vortex.h | 1 + runtime/simx/vortex.cpp | 11 ++++++++++- 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index d43abb153..57098c8f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -99,4 +99,20 @@ jobs: env: XLEN=32 script: - ./ci/travis_run.py ./ci/regression.sh --debug - - ./ci/travis_run.py ./ci/regression.sh --stress \ No newline at end of file + - ./ci/travis_run.py ./ci/regression.sh --stress + + - stage: test + name: virtual_memory + env: XLEN=32 + env: VM_DISABLE=1 + script: + - ./ci/travis_run.py ./ci/regression.sh --regression + - ./ci/travis_run.py ./ci/regression.sh --opencl + + - stage: test + name: virtual_memory + env: XLEN=64 + env: VM_DISABLE=1 + script: + - ./ci/travis_run.py ./ci/regression.sh --regression + - ./ci/travis_run.py ./ci/regression.sh --opencl \ No newline at end of file diff --git a/ci/regression.sh.in b/ci/regression.sh.in index ce3f9bb43..dffd91502 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -19,6 +19,8 @@ set -e # clear blackbox cache rm -f blackbox.*.cache +# HW: add a test "VM Test" to make sure VM feature is enabled + XLEN=${XLEN:=@XLEN@} echo "Vortex Regression Test: XLEN=$XLEN" diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index c9dd6ec36..6f57c7de8 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -61,6 +61,7 @@ typedef void* vx_buffer_h; #define VX_MEM_READ 0x1 #define VX_MEM_WRITE 0x2 #define VX_MEM_READ_WRITE 0x3 +#define VX_MEM_PIN_MEMORY 0x4 // open the device and connect to it int vx_dev_open(vx_device_h* hdevice); diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 
01c84fab6..5e54576a5 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -199,6 +199,7 @@ public: uint64_t addr = 0; DBGPRINT("[RT:mem_alloc] size: 0x%lx, asize, 0x%lx,flag : 0x%d\n", size, asize, flags); + // HW: when vm is supported this global_mem_ should be virtual memory allocator CHECK_ERR(global_mem_.allocate(asize, &addr), { return err; }); @@ -231,7 +232,7 @@ public: int mem_free(uint64_t dev_addr) { #ifdef VM_ENABLE - uint64_t paddr= page_table_walk(dev_addr); + uint64_t paddr = page_table_walk(dev_addr); return global_mem_.release(paddr); #else return global_mem_.release(dev_addr); @@ -264,6 +265,14 @@ public: return -1; #ifdef VM_ENABLE uint64_t pAddr = page_table_walk(dest_addr); + // uint64_t pAddr; + // try { + // pAddr = page_table_walk(dest_addr); + // } catch ( Page_Fault_Exception ) { + // // HW: place holder + // // should be virt_to_phy_map here + // phy_to_virt_map(0, dest_addr, 0); + // } DBGPRINT(" [RT:upload] Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr); dest_addr = pAddr; //Overwirte #endif From e20a610e67d380adc6766b583481c413232ef9ed Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Wed, 5 Jun 2024 11:19:06 -0400 Subject: [PATCH 039/488] Update README.md --- third_party/ramulator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ramulator b/third_party/ramulator index e62c84a6f..214f63584 160000 --- a/third_party/ramulator +++ b/third_party/ramulator @@ -1 +1 @@ -Subproject commit e62c84a6f0e06566ba6e182d308434b4532068a5 +Subproject commit 214f635845214adf030367939655d172ef0fed5f From ae312f902213a760c5e64066c652378e24fab824 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Wed, 5 Jun 2024 11:23:47 -0400 Subject: [PATCH 040/488] Update README.md --- README.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 97484ff57..27cef6b6e 100644 --- a/README.md +++ b/README.md @@ -47,26 +47,31 @@ More detailed build 
instructions can be found [here](docs/install_vortex.md). - [Yosys](https://github.com/YosysHQ/yosys) - [Sv2v](https://github.com/zachjs/sv2v) ### Install development tools - $ sudo apt-get install build-essential - $ sudo apt-get install binutils - $ sudo apt-get install python - $ sudo apt-get install uuid-dev - $ sudo apt-get install git +``` + sudo apt-get install build-essential + sudo apt-get install binutils + sudo apt-get install python + sudo apt-get install uuid-dev + sudo apt-get install git +``` ### Install Vortex codebase - $ git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git - $ cd Vortex +``` + git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git + cd Vortex +``` ### Configure your build folder $ mkdir build $ cd build $ ../configure --xlen=32 --tooldir=$HOME/tools ### Install prebuilt toolchain - $ ./ci/toolchain_install.sh --all + # We will use the precomipled tools in volvo toolchanin directory ### set environment variables # should always run before using the toolchain! 
- $ source ./ci/toolchain_env.sh + source ./ci/toolchain_env.sh ### Building Vortex - $ make -s + make -s ### Quick demo running vecadd OpenCL kernel on 2 cores +<<<<<<< HEAD $ ./ci/blackbox.sh --cores=2 --app=vecadd ### Common Developer Tips From 768c9666817b6ef012f87674543a6a41abd3b391 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Wed, 5 Jun 2024 16:11:51 -0400 Subject: [PATCH 041/488] expand MemoryUnit class defs and add some tlb-related functions --- hw/rtl/VX_config.vh | 16 ++++++ sim/common/mem.cpp | 41 +++++++++++++- sim/common/mem.h | 131 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 181 insertions(+), 7 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 804715aad..740748b76 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -14,6 +14,22 @@ `ifndef VX_CONFIG_VH `define VX_CONFIG_VH +`ifndef VM_ADDR_MODE +`define VM_ADDR_MODE SV32 +`endif + +`ifndef PTE_SIZE +`define PTE_SIZE 8 +`endif + +`ifndef TLB_SIZE +`define TLB_SIZE 32 +`endif + +`ifndef SUPER_PAGING +`define SUPER_PAGING false +`endif + `ifndef MIN `define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) `endif diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index ed4bcc522..92a983410 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -17,9 +17,22 @@ #include #include #include "util.h" +#include +#include using namespace vortex; +uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) +{ + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); +} + +bool bit(uint64_t addr, uint8_t idx) +{ + return (addr) & (1 << idx); +} + + RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize) : wordSize_(wordSize) { std::ifstream input(filename); @@ -158,12 +171,12 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { return pAddr; } -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1); return decoder_.read(data, pAddr, size); } -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 
16 : 1); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; @@ -179,10 +192,34 @@ bool MemoryUnit::amo_check(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } + void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) { tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags); } +void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits) { + // HW: evict TLB by Most Recently Used + if (tlb_.size() == TLB_SIZE - 1) { + for (auto& entry : tlb_) + { + entry.second.mru_bit = false; + } + + } else if (tlb_.size() == TLB_SIZE) { + uint64_t del; + for (auto entry : tlb_) { + if (!entry.second.mru_bit) + { + del = entry.first; + break; + } + } + tlb_.erase(tlb_.find(del)); + TLB_EVICT++; + } + tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags, size_bits); +} + void MemoryUnit::tlbRm(uint64_t va) { if (tlb_.find(va / pageSize_) != tlb_.end()) tlb_.erase(tlb_.find(va / pageSize_)); diff --git a/sim/common/mem.h b/sim/common/mem.h index 1f5196113..76e2f2ae5 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -18,8 +18,22 @@ #include #include #include +#include +#include namespace vortex { + +enum VA_MODE { + BARE, + SV32 +}; + +enum ACCESS_TYPE { + LOAD, + STORE, + FETCH +}; + struct BadAddress {}; struct OutOfRange {}; @@ -73,31 +87,39 @@ public: class MemoryUnit { public: +// HW: Expand PageFault struct to contain access_type info for debug purposes struct PageFault { PageFault(uint64_t a, bool nf) : faultAddr(a) , notFound(nf) + , access_type(ACCESS_TYPE::LOAD) {} - uint64_t faultAddr; - bool notFound; + uint64_t faultAddr; + bool notFound; + ACCESS_TYPE access_type; }; MemoryUnit(uint64_t pageSize = 0); void attach(MemDevice &m, uint64_t start, uint64_t end); - void read(void* data, uint64_t addr, uint64_t size, bool sup); - void write(const void* data, uint64_t addr, uint64_t size, bool sup); + void read(void* data, 
uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + void write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); void amo_reserve(uint64_t addr); bool amo_check(uint64_t addr); void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); + void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); + void tlbRm(uint64_t vaddr); void tlbFlush() { tlb_.clear(); } + uint32_t get_satp(); + void set_satp(uint32_t satp); + private: struct amo_reservation_t { @@ -137,11 +159,41 @@ private: TLBEntry(uint32_t pfn, uint32_t flags) : pfn(pfn) , flags(flags) - {} + , mru_bit(true) + {}; + TLBEntry(uint32_t pfn, uint32_t flags, uint64_t size_bits) + : pfn(pfn) + , flags(flags) + , mru_bit(true) + , size_bits (size_bits) + { + d = bit(7); + a = bit(6); + g = bit(5); + u = bit(4); + x = bit(3); + w = bit(2); + r = bit(1); + v = bit(0); + } + bool bit(uint8_t idx) + { + return (flags) & (1 << idx); + } + uint32_t pfn; + bool d, a, g, u, x, w, r, v; + bool mru_bit; + uint64_t size_bits; uint32_t flags; }; + std::pair tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits); + + uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); + + std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); + TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); @@ -151,6 +203,13 @@ private: ADecoder decoder_; bool enableVM_; + uint32_t satp; + VA_MODE mode; + uint32_t ptbr; + + std::unordered_set unique_translations; + uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; + amo_reservation_t amo_reservation_; }; @@ -219,4 +278,66 @@ private: bool check_acl_; }; +class PTE_SV32_t +{ + + private: + uint64_t address; + uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + { + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + } + bool bit(uint8_t idx) + { + return (address) & (1 << idx); 
+ } + + public: + uint64_t ppn[2]; + uint32_t rsw; + uint32_t flags; + bool d, a, g, u, x, w, r, v; + PTE_SV32_t(uint64_t address) : address(address) + { + flags = bits(address,0,7); + rsw = bits(address,8,9); + ppn[0] = bits(address,10,19); + ppn[1] = bits(address,20,31); + + d = bit(7); + a = bit(6); + g = bit(5); + u = bit(4); + x = bit(3); + w = bit(2); + r = bit(1); + v = bit(0); + } +}; + +class vAddr_SV32_t +{ + + private: + uint64_t address; + uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + { + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + } + bool bit(uint64_t addr, uint8_t idx) + { + return (addr) & (1 << idx); + } + + public: + uint64_t vpn[2]; + uint64_t pgoff; + vAddr_SV32_t(uint64_t address) : address(address) + { + vpn[0] = bits(address,12,21); + vpn[1] = bits(address,22,31); + pgoff = bits(address,0,11); + } +}; + } // namespace vortex From da1f4baa5ddcce241e6a9f275faa43d9c77d9d3a Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 7 Jun 2024 10:38:41 -0400 Subject: [PATCH 042/488] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 27cef6b6e..cbe7b13cd 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
``` ### Install Vortex codebase ``` - git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git + git clone --depth=1 --recursive git@github.com:gthparch/vortex_vm.git cd Vortex ``` ### Configure your build folder From 2662b6bcab42e1efa05450fd68a70f28b22271f8 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 7 Jun 2024 10:52:43 -0400 Subject: [PATCH 043/488] Update README.md --- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index cbe7b13cd..cd517b2b5 100644 --- a/README.md +++ b/README.md @@ -57,12 +57,23 @@ More detailed build instructions can be found [here](docs/install_vortex.md). ### Install Vortex codebase ``` git clone --depth=1 --recursive git@github.com:gthparch/vortex_vm.git - cd Vortex + cd vortex_vm ``` + ### Configure your build folder - $ mkdir build - $ cd build - $ ../configure --xlen=32 --tooldir=$HOME/tools + # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. 
+ # This is the example for volvo server + mkdir build + mkdir out + export OUT_DIR=`pwd`/out + cd build + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 --prefix=$OUT_DIR +### Ignore the commit for ramulator when it is compiled + # Please add ignore = dirty entry on .gitmodules + [submodule "third_party/ramulator"] + path = third_party/ramulator + url = https://github.com/CMU-SAFARI/ramulator.git + ignore = dirty ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory ### set environment variables From 43a90071e111b440f870ae4319cf34aaffb9488f Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Tue, 11 Jun 2024 23:06:48 -0400 Subject: [PATCH 044/488] Merge Austin's code (Preliminary) --- hw/rtl/VX_config.vh | 33 ++-- runtime/simx/vortex.cpp | 372 ++++++++++++++++++++++++++++++++++++-- sim/common/mem.cpp | 250 ++++++++++++++++++++++++- sim/common/mem.h | 68 +++++-- sim/simx/cluster.cpp | 8 + sim/simx/cluster.h | 4 + sim/simx/core.cpp | 7 + sim/simx/core.h | 4 + sim/simx/emulator.cpp | 82 ++++++++- sim/simx/emulator.h | 6 + sim/simx/processor.cpp | 22 ++- sim/simx/processor.h | 10 + sim/simx/processor_impl.h | 5 + sim/simx/socket.cpp | 8 + sim/simx/socket.h | 4 + 15 files changed, 830 insertions(+), 53 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 740748b76..a73de1d10 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -14,21 +14,28 @@ `ifndef VX_CONFIG_VH `define VX_CONFIG_VH -`ifndef VM_ADDR_MODE -`define VM_ADDR_MODE SV32 +`ifndef VM_DISABLE +`define VM_ENABLE +`endif +`ifdef VM_ENABLE + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV32 + `endif + + `ifndef PTE_SIZE + `define PTE_SIZE 8 + `endif + + `ifndef TLB_SIZE + `define TLB_SIZE 32 + `endif + + `ifndef SUPER_PAGING + `define SUPER_PAGING 0 + `endif + `endif -`ifndef PTE_SIZE -`define PTE_SIZE 8 -`endif - -`ifndef TLB_SIZE -`define TLB_SIZE 32 -`endif - -`ifndef SUPER_PAGING -`define SUPER_PAGING false -`endif 
`ifndef MIN `define MIN(x, y) (((x) < (y)) ? (x) : (y)) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 89856f3a0..6e5cafc38 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -29,6 +29,38 @@ using namespace vortex; +#ifdef VM_ENABLE + +#ifndef NDEBUG +#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) +#else +#define DBGPRINT(format, ...) ((void)0) +#endif + +#define CHECK_ERR(_expr, _cleanup) \ + do { \ + auto err = _expr; \ + if (err == 0) \ + break; \ + printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \ + _cleanup \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// +// +#include +#include + +uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) +{ + return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); +} +bool bit(uint64_t addr, uint8_t idx) +{ + return (addr) & (1 << idx); +} +#endif + class vx_device { public: vx_device() @@ -42,6 +74,10 @@ public: { // attach memory module processor_.attach_ram(&ram_); +#ifdef VM_ENABLE + //Set + processor_.set_processor_satp(VM_ADDR_MODE); +#endif } ~vx_device() { @@ -90,18 +126,75 @@ public: return 0; } - int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) { - uint64_t addr; - CHECK_ERR(global_mem_.allocate(size, &addr), { - return err; - }); - CHECK_ERR(this->mem_access(addr, size, flags), { - global_mem_.release(addr); - return err; - }); - *dev_addr = addr; - return 0; - } +#ifdef VM_ENABLE + // VM SUPPORT + uint64_t map_local_mem(uint64_t size, uint64_t* dev_maddr) + { + bool is_pc = false; + std::cout << "startup addr: " << std::hex << STARTUP_ADDR << std::endl; + std::cout << "bit mode: " << std::dec << XLEN << std::endl; + if (*dev_maddr == STARTUP_ADDR || *dev_maddr == 0x7FFFF000) { + is_pc = true; + } + + if (get_mode() == VA_MODE::BARE) + return 0; + + uint64_t ppn = *dev_maddr >> 12; + uint64_t init_pAddr = *dev_maddr; + uint64_t init_vAddr = 
*dev_maddr + 0xf0000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation + init_vAddr = (init_vAddr >> 12) << 12; + uint64_t vpn; + + //dev_maddr can be of size greater than a page, but we have to map and update + //page tables on a page table granularity. So divide the allocation into pages. + for (ppn = (*dev_maddr) >> 12; ppn < ((*dev_maddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) + { + //Currently a 1-1 mapping is used, this can be changed here to support different + //mapping schemes + vpn = is_pc ? ppn : ppn + 0xf0000; + //vpn = ppn; + + //If ppn to vpn mapping doesnt exist. + if (addr_mapping.find(vpn) == addr_mapping.end()) + { + //Create mapping. + update_page_table(ppn, vpn); + addr_mapping[vpn] = ppn; + } + } + + std::cout << "mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; + uint64_t size_bits; + if (is_pc) { + std::cout << "not returning virtual address because it is PC or stack" << std::endl; + std::pair ptw_access = page_table_walk(init_vAddr - 0xf0000000, &size_bits); + return 0; + } else { + std::pair ptw_access = page_table_walk(init_vAddr, &size_bits); + } + *dev_maddr = init_vAddr; // commit vpn to be returned to host + return 0; + } +#endif + + int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) { + uint64_t addr; + CHECK_ERR(global_mem_.allocate(size, &addr), { + return err; + }); + CHECK_ERR(this->mem_access(addr, size, flags), { + global_mem_.release(addr); + return err; + }); +#ifdef VM_ENABLE + // VM address translation + std::cout << "physical addr: " << std::hex << *dev_addr << std::endl; + map_local_mem(size, dev_addr); +#endif + *dev_addr = addr; + return 0; + } int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { CHECK_ERR(global_mem_.reserve(dev_addr, size), { @@ -140,6 +233,18 @@ public: if (dest_addr + asize > GLOBAL_MEM_SIZE) return -1; +#ifdef VM_ENABLE + uint64_t pAddr = dest_addr; // map_local_mem overwrites the provided 
dest_addr, so store away physical destination address + if (dest_addr >= STARTUP_ADDR) { + map_local_mem(asize,&dest_addr); + } else if (dest_addr >= 0x7fff0000) + { + map_local_mem(asize,&dest_addr); + } + std::cout << "uploading to 0x" << pAddr << "(VA)" << std::endl; + dest_addr = pAddr; +#endif + ram_.enable_acl(false); ram_.write((const uint8_t*)src, dest_addr, size); ram_.enable_acl(true); @@ -235,6 +340,244 @@ public: return 0; } +#ifdef VM_ENABLE + /* VM Management */ + void set_processor_satp(VA_MODE mode) + { + uint32_t satp; + if (mode == VA_MODE::BARE) + satp = 0; + else if (mode == VA_MODE::SV32) + { + satp = (alloc_page_table() >> 10) | 0x80000000; + // satp = 0xFEBFE000 ; + } + processor_.set_satp(satp); + } + + uint32_t get_ptbr() + { + // return processor_.get_satp(); + return processor_.get_satp() & 0x003fffff; + } + + VA_MODE get_mode() + { + return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; + // return VA_MODE::SV32; + } + + void update_page_table(uint64_t pAddr, uint64_t vAddr) { + std::cout << "mapping vpn: " << vAddr << " to ppn:" << pAddr << std::endl; + //Updating page table with the following mapping of (vAddr) to (pAddr). + uint64_t ppn_1, pte_addr, pte_bytes; + uint64_t vpn_1 = bits(vAddr, 10, 19); + uint64_t vpn_0 = bits(vAddr, 0, 9); + + //Read first level PTE. + pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); + pte_bytes = read_pte(pte_addr); + std::cout << "[PTE] addr 0x" << std::hex << pte_addr << ", PTE 0x" << std::hex << pte_bytes << std::endl; + + + if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + //If valid bit set, proceed to next level using new ppn form PTE. + std::cout << "PTE valid, continuing the walk..." << std::endl; + ppn_1 = (pte_bytes >> 10); + } + else + { + //If valid bit not set, allocate a second level page table + // in device memory and store ppn in PTE. Set rwx = 000 in PTE + //to indicate this is a pointer to the next level of the page table. 
+ ppn_1 = (alloc_page_table() >> 12); + pte_bytes = ( (ppn_1 << 10) | 0b0000000001) ; + write_pte(pte_addr, pte_bytes); + } + + //Read second level PTE. + pte_addr = (ppn_1 << 12) + (vpn_0 * PTE_SIZE); + pte_bytes = read_pte(pte_addr); + std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + + if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + std::cout << "ERROR, shouldn't be here" << std::endl; + //If valid bit is set, then the page is already allocated. + //Should not reach this point, a sanity check. + } + else + { + //If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE + //to indicate this is a leaf PTE and has the stated permissions. + pte_bytes = ( (pAddr << 10) | 0b0000001111) ; + write_pte(pte_addr, pte_bytes); + + //If super paging is enabled. + if (SUPER_PAGING) + { + //Check if this second level Page Table can be promoted to a super page. Brute force + //method is used to iterate over all PTE entries of the table and check if they have + //their valid bit set. + bool superpage = true; + for(int i = 0; i < 1024; i++) + { + pte_addr = (ppn_1 << 12) + (i * PTE_SIZE); + pte_bytes = read_pte(pte_addr); + + if (!bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + superpage = false; + break; + } + } + if (superpage) + { + //This can be promoted to a super page. Set root PTE to the first PTE of the + //second level. This is because the first PTE of the second level already has the + //correct PPN1, PPN0 set to zero and correct access bits. + pte_addr = (ppn_1 << 12); + pte_bytes = read_pte(pte_addr); + pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); + write_pte(pte_addr, pte_bytes); + } + } + } + } + + std::pair page_table_walk(uint64_t vAddr_bits, uint64_t* size_bits) + { + uint64_t LEVELS = 2; + vAddr_SV32_t vAddr(vAddr_bits); + uint64_t pte_bytes; + + std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; + + //Get base page table. 
+ uint64_t a = this->processor_.get_satp() << 12; + std::cout << "PTW SATP: 0x" << a << std::endl; + int i = LEVELS - 1; + + while(true) + { + + //Read PTE. + std::cout << "reading PTE from RAM addr 0x" << std::hex << (a+vAddr.vpn[i]*PTE_SIZE) << std::endl; + ram_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); + //pte_bytes &= 0x00000000FFFFFFFF; + PTE_SV32_t pte(pte_bytes); + std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + + //Check if it has invalid flag bits. + if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) + { + std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; + throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); + } + + if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + //Not a leaf node as rwx == 000 + i--; + if (i < 0) + { + throw Page_Fault_Exception("Page Fault : No leaf node found."); + } + else + { + //Continue on to next level. + a = (pte_bytes >> 10 ) << 12; + std::cout << "next a: " << a << std::endl; + } + } + else + { + //Leaf node found, finished walking. + a = (pte_bytes >> 10 ) << 12; + break; + } + } + + PTE_SV32_t pte(pte_bytes); + + //Check RWX permissions according to access type. + if (pte.r == 0) + { + throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + } + + uint64_t pfn; + if (i > 0) + { + //It is a super page. + if (pte.ppn[0] != 0) + { + //Misss aligned super page. + throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); + + } + else + { + //Valid super page. + pfn = pte.ppn[1]; + *size_bits = 22; + } + } + else + { + //Regular page. 
+ *size_bits = 12; + pfn = a >> 12; + } + return std::make_pair(pfn, pte_bytes & 0xff); + } + + uint64_t alloc_page_table() { + uint64_t addr; + global_mem_.allocate(RAM_PAGE_SIZE, &addr); + std::cout << "address of page table 0x" << std::hex << addr << std::endl; + init_page_table(addr); + return addr; + } + + + void init_page_table(uint64_t addr) { + uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); + uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; + for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { + src[i] = (0x00000000 >> ((i & 0x3) * 8)) & 0xff; + } + ram_.write((const uint8_t*)src, addr, asize); + } + + void read_page_table(uint64_t addr) { + uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; + download(dest, addr, RAM_PAGE_SIZE); + printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); + for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { + printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + } + } + + void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { + std::cout << "writing pte " << std::hex << value << " to pAddr: " << std::hex << addr << std::endl; + uint8_t *src = new uint8_t[PTE_SIZE]; + for (uint64_t i = 0; i < PTE_SIZE; ++i) { + src[i] = (value >> ((i & 0x3) * 8)) & 0xff; + } + //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; + ram_.write((const uint8_t*)src, addr, PTE_SIZE); + } + + uint64_t read_pte(uint64_t addr) { + uint8_t *dest = new uint8_t[PTE_SIZE]; + std::cout << "[read_pte] reading PTE from RAM addr 0x" << std::hex << addr << std::endl; + ram_.read((uint8_t*)dest, addr, PTE_SIZE); + return *(uint64_t*)((uint8_t*)dest); + } +#endif // JAEWON + private: Arch arch_; RAM ram_; @@ -243,6 +586,9 @@ private: DeviceConfig dcrs_; std::future future_; std::unordered_map> mpm_cache_; +#ifdef VM_ENABLE + std::unordered_map addr_mapping; +#endif }; -#include \ No newline at end of file +#include diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index 92a983410..b55d0de9a 
100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -137,16 +137,90 @@ void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) MemoryUnit::MemoryUnit(uint64_t pageSize) : pageSize_(pageSize) , enableVM_(pageSize != 0) - , amo_reservation_({0x0, false}) { - if (pageSize != 0) { - tlb_[0] = TLBEntry(0, 077); + , amo_reservation_({0x0, false}) +#ifdef VM_ENABLE + , TLB_HIT(0) + , TLB_MISS(0) + , TLB_EVICT(0) + , PTW(0) {}; +#else + { + if (pageSize != 0) + { + tlb_[0] = TLBEntry(0, 077); + } } -} +#endif void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { decoder_.map(start, end, m); } +#ifdef VM_ENABLE +std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) { + + //Find entry while accounting for different sizes. + for (auto entry : tlb_) + { + if(entry.first == vAddr >> entry.second.size_bits) + { + *size_bits = entry.second.size_bits; + vAddr = vAddr >> (*size_bits); + } + } + + + auto iter = tlb_.find(vAddr); + if (iter != tlb_.end()) { + TLBEntry e = iter->second; + + //Set mru bit if it is a hit. + iter->second.mru_bit = true; + + //If at full capacity and no other unset bits. + // Clear all bits except the one we just looked up. + if (tlb_.size() == TLB_SIZE) + { + // bool no_cleared = true; + // for (auto& entry : tlb_) + // { + // no_cleared = no_cleared & entry.second.mru_bit; + // } + + // if(no_cleared) + // { + for (auto& entry : tlb_) + { + entry.second.mru_bit = false; + } + iter->second.mru_bit = true; + //} + + } + //Check access permissions. 
+ if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) ) + { + throw Page_Fault_Exception("Page Fault : Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::LOAD) & (e.r == 0) ) + { + throw Page_Fault_Exception("Page Fault : Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::STORE) & (e.w == 0) ) + { + throw Page_Fault_Exception("Page Fault : Incorrect permissions."); + } + else + { + //TLB Hit + return std::make_pair(true, iter->second.pfn); + } + } else { + //TLB Miss + return std::make_pair(false, 0); + } +} +#endif //JAEWON MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) { auto iter = tlb_.find(vAddr / pageSize_); if (iter != tlb_.end()) { @@ -171,16 +245,40 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { return pAddr; } -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { +#ifdef VM_ENABLE +void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { + uint64_t pAddr; + if (this->mode == VA_MODE::BARE) { + pAddr = addr; + } else { + pAddr = vAddr_to_pAddr(addr, type); + } + return decoder_.read(data, pAddr, size); +} +#else +void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1); return decoder_.read(data, pAddr, size); } - -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type) { +#endif +#ifdef VM_ENABLE +void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { + uint64_t pAddr; + if ( (this->mode == VA_MODE::BARE) | (addr >= IO_BASE_ADDR) ) { + pAddr = addr; + } else { + pAddr = vAddr_to_pAddr(addr, type); + } + decoder_.write(data, pAddr, size); + amo_reservation_.valid = false; +} +#else +void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 
16 : 1); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; } +#endif void MemoryUnit::amo_reserve(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); @@ -193,9 +291,8 @@ bool MemoryUnit::amo_check(uint64_t addr) { return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } -void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) { - tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags); -} + +#ifdef VM_ENABLE void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits) { // HW: evict TLB by Most Recently Used @@ -219,6 +316,12 @@ void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t s } tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags, size_bits); } +#else + +void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) { + tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags); +} +#endif void MemoryUnit::tlbRm(uint64_t va) { if (tlb_.find(va / pageSize_) != tlb_.end()) @@ -472,3 +575,130 @@ void RAM::loadHexImage(const char* filename) { --size; } } + +#ifdef VM_ENABLE +uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) +{ + uint64_t pfn; + uint64_t size_bits; + + //First lookup TLB. + std::pair tlb_access = tlbLookup(vAddr, type, &size_bits); + if (tlb_access.first) + { + pfn = tlb_access.second; + TLB_HIT++; + } + else //Else walk the PT. + { + std::pair ptw_access = page_table_walk(vAddr, type, &size_bits); + tlbAdd(vAddr>>size_bits, ptw_access.first, ptw_access.second,size_bits); + pfn = ptw_access.first; TLB_MISS++; PTW++; + unique_translations.insert(vAddr>>size_bits); + PERF_UNIQUE_PTW = unique_translations.size(); + } + + //Construct final address using pfn and offset. 
+ std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; + return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); +} + +std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) +{ + uint64_t LEVELS = 2; + vAddr_SV32_t vAddr(vAddr_bits); + uint64_t pte_bytes; + + //Get base page table. + uint64_t a = this->ptbr << 12; + int i = LEVELS - 1; + + while(true) + { + + //Read PTE. + decoder_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); + PTE_SV32_t pte(pte_bytes); + + //Check if it has invalid flag bits. + if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) + { + throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry."); + } + + if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + //Not a leaf node as rwx == 000 + i--; + if (i < 0) + { + throw Page_Fault_Exception("Page Fault : No leaf node found."); + } + else + { + //Continue on to next level. + a = (pte_bytes >> 10 ) << 12; + } + } + else + { + //Leaf node found, finished walking. + a = (pte_bytes >> 10 ) << 12; + break; + } + } + + PTE_SV32_t pte(pte_bytes); + + //Check RWX permissions according to access type. + if ( (type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)) ) + { + throw Page_Fault_Exception("Page Fault : TYPE FETCH, Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) + { + throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + } + else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) + { + throw Page_Fault_Exception("Page Fault : TYPE STORE, Incorrect permissions."); + } + + uint64_t pfn; + if (i > 0) + { + //It is a super page. + if (pte.ppn[0] != 0) + { + //Misss aligned super page. + throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); + + } + else + { + //Valid super page. 
+ pfn = pte.ppn[1]; + *size_bits = 22; + } + } + else + { + //Regular page. + *size_bits = 12; + pfn = a >> 12; + } + return std::make_pair(pfn, pte_bytes & 0xff); +} + + +uint32_t MemoryUnit::get_satp() +{ + return satp; +} +void MemoryUnit::set_satp(uint32_t satp) +{ + this->satp = satp; + this->ptbr = satp & 0x003fffff; //22 bits + this->mode = satp & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; +} +#endif \ No newline at end of file diff --git a/sim/common/mem.h b/sim/common/mem.h index 76e2f2ae5..8477fb800 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -20,9 +20,18 @@ #include #include #include +#include "VX_config.h" +#ifdef VM_ENABLE +#include +#include +#include +#endif + namespace vortex { + +#ifdef VM_ENABLE enum VA_MODE { BARE, SV32 @@ -34,6 +43,14 @@ enum ACCESS_TYPE { FETCH }; +class Page_Fault_Exception : public std::runtime_error /* or logic_error */ +{ +public: + Page_Fault_Exception(const std::string& what = "") : std::runtime_error(what) {} + uint64_t addr; + ACCESS_TYPE type; +}; +#endif struct BadAddress {}; struct OutOfRange {}; @@ -92,34 +109,42 @@ public: PageFault(uint64_t a, bool nf) : faultAddr(a) , notFound(nf) - , access_type(ACCESS_TYPE::LOAD) + // , access_type(ACCESS_TYPE::LOAD) {} uint64_t faultAddr; bool notFound; - ACCESS_TYPE access_type; + // ACCESS_TYPE access_type; }; MemoryUnit(uint64_t pageSize = 0); void attach(MemDevice &m, uint64_t start, uint64_t end); - void read(void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); - void write(const void* data, uint64_t addr, uint64_t size, bool sup, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + +#ifdef VM_ENABLE + void read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + void write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE); +#else + void read(void* data, uint64_t addr, uint64_t size, bool sup); + void write(const void* data, uint64_t addr, uint64_t size, bool sup); 
+#endif void amo_reserve(uint64_t addr); bool amo_check(uint64_t addr); - void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); +#ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); + uint32_t get_satp(); + void set_satp(uint32_t satp); +#else + void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); +#endif void tlbRm(uint64_t vaddr); void tlbFlush() { tlb_.clear(); } - uint32_t get_satp(); - void set_satp(uint32_t satp); - private: struct amo_reservation_t { @@ -156,11 +181,7 @@ private: struct TLBEntry { TLBEntry() {} - TLBEntry(uint32_t pfn, uint32_t flags) - : pfn(pfn) - , flags(flags) - , mru_bit(true) - {}; + #ifdef VM_ENABLE TLBEntry(uint32_t pfn, uint32_t flags, uint64_t size_bits) : pfn(pfn) , flags(flags) @@ -182,17 +203,27 @@ private: } uint32_t pfn; - bool d, a, g, u, x, w, r, v; + uint32_t flags; bool mru_bit; uint64_t size_bits; + bool d, a, g, u, x, w, r, v; + #else + TLBEntry(uint32_t pfn, uint32_t flags) + : pfn(pfn) + , flags(flags) + {} + uint32_t pfn; uint32_t flags; + #endif }; +#ifdef VM_ENABLE std::pair tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits); uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); +#endif TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); @@ -203,14 +234,17 @@ private: ADecoder decoder_; bool enableVM_; + amo_reservation_t amo_reservation_; +#ifdef VM_ENABLE + uint32_t satp; VA_MODE mode; uint32_t ptbr; std::unordered_set unique_translations; uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; +#endif - amo_reservation_t amo_reservation_; }; /////////////////////////////////////////////////////////////////////////////// @@ -278,6 +312,7 @@ private: bool check_acl_; }; +#ifdef VM_ENABLE class PTE_SV32_t { @@ -299,6 +334,7 @@ class PTE_SV32_t bool d, a, g, u, x, w, r, v; PTE_SV32_t(uint64_t address) : address(address) { + assert((address>> 32) == 0 
&& "Upper 32 bits are not zero!"); flags = bits(address,0,7); rsw = bits(address,8,9); ppn[0] = bits(address,10,19); @@ -334,10 +370,12 @@ class vAddr_SV32_t uint64_t pgoff; vAddr_SV32_t(uint64_t address) : address(address) { + assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); vpn[0] = bits(address,12,21); vpn[1] = bits(address,22,31); pgoff = bits(address,0,11); } }; +#endif } // namespace vortex diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index ec5e3f2b6..0c5ff9f3f 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -106,6 +106,14 @@ void Cluster::attach_ram(RAM* ram) { } } +#ifdef VM_ENABLE +void Cluster::set_satp(uint32_t satp) { + for (auto& socket : sockets_) { + socket->set_satp(satp); + } +} +#endif + bool Cluster::running() const { for (auto& socket : sockets_) { if (socket->running()) diff --git a/sim/simx/cluster.h b/sim/simx/cluster.h index 253c54fb4..113ac04f7 100644 --- a/sim/simx/cluster.h +++ b/sim/simx/cluster.h @@ -57,6 +57,10 @@ public: void attach_ram(RAM* ram); + #ifdef VM_ENABLE + void set_satp(uint32_t satp); + #endif + bool running() const; int get_exitcode() const; diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 75aa47670..9a134b6ca 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -428,3 +428,10 @@ bool Core::wspawn(uint32_t num_warps, Word nextPC) { void Core::attach_ram(RAM* ram) { emulator_.attach_ram(ram); } + +#ifdef VM_ENABLE +void Core::set_satp(uint32_t satp) { + emulator_.set_satp(satp); //JAEWON wit, tid??? + // emulator_.set_csr(VX_CSR_SATP,satp,0,0); //JAEWON wit, tid??? 
+} +#endif \ No newline at end of file diff --git a/sim/simx/core.h b/sim/simx/core.h index c0e3d5de8..c18498a52 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -26,6 +26,7 @@ #include "dispatcher.h" #include "func_unit.h" #include "mem_coalescer.h" +#include "VX_config.h" namespace vortex { @@ -98,6 +99,9 @@ public: void tick(); void attach_ram(RAM* ram); +#ifdef VM_ENABLE + void set_satp(uint32_t satp); +#endif bool running() const; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index cd305bb0d..0214dbddd 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -269,10 +269,51 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { return false; } +#ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { - mmu_.read(data, addr, size, 0); + try + { + mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + } + catch (Page_Fault_Exception& page_fault) + { + std::cout<local_mem()->read(data, addr, size); + } else { + try + { + // mmu_.read(data, addr, size, 0); + mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + } + catch (Page_Fault_Exception& page_fault) + { + std::cout<= uint64_t(IO_COUT_ADDR) + && addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + this->writeToStdOut(data, addr, size); + } else { + if (type == AddrType::Shared) { + core_->local_mem()->write(data, addr, size); + } else { + try + { + // mmu_.write(data, addr, size, 0); + mmu_.write(data, addr, size, ACCESS_TYPE::STORE); + } + catch (Page_Fault_Exception& page_fault) + { + std::cout<= uint64_t(IO_COUT_ADDR) @@ -298,6 +365,7 @@ void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) { } DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl); } +#endif void Emulator::dcache_amo_reserve(uint64_t addr) { auto type = get_addr_type(addr); @@ -349,6 +417,10 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t 
wid) { auto core_perf = core_->perf_stats(); switch (addr) { case VX_CSR_SATP: +#ifdef VM_ENABLE + // return csrs_.at(wid).at(tid)[addr]; + return mmu_.get_satp(); +#endif case VX_CSR_PMPCFG0: case VX_CSR_PMPADDR0: case VX_CSR_MSTATUS: @@ -475,6 +547,12 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { csr_mscratch_ = value; break; case VX_CSR_SATP: + #ifdef VM_ENABLE + // warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F); + // csrs_.at(wid).at(tid)[addr] = value; //what is wid and tid? + mmu_.set_satp(value); + break; + #endif case VX_CSR_MSTATUS: case VX_CSR_MEDELEG: case VX_CSR_MIDELEG: @@ -493,6 +571,8 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { } } + + uint32_t Emulator::get_fpu_rm(uint32_t func3, uint32_t tid, uint32_t wid) { return (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, tid, wid) : func3; } diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index de466d352..9ee42812a 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -39,6 +39,9 @@ public: void clear(); void attach_ram(RAM* ram); +#ifdef VM_ENABLE + void set_satp(uint32_t satp) ; +#endif instr_trace_t* step(); @@ -122,6 +125,9 @@ private: uint32_t ipdom_size_; Word csr_mscratch_; wspawn_t wspawn_; +#ifdef VM_ENABLE + Word ptbr_; +#endif }; } diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 3807fa5e8..9644c2efb 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -95,6 +95,13 @@ void ProcessorImpl::attach_ram(RAM* ram) { cluster->attach_ram(ram); } } +#ifdef VM_ENABLE +void ProcessorImpl::set_satp(uint32_t satp) { + for (auto cluster : clusters_) { + cluster->set_satp(satp); + } +} +#endif void ProcessorImpl::run() { SimPlatform::instance().reset(); @@ -154,4 +161,17 @@ void Processor::run() { void Processor::dcr_write(uint32_t addr, uint32_t value) { return impl_->dcr_write(addr, value); -} \ No newline at end of file +} + +#ifdef VM_ENABLE +uint32_t 
Processor::get_satp() { + std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; + return this->satp; +} + +void Processor::set_satp(uint32_t satp) { + std::cout << "set SATP: 0x" << std::hex << this->satp << std::endl; + impl_->set_satp(satp); + this->satp = satp; +} +#endif diff --git a/sim/simx/processor.h b/sim/simx/processor.h index 003af6b0a..17340cf2c 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -14,6 +14,8 @@ #pragma once #include +#include +#include namespace vortex { @@ -31,9 +33,17 @@ public: void run(); void dcr_write(uint32_t addr, uint32_t value); +#ifdef VM_ENABLE + void set_processor_satp(VA_MODE mode); + uint32_t get_satp(); + void set_satp(uint32_t satp); +#endif private: ProcessorImpl* impl_; +#ifdef VM_ENABLE + uint32_t satp; +#endif }; } diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h index dcfba84d7..e6e9a4cf1 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -39,6 +39,11 @@ public: void dcr_write(uint32_t addr, uint32_t value); +#ifdef VM_ENABLE + // 32bit satp + void set_satp(uint32_t satp); +#endif + PerfStats perf_stats() const; private: diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp index d7e421b4b..9374bbc59 100644 --- a/sim/simx/socket.cpp +++ b/sim/simx/socket.cpp @@ -107,6 +107,14 @@ void Socket::attach_ram(RAM* ram) { } } +#ifdef VM_ENABLE +void Socket::set_satp(uint32_t satp) { + for (auto core : cores_) { + core->set_satp(satp); + } +} +#endif + bool Socket::running() const { for (auto& core : cores_) { if (core->running()) diff --git a/sim/simx/socket.h b/sim/simx/socket.h index ed38dce67..a09f73e8b 100644 --- a/sim/simx/socket.h +++ b/sim/simx/socket.h @@ -60,6 +60,10 @@ public: void attach_ram(RAM* ram); +#ifdef VM_ENABLE + void set_satp(uint32_t satp); +#endif + bool running() const; int get_exitcode() const; From 53c547f9de0d97e5c543f71bab728a89b785413f Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Thu, 13 Jun 2024 11:30:54 -0400 Subject: 
[PATCH 045/488] Change the declaration of set_processor_satp function --- runtime/simx/vortex.cpp | 21 +++++++++++++-------- sim/simx/processor.h | 1 - 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 6e5cafc38..2d1168179 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -75,8 +75,8 @@ public: // attach memory module processor_.attach_ram(&ram_); #ifdef VM_ENABLE - //Set - processor_.set_processor_satp(VM_ADDR_MODE); + //Set + set_processor_satp(VM_ADDR_MODE); #endif } @@ -133,13 +133,13 @@ public: bool is_pc = false; std::cout << "startup addr: " << std::hex << STARTUP_ADDR << std::endl; std::cout << "bit mode: " << std::dec << XLEN << std::endl; + if (get_mode() == VA_MODE::BARE) + return 0; + if (*dev_maddr == STARTUP_ADDR || *dev_maddr == 0x7FFFF000) { is_pc = true; } - if (get_mode() == VA_MODE::BARE) - return 0; - uint64_t ppn = *dev_maddr >> 12; uint64_t init_pAddr = *dev_maddr; uint64_t init_vAddr = *dev_maddr + 0xf0000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation @@ -188,9 +188,10 @@ public: return err; }); #ifdef VM_ENABLE - // VM address translation std::cout << "physical addr: " << std::hex << *dev_addr << std::endl; + // VM address translation map_local_mem(size, dev_addr); + std::cout << "virtual addr: " << std::hex << *dev_addr << std::endl; #endif *dev_addr = addr; return 0; @@ -342,7 +343,7 @@ public: #ifdef VM_ENABLE /* VM Management */ - void set_processor_satp(VA_MODE mode) + void set_processor_satp(VA_MODE mode) { uint32_t satp; if (mode == VA_MODE::BARE) @@ -546,9 +547,11 @@ public: uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - src[i] = (0x00000000 >> ((i & 0x3) * 8)) & 0xff; + src[i] = (0x00000000 >> ((i & 0x3) << 3)) & 0xff; } + ram_.enable_acl(false); ram_.write((const uint8_t*)src, addr, 
asize); + ram_.enable_acl(true); } void read_page_table(uint64_t addr) { @@ -567,7 +570,9 @@ public: src[i] = (value >> ((i & 0x3) * 8)) & 0xff; } //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; + ram_.enable_acl(false); ram_.write((const uint8_t*)src, addr, PTE_SIZE); + ram_.enable_acl(true); } uint64_t read_pte(uint64_t addr) { diff --git a/sim/simx/processor.h b/sim/simx/processor.h index 17340cf2c..e22f11569 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -34,7 +34,6 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - void set_processor_satp(VA_MODE mode); uint32_t get_satp(); void set_satp(uint32_t satp); #endif From 7b80da25382e8e4e23f0d6335b714ea752f7c918 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 14 Jun 2024 17:03:43 -0400 Subject: [PATCH 046/488] Update upload and download function in simx runtime --- hw/rtl/VX_config.vh | 52 ++++--- runtime/simx/vortex.cpp | 299 ++++++++++++++++++++++------------------ sim/simx/processor.cpp | 2 +- 3 files changed, 194 insertions(+), 159 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index a73de1d10..55ad0f113 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -14,27 +14,6 @@ `ifndef VX_CONFIG_VH `define VX_CONFIG_VH -`ifndef VM_DISABLE -`define VM_ENABLE -`endif -`ifdef VM_ENABLE - `ifndef VM_ADDR_MODE - `define VM_ADDR_MODE SV32 - `endif - - `ifndef PTE_SIZE - `define PTE_SIZE 8 - `endif - - `ifndef TLB_SIZE - `define TLB_SIZE 32 - `endif - - `ifndef SUPER_PAGING - `define SUPER_PAGING 0 - `endif - -`endif `ifndef MIN @@ -274,6 +253,37 @@ `define DEBUG_LEVEL 3 `endif +// Virtual Memory Configuration /////////////////////////////////////////////////////// +`ifndef VM_DISABLE +`define VM_ENABLE +`endif +`ifdef VM_ENABLE + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV32 + `endif + + `ifndef PTE_SIZE + `ifdef XLEN_32 + `define PTE_SIZE 4 + `else + `ifdef XLEN_64 + `define PTE_SIZE 8 + `else + `define PTE_SIZE 8 
+ `endif + `endif + `endif + + `ifndef TLB_SIZE + `define TLB_SIZE 32 + `endif + + `ifndef SUPER_PAGING + `define SUPER_PAGING 0 + `endif + +`endif + // Pipeline Configuration ///////////////////////////////////////////////////// // Issue width diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 2d1168179..64ba1653d 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -27,6 +27,26 @@ #include #include +#ifdef VM_ENABLE +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#endif + using namespace vortex; #ifdef VM_ENABLE @@ -128,32 +148,37 @@ public: #ifdef VM_ENABLE // VM SUPPORT - uint64_t map_local_mem(uint64_t size, uint64_t* dev_maddr) + uint64_t map_local_mem(uint64_t size, uint64_t* dev_pAddr) { - bool is_pc = false; - std::cout << "startup addr: " << std::hex << STARTUP_ADDR << std::endl; + bool no_trans = false; + std::cout << __PRETTY_FUNCTION__ << std::endl; + // std::cout << "startup addr: 0x" << std::hex << STARTUP_ADDR << std::endl; + std::cout << "Input device physical addr: 0x" << std::hex << *dev_pAddr<< std::endl; std::cout << "bit mode: " << std::dec << XLEN << std::endl; - if (get_mode() == VA_MODE::BARE) - return 0; - if (*dev_maddr == STARTUP_ADDR || *dev_maddr == 0x7FFFF000) { - is_pc = true; + // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { + if (*dev_pAddr >= 0xF0000000 ) + no_trans = true; } - uint64_t ppn = *dev_maddr >> 12; - uint64_t init_pAddr = *dev_maddr; - uint64_t init_vAddr = *dev_maddr + 0xf0000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation - init_vAddr = (init_vAddr >> 12) << 12; - uint64_t vpn; + if (get_mode() == VA_MODE::BARE || no_trans == true) + { + std::cout << "No Translation is needed." 
<< std::endl; + return 0; + } - //dev_maddr can be of size greater than a page, but we have to map and update + uint64_t init_pAddr = *dev_pAddr; + uint64_t init_vAddr = *dev_pAddr + 0xf000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation + uint64_t ppn = 0, vpn = 0 ; + + + //dev_pAddr can be of size greater than a page, but we have to map and update //page tables on a page table granularity. So divide the allocation into pages. - for (ppn = (*dev_maddr) >> 12; ppn < ((*dev_maddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) + for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) { //Currently a 1-1 mapping is used, this can be changed here to support different //mapping schemes - vpn = is_pc ? ppn : ppn + 0xf0000; - //vpn = ppn; + vpn = ppn + (0xf000000 >> 12); //If ppn to vpn mapping doesnt exist. if (addr_mapping.find(vpn) == addr_mapping.end()) @@ -164,21 +189,23 @@ public: } } - std::cout << "mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; - uint64_t size_bits; - if (is_pc) { - std::cout << "not returning virtual address because it is PC or stack" << std::endl; - std::pair ptw_access = page_table_walk(init_vAddr - 0xf0000000, &size_bits); - return 0; - } else { - std::pair ptw_access = page_table_walk(init_vAddr, &size_bits); + std::cout << "Mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; + // sanity check + uint64_t pAddr = page_table_walk(init_vAddr); + if (pAddr != init_pAddr) + { + std::cout << "ERROR" << pAddr << "and" << init_pAddr << " is not the same" < GLOBAL_MEM_SIZE) - return -1; - + int upload(uint64_t dest_addr, const void* src, uint64_t size) { + std::cout << __PRETTY_FUNCTION__ << std::endl; + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); + if (dest_addr + asize > GLOBAL_MEM_SIZE) + return -1; #ifdef VM_ENABLE - uint64_t pAddr = dest_addr; // map_local_mem overwrites the 
provided dest_addr, so store away physical destination address - if (dest_addr >= STARTUP_ADDR) { - map_local_mem(asize,&dest_addr); - } else if (dest_addr >= 0x7fff0000) - { - map_local_mem(asize,&dest_addr); - } - std::cout << "uploading to 0x" << pAddr << "(VA)" << std::endl; - dest_addr = pAddr; + uint64_t pAddr = page_table_walk(dest_addr); + std::cout << "== Upload data to vAddr = 0x" << std::hex < GLOBAL_MEM_SIZE) return -1; +#ifdef VM_ENABLE + uint64_t pAddr = page_table_walk(src_addr); + std::cout << "== Download data to vAddr = 0x" << std::hex <> 10) | 0x80000000; - // satp = 0xFEBFE000 ; + satp = (alloc_first_level_page_table() >> 12) | 0x80000000; } processor_.set_satp(satp); } @@ -365,22 +391,23 @@ public: VA_MODE get_mode() { return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; - // return VA_MODE::SV32; } - void update_page_table(uint64_t pAddr, uint64_t vAddr) { - std::cout << "mapping vpn: " << vAddr << " to ppn:" << pAddr << std::endl; + void update_page_table(uint64_t ppn, uint64_t vpn) { + std::cout << __PRETTY_FUNCTION__ << std::endl; + std::cout << "mapping vpn: " << std::hex << vpn << " to ppn:" << ppn << std::endl; //Updating page table with the following mapping of (vAddr) to (pAddr). + // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); + uint32_t page_bit_shift = 12; uint64_t ppn_1, pte_addr, pte_bytes; - uint64_t vpn_1 = bits(vAddr, 10, 19); - uint64_t vpn_0 = bits(vAddr, 0, 9); + uint64_t vpn_1 = bits(vpn, 10, 19); + uint64_t vpn_0 = bits(vpn, 0, 9); //Read first level PTE. pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); pte_bytes = read_pte(pte_addr); std::cout << "[PTE] addr 0x" << std::hex << pte_addr << ", PTE 0x" << std::hex << pte_bytes << std::endl; - if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) { //If valid bit set, proceed to next level using new ppn form PTE. 
@@ -392,13 +419,14 @@ public: //If valid bit not set, allocate a second level page table // in device memory and store ppn in PTE. Set rwx = 000 in PTE //to indicate this is a pointer to the next level of the page table. - ppn_1 = (alloc_page_table() >> 12); - pte_bytes = ( (ppn_1 << 10) | 0b0000000001) ; + std::cout << "PTE invalid, get second page table..." << std::endl; + ppn_1 = (alloc_second_level_page_table() >> 12); + pte_bytes = ((ppn_1 << 10) | 0b0000000001) ; write_pte(pte_addr, pte_bytes); } //Read second level PTE. - pte_addr = (ppn_1 << 12) + (vpn_0 * PTE_SIZE); + pte_addr = (ppn_1 << page_bit_shift) + (vpn_0 * PTE_SIZE); pte_bytes = read_pte(pte_addr); std::cout << "got pte: " << std::hex << pte_bytes << std::endl; @@ -412,10 +440,11 @@ public: { //If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE //to indicate this is a leaf PTE and has the stated permissions. - pte_bytes = ( (pAddr << 10) | 0b0000001111) ; + pte_bytes = ( (ppn << 10) | 0b0000001111) ; write_pte(pte_addr, pte_bytes); //If super paging is enabled. + /* if (SUPER_PAGING) { //Check if this second level Page Table can be promoted to a super page. Brute force @@ -444,130 +473,118 @@ public: write_pte(pte_addr, pte_bytes); } } + */ } } - std::pair page_table_walk(uint64_t vAddr_bits, uint64_t* size_bits) + uint64_t page_table_walk(uint64_t vAddr_bits) { + + std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_bytes; + uint64_t pte_addr, pte_bytes; + uint64_t pt_ba = get_ptbr() << 12; - std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; //Get base page table. - uint64_t a = this->processor_.get_satp() << 12; - std::cout << "PTW SATP: 0x" << a << std::endl; - int i = LEVELS - 1; - while(true) + for ( i = LEVELS-1 ; i >= 0 ; i--) { - - //Read PTE. 
- std::cout << "reading PTE from RAM addr 0x" << std::hex << (a+vAddr.vpn[i]*PTE_SIZE) << std::endl; - ram_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); - //pte_bytes &= 0x00000000FFFFFFFF; - PTE_SV32_t pte(pte_bytes); - std::cout << "got pte: " << std::hex << pte_bytes << std::endl; - - //Check if it has invalid flag bits. - if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) - { - std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; - throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); - } - - if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - //Not a leaf node as rwx == 000 - i--; - if (i < 0) + //Read PTE. + pte_addr = pt_ba+vAddr.vpn[i]*PTE_SIZE; + std::cout << "reading PTE from RAM addr 0x" << std::hex << (pte_addr) << std::endl; + pte_bytes = read_pte(pte_addr); + pte_bytes &= 0x00000000FFFFFFFF; // Only for 32 bit + PTE_SV32_t pte(pte_bytes); + std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + + //Check if it has invalid flag bits. + if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { - throw Page_Fault_Exception("Page Fault : No leaf node found."); + std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; + throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); + } + + if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + //Not a leaf node as rwx == 000 + if (i == 0) + throw Page_Fault_Exception("Page Fault : No leaf node found."); + else + { + //Continue on to next level. + pt_ba = (pte_bytes >> 10 ) << 12; + std::cout << "next pt_ba: " << pt_ba << std::endl; + } } else { - //Continue on to next level. - a = (pte_bytes >> 10 ) << 12; - std::cout << "next a: " << a << std::endl; + //Leaf node found, finished walking. + pt_ba = (pte_bytes >> 10 ) << 12; + std::cout << "Found PPN 0 = 0x" << pt_ba << std::endl; + break; } - } - else - { - //Leaf node found, finished walking. 
- a = (pte_bytes >> 10 ) << 12; - break; - } + } + // pte_bytes is final leaf PTE_SV32_t pte(pte_bytes); - //Check RWX permissions according to access type. if (pte.r == 0) { - throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); } - - uint64_t pfn; - if (i > 0) - { - //It is a super page. - if (pte.ppn[0] != 0) - { - //Misss aligned super page. - throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); - - } - else - { - //Valid super page. - pfn = pte.ppn[1]; - *size_bits = 22; - } - } - else - { //Regular page. - *size_bits = 12; - pfn = a >> 12; - } - return std::make_pair(pfn, pte_bytes & 0xff); + + uint64_t paddr = pt_ba << 12 + vAddr.pgoff; + return paddr } - uint64_t alloc_page_table() { - uint64_t addr; - global_mem_.allocate(RAM_PAGE_SIZE, &addr); + uint64_t alloc_first_level_page_table() { + uint64_t addr=0xF0000000; + uint64_t size=1<<23; + CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { + return err; + }); + // global_mem_.allocate(RAM_PAGE_SIZE, &addr); + std::cout << "address of page table 0x" << std::hex << addr << std::endl; + init_page_table(addr,size); + return addr; + } + uint64_t alloc_second_level_page_table(uint64_t vpn_1) { + uint64_t addr = 0xF0000000 + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1) std::cout << "address of page table 0x" << std::hex << addr << std::endl; - init_page_table(addr); return addr; } - - void init_page_table(uint64_t addr) { - uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); + void init_page_table(uint64_t addr, uint64_t size) { + // uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - src[i] = (0x00000000 >> ((i & 0x3) << 3)) & 0xff; + // src[i] = (value >> (i << 3)) & 0xff; + src[i] = 0; } ram_.enable_acl(false); 
ram_.write((const uint8_t*)src, addr, asize); ram_.enable_acl(true); } - void read_page_table(uint64_t addr) { - uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; - download(dest, addr, RAM_PAGE_SIZE); - printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); - for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { - printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); - } - } + // void read_page_table(uint64_t addr) { + // uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; + // download(dest, addr, RAM_PAGE_SIZE); + // printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); + // for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { + // printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + // } + // } void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { - std::cout << "writing pte " << std::hex << value << " to pAddr: " << std::hex << addr << std::endl; + std::cout << "writing pte 0x" << std::hex << value << " to pAddr: 0x" << std::hex << addr << std::endl; uint8_t *src = new uint8_t[PTE_SIZE]; for (uint64_t i = 0; i < PTE_SIZE; ++i) { - src[i] = (value >> ((i & 0x3) * 8)) & 0xff; + src[i] = (value >> (i << 3)) & 0xff; } //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; ram_.enable_acl(false); @@ -577,9 +594,17 @@ public: uint64_t read_pte(uint64_t addr) { uint8_t *dest = new uint8_t[PTE_SIZE]; + uint64_t mask = 0; + if (XLEN == 32) + mask = 0xFFFFFFFF; + else if (XLEN == 64) + mask = 0xFFFFFFFFFFFFFFFF; + else + assert(0, "XLEN is not either 32 or 64") + std::cout << "[read_pte] reading PTE from RAM addr 0x" << std::hex << addr << std::endl; ram_.read((uint8_t*)dest, addr, PTE_SIZE); - return *(uint64_t*)((uint8_t*)dest); + return (*(uint64_t*)((uint8_t*)dest)) & mask; } #endif // JAEWON diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 9644c2efb..ecc1474e1 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -165,7 +165,7 @@ void 
Processor::dcr_write(uint32_t addr, uint32_t value) { #ifdef VM_ENABLE uint32_t Processor::get_satp() { - std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; + // std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; return this->satp; } From da9c51aa3f3f5655b4c08d79e5b6a2e87d5d72fb Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 16 Jun 2024 19:05:38 -0400 Subject: [PATCH 047/488] Virtual Memory Support --- hw/rtl/VX_config.vh | 22 ++- runtime/simx/vortex.cpp | 299 ++++++++++++++++++++++------------------ sim/common/mem.cpp | 116 ++++++++++------ sim/common/mem.h | 25 ++-- sim/simx/emulator.cpp | 4 + sim/simx/processor.cpp | 1 - 6 files changed, 275 insertions(+), 192 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 55ad0f113..6d3738489 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -171,7 +171,15 @@ `define IO_BASE_ADDR 64'h000000040 `endif -`else +`ifndef PAGE_TABLE_BASE_ADDR +`define PAGE_TABLE_BASE_ADDR 64'h1F0000000 +`endif + +`ifndef PAGE_TABLE_SIZE +`define PAGE_TABLE_SIZE 4096 +`endif + +`else # XLEN_32 `ifndef STACK_BASE_ADDR `define STACK_BASE_ADDR 32'hFFFF0000 @@ -189,6 +197,14 @@ `define IO_BASE_ADDR 32'h00000040 `endif +`ifndef PAGE_TABLE_BASE_ADDR +`define PAGE_TABLE_BASE_ADDR 32'hF0000000 +`endif + +`ifndef PAGE_TABLE_SIZE +`define PAGE_TABLE_SIZE 4096 +`endif + `endif `define IO_END_ADDR `USER_BASE_ADDR @@ -265,13 +281,17 @@ `ifndef PTE_SIZE `ifdef XLEN_32 `define PTE_SIZE 4 + `define NUM_PTE_ENTRY 1024 `else `ifdef XLEN_64 `define PTE_SIZE 8 + `define NUM_PTE_ENTRY 1024 `else `define PTE_SIZE 8 + `define NUM_PTE_ENTRY 1024 `endif `endif + `define PT_SIZE (PTE_SIZE * NUM_PTE_ENTRY) `endif `ifndef TLB_SIZE diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 64ba1653d..816ca3081 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -83,17 +83,18 @@ bool bit(uint64_t addr, uint8_t idx) class vx_device { public: - vx_device() - : 
arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) - , ram_(0, RAM_PAGE_SIZE) - , processor_(arch_) - , global_mem_(ALLOC_BASE_ADDR, - GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, - RAM_PAGE_SIZE, - CACHE_BLOCK_SIZE) - { - // attach memory module - processor_.attach_ram(&ram_); + vx_device() + : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) +#ifdef VM_ENABLE + , ram_(0, RAM_PAGE_SIZE<<11) +#else + , ram_(0, RAM_PAGE_SIZE) +#endif + , processor_(arch_) + , global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE) + { + // attach memory module + processor_.attach_ram(&ram_); #ifdef VM_ENABLE //Set set_processor_satp(VM_ADDR_MODE); @@ -101,6 +102,9 @@ public: } ~vx_device() { +#ifdef VM_ENABLE + this->mem_free(PAGE_TABLE_BASE_ADDR); // Right position? +#endif if (future_.valid()) { future_.wait(); } @@ -147,66 +151,90 @@ public: } #ifdef VM_ENABLE - // VM SUPPORT - uint64_t map_local_mem(uint64_t size, uint64_t* dev_pAddr) + // virtual to phycial mapping + uint64_t map_p2v(uint64_t pAddr) { - bool no_trans = false; - std::cout << __PRETTY_FUNCTION__ << std::endl; - // std::cout << "startup addr: 0x" << std::hex << STARTUP_ADDR << std::endl; - std::cout << "Input device physical addr: 0x" << std::hex << *dev_pAddr<< std::endl; - std::cout << "bit mode: " << std::dec << XLEN << std::endl; + return pAddr + 0xf000000; + } + bool need_trans(uint64_t dev_pAddr) + { + // Check if the this is the BARE mode + bool isBAREMode = (get_mode() == VA_MODE::BARE); + // Check if the address is reserved + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && 
!isReserved && !isStartAddress); + } + + uint64_t phy_to_virt_map(uint64_t size, uint64_t* dev_pAddr, uint32_t flags) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("(size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x)\n", size, *dev_pAddr, flags); + DBGPRINT("bit mode: %d\n", XLEN); // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { - if (*dev_pAddr >= 0xF0000000 ) - no_trans = true; - } - if (get_mode() == VA_MODE::BARE || no_trans == true) + if (!need_trans(*dev_pAddr)) { - std::cout << "No Translation is needed." << std::endl; + DBGPRINT("Translation is not needed.\n"); return 0; } uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = *dev_pAddr + 0xf000000; // vpn will change, but we want to return the vpn of the beginning of the virtual allocation + uint64_t init_vAddr = map_p2v(init_pAddr); uint64_t ppn = 0, vpn = 0 ; - //dev_pAddr can be of size greater than a page, but we have to map and update //page tables on a page table granularity. So divide the allocation into pages. + bool is_start = false; for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) { + vpn = map_p2v(ppn << 12) >> 12; + if (is_start == false) { + DBGPRINT("**Search vpn in page table:0x%lx\n", vpn); + is_start = true; + } + else { + DBGPRINT("Next vpn: 0x%lx\n",vpn); + } + //Currently a 1-1 mapping is used, this can be changed here to support different //mapping schemes - vpn = ppn + (0xf000000 >> 12); //If ppn to vpn mapping doesnt exist. if (addr_mapping.find(vpn) == addr_mapping.end()) { //Create mapping. 
- update_page_table(ppn, vpn); + update_page_table(ppn, vpn, flags); addr_mapping[vpn] = ppn; } } + DBGPRINT("Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); - std::cout << "Mapped virtual addr: " << init_vAddr << " to physical addr: " << init_pAddr << std::endl; - // sanity check + // Sanity check uint64_t pAddr = page_table_walk(init_vAddr); if (pAddr != init_pAddr) { - std::cout << "ERROR" << pAddr << "and" << init_pAddr << " is not the same" <mem_access(dev_addr, size, flags), { - global_mem_.release(dev_addr); - return err; - }); - return 0; - } + int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { + CHECK_ERR(global_mem_.reserve(dev_addr, size), { + return err; + }); + DBGPRINT("mem_reserve: addr: 0x%lx, size: 0x%lx\n",dev_addr, size); + CHECK_ERR(this->mem_access(dev_addr, size, flags), { + global_mem_.release(dev_addr); + return err; + }); +#ifdef VM_ENABLE + uint64_t paddr = dev_addr; + phy_to_virt_map(size, &paddr, flags); +#endif + return 0; + } - int mem_free(uint64_t dev_addr) { - return global_mem_.release(dev_addr); - } + int mem_free(uint64_t dev_addr) { +#ifdef VM_ENABLE + uint64_t pAddr = page_table_walk(dev_addr); + // VM address translation + return global_mem_.release(pAddr); +#else + return global_mem_.release(dev_addr); +#endif + } int mem_access(uint64_t dev_addr, uint64_t size, int flags) { uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); @@ -255,17 +294,13 @@ public: } int upload(uint64_t dest_addr, const void* src, uint64_t size) { - std::cout << __PRETTY_FUNCTION__ << std::endl; + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (dest_addr + asize > GLOBAL_MEM_SIZE) return -1; #ifdef VM_ENABLE uint64_t pAddr = page_table_walk(dest_addr); - std::cout << "== Upload data to vAddr = 0x" << std::hex <> 12) | 0x80000000; + satp = (alloc_2nd_level_page_table() >> 12) | 0x80000000; + DBGPRINT("VA_MODE = SV32 MODE(satp = 0x%x)\n",satp); } 
processor_.set_satp(satp); } @@ -387,52 +427,62 @@ public: // return processor_.get_satp(); return processor_.get_satp() & 0x003fffff; } + uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) + { + return (base_page << 12) + (vpn * PTE_SIZE); + } VA_MODE get_mode() { return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; } - void update_page_table(uint64_t ppn, uint64_t vpn) { - std::cout << __PRETTY_FUNCTION__ << std::endl; - std::cout << "mapping vpn: " << std::hex << vpn << " to ppn:" << ppn << std::endl; + void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn,flag); + assert((((ppn>> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); //Updating page table with the following mapping of (vAddr) to (pAddr). // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); - uint32_t page_bit_shift = 12; - uint64_t ppn_1, pte_addr, pte_bytes; + uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; uint64_t vpn_1 = bits(vpn, 10, 19); uint64_t vpn_0 = bits(vpn, 0, 9); //Read first level PTE. - pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); + DBGPRINT("Start second-level page table\n"); + pte_addr = get_pte_address(get_ptbr(), vpn_1); pte_bytes = read_pte(pte_addr); - std::cout << "[PTE] addr 0x" << std::hex << pte_addr << ", PTE 0x" << std::hex << pte_bytes << std::endl; + DBGPRINT("[PTE] addr 0x%lx, PTE 0x%lx\n", pte_addr, pte_bytes); + ppn_1 = (pte_bytes >> 10); if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) { //If valid bit set, proceed to next level using new ppn form PTE. - std::cout << "PTE valid, continuing the walk..." << std::endl; - ppn_1 = (pte_bytes >> 10); + DBGPRINT("PTE valid (ppn 0x%lx), continuing the walk...\n",ppn_1); } else { //If valid bit not set, allocate a second level page table // in device memory and store ppn in PTE. 
Set rwx = 000 in PTE //to indicate this is a pointer to the next level of the page table. - std::cout << "PTE invalid, get second page table..." << std::endl; - ppn_1 = (alloc_second_level_page_table() >> 12); + DBGPRINT("PTE Invalid (ppn 0x%lx), continuing the walk...\n",ppn_1); + ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); pte_bytes = ((ppn_1 << 10) | 0b0000000001) ; + assert((pte_addr>> 32) == 0 && "Upper 32 bits are not zero!"); write_pte(pte_addr, pte_bytes); + // if (pte_bytes != read_pte(pte_addr)) + // DBGPRINT("Read/write values are different!\n"); } + + DBGPRINT("Move to first-level page table\n"); //Read second level PTE. - pte_addr = (ppn_1 << page_bit_shift) + (vpn_0 * PTE_SIZE); + pte_addr = get_pte_address(ppn_1, vpn_0); pte_bytes = read_pte(pte_addr); - std::cout << "got pte: " << std::hex << pte_bytes << std::endl; if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) { - std::cout << "ERROR, shouldn't be here" << std::endl; + DBGPRINT("ERROR, shouldn't be here\n"); + exit(1); //If valid bit is set, then the page is already allocated. //Should not reach this point, a sanity check. } @@ -442,87 +492,62 @@ public: //to indicate this is a leaf PTE and has the stated permissions. pte_bytes = ( (ppn << 10) | 0b0000001111) ; write_pte(pte_addr, pte_bytes); - - //If super paging is enabled. - /* - if (SUPER_PAGING) - { - //Check if this second level Page Table can be promoted to a super page. Brute force - //method is used to iterate over all PTE entries of the table and check if they have - //their valid bit set. - bool superpage = true; - for(int i = 0; i < 1024; i++) - { - pte_addr = (ppn_1 << 12) + (i * PTE_SIZE); - pte_bytes = read_pte(pte_addr); - - if (!bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - superpage = false; - break; - } - } - if (superpage) - { - //This can be promoted to a super page. Set root PTE to the first PTE of the - //second level. 
This is because the first PTE of the second level already has the - //correct PPN1, PPN0 set to zero and correct access bits. - pte_addr = (ppn_1 << 12); - pte_bytes = read_pte(pte_addr); - pte_addr = (get_ptbr() << 12) + (vpn_1 * PTE_SIZE); - write_pte(pte_addr, pte_bytes); - } - } - */ + if (pte_bytes != read_pte(pte_addr)) + DBGPRINT("Read/write values are different!\n"); } } uint64_t page_table_walk(uint64_t vAddr_bits) { - - std::cout << "PTW on vAddr: 0x" << std::hex << vAddr_bits << std::endl; + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("PTW on vAddr: 0x%lx\n", vAddr_bits); + if (!need_trans(vAddr_bits)) + { + DBGPRINT("Translation is not needed.\n"); + return vAddr_bits; + } uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); uint64_t pte_addr, pte_bytes; uint64_t pt_ba = get_ptbr() << 12; - //Get base page table. - for ( i = LEVELS-1 ; i >= 0 ; i--) + for ( int i = LEVELS-1 ; i >= 0 ; i--) { //Read PTE. pte_addr = pt_ba+vAddr.vpn[i]*PTE_SIZE; - std::cout << "reading PTE from RAM addr 0x" << std::hex << (pte_addr) << std::endl; pte_bytes = read_pte(pte_addr); - pte_bytes &= 0x00000000FFFFFFFF; // Only for 32 bit PTE_SV32_t pte(pte_bytes); - std::cout << "got pte: " << std::hex << pte_bytes << std::endl; + DBGPRINT("pte_bytes = 0x%lx, pte flags = %u)\n", pte.ppn , pte.flags); //Check if it has invalid flag bits. if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { - std::cout << "Error on vAddr 0x" << std::hex << vAddr_bits << std::endl; - throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry. Entry: 0x"); + std::string msg= "Page Fault : Attempted to access invalid entry. Entry: 0x"; + throw Page_Fault_Exception(msg); } if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) { //Not a leaf node as rwx == 000 if (i == 0) + { throw Page_Fault_Exception("Page Fault : No leaf node found."); + } else { //Continue on to next level. 
- pt_ba = (pte_bytes >> 10 ) << 12; - std::cout << "next pt_ba: " << pt_ba << std::endl; + pt_ba = pte.ppn << 12; + DBGPRINT("next pt_ba: %p\n", (void *)pt_ba); + } } else { //Leaf node found, finished walking. - pt_ba = (pte_bytes >> 10 ) << 12; - std::cout << "Found PPN 0 = 0x" << pt_ba << std::endl; + pt_ba = pte.ppn << 12; + DBGPRINT("Found PT_Base_Address [%d] = %lx\n", i, pt_ba); break; } @@ -535,35 +560,35 @@ public: { throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); } - //Regular page. - uint64_t paddr = pt_ba << 12 + vAddr.pgoff; - return paddr + uint64_t paddr = pt_ba + vAddr.pgoff; + return paddr; } - uint64_t alloc_first_level_page_table() { - uint64_t addr=0xF0000000; - uint64_t size=1<<23; + uint64_t alloc_2nd_level_page_table() { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t addr=PAGE_TABLE_BASE_ADDR; + uint64_t size=1<<23; // 8MB !!!FIXME!!! CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { return err; }); - // global_mem_.allocate(RAM_PAGE_SIZE, &addr); - std::cout << "address of page table 0x" << std::hex << addr << std::endl; - init_page_table(addr,size); + init_page_table(addr); return addr; } - uint64_t alloc_second_level_page_table(uint64_t vpn_1) { - uint64_t addr = 0xF0000000 + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1) - std::cout << "address of page table 0x" << std::hex << addr << std::endl; + uint64_t alloc_1st_level_page_table(uint64_t vpn_1) { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t addr = PAGE_TABLE_BASE_ADDR + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1); + init_page_table(addr); return addr; } - void init_page_table(uint64_t addr, uint64_t size) { - // uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); - uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); + void init_page_table(uint64_t addr) { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT("int_page_table (addr=0x%lx)\n", addr); + uint64_t asize = aligned_size(RAM_PAGE_SIZE, 
CACHE_BLOCK_SIZE); + // uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - // src[i] = (value >> (i << 3)) & 0xff; src[i] = 0; } ram_.enable_acl(false); @@ -574,14 +599,14 @@ public: // void read_page_table(uint64_t addr) { // uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; // download(dest, addr, RAM_PAGE_SIZE); - // printf("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); + // DBGPRINT("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); // for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { - // printf("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + // DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); // } // } void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { - std::cout << "writing pte 0x" << std::hex << value << " to pAddr: 0x" << std::hex << addr << std::endl; + DBGPRINT("[Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr); uint8_t *src = new uint8_t[PTE_SIZE]; for (uint64_t i = 0; i < PTE_SIZE; ++i) { src[i] = (value >> (i << 3)) & 0xff; @@ -596,15 +621,17 @@ public: uint8_t *dest = new uint8_t[PTE_SIZE]; uint64_t mask = 0; if (XLEN == 32) - mask = 0xFFFFFFFF; + mask = 0x00000000FFFFFFFF; else if (XLEN == 64) mask = 0xFFFFFFFFFFFFFFFF; else - assert(0, "XLEN is not either 32 or 64") + assert(0 && "XLEN is not either 32 or 64"); - std::cout << "[read_pte] reading PTE from RAM addr 0x" << std::hex << addr << std::endl; ram_.read((uint8_t*)dest, addr, PTE_SIZE); - return (*(uint64_t*)((uint8_t*)dest)) & mask; + uint64_t ret = (*(uint64_t*)((uint8_t*)dest)) & mask; + DBGPRINT("[read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr); + + return ret; } #endif // JAEWON diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index b55d0de9a..98eefdaf2 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -115,6 +115,7 @@ void MemoryUnit::ADecoder::map(uint64_t start, 
uint64_t end, MemDevice &md) { } void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { + // printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -124,6 +125,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { } void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { + // printf("====%s====\n", __PRETTY_FUNCTION__); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -158,6 +160,7 @@ void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { #ifdef VM_ENABLE std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) { + // printf("====%s====\n", __PRETTY_FUNCTION__); //Find entry while accounting for different sizes. for (auto entry : tlb_) @@ -220,7 +223,7 @@ std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type return std::make_pair(false, 0); } } -#endif //JAEWON +#else MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) { auto iter = tlb_.find(vAddr / pageSize_); if (iter != tlb_.end()) { @@ -244,52 +247,62 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { } return pAddr; } +#endif #ifdef VM_ENABLE -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { +void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { + // printf("====%s====\n", __PRETTY_FUNCTION__); uint64_t pAddr; - if (this->mode == VA_MODE::BARE) { - pAddr = addr; - } else { - pAddr = vAddr_to_pAddr(addr, type); - } + pAddr = vAddr_to_pAddr(addr, type); return decoder_.read(data, pAddr, size); } #else -void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, bool sup) 
{ uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1); return decoder_.read(data, pAddr, size); } #endif #ifdef VM_ENABLE -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type) { +void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { + // printf("====%s====\n", __PRETTY_FUNCTION__); uint64_t pAddr; - if ( (this->mode == VA_MODE::BARE) | (addr >= IO_BASE_ADDR) ) { - pAddr = addr; - } else { - pAddr = vAddr_to_pAddr(addr, type); - } + pAddr = vAddr_to_pAddr(addr, type); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; } #else -void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) { +void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, bool sup) { uint64_t pAddr = this->toPhyAddr(addr, sup ? 16 : 1); decoder_.write(data, pAddr, size); amo_reservation_.valid = false; } #endif +#ifdef VM_ENABLE +void MemoryUnit::amo_reserve(uint64_t addr) { + uint64_t pAddr = this->vAddr_to_pAddr(addr,ACCESS_TYPE::LOAD); + amo_reservation_.addr = pAddr; + amo_reservation_.valid = true; +} +#else void MemoryUnit::amo_reserve(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); amo_reservation_.addr = pAddr; amo_reservation_.valid = true; } +#endif +#ifdef VM_ENABLE +bool MemoryUnit::amo_check(uint64_t addr) { + uint64_t pAddr = this->vAddr_to_pAddr(addr, ACCESS_TYPE::LOAD); + return amo_reservation_.valid && (amo_reservation_.addr == pAddr); +} +#else bool MemoryUnit::amo_check(uint64_t addr) { uint64_t pAddr = this->toPhyAddr(addr, 1); return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } +#endif #ifdef VM_ENABLE @@ -465,6 +478,7 @@ uint8_t *RAM::get(uint64_t address) const { } void RAM::read(void* data, uint64_t addr, uint64_t size) { + // printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size); if (check_acl_ && acl_mngr_.check(addr, size, 0x1) == false) { throw BadAddress(); } @@ -577,15 +591,41 @@ void 
RAM::loadHexImage(const char* filename) { } #ifdef VM_ENABLE + +bool MemoryUnit::need_trans(uint64_t dev_pAddr) +{ + // Check if the this is the BARE mode + bool isBAREMode = (this->mode == VA_MODE::BARE); + // Check if the address is reserved + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr < (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("0x%lx, %u, %u, %u \n", dev_pAddr,isBAREMode, isReserved, isStartAddress); + + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && !isReserved && !isStartAddress); +} + uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) { uint64_t pfn; uint64_t size_bits; + // printf("====%s====\n", __PRETTY_FUNCTION__); + // printf("vaddr = 0x%lx, type = 0x%u\n",vAddr,type); + if (!need_trans(vAddr)) + { + // printf("Translation is not needed.\n"); + return vAddr; + } //First lookup TLB. std::pair tlb_access = tlbLookup(vAddr, type, &size_bits); if (tlb_access.first) { + + // printf("Found pfn %lx in TLB\n",tlb_access.second); pfn = tlb_access.second; TLB_HIT++; } @@ -596,33 +636,37 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) pfn = ptw_access.first; TLB_MISS++; PTW++; unique_translations.insert(vAddr>>size_bits); PERF_UNIQUE_PTW = unique_translations.size(); + } //Construct final address using pfn and offset. 
- std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; + // std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); } std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) { + // printf("====%s====\n", __PRETTY_FUNCTION__); + // printf("vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_bytes; + uint64_t pte_bytes = 0; //Get base page table. - uint64_t a = this->ptbr << 12; + uint64_t pt_ba = this->ptbr << 12; int i = LEVELS - 1; while(true) { //Read PTE. - decoder_.read(&pte_bytes, a+vAddr.vpn[i]*PTE_SIZE, sizeof(uint64_t)); + decoder_.read(&pte_bytes, pt_ba+vAddr.vpn[i]*PTE_SIZE, PTE_SIZE); PTE_SV32_t pte(pte_bytes); //Check if it has invalid flag bits. if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry."); } @@ -632,18 +676,19 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC i--; if (i < 0) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : No leaf node found."); } else { //Continue on to next level. - a = (pte_bytes >> 10 ) << 12; + pt_ba = (pte_bytes >> 10 ) << 12; } } else { //Leaf node found, finished walking. - a = (pte_bytes >> 10 ) << 12; + pt_ba = (pte_bytes >> 10 ) << 12; break; } } @@ -653,40 +698,21 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC //Check RWX permissions according to access type. 
if ( (type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : TYPE FETCH, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) { + printf("Error: PTE FLAGS=0x%x\n",pte.flags); throw Page_Fault_Exception("Page Fault : TYPE STORE, Incorrect permissions."); } - - uint64_t pfn; - if (i > 0) - { - //It is a super page. - if (pte.ppn[0] != 0) - { - //Misss aligned super page. - throw Page_Fault_Exception("Page Fault : Miss Aligned Super Page."); - - } - else - { - //Valid super page. - pfn = pte.ppn[1]; - *size_bits = 22; - } - } - else - { - //Regular page. - *size_bits = 12; - pfn = a >> 12; - } + *size_bits = 12; + uint64_t pfn = pt_ba >> *size_bits; return std::make_pair(pfn, pte_bytes & 0xff); } diff --git a/sim/common/mem.h b/sim/common/mem.h index 8477fb800..a655a6d3c 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -116,17 +116,21 @@ public: // ACCESS_TYPE access_type; }; +#ifdef VM_ENABLE + MemoryUnit(uint64_t pageSize = PAGE_TABLE_SIZE); +#else MemoryUnit(uint64_t pageSize = 0); +#endif void attach(MemDevice &m, uint64_t start, uint64_t end); #ifdef VM_ENABLE - void read(void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD); - void write(const void* data, uint64_t addr, uint64_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE); + void read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD); + void write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE); #else - void read(void* data, uint64_t addr, uint64_t size, bool sup); - void write(const void* data, uint64_t addr, uint64_t size, bool sup); + void read(void* data, uint64_t addr, uint32_t size, bool sup); + 
void write(const void* data, uint64_t addr, uint32_t size, bool sup); #endif void amo_reserve(uint64_t addr); @@ -220,14 +224,16 @@ private: #ifdef VM_ENABLE std::pair tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits); + bool need_trans(uint64_t dev_pAddr); uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); +#else + uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); + TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); #endif - TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask); - uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); std::unordered_map tlb_; uint64_t pageSize_; @@ -328,7 +334,7 @@ class PTE_SV32_t } public: - uint64_t ppn[2]; + uint64_t ppn; uint32_t rsw; uint32_t flags; bool d, a, g, u, x, w, r, v; @@ -337,8 +343,7 @@ class PTE_SV32_t assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); flags = bits(address,0,7); rsw = bits(address,8,9); - ppn[0] = bits(address,10,19); - ppn[1] = bits(address,20,31); + ppn = bits(address,10,31); d = bit(7); a = bit(6); @@ -348,6 +353,7 @@ class PTE_SV32_t w = bit(2); r = bit(1); v = bit(0); + // printf("ppn = 0x%lx, flags= 0x%x, rsw= 0x%x\n",ppn,flags,rsw); } }; @@ -374,6 +380,7 @@ class vAddr_SV32_t vpn[0] = bits(address,12,21); vpn[1] = bits(address,22,31); pgoff = bits(address,0,11); + // printf("vpn[0] = 0x%lx, vpn[1] = 0x%lx, pgoff = 0x%lx\n",vpn[0],vpn[1],pgoff); } }; #endif diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 0214dbddd..a2c8e06d4 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -271,6 +271,8 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { #ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { + DPH(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); + try { mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); @@ -289,6 +291,7 @@ void Emulator::icache_read(void 
*data, uint64_t addr, uint32_t size) { #ifdef VM_ENABLE void Emulator::set_satp(uint32_t satp) { + DPH(3, "set satp 0x" << std::hex << satp << " in emulator module\n"); set_csr(VX_CSR_SATP,satp,0,0); } #endif @@ -328,6 +331,7 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) { #ifdef VM_ENABLE void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) { + DP(1, "*** dcache_write 0x" << std::hex << addr << ", size = 0x " << size); auto type = get_addr_type(addr); if (addr >= uint64_t(IO_COUT_ADDR) && addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index ecc1474e1..1d4779b3a 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -170,7 +170,6 @@ uint32_t Processor::get_satp() { } void Processor::set_satp(uint32_t satp) { - std::cout << "set SATP: 0x" << std::hex << this->satp << std::endl; impl_->set_satp(satp); this->satp = satp; } From 9942f251e02b792f648da211f6c6bf408cc48c55 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Wed, 19 Jun 2024 02:04:24 -0400 Subject: [PATCH 048/488] remove # --- hw/rtl/VX_config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 6d3738489..e130ae49b 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -179,7 +179,7 @@ `define PAGE_TABLE_SIZE 4096 `endif -`else # XLEN_32 +`else // XLEN_32 `ifndef STACK_BASE_ADDR `define STACK_BASE_ADDR 32'hFFFF0000 From e21bf9afbd650ec1e0b0ba5a4b91146332850046 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 22 Jun 2024 23:55:01 -0400 Subject: [PATCH 049/488] Merge Vortex 2.2 --- hw/rtl/VX_config.vh | 77 ++-- runtime/simx/vortex.cpp | 749 +++++++++++++++++++------------------- sim/common/mem.cpp | 95 ++--- sim/common/mem.h | 19 +- sim/simx/cluster.cpp | 2 +- sim/simx/cluster.h | 2 +- sim/simx/core.cpp | 2 +- sim/simx/core.h | 2 +- sim/simx/emulator.cpp | 5 +- sim/simx/emulator.h | 2 +- sim/simx/processor.cpp | 6 
+- sim/simx/processor.h | 6 +- sim/simx/processor_impl.h | 3 +- sim/simx/socket.cpp | 2 +- sim/simx/socket.h | 2 +- 15 files changed, 512 insertions(+), 462 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index e130ae49b..e0b170373 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -33,6 +33,9 @@ `endif /////////////////////////////////////////////////////////////////////////////// +`ifndef VM_DISABLE +`define VM_ENABLE +`endif `ifndef EXT_M_DISABLE `define EXT_M_ENABLE @@ -171,12 +174,11 @@ `define IO_BASE_ADDR 64'h000000040 `endif +`ifdef VM_ENABLE `ifndef PAGE_TABLE_BASE_ADDR `define PAGE_TABLE_BASE_ADDR 64'h1F0000000 `endif -`ifndef PAGE_TABLE_SIZE -`define PAGE_TABLE_SIZE 4096 `endif `else // XLEN_32 @@ -197,12 +199,11 @@ `define IO_BASE_ADDR 32'h00000040 `endif +`ifdef VM_ENABLE `ifndef PAGE_TABLE_BASE_ADDR `define PAGE_TABLE_BASE_ADDR 32'hF0000000 `endif -`ifndef PAGE_TABLE_SIZE -`define PAGE_TABLE_SIZE 4096 `endif `endif @@ -270,40 +271,58 @@ `endif // Virtual Memory Configuration /////////////////////////////////////////////////////// -`ifndef VM_DISABLE -`define VM_ENABLE -`endif `ifdef VM_ENABLE - `ifndef VM_ADDR_MODE - `define VM_ADDR_MODE SV32 - `endif - - `ifndef PTE_SIZE - `ifdef XLEN_32 - `define PTE_SIZE 4 - `define NUM_PTE_ENTRY 1024 - `else - `ifdef XLEN_64 - `define PTE_SIZE 8 - `define NUM_PTE_ENTRY 1024 - `else - `define PTE_SIZE 8 - `define NUM_PTE_ENTRY 1024 - `endif + `ifdef XLEN_32 + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV32 //or BARE + `endif + `ifndef PTE_SIZE + `define PTE_SIZE (4) + `endif + `ifndef SATP_MODE_IDX + `define SATP_MODE_IDX (31) + `endif + `ifndef SATP_PPN_WIDTH + `define SATP_PPN_WIDTH (22) + `endif + `else + `ifndef VM_ADDR_MODE + `define VM_ADDR_MODE SV64 //or BARE + `endif + `ifndef PTE_SIZE + `define PTE_SIZE (8) + `endif + `ifndef SATP_MODE_IDX + `define SATP_MODE_IDX (63) + `endif + `ifndef SATP_PPN_WIDTH + `define SATP_PPN_WIDTH (44) `endif - `define PT_SIZE (PTE_SIZE * 
NUM_PTE_ENTRY) `endif + `ifndef NUM_PTE_ENTRY + `define NUM_PTE_ENTRY (1024) + `endif + + `ifndef PT_SIZE + `define PT_SIZE (PTE_SIZE * NUM_PTE_ENTRY) + `endif + + `ifndef PT_TOTAL_SIZE + `define PT_TOTAL_SIZE (PT_SIZE*(1+NUM_PTE_ENTRY)) + `endif + + `ifndef TLB_SIZE - `define TLB_SIZE 32 - `endif - - `ifndef SUPER_PAGING - `define SUPER_PAGING 0 + `define TLB_SIZE (32) `endif `endif +`ifndef MEM_PAGE_SIZE +`define MEM_PAGE_SIZE (4096) +`endif + // Pipeline Configuration ///////////////////////////////////////////////////// // Issue width diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 816ca3081..1a5da088a 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -28,11 +28,11 @@ #include #ifdef VM_ENABLE -#include -#include +#include +// #include +//#include #include -#include #include #include @@ -50,7 +50,6 @@ using namespace vortex; #ifdef VM_ENABLE - #ifndef NDEBUG #define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) #else @@ -85,13 +84,9 @@ class vx_device { public: vx_device() : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) -#ifdef VM_ENABLE - , ram_(0, RAM_PAGE_SIZE<<11) -#else - , ram_(0, RAM_PAGE_SIZE) -#endif + , ram_(0, MEM_PAGE_SIZE) , processor_(arch_) - , global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE) + , global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE) { // attach memory module processor_.attach_ram(&ram_); @@ -150,133 +145,141 @@ public: return 0; } -#ifdef VM_ENABLE - // virtual to phycial mapping - uint64_t map_p2v(uint64_t pAddr) - { - return pAddr + 0xf000000; - } - bool need_trans(uint64_t dev_pAddr) - { - // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == VA_MODE::BARE); - // Check if the address is reserved - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); - // Check if the address falls within the startup address range - bool isStartAddress = 
(STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); - - // Print the boolean results for debugging purposes - // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); - - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isStartAddress); - } - - uint64_t phy_to_virt_map(uint64_t size, uint64_t* dev_pAddr, uint32_t flags) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("(size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x)\n", size, *dev_pAddr, flags); - DBGPRINT("bit mode: %d\n", XLEN); - - // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { - - if (!need_trans(*dev_pAddr)) - { - DBGPRINT("Translation is not needed.\n"); - return 0; - } - - uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = map_p2v(init_pAddr); - uint64_t ppn = 0, vpn = 0 ; - - //dev_pAddr can be of size greater than a page, but we have to map and update - //page tables on a page table granularity. So divide the allocation into pages. - bool is_start = false; - for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size/RAM_PAGE_SIZE) + 1; ppn++) - { - vpn = map_p2v(ppn << 12) >> 12; - if (is_start == false) { - DBGPRINT("**Search vpn in page table:0x%lx\n", vpn); - is_start = true; - } - else { - DBGPRINT("Next vpn: 0x%lx\n",vpn); - } - - //Currently a 1-1 mapping is used, this can be changed here to support different - //mapping schemes - - //If ppn to vpn mapping doesnt exist. - if (addr_mapping.find(vpn) == addr_mapping.end()) - { - //Create mapping. 
- update_page_table(ppn, vpn, flags); - addr_mapping[vpn] = ppn; - } - } - DBGPRINT("Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); - - // Sanity check - uint64_t pAddr = page_table_walk(init_vAddr); - if (pAddr != init_pAddr) - { - assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address"); - } - - *dev_pAddr = init_vAddr; // commit vpn to be returned to host - DBGPRINT("Translated device virtual addr: 0x%lx\n", *dev_pAddr); - - return 0; - } -#endif - - int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) { - - uint64_t addr; - DBGPRINT("mem_alloc size: 0x%lx\n",size); - CHECK_ERR(global_mem_.allocate(size, &addr), { - return err; - }); - CHECK_ERR(this->mem_access(addr, size, flags), { - global_mem_.release(addr); - return err; - }); - *dev_addr = addr; #ifdef VM_ENABLE - // VM address translation - phy_to_virt_map(size, dev_addr,flags); + // virtual to phycial mapping + uint64_t map_p2v(uint64_t pAddr) + { + return pAddr + 0xf000000; + } + bool need_trans(uint64_t dev_pAddr) + { + // Check if the this is the BARE mode + bool isBAREMode = (get_mode() == VA_MODE::BARE); + // Check if the address is reserved for system usage + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address is reserved for IO usage + bool isIO = (dev_pAddr < USER_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + } + + uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" 
[RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); + DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); + + // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { + + if (!need_trans(*dev_pAddr)) + { + DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n"); + return 0; + } + + uint64_t init_pAddr = *dev_pAddr; + uint64_t init_vAddr = map_p2v(init_pAddr); + uint64_t ppn = 0, vpn = 0; + + // dev_pAddr can be of size greater than a page, but we have to map and update + // page tables on a page table granularity. So divide the allocation into pages. + bool is_start = false; + for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size / MEM_PAGE_SIZE) + 1; ppn++) + { + vpn = map_p2v(ppn << 12) >> 12; + if (is_start == false) + { + DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); + is_start = true; + } + else + { + DBGPRINT(" [RT:PTV_MAP] Next vpn: 0x%lx\n", vpn); + } + + // Currently a 1-1 mapping is used, this can be changed here to support different + // mapping schemes + + // If ppn to vpn mapping doesnt exist. + if (addr_mapping.find(vpn) == addr_mapping.end()) + { + // Create mapping. 
+ update_page_table(ppn, vpn, flags); + addr_mapping[vpn] = ppn; + } + } + DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); + + // Sanity check + uint64_t pAddr = page_table_walk(init_vAddr); + if (pAddr != init_pAddr) + { + assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address"); + } + + *dev_pAddr = init_vAddr; // commit vpn to be returned to host + DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); + + return 0; + } #endif - return 0; - } - int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { - CHECK_ERR(global_mem_.reserve(dev_addr, size), { - return err; - }); - DBGPRINT("mem_reserve: addr: 0x%lx, size: 0x%lx\n",dev_addr, size); - CHECK_ERR(this->mem_access(dev_addr, size, flags), { - global_mem_.release(dev_addr); - return err; - }); + int mem_alloc(uint64_t size, int flags, uint64_t *dev_addr) + { + + uint64_t addr; + DBGPRINT(" [RT:mem_alloc] mem_alloc size: 0x%lx\n", size); + CHECK_ERR(global_mem_.allocate(size, &addr), { + return err; + }); + CHECK_ERR(this->mem_access(addr, size, flags), { + global_mem_.release(addr); + return err; + }); + *dev_addr = addr; #ifdef VM_ENABLE - uint64_t paddr = dev_addr; - phy_to_virt_map(size, &paddr, flags); + // VM address translation + phy_to_virt_map(size, dev_addr, flags); #endif - return 0; - } + return 0; + } - int mem_free(uint64_t dev_addr) { + int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) + { + CHECK_ERR(global_mem_.reserve(dev_addr, size), { + return err; + }); + DBGPRINT(" [RT:mem_reserve] mem_reserve: addr: 0x%lx, size: 0x%lx\n", dev_addr, size); + CHECK_ERR(this->mem_access(dev_addr, size, flags), { + global_mem_.release(dev_addr); + return err; + }); #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(dev_addr); - // VM address translation - return global_mem_.release(pAddr); + uint64_t paddr = dev_addr; + phy_to_virt_map(size, &paddr, flags); +#endif 
+ return 0; + } + + int mem_free(uint64_t dev_addr) + { +#ifdef VM_ENABLE + uint64_t pAddr = page_table_walk(dev_addr); + // VM address translation + return global_mem_.release(pAddr); #else - return global_mem_.release(dev_addr); + return global_mem_.release(dev_addr); #endif - } + } - int mem_access(uint64_t dev_addr, uint64_t size, int flags) { + int mem_access(uint64_t dev_addr, uint64_t size, int flags) + { uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (dev_addr + asize > GLOBAL_MEM_SIZE) return -1; @@ -285,7 +288,8 @@ public: return 0; } - int mem_info(uint64_t* mem_free, uint64_t* mem_used) const { + int mem_info(uint64_t *mem_free, uint64_t *mem_used) const + { if (mem_free) *mem_free = global_mem_.free(); if (mem_used) @@ -293,21 +297,23 @@ public: return 0; } - int upload(uint64_t dest_addr, const void* src, uint64_t size) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); - if (dest_addr + asize > GLOBAL_MEM_SIZE) - return -1; + int upload(uint64_t dest_addr, const void *src, uint64_t size) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); + if (dest_addr + asize > GLOBAL_MEM_SIZE) + return -1; #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(dest_addr); - DBGPRINT("Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr); - + uint64_t pAddr = page_table_walk(dest_addr); + DBGPRINT(" [RT:upload] Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr); + dest_addr = pAddr; //Overwirte #endif ram_.enable_acl(false); - ram_.write((const uint8_t*)src, dest_addr, size); + ram_.write((const uint8_t *)src, dest_addr, size); ram_.enable_acl(true); + /*DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr); for (uint64_t i = 0; i < size && i < 1024; i += 4) { DBGPRINT(" 0x%lx <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + i)); @@ -316,17 +322,19 @@ public: return 0; } - int download(void* dest, 
uint64_t src_addr, uint64_t size) { + int download(void *dest, uint64_t src_addr, uint64_t size) + { uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (src_addr + asize > GLOBAL_MEM_SIZE) return -1; #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(src_addr); - DBGPRINT("Download data to vAddr = 0x%lx (pAddr=0x%lx)\n", src_addr, pAddr); + uint64_t pAddr = page_table_walk(src_addr); + DBGPRINT(" [RT:download] Download data to vAddr = 0x%lx (pAddr=0x%lx)\n", src_addr, pAddr); + src_addr = pAddr; //Overwirte #endif ram_.enable_acl(false); - ram_.read((uint8_t*)dest, src_addr, size); + ram_.read((uint8_t *)dest, src_addr, size); ram_.enable_acl(true); /*DBGPRINT("download %ld bytes from 0x%lx\n", size, src_addr); @@ -337,9 +345,11 @@ public: return 0; } - int start(uint64_t krnl_addr, uint64_t args_addr) { + int start(uint64_t krnl_addr, uint64_t args_addr) + { // ensure prior run completed - if (future_.valid()) { + if (future_.valid()) + { future_.wait(); } @@ -350,9 +360,8 @@ public: this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32); // start new run - future_ = std::async(std::launch::async, [&]{ - processor_.run(); - }); + future_ = std::async(std::launch::async, [&] + { processor_.run(); }); // clear mpm cache mpm_cache_.clear(); @@ -360,12 +369,14 @@ public: return 0; } - int ready_wait(uint64_t timeout) { + int ready_wait(uint64_t timeout) + { if (!future_.valid()) return 0; uint64_t timeout_sec = timeout / 1000; std::chrono::seconds wait_time(1); - for (;;) { + for (;;) + { // wait for 1 sec and check status auto status = future_.wait_for(wait_time); if (status == std::future_status::ready) @@ -376,8 +387,10 @@ public: return 0; } - int dcr_write(uint32_t addr, uint32_t value) { - if (future_.valid()) { + int dcr_write(uint32_t addr, uint32_t value) + { + if (future_.valid()) + { future_.wait(); // ensure prior run completed } processor_.dcr_write(addr, value); @@ -385,15 +398,18 @@ public: return 0; } - int dcr_read(uint32_t addr, uint32_t* 
value) const { + int dcr_read(uint32_t addr, uint32_t *value) const + { return dcrs_.read(addr, value); } - int mpm_query(uint32_t addr, uint32_t core_id, uint64_t* value) { + int mpm_query(uint32_t addr, uint32_t core_id, uint64_t *value) + { uint32_t offset = addr - VX_CSR_MPM_BASE; if (offset > 31) return -1; - if (mpm_cache_.count(core_id) == 0) { + if (mpm_cache_.count(core_id) == 0) + { uint64_t mpm_mem_addr = IO_MPM_ADDR + core_id * 32 * sizeof(uint64_t); CHECK_ERR(this->download(mpm_cache_[core_id].data(), mpm_mem_addr, 32 * sizeof(uint64_t)), { return err; @@ -404,247 +420,250 @@ public: } #ifdef VM_ENABLE - /* VM Management */ - void set_processor_satp(VA_MODE mode) + /* VM Management */ + void set_processor_satp(VA_MODE mode) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + uint64_t satp = 0; + if (mode == VA_MODE::BARE) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint32_t satp; - if (mode == VA_MODE::BARE) - { - DBGPRINT("VA_MODE = BARE MODE"); - satp = 0; - } - else if (mode == VA_MODE::SV32) - { - satp = (alloc_2nd_level_page_table() >> 12) | 0x80000000; - DBGPRINT("VA_MODE = SV32 MODE(satp = 0x%x)\n",satp); - } - processor_.set_satp(satp); + DBGPRINT(" [RT:set_satp] VA_MODE = BARE MODE"); + } + else + { + satp = (alloc_2nd_level_page_table() / MEM_PAGE_SIZE) | (1 << SATP_MODE_IDX); + DBGPRINT(" [RT:set_satp] VA_MODE = SV mode (satp = 0x%lx)\n", satp); + } + processor_.set_satp(satp); + } + + uint64_t get_ptbr() + { + // return processor_.get_satp(); + return processor_.get_satp() & ((1 << SATP_PPN_WIDTH) - 1); + } + uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) + { + return (base_page * MEM_PAGE_SIZE) + (vpn * PTE_SIZE); + } + + VA_MODE get_mode() + { +#ifdef XLEN_32 + return processor_.get_satp() & (1 << SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; +#else // 64 bit + return processor_.get_satp() & (1 << SATP_MODE_IDX) ? 
VA_MODE::SV64 : VA_MODE::BARE; +#endif + } + + void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag); + assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); + // Updating page table with the following mapping of (vAddr) to (pAddr). + // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); + uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; + uint64_t vpn_1 = bits(vpn, 10, 19); + uint64_t vpn_0 = bits(vpn, 0, 9); + + // Read first level PTE. + DBGPRINT(" [RT:Update PT]Start second-level page table\n"); + pte_addr = get_pte_address(get_ptbr(), vpn_1); + pte_bytes = read_pte(pte_addr); + DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes); + ppn_1 = (pte_bytes >> 10); + + if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + // If valid bit set, proceed to next level using new ppn form PTE. + DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", ppn_1); + } + else + { + // If valid bit not set, allocate a second level page table + // in device memory and store ppn in PTE. Set rwx = 000 in PTE + // to indicate this is a pointer to the next level of the page table. + DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx), continuing the walk...\n", ppn_1); + ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); + pte_bytes = ((ppn_1 << 10) | 0b0000000001); + assert((pte_addr >> 32) == 0 && "Upper 32 bits are not zero!"); + write_pte(pte_addr, pte_bytes); + // if (pte_bytes != read_pte(pte_addr)) + // DBGPRINT("Read/write values are different!\n"); } - uint32_t get_ptbr() - { - // return processor_.get_satp(); - return processor_.get_satp() & 0x003fffff; + DBGPRINT(" [RT:Update PT] Move to first-level page table\n"); + // Read second level PTE. 
+ pte_addr = get_pte_address(ppn_1, vpn_0); + pte_bytes = read_pte(pte_addr); + + if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + DBGPRINT(" [RT:Update PT] ERROR, shouldn't be here\n"); + exit(1); + // If valid bit is set, then the page is already allocated. + // Should not reach this point, a sanity check. } - uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) + else { - return (base_page << 12) + (vpn * PTE_SIZE); - } + // If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE + // to indicate this is a leaf PTE and has the stated permissions. + pte_bytes = ((ppn << 10) | 0b0000001111); + write_pte(pte_addr, pte_bytes); + if (pte_bytes != read_pte(pte_addr)) + DBGPRINT(" [RT:Update PT] PTE write value and read value are not matched!\n"); + } + } - VA_MODE get_mode() + uint64_t page_table_walk(uint64_t vAddr_bits) + { + // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [RT:PTW] start vAddr: 0x%lx\n", vAddr_bits); + if (!need_trans(vAddr_bits)) { - return processor_.get_satp() & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; - } + DBGPRINT(" [RT:PTW] Translation is not needed.\n"); + return vAddr_bits; + } + uint64_t LEVELS = 2; + vAddr_SV32_t vAddr(vAddr_bits); + uint64_t pte_addr, pte_bytes; + uint64_t pt_ba = get_ptbr() << 12; - void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn,flag); - assert((((ppn>> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); - //Updating page table with the following mapping of (vAddr) to (pAddr). - // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); - uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; - uint64_t vpn_1 = bits(vpn, 10, 19); - uint64_t vpn_0 = bits(vpn, 0, 9); + // Get base page table. - //Read first level PTE. 
- DBGPRINT("Start second-level page table\n"); - pte_addr = get_pte_address(get_ptbr(), vpn_1); - pte_bytes = read_pte(pte_addr); - DBGPRINT("[PTE] addr 0x%lx, PTE 0x%lx\n", pte_addr, pte_bytes); - ppn_1 = (pte_bytes >> 10); + for (int i = LEVELS - 1; i >= 0; i--) + { + // Read PTE. + pte_addr = pt_ba + vAddr.vpn[i] * PTE_SIZE; + pte_bytes = read_pte(pte_addr); + PTE_SV32_t pte(pte_bytes); + DBGPRINT(" [RT:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn, pte.flags); - if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + // Check if it has invalid flag bits. + if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1))) + { + std::string msg = " [RT:PTW] Page Fault : Attempted to access invalid entry. Entry: 0x"; + throw Page_Fault_Exception(msg); + } + + if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + // Not a leaf node as rwx == 000 + if (i == 0) { - //If valid bit set, proceed to next level using new ppn form PTE. - DBGPRINT("PTE valid (ppn 0x%lx), continuing the walk...\n",ppn_1); + throw Page_Fault_Exception(" [RT:PTW] Page Fault : No leaf node found."); } else { - //If valid bit not set, allocate a second level page table - // in device memory and store ppn in PTE. Set rwx = 000 in PTE - //to indicate this is a pointer to the next level of the page table. - DBGPRINT("PTE Invalid (ppn 0x%lx), continuing the walk...\n",ppn_1); - ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); - pte_bytes = ((ppn_1 << 10) | 0b0000000001) ; - assert((pte_addr>> 32) == 0 && "Upper 32 bits are not zero!"); - write_pte(pte_addr, pte_bytes); - // if (pte_bytes != read_pte(pte_addr)) - // DBGPRINT("Read/write values are different!\n"); - } - - - DBGPRINT("Move to first-level page table\n"); - //Read second level PTE. 
- pte_addr = get_pte_address(ppn_1, vpn_0); - pte_bytes = read_pte(pte_addr); - - if ( bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - DBGPRINT("ERROR, shouldn't be here\n"); - exit(1); - //If valid bit is set, then the page is already allocated. - //Should not reach this point, a sanity check. - } - else - { - //If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE - //to indicate this is a leaf PTE and has the stated permissions. - pte_bytes = ( (ppn << 10) | 0b0000001111) ; - write_pte(pte_addr, pte_bytes); - if (pte_bytes != read_pte(pte_addr)) - DBGPRINT("Read/write values are different!\n"); + // Continue on to next level. + pt_ba = pte.ppn << 12; + DBGPRINT(" [RT:PTW] next pt_ba: %p\n", (void *)pt_ba); } + } + else + { + // Leaf node found, finished walking. + pt_ba = pte.ppn << 12; + DBGPRINT(" [RT:PTW] Found PT_Base_Address [%d] = %lx\n", i, pt_ba); + break; + } } - uint64_t page_table_walk(uint64_t vAddr_bits) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("PTW on vAddr: 0x%lx\n", vAddr_bits); - if (!need_trans(vAddr_bits)) - { - DBGPRINT("Translation is not needed.\n"); - return vAddr_bits; - } - uint64_t LEVELS = 2; - vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_addr, pte_bytes; - uint64_t pt_ba = get_ptbr() << 12; - - //Get base page table. - - for ( int i = LEVELS-1 ; i >= 0 ; i--) - { - //Read PTE. - pte_addr = pt_ba+vAddr.vpn[i]*PTE_SIZE; - pte_bytes = read_pte(pte_addr); - PTE_SV32_t pte(pte_bytes); - DBGPRINT("pte_bytes = 0x%lx, pte flags = %u)\n", pte.ppn , pte.flags); - - //Check if it has invalid flag bits. - if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) - { - std::string msg= "Page Fault : Attempted to access invalid entry. 
Entry: 0x"; - throw Page_Fault_Exception(msg); - } - - if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - //Not a leaf node as rwx == 000 - if (i == 0) - { - throw Page_Fault_Exception("Page Fault : No leaf node found."); - } - else - { - //Continue on to next level. - pt_ba = pte.ppn << 12; - DBGPRINT("next pt_ba: %p\n", (void *)pt_ba); - - } - } - else - { - //Leaf node found, finished walking. - pt_ba = pte.ppn << 12; - DBGPRINT("Found PT_Base_Address [%d] = %lx\n", i, pt_ba); - break; - } - - } - - // pte_bytes is final leaf - PTE_SV32_t pte(pte_bytes); - //Check RWX permissions according to access type. - if (pte.r == 0) - { - throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); - } - - uint64_t paddr = pt_ba + vAddr.pgoff; - return paddr; + // pte_bytes is final leaf + PTE_SV32_t pte(pte_bytes); + // Check RWX permissions according to access type. + if (pte.r == 0) + { + throw Page_Fault_Exception(" [RT:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); } - uint64_t alloc_2nd_level_page_table() { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t addr=PAGE_TABLE_BASE_ADDR; - uint64_t size=1<<23; // 8MB !!!FIXME!!! - CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { - return err; - }); - init_page_table(addr); - return addr; + uint64_t paddr = pt_ba + vAddr.pgoff; + return paddr; + } + + uint64_t alloc_2nd_level_page_table() + { + uint64_t addr = PAGE_TABLE_BASE_ADDR; + uint64_t size = PT_TOTAL_SIZE; + CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { + return err; + }); + init_page_table(addr); + return addr; + } + uint64_t alloc_1st_level_page_table(uint64_t vpn_1) + { + uint64_t addr = PAGE_TABLE_BASE_ADDR + PT_SIZE * (1 + vpn_1); + init_page_table(addr); + return addr; + } + + // Initialize to zero the target page table area. 
32bit 4K, 64bit 8K + void init_page_table(uint64_t addr) + { + uint64_t asize = aligned_size(PT_SIZE, CACHE_BLOCK_SIZE); + DBGPRINT(" [RT:init_page_table] (addr=0x%lx, size=0x%lx)\n", addr, asize); + uint8_t *src = new uint8_t[asize]; + for (uint64_t i = 0; i < PT_SIZE; ++i) + { + src[i] = 0; } - uint64_t alloc_1st_level_page_table(uint64_t vpn_1) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t addr = PAGE_TABLE_BASE_ADDR + PTE_SIZE * NUM_PTE_ENTRY*(1+vpn_1); - init_page_table(addr); - return addr; + ram_.enable_acl(false); + ram_.write((const uint8_t *)src, addr, asize); + ram_.enable_acl(true); + } + + // void read_page_table(uint64_t addr) { + // uint8_t *dest = new uint8_t[MEM_PAGE_SIZE]; + // download(dest, addr, MEM_PAGE_SIZE); + // DBGPRINT("VXDRV: download %d bytes from 0x%x\n", MEM_PAGE_SIZE, addr); + // for (int i = 0; i < MEM_PAGE_SIZE; i += 4) { + // DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); + // } + // } + + void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) + { + DBGPRINT(" [RT:Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr); + uint8_t *src = new uint8_t[PTE_SIZE]; + for (uint64_t i = 0; i < PTE_SIZE; ++i) + { + src[i] = (value >> (i << 3)) & 0xff; } + // std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; + ram_.enable_acl(false); + ram_.write((const uint8_t *)src, addr, PTE_SIZE); + ram_.enable_acl(true); + } - void init_page_table(uint64_t addr) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT("int_page_table (addr=0x%lx)\n", addr); - uint64_t asize = aligned_size(RAM_PAGE_SIZE, CACHE_BLOCK_SIZE); - // uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); - uint8_t *src = new uint8_t[RAM_PAGE_SIZE]; - for (uint64_t i = 0; i < RAM_PAGE_SIZE; ++i) { - src[i] = 0; - } - ram_.enable_acl(false); - ram_.write((const uint8_t*)src, addr, asize); - ram_.enable_acl(true); - } + uint64_t read_pte(uint64_t addr) + { + uint8_t *dest = new 
uint8_t[PTE_SIZE]; +#ifdef XLEN_32 + uint64_t mask = 0x00000000FFFFFFFF; +#else // 64bit + uint64_t mask = 0xFFFFFFFFFFFFFFFF; +#endif - // void read_page_table(uint64_t addr) { - // uint8_t *dest = new uint8_t[RAM_PAGE_SIZE]; - // download(dest, addr, RAM_PAGE_SIZE); - // DBGPRINT("VXDRV: download %d bytes from 0x%x\n", RAM_PAGE_SIZE, addr); - // for (int i = 0; i < RAM_PAGE_SIZE; i += 4) { - // DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i)); - // } - // } + ram_.read((uint8_t *)dest, addr, PTE_SIZE); + uint64_t ret = (*(uint64_t *)((uint8_t *)dest)) & mask; + DBGPRINT(" [RT:read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr); - void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d) { - DBGPRINT("[Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr); - uint8_t *src = new uint8_t[PTE_SIZE]; - for (uint64_t i = 0; i < PTE_SIZE; ++i) { - src[i] = (value >> (i << 3)) & 0xff; - } - //std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl; - ram_.enable_acl(false); - ram_.write((const uint8_t*)src, addr, PTE_SIZE); - ram_.enable_acl(true); - } - - uint64_t read_pte(uint64_t addr) { - uint8_t *dest = new uint8_t[PTE_SIZE]; - uint64_t mask = 0; - if (XLEN == 32) - mask = 0x00000000FFFFFFFF; - else if (XLEN == 64) - mask = 0xFFFFFFFFFFFFFFFF; - else - assert(0 && "XLEN is not either 32 or 64"); - - ram_.read((uint8_t*)dest, addr, PTE_SIZE); - uint64_t ret = (*(uint64_t*)((uint8_t*)dest)) & mask; - DBGPRINT("[read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr); - - return ret; - } + return ret; + } #endif // JAEWON private: - Arch arch_; - RAM ram_; - Processor processor_; - MemoryAllocator global_mem_; - DeviceConfig dcrs_; - std::future future_; + Arch arch_; + RAM ram_; + Processor processor_; + MemoryAllocator global_mem_; + DeviceConfig dcrs_; + std::future future_; std::unordered_map> mpm_cache_; #ifdef VM_ENABLE - std::unordered_map addr_mapping; + std::unordered_map 
addr_mapping; #endif }; diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index 98eefdaf2..eebd2cde3 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -21,6 +21,13 @@ #include using namespace vortex; +#ifdef VM_ENABLE +#ifndef NDEBUG +#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) +#else +#define DBGPRINT(format, ...) ((void)0) +#endif +#endif uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) { @@ -115,7 +122,6 @@ void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) { } void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { - // printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -125,7 +131,6 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { } void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { - // printf("====%s====\n", __PRETTY_FUNCTION__); mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; @@ -138,7 +143,9 @@ void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) MemoryUnit::MemoryUnit(uint64_t pageSize) : pageSize_(pageSize) +#ifndef VM_ENABLE , enableVM_(pageSize != 0) +#endif , amo_reservation_({0x0, false}) #ifdef VM_ENABLE , TLB_HIT(0) @@ -158,9 +165,9 @@ void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { decoder_.map(start, end, m); } + #ifdef VM_ENABLE std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) { - // printf("====%s====\n", __PRETTY_FUNCTION__); //Find entry while accounting for different sizes. for (auto entry : tlb_) @@ -201,7 +208,7 @@ std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type } //Check access permissions. 
- if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) ) + if ( (type == ACCESS_TYPE::FENCE) & ((e.r == 0) | (e.x == 0)) ) { throw Page_Fault_Exception("Page Fault : Incorrect permissions."); } @@ -251,7 +258,7 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) { #ifdef VM_ENABLE void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { - // printf("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [MMU:read] 0x%lx, 0x%x, %u\n",addr,size,type); uint64_t pAddr; pAddr = vAddr_to_pAddr(addr, type); return decoder_.read(data, pAddr, size); @@ -264,7 +271,7 @@ void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, bool sup) { #endif #ifdef VM_ENABLE void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) { - // printf("====%s====\n", __PRETTY_FUNCTION__); + DBGPRINT(" [MMU:Write] 0x%lx, 0x%x, %u\n",addr,size,type); uint64_t pAddr; pAddr = vAddr_to_pAddr(addr, type); decoder_.write(data, pAddr, size); @@ -280,6 +287,7 @@ void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, bool sup) #ifdef VM_ENABLE void MemoryUnit::amo_reserve(uint64_t addr) { + DBGPRINT(" [MMU:amo_reserve] 0x%lx\n",addr); uint64_t pAddr = this->vAddr_to_pAddr(addr,ACCESS_TYPE::LOAD); amo_reservation_.addr = pAddr; amo_reservation_.valid = true; @@ -294,6 +302,7 @@ void MemoryUnit::amo_reserve(uint64_t addr) { #ifdef VM_ENABLE bool MemoryUnit::amo_check(uint64_t addr) { + DBGPRINT(" [MMU:amo_check] 0x%lx\n",addr); uint64_t pAddr = this->vAddr_to_pAddr(addr, ACCESS_TYPE::LOAD); return amo_reservation_.valid && (amo_reservation_.addr == pAddr); } @@ -593,30 +602,30 @@ void RAM::loadHexImage(const char* filename) { #ifdef VM_ENABLE bool MemoryUnit::need_trans(uint64_t dev_pAddr) -{ - // Check if the this is the BARE mode - bool isBAREMode = (this->mode == VA_MODE::BARE); - // Check if the address is reserved - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); - // Check if the address falls 
within the startup address range - bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr < (STARTUP_ADDR + 0x40000)); - - // Print the boolean results for debugging purposes - // printf("0x%lx, %u, %u, %u \n", dev_pAddr,isBAREMode, isReserved, isStartAddress); - - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isStartAddress); -} + { + // Check if the this is the BARE mode + bool isBAREMode = (this->mode == VA_MODE::BARE); + // Check if the address is reserved for system usage + bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // Check if the address is reserved for IO usage + bool isIO= (dev_pAddr < USER_BASE_ADDR); + // Check if the address falls within the startup address range + bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + + // Print the boolean results for debugging purposes + // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + // Return true if the address needs translation (i.e., it's not reserved and not a start address) + return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + } uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) { uint64_t pfn; uint64_t size_bits; - // printf("====%s====\n", __PRETTY_FUNCTION__); - // printf("vaddr = 0x%lx, type = 0x%u\n",vAddr,type); + DBGPRINT(" [MMU: V2P] vaddr = 0x%lx, type = 0x%u\n",vAddr,type); if (!need_trans(vAddr)) { - // printf("Translation is not needed.\n"); + DBGPRINT(" [MMU: V2P] Translation is not needed.\n"); return vAddr; } @@ -640,18 +649,18 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) } //Construct final address using pfn and offset. 
- // std::cout << "[MemoryUnit] translated vAddr: 0x" << std::hex << vAddr << " to pAddr: 0x" << std::hex << ((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))) << std::endl; + DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)))); return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); } std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) { - // printf("====%s====\n", __PRETTY_FUNCTION__); - // printf("vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); + DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); uint64_t LEVELS = 2; vAddr_SV32_t vAddr(vAddr_bits); uint64_t pte_bytes = 0; + uint64_t pte_addr =0; //Get base page table. uint64_t pt_ba = this->ptbr << 12; int i = LEVELS - 1; @@ -660,14 +669,15 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC { //Read PTE. - decoder_.read(&pte_bytes, pt_ba+vAddr.vpn[i]*PTE_SIZE, PTE_SIZE); + pte_addr = pt_ba+vAddr.vpn[i] * PTE_SIZE; + decoder_.read(&pte_bytes, pte_addr, PTE_SIZE); PTE_SV32_t pte(pte_bytes); + DBGPRINT(" [MMU:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn , pte.flags); //Check if it has invalid flag bits. 
if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : Attempted to access invalid entry."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry."); } if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) @@ -676,8 +686,7 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC i--; if (i < 0) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : No leaf node found."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found."); } else { @@ -696,35 +705,35 @@ std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, AC PTE_SV32_t pte(pte_bytes); //Check RWX permissions according to access type. - if ( (type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)) ) + if ( (type == ACCESS_TYPE::FENCE) & ((pte.r == 0) | (pte.x == 0)) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : TYPE FETCH, Incorrect permissions."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FENCE, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : TYPE LOAD, Incorrect permissions."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); } else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) { - printf("Error: PTE FLAGS=0x%x\n",pte.flags); - throw Page_Fault_Exception("Page Fault : TYPE STORE, Incorrect permissions."); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions."); } *size_bits = 12; uint64_t pfn = pt_ba >> *size_bits; return std::make_pair(pfn, pte_bytes & 0xff); } - -uint32_t MemoryUnit::get_satp() +uint64_t MemoryUnit::get_satp() { return satp; } -void MemoryUnit::set_satp(uint32_t satp) +void MemoryUnit::set_satp(uint64_t satp) { this->satp = satp; 
- this->ptbr = satp & 0x003fffff; //22 bits - this->mode = satp & 0x80000000 ? VA_MODE::SV32 : VA_MODE::BARE; + this->ptbr = satp & ( (1<< SATP_PPN_WIDTH) - 1); +#ifdef XLEN_32 + this->mode = satp & (1<< SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; +#else // 64 bit + this->mode = satp & (1<< SATP_MODE_IDX) ? VA_MODE::SV64 : VA_MODE::BARE; +#endif } #endif \ No newline at end of file diff --git a/sim/common/mem.h b/sim/common/mem.h index a655a6d3c..4b7744c2b 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -34,13 +34,14 @@ namespace vortex { #ifdef VM_ENABLE enum VA_MODE { BARE, - SV32 + SV32, + SV64 }; enum ACCESS_TYPE { LOAD, STORE, - FETCH + FENCE }; class Page_Fault_Exception : public std::runtime_error /* or logic_error */ @@ -117,7 +118,7 @@ public: }; #ifdef VM_ENABLE - MemoryUnit(uint64_t pageSize = PAGE_TABLE_SIZE); + MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); #else MemoryUnit(uint64_t pageSize = 0); #endif @@ -138,8 +139,8 @@ public: #ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); - uint32_t get_satp(); - void set_satp(uint32_t satp); + uint64_t get_satp(); + void set_satp(uint64_t satp); #else void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); #endif @@ -238,14 +239,16 @@ private: std::unordered_map tlb_; uint64_t pageSize_; ADecoder decoder_; +#ifndef VM_ENABLE bool enableVM_; +#endif amo_reservation_t amo_reservation_; #ifdef VM_ENABLE - uint32_t satp; + uint64_t satp; VA_MODE mode; - uint32_t ptbr; + uint64_t ptbr; std::unordered_set unique_translations; uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; @@ -380,7 +383,7 @@ class vAddr_SV32_t vpn[0] = bits(address,12,21); vpn[1] = bits(address,22,31); pgoff = bits(address,0,11); - // printf("vpn[0] = 0x%lx, vpn[1] = 0x%lx, pgoff = 0x%lx\n",vpn[0],vpn[1],pgoff); + // printf("vpn[1] = 0x%lx, vpn[0] = 0x%lx, pgoff = 0x%lx\n",vpn[1],vpn[0],pgoff); } }; #endif diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index 
0c5ff9f3f..25669e26b 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -107,7 +107,7 @@ void Cluster::attach_ram(RAM* ram) { } #ifdef VM_ENABLE -void Cluster::set_satp(uint32_t satp) { +void Cluster::set_satp(uint64_t satp) { for (auto& socket : sockets_) { socket->set_satp(satp); } diff --git a/sim/simx/cluster.h b/sim/simx/cluster.h index 113ac04f7..df96031c3 100644 --- a/sim/simx/cluster.h +++ b/sim/simx/cluster.h @@ -58,7 +58,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif bool running() const; diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 9a134b6ca..6f817a3ae 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -430,7 +430,7 @@ void Core::attach_ram(RAM* ram) { } #ifdef VM_ENABLE -void Core::set_satp(uint32_t satp) { +void Core::set_satp(uint64_t satp) { emulator_.set_satp(satp); //JAEWON wit, tid??? // emulator_.set_csr(VX_CSR_SATP,satp,0,0); //JAEWON wit, tid??? } diff --git a/sim/simx/core.h b/sim/simx/core.h index c18498a52..339d76fb8 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -100,7 +100,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif bool running() const; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index a2c8e06d4..a1ee0072d 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -271,7 +271,7 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { #ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { - DPH(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); + // DP(1, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); try { @@ -290,7 +290,7 @@ void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { #endif #ifdef VM_ENABLE -void Emulator::set_satp(uint32_t satp) { +void Emulator::set_satp(uint64_t satp) { DPH(3, "set satp 0x" << 
std::hex << satp << " in emulator module\n"); set_csr(VX_CSR_SATP,satp,0,0); } @@ -299,6 +299,7 @@ void Emulator::set_satp(uint32_t satp) { #ifdef VM_ENABLE void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) { + DP(1, "*** dcache_read 0x" << std::hex << addr << ", size = 0x " << size); auto type = get_addr_type(addr); if (type == AddrType::Shared) { core_->local_mem()->read(data, addr, size); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 9ee42812a..f5c785581 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -40,7 +40,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp) ; + void set_satp(uint64_t satp) ; #endif instr_trace_t* step(); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 1d4779b3a..972792bfb 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -96,7 +96,7 @@ void ProcessorImpl::attach_ram(RAM* ram) { } } #ifdef VM_ENABLE -void ProcessorImpl::set_satp(uint32_t satp) { +void ProcessorImpl::set_satp(uint64_t satp) { for (auto cluster : clusters_) { cluster->set_satp(satp); } @@ -164,12 +164,12 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) { } #ifdef VM_ENABLE -uint32_t Processor::get_satp() { +uint64_t Processor::get_satp() { // std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; return this->satp; } -void Processor::set_satp(uint32_t satp) { +void Processor::set_satp(uint64_t satp) { impl_->set_satp(satp); this->satp = satp; } diff --git a/sim/simx/processor.h b/sim/simx/processor.h index e22f11569..d2b575421 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -34,14 +34,14 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - uint32_t get_satp(); - void set_satp(uint32_t satp); + uint64_t get_satp(); + void set_satp(uint64_t satp); #endif private: ProcessorImpl* impl_; #ifdef VM_ENABLE - uint32_t satp; + uint64_t satp; #endif }; diff --git a/sim/simx/processor_impl.h 
b/sim/simx/processor_impl.h index e6e9a4cf1..511c0cad6 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -40,8 +40,7 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - // 32bit satp - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif PerfStats perf_stats() const; diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp index 9374bbc59..cef8a3908 100644 --- a/sim/simx/socket.cpp +++ b/sim/simx/socket.cpp @@ -108,7 +108,7 @@ void Socket::attach_ram(RAM* ram) { } #ifdef VM_ENABLE -void Socket::set_satp(uint32_t satp) { +void Socket::set_satp(uint64_t satp) { for (auto core : cores_) { core->set_satp(satp); } diff --git a/sim/simx/socket.h b/sim/simx/socket.h index a09f73e8b..104d53292 100644 --- a/sim/simx/socket.h +++ b/sim/simx/socket.h @@ -61,7 +61,7 @@ public: void attach_ram(RAM* ram); #ifdef VM_ENABLE - void set_satp(uint32_t satp); + void set_satp(uint64_t satp); #endif bool running() const; From 3a5278a62e3f2231a880458163f69aae6d6cb60c Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 29 Jun 2024 17:43:20 -0400 Subject: [PATCH 050/488] 64bit support --- .gitignore | 3 +- hw/rtl/VX_config.vh | 48 +-- runtime/common/common.h | 2 +- runtime/common/malloc.h | 26 +- runtime/simx/vortex.cpp | 482 +++++++++++++--------------- sim/common/mem.cpp | 176 +++++----- sim/common/mem.h | 232 ++++++++++--- sim/simx/emulator.cpp | 9 +- sim/simx/main.cpp | 2 +- sim/simx/processor.cpp | 23 +- sim/simx/processor.h | 10 +- tests/regression/diverge/kernel.cpp | 2 +- 12 files changed, 572 insertions(+), 443 deletions(-) diff --git a/.gitignore b/.gitignore index 039456040..43388e9cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /build* /.vscode -*.cache \ No newline at end of file +*.cache +*.code-workspace diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index e0b170373..ed2afc900 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -163,7 +163,7 @@ `endif `ifndef STARTUP_ADDR -`define 
STARTUP_ADDR 64'h080000000 +`define STARTUP_ADDR 64'h180000000 `endif `ifndef USER_BASE_ADDR @@ -270,59 +270,59 @@ `define DEBUG_LEVEL 3 `endif +`ifndef MEM_PAGE_SIZE +`define MEM_PAGE_SIZE (4096) +`endif +`ifndef MEM_PAGE_LOG2_SIZE +`define MEM_PAGE_LOG2_SIZE (12) +`endif + // Virtual Memory Configuration /////////////////////////////////////////////////////// `ifdef VM_ENABLE `ifdef XLEN_32 `ifndef VM_ADDR_MODE `define VM_ADDR_MODE SV32 //or BARE `endif + `ifndef PT_LEVEL + `define PT_LEVEL (2) + `endif `ifndef PTE_SIZE `define PTE_SIZE (4) `endif - `ifndef SATP_MODE_IDX - `define SATP_MODE_IDX (31) + `ifndef NUM_PTE_ENTRY + `define NUM_PTE_ENTRY (1024) `endif - `ifndef SATP_PPN_WIDTH - `define SATP_PPN_WIDTH (22) + `ifndef PT_SIZE_LIMIT + `define PT_SIZE_LIMIT (1<<23) `endif `else `ifndef VM_ADDR_MODE - `define VM_ADDR_MODE SV64 //or BARE + `define VM_ADDR_MODE SV39 //or BARE + `endif + `ifndef PT_LEVEL + `define PT_LEVEL (3) `endif `ifndef PTE_SIZE `define PTE_SIZE (8) `endif - `ifndef SATP_MODE_IDX - `define SATP_MODE_IDX (63) + `ifndef NUM_PTE_ENTRY + `define NUM_PTE_ENTRY (512) `endif - `ifndef SATP_PPN_WIDTH - `define SATP_PPN_WIDTH (44) + `ifndef PT_SIZE_LIMIT + `define PT_SIZE_LIMIT (1<<25) `endif `endif - `ifndef NUM_PTE_ENTRY - `define NUM_PTE_ENTRY (1024) - `endif - `ifndef PT_SIZE - `define PT_SIZE (PTE_SIZE * NUM_PTE_ENTRY) + `define PT_SIZE MEM_PAGE_SIZE `endif - `ifndef PT_TOTAL_SIZE - `define PT_TOTAL_SIZE (PT_SIZE*(1+NUM_PTE_ENTRY)) - `endif - - `ifndef TLB_SIZE `define TLB_SIZE (32) `endif `endif -`ifndef MEM_PAGE_SIZE -`define MEM_PAGE_SIZE (4096) -`endif - // Pipeline Configuration ///////////////////////////////////////////////////// // Issue width diff --git a/runtime/common/common.h b/runtime/common/common.h index f7125064e..37fec4846 100644 --- a/runtime/common/common.h +++ b/runtime/common/common.h @@ -24,7 +24,7 @@ #define CACHE_BLOCK_SIZE 64 -#define RAM_PAGE_SIZE 4096 +#define RAM_PAGE_SIZE 4096 // Please use MEM_PAGE_SIZE in 
VX_config.h #define ALLOC_BASE_ADDR USER_BASE_ADDR diff --git a/runtime/common/malloc.h b/runtime/common/malloc.h index 480c198a6..ca386031a 100644 --- a/runtime/common/malloc.h +++ b/runtime/common/malloc.h @@ -39,6 +39,15 @@ public: page_t* currPage = pages_; while (currPage) { auto nextPage = currPage->next; + #ifdef VM_ENABLE + block_t* currblock = currPage->findfirstUsedBlock(); + block_t* nextblock; + while (currblock) { + nextblock= currblock->nextUsed; + currPage->release(currblock); + currblock = nextblock; + } + #endif delete currPage; currPage = nextPage; } @@ -70,7 +79,7 @@ public: size = alignSize(size, pageAlign_); // Check if the reservation is within memory capacity bounds - if (addr + size > capacity_) { + if (addr + size > baseAddress_ + capacity_) { printf("error: address range out of bounds\n"); return -1; } @@ -118,12 +127,12 @@ public: auto pageSize = alignSize(size, pageAlign_); uint64_t pageAddr; if (!this->findNextAddress(pageSize, &pageAddr)) { - printf("error: out of memory\n"); + printf("error: out of memory (Can't find next address)\n"); return -1; } currPage = this->createPage(pageAddr, pageSize); if (nullptr == currPage) { - printf("error: out of memory\n"); + printf("error: out of memory (Can't create a page)\n"); return -1; } freeBlock = currPage->findFreeBlock(size); @@ -335,6 +344,11 @@ private: } return nullptr; } +#ifdef VM_ENABLE + block_t* findfirstUsedBlock() { + return usedList_; + } +#endif private: @@ -480,7 +494,7 @@ private: bool findNextAddress(uint64_t size, uint64_t* addr) { if (pages_ == nullptr) { - *addr = baseAddress_; + *addr = baseAddress_; return true; } @@ -498,10 +512,10 @@ private: endOfLastPage = current->addr + current->size; current = current->next; } - + // If no suitable gap is found, place the new page at the end of the last page // Check if the allocator has enough capacity - if ((endOfLastPage + size) <= capacity_) { + if ((endOfLastPage + size) <= (baseAddress_ + capacity_)) { *addr = endOfLastPage; 
return true; } diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 1a5da088a..ae9fe5bb5 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -27,10 +27,8 @@ #include #include -#ifdef VM_ENABLE #include -// #include -//#include +#ifdef VM_ENABLE #include #include @@ -44,42 +42,10 @@ #include #include #include -#include #endif using namespace vortex; -#ifdef VM_ENABLE -#ifndef NDEBUG -#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) -#else -#define DBGPRINT(format, ...) ((void)0) -#endif - -#define CHECK_ERR(_expr, _cleanup) \ - do { \ - auto err = _expr; \ - if (err == 0) \ - break; \ - printf("[VXDRV] Error: '%s' returned %d!\n", #_expr, (int)err); \ - _cleanup \ - } while (false) - -/////////////////////////////////////////////////////////////////////////////// -// -#include -#include - -uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) -{ - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); -} -bool bit(uint64_t addr, uint8_t idx) -{ - return (addr) & (1 << idx); -} -#endif - class vx_device { public: vx_device() @@ -91,14 +57,16 @@ public: // attach memory module processor_.attach_ram(&ram_); #ifdef VM_ENABLE - //Set - set_processor_satp(VM_ADDR_MODE); + CHECK_ERR(init_VM(), ); #endif - } + } ~vx_device() { #ifdef VM_ENABLE - this->mem_free(PAGE_TABLE_BASE_ADDR); // Right position? 
+ global_mem_.release(PAGE_TABLE_BASE_ADDR); + // for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++) + // page_table_mem_->release(i->second << MEM_PAGE_SIZE); + delete page_table_mem_; #endif if (future_.valid()) { future_.wait(); @@ -154,9 +122,10 @@ public: bool need_trans(uint64_t dev_pAddr) { // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == VA_MODE::BARE); + bool isBAREMode = (get_mode() == BARE); // Check if the address is reserved for system usage - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); + bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); // Check if the address is reserved for IO usage bool isIO = (dev_pAddr < USER_BASE_ADDR); // Check if the address falls within the startup address range @@ -172,14 +141,12 @@ public: uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) { // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); - DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); - - // if (*dev_pAddr == STARTUP_ADDR || *dev_pAddr == 0x7FFFF000) { + DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); + DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); if (!need_trans(*dev_pAddr)) { - DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n"); + DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n"); return 0; } @@ -189,42 +156,30 @@ public: // dev_pAddr can be of size greater than a page, but we have to map and update // page tables on a page table granularity. So divide the allocation into pages. 
- bool is_start = false; - for (ppn = (*dev_pAddr) >> 12; ppn < ((*dev_pAddr) >> 12) + (size / MEM_PAGE_SIZE) + 1; ppn++) + // FUTURE Work: Super Page + for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++) { - vpn = map_p2v(ppn << 12) >> 12; - if (is_start == false) - { - DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); - is_start = true; - } - else - { - DBGPRINT(" [RT:PTV_MAP] Next vpn: 0x%lx\n", vpn); - } - + vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE) >> MEM_PAGE_LOG2_SIZE; + DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); // Currently a 1-1 mapping is used, this can be changed here to support different // mapping schemes - // If ppn to vpn mapping doesnt exist. if (addr_mapping.find(vpn) == addr_mapping.end()) { // Create mapping. - update_page_table(ppn, vpn, flags); + DBGPRINT(" [RT:PTV_MAP] Not found. Allocate new page table or update a PTE.\n"); + CHECK_ERR(update_page_table(ppn, vpn, flags),); addr_mapping[vpn] = ppn; } } - DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: %lx\n", init_vAddr, init_pAddr); - + DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr); // Sanity check uint64_t pAddr = page_table_walk(init_vAddr); - if (pAddr != init_pAddr) - { - assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address"); - } + DBGPRINT(" [RT:PTV_MAP] physical addr from PTW: 0x%lx\n", pAddr); + assert(pAddr == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address\n"); *dev_pAddr = init_vAddr; // commit vpn to be returned to host - DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); + DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); return 0; } @@ -232,47 +187,44 @@ public: int mem_alloc(uint64_t size, int flags, uint64_t *dev_addr) { + uint64_t asize 
= aligned_size(size, MEM_PAGE_SIZE); + uint64_t addr = 0; - uint64_t addr; - DBGPRINT(" [RT:mem_alloc] mem_alloc size: 0x%lx\n", size); - CHECK_ERR(global_mem_.allocate(size, &addr), { + DBGPRINT("[RT:mem_alloc] size: 0x%lx, asize, 0x%lx,flag : 0x%d\n", size, asize, flags); + CHECK_ERR(global_mem_.allocate(asize, &addr), { return err; }); - CHECK_ERR(this->mem_access(addr, size, flags), { + CHECK_ERR(this->mem_access(addr, asize, flags), { global_mem_.release(addr); return err; }); *dev_addr = addr; #ifdef VM_ENABLE // VM address translation - phy_to_virt_map(size, dev_addr, flags); + phy_to_virt_map(asize, dev_addr, flags); #endif return 0; } int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { - CHECK_ERR(global_mem_.reserve(dev_addr, size), { + uint64_t asize = aligned_size(size, MEM_PAGE_SIZE); + CHECK_ERR(global_mem_.reserve(dev_addr, asize), { return err; }); - DBGPRINT(" [RT:mem_reserve] mem_reserve: addr: 0x%lx, size: 0x%lx\n", dev_addr, size); - CHECK_ERR(this->mem_access(dev_addr, size, flags), { + DBGPRINT("[RT:mem_reserve] addr: 0x%lx, asize:0x%lx, size: 0x%lx\n", dev_addr, asize, size); + CHECK_ERR(this->mem_access(dev_addr, asize, flags), { global_mem_.release(dev_addr); return err; }); -#ifdef VM_ENABLE - uint64_t paddr = dev_addr; - phy_to_virt_map(size, &paddr, flags); -#endif return 0; } int mem_free(uint64_t dev_addr) { #ifdef VM_ENABLE - uint64_t pAddr = page_table_walk(dev_addr); - // VM address translation - return global_mem_.release(pAddr); + uint64_t paddr= page_table_walk(dev_addr); + return global_mem_.release(paddr); #else return global_mem_.release(dev_addr); #endif @@ -313,8 +265,8 @@ public: ram_.write((const uint8_t *)src, dest_addr, size); ram_.enable_acl(true); - - /*DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr); + /* + DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr); for (uint64_t i = 0; i < size && i < 1024; i += 4) { DBGPRINT(" 0x%lx <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + i)); }*/ 
@@ -418,200 +370,195 @@ public: *value = mpm_cache_.at(core_id).at(offset); return 0; } - #ifdef VM_ENABLE /* VM Management */ - void set_processor_satp(VA_MODE mode) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - uint64_t satp = 0; - if (mode == VA_MODE::BARE) - { - DBGPRINT(" [RT:set_satp] VA_MODE = BARE MODE"); - } - else - { - satp = (alloc_2nd_level_page_table() / MEM_PAGE_SIZE) | (1 << SATP_MODE_IDX); - DBGPRINT(" [RT:set_satp] VA_MODE = SV mode (satp = 0x%lx)\n", satp); - } - processor_.set_satp(satp); - } - - uint64_t get_ptbr() - { - // return processor_.get_satp(); - return processor_.get_satp() & ((1 << SATP_PPN_WIDTH) - 1); - } - uint64_t get_pte_address(uint64_t base_page, uint64_t vpn) - { - return (base_page * MEM_PAGE_SIZE) + (vpn * PTE_SIZE); - } - - VA_MODE get_mode() - { -#ifdef XLEN_32 - return processor_.get_satp() & (1 << SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; -#else // 64 bit - return processor_.get_satp() & (1 << SATP_MODE_IDX) ? VA_MODE::SV64 : VA_MODE::BARE; -#endif - } - - void update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag); - assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); - // Updating page table with the following mapping of (vAddr) to (pAddr). - // uint32_t page_bit_shift = log2ceil(PTE_SIZE*NUM_PTE_ENTRY); - uint64_t ppn_1 = 0, pte_addr = 0, pte_bytes = 0; - uint64_t vpn_1 = bits(vpn, 10, 19); - uint64_t vpn_0 = bits(vpn, 0, 9); - - // Read first level PTE. 
- DBGPRINT(" [RT:Update PT]Start second-level page table\n"); - pte_addr = get_pte_address(get_ptbr(), vpn_1); - pte_bytes = read_pte(pte_addr); - DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes); - ppn_1 = (pte_bytes >> 10); - - if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - // If valid bit set, proceed to next level using new ppn form PTE. - DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", ppn_1); - } - else - { - // If valid bit not set, allocate a second level page table - // in device memory and store ppn in PTE. Set rwx = 000 in PTE - // to indicate this is a pointer to the next level of the page table. - DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx), continuing the walk...\n", ppn_1); - ppn_1 = (alloc_1st_level_page_table(vpn_1) >> 12); - pte_bytes = ((ppn_1 << 10) | 0b0000000001); - assert((pte_addr >> 32) == 0 && "Upper 32 bits are not zero!"); - write_pte(pte_addr, pte_bytes); - // if (pte_bytes != read_pte(pte_addr)) - // DBGPRINT("Read/write values are different!\n"); - } - - DBGPRINT(" [RT:Update PT] Move to first-level page table\n"); - // Read second level PTE. - pte_addr = get_pte_address(ppn_1, vpn_0); - pte_bytes = read_pte(pte_addr); - - if (bit(pte_bytes, 0) && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) - { - DBGPRINT(" [RT:Update PT] ERROR, shouldn't be here\n"); - exit(1); - // If valid bit is set, then the page is already allocated. - // Should not reach this point, a sanity check. - } - else - { - // If valid bit not set, write ppn of pAddr in PTE. Set rwx = 111 in PTE - // to indicate this is a leaf PTE and has the stated permissions. 
- pte_bytes = ((ppn << 10) | 0b0000001111); - write_pte(pte_addr, pte_bytes); - if (pte_bytes != read_pte(pte_addr)) - DBGPRINT(" [RT:Update PT] PTE write value and read value are not matched!\n"); - } - } - - uint64_t page_table_walk(uint64_t vAddr_bits) - { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); - DBGPRINT(" [RT:PTW] start vAddr: 0x%lx\n", vAddr_bits); - if (!need_trans(vAddr_bits)) - { - DBGPRINT(" [RT:PTW] Translation is not needed.\n"); - return vAddr_bits; - } - uint64_t LEVELS = 2; - vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_addr, pte_bytes; - uint64_t pt_ba = get_ptbr() << 12; - - // Get base page table. - - for (int i = LEVELS - 1; i >= 0; i--) - { - // Read PTE. - pte_addr = pt_ba + vAddr.vpn[i] * PTE_SIZE; - pte_bytes = read_pte(pte_addr); - PTE_SV32_t pte(pte_bytes); - DBGPRINT(" [RT:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn, pte.flags); - - // Check if it has invalid flag bits. - if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1))) - { - std::string msg = " [RT:PTW] Page Fault : Attempted to access invalid entry. Entry: 0x"; - throw Page_Fault_Exception(msg); - } - - if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - // Not a leaf node as rwx == 000 - if (i == 0) - { - throw Page_Fault_Exception(" [RT:PTW] Page Fault : No leaf node found."); - } - else - { - // Continue on to next level. - pt_ba = pte.ppn << 12; - DBGPRINT(" [RT:PTW] next pt_ba: %p\n", (void *)pt_ba); - } - } - else - { - // Leaf node found, finished walking. - pt_ba = pte.ppn << 12; - DBGPRINT(" [RT:PTW] Found PT_Base_Address [%d] = %lx\n", i, pt_ba); - break; - } - } - - // pte_bytes is final leaf - PTE_SV32_t pte(pte_bytes); - // Check RWX permissions according to access type. 
- if (pte.r == 0) - { - throw Page_Fault_Exception(" [RT:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); - } - - uint64_t paddr = pt_ba + vAddr.pgoff; - return paddr; - } - - uint64_t alloc_2nd_level_page_table() - { - uint64_t addr = PAGE_TABLE_BASE_ADDR; - uint64_t size = PT_TOTAL_SIZE; - CHECK_ERR(this->mem_reserve(addr, size, VX_MEM_READ_WRITE), { - return err; - }); - init_page_table(addr); - return addr; - } - uint64_t alloc_1st_level_page_table(uint64_t vpn_1) - { - uint64_t addr = PAGE_TABLE_BASE_ADDR + PT_SIZE * (1 + vpn_1); - init_page_table(addr); - return addr; - } // Initialize to zero the target page table area. 32bit 4K, 64bit 8K - void init_page_table(uint64_t addr) + uint16_t init_page_table(uint64_t addr, uint64_t size) { - uint64_t asize = aligned_size(PT_SIZE, CACHE_BLOCK_SIZE); + uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); DBGPRINT(" [RT:init_page_table] (addr=0x%lx, size=0x%lx)\n", addr, asize); uint8_t *src = new uint8_t[asize]; - for (uint64_t i = 0; i < PT_SIZE; ++i) + if (src == NULL) + return 1; + + for (uint64_t i = 0; i < asize; ++i) { src[i] = 0; } ram_.enable_acl(false); ram_.write((const uint8_t *)src, addr, asize); ram_.enable_acl(true); + return 0; + } + + uint8_t alloc_page_table (uint64_t * pt_addr) + { + CHECK_ERR(page_table_mem_->allocate(PT_SIZE, pt_addr), { return err; }); + CHECK_ERR(init_page_table(*pt_addr, PT_SIZE), { return err; }); + DBGPRINT(" [RT:alloc_page_table] addr= 0x%lx\n", *pt_addr); + return 0; + } + + int16_t init_VM() + { + uint64_t pt_addr = 0; + // Reserve space for PT + DBGPRINT("[RT:init_VM] Initialize VM\n"); + CHECK_ERR(mem_reserve(PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, VX_MEM_READ_WRITE), { + return err; + }); + page_table_mem_ = new MemoryAllocator (PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + if (page_table_mem_ == NULL) + { + CHECK_ERR(this->mem_free(PAGE_TABLE_BASE_ADDR),); + return 1; + } + + if (VM_ADDR_MODE == BARE) + DBGPRINT("[RT:init_VM] VA_MODE = 
BARE MODE(addr= 0x0)"); + else + CHECK_ERR(alloc_page_table(&pt_addr),{return err;}); + + CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;}); + return 0; + } + + // Return value in in ptbr + uint64_t get_base_ppn() + { + return processor_.get_base_ppn(); + } + uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn) + { + return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE); + } + + uint8_t get_mode() + { + return processor_.get_satp_mode(); + } + + int16_t update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag) + { + DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag); + // sanity check +#if VM_ADDR_MODE == SV39 + assert((((ppn >> 44) == 0) && ((vpn >> 27) == 0)) && "Upper bits are not zero!"); + uint8_t level = 3; +#else // Default is SV32, BARE will not reach this point. + assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!"); + uint8_t level = 2; +#endif + int i = level - 1; + vAddr_t vaddr(vpn << MEM_PAGE_LOG2_SIZE); + uint64_t pte_addr = 0, pte_bytes = 0; + uint64_t pt_addr = 0; + uint64_t cur_base_ppn = get_base_ppn(); + + while (i >= 0) + { + DBGPRINT(" [RT:Update PT]Start %u-level page table\n", i); + pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]); + pte_bytes = read_pte(pte_addr); + PTE_t pte_chk(pte_bytes); + DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes); + if (pte_chk.v == 1 && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d)) + { + DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", pte_chk.ppn); + cur_base_ppn = pte_chk.ppn; + } + else + { + // If valid bit not set, allocate a next level page table + DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx) ...\n", pte_chk.ppn); + if (i == 0) + { + // Reach to leaf + DBGPRINT(" [RT:Update PT] Reached to level 0. 
This should be a leaf node(flag = %x) \n",flag); + uint32_t pte_flag = (flag << 1) | 0x3; + PTE_t new_pte(ppn <> mpm_cache_; #ifdef VM_ENABLE std::unordered_map addr_mapping; + MemoryAllocator* page_table_mem_; #endif }; diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index eebd2cde3..f3c1025a2 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -21,6 +21,7 @@ #include using namespace vortex; + #ifdef VM_ENABLE #ifndef NDEBUG #define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) @@ -29,16 +30,6 @@ using namespace vortex; #endif #endif -uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) -{ - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); -} - -bool bit(uint64_t addr, uint8_t idx) -{ - return (addr) & (1 << idx); -} - RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize) : wordSize_(wordSize) { @@ -124,6 +115,7 @@ void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) { void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { + assert(0); std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; throw BadAddress(); } @@ -133,6 +125,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { + assert(0); std::cout << "lookup of 0x" << std::hex << addr << " failed.\n"; throw BadAddress(); } @@ -208,7 +201,7 @@ std::pair MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type } //Check access permissions. 
- if ( (type == ACCESS_TYPE::FENCE) & ((e.r == 0) | (e.x == 0)) ) + if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) ) { throw Page_Fault_Exception("Page Fault : Incorrect permissions."); } @@ -601,12 +594,33 @@ void RAM::loadHexImage(const char* filename) { #ifdef VM_ENABLE +uint64_t MemoryUnit::get_base_ppn() +{ + return satp_->get_base_ppn(); +} + +uint64_t MemoryUnit::get_satp() +{ + return satp_->get_satp(); +} + +uint8_t MemoryUnit::get_mode() +{ + return satp_->get_mode(); +} +void MemoryUnit::set_satp(uint64_t satp) +{ + // uint16_t asid = 0; // set asid for different process + satp_ = new SATP_t (satp ); +} + bool MemoryUnit::need_trans(uint64_t dev_pAddr) { // Check if the this is the BARE mode - bool isBAREMode = (this->mode == VA_MODE::BARE); + bool isBAREMode = (get_mode() == BARE); // Check if the address is reserved for system usage - bool isReserved = (dev_pAddr >= PAGE_TABLE_BASE_ADDR); + // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); + bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); // Check if the address is reserved for IO usage bool isIO= (dev_pAddr < USER_BASE_ADDR); // Check if the address falls within the startup address range @@ -634,7 +648,6 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) if (tlb_access.first) { - // printf("Found pfn %lx in TLB\n",tlb_access.second); pfn = tlb_access.second; TLB_HIT++; } @@ -649,91 +662,86 @@ uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) } //Construct final address using pfn and offset. 
- DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)))); + DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx\n",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)))); return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1)); } -std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits) -{ - DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u, size_bits %lu\n", vAddr_bits, type, *size_bits); - uint64_t LEVELS = 2; - vAddr_SV32_t vAddr(vAddr_bits); - uint64_t pte_bytes = 0; +uint64_t MemoryUnit::get_pte_address(uint64_t base_ppn, uint64_t vpn) +{ + return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE); +} - uint64_t pte_addr =0; - //Get base page table. - uint64_t pt_ba = this->ptbr << 12; - int i = LEVELS - 1; +std::pair MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t *size_bits) +{ + DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u.\n", vAddr_bits, type); + uint8_t level = PT_LEVEL; + int i = level-1; + vAddr_t vaddr(vAddr_bits); + uint32_t flags =0; + uint64_t pte_addr = 0, pte_bytes = 0; + uint64_t cur_base_ppn = get_base_ppn(); + // Need to fix for super page + *size_bits = 12; - while(true) + while (true) + { + // Read PTE. + pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]); + decoder_.read(&pte_bytes, pte_addr, PTE_SIZE); + PTE_t pte(pte_bytes); + DBGPRINT(" [MMU:PTW] Level[%u] pte_addr=0x%lx, pte_bytes =0x%lx, pte.ppn= 0x%lx, pte.flags = %u)\n", i, pte_addr, pte_bytes, pte.ppn, pte.flags); + + assert(((pte.pte_bytes & 0xFFFFFFFF) != 0xbaadf00d) && "ERROR: uninitialzed PTE\n" ); + + // Check if it has invalid flag bits. + if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1))) { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry."); + } - //Read PTE. 
- pte_addr = pt_ba+vAddr.vpn[i] * PTE_SIZE; - decoder_.read(&pte_bytes, pte_addr, PTE_SIZE); - PTE_SV32_t pte(pte_bytes); - DBGPRINT(" [MMU:PTW] Level[%u] pte_bytes = 0x%lx, pte flags = %u)\n", i, pte.ppn , pte.flags); - - //Check if it has invalid flag bits. - if ( (pte.v == 0) | ( (pte.r == 0) & (pte.w == 1) ) ) + if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0)) + { + // Not a leaf node as rwx == 000 + i--; + if (i < 0) { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry."); - } - - if ( (pte.r == 0) & (pte.w == 0) & (pte.x == 0)) - { - //Not a leaf node as rwx == 000 - i--; - if (i < 0) - { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found."); - } - else - { - //Continue on to next level. - pt_ba = (pte_bytes >> 10 ) << 12; - } + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found."); } else { - //Leaf node found, finished walking. - pt_ba = (pte_bytes >> 10 ) << 12; - break; + // Continue on to next level. + cur_base_ppn= pte.ppn; + DBGPRINT(" [MMU:PTW] next base_ppn: 0x%lx\n", cur_base_ppn); + continue; } } - - PTE_SV32_t pte(pte_bytes); - - //Check RWX permissions according to access type. - if ( (type == ACCESS_TYPE::FENCE) & ((pte.r == 0) | (pte.x == 0)) ) + else { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FENCE, Incorrect permissions."); + // Leaf node found, finished walking. + // Check RWX permissions according to access type. 
+ if ((type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0))) + { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FETCH, Incorrect permissions."); + } + else if ((type == ACCESS_TYPE::LOAD) & (pte.r == 0)) + { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); + } + else if ((type == ACCESS_TYPE::STORE) & (pte.w == 0)) + { + assert(0); + throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions."); + } + cur_base_ppn = pte.ppn; + flags = pte.flags; + break; } - else if ( (type == ACCESS_TYPE::LOAD) & (pte.r == 0) ) - { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions."); - } - else if ( (type == ACCESS_TYPE::STORE) & (pte.w == 0) ) - { - throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions."); - } - *size_bits = 12; - uint64_t pfn = pt_ba >> *size_bits; - return std::make_pair(pfn, pte_bytes & 0xff); + } + return std::make_pair(cur_base_ppn, flags); } -uint64_t MemoryUnit::get_satp() -{ - return satp; -} -void MemoryUnit::set_satp(uint64_t satp) -{ - this->satp = satp; - this->ptbr = satp & ( (1<< SATP_PPN_WIDTH) - 1); -#ifdef XLEN_32 - this->mode = satp & (1<< SATP_MODE_IDX) ? VA_MODE::SV32 : VA_MODE::BARE; -#else // 64 bit - this->mode = satp & (1<< SATP_MODE_IDX) ? 
VA_MODE::SV64 : VA_MODE::BARE; -#endif -} #endif \ No newline at end of file diff --git a/sim/common/mem.h b/sim/common/mem.h index 4b7744c2b..9f212e184 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -32,17 +32,85 @@ namespace vortex { #ifdef VM_ENABLE -enum VA_MODE { - BARE, - SV32, - SV64 -}; + +// VA MODE +#define BARE 0x0 +#define SV32 0x1 +#define SV39 0x8 enum ACCESS_TYPE { LOAD, STORE, - FENCE + FETCH }; +class SATP_t +{ + private: + uint64_t address; + uint16_t asid; + uint8_t mode; + uint64_t ppn; + uint64_t satp; + + uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx) + { + return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1); + } + bool bit(uint64_t input , uint8_t idx) + { + return (input ) & ((uint64_t)1 << idx); + } + + public: + SATP_t(uint64_t satp) : satp(satp) + { +#ifdef XLEN_32 + mode = bit(satp, 31); + asid = bits(satp, 22, 30); + ppn = bits(satp, 0,21); +#else + mode = bits(satp, 60,63); + asid = bits(satp, 44, 59); + ppn = bits(satp, 0,43); +#endif + address = ppn << MEM_PAGE_LOG2_SIZE; + } + + SATP_t(uint64_t address, uint16_t asid) : address(address), asid(asid) + { +#ifdef XLEN_32 + assert((address >> 32) == 0 && "Upper 32 bits are not zero!"); +#endif + mode= VM_ADDR_MODE; + // asid = 0 ; + ppn = address >> MEM_PAGE_LOG2_SIZE; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshift-count-overflow" +#ifdef XLEN_32 + satp = (((uint64_t)mode << 31) | ((uint64_t)asid << 22) | ppn); +#else + satp = (((uint64_t)mode << 60) | ((uint64_t)asid << 44) | ppn); +#endif +#pragma GCC diagnostic pop + } + uint8_t get_mode() + { + return mode; + } + uint16_t get_asid() + { + return asid; + } + uint64_t get_base_ppn() + { + return ppn; + } + uint64_t get_satp() + { + return satp; + } +}; + class Page_Fault_Exception : public std::runtime_error /* or logic_error */ { @@ -119,6 +187,7 @@ public: #ifdef VM_ENABLE MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); + ~MemoryUnit(){delete this->satp_;}; #else 
MemoryUnit(uint64_t pageSize = 0); #endif @@ -139,7 +208,9 @@ public: #ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); - uint64_t get_satp(); + uint64_t get_satp(); + uint8_t get_mode(); + uint64_t get_base_ppn(); void set_satp(uint64_t satp); #else void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags); @@ -228,6 +299,7 @@ private: bool need_trans(uint64_t dev_pAddr); uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type); + uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn); std::pair page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits); #else uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask); @@ -245,13 +317,9 @@ private: amo_reservation_t amo_reservation_; #ifdef VM_ENABLE - - uint64_t satp; - VA_MODE mode; - uint64_t ptbr; - std::unordered_set unique_translations; uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW; + SATP_t *satp_; #endif }; @@ -322,68 +390,146 @@ private: }; #ifdef VM_ENABLE -class PTE_SV32_t +class PTE_t { private: uint64_t address; - uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx) { - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1); } - bool bit(uint8_t idx) + bool bit(uint64_t input, uint8_t idx) { - return (address) & (1 << idx); + return (input) & ((uint64_t)1 << idx); } public: +#if VM_ADDR_MODE == SV39 + bool N; + uint8_t PBMT; +#endif uint64_t ppn; uint32_t rsw; uint32_t flags; + uint8_t level; bool d, a, g, u, x, w, r, v; - PTE_SV32_t(uint64_t address) : address(address) - { - assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); - flags = bits(address,0,7); - rsw = bits(address,8,9); - ppn = bits(address,10,31); + uint64_t pte_bytes; - d = bit(7); - a = bit(6); - g = bit(5); - u = bit(4); - x = bit(3); - w = bit(2); - r = bit(1); - v = bit(0); - // printf("ppn = 0x%lx, flags= 
0x%x, rsw= 0x%x\n",ppn,flags,rsw); + void set_flags (uint32_t flag) + { + this->flags = flag; + d = bit(flags,7); + a = bit(flags,6); + g = bit(flags,5); + u = bit(flags,4); + x = bit(flags,3); + w = bit(flags,2); + r = bit(flags,1); + v = bit(flags,0); + } + + PTE_t(uint64_t address, uint32_t flags) : address(address) + { +#if VM_ADDR_MODE == SV39 + N = 0; + PBMT = 0; + level = 3; + ppn = address >> MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t [level]; + // ppn[2]=bits(address,28,53); + // ppn[1]=bits(address,19,27); + // ppn[0]=bits(address,10,18); + set_flags(flags); + // pte_bytes = (N << 63) | (PBMT << 61) | (ppn <<10) | flags ; + pte_bytes = (ppn <<10) | flags ; +#else // if VM_ADDR_MODE == SV32 + assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); + level = 2; + ppn = address >> MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t[level]; + // ppn[1]=bits(address,20,31); + // ppn[0]=bits(address,10,19); + set_flags(flags); + pte_bytes = ppn <<10 | flags ; +#endif + } + + PTE_t(uint64_t pte_bytes) : pte_bytes(pte_bytes) + { +#if VM_ADDR_MODE == SV39 + N = bit(pte_bytes,63); + PBMT = bits(pte_bytes,61,62); + level = 3; + ppn=bits(pte_bytes,10,53); + address = ppn << MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t [level]; + // ppn[2]=bits(pte_bytes,28,53); + // ppn[1]=bits(pte_bytes,19,27); + // ppn[0]=bits(pte_bytes,10,18); +#else //#if VM_ADDR_MODE == SV32 + assert((pte_bytes >> 32) == 0 && "Upper 32 bits are not zero!"); + level = 2; + ppn=bits(pte_bytes,10, 31); + address = ppn << MEM_PAGE_LOG2_SIZE; + // Reserve for Super page support + // ppn = new uint32_t[level]; + // ppn[1]=bits(address, 20,31); + // ppn[0]=bits(address, 10,19); +#endif + rsw = bits(pte_bytes,8,9); + set_flags((uint32_t)(bits(pte_bytes,0,7))); + } + ~PTE_t() + { + // Reserve for Super page support + // delete ppn; } }; -class vAddr_SV32_t +class vAddr_t { private: uint64_t address; 
- uint64_t bits(uint64_t addr, uint8_t s_idx, uint8_t e_idx) + uint64_t bits(uint8_t s_idx, uint8_t e_idx) { - return (addr >> s_idx) & ((1 << (e_idx - s_idx + 1)) - 1); + return (address>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1); } - bool bit(uint64_t addr, uint8_t idx) + bool bit( uint8_t idx) { - return (addr) & (1 << idx); + return (address) & ((uint64_t)1 << idx); } public: - uint64_t vpn[2]; + uint64_t *vpn; uint64_t pgoff; - vAddr_SV32_t(uint64_t address) : address(address) + uint8_t level; + vAddr_t(uint64_t address) : address(address) { +#if VM_ADDR_MODE == SV39 + level = 3; + vpn = new uint64_t [level]; + vpn[2] = bits(30,38); + vpn[1] = bits(21,29); + vpn[0] = bits(12,20); + pgoff = bits(0,11); +#else //#if VM_ADDR_MODE == SV32 assert((address>> 32) == 0 && "Upper 32 bits are not zero!"); - vpn[0] = bits(address,12,21); - vpn[1] = bits(address,22,31); - pgoff = bits(address,0,11); - // printf("vpn[1] = 0x%lx, vpn[0] = 0x%lx, pgoff = 0x%lx\n",vpn[1],vpn[0],pgoff); + level = 2; + vpn = new uint64_t [level]; + vpn[1] = bits(22,31); + vpn[0] = bits(12,21); + pgoff = bits(0,11); +#endif + } + + ~vAddr_t() + { + delete vpn; } }; #endif diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index a1ee0072d..503e21cd9 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -116,7 +116,7 @@ void Emulator::clear() { void Emulator::attach_ram(RAM* ram) { // bind RAM to memory unit #if (XLEN == 64) - mmu_.attach(*ram, 0, 0xFFFFFFFFFFFFFFFF); + mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39 #else mmu_.attach(*ram, 0, 0xFFFFFFFF); #endif @@ -271,11 +271,11 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) { #ifdef VM_ENABLE void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) { - // DP(1, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); + DP(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size); try { - mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + 
mmu_.read(data, addr, size, ACCESS_TYPE::FETCH); } catch (Page_Fault_Exception& page_fault) { @@ -306,8 +306,7 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) { } else { try { - // mmu_.read(data, addr, size, 0); - mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); + mmu_.read(data, addr, size, ACCESS_TYPE::LOAD); } catch (Page_Fault_Exception& page_fault) { diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index a8883c696..cd375b516 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -84,7 +84,7 @@ int main(int argc, char **argv) { Arch arch(num_threads, num_warps, num_cores); // create memory module - RAM ram(0, RAM_PAGE_SIZE); + RAM ram(0, MEM_PAGE_SIZE); // create processor Processor processor(arch); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 972792bfb..751db635e 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -149,6 +149,9 @@ Processor::Processor(const Arch& arch) Processor::~Processor() { delete impl_; +#ifdef VM_ENABLE + delete satp_; +#endif } void Processor::attach_ram(RAM* mem) { @@ -164,13 +167,19 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) { } #ifdef VM_ENABLE -uint64_t Processor::get_satp() { - // std::cout << "get SATP: 0x" << std::hex << this->satp << std::endl; - return this->satp; -} - -void Processor::set_satp(uint64_t satp) { +int16_t Processor::set_satp_by_addr(uint64_t base_addr) { + uint16_t asid = 0; + satp_ = new SATP_t (base_addr,asid); + if (satp_ == NULL) + return 1; + uint64_t satp = satp_->get_satp(); impl_->set_satp(satp); - this->satp = satp; + return 0; +} +uint8_t Processor::get_satp_mode() { + return satp_->get_mode(); +} +uint64_t Processor::get_base_ppn() { + return satp_->get_base_ppn(); } #endif diff --git a/sim/simx/processor.h b/sim/simx/processor.h index d2b575421..a20cfff0b 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -22,6 +22,9 @@ namespace vortex { class Arch; class RAM; class ProcessorImpl; +#ifdef VM_ENABLE +class 
SATP_t; +#endif class Processor { public: @@ -34,14 +37,15 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE - uint64_t get_satp(); - void set_satp(uint64_t satp); + uint8_t get_satp_mode(); + uint64_t get_base_ppn(); + int16_t set_satp_by_addr(uint64_t addr); #endif private: ProcessorImpl* impl_; #ifdef VM_ENABLE - uint64_t satp; + SATP_t *satp_; #endif }; diff --git a/tests/regression/diverge/kernel.cpp b/tests/regression/diverge/kernel.cpp index f0380e0e4..70b27fa79 100644 --- a/tests/regression/diverge/kernel.cpp +++ b/tests/regression/diverge/kernel.cpp @@ -62,7 +62,7 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { value *= 5; break; default: - assert(task_id < arg->num_points); + //assert(task_id < arg->num_points); break; } From c99e4b37b6ca6d60c91f40402847a099b55c5541 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 23 Jun 2024 11:24:10 -0400 Subject: [PATCH 051/488] Update README.md --- .travis.yml | 102 + ci/toolchain_env.sh.in | 9 +- docs/fpga_setup.md | 74 + hw/rtl/core/VX_gpr_slice.sv | 286 + hw/rtl/core/VX_pending_instr.sv | 79 + hw/rtl/core/VX_trace.vh | 387 + miscs/patches/ramulator.patch | 46 + tests/opencl/bfs/graph4096.txt | 28677 ++++++++++++++++++++++++++++++ 8 files changed, 29656 insertions(+), 4 deletions(-) create mode 100644 .travis.yml create mode 100644 docs/fpga_setup.md create mode 100644 hw/rtl/core/VX_gpr_slice.sv create mode 100644 hw/rtl/core/VX_pending_instr.sv create mode 100644 hw/rtl/core/VX_trace.vh create mode 100644 miscs/patches/ramulator.patch create mode 100755 tests/opencl/bfs/graph4096.txt diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..d43abb153 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,102 @@ +language: cpp +dist: focal +os: linux +compiler: gcc + +addons: + apt: + packages: + - build-essential + - valgrind + - libstdc++6 + - binutils + - python + - uuid-dev + +env: + global: + - TOOLDIR=$HOME/tools + +cache: + directories: + - $TOOLDIR + - 
$HOME/third_party + - $HOME/build32 + - $HOME/build64 + +before_install: + - if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ] || [ "$(cat "$TOOLDIR/version.txt")" != "v0.4" ]; then + rm -rf $TOOLDIR; + mkdir -p $TRAVIS_BUILD_DIR/build && cd $TRAVIS_BUILD_DIR/build; + ../configure --tooldir=$TOOLDIR; + ci/toolchain_install.sh --all; + echo "v0.3" > "$TOOLDIR/version.txt"; + else + echo "using existing tooldir build"; + fi + - if [ ! -d "$HOME/third_party" ] || [ -z "$(ls -A $HOME/third_party)" ] || [ "$(cat "$HOME/third_party/version.txt")" != "v0.2" ]; then + cd $TRAVIS_BUILD_DIR; + make -C third_party > /dev/null; + echo "v0.2" > "third_party/version.txt"; + cp -rf third_party $HOME; + else + echo "using existing third_party build"; + cp -rf $HOME/third_party $TRAVIS_BUILD_DIR; + fi + +install: + - if [ ! -d "$HOME/build$XLEN" ] || [ -z "$(ls -A $HOME/build$XLEN)" ] || [ "$(cat "$HOME/build$XLEN/version.txt")" != "$TRAVIS_COMMIT" ]; then + mkdir -p $TRAVIS_BUILD_DIR/build$XLEN && cd $TRAVIS_BUILD_DIR/build$XLEN; + ../configure --tooldir=$TOOLDIR --xlen=$XLEN; + source ci/toolchain_env.sh; + make build -s > /dev/null; + echo "$TRAVIS_COMMIT" > version.txt; + cp -rf $TRAVIS_BUILD_DIR/build$XLEN $HOME; + else + echo "using existing build for commit $TRAVIS_COMMIT"; + cp -rf $HOME/build$XLEN $TRAVIS_BUILD_DIR; + fi + +before_script: + - cd $TRAVIS_BUILD_DIR/build$XLEN + - source ci/toolchain_env.sh + +stages: + - test + +jobs: + include: + - stage: test + name: regression32 + env: XLEN=32 + script: + - ./ci/travis_run.py ./ci/regression.sh --unittest + - ./ci/travis_run.py ./ci/regression.sh --isa + - ./ci/travis_run.py ./ci/regression.sh --kernel + - ./ci/travis_run.py ./ci/regression.sh --synthesis + - ./ci/travis_run.py ./ci/regression.sh --regression + - ./ci/travis_run.py ./ci/regression.sh --opencl + + - stage: test + name: regression64 + env: XLEN=64 + script: + - ./ci/travis_run.py ./ci/regression.sh --isa + - ./ci/travis_run.py ./ci/regression.sh --kernel 
+ - ./ci/travis_run.py ./ci/regression.sh --synthesis + - ./ci/travis_run.py ./ci/regression.sh --regression + - ./ci/travis_run.py ./ci/regression.sh --opencl + + - stage: test + name: config + env: XLEN=32 + script: + - ./ci/travis_run.py ./ci/regression.sh --cluster + - ./ci/travis_run.py ./ci/regression.sh --config + + - stage: test + name: debug + env: XLEN=32 + script: + - ./ci/travis_run.py ./ci/regression.sh --debug + - ./ci/travis_run.py ./ci/regression.sh --stress \ No newline at end of file diff --git a/ci/toolchain_env.sh.in b/ci/toolchain_env.sh.in index 9fcfdbb89..dc50389a9 100755 --- a/ci/toolchain_env.sh.in +++ b/ci/toolchain_env.sh.in @@ -1,13 +1,13 @@ #!/bin/sh # Copyright 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,7 +16,8 @@ TOOLDIR=${TOOLDIR:=@TOOLDIR@} -export PATH=$TOOLDIR/verilator/bin:$PATH +export VERILATOR_ROOT=$TOOLDIR/verilator +export PATH=$VERILATOR_ROOT/bin:$PATH export SV2V_PATH=$TOOLDIR/sv2v export PATH=$SV2V_PATH/bin:$PATH diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md new file mode 100644 index 000000000..80d71e45f --- /dev/null +++ b/docs/fpga_setup.md @@ -0,0 +1,74 @@ +# FPGA Startup and Configuration Guide + +OPAE Environment Setup +---------------------- + + $ source /opt/inteldevstack/init_env_user.sh + $ export OPAE_HOME=/opt/opae/1.1.2 + $ export PATH=$OPAE_HOME/bin:$PATH + $ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH + $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH + $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH + +OPAE Build +------------------ + +The FPGA has to following configuration options: +- DEVICE_FAMILY=arria10 | stratix10 +- NUM_CORES=#n + +Command line: + + $ cd hw/syn/altera/opae + $ PREFIX=test1 TARGET=fpga NUM_CORES=4 make + +A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete. +Setting TARGET=ase will build the project for simulation using Intel ASE. + + +OPAE Build Configuration +------------------------ + +The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured: +- `NUM_WARPS`: Number of warps per cores +- `NUM_THREADS`: Number of threads per warps +- `PERF_ENABLE`: enable the use of all profile counters + +You configure the syntesis build from the command line: + + $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make + +OPAE Build Progress +------------------- + +You could check the last 10 lines in the build log for possible errors until build completion. + + $ tail -n 10 /build.log + +Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. 
+ + $ ps -u + +If the build fails and you need to restart it, clean up the build folder using the following command: + + $ make clean + +The file `vortex_afu.gbs` should exist when the build is done: + + $ ls -lsa /synth/vortex_afu.gbs + + +Signing the bitstream and Programming the FPGA +---------------------------------------------- + + $ cd + $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs + $ fpgasupdate vortex_afu_unsigned_ssl.gbs + +FPGA sample test running OpenCL sgemm kernel +-------------------------------------------- + +Run the following from the Vortex root directory + + $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" + diff --git a/hw/rtl/core/VX_gpr_slice.sv b/hw/rtl/core/VX_gpr_slice.sv new file mode 100644 index 000000000..b036fc555 --- /dev/null +++ b/hw/rtl/core/VX_gpr_slice.sv @@ -0,0 +1,286 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module VX_gpr_slice import VX_gpu_pkg::*; #( + parameter CORE_ID = 0, + parameter CACHE_ENABLE = 0 +) ( + input wire clk, + input wire reset, + + VX_writeback_if.slave writeback_if, + VX_scoreboard_if.slave scoreboard_if, + VX_operands_if.master operands_if +); + `UNUSED_PARAM (CORE_ID) + localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS; + localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO); + + localparam STATE_IDLE = 2'd0; + localparam STATE_FETCH1 = 2'd1; + localparam STATE_FETCH2 = 2'd2; + localparam STATE_FETCH3 = 2'd3; + localparam STATE_BITS = 2; + + wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; + reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n; + reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n; + + reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data [ISSUE_RATIO-1:0]; + reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data_n [ISSUE_RATIO-1:0]; + reg [`NR_BITS-1:0] cache_reg [ISSUE_RATIO-1:0]; + reg [`NR_BITS-1:0] cache_reg_n [ISSUE_RATIO-1:0]; + reg [`NUM_THREADS-1:0] cache_tmask [ISSUE_RATIO-1:0]; + reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0]; + reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n; + + reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n; + reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n; + reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n; + + reg [STATE_BITS-1:0] state, state_n; + reg [`NR_BITS-1:0] rs2, rs2_n; + reg [`NR_BITS-1:0] rs3, rs3_n; + reg rs2_ready, rs2_ready_n; + reg rs3_ready, rs3_ready_n; + reg data_ready, data_ready_n; + + wire stg_valid_in, stg_ready_in; + + wire is_rs1_zero = (scoreboard_if.data.rs1 == 0); + wire is_rs2_zero = (scoreboard_if.data.rs2 == 0); + wire is_rs3_zero = (scoreboard_if.data.rs3 == 0); + + always @(*) begin + state_n = state; + rs2_n = rs2; + rs3_n = rs3; + rs2_ready_n = rs2_ready; + rs3_ready_n = rs3_ready; + rs1_data_n = rs1_data; + rs2_data_n = rs2_data; + rs3_data_n = rs3_data; + 
cache_data_n = cache_data; + cache_reg_n = cache_reg; + cache_tmask_n= cache_tmask; + cache_eop_n = cache_eop; + gpr_rd_rid_n = gpr_rd_rid; + gpr_rd_wis_n = gpr_rd_wis; + data_ready_n = data_ready; + + case (state) + STATE_IDLE: begin + if (operands_if.valid && operands_if.ready) begin + data_ready_n = 0; + end + if (scoreboard_if.valid && data_ready_n == 0) begin + data_ready_n = 1; + if (is_rs3_zero || (CACHE_ENABLE != 0 && + scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] && + (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin + rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis]; + rs3_ready_n = 1; + end else begin + rs3_ready_n = 0; + gpr_rd_rid_n = scoreboard_if.data.rs3; + data_ready_n = 0; + state_n = STATE_FETCH3; + end + if (is_rs2_zero || (CACHE_ENABLE != 0 && + scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] && + (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin + rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis]; + rs2_ready_n = 1; + end else begin + rs2_ready_n = 0; + gpr_rd_rid_n = scoreboard_if.data.rs2; + data_ready_n = 0; + state_n = STATE_FETCH2; + end + if (is_rs1_zero || (CACHE_ENABLE != 0 && + scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] && + (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin + rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? 
'0 : cache_data[scoreboard_if.data.wis]; + end else begin + gpr_rd_rid_n = scoreboard_if.data.rs1; + data_ready_n = 0; + state_n = STATE_FETCH1; + end + end + gpr_rd_wis_n = scoreboard_if.data.wis; + rs2_n = scoreboard_if.data.rs2; + rs3_n = scoreboard_if.data.rs3; + end + STATE_FETCH1: begin + rs1_data_n = gpr_rd_data; + if (~rs2_ready) begin + gpr_rd_rid_n = rs2; + state_n = STATE_FETCH2; + end else if (~rs3_ready) begin + gpr_rd_rid_n = rs3; + state_n = STATE_FETCH3; + end else begin + data_ready_n = 1; + state_n = STATE_IDLE; + end + end + STATE_FETCH2: begin + rs2_data_n = gpr_rd_data; + if (~rs3_ready) begin + gpr_rd_rid_n = rs3; + state_n = STATE_FETCH3; + end else begin + data_ready_n = 1; + state_n = STATE_IDLE; + end + end + STATE_FETCH3: begin + rs3_data_n = gpr_rd_data; + data_ready_n = 1; + state_n = STATE_IDLE; + end + endcase + + if (CACHE_ENABLE != 0 && writeback_if.valid) begin + if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd) + || (cache_eop[writeback_if.data.wis] && writeback_if.data.sop)) begin + for (integer j = 0; j < `NUM_THREADS; ++j) begin + if (writeback_if.data.tmask[j]) begin + cache_data_n[writeback_if.data.wis][j] = writeback_if.data.data[j]; + end + end + cache_reg_n[writeback_if.data.wis] = writeback_if.data.rd; + cache_eop_n[writeback_if.data.wis] = writeback_if.data.eop; + cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? 
writeback_if.data.tmask : + (cache_tmask_n[writeback_if.data.wis] | writeback_if.data.tmask); + end + end + end + + always @(posedge clk) begin + if (reset) begin + state <= STATE_IDLE; + cache_eop <= {ISSUE_RATIO{1'b1}}; + data_ready <= 0; + end else begin + state <= state_n; + cache_eop <= cache_eop_n; + data_ready <= data_ready_n; + end + gpr_rd_rid <= gpr_rd_rid_n; + gpr_rd_wis <= gpr_rd_wis_n; + rs2_ready <= rs2_ready_n; + rs3_ready <= rs3_ready_n; + rs2 <= rs2_n; + rs3 <= rs3_n; + rs1_data <= rs1_data_n; + rs2_data <= rs2_data_n; + rs3_data <= rs3_data_n; + cache_data <= cache_data_n; + cache_reg <= cache_reg_n; + cache_tmask <= cache_tmask_n; + end + + assign stg_valid_in = scoreboard_if.valid && data_ready; + assign scoreboard_if.ready = stg_ready_in && data_ready; + + VX_toggle_buffer #( + .DATAW (DATAW) + ) toggle_buffer ( + .clk (clk), + .reset (reset), + .valid_in (stg_valid_in), + .data_in ({ + scoreboard_if.data.uuid, + scoreboard_if.data.wis, + scoreboard_if.data.tmask, + scoreboard_if.data.PC, + scoreboard_if.data.wb, + scoreboard_if.data.ex_type, + scoreboard_if.data.op_type, + scoreboard_if.data.op_args, + scoreboard_if.data.rd + }), + .ready_in (stg_ready_in), + .valid_out (operands_if.valid), + .data_out ({ + operands_if.data.uuid, + operands_if.data.wis, + operands_if.data.tmask, + operands_if.data.PC, + operands_if.data.wb, + operands_if.data.ex_type, + operands_if.data.op_type, + operands_if.data.op_args, + operands_if.data.rd + }), + .ready_out (operands_if.ready) + ); + + assign operands_if.data.rs1_data = rs1_data; + assign operands_if.data.rs2_data = rs2_data; + assign operands_if.data.rs3_data = rs3_data; + + // GPR banks + + reg [RAM_ADDRW-1:0] gpr_rd_addr; + wire [RAM_ADDRW-1:0] gpr_wr_addr; + if (ISSUE_WIS != 0) begin + assign gpr_wr_addr = {writeback_if.data.wis, writeback_if.data.rd}; + always @(posedge clk) begin + gpr_rd_addr <= {gpr_rd_wis_n, gpr_rd_rid_n}; + end + end else begin + assign gpr_wr_addr = writeback_if.data.rd; + 
always @(posedge clk) begin + gpr_rd_addr <= gpr_rd_rid_n; + end + end + +`ifdef GPR_RESET + reg wr_enabled = 0; + always @(posedge clk) begin + if (reset) begin + wr_enabled <= 1; + end + end +`endif + + for (genvar j = 0; j < `NUM_THREADS; ++j) begin + VX_dp_ram #( + .DATAW (`XLEN), + .SIZE (`NUM_REGS * ISSUE_RATIO), + `ifdef GPR_RESET + .INIT_ENABLE (1), + .INIT_VALUE (0), + `endif + .NO_RWCHECK (1) + ) gpr_ram ( + .clk (clk), + .read (1'b1), + `UNUSED_PIN (wren), + `ifdef GPR_RESET + .write (wr_enabled && writeback_if.valid && writeback_if.data.tmask[j]), + `else + .write (writeback_if.valid && writeback_if.data.tmask[j]), + `endif + .waddr (gpr_wr_addr), + .wdata (writeback_if.data.data[j]), + .raddr (gpr_rd_addr), + .rdata (gpr_rd_data[j]) + ); + end + +endmodule diff --git a/hw/rtl/core/VX_pending_instr.sv b/hw/rtl/core/VX_pending_instr.sv new file mode 100644 index 000000000..af87b53e0 --- /dev/null +++ b/hw/rtl/core/VX_pending_instr.sv @@ -0,0 +1,79 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module VX_pending_instr #( + parameter CTR_WIDTH = 12, + parameter ALM_EMPTY = 1, + parameter DECR_COUNT = 1 +) ( + input wire clk, + input wire reset, + input wire incr, + input wire [`NW_WIDTH-1:0] incr_wid, + input wire [DECR_COUNT-1:0] decr, + input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid, + input wire [`NW_WIDTH-1:0] alm_empty_wid, + output wire empty, + output wire alm_empty +); + localparam COUNTW = `CLOG2(DECR_COUNT+1); + + reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0] pending_instrs; + reg [`NUM_WARPS-1:0][COUNTW-1:0] decr_cnt; + reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask; + reg [`NUM_WARPS-1:0] incr_cnt, incr_cnt_n; + reg [`NUM_WARPS-1:0] alm_empty_r, empty_r; + + always @(*) begin + incr_cnt_n = 0; + decr_mask = 0; + if (incr) begin + incr_cnt_n[incr_wid] = 1; + end + for (integer i = 0; i < DECR_COUNT; ++i) begin + if (decr[i]) begin + decr_mask[decr_wid[i]][i] = 1; + end + end + end + + for (genvar i = 0; i < `NUM_WARPS; ++i) begin + + wire [COUNTW-1:0] decr_cnt_n; + `POP_COUNT(decr_cnt_n, decr_mask[i]); + + wire [CTR_WIDTH-1:0] pending_instrs_n = pending_instrs[i] + CTR_WIDTH'(incr_cnt[i]) - CTR_WIDTH'(decr_cnt[i]); + + always @(posedge clk) begin + if (reset) begin + incr_cnt[i] <= '0; + decr_cnt[i] <= '0; + pending_instrs[i] <= '0; + alm_empty_r[i] <= 0; + empty_r[i] <= 1; + end else begin + incr_cnt[i] <= incr_cnt_n[i]; + decr_cnt[i] <= decr_cnt_n; + pending_instrs[i] <= pending_instrs_n; + alm_empty_r[i] <= (pending_instrs_n == ALM_EMPTY); + empty_r[i] <= (pending_instrs_n == 0); + end + end + end + + assign alm_empty = alm_empty_r[alm_empty_wid]; + assign empty = (& empty_r); + +endmodule diff --git a/hw/rtl/core/VX_trace.vh b/hw/rtl/core/VX_trace.vh new file mode 100644 index 000000000..5dc4bc304 --- /dev/null +++ b/hw/rtl/core/VX_trace.vh @@ -0,0 +1,387 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`ifndef VX_TRACE_VH +`define VX_TRACE_VH + +`ifdef SIMULATION + + task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); + case (ex_type) + `EX_ALU: `TRACE(level, ("ALU")); + `EX_LSU: `TRACE(level, ("LSU")); + `EX_FPU: `TRACE(level, ("FPU")); + `EX_SFU: `TRACE(level, ("SFU")); + default: `TRACE(level, ("?")); + endcase + endtask + + task trace_ex_op(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + case (op_args.alu.xtype) + `ALU_TYPE_ARITH: begin + if (op_args.alu.is_w) begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDIW")); + `INST_ALU_SLL: `TRACE(level, ("SLLIW")); + `INST_ALU_SRL: `TRACE(level, ("SRLIW")); + `INST_ALU_SRA: `TRACE(level, ("SRAIW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDW")); + `INST_ALU_SUB: `TRACE(level, ("SUBW")); + `INST_ALU_SLL: `TRACE(level, ("SLLW")); + `INST_ALU_SRL: `TRACE(level, ("SRLW")); + `INST_ALU_SRA: `TRACE(level, ("SRAW")); + default: `TRACE(level, ("?")); + endcase + end + end else begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDI")); + `INST_ALU_SLL: `TRACE(level, ("SLLI")); + `INST_ALU_SRL: `TRACE(level, ("SRLI")); + `INST_ALU_SRA: `TRACE(level, ("SRAI")); + `INST_ALU_SLT: `TRACE(level, ("SLTI")); + `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); + `INST_ALU_XOR: 
`TRACE(level, ("XORI")); + `INST_ALU_OR: `TRACE(level, ("ORI")); + `INST_ALU_AND: `TRACE(level, ("ANDI")); + `INST_ALU_LUI: `TRACE(level, ("LUI")); + `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADD")); + `INST_ALU_SUB: `TRACE(level, ("SUB")); + `INST_ALU_SLL: `TRACE(level, ("SLL")); + `INST_ALU_SRL: `TRACE(level, ("SRL")); + `INST_ALU_SRA: `TRACE(level, ("SRA")); + `INST_ALU_SLT: `TRACE(level, ("SLT")); + `INST_ALU_SLTU: `TRACE(level, ("SLTU")); + `INST_ALU_XOR: `TRACE(level, ("XOR")); + `INST_ALU_OR: `TRACE(level, ("OR")); + `INST_ALU_AND: `TRACE(level, ("AND")); + `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); + `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); + default: `TRACE(level, ("?")); + endcase + end + end + end + `ALU_TYPE_BRANCH: begin + case (`INST_BR_BITS'(op_type)) + `INST_BR_EQ: `TRACE(level, ("BEQ")); + `INST_BR_NE: `TRACE(level, ("BNE")); + `INST_BR_LT: `TRACE(level, ("BLT")); + `INST_BR_GE: `TRACE(level, ("BGE")); + `INST_BR_LTU: `TRACE(level, ("BLTU")); + `INST_BR_GEU: `TRACE(level, ("BGEU")); + `INST_BR_JAL: `TRACE(level, ("JAL")); + `INST_BR_JALR: `TRACE(level, ("JALR")); + `INST_BR_ECALL: `TRACE(level, ("ECALL")); + `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); + `INST_BR_URET: `TRACE(level, ("URET")); + `INST_BR_SRET: `TRACE(level, ("SRET")); + `INST_BR_MRET: `TRACE(level, ("MRET")); + default: `TRACE(level, ("?")); + endcase + end + `ALU_TYPE_MULDIV: begin + if (op_args.alu.is_w) begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MULW")); + `INST_M_DIV: `TRACE(level, ("DIVW")); + `INST_M_DIVU: `TRACE(level, ("DIVUW")); + `INST_M_REM: `TRACE(level, ("REMW")); + `INST_M_REMU: `TRACE(level, ("REMUW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MUL")); + `INST_M_MULH: `TRACE(level, ("MULH")); + `INST_M_MULHSU:`TRACE(level, 
("MULHSU")); + `INST_M_MULHU: `TRACE(level, ("MULHU")); + `INST_M_DIV: `TRACE(level, ("DIV")); + `INST_M_DIVU: `TRACE(level, ("DIVU")); + `INST_M_REM: `TRACE(level, ("REM")); + `INST_M_REMU: `TRACE(level, ("REMU")); + default: `TRACE(level, ("?")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_LSU: begin + if (op_args.lsu.is_float) begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LW: `TRACE(level, ("FLW")); + `INST_LSU_LD: `TRACE(level, ("FLD")); + `INST_LSU_SW: `TRACE(level, ("FSW")); + `INST_LSU_SD: `TRACE(level, ("FSD")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LB: `TRACE(level, ("LB")); + `INST_LSU_LH: `TRACE(level, ("LH")); + `INST_LSU_LW: `TRACE(level, ("LW")); + `INST_LSU_LD: `TRACE(level, ("LD")); + `INST_LSU_LBU:`TRACE(level, ("LBU")); + `INST_LSU_LHU:`TRACE(level, ("LHU")); + `INST_LSU_LWU:`TRACE(level, ("LWU")); + `INST_LSU_SB: `TRACE(level, ("SB")); + `INST_LSU_SH: `TRACE(level, ("SH")); + `INST_LSU_SW: `TRACE(level, ("SW")); + `INST_LSU_SD: `TRACE(level, ("SD")); + `INST_LSU_FENCE:`TRACE(level,("FENCE")); + default: `TRACE(level, ("?")); + endcase + end + end + `EX_FPU: begin + case (`INST_FPU_BITS'(op_type)) + `INST_FPU_ADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FADD.D")); + else + `TRACE(level, ("FADD.S")); + end + `INST_FPU_SUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSUB.D")); + else + `TRACE(level, ("FSUB.S")); + end + `INST_FPU_MUL: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMUL.D")); + else + `TRACE(level, ("FMUL.S")); + end + `INST_FPU_DIV: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FDIV.D")); + else + `TRACE(level, ("FDIV.S")); + end + `INST_FPU_SQRT: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSQRT.D")); + else + `TRACE(level, ("FSQRT.S")); + end + `INST_FPU_MADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMADD.D")); + else + `TRACE(level, ("FMADD.S")); + end + `INST_FPU_MSUB: begin + if 
(op_args.fpu.fmt[0]) + `TRACE(level, ("FMSUB.D")); + else + `TRACE(level, ("FMSUB.S")); + end + `INST_FPU_NMADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMADD.D")); + else + `TRACE(level, ("FNMADD.S")); + end + `INST_FPU_NMSUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMSUB.D")); + else + `TRACE(level, ("FNMSUB.S")); + end + `INST_FPU_CMP: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.D")); + 1: `TRACE(level, ("FLT.D")); + 2: `TRACE(level, ("FEQ.D")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.S")); + 1: `TRACE(level, ("FLT.S")); + 2: `TRACE(level, ("FEQ.S")); + default: `TRACE(level, ("?")); + endcase + end + end + `INST_FPU_F2F: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FCVT.D.S")); + end else begin + `TRACE(level, ("FCVT.S.D")); + end + end + `INST_FPU_F2I: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.D")); + end else begin + `TRACE(level, ("FCVT.W.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.S")); + end else begin + `TRACE(level, ("FCVT.W.S")); + end + end + end + `INST_FPU_F2U: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.D")); + end else begin + `TRACE(level, ("FCVT.WU.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.S")); + end else begin + `TRACE(level, ("FCVT.WU.S")); + end + end + end + `INST_FPU_I2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.L")); + end else begin + `TRACE(level, ("FCVT.D.W")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.L")); + end else begin + `TRACE(level, ("FCVT.S.W")); + end + end + end + `INST_FPU_U2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.LU")); + end else 
begin + `TRACE(level, ("FCVT.D.WU")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.LU")); + end else begin + `TRACE(level, ("FCVT.S.WU")); + end + end + end + `INST_FPU_MISC: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.D")); + 1: `TRACE(level, ("FSGNJN.D")); + 2: `TRACE(level, ("FSGNJX.D")); + 3: `TRACE(level, ("FCLASS.D")); + 4: `TRACE(level, ("FMV.X.D")); + 5: `TRACE(level, ("FMV.D.X")); + 6: `TRACE(level, ("FMIN.D")); + 7: `TRACE(level, ("FMAX.D")); + endcase + end else begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.S")); + 1: `TRACE(level, ("FSGNJN.S")); + 2: `TRACE(level, ("FSGNJX.S")); + 3: `TRACE(level, ("FCLASS.S")); + 4: `TRACE(level, ("FMV.X.S")); + 5: `TRACE(level, ("FMV.S.X")); + 6: `TRACE(level, ("FMIN.S")); + 7: `TRACE(level, ("FMAX.S")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_SFU: begin + case (`INST_SFU_BITS'(op_type)) + `INST_SFU_TMC: `TRACE(level, ("TMC")); + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")); + `INST_SFU_BAR: `TRACE(level, ("BAR")); + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end + `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end + default: `TRACE(level, ("?")); + endcase + end + default: `TRACE(level, ("?")); + endcase + endtask + + task trace_op_args(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + 
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); + end + `EX_LSU: begin + `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); + end + `EX_FPU: begin + `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + end + `EX_SFU: begin + if (`INST_SFU_IS_CSR(op_type)) begin + `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); + end + end + default:; + endcase + endtask + + task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); + case (addr) + `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); + `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); + `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); + `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); + `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); + default: `TRACE(level, ("?")); + endcase + endtask + +`endif + +`endif // VX_TRACE_VH diff --git a/miscs/patches/ramulator.patch b/miscs/patches/ramulator.patch new file mode 100644 index 000000000..e24b5d230 --- /dev/null +++ b/miscs/patches/ramulator.patch @@ -0,0 +1,46 @@ +diff --git a/Makefile b/Makefile +index ea340c8..d2aac5b 100644 +--- a/Makefile ++++ b/Makefile +@@ -7,16 +7,16 @@ OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS)) + + # Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with + # g++ 4.x due to an internal compiler error when processing lambda functions. 
+-CXX := clang++ ++#CXX := clang++ + # CXX := g++-5 +-CXXFLAGS := -O3 -std=c++11 -g -Wall ++CXXFLAGS := -std=c++11 -O3 -g -Wall -fPIC + + .PHONY: all clean depend + + all: depend ramulator + + clean: +- rm -f ramulator ++ rm -f ramulator libramulator.a + rm -rf $(OBJDIR) + + depend: $(OBJDIR)/.depend +@@ -36,7 +36,7 @@ ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend + $(CXX) $(CXXFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS) + + libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o +- libtool -static -o $@ $(OBJS) $(OBJDIR)/Gem5Wrapper.o ++ $(AR) rcs $@ $^ + + $(OBJS): | $(OBJDIR) + +diff --git a/src/Request.h b/src/Request.h +index 57abd0d..a5ce061 100644 +--- a/src/Request.h ++++ b/src/Request.h +@@ -36,7 +36,7 @@ public: + + Request(long addr, Type type, int coreid = 0) + : is_first_command(true), addr(addr), coreid(coreid), type(type), +- callback([](Request& req){}) {} ++ callback([](Request&){}) {} + + Request(long addr, Type type, function callback, int coreid = 0) + : is_first_command(true), addr(addr), coreid(coreid), type(type), callback(callback) {} diff --git a/tests/opencl/bfs/graph4096.txt b/tests/opencl/bfs/graph4096.txt new file mode 100755 index 000000000..567432612 --- /dev/null +++ b/tests/opencl/bfs/graph4096.txt @@ -0,0 +1,28677 @@ +4096 +0 10 +10 6 +16 2 +18 5 +23 7 +30 7 +37 4 +41 4 +45 3 +48 5 +53 7 +60 4 +64 4 +68 6 +74 7 +81 5 +86 11 +97 5 +102 5 +107 8 +115 4 +119 4 +123 6 +129 4 +133 5 +138 7 +145 4 +149 2 +151 12 +163 3 +166 6 +172 6 +178 7 +185 5 +190 11 +201 4 +205 6 +211 9 +220 3 +223 4 +227 5 +232 4 +236 5 +241 6 +247 10 +257 4 +261 5 +266 7 +273 5 +278 4 +282 8 +290 5 +295 8 +303 9 +312 4 +316 5 +321 5 +326 3 +329 8 +337 5 +342 10 +352 6 +358 4 +362 5 +367 5 +372 10 +382 6 +388 8 +396 8 +404 5 +409 5 +414 5 +419 8 +427 6 +433 8 +441 9 +450 5 +455 10 +465 5 +470 11 +481 5 +486 7 +493 8 +501 9 +510 4 +514 10 +524 9 +533 5 +538 5 +543 7 +550 3 +553 3 +556 2 +558 6 +564 8 +572 3 +575 4 +579 5 +584 11 +595 8 +603 7 +610 5 +615 7 +622 4 +626 7 
+633 6 +639 5 +644 4 +648 5 +653 4 +657 8 +665 8 +673 10 +683 2 +685 5 +690 5 +695 6 +701 3 +704 5 +709 9 +718 10 +728 7 +735 7 +742 9 +751 3 +754 4 +758 9 +767 6 +773 10 +783 7 +790 4 +794 8 +802 4 +806 5 +811 5 +816 8 +824 7 +831 8 +839 10 +849 5 +854 5 +859 4 +863 4 +867 7 +874 9 +883 2 +885 10 +895 8 +903 5 +908 6 +914 5 +919 11 +930 2 +932 6 +938 2 +940 4 +944 6 +950 6 +956 5 +961 4 +965 3 +968 4 +972 1 +973 10 +983 7 +990 4 +994 6 +1000 9 +1009 6 +1015 10 +1025 7 +1032 7 +1039 5 +1044 5 +1049 9 +1058 4 +1062 5 +1067 4 +1071 6 +1077 6 +1083 7 +1090 9 +1099 2 +1101 4 +1105 3 +1108 9 +1117 7 +1124 4 +1128 9 +1137 9 +1146 4 +1150 11 +1161 6 +1167 8 +1175 6 +1181 7 +1188 8 +1196 4 +1200 7 +1207 8 +1215 10 +1225 3 +1228 6 +1234 3 +1237 4 +1241 5 +1246 3 +1249 1 +1250 4 +1254 6 +1260 4 +1264 11 +1275 7 +1282 9 +1291 8 +1299 5 +1304 6 +1310 8 +1318 9 +1327 6 +1333 7 +1340 10 +1350 7 +1357 8 +1365 10 +1375 6 +1381 2 +1383 10 +1393 5 +1398 8 +1406 9 +1415 4 +1419 5 +1424 3 +1427 4 +1431 4 +1435 9 +1444 6 +1450 9 +1459 6 +1465 4 +1469 6 +1475 9 +1484 8 +1492 7 +1499 9 +1508 2 +1510 3 +1513 8 +1521 6 +1527 5 +1532 9 +1541 6 +1547 5 +1552 9 +1561 12 +1573 2 +1575 3 +1578 7 +1585 6 +1591 5 +1596 10 +1606 9 +1615 9 +1624 1 +1625 4 +1629 6 +1635 2 +1637 14 +1651 2 +1653 4 +1657 4 +1661 5 +1666 7 +1673 6 +1679 3 +1682 13 +1695 5 +1700 7 +1707 8 +1715 3 +1718 5 +1723 6 +1729 7 +1736 6 +1742 7 +1749 2 +1751 7 +1758 5 +1763 4 +1767 4 +1771 6 +1777 2 +1779 3 +1782 9 +1791 7 +1798 4 +1802 8 +1810 7 +1817 6 +1823 6 +1829 4 +1833 7 +1840 8 +1848 2 +1850 14 +1864 9 +1873 6 +1879 6 +1885 6 +1891 2 +1893 8 +1901 5 +1906 8 +1914 4 +1918 4 +1922 3 +1925 8 +1933 4 +1937 4 +1941 6 +1947 3 +1950 8 +1958 7 +1965 6 +1971 4 +1975 5 +1980 6 +1986 4 +1990 6 +1996 10 +2006 5 +2011 5 +2016 5 +2021 7 +2028 5 +2033 6 +2039 7 +2046 4 +2050 6 +2056 16 +2072 6 +2078 11 +2089 11 +2100 3 +2103 5 +2108 8 +2116 4 +2120 7 +2127 2 +2129 10 +2139 6 +2145 7 +2152 8 +2160 6 +2166 5 +2171 5 +2176 2 +2178 4 +2182 
5 +2187 5 +2192 1 +2193 7 +2200 8 +2208 7 +2215 8 +2223 8 +2231 5 +2236 9 +2245 3 +2248 5 +2253 6 +2259 2 +2261 4 +2265 4 +2269 8 +2277 5 +2282 8 +2290 6 +2296 6 +2302 4 +2306 5 +2311 7 +2318 5 +2323 4 +2327 8 +2335 12 +2347 1 +2348 5 +2353 8 +2361 3 +2364 6 +2370 4 +2374 7 +2381 4 +2385 8 +2393 4 +2397 2 +2399 5 +2404 5 +2409 6 +2415 6 +2421 5 +2426 8 +2434 5 +2439 6 +2445 6 +2451 6 +2457 2 +2459 4 +2463 3 +2466 6 +2472 5 +2477 5 +2482 10 +2492 6 +2498 4 +2502 9 +2511 4 +2515 4 +2519 6 +2525 9 +2534 7 +2541 6 +2547 4 +2551 6 +2557 5 +2562 3 +2565 6 +2571 6 +2577 7 +2584 4 +2588 10 +2598 8 +2606 6 +2612 6 +2618 4 +2622 7 +2629 7 +2636 6 +2642 2 +2644 4 +2648 12 +2660 6 +2666 13 +2679 11 +2690 9 +2699 2 +2701 5 +2706 6 +2712 6 +2718 3 +2721 7 +2728 3 +2731 6 +2737 11 +2748 2 +2750 7 +2757 4 +2761 5 +2766 4 +2770 6 +2776 4 +2780 6 +2786 5 +2791 6 +2797 4 +2801 9 +2810 7 +2817 6 +2823 8 +2831 8 +2839 13 +2852 7 +2859 6 +2865 6 +2871 4 +2875 7 +2882 9 +2891 11 +2902 5 +2907 9 +2916 4 +2920 5 +2925 3 +2928 4 +2932 4 +2936 5 +2941 2 +2943 9 +2952 3 +2955 13 +2968 4 +2972 5 +2977 4 +2981 8 +2989 10 +2999 5 +3004 8 +3012 5 +3017 4 +3021 4 +3025 11 +3036 5 +3041 6 +3047 7 +3054 5 +3059 5 +3064 6 +3070 9 +3079 2 +3081 4 +3085 5 +3090 11 +3101 5 +3106 3 +3109 5 +3114 5 +3119 6 +3125 8 +3133 6 +3139 4 +3143 5 +3148 7 +3155 6 +3161 8 +3169 5 +3174 4 +3178 6 +3184 6 +3190 6 +3196 5 +3201 4 +3205 6 +3211 8 +3219 7 +3226 6 +3232 3 +3235 5 +3240 7 +3247 6 +3253 11 +3264 6 +3270 4 +3274 3 +3277 1 +3278 2 +3280 8 +3288 7 +3295 8 +3303 8 +3311 5 +3316 9 +3325 9 +3334 7 +3341 3 +3344 7 +3351 8 +3359 5 +3364 1 +3365 4 +3369 2 +3371 5 +3376 7 +3383 8 +3391 2 +3393 5 +3398 5 +3403 12 +3415 4 +3419 6 +3425 5 +3430 4 +3434 8 +3442 6 +3448 4 +3452 3 +3455 7 +3462 4 +3466 6 +3472 5 +3477 7 +3484 8 +3492 6 +3498 6 +3504 11 +3515 6 +3521 2 +3523 10 +3533 3 +3536 7 +3543 8 +3551 4 +3555 11 +3566 5 +3571 8 +3579 6 +3585 8 +3593 6 +3599 5 +3604 7 +3611 2 +3613 5 +3618 2 +3620 4 +3624 10 +3634 4 
+3638 8 +3646 5 +3651 4 +3655 6 +3661 5 +3666 11 +3677 10 +3687 4 +3691 6 +3697 2 +3699 6 +3705 6 +3711 4 +3715 7 +3722 4 +3726 6 +3732 9 +3741 4 +3745 6 +3751 7 +3758 10 +3768 3 +3771 12 +3783 2 +3785 7 +3792 4 +3796 8 +3804 8 +3812 4 +3816 5 +3821 10 +3831 6 +3837 6 +3843 5 +3848 6 +3854 7 +3861 6 +3867 3 +3870 5 +3875 5 +3880 7 +3887 12 +3899 9 +3908 4 +3912 6 +3918 3 +3921 6 +3927 4 +3931 8 +3939 6 +3945 9 +3954 11 +3965 10 +3975 7 +3982 7 +3989 8 +3997 8 +4005 6 +4011 3 +4014 6 +4020 10 +4030 5 +4035 9 +4044 6 +4050 3 +4053 7 +4060 2 +4062 4 +4066 3 +4069 4 +4073 7 +4080 11 +4091 3 +4094 6 +4100 7 +4107 4 +4111 7 +4118 7 +4125 7 +4132 9 +4141 6 +4147 5 +4152 3 +4155 9 +4164 8 +4172 5 +4177 5 +4182 7 +4189 7 +4196 7 +4203 7 +4210 4 +4214 6 +4220 3 +4223 4 +4227 3 +4230 4 +4234 4 +4238 8 +4246 5 +4251 5 +4256 3 +4259 7 +4266 5 +4271 3 +4274 4 +4278 6 +4284 5 +4289 4 +4293 9 +4302 9 +4311 5 +4316 5 +4321 6 +4327 5 +4332 9 +4341 5 +4346 5 +4351 4 +4355 5 +4360 8 +4368 6 +4374 15 +4389 7 +4396 2 +4398 7 +4405 7 +4412 9 +4421 4 +4425 8 +4433 6 +4439 6 +4445 5 +4450 6 +4456 7 +4463 5 +4468 4 +4472 9 +4481 4 +4485 7 +4492 9 +4501 3 +4504 15 +4519 8 +4527 10 +4537 5 +4542 2 +4544 6 +4550 3 +4553 5 +4558 5 +4563 8 +4571 9 +4580 8 +4588 7 +4595 10 +4605 9 +4614 10 +4624 7 +4631 3 +4634 5 +4639 6 +4645 5 +4650 5 +4655 10 +4665 10 +4675 9 +4684 3 +4687 6 +4693 7 +4700 5 +4705 9 +4714 6 +4720 7 +4727 6 +4733 8 +4741 12 +4753 7 +4760 5 +4765 9 +4774 4 +4778 5 +4783 5 +4788 9 +4797 7 +4804 6 +4810 5 +4815 9 +4824 3 +4827 5 +4832 5 +4837 3 +4840 7 +4847 3 +4850 4 +4854 7 +4861 9 +4870 7 +4877 8 +4885 6 +4891 6 +4897 5 +4902 10 +4912 4 +4916 4 +4920 4 +4924 3 +4927 5 +4932 7 +4939 4 +4943 3 +4946 5 +4951 5 +4956 3 +4959 3 +4962 1 +4963 6 +4969 4 +4973 10 +4983 4 +4987 7 +4994 4 +4998 4 +5002 4 +5006 6 +5012 7 +5019 7 +5026 8 +5034 12 +5046 7 +5053 5 +5058 7 +5065 8 +5073 3 +5076 6 +5082 5 +5087 3 +5090 6 +5096 3 +5099 4 +5103 4 +5107 6 +5113 7 +5120 5 +5125 3 +5128 2 +5130 5 
+5135 6 +5141 6 +5147 2 +5149 7 +5156 9 +5165 8 +5173 6 +5179 4 +5183 6 +5189 6 +5195 8 +5203 3 +5206 9 +5215 3 +5218 6 +5224 13 +5237 9 +5246 6 +5252 7 +5259 11 +5270 5 +5275 9 +5284 6 +5290 4 +5294 6 +5300 9 +5309 7 +5316 4 +5320 5 +5325 3 +5328 1 +5329 4 +5333 4 +5337 3 +5340 3 +5343 2 +5345 4 +5349 7 +5356 5 +5361 11 +5372 6 +5378 8 +5386 7 +5393 5 +5398 2 +5400 9 +5409 8 +5417 2 +5419 5 +5424 3 +5427 9 +5436 6 +5442 7 +5449 5 +5454 7 +5461 6 +5467 4 +5471 4 +5475 8 +5483 3 +5486 4 +5490 13 +5503 7 +5510 6 +5516 2 +5518 6 +5524 8 +5532 8 +5540 7 +5547 9 +5556 4 +5560 4 +5564 7 +5571 2 +5573 10 +5583 2 +5585 8 +5593 4 +5597 7 +5604 8 +5612 8 +5620 5 +5625 3 +5628 6 +5634 5 +5639 9 +5648 6 +5654 6 +5660 3 +5663 9 +5672 9 +5681 7 +5688 8 +5696 6 +5702 7 +5709 2 +5711 8 +5719 4 +5723 6 +5729 3 +5732 9 +5741 7 +5748 6 +5754 8 +5762 6 +5768 4 +5772 6 +5778 8 +5786 3 +5789 10 +5799 10 +5809 5 +5814 9 +5823 5 +5828 10 +5838 9 +5847 7 +5854 5 +5859 4 +5863 7 +5870 4 +5874 5 +5879 6 +5885 8 +5893 8 +5901 7 +5908 4 +5912 2 +5914 6 +5920 5 +5925 7 +5932 6 +5938 3 +5941 6 +5947 7 +5954 5 +5959 8 +5967 5 +5972 7 +5979 6 +5985 4 +5989 5 +5994 5 +5999 3 +6002 2 +6004 5 +6009 7 +6016 11 +6027 7 +6034 6 +6040 3 +6043 6 +6049 11 +6060 10 +6070 2 +6072 9 +6081 5 +6086 2 +6088 4 +6092 7 +6099 6 +6105 5 +6110 5 +6115 5 +6120 3 +6123 3 +6126 5 +6131 7 +6138 5 +6143 11 +6154 4 +6158 8 +6166 8 +6174 9 +6183 4 +6187 6 +6193 5 +6198 4 +6202 6 +6208 5 +6213 6 +6219 8 +6227 6 +6233 6 +6239 5 +6244 4 +6248 4 +6252 4 +6256 6 +6262 7 +6269 4 +6273 6 +6279 11 +6290 5 +6295 9 +6304 2 +6306 8 +6314 4 +6318 3 +6321 2 +6323 9 +6332 9 +6341 2 +6343 8 +6351 9 +6360 5 +6365 4 +6369 5 +6374 3 +6377 6 +6383 12 +6395 7 +6402 3 +6405 9 +6414 7 +6421 7 +6428 5 +6433 6 +6439 5 +6444 6 +6450 2 +6452 6 +6458 3 +6461 9 +6470 6 +6476 7 +6483 11 +6494 9 +6503 5 +6508 8 +6516 4 +6520 7 +6527 5 +6532 2 +6534 4 +6538 4 +6542 7 +6549 5 +6554 6 +6560 3 +6563 4 +6567 7 +6574 5 +6579 6 +6585 5 +6590 7 +6597 11 +6608 8 
+6616 5 +6621 16 +6637 5 +6642 12 +6654 7 +6661 6 +6667 10 +6677 5 +6682 7 +6689 1 +6690 6 +6696 8 +6704 5 +6709 10 +6719 5 +6724 3 +6727 6 +6733 5 +6738 2 +6740 4 +6744 5 +6749 12 +6761 5 +6766 10 +6776 8 +6784 7 +6791 6 +6797 6 +6803 3 +6806 5 +6811 6 +6817 2 +6819 11 +6830 7 +6837 7 +6844 8 +6852 6 +6858 8 +6866 6 +6872 4 +6876 3 +6879 7 +6886 8 +6894 6 +6900 6 +6906 3 +6909 8 +6917 5 +6922 7 +6929 4 +6933 6 +6939 7 +6946 5 +6951 5 +6956 5 +6961 9 +6970 8 +6978 5 +6983 8 +6991 5 +6996 6 +7002 7 +7009 3 +7012 8 +7020 10 +7030 3 +7033 6 +7039 6 +7045 8 +7053 5 +7058 7 +7065 5 +7070 4 +7074 9 +7083 10 +7093 6 +7099 5 +7104 4 +7108 12 +7120 8 +7128 2 +7130 3 +7133 2 +7135 11 +7146 12 +7158 6 +7164 9 +7173 12 +7185 8 +7193 5 +7198 4 +7202 7 +7209 3 +7212 4 +7216 8 +7224 3 +7227 4 +7231 5 +7236 7 +7243 5 +7248 7 +7255 3 +7258 10 +7268 8 +7276 3 +7279 8 +7287 11 +7298 2 +7300 8 +7308 6 +7314 6 +7320 9 +7329 4 +7333 11 +7344 6 +7350 4 +7354 5 +7359 4 +7363 9 +7372 1 +7373 10 +7383 4 +7387 8 +7395 7 +7402 8 +7410 9 +7419 4 +7423 3 +7426 6 +7432 5 +7437 7 +7444 9 +7453 8 +7461 6 +7467 10 +7477 8 +7485 13 +7498 4 +7502 6 +7508 7 +7515 10 +7525 7 +7532 4 +7536 3 +7539 3 +7542 10 +7552 5 +7557 6 +7563 6 +7569 3 +7572 7 +7579 9 +7588 5 +7593 8 +7601 7 +7608 7 +7615 7 +7622 5 +7627 5 +7632 6 +7638 7 +7645 6 +7651 6 +7657 10 +7667 6 +7673 4 +7677 5 +7682 8 +7690 6 +7696 8 +7704 9 +7713 2 +7715 3 +7718 9 +7727 4 +7731 4 +7735 6 +7741 6 +7747 9 +7756 6 +7762 3 +7765 4 +7769 12 +7781 4 +7785 4 +7789 6 +7795 7 +7802 3 +7805 1 +7806 7 +7813 2 +7815 4 +7819 3 +7822 5 +7827 9 +7836 8 +7844 9 +7853 8 +7861 6 +7867 2 +7869 4 +7873 8 +7881 5 +7886 9 +7895 3 +7898 10 +7908 2 +7910 8 +7918 6 +7924 7 +7931 4 +7935 7 +7942 3 +7945 6 +7951 8 +7959 6 +7965 11 +7976 6 +7982 9 +7991 4 +7995 2 +7997 7 +8004 5 +8009 5 +8014 7 +8021 8 +8029 7 +8036 4 +8040 4 +8044 11 +8055 11 +8066 6 +8072 6 +8078 9 +8087 3 +8090 6 +8096 9 +8105 6 +8111 4 +8115 6 +8121 4 +8125 4 +8129 5 +8134 8 +8142 10 +8152 5 
+8157 4 +8161 6 +8167 7 +8174 6 +8180 3 +8183 6 +8189 5 +8194 10 +8204 4 +8208 6 +8214 5 +8219 3 +8222 5 +8227 8 +8235 8 +8243 4 +8247 4 +8251 4 +8255 11 +8266 10 +8276 6 +8282 6 +8288 8 +8296 3 +8299 4 +8303 6 +8309 5 +8314 9 +8323 3 +8326 3 +8329 9 +8338 6 +8344 7 +8351 5 +8356 4 +8360 7 +8367 11 +8378 4 +8382 6 +8388 9 +8397 8 +8405 8 +8413 4 +8417 6 +8423 9 +8432 1 +8433 3 +8436 7 +8443 5 +8448 4 +8452 6 +8458 3 +8461 4 +8465 4 +8469 5 +8474 5 +8479 4 +8483 5 +8488 5 +8493 3 +8496 7 +8503 5 +8508 9 +8517 6 +8523 3 +8526 3 +8529 6 +8535 4 +8539 7 +8546 8 +8554 7 +8561 4 +8565 5 +8570 6 +8576 6 +8582 6 +8588 6 +8594 6 +8600 6 +8606 4 +8610 3 +8613 5 +8618 4 +8622 8 +8630 2 +8632 8 +8640 5 +8645 6 +8651 4 +8655 5 +8660 4 +8664 7 +8671 3 +8674 7 +8681 3 +8684 5 +8689 7 +8696 3 +8699 5 +8704 5 +8709 5 +8714 6 +8720 9 +8729 5 +8734 6 +8740 2 +8742 4 +8746 9 +8755 5 +8760 8 +8768 4 +8772 10 +8782 5 +8787 7 +8794 7 +8801 3 +8804 4 +8808 5 +8813 10 +8823 4 +8827 8 +8835 8 +8843 5 +8848 4 +8852 4 +8856 5 +8861 7 +8868 10 +8878 5 +8883 3 +8886 2 +8888 4 +8892 8 +8900 5 +8905 3 +8908 4 +8912 7 +8919 12 +8931 9 +8940 6 +8946 5 +8951 5 +8956 7 +8963 12 +8975 10 +8985 8 +8993 9 +9002 10 +9012 6 +9018 11 +9029 5 +9034 4 +9038 9 +9047 6 +9053 12 +9065 6 +9071 6 +9077 2 +9079 1 +9080 6 +9086 3 +9089 6 +9095 8 +9103 5 +9108 6 +9114 10 +9124 2 +9126 10 +9136 5 +9141 4 +9145 4 +9149 4 +9153 4 +9157 8 +9165 7 +9172 12 +9184 2 +9186 5 +9191 6 +9197 4 +9201 4 +9205 5 +9210 5 +9215 5 +9220 14 +9234 5 +9239 4 +9243 5 +9248 3 +9251 3 +9254 7 +9261 5 +9266 6 +9272 7 +9279 6 +9285 5 +9290 6 +9296 4 +9300 7 +9307 8 +9315 5 +9320 2 +9322 4 +9326 7 +9333 9 +9342 7 +9349 4 +9353 7 +9360 3 +9363 2 +9365 3 +9368 7 +9375 5 +9380 4 +9384 4 +9388 4 +9392 3 +9395 3 +9398 5 +9403 9 +9412 7 +9419 4 +9423 5 +9428 3 +9431 6 +9437 6 +9443 2 +9445 7 +9452 4 +9456 9 +9465 4 +9469 5 +9474 6 +9480 4 +9484 12 +9496 6 +9502 7 +9509 8 +9517 6 +9523 1 +9524 5 +9529 5 +9534 5 +9539 5 +9544 4 +9548 3 +9551 11 
+9562 4 +9566 6 +9572 4 +9576 6 +9582 5 +9587 4 +9591 3 +9594 3 +9597 3 +9600 9 +9609 6 +9615 4 +9619 7 +9626 5 +9631 4 +9635 4 +9639 8 +9647 6 +9653 9 +9662 5 +9667 7 +9674 6 +9680 8 +9688 2 +9690 6 +9696 4 +9700 5 +9705 8 +9713 6 +9719 4 +9723 9 +9732 9 +9741 9 +9750 2 +9752 3 +9755 6 +9761 8 +9769 4 +9773 7 +9780 3 +9783 5 +9788 4 +9792 1 +9793 8 +9801 6 +9807 11 +9818 4 +9822 8 +9830 5 +9835 8 +9843 6 +9849 6 +9855 8 +9863 9 +9872 7 +9879 2 +9881 5 +9886 6 +9892 5 +9897 4 +9901 14 +9915 5 +9920 5 +9925 8 +9933 10 +9943 5 +9948 5 +9953 5 +9958 5 +9963 5 +9968 7 +9975 3 +9978 4 +9982 6 +9988 5 +9993 6 +9999 11 +10010 7 +10017 5 +10022 4 +10026 6 +10032 7 +10039 5 +10044 6 +10050 4 +10054 7 +10061 9 +10070 7 +10077 4 +10081 6 +10087 3 +10090 5 +10095 6 +10101 4 +10105 13 +10118 5 +10123 4 +10127 10 +10137 8 +10145 6 +10151 9 +10160 3 +10163 2 +10165 12 +10177 10 +10187 9 +10196 3 +10199 11 +10210 13 +10223 5 +10228 7 +10235 6 +10241 5 +10246 2 +10248 3 +10251 6 +10257 9 +10266 6 +10272 6 +10278 8 +10286 7 +10293 2 +10295 3 +10298 9 +10307 5 +10312 5 +10317 6 +10323 5 +10328 9 +10337 6 +10343 7 +10350 9 +10359 7 +10366 5 +10371 7 +10378 9 +10387 4 +10391 7 +10398 6 +10404 2 +10406 4 +10410 10 +10420 9 +10429 10 +10439 4 +10443 4 +10447 4 +10451 3 +10454 6 +10460 5 +10465 8 +10473 6 +10479 6 +10485 6 +10491 7 +10498 7 +10505 11 +10516 6 +10522 9 +10531 4 +10535 5 +10540 7 +10547 6 +10553 3 +10556 5 +10561 4 +10565 11 +10576 6 +10582 7 +10589 3 +10592 4 +10596 5 +10601 8 +10609 3 +10612 7 +10619 9 +10628 5 +10633 3 +10636 11 +10647 5 +10652 5 +10657 8 +10665 5 +10670 8 +10678 5 +10683 2 +10685 9 +10694 7 +10701 6 +10707 5 +10712 5 +10717 7 +10724 5 +10729 3 +10732 3 +10735 7 +10742 5 +10747 4 +10751 9 +10760 7 +10767 11 +10778 9 +10787 5 +10792 6 +10798 6 +10804 5 +10809 5 +10814 6 +10820 5 +10825 5 +10830 11 +10841 6 +10847 5 +10852 5 +10857 7 +10864 5 +10869 12 +10881 7 +10888 7 +10895 4 +10899 2 +10901 5 +10906 6 +10912 9 +10921 2 +10923 7 +10930 5 +10935 4 +10939 
7 +10946 10 +10956 10 +10966 4 +10970 7 +10977 6 +10983 7 +10990 6 +10996 2 +10998 3 +11001 5 +11006 4 +11010 6 +11016 5 +11021 5 +11026 6 +11032 6 +11038 3 +11041 9 +11050 7 +11057 5 +11062 2 +11064 5 +11069 5 +11074 8 +11082 9 +11091 4 +11095 6 +11101 6 +11107 9 +11116 5 +11121 5 +11126 4 +11130 2 +11132 7 +11139 4 +11143 6 +11149 7 +11156 3 +11159 5 +11164 4 +11168 1 +11169 8 +11177 7 +11184 5 +11189 6 +11195 2 +11197 7 +11204 4 +11208 8 +11216 4 +11220 5 +11225 8 +11233 9 +11242 3 +11245 5 +11250 11 +11261 6 +11267 4 +11271 9 +11280 11 +11291 4 +11295 5 +11300 6 +11306 9 +11315 1 +11316 5 +11321 7 +11328 4 +11332 3 +11335 3 +11338 5 +11343 5 +11348 7 +11355 2 +11357 5 +11362 5 +11367 9 +11376 7 +11383 6 +11389 9 +11398 8 +11406 9 +11415 5 +11420 16 +11436 1 +11437 8 +11445 7 +11452 6 +11458 11 +11469 7 +11476 5 +11481 11 +11492 3 +11495 3 +11498 5 +11503 3 +11506 7 +11513 7 +11520 5 +11525 7 +11532 5 +11537 11 +11548 3 +11551 2 +11553 6 +11559 7 +11566 6 +11572 6 +11578 8 +11586 7 +11593 7 +11600 6 +11606 7 +11613 9 +11622 10 +11632 7 +11639 10 +11649 8 +11657 6 +11663 7 +11670 5 +11675 11 +11686 10 +11696 13 +11709 6 +11715 6 +11721 12 +11733 5 +11738 3 +11741 4 +11745 6 +11751 6 +11757 13 +11770 6 +11776 6 +11782 5 +11787 2 +11789 6 +11795 5 +11800 4 +11804 7 +11811 8 +11819 3 +11822 7 +11829 7 +11836 7 +11843 9 +11852 2 +11854 2 +11856 7 +11863 5 +11868 6 +11874 4 +11878 7 +11885 2 +11887 4 +11891 4 +11895 3 +11898 5 +11903 6 +11909 3 +11912 7 +11919 6 +11925 8 +11933 2 +11935 9 +11944 5 +11949 6 +11955 5 +11960 3 +11963 13 +11976 8 +11984 6 +11990 3 +11993 4 +11997 3 +12000 7 +12007 6 +12013 9 +12022 4 +12026 11 +12037 4 +12041 6 +12047 6 +12053 9 +12062 4 +12066 3 +12069 6 +12075 7 +12082 3 +12085 5 +12090 8 +12098 6 +12104 4 +12108 8 +12116 4 +12120 11 +12131 6 +12137 7 +12144 3 +12147 8 +12155 8 +12163 3 +12166 6 +12172 5 +12177 3 +12180 5 +12185 6 +12191 3 +12194 7 +12201 8 +12209 3 +12212 2 +12214 5 +12219 4 +12223 2 +12225 8 +12233 4 +12237 5 +12242 3 
+12245 5 +12250 6 +12256 5 +12261 8 +12269 4 +12273 6 +12279 4 +12283 7 +12290 5 +12295 8 +12303 5 +12308 3 +12311 6 +12317 6 +12323 4 +12327 7 +12334 7 +12341 1 +12342 8 +12350 4 +12354 4 +12358 6 +12364 5 +12369 13 +12382 3 +12385 7 +12392 4 +12396 7 +12403 10 +12413 8 +12421 9 +12430 9 +12439 6 +12445 6 +12451 10 +12461 6 +12467 8 +12475 3 +12478 9 +12487 11 +12498 4 +12502 6 +12508 4 +12512 4 +12516 4 +12520 5 +12525 8 +12533 4 +12537 5 +12542 5 +12547 6 +12553 4 +12557 8 +12565 8 +12573 6 +12579 5 +12584 5 +12589 6 +12595 4 +12599 4 +12603 2 +12605 8 +12613 4 +12617 5 +12622 4 +12626 6 +12632 5 +12637 12 +12649 5 +12654 6 +12660 9 +12669 5 +12674 5 +12679 7 +12686 6 +12692 5 +12697 4 +12701 6 +12707 5 +12712 5 +12717 6 +12723 5 +12728 8 +12736 10 +12746 6 +12752 7 +12759 6 +12765 4 +12769 6 +12775 6 +12781 13 +12794 6 +12800 11 +12811 4 +12815 8 +12823 7 +12830 7 +12837 6 +12843 6 +12849 7 +12856 5 +12861 10 +12871 10 +12881 8 +12889 9 +12898 4 +12902 6 +12908 9 +12917 8 +12925 9 +12934 4 +12938 4 +12942 7 +12949 2 +12951 7 +12958 1 +12959 9 +12968 8 +12976 8 +12984 1 +12985 4 +12989 4 +12993 4 +12997 8 +13005 4 +13009 4 +13013 8 +13021 9 +13030 6 +13036 8 +13044 3 +13047 8 +13055 5 +13060 7 +13067 8 +13075 7 +13082 2 +13084 5 +13089 9 +13098 5 +13103 7 +13110 6 +13116 6 +13122 6 +13128 6 +13134 7 +13141 5 +13146 8 +13154 12 +13166 4 +13170 6 +13176 4 +13180 6 +13186 5 +13191 3 +13194 6 +13200 9 +13209 4 +13213 5 +13218 8 +13226 6 +13232 3 +13235 8 +13243 7 +13250 8 +13258 5 +13263 5 +13268 4 +13272 7 +13279 3 +13282 11 +13293 7 +13300 6 +13306 5 +13311 5 +13316 6 +13322 9 +13331 2 +13333 6 +13339 7 +13346 7 +13353 6 +13359 6 +13365 5 +13370 4 +13374 6 +13380 9 +13389 11 +13400 5 +13405 8 +13413 4 +13417 4 +13421 9 +13430 4 +13434 9 +13443 3 +13446 7 +13453 6 +13459 6 +13465 1 +13466 7 +13473 7 +13480 6 +13486 5 +13491 7 +13498 3 +13501 6 +13507 5 +13512 4 +13516 8 +13524 2 +13526 4 +13530 5 +13535 3 +13538 5 +13543 5 +13548 5 +13553 3 +13556 4 +13560 7 +13567 
4 +13571 4 +13575 8 +13583 9 +13592 6 +13598 7 +13605 1 +13606 9 +13615 9 +13624 10 +13634 4 +13638 3 +13641 9 +13650 8 +13658 5 +13663 7 +13670 4 +13674 12 +13686 2 +13688 3 +13691 5 +13696 5 +13701 10 +13711 4 +13715 4 +13719 7 +13726 5 +13731 4 +13735 9 +13744 7 +13751 5 +13756 4 +13760 8 +13768 8 +13776 9 +13785 7 +13792 7 +13799 6 +13805 6 +13811 7 +13818 11 +13829 7 +13836 6 +13842 5 +13847 6 +13853 7 +13860 10 +13870 4 +13874 3 +13877 4 +13881 4 +13885 6 +13891 6 +13897 8 +13905 10 +13915 9 +13924 6 +13930 2 +13932 4 +13936 6 +13942 10 +13952 8 +13960 4 +13964 12 +13976 6 +13982 5 +13987 6 +13993 5 +13998 3 +14001 7 +14008 7 +14015 10 +14025 3 +14028 6 +14034 6 +14040 2 +14042 3 +14045 5 +14050 6 +14056 4 +14060 7 +14067 9 +14076 1 +14077 6 +14083 5 +14088 4 +14092 7 +14099 9 +14108 2 +14110 14 +14124 7 +14131 4 +14135 7 +14142 8 +14150 3 +14153 5 +14158 7 +14165 11 +14176 6 +14182 8 +14190 5 +14195 8 +14203 6 +14209 5 +14214 5 +14219 2 +14221 4 +14225 3 +14228 4 +14232 5 +14237 5 +14242 5 +14247 3 +14250 9 +14259 7 +14266 4 +14270 6 +14276 6 +14282 3 +14285 2 +14287 8 +14295 6 +14301 6 +14307 2 +14309 7 +14316 6 +14322 5 +14327 9 +14336 3 +14339 6 +14345 7 +14352 2 +14354 7 +14361 6 +14367 8 +14375 6 +14381 6 +14387 2 +14389 3 +14392 4 +14396 5 +14401 3 +14404 6 +14410 7 +14417 4 +14421 4 +14425 5 +14430 4 +14434 11 +14445 8 +14453 5 +14458 5 +14463 8 +14471 3 +14474 5 +14479 5 +14484 15 +14499 6 +14505 7 +14512 5 +14517 5 +14522 2 +14524 3 +14527 2 +14529 9 +14538 9 +14547 6 +14553 4 +14557 11 +14568 6 +14574 7 +14581 5 +14586 2 +14588 3 +14591 8 +14599 3 +14602 5 +14607 5 +14612 6 +14618 2 +14620 4 +14624 3 +14627 9 +14636 5 +14641 2 +14643 12 +14655 7 +14662 4 +14666 8 +14674 4 +14678 9 +14687 5 +14692 6 +14698 9 +14707 3 +14710 9 +14719 4 +14723 5 +14728 5 +14733 2 +14735 5 +14740 4 +14744 11 +14755 7 +14762 7 +14769 12 +14781 4 +14785 8 +14793 8 +14801 5 +14806 4 +14810 4 +14814 6 +14820 8 +14828 9 +14837 4 +14841 4 +14845 6 +14851 3 +14854 6 +14860 9 
+14869 7 +14876 8 +14884 7 +14891 6 +14897 6 +14903 5 +14908 6 +14914 8 +14922 5 +14927 3 +14930 7 +14937 4 +14941 8 +14949 8 +14957 7 +14964 7 +14971 7 +14978 7 +14985 8 +14993 6 +14999 6 +15005 10 +15015 5 +15020 4 +15024 8 +15032 7 +15039 3 +15042 6 +15048 7 +15055 5 +15060 9 +15069 13 +15082 6 +15088 5 +15093 4 +15097 5 +15102 6 +15108 6 +15114 6 +15120 10 +15130 7 +15137 9 +15146 5 +15151 8 +15159 9 +15168 10 +15178 10 +15188 4 +15192 4 +15196 4 +15200 7 +15207 6 +15213 12 +15225 3 +15228 7 +15235 3 +15238 6 +15244 8 +15252 5 +15257 10 +15267 7 +15274 6 +15280 2 +15282 5 +15287 2 +15289 4 +15293 3 +15296 5 +15301 8 +15309 9 +15318 4 +15322 3 +15325 4 +15329 5 +15334 3 +15337 5 +15342 2 +15344 4 +15348 11 +15359 3 +15362 8 +15370 7 +15377 4 +15381 7 +15388 7 +15395 5 +15400 6 +15406 9 +15415 4 +15419 10 +15429 9 +15438 4 +15442 2 +15444 6 +15450 6 +15456 12 +15468 7 +15475 5 +15480 6 +15486 3 +15489 5 +15494 6 +15500 5 +15505 3 +15508 3 +15511 1 +15512 10 +15522 8 +15530 6 +15536 4 +15540 3 +15543 8 +15551 5 +15556 4 +15560 9 +15569 10 +15579 6 +15585 11 +15596 10 +15606 9 +15615 12 +15627 9 +15636 4 +15640 4 +15644 4 +15648 11 +15659 4 +15663 5 +15668 4 +15672 5 +15677 5 +15682 1 +15683 3 +15686 4 +15690 7 +15697 7 +15704 6 +15710 6 +15716 4 +15720 4 +15724 10 +15734 7 +15741 5 +15746 8 +15754 5 +15759 5 +15764 4 +15768 6 +15774 8 +15782 2 +15784 6 +15790 5 +15795 4 +15799 5 +15804 5 +15809 9 +15818 6 +15824 6 +15830 3 +15833 10 +15843 7 +15850 4 +15854 5 +15859 6 +15865 9 +15874 4 +15878 4 +15882 5 +15887 7 +15894 5 +15899 6 +15905 8 +15913 8 +15921 4 +15925 6 +15931 10 +15941 7 +15948 4 +15952 5 +15957 7 +15964 10 +15974 1 +15975 1 +15976 6 +15982 2 +15984 11 +15995 6 +16001 3 +16004 4 +16008 5 +16013 6 +16019 6 +16025 11 +16036 4 +16040 3 +16043 4 +16047 4 +16051 4 +16055 6 +16061 3 +16064 4 +16068 3 +16071 7 +16078 10 +16088 11 +16099 5 +16104 7 +16111 8 +16119 8 +16127 6 +16133 3 +16136 3 +16139 8 +16147 4 +16151 4 +16155 11 +16166 6 +16172 4 +16176 12 
+16188 5 +16193 2 +16195 2 +16197 2 +16199 7 +16206 1 +16207 8 +16215 7 +16222 4 +16226 6 +16232 6 +16238 7 +16245 8 +16253 6 +16259 6 +16265 4 +16269 5 +16274 5 +16279 7 +16286 3 +16289 9 +16298 4 +16302 5 +16307 7 +16314 6 +16320 5 +16325 4 +16329 8 +16337 6 +16343 5 +16348 7 +16355 3 +16358 3 +16361 4 +16365 6 +16371 4 +16375 9 +16384 4 +16388 5 +16393 3 +16396 7 +16403 5 +16408 9 +16417 9 +16426 7 +16433 7 +16440 9 +16449 3 +16452 5 +16457 1 +16458 10 +16468 10 +16478 9 +16487 4 +16491 6 +16497 9 +16506 11 +16517 7 +16524 9 +16533 6 +16539 9 +16548 8 +16556 8 +16564 5 +16569 8 +16577 4 +16581 4 +16585 5 +16590 5 +16595 9 +16604 7 +16611 8 +16619 3 +16622 12 +16634 13 +16647 3 +16650 2 +16652 4 +16656 7 +16663 7 +16670 4 +16674 3 +16677 6 +16683 6 +16689 6 +16695 5 +16700 10 +16710 3 +16713 7 +16720 6 +16726 6 +16732 6 +16738 12 +16750 5 +16755 3 +16758 4 +16762 4 +16766 10 +16776 6 +16782 10 +16792 5 +16797 5 +16802 4 +16806 4 +16810 7 +16817 2 +16819 6 +16825 7 +16832 8 +16840 9 +16849 5 +16854 3 +16857 10 +16867 6 +16873 8 +16881 7 +16888 8 +16896 10 +16906 9 +16915 6 +16921 3 +16924 7 +16931 5 +16936 6 +16942 7 +16949 11 +16960 3 +16963 8 +16971 6 +16977 8 +16985 4 +16989 8 +16997 8 +17005 5 +17010 3 +17013 6 +17019 5 +17024 4 +17028 7 +17035 2 +17037 7 +17044 3 +17047 5 +17052 5 +17057 3 +17060 8 +17068 6 +17074 8 +17082 4 +17086 6 +17092 9 +17101 5 +17106 4 +17110 6 +17116 6 +17122 7 +17129 8 +17137 7 +17144 7 +17151 9 +17160 6 +17166 3 +17169 3 +17172 4 +17176 5 +17181 4 +17185 4 +17189 4 +17193 7 +17200 14 +17214 6 +17220 4 +17224 5 +17229 5 +17234 8 +17242 10 +17252 8 +17260 4 +17264 6 +17270 12 +17282 7 +17289 9 +17298 10 +17308 6 +17314 5 +17319 5 +17324 4 +17328 4 +17332 7 +17339 4 +17343 8 +17351 4 +17355 7 +17362 8 +17370 5 +17375 4 +17379 5 +17384 4 +17388 8 +17396 5 +17401 5 +17406 6 +17412 4 +17416 6 +17422 6 +17428 9 +17437 7 +17444 8 +17452 4 +17456 3 +17459 4 +17463 9 +17472 5 +17477 8 +17485 5 +17490 6 +17496 6 +17502 8 +17510 6 +17516 8 
+17524 5 +17529 8 +17537 5 +17542 6 +17548 7 +17555 5 +17560 4 +17564 5 +17569 6 +17575 4 +17579 4 +17583 3 +17586 4 +17590 13 +17603 4 +17607 8 +17615 2 +17617 19 +17636 7 +17643 3 +17646 6 +17652 7 +17659 6 +17665 3 +17668 5 +17673 7 +17680 5 +17685 7 +17692 8 +17700 4 +17704 5 +17709 2 +17711 4 +17715 5 +17720 11 +17731 5 +17736 11 +17747 6 +17753 3 +17756 5 +17761 5 +17766 13 +17779 4 +17783 5 +17788 8 +17796 5 +17801 10 +17811 6 +17817 9 +17826 3 +17829 7 +17836 7 +17843 4 +17847 3 +17850 6 +17856 6 +17862 8 +17870 3 +17873 5 +17878 4 +17882 7 +17889 5 +17894 11 +17905 6 +17911 4 +17915 5 +17920 7 +17927 3 +17930 9 +17939 7 +17946 9 +17955 4 +17959 7 +17966 5 +17971 5 +17976 5 +17981 18 +17999 8 +18007 3 +18010 6 +18016 7 +18023 3 +18026 8 +18034 7 +18041 8 +18049 5 +18054 4 +18058 10 +18068 2 +18070 9 +18079 5 +18084 5 +18089 6 +18095 4 +18099 3 +18102 11 +18113 5 +18118 6 +18124 4 +18128 2 +18130 9 +18139 5 +18144 11 +18155 7 +18162 3 +18165 5 +18170 4 +18174 4 +18178 5 +18183 6 +18189 6 +18195 6 +18201 6 +18207 9 +18216 4 +18220 9 +18229 3 +18232 5 +18237 6 +18243 7 +18250 8 +18258 4 +18262 11 +18273 5 +18278 7 +18285 3 +18288 3 +18291 7 +18298 6 +18304 4 +18308 2 +18310 2 +18312 10 +18322 5 +18327 6 +18333 3 +18336 9 +18345 3 +18348 5 +18353 9 +18362 6 +18368 5 +18373 6 +18379 5 +18384 10 +18394 6 +18400 3 +18403 4 +18407 7 +18414 5 +18419 6 +18425 6 +18431 5 +18436 4 +18440 5 +18445 8 +18453 11 +18464 10 +18474 10 +18484 5 +18489 3 +18492 9 +18501 9 +18510 3 +18513 8 +18521 7 +18528 4 +18532 3 +18535 5 +18540 7 +18547 6 +18553 3 +18556 7 +18563 7 +18570 8 +18578 4 +18582 3 +18585 12 +18597 6 +18603 8 +18611 7 +18618 5 +18623 4 +18627 5 +18632 10 +18642 5 +18647 5 +18652 4 +18656 7 +18663 9 +18672 8 +18680 5 +18685 5 +18690 3 +18693 7 +18700 8 +18708 6 +18714 10 +18724 6 +18730 7 +18737 2 +18739 4 +18743 9 +18752 6 +18758 5 +18763 8 +18771 3 +18774 13 +18787 8 +18795 9 +18804 5 +18809 4 +18813 5 +18818 5 +18823 5 +18828 4 +18832 8 +18840 2 +18842 7 +18849 
6 +18855 8 +18863 8 +18871 6 +18877 5 +18882 9 +18891 5 +18896 11 +18907 8 +18915 3 +18918 5 +18923 8 +18931 7 +18938 6 +18944 5 +18949 2 +18951 4 +18955 7 +18962 6 +18968 5 +18973 5 +18978 5 +18983 10 +18993 6 +18999 9 +19008 9 +19017 5 +19022 6 +19028 4 +19032 7 +19039 5 +19044 4 +19048 7 +19055 7 +19062 2 +19064 12 +19076 6 +19082 5 +19087 3 +19090 8 +19098 3 +19101 5 +19106 7 +19113 3 +19116 5 +19121 4 +19125 6 +19131 4 +19135 5 +19140 8 +19148 5 +19153 7 +19160 9 +19169 4 +19173 8 +19181 2 +19183 5 +19188 6 +19194 5 +19199 7 +19206 1 +19207 3 +19210 2 +19212 6 +19218 7 +19225 3 +19228 4 +19232 8 +19240 6 +19246 10 +19256 5 +19261 8 +19269 9 +19278 9 +19287 3 +19290 6 +19296 3 +19299 2 +19301 9 +19310 3 +19313 8 +19321 4 +19325 6 +19331 7 +19338 8 +19346 5 +19351 10 +19361 2 +19363 6 +19369 4 +19373 5 +19378 8 +19386 10 +19396 4 +19400 8 +19408 7 +19415 5 +19420 7 +19427 6 +19433 5 +19438 6 +19444 5 +19449 6 +19455 7 +19462 5 +19467 4 +19471 8 +19479 2 +19481 6 +19487 4 +19491 3 +19494 4 +19498 3 +19501 2 +19503 7 +19510 6 +19516 2 +19518 8 +19526 9 +19535 8 +19543 2 +19545 3 +19548 10 +19558 4 +19562 7 +19569 5 +19574 5 +19579 5 +19584 6 +19590 3 +19593 6 +19599 6 +19605 4 +19609 6 +19615 8 +19623 5 +19628 7 +19635 6 +19641 6 +19647 4 +19651 8 +19659 6 +19665 4 +19669 3 +19672 9 +19681 6 +19687 6 +19693 4 +19697 6 +19703 8 +19711 6 +19717 6 +19723 7 +19730 3 +19733 5 +19738 4 +19742 5 +19747 5 +19752 11 +19763 8 +19771 6 +19777 9 +19786 6 +19792 11 +19803 4 +19807 8 +19815 6 +19821 8 +19829 7 +19836 9 +19845 3 +19848 5 +19853 5 +19858 10 +19868 3 +19871 4 +19875 10 +19885 5 +19890 8 +19898 5 +19903 5 +19908 5 +19913 4 +19917 6 +19923 4 +19927 5 +19932 6 +19938 3 +19941 8 +19949 7 +19956 6 +19962 7 +19969 4 +19973 6 +19979 4 +19983 10 +19993 8 +20001 16 +20017 5 +20022 6 +20028 11 +20039 6 +20045 6 +20051 8 +20059 5 +20064 4 +20068 9 +20077 9 +20086 10 +20096 9 +20105 6 +20111 4 +20115 5 +20120 3 +20123 7 +20130 9 +20139 4 +20143 2 +20145 7 +20152 7 +20159 6 
+20165 7 +20172 10 +20182 6 +20188 2 +20190 5 +20195 7 +20202 6 +20208 6 +20214 5 +20219 3 +20222 2 +20224 4 +20228 7 +20235 4 +20239 5 +20244 8 +20252 3 +20255 6 +20261 9 +20270 9 +20279 2 +20281 8 +20289 2 +20291 3 +20294 3 +20297 3 +20300 4 +20304 7 +20311 7 +20318 6 +20324 4 +20328 4 +20332 4 +20336 10 +20346 7 +20353 3 +20356 5 +20361 3 +20364 6 +20370 4 +20374 4 +20378 3 +20381 5 +20386 5 +20391 4 +20395 5 +20400 4 +20404 5 +20409 1 +20410 5 +20415 2 +20417 5 +20422 7 +20429 4 +20433 2 +20435 6 +20441 5 +20446 4 +20450 7 +20457 8 +20465 2 +20467 10 +20477 5 +20482 5 +20487 8 +20495 11 +20506 5 +20511 4 +20515 3 +20518 6 +20524 12 +20536 3 +20539 12 +20551 8 +20559 4 +20563 10 +20573 8 +20581 3 +20584 7 +20591 6 +20597 4 +20601 10 +20611 7 +20618 5 +20623 4 +20627 5 +20632 8 +20640 6 +20646 3 +20649 9 +20658 2 +20660 7 +20667 4 +20671 5 +20676 7 +20683 8 +20691 4 +20695 6 +20701 4 +20705 8 +20713 8 +20721 4 +20725 5 +20730 7 +20737 4 +20741 5 +20746 4 +20750 2 +20752 8 +20760 8 +20768 10 +20778 7 +20785 7 +20792 10 +20802 3 +20805 3 +20808 10 +20818 4 +20822 6 +20828 7 +20835 4 +20839 8 +20847 5 +20852 12 +20864 2 +20866 7 +20873 3 +20876 5 +20881 3 +20884 8 +20892 4 +20896 7 +20903 5 +20908 4 +20912 10 +20922 5 +20927 10 +20937 10 +20947 6 +20953 3 +20956 6 +20962 4 +20966 5 +20971 8 +20979 4 +20983 7 +20990 5 +20995 10 +21005 7 +21012 10 +21022 3 +21025 7 +21032 5 +21037 12 +21049 8 +21057 4 +21061 5 +21066 2 +21068 10 +21078 3 +21081 4 +21085 5 +21090 4 +21094 9 +21103 4 +21107 7 +21114 7 +21121 6 +21127 1 +21128 9 +21137 7 +21144 10 +21154 4 +21158 5 +21163 6 +21169 5 +21174 4 +21178 4 +21182 5 +21187 6 +21193 11 +21204 7 +21211 7 +21218 10 +21228 5 +21233 13 +21246 10 +21256 3 +21259 5 +21264 9 +21273 6 +21279 11 +21290 8 +21298 3 +21301 3 +21304 12 +21316 7 +21323 5 +21328 6 +21334 4 +21338 7 +21345 7 +21352 6 +21358 3 +21361 8 +21369 6 +21375 5 +21380 5 +21385 6 +21391 6 +21397 6 +21403 6 +21409 9 +21418 6 +21424 12 +21436 5 +21441 3 +21444 7 +21451 9 
+21460 9 +21469 10 +21479 5 +21484 5 +21489 8 +21497 6 +21503 4 +21507 2 +21509 5 +21514 4 +21518 4 +21522 6 +21528 11 +21539 6 +21545 6 +21551 6 +21557 11 +21568 3 +21571 4 +21575 3 +21578 6 +21584 7 +21591 3 +21594 8 +21602 7 +21609 9 +21618 4 +21622 7 +21629 8 +21637 8 +21645 4 +21649 4 +21653 2 +21655 6 +21661 6 +21667 5 +21672 5 +21677 8 +21685 6 +21691 6 +21697 4 +21701 4 +21705 3 +21708 12 +21720 6 +21726 9 +21735 5 +21740 6 +21746 5 +21751 6 +21757 12 +21769 5 +21774 6 +21780 4 +21784 8 +21792 3 +21795 3 +21798 6 +21804 7 +21811 5 +21816 11 +21827 7 +21834 3 +21837 6 +21843 9 +21852 8 +21860 6 +21866 4 +21870 5 +21875 6 +21881 9 +21890 3 +21893 5 +21898 4 +21902 5 +21907 8 +21915 6 +21921 3 +21924 5 +21929 6 +21935 5 +21940 5 +21945 7 +21952 7 +21959 4 +21963 8 +21971 4 +21975 6 +21981 3 +21984 9 +21993 6 +21999 5 +22004 6 +22010 9 +22019 8 +22027 2 +22029 7 +22036 3 +22039 5 +22044 3 +22047 7 +22054 10 +22064 3 +22067 2 +22069 5 +22074 6 +22080 4 +22084 5 +22089 7 +22096 4 +22100 8 +22108 4 +22112 10 +22122 4 +22126 6 +22132 3 +22135 4 +22139 9 +22148 5 +22153 4 +22157 5 +22162 5 +22167 8 +22175 5 +22180 4 +22184 5 +22189 4 +22193 4 +22197 5 +22202 10 +22212 8 +22220 5 +22225 4 +22229 8 +22237 8 +22245 8 +22253 2 +22255 4 +22259 9 +22268 10 +22278 3 +22281 9 +22290 4 +22294 4 +22298 7 +22305 6 +22311 4 +22315 7 +22322 3 +22325 6 +22331 3 +22334 5 +22339 9 +22348 4 +22352 5 +22357 6 +22363 5 +22368 4 +22372 5 +22377 7 +22384 2 +22386 7 +22393 8 +22401 7 +22408 9 +22417 8 +22425 3 +22428 4 +22432 7 +22439 6 +22445 4 +22449 9 +22458 6 +22464 5 +22469 7 +22476 10 +22486 5 +22491 5 +22496 10 +22506 8 +22514 8 +22522 8 +22530 8 +22538 5 +22543 9 +22552 2 +22554 2 +22556 6 +22562 7 +22569 9 +22578 3 +22581 7 +22588 8 +22596 3 +22599 6 +22605 3 +22608 4 +22612 6 +22618 13 +22631 7 +22638 5 +22643 8 +22651 8 +22659 9 +22668 7 +22675 2 +22677 12 +22689 14 +22703 10 +22713 4 +22717 7 +22724 3 +22727 5 +22732 9 +22741 9 +22750 11 +22761 4 +22765 5 +22770 12 +22782 4 
+22786 8 +22794 4 +22798 7 +22805 6 +22811 4 +22815 8 +22823 4 +22827 7 +22834 2 +22836 7 +22843 9 +22852 2 +22854 10 +22864 6 +22870 7 +22877 8 +22885 7 +22892 2 +22894 4 +22898 9 +22907 7 +22914 8 +22922 7 +22929 5 +22934 5 +22939 4 +22943 7 +22950 8 +22958 3 +22961 7 +22968 5 +22973 4 +22977 5 +22982 7 +22989 4 +22993 4 +22997 7 +23004 4 +23008 5 +23013 7 +23020 6 +23026 3 +23029 7 +23036 9 +23045 3 +23048 3 +23051 5 +23056 5 +23061 5 +23066 6 +23072 7 +23079 4 +23083 4 +23087 9 +23096 4 +23100 6 +23106 5 +23111 6 +23117 7 +23124 5 +23129 5 +23134 3 +23137 2 +23139 8 +23147 13 +23160 6 +23166 4 +23170 5 +23175 4 +23179 3 +23182 6 +23188 6 +23194 5 +23199 4 +23203 6 +23209 4 +23213 12 +23225 4 +23229 7 +23236 10 +23246 9 +23255 6 +23261 9 +23270 9 +23279 8 +23287 3 +23290 7 +23297 5 +23302 3 +23305 2 +23307 7 +23314 7 +23321 6 +23327 10 +23337 4 +23341 9 +23350 6 +23356 6 +23362 7 +23369 12 +23381 4 +23385 5 +23390 3 +23393 10 +23403 10 +23413 4 +23417 6 +23423 3 +23426 9 +23435 6 +23441 7 +23448 5 +23453 6 +23459 5 +23464 3 +23467 8 +23475 2 +23477 10 +23487 3 +23490 4 +23494 5 +23499 8 +23507 4 +23511 7 +23518 8 +23526 4 +23530 6 +23536 8 +23544 6 +23550 7 +23557 3 +23560 7 +23567 2 +23569 6 +23575 9 +23584 12 +23596 5 +23601 8 +23609 7 +23616 6 +23622 6 +23628 4 +23632 10 +23642 8 +23650 7 +23657 1 +23658 8 +23666 8 +23674 4 +23678 8 +23686 6 +23692 9 +23701 8 +23709 7 +23716 9 +23725 8 +23733 9 +23742 5 +23747 4 +23751 5 +23756 10 +23766 6 +23772 4 +23776 6 +23782 3 +23785 6 +23791 7 +23798 9 +23807 9 +23816 13 +23829 10 +23839 6 +23845 3 +23848 6 +23854 7 +23861 6 +23867 4 +23871 11 +23882 12 +23894 3 +23897 2 +23899 8 +23907 3 +23910 8 +23918 7 +23925 6 +23931 5 +23936 6 +23942 11 +23953 6 +23959 4 +23963 4 +23967 5 +23972 7 +23979 8 +23987 5 +23992 5 +23997 6 +24003 4 +24007 8 +24015 9 +24024 5 +24029 4 +24033 3 +24036 4 +24040 2 +24042 7 +24049 2 +24051 6 +24057 1 +24058 9 +24067 8 +24075 6 +24081 3 +24084 9 +24093 6 +24099 8 +24107 9 +24116 6 +24122 5 
+24127 4 +24131 8 +24139 6 +24145 7 +24152 3 +24155 8 +24163 5 +24168 6 +24174 6 +24180 4 +24184 8 +24192 5 +24197 5 +24202 6 +24208 7 +24215 5 +24220 3 +24223 11 +24234 12 +24246 12 +24258 3 +24261 9 +24270 6 +24276 5 +24281 5 +24286 8 +24294 4 +24298 8 +24306 5 +24311 7 +24318 3 +24321 8 +24329 5 +24334 3 +24337 7 +24344 7 +24351 5 +24356 7 +24363 4 +24367 6 +24373 3 +24376 8 +24384 3 +24387 7 +24394 10 +24404 3 +24407 5 +24412 6 +24418 4 +24422 4 +24426 2 +24428 3 +24431 9 +24440 8 +24448 7 +24455 5 +24460 11 +24471 7 +24478 7 +24485 5 +24490 10 +24500 4 +24504 7 +24511 6 +24517 13 +24530 10 +24540 7 +24547 8 +24555 4 +24559 2 +24561 9 +24570 2 +24572 4 + +0 + +24576 +2539 2 +1187 5 +3911 2 +585 8 +1498 10 +1681 2 +2115 7 +2424 1 +3708 7 +196 1 +1852 10 +3555 8 +2134 1 +1064 9 +1293 8 +944 9 +2413 3 +1678 2 +839 9 +297 1 +174 7 +2217 9 +51 8 +3195 6 +3215 5 +332 3 +2077 7 +1214 2 +2367 10 +1947 10 +2350 6 +3441 1 +3246 7 +1999 1 +2037 5 +2227 8 +101 7 +3340 9 +3713 7 +3013 4 +1001 3 +444 6 +3306 2 +4043 1 +1361 1 +3916 6 +365 4 +1485 8 +251 8 +234 2 +4042 2 +870 7 +3803 9 +3874 4 +1058 5 +831 3 +2331 6 +1328 1 +2525 4 +255 3 +381 1 +2521 1 +3946 5 +2449 4 +285 2 +3848 4 +2669 9 +3949 3 +1050 4 +2855 9 +1974 3 +349 7 +2874 6 +192 6 +3442 4 +265 1 +2281 4 +403 6 +2359 5 +319 8 +39 1 +3893 3 +1176 1 +3154 10 +866 9 +2670 9 +3934 6 +3799 5 +393 8 +2722 10 +2107 4 +185 3 +69 1 +1958 4 +1613 2 +1908 10 +3867 5 +2950 2 +3397 10 +3737 1 +1074 9 +234 2 +2795 8 +1452 8 +1437 2 +768 7 +3400 1 +1212 6 +2675 7 +989 4 +1338 6 +764 5 +216 3 +2186 3 +2210 9 +2194 1 +1703 3 +2668 5 +3684 3 +3636 6 +3939 5 +3718 2 +3954 10 +4009 10 +703 8 +2990 8 +2162 4 +3980 1 +1245 8 +2488 1 +2391 3 +3774 9 +3238 5 +1534 4 +3440 3 +2611 6 +2878 7 +1931 8 +3668 9 +3139 10 +3822 10 +2184 3 +82 6 +3317 1 +1702 3 +4087 10 +519 3 +1944 1 +3830 9 +3563 10 +2150 5 +3735 9 +1158 2 +3265 9 +2571 6 +2587 4 +2073 3 +405 6 +3865 3 +42 4 +2358 9 +2632 1 +1629 5 +2968 10 +3160 8 +1934 7 +1108 3 +2324 9 
+1923 4 +2536 10 +3112 3 +3817 1 +4008 3 +2118 10 +1034 6 +3094 8 +3868 9 +2484 6 +3791 7 +1456 5 +2643 5 +462 9 +1481 8 +1788 10 +811 5 +1441 10 +2258 6 +3559 5 +2816 2 +3886 1 +428 9 +2442 8 +873 2 +3460 2 +989 7 +2897 9 +1464 7 +1525 4 +685 7 +3906 4 +678 7 +1824 2 +2256 8 +1016 9 +3705 1 +3368 10 +136 1 +1154 8 +2478 10 +3323 2 +104 10 +932 7 +3100 8 +2465 5 +491 9 +1735 3 +1031 3 +2790 1 +1423 5 +2939 6 +1829 9 +1241 3 +386 4 +1934 8 +2883 9 +14 1 +686 2 +992 5 +3564 8 +551 10 +2074 3 +2344 1 +3593 9 +1103 6 +2668 6 +696 5 +4019 4 +1708 1 +2519 3 +3455 8 +28 4 +3639 8 +1977 7 +2429 5 +3549 7 +468 10 +2801 10 +848 7 +959 9 +2410 6 +3898 9 +2059 3 +1938 9 +3544 1 +3513 9 +1136 1 +302 4 +1589 7 +305 1 +3199 2 +847 4 +3900 6 +2632 6 +2193 6 +442 7 +3972 1 +3426 4 +1500 3 +1723 5 +2849 1 +2498 4 +3104 4 +3131 5 +1198 2 +1492 10 +2112 6 +1202 2 +2284 10 +1672 10 +3115 3 +2934 4 +990 4 +434 8 +3372 6 +1974 6 +2729 9 +3517 3 +2286 6 +1761 1 +3637 3 +3058 4 +1178 2 +985 4 +3 8 +939 6 +445 3 +1807 9 +2728 7 +1861 5 +2716 5 +3316 3 +2836 5 +174 3 +1190 4 +1061 9 +2375 6 +3599 9 +1048 3 +3021 8 +1421 5 +2090 10 +1289 6 +971 1 +3560 4 +1817 2 +3691 1 +2572 6 +1938 9 +576 6 +3178 3 +3265 6 +3747 6 +1332 1 +2812 9 +3574 5 +2033 7 +1103 4 +2806 4 +2506 5 +686 3 +3917 3 +350 5 +2609 6 +1906 7 +3969 10 +3419 10 +3338 10 +1448 9 +1050 3 +1080 5 +3620 4 +1286 10 +2202 5 +4079 7 +3722 3 +1210 7 +3678 2 +1323 7 +2341 7 +320 2 +3506 7 +649 4 +2993 5 +1165 6 +1384 9 +335 7 +2002 2 +302 7 +1502 1 +4049 9 +2628 3 +259 2 +2500 1 +2022 9 +541 9 +2910 2 +4089 6 +3356 1 +2474 9 +1941 2 +1025 2 +3026 10 +2314 6 +2102 6 +1122 7 +1833 10 +1692 1 +1372 3 +1302 4 +3883 10 +2310 9 +3151 9 +2447 2 +1205 5 +276 2 +2431 8 +611 3 +512 8 +1134 10 +758 2 +2418 6 +276 10 +2592 1 +1655 8 +2181 1 +3243 10 +2191 3 +455 4 +1130 5 +2880 8 +740 1 +635 6 +932 9 +3178 8 +1032 9 +89 6 +414 1 +730 9 +16 1 +3631 9 +1411 6 +2356 5 +2474 5 +3025 4 +3876 8 +2897 7 +957 5 +2621 6 +1568 8 +2610 8 +3253 7 +1169 1 +3292 
4 +1035 2 +1417 5 +3613 10 +1063 5 +1779 7 +360 2 +208 3 +1014 7 +894 8 +2599 7 +4076 3 +3329 6 +2497 10 +1110 5 +803 8 +3322 10 +3100 7 +1921 8 +3077 2 +1052 7 +2808 5 +3802 9 +2708 9 +3412 1 +690 9 +2266 3 +112 3 +765 4 +3276 3 +3823 5 +181 9 +457 1 +299 6 +934 5 +3422 7 +3718 4 +1793 6 +3672 8 +2858 2 +3801 3 +1693 8 +3711 4 +2917 1 +291 6 +3209 1 +334 10 +3287 6 +626 5 +915 3 +2886 6 +236 3 +1390 10 +2523 8 +1386 10 +3340 2 +4047 7 +303 8 +230 2 +2390 8 +1983 5 +2897 2 +3922 3 +954 3 +3004 4 +3912 10 +393 1 +1768 3 +2783 2 +1522 6 +4055 8 +3429 6 +3884 2 +25 6 +3606 3 +3813 7 +2176 9 +2774 10 +2829 1 +2858 7 +3722 8 +1468 6 +1208 5 +3466 7 +446 2 +1824 4 +4056 8 +1036 5 +985 4 +2979 3 +3919 6 +479 3 +3896 5 +128 3 +2928 9 +1208 1 +1356 10 +928 10 +787 5 +3418 6 +421 8 +1985 3 +2218 3 +3452 1 +2255 3 +405 7 +3265 4 +2763 4 +641 10 +3202 1 +3754 8 +1949 3 +3120 10 +2017 9 +1932 9 +2302 9 +2060 9 +773 5 +3294 1 +2044 2 +2277 10 +3755 10 +3620 6 +69 6 +2237 4 +3696 3 +2141 7 +1698 7 +2629 7 +2951 1 +1211 8 +3830 3 +1858 3 +2153 10 +2512 9 +3088 10 +3996 3 +423 8 +584 7 +383 10 +2355 1 +2140 5 +954 4 +99 4 +1575 4 +2552 2 +405 4 +1175 10 +1124 10 +3839 8 +1711 6 +3475 8 +1104 5 +2724 4 +1185 4 +1081 9 +2892 8 +1177 10 +2260 8 +1362 1 +1979 3 +2161 4 +3940 7 +694 3 +254 1 +966 6 +3083 5 +920 6 +3555 6 +1233 6 +947 6 +3804 6 +1611 2 +951 1 +3524 10 +94 4 +3332 5 +3542 10 +152 7 +289 1 +539 9 +566 10 +3745 8 +2949 10 +2114 8 +2206 1 +364 5 +3081 4 +2286 9 +3450 1 +2703 10 +5 7 +1851 3 +2618 6 +1958 1 +550 3 +2220 3 +375 7 +3322 10 +3901 10 +2296 4 +732 1 +3721 8 +3064 1 +3315 5 +2066 10 +2566 7 +593 10 +36 10 +1177 2 +2225 9 +1485 8 +392 6 +3144 7 +2170 5 +2052 5 +1235 7 +801 2 +3439 1 +2565 9 +3646 7 +893 3 +1991 3 +2220 1 +1540 5 +1493 4 +3384 5 +1115 4 +488 6 +568 8 +1240 3 +4030 2 +3376 2 +3660 1 +2790 5 +3528 8 +1131 10 +1932 7 +2690 10 +3852 7 +2833 3 +785 1 +705 10 +2183 9 +3411 6 +2966 6 +2765 1 +3756 1 +199 4 +817 3 +3221 1 +1154 9 +1610 9 +1224 6 +3511 6 
+3245 5 +75 3 +1353 8 +2848 7 +2353 4 +268 10 +374 8 +2591 9 +2501 8 +953 5 +2335 3 +1304 1 +407 1 +1556 9 +2965 3 +1263 7 +2258 4 +138 3 +1237 5 +1719 6 +1272 6 +1867 6 +3052 9 +2829 10 +515 10 +1874 9 +1699 8 +3351 2 +1303 10 +2853 9 +866 6 +3533 1 +895 2 +2287 9 +1954 9 +3352 9 +3760 2 +1026 9 +2074 6 +1529 4 +868 2 +3551 9 +3603 8 +1589 3 +2230 3 +1141 7 +3914 8 +3396 3 +1997 6 +898 10 +3176 8 +3063 7 +2957 5 +194 10 +2959 2 +1616 9 +686 1 +921 9 +2578 10 +3986 4 +2293 3 +2529 6 +722 7 +1783 3 +594 1 +2188 7 +1317 6 +992 1 +2754 3 +3113 7 +205 4 +3815 5 +3076 8 +1205 9 +1703 4 +3901 4 +1627 8 +2490 6 +524 4 +4031 10 +3070 1 +4004 9 +652 8 +891 8 +765 2 +248 9 +836 4 +2567 7 +1083 8 +1743 7 +3716 7 +2978 9 +2097 6 +3205 10 +310 4 +907 4 +2378 6 +85 3 +1268 1 +1250 4 +1745 4 +3608 4 +948 6 +3799 2 +552 4 +2391 9 +758 7 +2703 6 +2951 6 +2674 5 +3839 2 +1778 4 +3064 8 +2392 7 +1312 9 +798 6 +391 5 +3602 3 +1346 7 +2819 7 +3549 2 +476 8 +1661 5 +2335 8 +963 5 +3882 4 +2778 6 +521 9 +353 4 +1534 2 +3229 1 +2011 3 +3422 8 +757 9 +2851 1 +180 10 +584 10 +3797 4 +2092 8 +237 10 +2797 7 +3207 10 +3546 9 +1225 9 +282 3 +1545 2 +2111 7 +3439 1 +2231 5 +1814 3 +36 1 +1513 4 +1803 10 +2642 3 +2749 4 +3608 7 +2702 4 +1331 8 +3867 6 +883 3 +2695 6 +3879 1 +2200 10 +1720 4 +2801 5 +1463 1 +250 2 +3074 8 +1938 8 +115 3 +1161 5 +835 10 +962 7 +2543 10 +1828 7 +1488 7 +3860 1 +1497 2 +413 1 +3003 7 +3593 9 +3711 7 +1680 2 +2586 7 +3164 4 +1227 1 +2124 9 +2302 10 +541 7 +1123 7 +1261 10 +2938 9 +3420 3 +1604 3 +3772 10 +3921 10 +3518 4 +194 3 +456 2 +3212 4 +3898 5 +1158 7 +186 3 +449 3 +620 7 +330 8 +3579 1 +1214 2 +1598 2 +160 2 +3430 4 +2579 5 +2321 6 +3585 7 +1710 5 +4037 2 +3234 6 +3245 5 +3139 2 +2571 4 +536 9 +358 3 +378 8 +383 8 +1575 5 +432 5 +2731 1 +2298 2 +2600 1 +1525 5 +2324 9 +2883 4 +473 4 +934 3 +641 7 +3351 7 +1225 4 +1535 3 +2448 7 +3853 3 +1055 1 +2545 5 +3337 2 +1247 1 +2846 4 +681 2 +1495 2 +3803 4 +1023 7 +2533 8 +338 10 +3061 5 +2127 9 +1459 6 +99 7 +3569 7 
+1724 8 +2816 4 +351 7 +2074 7 +193 5 +3012 7 +2078 6 +3269 10 +2182 1 +3485 10 +685 8 +2592 5 +2970 10 +170 1 +1314 7 +1342 7 +3914 8 +761 1 +3823 3 +2388 1 +3280 10 +2773 6 +3930 3 +1338 3 +895 8 +1576 3 +1445 8 +221 8 +415 6 +2915 1 +3712 2 +2374 6 +146 2 +333 10 +1369 1 +2909 10 +1699 4 +2560 8 +982 4 +716 3 +3109 4 +2823 5 +1810 2 +2582 8 +3314 3 +1875 4 +3040 1 +3229 7 +2454 6 +2690 4 +2880 4 +203 2 +3240 9 +639 6 +3636 10 +4025 5 +3986 3 +3159 8 +2873 1 +1798 1 +3724 2 +1942 6 +3947 2 +1767 8 +2916 3 +1358 8 +3242 4 +1710 2 +3440 9 +2958 4 +427 3 +1003 1 +2351 7 +2339 10 +3991 2 +3758 1 +3229 3 +2572 5 +297 4 +1987 4 +1033 4 +2941 10 +1582 1 +1775 7 +1510 1 +1216 8 +2154 6 +2178 5 +2009 10 +1887 8 +1090 10 +1213 9 +867 5 +1604 4 +3968 3 +2542 1 +156 1 +2056 6 +2008 4 +1882 1 +3508 5 +3603 10 +195 7 +226 7 +1070 8 +1523 3 +3067 10 +2665 6 +639 3 +3369 6 +3750 7 +1326 10 +3019 2 +261 2 +3191 7 +1692 9 +1403 1 +3822 8 +3 7 +1215 5 +2335 4 +52 3 +2325 8 +1872 2 +3000 4 +399 9 +962 4 +3591 5 +3366 9 +1774 8 +2512 10 +2805 7 +1001 4 +3962 8 +318 9 +2789 7 +3299 4 +1140 10 +1234 9 +1301 10 +2402 2 +2978 4 +494 1 +2857 9 +2856 8 +1970 1 +3511 8 +2335 6 +907 5 +730 5 +2194 2 +1785 10 +134 10 +4045 9 +872 5 +2925 5 +353 10 +3690 9 +3147 9 +2525 9 +1087 6 +2143 9 +1301 8 +76 9 +412 7 +2266 6 +2772 1 +1253 5 +2786 2 +186 10 +354 2 +3073 6 +1807 6 +2720 10 +496 5 +1936 9 +4044 9 +333 4 +3080 3 +2030 5 +831 8 +1824 10 +16 3 +3371 3 +3971 6 +1671 8 +183 10 +716 9 +144 3 +824 3 +1499 2 +288 7 +379 9 +2076 3 +1418 10 +3787 1 +549 2 +1904 1 +939 4 +1841 3 +2637 1 +1448 7 +420 1 +382 6 +2592 1 +2591 2 +1298 2 +2238 9 +3599 1 +2705 9 +3938 7 +2700 10 +2881 10 +3331 7 +1130 2 +3909 10 +2516 6 +1695 2 +196 7 +3700 1 +2510 7 +1838 8 +3886 8 +4041 7 +3904 4 +3272 8 +426 3 +3851 9 +1539 1 +2457 2 +2890 10 +968 9 +13 6 +613 3 +282 7 +1110 2 +2559 7 +1913 5 +153 5 +515 2 +2026 6 +2985 8 +144 3 +3929 4 +121 10 +478 6 +713 4 +1204 3 +2721 9 +171 7 +659 5 +3872 5 +719 4 +1651 4 +2765 3 
+1370 10 +191 7 +3359 4 +3869 4 +900 6 +2104 2 +0 1 +3350 1 +2890 9 +305 7 +1997 7 +2885 8 +2138 9 +3940 2 +3704 8 +1379 9 +908 3 +1249 9 +1286 1 +1632 1 +2438 4 +2000 5 +1237 3 +1797 6 +3789 8 +111 4 +668 1 +3502 2 +1338 5 +1617 7 +4067 4 +2203 1 +2924 7 +2009 2 +1903 4 +3918 6 +1857 3 +1062 8 +2212 10 +1847 4 +671 8 +3686 1 +1788 1 +1578 10 +3501 1 +2554 1 +164 2 +2886 10 +1318 10 +570 5 +2649 10 +2581 4 +1103 4 +3748 7 +2504 5 +123 4 +3156 5 +1737 10 +1594 3 +667 3 +3426 7 +2117 10 +1626 9 +73 3 +933 7 +3979 9 +2199 3 +1885 2 +2168 2 +1276 8 +2458 7 +591 3 +1502 3 +2454 1 +471 2 +2918 1 +3221 4 +1135 8 +3922 4 +239 6 +723 6 +3865 8 +1145 2 +2640 7 +812 1 +3977 6 +2318 3 +558 8 +1479 7 +1248 9 +377 1 +2986 9 +3356 2 +2964 9 +2967 6 +3212 9 +2455 6 +3254 1 +933 1 +3326 3 +464 10 +4003 4 +3246 5 +2883 6 +3568 7 +3578 7 +3045 4 +1516 4 +3131 3 +2759 2 +2942 6 +1211 3 +3201 1 +333 1 +2319 6 +2033 10 +3489 6 +704 5 +1242 7 +1327 1 +1596 6 +19 3 +552 6 +4031 7 +4060 9 +3793 7 +978 3 +3817 1 +2194 4 +3677 6 +1684 10 +227 7 +2985 8 +2105 6 +3677 2 +1486 2 +3993 5 +1698 5 +3903 9 +2048 4 +568 10 +2101 2 +1272 4 +1358 10 +3457 10 +1460 7 +3763 3 +1066 8 +2459 2 +1117 5 +712 6 +4018 1 +425 8 +3698 5 +3277 2 +2648 8 +226 7 +1201 7 +158 8 +503 2 +1517 10 +803 9 +1582 4 +637 9 +3550 9 +2803 7 +2130 1 +2199 6 +1682 10 +3393 8 +352 8 +1107 2 +1994 8 +1894 7 +3787 2 +2311 9 +2262 3 +3517 1 +3867 9 +1868 2 +856 10 +4029 8 +2769 5 +253 1 +832 4 +3702 2 +468 9 +1984 8 +2524 1 +767 4 +2701 3 +4086 4 +660 4 +171 7 +221 7 +218 7 +2965 2 +3286 8 +1200 10 +3434 4 +1969 8 +1625 4 +501 10 +1701 9 +2440 7 +3201 1 +1870 8 +3934 4 +3252 9 +2169 2 +3959 5 +2629 4 +2557 10 +557 9 +817 3 +745 9 +3575 10 +651 10 +2591 5 +2432 1 +1689 9 +1173 7 +1743 5 +1163 3 +1320 3 +79 2 +1370 2 +2077 8 +3964 9 +1021 1 +1484 9 +1551 6 +3956 3 +2222 7 +3843 5 +347 10 +841 5 +3810 1 +3443 9 +9 2 +2063 9 +17 2 +3938 3 +1839 4 +1233 2 +2308 1 +2941 10 +1598 2 +1287 1 +79 3 +1377 4 +3143 7 +3366 2 +2660 2 +2225 10 
+2731 3 +1070 5 +3581 7 +135 10 +1704 7 +1314 1 +1309 5 +3273 8 +3207 5 +4037 7 +1014 2 +3825 6 +2835 7 +3363 4 +3895 6 +2554 8 +1121 9 +3166 5 +211 6 +3249 2 +1744 10 +3165 10 +1191 2 +1054 10 +3828 10 +3761 3 +2625 4 +765 8 +3157 8 +3575 2 +3196 8 +577 10 +1460 7 +1447 5 +2756 6 +1208 7 +1100 5 +857 4 +1808 7 +4018 9 +2670 6 +3445 10 +3564 10 +3854 8 +3476 10 +2565 6 +1536 3 +3436 9 +2842 7 +423 3 +2912 7 +271 7 +1992 6 +883 5 +1287 6 +3277 1 +3560 9 +2265 3 +900 8 +2009 7 +2644 3 +1288 5 +2997 2 +329 2 +2344 4 +1900 5 +532 9 +2821 2 +3858 7 +1644 7 +2114 8 +848 10 +2820 6 +126 9 +2006 6 +2471 10 +3260 2 +2660 5 +3745 6 +1105 9 +3301 3 +2592 6 +3313 1 +1388 3 +2828 10 +138 2 +3765 6 +1882 3 +1552 10 +2130 8 +2421 1 +9 8 +1084 1 +1615 8 +2619 7 +1831 6 +3940 9 +1596 9 +1127 9 +1718 1 +814 5 +2365 9 +1654 4 +279 10 +3360 9 +4025 6 +224 1 +2529 2 +2277 10 +286 3 +1781 3 +2429 3 +98 1 +1214 4 +3920 2 +2300 6 +1818 10 +2490 1 +2674 10 +2767 4 +3042 9 +3007 1 +3082 6 +1264 6 +738 3 +2078 6 +3111 6 +10 3 +2939 3 +2420 9 +3298 6 +472 10 +3383 6 +3041 1 +557 2 +2520 5 +3695 4 +1487 10 +3723 3 +317 10 +3442 10 +574 8 +2151 7 +3178 1 +1727 9 +62 2 +3282 10 +564 6 +1492 6 +948 3 +3745 3 +3866 6 +749 6 +1150 2 +691 4 +1023 4 +2960 4 +173 2 +1170 5 +3284 7 +808 6 +4088 1 +2320 6 +1876 1 +1171 9 +3202 5 +1899 9 +3775 8 +1090 6 +881 1 +2486 9 +1102 10 +3164 2 +3261 7 +1200 3 +3738 10 +1250 3 +3947 10 +1409 3 +2226 5 +2599 8 +1847 3 +14 1 +587 8 +2833 8 +3511 8 +2348 5 +3814 3 +734 6 +1469 3 +3521 6 +1980 7 +2075 3 +675 3 +2818 3 +3436 7 +1169 10 +3998 8 +3268 4 +373 6 +2090 1 +1641 5 +1220 9 +502 3 +1103 1 +1907 6 +956 4 +112 10 +1229 6 +3587 4 +3008 10 +3458 5 +1991 3 +3781 9 +1335 1 +244 7 +1054 3 +1566 1 +1325 5 +3153 3 +4069 10 +318 1 +3883 6 +2088 9 +446 1 +2397 2 +2999 5 +3668 9 +2764 8 +1962 4 +2531 3 +348 4 +2445 2 +1730 2 +4070 9 +3439 9 +1430 7 +2819 7 +2225 4 +553 2 +2429 9 +1763 2 +503 4 +2692 7 +2899 1 +67 10 +3106 5 +2896 9 +66 2 +2111 3 +4061 7 +1103 8 +3469 3 
+612 4 +1922 7 +300 8 +3712 10 +1386 9 +3626 4 +2924 3 +3897 4 +1149 5 +2435 6 +480 7 +2695 6 +1772 5 +1140 8 +2930 8 +2593 3 +252 10 +574 7 +648 2 +321 10 +3347 1 +391 7 +2755 6 +2834 8 +3028 8 +1339 10 +943 2 +3139 3 +193 7 +3133 6 +932 8 +135 3 +1621 4 +1837 6 +409 1 +3328 9 +3938 4 +2062 9 +3383 5 +3584 9 +1646 9 +3074 3 +1945 1 +734 10 +2558 5 +2644 4 +2850 1 +12 2 +1033 7 +1612 8 +3555 2 +3520 4 +1176 4 +254 3 +3906 6 +2661 10 +489 8 +3828 10 +2171 6 +2611 10 +1502 7 +622 5 +1913 4 +2033 6 +187 7 +1307 8 +3612 7 +2677 3 +1813 6 +3700 2 +99 1 +3300 6 +3535 7 +931 1 +3119 5 +3010 4 +78 6 +2386 7 +3633 5 +1706 7 +647 9 +669 9 +3026 4 +2259 3 +3716 3 +1337 5 +1813 8 +580 4 +3163 10 +2789 5 +3971 2 +1466 8 +1120 2 +2384 7 +4079 8 +1253 4 +428 5 +2713 10 +3247 3 +1031 1 +1398 6 +2114 6 +3392 3 +308 4 +1008 3 +655 7 +3483 8 +3 1 +418 8 +2071 4 +1238 2 +168 4 +642 4 +1031 6 +915 5 +742 4 +2101 2 +1627 9 +1593 4 +419 5 +1763 9 +77 6 +3463 6 +2740 4 +4000 2 +1192 5 +1959 2 +3150 8 +528 8 +1751 7 +1639 2 +2363 7 +277 8 +3790 7 +688 8 +1667 8 +1870 1 +3122 5 +795 7 +2150 9 +61 7 +44 4 +1213 7 +2420 3 +1355 4 +430 7 +1115 3 +3473 3 +2992 2 +2787 9 +789 6 +79 8 +786 6 +305 4 +1054 6 +2507 3 +197 7 +1296 1 +3126 2 +3274 1 +2143 3 +45 1 +4077 8 +1909 7 +304 4 +2444 5 +2457 3 +1388 5 +4003 3 +3304 7 +1671 9 +2554 6 +3080 2 +1004 8 +1610 9 +475 9 +597 9 +1984 3 +2096 9 +3046 4 +2725 6 +1141 10 +3287 10 +2049 10 +296 4 +439 2 +1424 10 +1080 9 +2884 5 +2767 1 +1619 3 +645 3 +2259 6 +866 5 +3559 10 +2414 9 +127 4 +1380 1 +1180 5 +2342 9 +467 2 +1500 7 +616 7 +2511 7 +1714 9 +828 3 +3509 4 +1002 2 +1340 7 +1886 10 +1203 3 +2010 2 +3289 10 +3963 4 +992 2 +1669 6 +1405 1 +3342 2 +1421 7 +2761 7 +459 4 +2007 5 +2400 5 +1601 4 +3057 2 +1086 9 +3956 2 +786 7 +1401 4 +1036 2 +1095 1 +258 10 +864 10 +402 6 +3880 7 +3268 2 +3968 6 +177 9 +450 1 +3520 9 +272 1 +474 4 +2195 8 +3049 4 +14 8 +1920 10 +964 7 +2409 10 +454 5 +716 1 +60 2 +280 10 +4013 10 +1825 2 +1639 10 +2149 10 +3730 4 +1631 
4 +1600 1 +707 2 +765 3 +924 6 +2011 3 +3304 3 +3267 10 +1280 10 +2467 3 +3621 7 +2970 10 +3119 8 +834 1 +504 7 +2209 5 +1593 8 +2914 1 +2123 6 +2951 8 +3075 4 +3518 6 +2102 6 +1899 9 +2574 9 +4077 3 +1850 7 +3734 4 +2330 1 +2680 3 +1216 2 +3915 4 +3361 5 +358 3 +1317 8 +794 9 +1513 2 +2065 4 +3161 2 +893 1 +4062 7 +2286 7 +245 2 +4088 9 +3214 4 +4020 1 +1723 5 +1462 2 +2652 6 +2549 3 +144 8 +2646 2 +685 4 +3242 3 +2633 5 +2625 5 +2366 5 +2019 3 +3369 5 +1350 7 +4 3 +2019 10 +663 9 +2373 1 +160 10 +185 4 +215 1 +1706 3 +2565 1 +1158 1 +78 10 +2433 5 +1543 4 +1704 8 +3098 8 +832 7 +61 7 +433 7 +705 2 +837 3 +1622 3 +1025 1 +4074 2 +1897 6 +3598 2 +2113 2 +3735 5 +1622 10 +3517 5 +3540 1 +3656 6 +1388 8 +1985 7 +2284 2 +1937 1 +2800 5 +151 10 +3823 7 +2937 10 +3100 1 +2566 4 +1157 4 +1848 6 +3122 3 +2065 10 +2890 1 +869 5 +2450 1 +634 7 +661 8 +726 3 +3599 1 +1099 3 +2725 1 +1513 1 +1176 1 +3474 9 +3643 5 +627 1 +2773 4 +2173 4 +544 10 +2950 5 +1047 1 +2535 3 +2821 10 +3929 10 +3770 6 +477 1 +765 5 +3666 9 +1929 7 +715 10 +1941 4 +1299 9 +1912 7 +375 6 +1481 9 +774 1 +1516 5 +577 3 +1373 2 +2822 6 +3694 10 +3338 2 +1915 2 +2461 2 +673 7 +3165 6 +2635 5 +1900 5 +1264 7 +1580 5 +1310 8 +2815 1 +2053 2 +2750 7 +1522 5 +1601 5 +953 10 +3764 3 +4033 4 +3763 1 +3167 6 +630 10 +232 10 +3228 7 +3190 7 +1512 8 +274 4 +1299 5 +377 5 +1327 8 +860 5 +1489 3 +13 7 +1350 10 +3046 4 +3254 3 +1946 8 +2996 1 +395 7 +3068 6 +58 5 +2429 9 +1987 9 +2124 4 +2714 3 +3312 2 +153 7 +2558 3 +3051 3 +223 8 +2167 1 +2974 7 +3793 10 +918 6 +479 6 +3151 3 +2875 1 +3343 8 +132 4 +2995 1 +3006 9 +180 10 +3996 4 +3742 3 +3899 10 +3751 6 +2976 3 +1914 9 +183 2 +3004 5 +579 3 +766 7 +3381 7 +2072 9 +1223 8 +1063 1 +3020 5 +3778 4 +4055 2 +1371 4 +3756 4 +588 3 +328 3 +147 3 +2082 10 +1860 10 +3077 8 +2936 10 +3445 9 +2795 7 +3513 5 +2763 7 +73 2 +1480 7 +1475 5 +966 7 +2178 7 +4075 8 +3541 5 +3507 3 +2097 4 +1313 2 +2648 10 +3037 3 +668 3 +3828 3 +1366 9 +899 5 +1948 10 +1540 3 +2020 1 +1136 4 +3771 
3 +3581 3 +1604 9 +3648 9 +3838 9 +3980 1 +100 5 +3022 9 +2117 3 +1617 2 +1856 4 +8 4 +4057 6 +2708 6 +3392 1 +764 3 +3595 5 +2560 3 +3670 2 +456 6 +542 3 +2333 3 +1134 7 +3643 3 +2835 6 +1091 2 +1616 2 +1525 2 +2960 5 +1424 1 +762 10 +2380 1 +1932 3 +377 3 +703 2 +2384 3 +1916 7 +429 7 +1986 10 +1064 4 +3871 3 +947 10 +1510 7 +1722 5 +3972 6 +442 7 +2630 4 +3923 9 +701 4 +878 2 +2700 1 +609 10 +2911 4 +2702 4 +925 9 +2769 9 +268 6 +113 8 +923 8 +1044 2 +1163 6 +3896 8 +1770 6 +343 6 +785 8 +102 7 +3757 6 +2902 3 +2140 2 +2897 1 +1369 7 +853 4 +3715 3 +3842 10 +2289 2 +3955 8 +1795 5 +2428 6 +212 1 +348 5 +368 3 +2240 3 +956 4 +3489 8 +1081 3 +1098 7 +2015 5 +147 8 +4028 2 +1067 8 +187 9 +1350 6 +1201 1 +2986 1 +2236 10 +722 3 +1902 6 +2518 1 +11 1 +407 7 +718 3 +3125 6 +1605 9 +1577 2 +1349 5 +899 7 +3277 4 +188 6 +2315 9 +2535 8 +2148 8 +2422 9 +93 10 +3583 2 +147 8 +507 7 +1484 5 +2812 6 +1520 6 +1901 10 +475 8 +1402 4 +1454 3 +2988 10 +2328 10 +2863 5 +1956 5 +1655 8 +988 3 +421 3 +3287 9 +3223 7 +2255 3 +1825 5 +2010 6 +2240 7 +2655 1 +38 4 +968 10 +3451 10 +759 1 +1362 7 +421 5 +1943 8 +1099 3 +1756 10 +513 7 +3683 10 +2108 9 +1000 7 +1072 3 +2710 6 +3839 1 +3884 9 +2408 8 +3533 10 +2453 7 +1253 3 +130 5 +280 7 +3464 3 +1994 6 +105 6 +3473 2 +1407 1 +3019 9 +1820 7 +3278 9 +16 8 +81 1 +1135 10 +2509 7 +2685 4 +1252 3 +585 1 +526 8 +1689 4 +3582 9 +350 7 +2432 4 +683 2 +437 6 +2594 4 +1520 5 +4041 9 +612 9 +2342 1 +2657 7 +1893 3 +528 1 +657 3 +1296 9 +4046 9 +1828 4 +2444 3 +1655 7 +175 9 +648 6 +1541 5 +2987 10 +944 1 +3777 2 +691 1 +1904 6 +1786 8 +1663 6 +1423 7 +597 7 +480 3 +2398 2 +417 10 +2610 9 +3464 7 +593 6 +2428 6 +2220 10 +317 6 +1135 7 +2762 3 +1943 4 +1736 3 +975 1 +14 6 +3681 6 +633 6 +2505 4 +3971 5 +2618 10 +3902 10 +618 2 +3249 9 +495 4 +4030 3 +86 7 +3327 2 +28 6 +94 4 +1717 5 +783 8 +2521 10 +4018 8 +2156 5 +1331 10 +958 2 +3362 3 +3351 2 +381 7 +114 1 +1805 7 +1903 2 +2663 9 +2542 3 +283 1 +3931 7 +1115 3 +563 3 +2584 8 +1400 6 +3584 5 
+2605 10 +3338 8 +4029 5 +1157 1 +1828 3 +1982 2 +2276 6 +1531 4 +626 6 +181 7 +3734 5 +140 1 +2835 1 +3805 7 +3094 8 +2553 10 +1948 1 +69 1 +732 5 +786 2 +2152 4 +3992 10 +2884 9 +611 8 +2053 1 +3132 1 +159 6 +3376 4 +3846 2 +2703 1 +2660 4 +583 8 +3563 3 +3421 5 +2081 8 +1372 8 +3802 7 +3927 2 +1332 1 +401 10 +3164 10 +640 6 +665 6 +3261 4 +1292 4 +2037 10 +297 8 +607 7 +2218 8 +3101 1 +298 5 +709 3 +472 3 +1995 1 +1475 7 +3289 2 +1152 10 +188 1 +2554 3 +2655 8 +388 5 +386 3 +3997 5 +933 9 +2941 1 +4047 3 +85 8 +3850 5 +1757 3 +3920 3 +1611 4 +1817 6 +2138 1 +2944 4 +244 3 +3902 4 +93 8 +1614 9 +1851 3 +621 9 +3211 9 +503 4 +3034 3 +2328 6 +4021 9 +1839 6 +221 8 +908 9 +2417 7 +819 7 +590 8 +1940 1 +1652 1 +3750 3 +191 3 +2247 8 +167 3 +1034 5 +34 9 +295 5 +1149 7 +1762 6 +1853 1 +2450 5 +1682 3 +369 7 +3726 1 +613 4 +3931 5 +2214 3 +303 7 +1091 2 +642 5 +1675 8 +743 4 +1176 5 +2579 1 +2473 5 +3862 3 +3672 1 +1129 8 +1191 6 +3790 3 +2537 10 +1950 3 +2653 6 +3653 3 +1212 4 +1082 10 +147 5 +1468 5 +730 6 +2640 1 +335 7 +2568 8 +2719 1 +689 3 +686 3 +2145 6 +50 8 +2911 8 +3260 5 +3244 5 +3703 1 +577 8 +2192 6 +1459 7 +759 10 +2185 10 +895 6 +3981 4 +1420 3 +2161 5 +2529 7 +2943 10 +778 3 +828 10 +4087 7 +2416 8 +692 2 +3985 6 +395 6 +3628 10 +3951 7 +3089 8 +2571 2 +2867 2 +982 5 +1022 1 +442 4 +2390 2 +3345 1 +308 2 +3818 7 +3433 6 +3896 1 +694 6 +3157 2 +2557 7 +2151 9 +2786 1 +751 8 +371 7 +4051 2 +1717 5 +439 4 +2833 3 +3278 8 +1070 4 +459 2 +2349 3 +46 7 +588 4 +539 3 +3371 6 +1310 8 +2531 5 +2075 1 +2766 2 +3242 8 +3066 4 +2900 10 +3021 3 +7 6 +3311 6 +2171 8 +3750 1 +1550 1 +756 1 +1849 1 +2649 6 +1134 10 +2693 2 +52 3 +2004 3 +1782 10 +3076 2 +1586 7 +3650 9 +1705 5 +3287 9 +2025 8 +1077 9 +2233 3 +1816 2 +1850 7 +273 1 +3458 9 +2606 6 +83 2 +2657 10 +2486 4 +4052 7 +2874 10 +520 4 +2485 6 +2587 7 +3806 7 +4024 4 +3391 10 +2760 6 +3009 2 +144 3 +1414 5 +3565 8 +3128 9 +3192 7 +3333 8 +318 1 +3937 7 +2027 2 +951 10 +2610 9 +1260 10 +3343 9 +3218 6 +3079 9 
+1587 3 +1032 5 +658 8 +868 10 +1085 10 +749 5 +4028 6 +1029 3 +3979 10 +4001 9 +1181 8 +1281 6 +320 5 +68 4 +4085 2 +1857 3 +3240 1 +1193 2 +525 5 +3535 7 +2438 6 +1771 9 +2812 1 +3815 8 +144 2 +366 6 +1847 2 +1434 3 +3170 6 +76 1 +3522 7 +3703 1 +2016 10 +1516 1 +1804 2 +1727 8 +2682 6 +2672 5 +687 1 +442 2 +314 4 +2553 3 +2489 5 +1319 9 +2001 2 +2297 1 +935 8 +3378 6 +2472 4 +1358 4 +1640 4 +1958 10 +1719 7 +32 9 +3620 10 +2455 9 +1186 8 +3283 8 +1937 2 +2787 3 +2208 6 +1680 8 +3348 5 +886 5 +213 10 +3651 6 +2328 5 +3140 1 +783 1 +3679 2 +486 3 +3997 6 +2420 2 +3116 7 +2596 6 +651 1 +594 8 +1197 5 +1954 5 +2844 1 +2550 5 +311 2 +3818 4 +3099 7 +4072 10 +4085 9 +1618 9 +2572 6 +3031 8 +43 10 +224 9 +1515 2 +1248 2 +3187 6 +2950 8 +3835 5 +2238 4 +4030 2 +2980 10 +2152 8 +1105 5 +1238 5 +3564 10 +1892 3 +1019 10 +1351 8 +2964 9 +2191 9 +4058 7 +1366 7 +1843 10 +2136 7 +210 2 +1870 3 +2307 8 +1405 4 +2098 2 +420 3 +3166 9 +3400 1 +2946 9 +1210 6 +850 6 +906 3 +256 10 +2817 7 +2319 10 +1367 8 +717 5 +149 4 +1035 4 +964 3 +1747 2 +3494 1 +3187 1 +1071 8 +2716 1 +319 4 +3392 7 +851 2 +1355 4 +1907 2 +385 8 +958 1 +933 2 +4004 7 +306 9 +637 4 +2620 10 +131 8 +3138 7 +3146 5 +3665 10 +342 1 +2361 7 +3332 7 +1153 4 +1208 3 +3453 5 +2285 2 +1692 9 +1565 9 +3932 5 +1129 2 +1282 4 +1323 7 +194 6 +1363 1 +1288 3 +4087 2 +3244 2 +1408 4 +3278 9 +3000 8 +84 3 +3788 6 +3777 9 +352 6 +3082 2 +3815 8 +401 3 +278 7 +1410 6 +736 4 +1051 2 +2683 8 +2580 3 +2862 5 +769 5 +3626 5 +4006 7 +618 5 +1977 1 +771 7 +4026 8 +3212 4 +1323 5 +2699 8 +3683 7 +4081 10 +4042 10 +2115 5 +2221 1 +1006 6 +3965 7 +1466 5 +2782 7 +3883 10 +465 3 +3280 1 +1152 10 +557 9 +1061 10 +1181 1 +1449 3 +2154 9 +3221 10 +108 6 +1115 10 +2308 6 +287 8 +1775 5 +1918 5 +978 6 +1054 9 +848 9 +1469 8 +729 4 +2086 4 +2710 6 +3468 2 +2673 3 +676 9 +3198 4 +985 1 +3647 8 +1025 7 +1461 5 +3741 7 +3780 10 +2885 8 +37 9 +3114 1 +1704 4 +2775 3 +3515 7 +2277 9 +2176 1 +3196 7 +3231 10 +2693 4 +2855 8 +1774 1 +3426 6 +1097 
9 +2088 8 +178 1 +405 4 +3199 9 +2633 8 +1241 1 +3782 8 +3637 2 +2732 9 +1295 10 +2952 8 +585 2 +1605 6 +1753 6 +3015 5 +184 5 +872 3 +4030 4 +1418 5 +2318 7 +3813 7 +2012 6 +574 7 +2574 10 +2898 2 +1492 3 +3735 7 +2882 9 +1446 10 +1671 10 +1146 3 +2947 5 +3150 8 +2796 4 +872 8 +3915 10 +2135 5 +1806 4 +3748 9 +3824 10 +3866 4 +2236 9 +3597 7 +3421 7 +608 9 +227 10 +1735 10 +674 10 +2621 8 +2742 8 +2056 7 +3717 3 +2211 10 +2546 2 +2210 6 +2756 5 +268 3 +2698 2 +276 4 +424 4 +3217 3 +221 2 +660 7 +1626 4 +2158 10 +324 7 +609 3 +3056 1 +2207 8 +1253 7 +1224 3 +2636 9 +3560 6 +3137 7 +3178 5 +3879 3 +2797 5 +2394 9 +550 8 +3610 2 +384 7 +1668 7 +3456 4 +1876 5 +1874 1 +1244 8 +3161 6 +1389 6 +1097 7 +743 3 +3599 9 +2129 2 +3620 7 +858 3 +3993 4 +1686 6 +2561 3 +3456 7 +656 6 +3245 1 +734 5 +849 1 +897 2 +2861 1 +1711 8 +1549 3 +1081 3 +1208 6 +4063 3 +66 8 +2047 7 +2031 6 +704 3 +1293 5 +2441 8 +2816 9 +3667 10 +709 10 +1702 8 +3080 9 +389 7 +1949 1 +3887 7 +1712 2 +3273 10 +3876 2 +1662 6 +117 10 +923 6 +193 2 +3301 10 +3128 6 +2148 5 +2265 6 +3990 7 +2615 3 +3310 7 +2775 5 +3110 10 +2657 9 +1001 7 +1065 1 +1793 7 +533 9 +2986 6 +2089 9 +26 3 +2384 5 +3568 8 +969 2 +3062 3 +2330 6 +661 8 +956 7 +1684 3 +448 4 +2293 1 +2192 8 +3401 1 +1961 2 +869 3 +132 9 +902 9 +3533 6 +2493 5 +2162 2 +1644 7 +2240 1 +2152 6 +1617 9 +1023 5 +1934 10 +3861 3 +2532 1 +2440 2 +3584 6 +1317 1 +1939 8 +931 7 +125 4 +2073 4 +1806 3 +2377 2 +2070 3 +532 2 +741 10 +1092 6 +2350 1 +455 5 +2687 1 +2664 4 +3497 4 +1812 6 +1456 1 +394 8 +3347 3 +937 10 +3880 2 +1317 9 +3140 4 +300 8 +397 1 +2059 5 +2476 9 +1608 1 +3288 5 +2640 2 +1757 3 +2641 6 +2603 5 +2545 2 +3159 9 +3387 7 +3987 8 +1645 5 +2049 1 +2995 1 +1532 1 +2478 3 +2599 7 +3035 2 +768 6 +525 2 +3308 10 +246 9 +1723 5 +2727 9 +518 9 +1222 5 +677 2 +1196 2 +1824 3 +3310 4 +1129 5 +2665 2 +2004 6 +862 2 +1190 2 +2075 5 +2657 9 +2618 7 +3337 7 +3113 9 +1970 4 +1988 1 +863 8 +2625 10 +147 9 +1395 6 +2187 2 +1039 5 +1843 6 +1805 10 +1913 1 
+2793 9 +2420 3 +1987 7 +1233 8 +3491 4 +3761 6 +2967 2 +443 3 +1502 6 +1586 10 +99 9 +2373 7 +3045 2 +945 7 +1145 2 +658 8 +682 2 +2717 2 +3663 1 +3178 1 +1558 10 +3148 3 +1159 2 +968 8 +3862 8 +2476 6 +63 9 +142 7 +2412 3 +2505 3 +4079 7 +3113 9 +1160 8 +1234 5 +2604 6 +3123 4 +366 3 +2954 1 +2298 9 +3526 7 +3071 2 +1579 2 +3108 10 +341 10 +3385 8 +3201 2 +4024 3 +3989 1 +2840 3 +803 10 +1698 8 +1100 10 +2982 9 +1657 7 +3584 8 +3626 3 +1983 1 +1765 10 +3843 4 +3101 10 +2972 8 +1692 9 +1874 4 +188 2 +1425 10 +2366 2 +3314 1 +2063 9 +2354 1 +2565 4 +1190 10 +3072 7 +945 6 +2670 10 +102 3 +3070 7 +1750 5 +506 8 +3060 2 +3108 9 +40 10 +1995 4 +2963 2 +217 6 +1585 5 +661 5 +769 4 +3476 9 +1583 7 +128 4 +1154 1 +3485 3 +276 2 +2850 4 +4026 9 +1551 8 +3113 3 +1887 5 +1895 7 +653 4 +960 4 +4060 9 +2873 8 +1374 3 +2762 2 +2336 5 +2954 4 +3048 7 +3791 9 +2818 3 +2544 9 +2364 6 +1081 8 +1369 3 +2397 9 +3635 8 +3219 2 +1811 4 +1532 2 +2492 4 +229 9 +1725 8 +1608 8 +257 2 +486 9 +2756 5 +212 8 +3191 9 +1855 4 +3752 8 +2958 4 +1134 3 +3533 3 +1951 10 +2131 4 +2610 9 +2919 4 +3949 2 +3292 2 +1456 1 +2276 4 +3196 5 +3501 8 +1020 10 +1175 5 +3252 3 +3757 5 +2920 4 +1755 9 +410 3 +605 7 +1207 8 +2536 5 +3803 4 +972 9 +259 6 +2712 3 +1886 6 +1610 10 +3107 5 +100 10 +1551 1 +2627 8 +876 6 +979 1 +3767 7 +1682 4 +3011 10 +1346 3 +4060 2 +749 2 +985 4 +1607 7 +2158 9 +219 10 +1320 10 +989 8 +2288 9 +4002 9 +3639 3 +2251 6 +108 8 +3571 5 +1871 4 +1798 8 +3303 9 +830 10 +204 5 +2710 4 +690 1 +871 2 +3513 7 +1718 6 +1493 8 +2766 2 +2847 7 +3304 4 +1122 5 +4016 9 +3035 3 +3626 7 +1202 3 +2422 2 +2267 9 +2837 2 +1253 10 +2135 4 +2592 7 +895 1 +497 7 +258 8 +2515 9 +3309 7 +1945 10 +279 7 +807 7 +750 4 +2745 7 +3154 2 +1091 1 +55 6 +3749 2 +2469 8 +1771 1 +434 8 +935 8 +3013 3 +241 10 +343 3 +3839 7 +2967 4 +2877 9 +729 1 +2844 2 +1627 1 +1805 6 +355 3 +3715 3 +3513 3 +294 4 +3911 8 +1748 6 +3890 10 +1027 1 +3646 7 +1210 8 +3549 3 +882 4 +2439 8 +3578 7 +606 3 +3881 6 +2532 6 +1396 8 +3425 
10 +778 8 +3003 10 +1838 10 +1596 5 +416 8 +2314 4 +2755 9 +2133 6 +3384 1 +3039 10 +2575 8 +93 7 +134 10 +2137 3 +1431 2 +1299 6 +1745 8 +943 5 +496 2 +394 1 +0 8 +693 5 +3931 3 +3976 10 +3829 10 +3181 7 +1338 5 +3057 10 +2894 9 +2043 1 +3121 10 +2248 10 +1188 8 +265 8 +3422 3 +3565 4 +649 9 +2980 1 +2923 4 +3570 8 +357 3 +442 4 +1470 4 +2726 10 +4003 8 +1331 5 +3786 7 +2368 3 +3113 8 +902 3 +426 8 +1570 10 +1944 2 +4049 7 +3548 1 +728 5 +1047 7 +3482 10 +2645 2 +928 9 +1986 1 +209 3 +2623 2 +3860 4 +1380 8 +4026 7 +3918 5 +1051 10 +3944 3 +3250 2 +694 10 +402 6 +1707 7 +4037 10 +1283 5 +1261 1 +104 10 +2859 1 +1262 7 +3877 8 +466 8 +122 1 +3346 9 +3570 8 +1921 8 +3987 3 +1670 10 +2598 2 +3718 10 +2091 5 +3745 9 +1009 5 +2823 3 +2506 3 +2945 4 +1941 1 +2372 4 +833 7 +2509 4 +3358 1 +401 7 +3688 8 +3441 4 +306 9 +3991 7 +1636 5 +789 9 +3662 6 +728 2 +3376 3 +2619 7 +3994 8 +3485 1 +1844 4 +2819 3 +1027 9 +1267 7 +2068 4 +1659 6 +1878 4 +3620 8 +778 3 +3801 8 +1354 1 +1967 5 +3829 7 +1123 5 +3990 9 +3199 3 +2923 3 +1366 1 +3516 10 +1228 4 +1367 4 +3435 7 +1213 4 +564 7 +3668 7 +1730 5 +2317 6 +1688 4 +1647 1 +3429 6 +1080 4 +721 1 +1795 8 +3204 9 +3529 8 +581 3 +1833 6 +2435 2 +3641 4 +3085 9 +1569 6 +2799 6 +1389 7 +418 7 +3103 6 +2438 6 +3126 5 +501 9 +2675 9 +750 1 +504 3 +372 10 +1741 4 +3746 6 +4075 10 +2654 8 +622 10 +633 5 +2107 10 +869 3 +66 3 +1724 8 +2734 7 +3801 2 +414 8 +2164 1 +2812 2 +396 9 +2526 7 +3088 1 +277 4 +3455 4 +2535 8 +3039 6 +2670 3 +762 3 +2842 4 +3746 1 +1691 4 +429 4 +3319 2 +192 3 +3180 4 +3633 10 +1232 10 +2420 2 +622 8 +1721 1 +3665 8 +2476 1 +2432 5 +2419 7 +1778 6 +2852 8 +3101 5 +948 2 +1896 6 +311 7 +3321 8 +1686 3 +3126 2 +2589 5 +3920 6 +3499 3 +404 2 +1581 6 +3045 4 +3363 4 +481 5 +3439 8 +2868 8 +2306 2 +1331 1 +1352 4 +797 3 +2136 7 +2222 5 +1796 10 +1541 3 +144 7 +3522 1 +3608 5 +3480 2 +423 9 +1621 1 +3896 3 +614 8 +610 10 +975 6 +287 5 +1651 6 +4087 7 +1979 10 +1406 3 +2786 2 +2682 8 +901 6 +2356 1 +1623 9 +1311 10 +3431 6 
+873 7 +2216 4 +3918 1 +3801 4 +2766 8 +4005 10 +767 1 +1933 2 +1532 3 +79 5 +2101 3 +2366 8 +412 6 +2925 9 +3375 3 +1773 4 +4009 8 +3572 8 +1512 5 +1188 2 +3942 7 +3366 6 +1544 6 +3548 10 +340 1 +678 6 +3557 6 +922 6 +3996 5 +1672 7 +1910 5 +1099 2 +3570 9 +4029 2 +3950 6 +1599 10 +1841 9 +3785 5 +3981 6 +3063 9 +986 9 +347 10 +1832 5 +2273 1 +2509 8 +2470 5 +2067 10 +719 6 +1269 8 +2941 1 +4031 7 +3032 3 +2822 5 +916 7 +1781 4 +2107 1 +3950 2 +2227 7 +3153 4 +610 5 +913 3 +403 6 +340 7 +3573 10 +1325 9 +881 7 +3903 4 +799 9 +1249 8 +2114 3 +3648 1 +4076 4 +3782 10 +68 6 +3936 9 +2202 9 +3932 4 +1467 4 +2978 5 +476 4 +222 9 +2747 1 +1227 9 +1823 8 +2387 10 +1440 4 +2887 10 +943 4 +875 5 +1401 1 +1615 10 +3520 2 +2384 10 +2884 8 +3669 5 +2387 9 +164 6 +172 3 +2510 2 +2926 9 +3235 6 +1881 5 +1950 7 +3728 6 +1128 1 +417 6 +836 6 +149 7 +1300 6 +3946 8 +86 10 +3291 8 +1233 3 +3856 1 +3118 1 +1761 1 +430 5 +938 6 +297 4 +1548 1 +2995 4 +1048 3 +3783 5 +3499 7 +3868 2 +2272 10 +4007 10 +3906 10 +309 3 +1660 10 +2925 3 +2792 7 +773 4 +3786 1 +3468 5 +2748 1 +1680 6 +978 7 +815 5 +1632 6 +291 9 +3937 1 +1277 1 +4071 2 +3781 5 +1858 3 +399 6 +1108 8 +3145 6 +2173 9 +3652 6 +1588 4 +1241 7 +2724 5 +2344 6 +279 2 +2602 8 +588 9 +3281 5 +742 2 +3824 3 +2506 9 +60 4 +2815 4 +3679 1 +2121 7 +755 9 +3033 1 +1025 3 +1265 10 +1513 2 +1802 3 +2800 9 +1695 1 +229 10 +466 1 +126 8 +4027 5 +943 7 +4066 8 +2329 5 +3925 2 +3970 6 +553 4 +3589 3 +1504 10 +939 2 +829 8 +3608 4 +3197 9 +1613 4 +2219 3 +2744 10 +296 7 +3970 6 +3902 5 +1915 2 +3423 4 +3305 9 +3303 9 +1819 5 +3765 3 +509 6 +1146 9 +2902 6 +4035 4 +950 9 +1946 7 +3092 3 +397 3 +2952 4 +870 7 +3611 6 +2213 10 +2894 3 +540 8 +1944 3 +1879 8 +2040 4 +1552 10 +2498 2 +823 4 +452 8 +3351 1 +3025 7 +3241 5 +2244 7 +3168 4 +2072 6 +195 5 +880 6 +1257 7 +3455 2 +504 7 +1848 2 +2660 4 +2317 8 +1884 3 +225 4 +1809 10 +552 5 +1112 5 +340 8 +3021 2 +3084 3 +2140 6 +519 8 +1879 2 +2878 5 +1785 10 +1589 2 +1259 2 +3609 5 +2048 10 +2345 10 
+670 8 +3944 6 +1773 9 +1612 7 +4076 4 +2856 9 +332 9 +2127 8 +1091 2 +3606 6 +751 3 +4036 9 +3866 9 +1326 3 +1120 9 +3361 8 +417 6 +1075 2 +1459 6 +1269 5 +3602 5 +2276 1 +678 3 +3846 9 +206 3 +1592 5 +1677 4 +2752 4 +2158 1 +2350 6 +2931 8 +2294 1 +1215 1 +363 3 +1423 3 +1526 10 +199 1 +3893 7 +3443 1 +2004 1 +1796 3 +292 9 +3030 5 +1002 7 +1657 4 +717 4 +1567 3 +663 8 +4037 10 +1253 9 +2510 4 +1699 4 +2198 6 +202 8 +777 10 +3846 3 +2196 5 +2910 2 +2246 9 +3640 9 +1491 5 +1503 10 +1670 4 +344 7 +3988 9 +1347 2 +502 10 +2808 3 +3885 5 +2786 2 +267 3 +3512 3 +3211 10 +491 9 +2175 7 +2833 3 +3513 6 +3403 9 +973 10 +1560 3 +734 4 +533 2 +1839 1 +1926 4 +2975 1 +2156 3 +3377 2 +2299 4 +666 3 +3981 2 +2857 6 +627 6 +34 7 +3789 7 +2067 3 +751 6 +2819 9 +1311 9 +1113 5 +2389 1 +2600 9 +1820 6 +1090 9 +3392 3 +2987 2 +2031 2 +2522 2 +4004 1 +151 2 +3816 3 +2188 6 +2184 7 +540 2 +2076 6 +3861 10 +3289 2 +1024 6 +2344 1 +1880 4 +1704 3 +395 2 +3616 3 +3136 4 +2388 5 +3016 3 +3086 8 +2745 3 +2143 7 +1009 9 +3566 9 +155 8 +330 4 +3616 3 +2777 5 +34 7 +3824 2 +58 3 +1069 9 +1959 6 +1326 10 +121 1 +39 2 +708 8 +433 3 +2002 3 +1537 9 +459 1 +2062 1 +2212 1 +1689 2 +301 8 +785 9 +1777 4 +2689 4 +2614 5 +3668 7 +3096 8 +433 3 +3618 1 +902 10 +760 3 +1181 10 +570 1 +3705 6 +2119 1 +2040 7 +75 9 +945 8 +1652 2 +261 4 +1925 5 +400 1 +1630 4 +3873 6 +3964 3 +3633 10 +2434 6 +3058 9 +437 2 +1939 4 +1577 1 +585 5 +3775 1 +3825 3 +3629 7 +98 3 +593 10 +2123 9 +2668 9 +1845 8 +440 6 +3140 4 +1397 8 +2796 6 +1974 10 +2409 7 +1383 6 +3167 9 +3146 2 +3175 1 +2007 2 +4083 2 +782 9 +2423 7 +41 5 +2687 9 +1083 5 +2213 6 +1865 10 +1077 2 +770 4 +3067 1 +2747 3 +3136 5 +2861 7 +2093 4 +3547 4 +3509 10 +3388 2 +3252 6 +2245 10 +2690 1 +915 7 +2760 2 +2304 10 +1416 3 +1226 10 +2056 7 +371 4 +1700 4 +1080 3 +722 8 +1133 4 +1915 7 +22 8 +368 2 +1223 5 +513 3 +216 5 +923 10 +4081 6 +1186 7 +2072 10 +335 2 +3573 6 +1543 8 +1825 3 +110 10 +2327 1 +1010 6 +1954 4 +3420 2 +1862 4 +3075 10 +2937 9 +3747 3 
+322 2 +2944 9 +3751 6 +2462 10 +3596 9 +686 8 +2853 1 +2072 1 +2941 9 +513 10 +3508 4 +419 3 +1327 2 +1594 10 +3150 7 +3013 1 +3214 3 +2671 5 +3782 10 +3802 1 +3958 10 +3795 6 +1522 6 +1401 4 +220 6 +2269 10 +3654 3 +755 1 +1803 6 +3780 2 +194 4 +4057 1 +2433 1 +856 7 +3131 5 +3963 6 +1949 6 +1643 7 +3594 6 +342 10 +3132 8 +1849 3 +2588 5 +3774 9 +186 9 +2446 4 +162 3 +1681 8 +320 1 +473 5 +1648 8 +809 5 +1421 6 +1656 7 +2678 4 +3269 2 +2563 7 +669 4 +921 2 +3819 10 +1546 6 +2286 9 +381 3 +3492 5 +1230 3 +195 4 +3236 9 +631 6 +1848 8 +2904 2 +3668 2 +1794 8 +3286 1 +3144 4 +1830 7 +1039 8 +3926 6 +3408 5 +605 1 +3806 10 +2356 3 +2266 1 +1520 5 +702 8 +380 3 +122 7 +1726 4 +1139 4 +3062 3 +2496 7 +3760 10 +211 6 +2970 4 +1211 3 +2315 9 +2739 6 +1137 9 +1725 6 +3946 4 +3446 10 +1218 10 +3736 5 +3246 1 +3816 7 +3051 6 +340 3 +3934 8 +2177 8 +963 4 +1978 9 +1076 5 +3329 7 +2824 6 +900 7 +1077 7 +591 5 +809 5 +1175 2 +598 2 +3882 2 +1753 1 +3796 1 +2958 7 +2551 5 +2574 7 +2240 1 +578 1 +2462 10 +2082 9 +4043 2 +489 4 +2008 7 +3176 4 +2675 9 +3178 2 +1655 7 +3293 3 +433 6 +3353 2 +2230 7 +179 5 +2290 5 +69 9 +2822 4 +908 1 +1488 3 +103 1 +1803 10 +3633 5 +1447 5 +1165 2 +414 5 +3311 5 +1882 8 +3396 10 +2937 9 +1823 2 +1895 2 +3746 9 +1409 3 +677 4 +266 6 +2961 3 +3229 2 +284 10 +510 5 +1385 4 +1105 9 +1481 10 +2218 7 +3113 9 +1185 10 +481 4 +3427 4 +859 5 +3885 8 +2238 5 +1933 10 +3188 7 +3824 2 +3712 2 +3336 7 +1127 8 +3648 5 +2894 9 +1370 4 +2276 2 +2952 6 +3528 10 +3977 6 +3714 3 +255 3 +1946 4 +2867 10 +978 8 +3391 1 +3137 6 +3584 6 +3170 1 +1441 6 +3988 2 +68 1 +2842 1 +2574 5 +525 10 +2742 6 +873 7 +3436 9 +836 2 +1320 4 +298 4 +3559 1 +3008 2 +2519 5 +649 2 +3098 6 +1217 9 +430 4 +508 3 +3641 8 +2941 8 +1172 7 +3938 8 +987 10 +2640 7 +2175 7 +1589 1 +3858 6 +1799 7 +2386 5 +2921 5 +229 9 +1875 8 +3662 5 +3382 5 +1457 8 +2667 1 +1020 4 +1529 8 +2273 3 +3537 9 +2486 3 +3058 8 +3500 4 +3907 2 +4023 8 +2301 5 +875 10 +853 4 +1284 10 +1577 7 +568 2 +3351 9 +3747 8 
+1624 8 +3734 1 +1924 2 +453 5 +2140 10 +2486 6 +886 4 +1088 4 +1911 8 +1722 3 +260 6 +1655 1 +1627 10 +575 4 +2477 5 +3718 5 +1236 2 +1886 10 +608 1 +2025 10 +442 8 +664 3 +3810 7 +802 6 +1433 1 +1700 8 +1823 7 +3167 3 +679 6 +2025 9 +3808 7 +1765 9 +2703 1 +2508 6 +1762 5 +1219 4 +2483 10 +3182 7 +3739 2 +1473 6 +1270 1 +3942 2 +3869 10 +650 9 +713 1 +2696 6 +2817 7 +2214 9 +3339 8 +3379 2 +444 1 +837 9 +3325 6 +3605 7 +133 9 +3903 5 +129 7 +919 9 +67 2 +1519 6 +2093 2 +863 9 +2481 10 +2267 4 +388 1 +4034 5 +2236 2 +2963 8 +2563 10 +2641 10 +1925 7 +435 10 +946 2 +1408 3 +1672 9 +1064 10 +690 3 +1566 6 +3434 1 +2659 9 +3511 9 +157 1 +1768 6 +3980 2 +3126 4 +1763 7 +1494 1 +956 9 +1267 4 +1485 1 +368 10 +3108 5 +1683 9 +2098 5 +2746 6 +612 3 +1994 5 +3867 5 +2411 9 +3485 4 +3200 6 +807 3 +2942 5 +3652 6 +3093 5 +1102 9 +3343 5 +1669 7 +366 3 +2797 6 +1969 3 +3297 4 +2688 10 +3444 6 +1576 8 +2409 4 +19 5 +76 4 +241 8 +126 2 +342 5 +2267 8 +322 3 +1458 3 +771 5 +355 7 +1012 6 +1410 2 +225 4 +625 1 +1537 5 +3643 4 +4017 7 +1681 10 +18 7 +988 9 +531 6 +3340 1 +3715 5 +552 4 +481 5 +2289 9 +2799 2 +1854 9 +3959 4 +3941 7 +697 4 +3044 5 +3879 10 +823 2 +482 7 +766 5 +1611 2 +1186 1 +1063 5 +3696 4 +3997 4 +1121 2 +1532 4 +3565 2 +3844 8 +3642 2 +2298 8 +3612 4 +3319 6 +2730 3 +1361 9 +2790 3 +2653 10 +3237 4 +2719 2 +88 5 +894 1 +4048 3 +645 4 +2641 7 +970 9 +3808 3 +3216 3 +343 1 +2582 9 +3595 5 +2230 10 +2953 10 +2343 8 +2333 5 +2659 3 +3320 10 +2310 9 +3659 1 +2166 6 +1147 7 +3420 6 +3912 1 +2932 6 +4095 5 +815 3 +671 10 +1709 10 +437 3 +2612 7 +948 10 +582 8 +600 3 +2057 10 +1943 1 +3193 6 +1005 5 +2603 2 +1975 6 +1551 7 +861 9 +805 4 +2556 8 +2980 8 +1150 9 +2859 8 +3236 10 +2504 7 +3151 2 +2432 10 +1337 8 +3581 5 +2099 6 +2249 1 +2755 9 +3959 9 +2478 4 +1950 2 +696 9 +783 8 +3474 10 +1250 4 +1640 4 +406 8 +1045 2 +2403 10 +465 1 +2555 10 +867 6 +932 5 +782 8 +991 1 +3450 4 +2163 7 +4014 5 +2548 10 +2088 9 +2206 8 +2695 2 +2360 8 +3681 2 +1849 7 +2659 4 +688 9 +375 
8 +1702 10 +110 1 +2464 1 +3988 5 +1309 4 +316 7 +3777 2 +304 6 +3448 1 +3484 3 +414 2 +2171 3 +2190 5 +1234 6 +85 5 +4036 8 +2928 8 +832 9 +800 10 +799 9 +598 9 +3154 3 +3829 10 +2183 9 +303 6 +2100 3 +3751 2 +1404 2 +2872 3 +3529 10 +3178 3 +3184 5 +2229 4 +2452 1 +4064 3 +2624 4 +1858 5 +4038 9 +2116 3 +3140 5 +1762 2 +1278 7 +3472 5 +3779 9 +3487 8 +1745 1 +904 3 +1487 9 +1532 8 +1159 2 +2898 1 +1408 10 +2516 10 +2320 1 +3764 3 +2506 7 +1887 2 +1457 6 +2111 3 +1434 8 +328 9 +302 7 +3819 6 +1137 3 +2846 9 +1432 1 +3129 8 +2929 5 +1912 5 +1461 10 +3630 5 +620 3 +3217 5 +3176 10 +2691 5 +923 9 +130 6 +3075 8 +3104 2 +634 9 +1953 5 +840 10 +788 9 +2142 7 +788 10 +3641 10 +2398 10 +106 2 +2817 9 +2196 2 +1266 10 +4091 1 +2069 1 +751 6 +3077 5 +2497 6 +1919 8 +2524 6 +547 10 +3896 2 +3216 6 +2263 1 +74 8 +3736 4 +2958 7 +221 9 +2353 1 +3987 7 +3894 2 +3556 3 +1661 3 +1270 4 +3749 6 +3599 1 +2712 6 +1776 8 +1370 1 +1757 9 +3157 4 +2404 10 +779 4 +3029 2 +3154 8 +1503 2 +1166 8 +1657 9 +1727 9 +2278 2 +575 7 +3046 5 +2276 1 +763 3 +3781 5 +1355 6 +2091 4 +3323 9 +904 9 +2388 8 +261 6 +1099 2 +827 10 +1204 4 +728 5 +717 5 +1425 1 +1017 5 +3516 9 +1395 3 +1883 1 +3193 8 +1838 1 +1226 10 +1646 6 +2328 1 +2603 6 +32 5 +2660 6 +3992 7 +977 5 +3369 2 +211 1 +1526 5 +3302 10 +3332 3 +1422 3 +3467 5 +252 5 +4001 10 +3832 3 +647 5 +1311 8 +2676 7 +777 3 +1459 8 +3346 1 +3498 3 +4042 10 +2097 1 +928 8 +1523 6 +1179 8 +229 3 +111 3 +3898 5 +1932 9 +1413 1 +2283 7 +3192 3 +3533 7 +3581 1 +3549 4 +425 7 +2740 2 +2600 2 +4006 3 +1513 8 +4017 5 +3449 6 +3751 5 +3518 3 +771 2 +2254 5 +3596 4 +1826 9 +1265 4 +658 4 +1015 2 +3840 2 +186 3 +1904 4 +988 6 +2508 4 +3309 9 +1553 6 +1894 7 +4064 10 +2256 1 +1399 6 +3120 9 +3891 3 +2364 1 +3351 2 +3364 9 +3724 8 +3279 7 +809 10 +3446 1 +1057 3 +3114 9 +3952 5 +2817 3 +312 3 +437 10 +1690 10 +2620 2 +2785 7 +3914 2 +654 8 +2473 5 +570 10 +1857 8 +2927 3 +3633 8 +2586 10 +1979 9 +3221 10 +185 8 +3094 3 +10 3 +335 7 +3610 7 +3820 9 +3210 9 +788 
9 +224 4 +2623 5 +2714 6 +1288 6 +597 7 +1995 9 +1699 7 +2072 6 +3344 6 +2649 5 +1779 6 +324 1 +1018 1 +2155 7 +869 7 +1636 2 +3612 5 +2360 5 +1043 10 +2716 10 +1962 7 +1923 8 +2994 10 +1160 3 +138 10 +1379 4 +942 1 +2718 5 +3565 5 +1245 5 +641 6 +1953 2 +1186 4 +126 4 +3651 1 +741 2 +4026 7 +1044 1 +3329 6 +335 3 +757 9 +1959 1 +970 10 +1374 6 +1372 3 +2080 6 +3134 5 +2353 9 +3 9 +2327 3 +3715 9 +2304 7 +3320 2 +3035 2 +954 6 +3934 10 +2073 2 +2233 2 +799 10 +1736 1 +3663 9 +985 4 +233 5 +3515 2 +993 6 +2173 6 +3041 6 +2718 7 +3604 5 +1238 3 +2604 4 +3032 8 +3675 8 +905 7 +3644 2 +1388 7 +3322 1 +3798 1 +3338 4 +1194 7 +2614 3 +1600 8 +2937 8 +1452 10 +893 4 +4077 9 +2633 10 +3024 2 +45 4 +2351 1 +44 7 +248 10 +2566 1 +2282 8 +2721 6 +489 9 +2994 10 +3121 7 +3316 1 +2512 2 +2221 7 +510 1 +3000 3 +2551 5 +3512 4 +3770 5 +472 6 +1555 1 +1540 8 +474 2 +3574 1 +3385 9 +1272 3 +3225 3 +1225 8 +748 4 +1122 1 +376 4 +1160 3 +1260 2 +2478 6 +3236 4 +1873 9 +2811 2 +2034 1 +2712 3 +3957 7 +1364 4 +1303 1 +3264 5 +224 10 +714 7 +2487 8 +2272 6 +1067 9 +1252 3 +242 4 +3523 5 +3954 5 +2360 7 +1939 8 +3576 5 +1035 2 +509 3 +1477 6 +3307 8 +3731 6 +2372 7 +736 5 +1469 1 +3459 1 +3949 7 +2502 5 +2273 7 +1007 8 +2756 7 +1486 1 +2849 4 +976 1 +2354 3 +348 5 +3211 4 +3659 9 +3949 2 +1305 10 +779 9 +1559 4 +1827 8 +3133 6 +534 2 +2501 5 +1378 6 +2656 10 +4060 10 +758 9 +2607 8 +535 8 +3398 6 +1572 6 +4092 2 +2856 2 +317 10 +1333 3 +2024 4 +2912 7 +1334 10 +2471 3 +1186 7 +1027 2 +1139 1 +3857 1 +118 6 +15 9 +309 5 +2691 10 +1855 7 +3243 9 +3972 5 +170 5 +783 6 +1648 10 +2250 1 +3008 10 +452 10 +119 2 +2175 8 +2432 2 +3369 9 +340 5 +1207 2 +521 3 +3438 10 +2126 8 +3333 3 +3293 7 +610 3 +1504 9 +3487 5 +3962 8 +834 7 +3231 1 +3834 5 +2801 7 +2515 3 +3627 4 +2839 4 +1796 10 +657 7 +9 7 +1298 1 +2113 1 +2261 4 +1215 4 +570 2 +2509 3 +499 8 +1675 2 +1159 8 +1633 6 +2181 5 +180 5 +496 3 +3674 9 +1866 10 +2576 4 +2640 10 +2986 7 +741 7 +34 2 +4050 10 +624 7 +2524 2 +3795 9 +1544 8 +3232 1 
+2544 7 +3116 8 +1180 9 +3784 9 +2335 1 +2941 7 +2329 8 +637 5 +1408 5 +974 6 +747 10 +2873 9 +2754 10 +1682 2 +1962 3 +3132 7 +3578 5 +566 6 +1152 6 +2729 4 +3160 9 +1700 4 +1789 5 +2309 4 +1773 4 +371 2 +3821 6 +3587 7 +2523 3 +993 9 +2604 5 +3284 7 +3117 2 +3249 6 +1839 5 +1228 6 +1835 8 +3598 2 +1284 7 +3343 1 +659 6 +2633 4 +1227 8 +2996 9 +1224 3 +634 7 +3985 4 +262 1 +2655 9 +581 4 +3039 10 +2723 1 +1957 1 +2528 2 +244 5 +137 3 +4075 3 +3436 2 +4087 7 +3641 10 +2620 2 +3511 3 +464 5 +1857 4 +749 4 +2694 7 +3515 2 +3285 4 +2205 5 +1417 8 +1834 10 +3335 9 +2735 2 +2596 5 +4057 6 +3832 2 +3595 10 +126 8 +2982 1 +2578 1 +1442 6 +3415 5 +2849 8 +2145 9 +3870 3 +3082 1 +3210 10 +3737 1 +1449 7 +1304 3 +2853 7 +329 1 +1904 3 +3690 2 +3711 2 +2568 9 +846 4 +1446 2 +106 3 +3568 9 +1030 5 +2394 10 +773 1 +3241 8 +73 8 +2778 6 +119 2 +2873 4 +158 8 +2655 4 +2269 10 +573 1 +3776 8 +435 6 +1733 8 +2862 7 +3845 2 +3189 8 +3969 1 +3108 1 +1215 2 +511 2 +2863 5 +3713 2 +3127 4 +3081 6 +1419 10 +120 10 +2843 6 +3079 7 +382 7 +2755 8 +2196 9 +363 5 +3175 7 +3447 9 +4021 1 +2999 3 +2210 4 +245 8 +3486 6 +196 6 +4055 7 +4083 5 +727 7 +623 6 +1936 1 +590 3 +690 10 +1327 1 +2046 8 +521 9 +3709 8 +1357 8 +3306 4 +2909 9 +808 9 +2466 10 +2968 7 +792 3 +1565 10 +1199 3 +977 7 +2759 7 +2836 5 +1631 9 +844 7 +1675 9 +1770 6 +1131 5 +3687 1 +2869 7 +3020 9 +2215 10 +3912 10 +3568 5 +472 3 +3801 2 +2739 1 +179 5 +127 4 +1912 5 +198 3 +2111 5 +1162 7 +425 9 +731 1 +1410 10 +3101 9 +1103 5 +1485 7 +1555 8 +2825 9 +3312 8 +1881 1 +3349 1 +3721 2 +1049 5 +2363 8 +3727 2 +2794 10 +2658 5 +3487 1 +2979 10 +2119 10 +1466 3 +3963 2 +3181 1 +1866 8 +1646 3 +2777 2 +2483 8 +3825 9 +633 3 +3489 8 +2970 7 +1956 4 +3246 3 +298 5 +79 3 +2958 10 +1600 6 +3610 7 +1230 3 +2683 6 +2687 5 +4054 2 +699 7 +3400 10 +1956 4 +1907 4 +3961 3 +3709 4 +3893 8 +2588 8 +632 7 +3859 6 +1001 3 +1108 10 +3667 8 +1009 3 +3586 9 +3187 10 +1790 8 +2542 3 +352 6 +2829 2 +758 9 +3788 8 +1950 8 +1995 2 +3562 4 +1812 3 +3072 
4 +973 7 +98 6 +2162 10 +2251 7 +1984 8 +1871 8 +2085 10 +3638 3 +2192 3 +718 2 +3932 2 +2416 7 +121 9 +1394 3 +1053 4 +3505 5 +1671 9 +3121 8 +1205 3 +2068 1 +628 6 +704 10 +515 6 +798 9 +3251 1 +374 8 +2594 8 +3858 4 +2619 5 +2191 7 +1986 10 +322 6 +2839 10 +2546 6 +1236 9 +1752 8 +3056 5 +373 9 +2983 8 +2264 6 +2325 8 +2959 3 +3631 7 +1979 3 +3088 5 +3082 2 +2863 2 +2681 8 +3473 7 +816 8 +85 10 +955 7 +591 9 +3790 6 +1168 3 +2321 9 +1923 2 +2731 3 +2146 8 +2847 1 +2206 9 +1113 2 +3631 7 +2177 7 +2281 3 +2262 5 +3129 2 +2149 4 +524 7 +2552 7 +290 1 +37 7 +1938 10 +1799 9 +4080 5 +783 5 +282 8 +68 9 +2637 2 +2539 9 +213 1 +475 2 +208 7 +421 9 +1530 6 +2418 5 +3953 6 +3985 1 +3000 10 +77 5 +149 3 +2218 4 +1826 1 +2212 2 +461 8 +4087 7 +2039 5 +1590 3 +577 8 +1191 5 +2466 4 +3361 5 +527 10 +3358 1 +2079 4 +2798 5 +3990 2 +2835 6 +2139 10 +2979 7 +2117 8 +3185 3 +642 6 +188 4 +654 2 +2128 1 +3288 4 +3134 10 +51 6 +3496 9 +2883 10 +1077 7 +3069 6 +1204 10 +1396 2 +2541 1 +2317 7 +4090 7 +3539 5 +3235 4 +1110 3 +1790 6 +1968 10 +1076 7 +2311 2 +3495 9 +835 1 +585 5 +2294 5 +2840 3 +1028 4 +652 7 +1619 6 +3608 10 +281 2 +637 4 +1123 8 +1155 6 +2604 6 +2203 3 +2420 7 +3215 7 +399 1 +1 9 +1436 6 +691 8 +550 6 +2629 1 +539 7 +1001 7 +3453 6 +2965 4 +2098 5 +1789 5 +3621 6 +3958 4 +1681 3 +759 2 +1172 3 +1126 7 +3477 3 +370 10 +1318 9 +98 6 +2313 2 +1291 6 +993 4 +3593 3 +128 6 +778 10 +1473 7 +615 2 +260 3 +3651 1 +3334 8 +4042 5 +657 9 +3542 7 +2233 7 +1956 4 +2133 8 +2782 4 +3889 5 +99 1 +2578 3 +451 10 +2484 9 +1947 4 +2212 2 +2284 6 +1371 2 +3921 5 +2002 3 +114 5 +4084 1 +346 10 +4070 2 +2330 10 +2200 10 +94 4 +3099 4 +1497 7 +1740 9 +80 3 +839 6 +2305 7 +928 7 +1369 3 +2532 8 +995 9 +1568 1 +1773 3 +378 4 +2271 5 +761 9 +519 7 +3151 2 +268 4 +3857 4 +71 5 +2831 10 +2903 3 +3173 4 +3630 6 +2258 5 +1272 7 +475 1 +407 2 +1433 2 +2624 8 +1492 10 +4013 6 +2006 5 +44 9 +3647 9 +3104 1 +3251 9 +4090 1 +4053 9 +2748 6 +553 4 +2964 8 +3234 4 +2097 4 +2762 10 +3947 7 +2941 3 
+3343 9 +1872 1 +3647 2 +139 7 +175 4 +1573 1 +2708 3 +2525 4 +727 4 +1281 9 +2165 6 +3119 6 +131 5 +2162 7 +2469 3 +1384 6 +1382 6 +3262 10 +2898 2 +1168 10 +320 7 +1772 4 +473 3 +3529 8 +2740 2 +3866 10 +1730 9 +1447 8 +2700 1 +1340 1 +1161 4 +1811 4 +3582 5 +98 6 +3185 1 +1405 9 +3288 4 +1797 9 +360 7 +3764 6 +1722 4 +3924 4 +2621 9 +1187 6 +1487 10 +2761 9 +541 8 +2024 6 +192 9 +3758 6 +3311 9 +2768 8 +3336 7 +386 10 +1103 5 +2229 1 +519 2 +1819 4 +2215 8 +2053 1 +1345 4 +3518 1 +1189 7 +3789 8 +1794 9 +1995 3 +2693 9 +838 10 +1363 3 +773 9 +2361 8 +1417 3 +54 1 +2915 1 +3216 8 +3374 7 +1153 7 +564 9 +3772 6 +3009 4 +920 7 +677 10 +979 3 +2910 3 +1048 2 +3011 1 +2728 9 +2689 5 +1947 9 +3480 3 +875 6 +2501 6 +403 1 +622 6 +1937 7 +1144 1 +1928 2 +3868 5 +860 1 +2372 10 +2503 8 +1345 10 +3113 10 +3953 7 +1961 4 +812 5 +3080 1 +2311 7 +3193 7 +904 7 +3556 6 +2952 4 +739 8 +217 3 +2240 4 +489 6 +646 7 +2897 2 +4053 4 +973 3 +1981 7 +1990 4 +566 1 +3001 2 +3480 7 +2082 1 +2792 4 +3419 5 +3024 8 +1277 3 +1510 9 +2498 1 +3858 4 +1157 1 +1254 2 +161 4 +438 5 +3650 4 +3831 5 +4020 3 +1006 6 +2614 3 +1326 6 +1373 1 +3721 3 +1020 1 +3233 9 +1749 10 +3807 5 +84 4 +568 4 +491 1 +841 4 +1034 6 +51 4 +3602 10 +629 9 +3973 8 +1868 9 +1446 6 +2989 9 +744 10 +1532 2 +2925 10 +825 6 +386 3 +2393 4 +4035 6 +768 9 +2040 6 +2832 10 +2975 7 +568 8 +19 4 +3984 4 +34 7 +3284 8 +3156 3 +1019 9 +2933 10 +49 4 +4077 2 +1355 3 +2545 2 +1996 10 +2248 3 +1017 5 +4089 5 +783 1 +1172 3 +40 5 +123 1 +2792 1 +2268 9 +2753 3 +313 2 +948 4 +2304 8 +879 9 +1166 6 +841 6 +3261 10 +2327 2 +3126 7 +2692 10 +3446 6 +1215 4 +3609 8 +3941 5 +1542 1 +955 9 +2203 10 +3357 6 +1738 5 +1091 1 +3621 6 +3578 2 +4064 1 +3219 5 +1585 7 +1567 6 +1242 10 +1678 3 +2076 9 +3229 6 +2482 1 +2001 5 +1968 2 +4086 8 +1474 8 +1595 7 +3949 8 +389 7 +518 7 +3353 5 +1771 5 +176 4 +3143 7 +1062 1 +3723 6 +2526 9 +6 3 +916 3 +945 7 +2457 6 +1225 7 +1501 5 +312 2 +2929 8 +669 7 +1425 6 +2928 5 +3538 6 +1444 9 +3465 8 +3437 2 
+167 1 +3190 5 +2577 8 +306 8 +4033 2 +2328 5 +779 5 +1500 7 +2871 2 +1743 4 +2576 9 +1528 9 +1617 3 +2812 3 +2018 9 +3726 10 +1503 8 +3606 9 +3525 6 +484 6 +983 6 +1851 5 +2362 9 +2500 8 +2253 10 +1238 2 +859 8 +3411 2 +2654 10 +2875 10 +3981 6 +296 3 +3343 10 +2490 7 +596 5 +1242 4 +917 3 +685 9 +3037 8 +4062 8 +3358 4 +2020 4 +3051 1 +706 6 +3352 6 +3930 2 +2514 4 +2324 2 +1957 4 +1550 9 +3652 3 +766 6 +3272 9 +2208 8 +2373 7 +1449 1 +4076 3 +3757 6 +2161 2 +1279 7 +2691 5 +3233 8 +238 2 +73 7 +3186 7 +2862 5 +2711 3 +824 2 +4048 8 +3774 6 +3607 8 +1511 8 +4085 7 +1144 6 +2260 4 +35 9 +3432 7 +991 5 +1808 9 +2489 2 +809 5 +3806 8 +1757 7 +834 1 +990 9 +1455 9 +470 10 +563 10 +2445 5 +984 1 +2935 6 +746 4 +1113 5 +3351 5 +1597 7 +231 1 +3145 9 +2295 4 +2004 6 +2916 10 +3419 2 +438 1 +3711 6 +1064 5 +2075 4 +523 5 +261 4 +2574 1 +2443 7 +2812 2 +151 7 +3046 3 +3699 5 +1677 8 +1185 7 +683 6 +3300 10 +2144 7 +2628 8 +491 7 +4084 4 +2199 1 +1684 7 +336 1 +650 3 +4048 10 +64 2 +1623 7 +2228 7 +3790 5 +1977 2 +119 9 +2063 9 +1127 5 +2145 8 +1158 4 +1100 5 +3564 7 +865 2 +580 1 +3794 10 +1621 6 +599 9 +3026 8 +3182 1 +943 4 +3462 4 +3390 1 +1672 1 +454 3 +1599 1 +3866 1 +1925 10 +3973 3 +894 5 +2404 5 +3911 2 +1974 4 +2769 2 +295 1 +2131 4 +297 6 +37 3 +1655 10 +1706 8 +1380 9 +69 9 +1261 1 +452 5 +285 7 +2702 4 +2808 6 +2288 4 +168 4 +2535 4 +1179 8 +31 6 +985 6 +427 5 +1793 1 +3950 2 +4050 6 +473 4 +3749 10 +858 2 +2783 6 +2865 5 +72 2 +317 2 +83 5 +2835 10 +2970 8 +2445 4 +1907 7 +3755 7 +3220 5 +1212 9 +1866 8 +2923 7 +3425 2 +2185 9 +2268 10 +1101 1 +2508 4 +2412 6 +2231 1 +1086 10 +721 8 +536 5 +3132 9 +1583 9 +2922 4 +1733 2 +2003 8 +2151 6 +3964 4 +2653 1 +3929 6 +3772 2 +3549 8 +1585 7 +2414 9 +1398 9 +835 10 +2111 1 +1921 6 +1625 10 +4035 9 +2153 5 +2544 8 +1419 7 +837 1 +3674 10 +374 2 +783 2 +3037 1 +2860 7 +3361 9 +2160 6 +3610 5 +3669 1 +1462 9 +2179 2 +3097 8 +2400 8 +1703 2 +2742 9 +1445 10 +3308 2 +2933 3 +3671 6 +2688 1 +591 7 +2597 7 +2615 1 +341 1 
+3323 10 +3673 9 +643 3 +1500 10 +2765 9 +53 3 +973 2 +2733 7 +4044 8 +3912 4 +910 5 +2219 4 +13 4 +59 3 +3989 8 +1989 6 +2264 1 +1981 4 +3312 7 +593 10 +481 2 +3357 1 +3309 4 +75 7 +3573 3 +3416 1 +922 4 +1912 9 +305 6 +1347 2 +240 10 +1340 8 +271 3 +1489 9 +4017 9 +2196 10 +489 9 +2553 9 +3552 3 +2211 4 +1707 2 +2026 6 +150 1 +2019 1 +3302 6 +1103 3 +2928 8 +1932 8 +2849 9 +3964 3 +3316 2 +827 3 +2539 6 +3906 8 +3010 2 +2978 7 +2238 10 +3688 1 +2970 4 +10 5 +3763 1 +3845 8 +1236 8 +3027 1 +1103 3 +2121 3 +1697 1 +3316 1 +3389 6 +3338 5 +3791 5 +3895 6 +1110 1 +3670 1 +53 9 +3283 1 +487 10 +1793 10 +1809 8 +1611 2 +201 8 +1001 1 +356 1 +2754 3 +771 5 +3793 10 +72 5 +2873 9 +4020 5 +2492 5 +2004 8 +760 10 +3015 9 +3595 2 +1 9 +3636 8 +369 4 +1022 5 +2738 1 +1189 3 +1904 7 +2150 3 +518 1 +2067 6 +1944 6 +1358 4 +2897 4 +3545 2 +220 8 +1115 1 +1379 3 +1382 5 +3269 6 +3510 8 +379 8 +857 9 +3631 2 +1696 3 +2309 9 +1116 4 +3279 5 +2990 8 +3186 6 +2864 5 +4065 9 +2127 1 +1925 3 +1841 3 +686 9 +1404 1 +2371 1 +3340 4 +2080 10 +237 5 +442 4 +171 8 +1959 3 +2504 1 +474 8 +1761 7 +3057 3 +2051 3 +1657 7 +2597 3 +3463 5 +2334 2 +2562 4 +2527 4 +389 3 +1929 4 +2744 7 +2109 9 +1918 9 +3515 4 +2994 6 +17 9 +2022 7 +2678 8 +666 2 +2000 1 +4083 10 +1281 5 +2689 7 +1294 7 +941 7 +727 5 +697 2 +1586 5 +445 9 +3879 4 +727 7 +939 7 +3630 10 +3746 4 +2241 10 +2441 4 +1151 2 +3696 9 +2023 2 +3502 7 +2415 4 +3238 8 +2079 10 +2813 7 +2555 8 +2569 6 +3950 3 +3784 3 +3371 10 +3265 3 +702 3 +605 4 +1510 3 +59 5 +2396 2 +3647 6 +3203 4 +2946 9 +308 9 +2141 6 +512 3 +2231 3 +556 8 +378 3 +96 9 +3837 10 +3878 3 +1685 8 +3786 2 +2974 2 +1466 5 +1173 5 +432 10 +697 5 +1109 6 +3939 2 +2166 5 +1616 2 +1415 5 +1878 10 +126 8 +251 1 +2404 6 +3118 7 +4083 9 +453 10 +2851 4 +3353 1 +3906 9 +3452 2 +1691 2 +2531 9 +1595 5 +1039 10 +3183 9 +315 9 +3580 10 +181 6 +3034 1 +3822 10 +2217 7 +1096 1 +749 4 +2775 3 +3722 3 +4013 8 +1745 6 +3560 10 +3450 6 +2212 10 +3302 5 +262 6 +680 9 +169 10 +664 2 +367 2 
+576 1 +430 2 +996 1 +525 6 +2879 9 +3893 10 +2596 2 +3926 9 +3063 10 +2092 1 +3535 2 +1753 1 +1747 4 +1647 3 +1658 9 +1391 9 +317 1 +1265 3 +2018 1 +1849 9 +2974 7 +3643 8 +1490 10 +2818 8 +1796 3 +1410 8 +3495 3 +1088 1 +1461 1 +3197 3 +3555 2 +1569 2 +508 7 +494 9 +1578 10 +1367 7 +2708 4 +378 7 +3221 2 +809 2 +2226 5 +629 2 +1460 3 +2908 6 +388 3 +340 3 +3437 3 +2596 2 +3018 1 +2073 8 +1027 5 +242 5 +1226 2 +547 10 +1672 10 +3843 7 +2941 2 +2178 4 +3964 2 +1038 1 +2925 2 +2741 9 +3659 9 +1679 9 +3098 9 +3096 4 +2846 4 +262 10 +2609 10 +763 9 +909 5 +41 6 +3771 6 +3756 1 +57 4 +2278 6 +204 4 +3135 4 +1058 3 +2430 7 +968 5 +276 8 +1055 3 +1567 8 +2034 7 +268 1 +3712 1 +3462 4 +2625 9 +95 5 +2386 1 +249 9 +3086 3 +470 5 +2357 7 +4042 9 +3577 10 +1269 3 +2582 2 +2707 3 +3259 5 +734 9 +2531 2 +2497 5 +3346 7 +1471 6 +3556 8 +3881 7 +1671 5 +3761 1 +2011 3 +2994 9 +223 2 +2469 9 +2715 2 +3925 3 +917 10 +3700 5 +30 3 +648 8 +2711 8 +3955 10 +3434 9 +1332 7 +2426 1 +3265 7 +1384 10 +2200 2 +1769 6 +1083 6 +3487 4 +193 2 +74 5 +3120 10 +941 3 +1060 1 +1519 5 +3053 6 +1646 1 +3081 8 +661 5 +2178 9 +3945 8 +3594 4 +1176 9 +1021 5 +3169 2 +1224 6 +930 2 +680 5 +2877 6 +1515 2 +2755 1 +2377 4 +1256 6 +2793 3 +2184 7 +2921 8 +108 4 +303 3 +1066 1 +409 3 +2237 7 +1752 2 +488 10 +2851 8 +3101 6 +2210 9 +1068 4 +3060 6 +2083 5 +1977 6 +3496 2 +3604 2 +3007 6 +220 5 +2910 3 +1332 9 +3795 5 +3497 1 +1609 2 +3805 7 +2249 3 +2971 2 +1280 1 +1194 8 +2004 8 +294 2 +665 9 +239 9 +1689 8 +771 2 +3960 2 +572 5 +65 7 +2085 7 +853 1 +3924 9 +3364 9 +3237 6 +944 8 +3086 5 +1720 1 +3034 6 +2514 6 +602 5 +4044 4 +3773 3 +142 7 +1902 1 +3840 6 +1561 3 +1389 5 +3355 2 +94 10 +2979 9 +3224 10 +2206 6 +1175 9 +1217 10 +1768 9 +3629 10 +1207 2 +1773 2 +2941 1 +1801 6 +2920 9 +3735 6 +2572 5 +946 7 +1615 2 +3680 9 +3007 9 +1459 1 +252 9 +737 8 +2263 3 +2456 6 +4026 5 +1026 5 +2208 8 +1939 8 +2444 5 +3747 9 +1262 10 +640 1 +534 5 +3660 3 +478 2 +1703 3 +431 8 +1659 10 +68 5 +190 2 +1733 7 +110 10 
+3610 1 +2266 5 +905 5 +1865 6 +2530 5 +2071 5 +3889 2 +2860 5 +1433 3 +3908 6 +702 4 +1659 4 +67 10 +3952 5 +559 3 +3869 8 +1320 2 +3978 10 +366 7 +444 10 +1468 3 +3896 3 +2353 3 +211 8 +1387 5 +2750 6 +393 10 +2379 6 +402 7 +3495 3 +2281 7 +1455 4 +1900 1 +4067 3 +1552 1 +363 4 +44 1 +2135 5 +3643 4 +2082 4 +3434 10 +724 9 +3372 2 +795 3 +1808 3 +1346 4 +3392 9 +2935 4 +1442 7 +3227 1 +2113 3 +3294 10 +866 1 +3571 10 +2258 4 +4040 6 +2070 4 +722 4 +2599 7 +3078 4 +3663 10 +279 8 +2693 10 +177 10 +1750 3 +1413 1 +307 10 +120 7 +3970 8 +3789 2 +3036 8 +2813 10 +1443 9 +1426 1 +3281 10 +3566 1 +3280 5 +3835 1 +2545 4 +1627 4 +1230 10 +2529 1 +2831 2 +4071 7 +975 1 +2329 2 +1016 6 +1995 4 +1584 2 +3436 8 +540 2 +3267 9 +211 2 +657 9 +3683 4 +3075 5 +4041 5 +498 3 +3189 1 +1738 7 +1929 5 +776 7 +1280 1 +3997 4 +2958 2 +1564 10 +2375 4 +3536 6 +2832 5 +3732 9 +1368 5 +428 7 +2208 8 +3588 2 +278 5 +1875 5 +261 2 +3375 9 +3267 9 +1845 7 +780 9 +3185 5 +2191 2 +1078 2 +2833 9 +3954 7 +3592 4 +877 6 +486 10 +420 10 +1564 3 +3518 4 +3898 7 +3228 1 +972 7 +2566 4 +3063 3 +3849 2 +477 4 +112 9 +36 8 +3299 9 +1266 3 +552 1 +1731 10 +944 6 +1160 4 +2160 5 +1836 1 +3098 5 +1702 6 +2884 3 +1573 10 +1829 4 +2323 5 +1910 4 +982 1 +3032 2 +2733 2 +339 4 +411 1 +2426 10 +1185 8 +28 2 +334 1 +1027 4 +3008 4 +2466 6 +144 7 +3098 8 +3518 4 +541 2 +872 8 +2515 2 +2123 9 +793 2 +2938 1 +1735 10 +854 3 +542 8 +1155 4 +3691 4 +3799 10 +835 3 +1495 8 +2996 1 +965 4 +2538 7 +138 5 +2403 4 +3501 6 +2046 5 +908 7 +1509 6 +3389 6 +3451 6 +230 3 +3665 9 +374 6 +3430 3 +1955 7 +1965 1 +4067 9 +3337 10 +1903 4 +61 6 +3001 8 +3400 9 +1552 4 +2890 6 +2014 3 +3231 9 +732 2 +1638 5 +3526 5 +3355 6 +806 8 +3530 9 +2698 9 +993 6 +2242 4 +3945 8 +2827 7 +1787 3 +2816 2 +3444 10 +1199 9 +964 10 +3934 8 +2028 6 +2205 10 +928 3 +72 1 +1366 7 +2770 10 +3320 3 +3434 9 +268 10 +1259 6 +3804 4 +2391 5 +2655 9 +261 5 +2951 1 +3333 2 +2649 2 +1383 10 +3011 6 +3529 10 +262 9 +2760 3 +2393 3 +992 3 +744 7 +2178 3 
+3969 8 +3762 1 +946 3 +3910 1 +1213 8 +230 7 +3888 5 +1082 5 +2835 3 +3770 7 +2887 6 +1892 1 +2151 3 +2481 9 +2803 10 +563 5 +1125 9 +728 2 +3036 5 +2200 3 +94 10 +2274 8 +15 1 +430 5 +1112 9 +285 4 +1846 6 +2473 5 +1890 4 +1992 2 +340 1 +97 10 +2422 6 +1589 6 +1530 4 +1777 5 +104 2 +3022 9 +51 2 +2948 1 +2136 9 +1652 8 +1034 8 +817 8 +3157 8 +2614 3 +3735 10 +2900 10 +4014 6 +311 5 +4075 1 +3524 9 +2788 3 +2604 5 +2365 7 +3145 9 +874 9 +3140 6 +3587 6 +454 8 +1569 10 +690 10 +487 1 +1516 2 +3034 1 +3883 5 +2120 3 +3346 4 +3525 5 +2542 7 +3544 10 +2820 6 +3519 8 +96 4 +3883 7 +3115 2 +2645 8 +735 10 +1023 7 +3211 1 +3155 9 +1157 8 +2861 6 +1951 6 +836 4 +705 7 +4090 1 +1653 9 +3096 9 +463 8 +2961 10 +771 1 +1297 6 +3135 4 +865 7 +3926 8 +2438 7 +0 5 +1622 2 +1711 7 +3380 9 +967 6 +1702 5 +3013 5 +3885 4 +3042 5 +3200 8 +627 2 +2182 6 +586 8 +2083 10 +3043 2 +1938 8 +2783 10 +1891 1 +2245 8 +4068 7 +1064 3 +1700 4 +1970 4 +1818 3 +3096 6 +969 7 +550 10 +53 4 +1766 1 +2308 9 +534 2 +3906 8 +1279 5 +3918 4 +432 6 +936 5 +240 2 +2454 5 +2711 4 +1968 5 +3954 1 +2262 9 +299 5 +3757 8 +455 2 +3607 5 +3765 8 +3919 10 +2766 2 +2870 8 +845 7 +3687 4 +1119 8 +3413 8 +3969 5 +3192 9 +2188 5 +1756 2 +2089 7 +2293 8 +2774 5 +2074 2 +533 2 +3081 2 +2759 9 +467 5 +1546 6 +3195 8 +48 2 +2498 6 +1850 4 +1870 2 +3295 6 +1997 5 +3000 9 +1168 9 +904 3 +263 3 +1497 8 +227 10 +3893 3 +2863 7 +1361 9 +2345 3 +1367 3 +3590 2 +1776 1 +379 1 +221 7 +3299 1 +573 3 +48 2 +2177 6 +1485 7 +1889 10 +1443 5 +313 3 +2093 9 +1254 4 +3912 1 +809 4 +940 10 +1804 8 +2271 3 +1416 4 +1500 5 +1392 2 +194 3 +2746 9 +2724 6 +2185 9 +66 5 +1306 4 +2646 5 +922 3 +123 9 +3413 5 +2813 9 +1778 5 +1907 8 +564 8 +2539 9 +869 2 +1251 4 +3291 8 +2513 7 +3115 9 +1125 2 +2143 5 +242 7 +83 5 +3453 8 +85 1 +2734 2 +512 6 +2486 9 +3014 1 +477 3 +2338 2 +3110 5 +1220 1 +581 8 +1492 9 +3588 10 +3617 8 +472 6 +60 7 +2922 4 +2516 5 +724 3 +215 3 +90 8 +2686 5 +3807 5 +3392 1 +2021 9 +19 6 +1037 9 +2861 4 +3148 2 +432 4 
+1173 8 +3121 5 +2566 6 +1968 10 +2094 2 +604 4 +1384 1 +169 9 +302 7 +254 4 +2904 5 +145 2 +4 2 +995 4 +3739 1 +3665 5 +1737 9 +1416 6 +174 5 +871 4 +667 1 +4011 5 +896 2 +2247 8 +2252 3 +2656 2 +2104 1 +169 8 +327 2 +1791 10 +1561 8 +3759 4 +743 9 +1125 10 +3357 9 +3114 9 +2583 7 +725 10 +1940 7 +3874 8 +1521 2 +1673 9 +753 4 +3418 5 +2984 2 +1502 7 +3818 3 +2957 5 +1209 1 +2764 9 +3916 4 +268 9 +1777 2 +1827 4 +3680 9 +3945 7 +533 5 +2775 8 +2880 9 +2636 8 +2401 3 +2517 2 +3195 9 +2959 8 +355 8 +703 5 +2513 8 +1113 6 +881 3 +505 3 +3941 8 +1304 10 +2610 2 +3170 7 +112 6 +1002 7 +1582 7 +853 8 +135 9 +2418 9 +149 4 +3195 1 +3857 2 +701 10 +3513 4 +3004 6 +3643 4 +3163 5 +1100 2 +810 10 +3498 10 +1793 8 +3248 4 +3043 2 +637 9 +1930 8 +1924 9 +141 1 +880 8 +1345 8 +604 4 +2442 8 +879 6 +2970 8 +3477 4 +269 6 +719 3 +2915 2 +1144 10 +3399 7 +3813 9 +915 3 +2708 9 +1565 1 +3066 7 +2478 4 +3048 7 +1340 4 +2150 2 +1241 6 +1247 2 +3721 2 +2853 8 +613 10 +642 3 +2411 9 +1623 6 +1522 9 +3000 5 +235 2 +98 6 +538 8 +1609 10 +2392 8 +1724 10 +178 9 +1825 10 +787 6 +542 5 +3492 6 +1480 6 +1532 8 +1512 1 +2820 6 +3357 6 +105 7 +2710 5 +3553 4 +925 9 +2745 3 +3180 5 +750 2 +3860 5 +3783 2 +1058 8 +3367 2 +1284 1 +1993 7 +4040 9 +3683 6 +116 5 +1362 1 +2484 6 +199 3 +1447 10 +1710 5 +2240 1 +470 5 +2704 1 +3296 1 +297 2 +1007 2 +1796 3 +842 3 +1976 10 +3880 9 +2491 8 +1334 10 +2149 1 +1534 6 +2323 9 +1435 2 +1619 1 +3436 5 +2073 5 +2741 7 +108 3 +1309 7 +38 3 +3474 10 +495 1 +1232 6 +2524 2 +648 7 +3154 8 +1669 10 +1009 4 +999 10 +2451 10 +2534 2 +216 7 +2487 8 +3495 6 +2558 6 +3902 7 +2454 3 +2625 2 +2715 3 +3779 9 +2179 8 +3318 4 +1567 9 +508 8 +1481 9 +3080 6 +2339 7 +836 5 +22 8 +3879 7 +3326 9 +2984 2 +2428 1 +151 1 +1614 2 +1930 9 +2412 3 +1842 4 +3349 1 +1232 2 +3240 8 +4036 10 +2171 8 +3873 10 +212 9 +2231 10 +468 2 +2121 10 +2691 1 +3477 6 +3542 8 +634 8 +3735 9 +198 9 +2641 1 +128 4 +2774 1 +263 3 +3531 5 +782 4 +2886 3 +1207 4 +2718 3 +394 3 +2200 7 +857 3 +2340 9 
+3493 1 +1822 1 +2077 7 +295 4 +2825 6 +505 7 +3461 7 +670 9 +1836 2 +573 10 +182 5 +391 3 +982 2 +2516 2 +2574 5 +1203 4 +3513 6 +3486 3 +2267 4 +3695 9 +2363 1 +2244 8 +3503 7 +3423 3 +3999 3 +2658 7 +3913 6 +2541 3 +3290 5 +1114 6 +3576 4 +3647 4 +1646 3 +2216 2 +2457 9 +3703 5 +2746 5 +3376 3 +659 7 +2114 10 +1343 9 +2086 4 +3319 3 +2971 6 +4005 4 +1375 6 +1170 6 +3319 2 +3937 4 +2050 4 +662 2 +854 2 +3402 4 +451 10 +3349 3 +2126 3 +143 10 +2287 2 +2887 3 +593 1 +1032 1 +1656 2 +594 7 +1989 1 +1128 10 +3319 7 +1998 5 +3071 7 +2069 10 +1554 3 +1792 4 +115 7 +2918 9 +2782 4 +3855 8 +345 7 +2797 2 +2905 4 +3841 5 +3733 2 +255 6 +3498 4 +1095 3 +3065 5 +3957 2 +2924 3 +823 4 +650 10 +2729 2 +3253 3 +1513 9 +2839 6 +1538 6 +3243 7 +1154 3 +801 10 +2688 10 +762 4 +600 7 +2105 7 +2626 6 +128 1 +1377 1 +2296 9 +2118 10 +3178 7 +3396 7 +3852 4 +666 5 +1785 7 +1105 3 +3982 7 +1368 10 +631 8 +1472 5 +1935 9 +754 1 +2291 6 +2324 9 +804 4 +3661 7 +3148 9 +1855 10 +1930 6 +3434 2 +3554 6 +3591 10 +2791 6 +2845 2 +2105 3 +2015 5 +3662 1 +219 4 +116 6 +852 3 +957 7 +2338 7 +3987 5 +2602 6 +3737 4 +3056 4 +2303 5 +3697 10 +2528 6 +2937 3 +3162 9 +1836 3 +3827 8 +1876 8 +3800 7 +1712 5 +1305 5 +3222 1 +209 8 +1320 6 +981 3 +3637 5 +1975 9 +647 1 +792 7 +1507 2 +3234 2 +1938 10 +1483 4 +3101 1 +3970 4 +1582 7 +3444 1 +2949 3 +1013 7 +1190 5 +1148 1 +1817 1 +3502 3 +323 10 +3436 8 +1119 1 +3362 2 +2291 7 +1896 8 +2170 10 +1342 6 +454 6 +2343 9 +963 9 +1075 5 +1703 2 +478 4 +4009 10 +593 5 +2653 6 +2372 7 +3176 7 +1526 3 +4082 3 +2465 6 +748 10 +880 7 +3472 7 +1581 7 +2809 10 +1236 1 +3494 3 +4079 4 +3407 2 +3818 2 +2293 5 +3369 3 +2813 2 +1801 6 +59 10 +198 1 +3992 9 +2334 6 +236 1 +244 6 +3316 5 +2990 6 +3544 9 +479 3 +833 6 +2926 6 +245 5 +2019 4 +2979 7 +2851 5 +1305 10 +53 6 +2415 9 +1931 5 +3764 2 +2032 1 +2663 2 +1748 8 +947 6 +2500 2 +2854 8 +418 4 +3297 3 +513 5 +2257 10 +4082 1 +1 8 +1076 7 +2937 7 +1751 8 +3295 1 +3346 4 +1350 4 +495 10 +2518 9 +398 9 +3429 3 +3256 4 
+3573 7 +305 1 +3082 7 +1754 9 +1465 5 +2276 2 +3530 5 +2678 8 +1407 5 +2504 9 +1186 6 +3854 2 +2879 3 +1378 10 +871 1 +2331 8 +3056 3 +2363 9 +1795 5 +189 2 +3143 5 +2159 4 +2537 8 +2757 6 +348 5 +2527 9 +1724 1 +3451 8 +2327 7 +584 6 +342 9 +2580 6 +2925 4 +641 6 +4050 10 +1828 1 +3155 8 +181 8 +178 10 +1668 1 +3853 1 +2350 3 +65 4 +1608 3 +2429 8 +118 10 +1882 3 +3825 1 +855 1 +2590 2 +1762 6 +893 3 +2457 3 +1224 10 +3890 10 +114 1 +1276 5 +2060 8 +3195 4 +3085 1 +2277 7 +861 10 +1500 7 +2524 7 +1289 10 +3868 6 +1205 4 +1915 2 +2565 9 +2526 6 +288 8 +2945 3 +1622 5 +1458 5 +2813 3 +3097 4 +2671 9 +2965 6 +2969 5 +3544 4 +786 4 +2405 5 +1560 1 +1605 2 +1240 7 +3215 10 +237 5 +3595 10 +3233 5 +443 8 +2368 6 +345 8 +2747 6 +1713 3 +680 9 +815 8 +2224 9 +2214 2 +623 10 +2742 6 +1951 5 +2759 7 +130 9 +2820 4 +2879 5 +1896 8 +1725 3 +4080 8 +362 2 +2954 1 +1689 3 +2461 4 +3068 8 +156 7 +237 1 +2958 7 +2694 2 +1914 6 +2391 3 +3577 6 +2343 8 +1681 4 +2299 3 +2720 2 +2622 6 +3710 2 +2106 1 +3216 10 +3716 3 +524 1 +123 6 +328 8 +3297 10 +527 9 +2020 7 +2610 10 +3599 8 +2014 3 +1796 3 +947 9 +1644 8 +3298 1 +203 10 +1728 1 +1879 9 +2273 10 +2632 6 +2498 1 +3808 2 +3092 2 +2795 2 +460 9 +3496 8 +568 10 +2417 7 +230 3 +741 4 +2318 5 +2703 7 +1276 6 +1134 2 +2665 4 +3746 2 +1546 9 +1687 4 +2365 2 +2681 1 +3724 7 +483 5 +3656 3 +3289 8 +1582 4 +478 7 +60 7 +3636 9 +2142 3 +2531 8 +3973 5 +2122 5 +272 5 +1378 5 +634 9 +1973 5 +2293 8 +173 10 +3989 1 +3287 6 +984 6 +686 10 +664 3 +3921 9 +2940 9 +216 1 +902 1 +348 8 +1656 4 +709 2 +1518 8 +1756 1 +3267 6 +1794 7 +3961 7 +1596 5 +1725 9 +2792 5 +10 1 +2822 7 +3586 2 +3986 7 +3343 5 +2071 8 +2378 9 +2608 7 +2873 7 +589 5 +2954 2 +2562 5 +137 8 +619 1 +2262 10 +406 10 +2433 4 +3242 7 +3350 5 +1676 2 +2181 3 +2854 5 +1424 8 +1790 6 +1862 4 +56 1 +1118 9 +417 1 +2873 3 +3482 7 +1108 7 +3103 8 +2080 4 +3055 1 +864 3 +3334 6 +2351 5 +1335 9 +2175 5 +1751 1 +864 10 +1238 10 +3039 6 +3767 5 +1334 9 +3747 5 +271 1 +3364 3 +3302 8 +2454 5 
+3737 1 +3664 2 +1568 2 +3853 6 +3464 1 +3464 2 +781 8 +1655 9 +293 5 +2728 6 +2496 3 +3812 2 +158 3 +200 5 +1915 7 +3365 8 +1803 10 +2644 9 +585 5 +19 6 +1802 5 +3980 8 +3278 2 +2766 1 +3032 7 +281 10 +1232 4 +965 1 +1054 8 +312 7 +2148 10 +2197 2 +3863 1 +4036 4 +1551 3 +2651 4 +1281 6 +4052 8 +2956 2 +156 7 +3504 4 +2777 3 +1258 9 +2271 8 +2162 7 +3594 2 +1735 8 +4085 7 +2516 7 +1228 8 +3534 6 +1860 9 +2620 9 +3304 2 +2466 8 +976 10 +969 4 +131 7 +1138 4 +2071 3 +3482 4 +567 3 +1497 5 +1373 10 +3594 7 +2551 9 +1982 7 +674 2 +1054 2 +1821 2 +1390 8 +2456 4 +3786 5 +2738 6 +3436 10 +2349 5 +382 5 +3676 6 +3791 4 +3447 5 +4019 7 +3866 10 +350 10 +3081 7 +3204 10 +3545 9 +332 7 +379 6 +1295 4 +1439 8 +1693 3 +3008 2 +1867 5 +2420 1 +470 8 +1832 3 +619 4 +1928 4 +3900 7 +1544 10 +1451 3 +3721 7 +1719 3 +112 8 +601 1 +3971 7 +2247 8 +1774 9 +2851 7 +3945 6 +3478 2 +2522 7 +3630 9 +303 4 +2171 1 +2024 2 +807 6 +474 4 +990 3 +85 10 +3668 4 +3823 9 +2731 9 +2748 7 +2283 10 +903 8 +1807 10 +1521 10 +3026 6 +2902 10 +219 10 +461 4 +166 8 +1065 4 +2325 3 +2922 3 +2572 7 +3034 4 +3694 10 +3552 7 +3554 4 +3189 10 +1805 2 +3953 9 +4033 7 +2154 6 +772 9 +7 1 +2616 10 +1200 9 +1237 1 +388 7 +2052 2 +2777 9 +3131 9 +97 1 +1592 2 +1940 1 +479 1 +2770 3 +970 3 +1553 7 +1531 3 +855 4 +2157 2 +3786 2 +3221 7 +2133 8 +1558 4 +2759 6 +2627 10 +603 1 +3477 5 +1714 2 +1945 5 +1936 10 +471 7 +363 9 +1169 7 +1871 5 +2078 3 +1201 3 +1098 7 +2291 4 +604 4 +3558 8 +472 8 +3770 3 +3595 5 +2432 6 +2848 2 +2941 2 +1473 2 +1149 5 +3522 8 +3365 9 +1269 10 +556 3 +2778 9 +955 3 +376 7 +160 1 +2626 2 +4069 7 +196 10 +805 1 +2185 5 +3577 8 +737 4 +230 2 +3555 4 +3601 3 +356 4 +952 2 +417 8 +838 3 +65 3 +3658 8 +3607 4 +3113 6 +984 1 +1346 10 +4080 7 +343 2 +838 6 +554 3 +2613 7 +2947 2 +3981 8 +2537 10 +2894 1 +3578 9 +3568 1 +2281 8 +3941 8 +1258 6 +1634 5 +3416 3 +2580 2 +4076 3 +3048 8 +1268 1 +236 4 +3117 9 +1713 1 +1325 5 +3635 1 +1436 8 +2985 10 +862 6 +2911 6 +1297 10 +2873 1 +2195 6 +1067 3 
+2452 8 +2752 3 +198 9 +835 4 +311 1 +592 8 +3676 3 +1032 9 +1838 10 +1533 7 +2586 8 +2980 1 +2646 2 +4033 3 +4062 9 +2260 1 +964 6 +1067 5 +1824 5 +1485 9 +1171 10 +4033 3 +695 6 +2703 10 +4010 9 +3927 5 +2241 9 +1109 10 +3056 10 +3626 10 +61 9 +1710 10 +2030 7 +3077 3 +3519 7 +963 6 +2565 1 +1213 1 +1956 7 +3302 2 +2640 1 +734 4 +278 9 +1605 3 +3712 9 +79 10 +2378 4 +3653 1 +3507 6 +2289 1 +3629 6 +3080 8 +1135 5 +2556 9 +3448 3 +3102 2 +2958 1 +2878 9 +1598 4 +844 7 +3508 4 +2452 7 +305 5 +249 3 +337 8 +1641 6 +1915 9 +2099 8 +1124 5 +508 6 +3461 4 +2096 10 +607 7 +79 10 +1347 8 +2840 5 +2491 6 +2309 9 +3572 3 +3204 7 +1094 9 +2553 1 +2535 6 +2120 6 +2207 1 +1486 10 +1682 2 +2187 5 +3376 5 +1829 5 +1204 2 +4088 10 +3167 7 +2291 4 +921 3 +1800 10 +2773 4 +3553 8 +536 6 +1550 7 +1631 10 +3619 1 +809 3 +2196 6 +2749 3 +940 2 +582 8 +3589 1 +695 8 +3115 3 +2531 8 +1852 9 +2842 7 +295 6 +3658 5 +1991 1 +1042 9 +2772 1 +2378 2 +2002 9 +825 6 +2908 2 +3467 3 +410 6 +3261 7 +638 1 +4001 5 +316 4 +712 4 +3943 5 +1604 1 +2972 1 +385 4 +1485 1 +174 1 +3712 8 +2121 1 +2263 2 +3527 6 +790 2 +3648 5 +1447 4 +1069 1 +472 4 +966 9 +3321 4 +2305 8 +313 1 +3054 8 +2207 10 +623 3 +2843 1 +2223 3 +1297 5 +392 1 +2024 4 +760 3 +479 4 +2098 8 +3766 1 +3740 1 +793 10 +875 5 +734 3 +2361 9 +1495 1 +2583 9 +263 3 +3311 5 +3924 4 +767 2 +1096 8 +3657 5 +1454 5 +1506 1 +480 6 +908 10 +1903 4 +70 6 +1783 3 +3006 2 +2745 1 +2778 2 +2075 1 +2682 5 +3534 5 +1141 1 +3527 8 +818 1 +3067 9 +3208 2 +3677 4 +2850 8 +3719 7 +449 5 +3184 7 +1759 3 +3547 9 +1083 5 +3088 10 +2089 10 +1204 4 +1215 6 +700 3 +2188 3 +3500 4 +3283 2 +888 8 +971 3 +2164 10 +1459 4 +2657 10 +1880 5 +72 5 +3540 6 +2516 7 +3183 2 +3925 4 +187 10 +1757 4 +496 5 +1044 7 +1674 8 +1910 5 +898 10 +436 3 +2711 10 +3553 7 +2242 7 +4093 5 +314 7 +1779 1 +717 6 +2834 4 +53 5 +3642 9 +814 3 +2008 5 +3764 8 +1903 8 +3104 8 +2883 3 +1923 2 +668 3 +3264 7 +2084 9 +400 7 +37 5 +1332 8 +2382 1 +368 1 +2821 6 +308 10 +2080 4 +549 10 +3131 9 
+3545 7 +809 1 +1002 6 +3954 2 +1143 1 +2762 10 +1695 1 +2516 6 +3886 2 +2544 4 +3984 3 +3258 10 +1750 6 +3175 7 +1876 9 +1631 9 +2125 6 +1821 10 +1693 3 +2199 8 +1857 8 +3561 9 +4041 6 +275 7 +3431 9 +1890 4 +3510 9 +1703 2 +2084 6 +1740 3 +584 2 +2044 6 +3370 8 +2047 4 +796 1 +3790 2 +2454 1 +751 1 +2693 9 +2581 9 +1504 9 +1132 3 +3271 8 +958 2 +1435 6 +3812 1 +2015 2 +457 3 +794 8 +3842 10 +3216 3 +2042 9 +1434 6 +1239 2 +2127 6 +1875 3 +944 6 +3891 4 +1378 8 +3079 4 +18 2 +3976 3 +1541 5 +3214 5 +3051 7 +3073 2 +1602 2 +3425 4 +1351 8 +1690 2 +1897 3 +1664 9 +3108 10 +3148 2 +1947 9 +1882 8 +2122 10 +637 4 +2600 9 +33 10 +740 6 +4052 2 +3853 2 +2945 10 +3184 10 +1138 7 +891 6 +4046 10 +1143 9 +2222 1 +3773 4 +1202 5 +2821 10 +2819 5 +3248 10 +1468 1 +1003 9 +2874 4 +2326 3 +3856 4 +2754 9 +1046 10 +158 8 +987 6 +498 10 +1450 9 +2469 6 +893 2 +242 5 +965 8 +1404 4 +1237 10 +732 5 +1851 10 +2109 7 +59 9 +188 7 +2796 6 +1013 1 +487 3 +2324 5 +3743 8 +892 7 +4064 7 +4045 3 +3782 10 +1446 9 +2252 3 +3909 1 +2342 5 +3848 4 +2927 4 +1566 9 +2926 10 +1353 3 +2182 6 +3307 1 +3550 9 +2691 10 +2161 5 +18 8 +846 10 +3044 2 +3781 10 +3874 5 +1806 3 +3004 7 +3706 4 +1410 5 +385 3 +2192 3 +2394 5 +1136 4 +3317 4 +2178 10 +4041 5 +2993 8 +4040 9 +1019 9 +2970 6 +562 1 +32 5 +2279 10 +526 1 +2837 1 +2567 2 +3052 6 +1494 9 +4057 7 +746 8 +794 6 +2297 8 +1915 3 +2059 2 +765 3 +1307 5 +1127 1 +152 6 +2790 6 +3288 4 +666 6 +1417 4 +4066 10 +435 7 +815 8 +3398 5 +242 7 +220 7 +1099 3 +3662 1 +4005 5 +797 10 +1097 1 +2316 1 +491 5 +3261 6 +2273 7 +2782 9 +1929 3 +1046 9 +330 2 +4046 1 +3587 4 +3946 4 +3234 6 +138 1 +3011 3 +1700 6 +2820 6 +2043 1 +3290 6 +34 7 +1907 10 +1689 5 +2015 3 +3168 9 +1296 5 +485 5 +2642 9 +912 3 +3574 5 +2187 6 +294 8 +1082 5 +2047 2 +2364 8 +3798 2 +2315 10 +636 4 +3260 7 +3611 1 +83 6 +2147 6 +1444 1 +3128 4 +2620 8 +1805 8 +432 5 +1134 3 +3839 6 +3958 6 +859 1 +3553 10 +1860 10 +266 3 +3831 9 +489 8 +3482 5 +1726 5 +2778 10 +3276 1 +588 4 +1106 6 +3010 
10 +1904 10 +2911 1 +1270 5 +1933 7 +1668 3 +2371 2 +1368 2 +1935 2 +754 6 +948 7 +4086 9 +1736 6 +2621 6 +3620 5 +3147 9 +2652 7 +3169 6 +1000 8 +3131 10 +3956 10 +3640 2 +1964 8 +2045 5 +3052 7 +360 5 +420 7 +3965 4 +2531 9 +1693 4 +1793 6 +3131 4 +3668 8 +3973 2 +1992 1 +858 6 +2315 3 +2248 4 +3981 5 +212 7 +2446 6 +1943 2 +2335 3 +1932 6 +2896 9 +360 7 +4019 6 +3330 10 +1234 6 +1792 7 +3785 5 +735 10 +343 9 +1244 9 +32 8 +2145 9 +2048 3 +2638 6 +2376 3 +1678 6 +1517 1 +3968 5 +1278 4 +2850 8 +384 5 +3305 4 +1696 8 +231 9 +3208 9 +2345 9 +3380 8 +105 8 +2586 4 +909 7 +762 1 +2857 6 +3035 6 +1202 7 +8 8 +1402 1 +1382 9 +3977 4 +860 1 +1392 10 +2480 10 +1663 2 +218 2 +2324 8 +3023 2 +3539 5 +1883 10 +3576 9 +258 10 +793 9 +2534 4 +967 10 +2851 4 +732 3 +3340 10 +139 7 +1777 4 +4067 4 +1892 4 +1651 6 +1054 9 +1563 5 +349 3 +1987 6 +4087 7 +1945 5 +1990 4 +1095 10 +2507 2 +2146 6 +2975 4 +2503 9 +4011 2 +2523 1 +3597 6 +2361 6 +2883 7 +4058 5 +2580 5 +672 5 +2903 3 +3705 7 +3364 7 +498 3 +3776 6 +1210 9 +260 6 +48 10 +1825 2 +3355 6 +2966 10 +958 10 +2739 3 +571 8 +2246 5 +1647 8 +107 4 +2268 7 +3306 7 +2320 3 +3845 7 +4052 7 +3121 5 +3152 8 +1457 9 +1899 4 +2679 3 +2272 4 +761 1 +1511 9 +3331 9 +2836 3 +1161 8 +1409 1 +151 2 +4039 5 +2306 10 +3518 1 +2878 1 +3216 1 +2136 3 +3066 1 +2002 8 +1853 7 +2803 8 +3575 3 +2766 10 +140 2 +2380 7 +1638 7 +954 7 +1200 8 +2932 2 +1346 5 +1628 9 +1527 2 +2214 6 +0 10 +3101 6 +3820 1 +2960 8 +3712 2 +3644 2 +186 2 +4003 2 +1005 7 +1048 10 +47 3 +1204 5 +1305 7 +311 7 +3553 5 +2177 9 +2134 4 +2156 6 +3213 6 +1712 3 +4077 4 +1002 5 +3338 9 +3790 3 +210 3 +1744 8 +2771 6 +3089 9 +2018 6 +3079 2 +539 6 +62 1 +287 7 +1220 7 +2632 9 +806 2 +2889 10 +2385 10 +1006 8 +1598 7 +672 10 +654 10 +2968 5 +2954 3 +2647 4 +1433 9 +869 9 +2516 9 +2641 4 +1410 1 +2263 4 +1278 2 +3487 1 +4044 8 +3472 8 +3228 4 +2269 6 +4083 10 +3930 9 +1976 1 +1729 3 +2474 1 +1162 6 +3393 2 +3206 10 +3661 6 +370 7 +1080 3 +169 1 +981 9 +2977 7 +1833 2 +3547 4 +1495 
9 +1016 8 +2064 7 +2971 6 +3397 8 +348 8 +627 5 +3026 5 +3692 6 +3596 3 +1235 1 +651 2 +2084 7 +2432 5 +136 4 +4040 8 +820 8 +1265 9 +3425 3 +328 2 +340 1 +3161 7 +3849 5 +3448 2 +3869 8 +2734 1 +1776 7 +1113 2 +3366 9 +2128 1 +2368 5 +1645 5 +468 2 +458 1 +214 4 +1181 2 +3903 10 +343 5 +1483 1 +2450 10 +3092 5 +221 10 +3226 7 +4064 10 +3592 8 +1327 8 +758 6 +2094 7 +1110 5 +2272 8 +722 5 +3483 9 +384 6 +395 5 +1219 2 +2729 6 +2917 7 +2913 6 +2956 10 +1940 1 +4057 2 +1357 10 +712 6 +2062 4 +1233 9 +3567 1 +81 6 +346 5 +3885 8 +3340 7 +4041 2 +2606 5 +3324 2 +171 3 +3975 1 +816 6 +1556 9 +1761 3 +1811 7 +4042 8 +3559 5 +3349 5 +2184 10 +1882 1 +2481 6 +148 5 +367 2 +34 4 +813 5 +1284 3 +668 10 +3340 2 +2051 7 +1805 2 +2500 8 +3417 4 +1497 2 +2223 8 +1964 1 +3321 3 +1006 9 +1753 4 +2029 9 +3651 1 +746 8 +2755 6 +119 4 +2076 5 +1177 4 +2112 5 +2475 9 +933 6 +2400 8 +1364 3 +1998 1 +412 4 +2651 8 +2481 7 +772 4 +557 2 +3258 9 +531 1 +3685 9 +793 8 +1235 8 +3974 10 +987 2 +3499 7 +625 3 +2313 6 +3913 2 +2427 5 +3794 2 +1380 7 +2446 5 +3385 9 +133 2 +24 4 +1239 6 +1955 2 +1911 1 +150 3 +4015 2 +3292 6 +1926 3 +243 3 +3738 6 +3500 4 +687 9 +1642 9 +767 5 +1266 6 +3112 6 +3385 10 +3271 1 +3338 1 +2876 5 +4054 10 +2204 3 +1925 8 +3738 8 +192 1 +1907 5 +851 8 +3311 6 +107 5 +3225 10 +3890 5 +363 3 +2629 9 +2460 3 +399 5 +3622 5 +3672 7 +620 3 +1437 5 +3439 8 +2697 3 +3867 4 +995 1 +2512 9 +1818 1 +2488 10 +705 8 +2226 3 +334 4 +2080 3 +3440 3 +874 8 +1353 10 +2539 9 +3699 8 +627 6 +2928 5 +2244 1 +3730 9 +135 2 +3463 4 +2835 5 +1197 6 +3428 8 +1321 9 +718 6 +3813 10 +3435 4 +2379 7 +3080 2 +3083 6 +3480 1 +1848 10 +1903 7 +2182 7 +2115 7 +643 1 +2700 6 +3730 9 +2113 3 +511 3 +2279 1 +3577 6 +1012 9 +444 1 +1395 7 +232 6 +553 8 +3936 6 +3674 10 +779 7 +566 1 +1341 3 +1673 8 +1165 4 +2998 10 +658 10 +2941 6 +3713 10 +250 10 +3088 8 +1136 1 +1677 9 +2568 4 +825 6 +1363 7 +3803 10 +2531 4 +3493 6 +1263 3 +2768 1 +3134 6 +3503 5 +2271 4 +909 8 +2723 7 +3863 10 +850 1 +3385 2 
+3789 3 +115 9 +3542 4 +1523 9 +2715 5 +1936 4 +541 10 +1673 1 +1365 4 +3649 5 +862 4 +1903 1 +3088 2 +2062 8 +2391 5 +2111 5 +2398 3 +677 3 +2665 1 +2741 9 +1309 1 +1217 8 +1124 3 +2501 2 +3134 3 +2086 4 +2115 3 +2170 5 +3180 6 +1963 8 +2031 3 +1489 5 +2129 2 +3046 7 +1148 10 +1152 3 +1231 1 +478 9 +904 10 +760 6 +1973 10 +271 1 +1450 9 +1904 2 +4028 3 +3952 4 +4031 2 +998 6 +3397 6 +1798 2 +1243 9 +669 3 +1103 8 +2561 9 +1336 2 +1898 10 +3757 6 +71 8 +2191 3 +955 1 +1181 10 +1097 2 +607 6 +3789 8 +2397 3 +3731 7 +590 10 +3673 1 +3001 2 +3464 6 +2933 5 +1798 7 +864 6 +3376 7 +2628 9 +2012 8 +1778 9 +4004 10 +2607 1 +2224 8 +3822 6 +1640 6 +962 1 +1156 10 +2197 2 +2335 6 +3502 3 +3850 1 +94 4 +2836 1 +3545 2 +3568 2 +147 5 +3812 9 +2883 2 +158 3 +764 8 +382 2 +3227 10 +1902 9 +693 1 +2808 6 +2778 3 +3224 7 +748 7 +3291 10 +1098 10 +202 10 +3440 3 +1715 5 +1676 5 +544 2 +2446 2 +2419 4 +2003 10 +345 5 +2569 8 +3645 9 +3442 3 +3336 5 +2466 8 +3894 9 +618 6 +2501 5 +1284 7 +2334 9 +3551 4 +222 4 +1225 7 +3703 3 +169 1 +1279 7 +1323 4 +3785 2 +1942 3 +2301 10 +1616 8 +2266 8 +3885 2 +1626 1 +552 7 +1040 9 +3796 1 +1145 2 +3568 3 +2973 1 +2361 4 +1690 5 +3478 9 +2362 1 +2586 7 +2335 6 +552 5 +1042 7 +998 7 +2295 4 +3080 3 +3340 7 +539 10 +445 7 +2453 3 +3289 10 +2697 10 +1077 5 +452 3 +3538 3 +2971 7 +2351 8 +648 4 +2591 9 +1177 6 +45 7 +120 3 +662 2 +744 1 +2748 7 +2016 5 +3566 4 +3063 2 +935 3 +2375 8 +3382 9 +3709 3 +3150 1 +2717 7 +667 5 +1362 2 +3286 6 +2738 5 +298 4 +324 8 +1649 10 +2800 8 +1823 6 +206 3 +2642 1 +710 10 +2488 5 +2058 8 +2183 5 +3690 1 +2807 3 +3797 4 +3972 10 +1086 5 +2752 2 +1000 7 +2083 8 +2655 2 +1328 5 +251 9 +582 5 +216 6 +2669 6 +1021 7 +1870 5 +2365 7 +1388 4 +236 2 +146 2 +3013 10 +1503 7 +3728 8 +1029 1 +3445 3 +3721 3 +629 10 +2488 5 +2878 10 +322 1 +845 8 +915 6 +3599 10 +315 4 +346 5 +3467 2 +1438 2 +3752 6 +2755 4 +2422 1 +3026 4 +170 4 +1402 1 +2791 8 +143 3 +364 9 +2751 1 +3433 8 +1617 10 +2479 1 +1790 4 +1386 3 +496 6 +2842 9 +381 
9 +1309 2 +2860 6 +3872 4 +3481 3 +4042 1 +2633 2 +568 7 +3264 1 +1935 5 +1879 5 +3712 8 +3549 7 +1303 3 +3758 7 +557 8 +528 1 +2361 4 +3533 7 +1118 2 +1233 10 +1692 10 +565 10 +112 9 +2924 4 +306 9 +1062 2 +771 2 +422 4 +3627 6 +3759 7 +98 2 +3618 1 +2167 1 +3920 2 +3831 10 +3358 2 +285 8 +663 7 +2211 6 +1940 10 +2724 10 +2462 5 +3231 7 +4059 1 +655 4 +3209 4 +1967 2 +16 2 +2907 6 +1247 2 +423 9 +2550 8 +2504 8 +3717 6 +638 10 +3612 9 +251 8 +1957 2 +2920 8 +1126 2 +4066 6 +3226 9 +367 2 +121 9 +1582 8 +1083 2 +523 9 +2216 7 +365 2 +1006 3 +200 7 +2057 6 +2091 1 +1604 10 +468 9 +1648 10 +1240 1 +2192 8 +2788 9 +309 3 +2429 1 +943 6 +2749 7 +2008 7 +3065 3 +3963 9 +3473 2 +1899 4 +282 4 +621 1 +1027 6 +4082 2 +336 3 +3997 10 +337 10 +1187 2 +2267 1 +3160 9 +1307 5 +1026 7 +1905 10 +1233 6 +3477 7 +623 9 +1811 4 +2416 9 +749 8 +2941 7 +4067 2 +2988 9 +2802 9 +3350 10 +2006 9 +1948 8 +2569 9 +1043 10 +227 4 +2570 4 +208 9 +504 4 +2605 6 +1583 1 +2863 7 +2535 2 +1898 5 +2526 4 +1958 3 +750 10 +1144 4 +3770 10 +2773 1 +579 1 +298 9 +2876 5 +124 8 +3938 6 +2761 6 +1497 9 +2385 3 +28 5 +1902 1 +2215 1 +2232 4 +691 4 +3335 3 +1653 9 +2574 5 +905 9 +2089 1 +4054 2 +322 4 +1428 9 +3986 7 +3064 1 +1395 10 +199 1 +1969 8 +647 6 +2922 2 +3846 6 +3710 1 +2717 7 +872 6 +3434 9 +2872 5 +3901 5 +3798 1 +3308 2 +1375 5 +2324 5 +3747 1 +1766 10 +4054 1 +3359 8 +3596 1 +598 5 +1763 5 +834 2 +2993 6 +2178 8 +1166 5 +1497 7 +3001 2 +3940 1 +3314 7 +2921 9 +3621 2 +322 10 +3712 10 +1826 9 +2031 3 +300 2 +1676 9 +2713 10 +3797 4 +3538 5 +1714 8 +1573 6 +461 4 +2638 8 +3952 8 +2699 3 +782 4 +2420 9 +1389 6 +3213 9 +2469 8 +268 5 +1800 3 +3283 9 +2168 6 +2790 1 +2303 5 +1537 9 +2811 9 +2176 1 +4047 4 +4057 8 +2859 4 +715 7 +3273 8 +522 7 +2281 3 +3620 4 +1318 8 +2615 8 +247 7 +3388 4 +2357 9 +1736 4 +2903 10 +3366 2 +530 5 +4067 7 +1515 5 +1257 3 +284 9 +2575 8 +810 6 +1111 1 +912 3 +2310 5 +1689 6 +605 1 +1094 3 +1493 8 +1956 1 +2774 2 +1818 4 +717 8 +3409 7 +3451 6 +2795 10 +2000 9 +867 
10 +1618 10 +3671 8 +2327 7 +3069 3 +3664 7 +3641 8 +1703 4 +1593 10 +2346 4 +2062 2 +2366 7 +2835 9 +3325 5 +1489 6 +3933 7 +622 6 +195 4 +2799 2 +691 2 +426 1 +1178 8 +2160 1 +3000 9 +3391 6 +1186 9 +3507 10 +2895 10 +1630 9 +3024 3 +2015 9 +2312 5 +252 4 +1032 10 +386 8 +1337 9 +4041 8 +67 8 +4058 2 +2072 8 +1684 8 +1896 6 +1753 4 +398 7 +749 1 +729 7 +2602 10 +2766 8 +2777 5 +717 7 +1261 2 +1327 4 +806 9 +2775 6 +1071 7 +669 4 +547 7 +2400 6 +3094 3 +3333 5 +1094 9 +2456 5 +2750 2 +3026 8 +2710 9 +3808 8 +1996 10 +3515 3 +3116 4 +600 6 +1129 10 +2806 7 +1133 4 +3239 4 +3498 8 +3927 3 +3119 6 +645 10 +3976 7 +3000 7 +1941 8 +2398 2 +804 3 +2801 9 +131 5 +3908 1 +3488 6 +2652 9 +514 6 +3429 8 +1486 2 +2305 2 +3119 6 +3841 8 +400 6 +3821 10 +1439 9 +3818 5 +3814 6 +3004 2 +2864 7 +2671 5 +2987 4 +3497 1 +2841 10 +3223 10 +2353 7 +2602 2 +2515 10 +2764 6 +3647 6 +301 8 +3496 1 +2796 1 +507 7 +3450 4 +1967 3 +1302 1 +1883 7 +1472 3 +764 7 +1242 10 +3043 2 +2329 3 +313 6 +2454 1 +595 10 +2469 7 +2829 1 +672 4 +2318 10 +3829 3 +306 9 +2391 6 +186 8 +922 9 +498 10 +2596 4 +4041 7 +3766 3 +2092 1 +1106 5 +1029 1 +760 9 +629 7 +2972 7 +49 10 +1723 1 +1100 10 +1552 8 +2948 7 +3257 6 +1219 9 +1558 1 +2476 5 +1419 8 +3284 8 +3402 6 +872 2 +905 9 +1830 6 +3549 6 +430 8 +2495 5 +1579 5 +2147 6 +3292 4 +1639 9 +1331 2 +2285 3 +1700 6 +3407 4 +1553 9 +667 4 +3829 7 +1023 8 +999 3 +2571 8 +1483 6 +4059 9 +2 2 +3736 4 +3863 6 +1784 10 +3006 6 +1101 9 +1805 7 +141 2 +4044 6 +646 6 +1909 1 +463 8 +4083 8 +3321 2 +1316 4 +2416 4 +768 10 +2575 9 +0 2 +946 3 +2547 9 +716 8 +876 2 +567 4 +429 3 +3650 3 +1392 2 +222 10 +3304 3 +1999 8 +3132 5 +2022 5 +762 9 +520 3 +218 10 +3536 7 +1025 7 +3440 10 +1655 8 +2431 4 +1081 8 +2069 3 +617 3 +2451 6 +3468 4 +2915 8 +509 6 +3601 1 +3734 1 +1848 10 +3266 5 +1321 4 +3339 1 +3907 3 +605 4 +2670 5 +3700 5 +1465 5 +230 9 +1647 6 +1121 8 +1702 10 +1313 3 +3437 7 +687 2 +394 4 +3413 7 +3785 1 +3701 7 +2420 1 +1439 2 +3617 1 +2377 7 +828 10 +1584 5 
+2105 10 +613 4 +1703 9 +1085 2 +3265 7 +2187 10 +65 1 +478 9 +1802 2 +548 9 +173 9 +1609 10 +2362 1 +2078 8 +3227 8 +1351 3 +1476 4 +4030 3 +77 8 +1429 3 +2230 1 +2267 4 +3761 7 +2482 3 +3695 2 +2715 10 +1950 8 +3214 3 +191 2 +1426 1 +4025 9 +2288 1 +651 1 +3778 8 +3558 2 +3037 4 +2204 6 +1067 3 +3070 9 +1484 8 +3005 3 +1059 1 +3446 3 +4014 4 +3870 8 +547 8 +2775 6 +3845 8 +1804 4 +2908 1 +218 5 +3093 3 +89 7 +3684 6 +3658 9 +833 7 +1967 6 +161 4 +670 4 +2866 3 +117 8 +3446 3 +2549 1 +1795 3 +2873 8 +1846 1 +751 8 +701 4 +1463 6 +3840 2 +877 4 +1676 6 +1189 4 +2423 10 +2994 3 +227 9 +1188 5 +3373 3 +513 8 +1689 10 +1156 6 +2272 9 +785 10 +2816 1 +25 3 +3238 8 +2060 2 +2353 2 +1282 2 +2330 5 +2565 4 +124 4 +1431 2 +1046 2 +20 3 +1129 3 +3634 7 +1691 9 +2914 1 +1649 4 +2172 2 +237 7 +683 3 +491 4 +334 8 +2083 10 +3861 6 +2302 2 +3605 8 +4050 2 +2811 1 +445 5 +1032 8 +2550 3 +3586 7 +291 7 +333 3 +2188 10 +593 7 +3659 7 +1753 4 +1055 2 +2025 4 +42 1 +3533 3 +778 10 +3235 8 +3881 5 +167 2 +2373 7 +4031 6 +1238 5 +1384 10 +146 5 +2762 5 +95 6 +2201 3 +2946 7 +1187 7 +3056 5 +2049 6 +1761 4 +511 8 +1501 3 +2194 4 +514 2 +1275 5 +2585 9 +1824 4 +2886 6 +1378 1 +1310 3 +3751 8 +1893 6 +2449 5 +1366 2 +1640 8 +1890 2 +3838 9 +3109 8 +311 9 +2731 4 +3516 4 +4013 3 +2313 10 +2471 7 +3221 3 +3547 7 +1578 5 +2093 8 +3201 7 +3212 2 +406 5 +442 5 +2052 2 +3781 7 +3699 5 +571 6 +2319 7 +252 1 +2511 8 +2334 8 +3676 10 +3033 5 +462 7 +3261 4 +116 6 +3862 4 +1353 3 +138 4 +2869 9 +3701 5 +1123 1 +2054 4 +1928 6 +2355 5 +614 1 +2389 7 +2568 7 +3382 10 +967 4 +1844 6 +2337 4 +370 5 +749 3 +3739 3 +2660 9 +330 5 +3931 9 +2422 6 +47 5 +1672 1 +532 5 +2381 1 +153 8 +1234 10 +611 8 +1299 1 +3473 1 +3457 3 +1313 3 +557 8 +1826 8 +1328 9 +2872 10 +724 6 +3361 4 +1470 5 +2960 5 +2399 2 +3695 3 +2674 6 +2528 1 +1879 5 +3290 5 +722 4 +458 8 +3622 6 +2228 6 +2952 9 +259 9 +4081 10 +806 9 +3096 5 +1874 1 +2058 3 +2194 2 +1318 1 +3759 10 +3080 10 +1509 3 +1823 2 +2253 1 +4087 4 +3684 5 +1961 7 
+965 9 +605 5 +3052 10 +274 2 +3743 9 +3707 5 +2463 6 +2156 10 +3623 5 +1155 10 +3838 2 +4078 1 +2192 2 +3102 3 +1773 1 +3948 9 +2377 8 +2888 5 +2136 3 +2060 5 +896 8 +3079 9 +1040 2 +1130 7 +3937 6 +3076 6 +3555 3 +1160 10 +502 10 +1344 8 +37 3 +1474 6 +2152 6 +3943 6 +2839 6 +3575 5 +841 1 +1645 4 +403 3 +3421 2 +2622 3 +2038 7 +1854 8 +1215 9 +2510 3 +3126 5 +2836 2 +205 10 +2493 5 +2828 7 +2832 1 +1147 7 +2746 2 +3423 1 +996 5 +3615 8 +2340 4 +3044 2 +2626 6 +1859 3 +2203 9 +2429 1 +3878 1 +1973 3 +3902 1 +1947 6 +1431 3 +954 9 +2126 9 +1750 5 +3783 7 +609 4 +3544 9 +3000 2 +3231 9 +230 5 +1005 4 +2676 3 +1779 8 +126 7 +3815 9 +1502 8 +3379 7 +239 10 +1746 8 +3556 1 +585 8 +128 4 +2657 8 +3755 6 +792 1 +3560 10 +1089 6 +1759 1 +2366 10 +3763 9 +3904 4 +3946 4 +2756 6 +1744 8 +1094 4 +2773 9 +2866 10 +473 2 +3495 1 +2644 6 +2988 4 +580 6 +3062 9 +1291 8 +3403 1 +2381 8 +3605 4 +2384 4 +2624 6 +2276 5 +3504 6 +1794 3 +984 10 +2298 4 +1741 5 +3294 1 +1427 6 +550 5 +1140 3 +3464 5 +3081 8 +2807 3 +2306 1 +1334 1 +2968 4 +300 7 +3997 5 +3240 9 +1294 8 +3015 7 +3973 6 +3172 7 +2599 10 +4076 10 +925 8 +4002 8 +1115 2 +2096 6 +2261 3 +1707 4 +496 6 +2034 5 +728 1 +1528 4 +1093 1 +1655 4 +2484 7 +2747 7 +1296 9 +3705 8 +2130 5 +2688 6 +3843 1 +3428 6 +563 9 +1196 2 +2313 8 +389 10 +2293 2 +2089 9 +1327 1 +2247 1 +1018 7 +422 3 +2384 2 +529 3 +805 9 +1418 4 +2608 5 +2303 1 +3074 1 +2861 6 +2880 3 +1415 3 +1745 1 +3101 2 +3574 1 +2530 7 +3120 1 +2466 2 +3287 6 +1071 7 +642 1 +50 1 +2096 3 +1810 4 +3897 6 +1711 4 +2236 10 +3087 1 +1523 3 +428 6 +3090 2 +752 5 +1303 6 +791 2 +3772 5 +3060 3 +276 2 +3836 6 +1636 5 +3260 9 +298 9 +761 7 +3539 10 +3033 2 +2710 5 +548 10 +2236 10 +752 9 +3956 3 +3436 4 +1190 1 +2438 8 +1635 10 +2186 5 +2279 7 +2011 2 +3246 9 +166 8 +3613 5 +2767 3 +3310 10 +3182 5 +761 6 +81 3 +1125 9 +2079 9 +2713 6 +2949 8 +1109 6 +1802 6 +3473 5 +3316 7 +1995 1 +2101 1 +3781 8 +375 6 +3845 4 +905 6 +2920 1 +2864 10 +2161 3 +2636 5 +3050 5 +1001 5 +577 1 +455 
9 +279 5 +964 4 +3290 1 +3165 6 +3941 7 +663 9 +878 4 +3683 2 +1732 1 +2821 3 +626 4 +955 3 +3228 9 +1125 2 +176 8 +3467 1 +2231 1 +493 1 +1354 9 +3457 7 +489 5 +2915 6 +169 7 +2606 2 +3155 7 +1887 7 +805 8 +1201 1 +2784 7 +1515 7 +3404 2 +3131 5 +688 4 +2514 1 +1177 5 +1221 2 +1488 4 +3282 9 +3540 9 +615 6 +1572 9 +2183 8 +1206 5 +2648 5 +129 4 +73 7 +834 6 +1421 1 +3000 3 +1743 8 +3202 7 +3561 10 +254 3 +2436 2 +633 4 +2914 4 +3341 3 +2957 9 +2326 8 +3617 2 +3928 7 +2087 6 +1948 4 +3483 7 +3571 2 +445 10 +3758 6 +2060 8 +1411 3 +3633 10 +2902 3 +2883 1 +2072 9 +122 3 +3060 1 +3294 1 +1679 10 +2728 1 +2040 6 +662 10 +180 10 +1269 7 +1840 8 +2469 10 +3559 5 +2778 6 +2144 10 +2363 3 +2205 4 +2284 7 +400 8 +1167 3 +2692 8 +3226 8 +1845 4 +2370 7 +202 1 +2413 6 +32 10 +878 5 +946 5 +3493 6 +1605 4 +1332 6 +941 6 +3075 6 +2886 2 +917 8 +3930 4 +3052 9 +2986 7 +3234 3 +1216 10 +2660 2 +1263 4 +4093 10 +4015 9 +1480 7 +1227 8 +518 7 +1476 6 +2073 9 +77 6 +1061 10 +3768 1 +1034 1 +3905 3 +1328 7 +2601 8 +970 9 +2644 1 +2034 10 +720 8 +1749 3 +1298 5 +2304 10 +377 5 +3482 1 +2233 7 +3569 10 +605 8 +2151 10 +3546 4 +1699 3 +3277 1 +2573 2 +1318 3 +1096 3 +669 3 +1930 10 +620 10 +3123 4 +870 10 +1238 3 +2084 3 +2368 3 +966 9 +199 6 +3942 6 +2792 1 +569 8 +165 1 +1571 7 +2859 6 +1567 2 +3782 4 +932 9 +2540 3 +3627 1 +745 7 +2420 4 +3761 7 +3870 9 +1642 3 +1394 10 +3151 5 +1286 6 +3902 9 +1126 6 +2171 3 +2645 2 +651 3 +1339 5 +3791 7 +3945 9 +1769 6 +1692 2 +1338 10 +732 10 +2410 7 +713 6 +136 10 +2966 3 +458 2 +1204 8 +1698 4 +2628 9 +1680 7 +1361 2 +579 6 +1948 4 +3507 10 +4019 10 +3171 7 +536 10 +407 7 +1526 2 +1468 8 +3874 8 +3144 8 +499 4 +1453 3 +524 3 +2746 1 +184 6 +1811 1 +52 9 +3121 1 +1357 10 +1017 9 +2192 2 +2987 6 +1137 3 +242 7 +2761 9 +2075 10 +3275 5 +1061 8 +2137 8 +660 10 +1996 5 +163 2 +1761 4 +2318 2 +3570 5 +2478 3 +966 4 +3212 10 +2345 9 +3321 5 +1807 1 +3326 4 +2135 2 +3927 8 +2992 4 +556 4 +1623 4 +1523 7 +920 3 +526 6 +3249 1 +3437 1 +3043 8 +2877 7 
+3945 4 +294 8 +289 6 +2722 7 +3440 3 +3979 1 +3144 1 +1985 3 +3975 9 +3826 8 +136 3 +3342 2 +3679 6 +3088 5 +446 2 +2292 4 +3041 10 +2656 3 +3513 6 +1280 1 +2610 9 +2661 4 +422 6 +54 2 +2021 5 +3864 2 +254 10 +1542 1 +1647 4 +3368 1 +3790 5 +3016 7 +3277 4 +1189 3 +969 4 +656 5 +3823 6 +4081 4 +393 7 +3358 1 +2825 9 +3544 1 +680 6 +1429 10 +1347 2 +3018 6 +2662 2 +3516 3 +4074 3 +3215 7 +3970 7 +1252 1 +1594 6 +1729 2 +3765 7 +637 8 +751 7 +2482 4 +733 2 +3850 10 +2449 4 +1382 5 +185 10 +83 4 +3644 8 +2661 1 +1712 4 +533 3 +35 2 +3955 4 +3133 4 +3064 10 +3728 10 +1492 2 +1234 10 +2203 2 +705 3 +321 2 +386 5 +1639 9 +1725 8 +823 9 +934 1 +1222 4 +862 8 +2665 4 +2998 4 +2214 5 +2306 3 +3735 7 +3509 5 +139 7 +398 4 +411 3 +3341 4 +1300 1 +38 9 +1877 5 +1392 5 +1156 4 +3161 1 +3027 3 +2939 10 +721 7 +3238 9 +2148 9 +1675 6 +1853 5 +1912 7 +251 6 +3098 4 +1352 3 +630 5 +3370 1 +65 10 +2325 8 +3688 8 +606 6 +1510 2 +3982 1 +3867 10 +888 10 +2874 7 +2560 7 +2199 2 +1996 5 +2965 4 +879 8 +3151 2 +1253 2 +1275 3 +1155 1 +2036 10 +3880 3 +3907 6 +283 6 +3319 7 +3543 7 +3446 9 +810 1 +2069 9 +2928 4 +191 8 +1380 10 +582 10 +425 6 +235 4 +1995 6 +677 1 +3967 9 +879 5 +3179 3 +3038 7 +1785 8 +1906 10 +4095 3 +3679 9 +2749 7 +1069 3 +188 3 +3307 2 +629 9 +2304 5 +2244 5 +1247 4 +2603 1 +3044 9 +2567 8 +3285 2 +3387 10 +2907 1 +471 10 +2077 9 +3257 10 +536 6 +1722 6 +599 4 +3487 10 +1150 7 +694 8 +1787 4 +3202 6 +3354 5 +2059 4 +1700 1 +2012 7 +1176 6 +2306 5 +2052 5 +2118 1 +1998 3 +457 2 +201 4 +264 3 +1911 6 +3168 4 +720 8 +3410 4 +2493 5 +1687 10 +660 2 +3167 3 +339 6 +1547 10 +716 3 +1095 9 +784 7 +444 1 +446 7 +2945 4 +1198 4 +2037 8 +326 7 +3370 3 +1448 10 +1007 5 +3943 6 +423 3 +101 3 +2099 3 +1 10 +2841 2 +2516 1 +4060 6 +2563 5 +1963 8 +3989 3 +1397 9 +2786 8 +3013 4 +428 1 +1830 5 +2502 3 +1496 7 +770 9 +1737 8 +2612 9 +2542 3 +3154 9 +3661 5 +1271 10 +558 4 +866 7 +365 4 +3517 3 +830 8 +455 3 +3380 8 +886 4 +1429 8 +200 3 +3908 8 +648 3 +91 3 +791 5 +3998 8 +3420 7 
+3604 9 +1988 4 +1927 7 +1738 3 +3145 4 +4017 1 +3732 1 +1345 9 +1469 10 +2896 4 +358 10 +1905 5 +2025 6 +52 5 +2466 1 +1332 4 +706 4 +3153 2 +2509 10 +3789 4 +2525 6 +2994 9 +3386 9 +2353 2 +1970 8 +3150 10 +697 10 +3628 4 +3735 1 +2902 1 +2916 8 +1131 6 +2449 7 +2256 2 +1037 8 +873 10 +2524 9 +3729 7 +3510 9 +912 8 +1351 5 +3213 7 +116 6 +2781 9 +3781 7 +987 9 +224 2 +2170 7 +2957 10 +3753 8 +2546 1 +2295 6 +2162 5 +228 8 +2825 8 +471 3 +1198 2 +3532 1 +301 1 +1597 5 +569 4 +1366 5 +920 8 +1937 3 +3212 1 +2528 8 +2803 3 +961 1 +2705 3 +3672 7 +2234 9 +174 2 +854 9 +2816 5 +2280 2 +3101 2 +549 4 +2064 1 +117 9 +507 1 +1728 1 +1150 5 +3312 8 +746 8 +163 4 +1436 3 +2183 5 +3464 2 +3702 2 +1428 9 +262 1 +3486 6 +3200 10 +2428 7 +507 5 +1275 8 +3160 8 +3044 6 +3909 1 +1829 5 +2082 6 +600 4 +2011 8 +3697 8 +1983 4 +1083 10 +2200 10 +662 2 +3974 4 +2660 5 +3007 2 +2732 10 +658 8 +1607 5 +1726 5 +2072 7 +1318 9 +2327 6 +683 4 +1417 5 +4019 2 +2298 10 +2468 5 +2484 1 +640 5 +909 1 +3383 1 +733 8 +171 1 +1525 1 +3995 3 +3358 1 +1303 3 +1440 8 +2982 5 +250 3 +3681 3 +3585 7 +1668 7 +4028 10 +3734 9 +1486 10 +809 1 +2895 8 +3498 1 +1937 9 +3426 5 +4067 8 +3358 1 +2379 1 +660 3 +2233 5 +209 2 +2433 7 +2579 10 +3888 5 +3581 10 +2047 10 +3382 4 +312 10 +564 6 +750 10 +2459 7 +3991 10 +3691 6 +1776 7 +553 5 +794 2 +1928 2 +4032 5 +169 8 +2668 2 +3603 6 +3673 7 +3554 1 +3810 4 +1202 10 +1714 2 +3415 9 +4059 7 +3495 1 +3524 7 +1430 4 +1176 4 +4055 1 +1189 1 +3876 8 +3357 1 +1489 4 +1174 1 +470 3 +396 3 +3206 2 +1713 6 +3938 2 +223 7 +825 7 +3377 4 +4002 5 +2301 7 +3428 4 +3796 3 +553 7 +733 2 +1313 8 +3271 2 +616 6 +2533 7 +3916 6 +1280 8 +1655 6 +1439 3 +336 6 +4030 4 +3584 6 +1626 5 +1568 10 +2000 1 +1621 4 +326 9 +262 9 +1494 4 +3936 9 +345 5 +2071 8 +2090 4 +246 5 +2059 2 +2962 7 +2860 10 +3029 7 +1136 1 +2354 7 +2352 7 +2727 1 +385 10 +3312 9 +4075 5 +3319 7 +2917 8 +1577 9 +3490 4 +1629 1 +1123 1 +380 6 +1411 4 +1559 1 +3765 7 +408 2 +1422 8 +200 4 +1164 4 +3994 7 +1547 7 
+3982 1 +188 1 +1065 7 +893 3 +400 6 +824 4 +1566 2 +1471 10 +3063 10 +1623 10 +3839 10 +2209 4 +1860 5 +3279 10 +4000 4 +3763 7 +1994 10 +1841 10 +3347 5 +58 7 +3053 10 +2020 3 +1465 10 +475 2 +3230 2 +1539 5 +1206 8 +3910 7 +3428 3 +915 4 +2602 2 +1036 7 +2873 3 +3426 2 +3789 9 +3867 10 +2420 7 +268 6 +16 10 +4072 2 +2510 4 +1975 9 +4075 6 +1680 1 +2231 6 +3514 6 +305 7 +629 5 +1157 4 +4079 8 +3085 6 +3667 1 +2830 3 +1419 5 +1535 1 +3703 7 +3475 9 +2563 5 +1847 6 +749 8 +2222 2 +3356 1 +1830 7 +1053 9 +3040 3 +907 5 +342 7 +2002 7 +2554 8 +796 5 +2960 8 +288 4 +4091 5 +537 1 +3772 4 +2944 8 +2436 5 +193 5 +4017 7 +3813 9 +1315 6 +354 9 +2268 2 +1458 3 +1338 7 +703 7 +1389 9 +3459 5 +2492 2 +1306 2 +3739 7 +3081 7 +655 2 +343 2 +2127 6 +368 7 +1965 3 +2220 4 +2810 1 +1996 10 +2980 4 +1073 9 +489 5 +2625 10 +3867 4 +3131 5 +2048 5 +802 8 +320 10 +2852 1 +3911 7 +3585 4 +1991 8 +4002 8 +2146 1 +2301 2 +595 8 +3298 9 +1043 6 +74 8 +3826 9 +3145 5 +2067 8 +2972 1 +3083 3 +2167 8 +277 7 +1423 2 +30 4 +835 8 +2595 6 +928 2 +3105 7 +2777 7 +3550 7 +749 2 +2206 6 +3923 5 +1227 9 +2410 6 +1069 3 +1539 8 +691 5 +1029 10 +759 7 +3185 1 +2948 7 +2047 1 +3145 3 +2602 10 +678 4 +1535 3 +3244 2 +2659 4 +1859 7 +1721 6 +976 2 +3808 3 +2188 10 +1352 4 +1887 2 +1073 4 +1462 3 +3347 9 +342 7 +1147 5 +3310 2 +2879 10 +1247 9 +1796 10 +1271 6 +1227 8 +2907 9 +3342 7 +3470 5 +3974 10 +3227 7 +24 8 +1290 5 +3966 3 +1480 6 +818 9 +110 7 +368 3 +2331 3 +2793 7 +1056 8 +87 9 +2185 8 +2437 5 +3128 10 +2431 1 +1472 7 +736 10 +625 2 +2524 8 +2896 6 +523 10 +3900 2 +39 8 +29 7 +3419 10 +1473 2 +3676 3 +1270 9 +1607 5 +2863 10 +2489 1 +185 9 +1366 10 +2688 8 +2721 2 +1557 4 +901 1 +3999 8 +463 2 +338 1 +975 7 +2213 9 +3579 4 +1871 3 +2407 6 +2121 5 +1883 9 +2673 2 +932 10 +1189 8 +55 9 +3505 2 +1278 10 +3984 1 +138 8 +3847 4 +44 9 +1128 8 +524 8 +3695 8 +858 8 +3998 9 +692 4 +3851 5 +1613 10 +3202 4 +2119 4 +1521 1 +2611 7 +3324 6 +426 1 +1362 1 +1218 7 +1994 6 +3575 6 +1661 8 +64 2 +3758 10 
+2322 9 +3765 3 +596 1 +342 4 +2811 9 +166 6 +3821 8 +2317 7 +1582 3 +3898 2 +388 8 +403 4 +2876 4 +3466 9 +1479 2 +2638 10 +778 1 +2175 5 +26 1 +658 3 +590 2 +1065 6 +4014 1 +3093 8 +3340 1 +3835 6 +1366 5 +2207 2 +3634 10 +284 1 +1490 5 +2578 5 +574 10 +3098 5 +2438 6 +739 4 +350 8 +3544 9 +657 7 +2999 1 +2611 10 +2105 8 +3416 7 +952 4 +3886 3 +3437 2 +1740 6 +3627 4 +2275 7 +2992 8 +974 9 +3900 8 +4 10 +3258 5 +1439 9 +3007 6 +1782 4 +1625 8 +414 1 +1805 4 +2885 9 +363 10 +2635 8 +715 6 +87 3 +2050 1 +513 1 +2020 10 +2294 4 +3713 3 +3611 8 +640 7 +2474 6 +782 2 +432 3 +2424 9 +2661 6 +919 8 +2453 7 +1694 8 +560 10 +1311 5 +3812 8 +1185 6 +3277 8 +2681 3 +3695 8 +2804 10 +836 2 +2331 7 +799 5 +2602 3 +119 9 +467 5 +3483 4 +706 4 +2544 8 +2491 10 +2124 6 +3472 5 +2085 10 +3649 1 +1534 2 +1163 7 +2186 9 +1385 7 +914 4 +2603 8 +950 4 +3991 4 +1647 1 +2278 8 +385 5 +2320 3 +3261 8 +2689 7 +915 4 +1615 2 +2722 4 +1011 4 +882 1 +2544 7 +3906 3 +102 1 +3270 2 +2172 8 +461 10 +1626 3 +16 4 +686 6 +838 1 +2327 4 +299 2 +1070 3 +3076 7 +2740 3 +1730 7 +3560 8 +3786 2 +977 4 +520 2 +2333 8 +835 7 +3915 3 +876 3 +273 4 +2967 7 +1563 8 +1852 8 +3721 3 +3859 8 +1528 1 +1475 8 +3293 9 +3165 8 +3501 9 +2396 3 +3608 9 +2272 6 +2165 8 +3257 9 +2610 4 +1163 1 +3509 3 +1916 3 +3182 3 +2371 4 +2451 1 +3350 1 +2898 3 +2300 5 +1668 3 +2103 10 +1699 6 +601 5 +1613 2 +1192 5 +2242 5 +1992 4 +1000 2 +941 10 +1213 10 +3913 1 +3555 2 +1632 8 +2423 2 +227 8 +764 3 +2619 7 +3879 5 +179 1 +3913 9 +2466 4 +535 4 +2936 7 +1864 8 +2765 7 +3059 4 +1189 4 +2223 3 +2341 5 +2939 2 +3941 6 +3223 9 +1994 9 +3308 1 +3122 9 +1325 5 +1739 3 +1566 10 +50 6 +695 10 +2593 9 +13 3 +1030 4 +2702 10 +1909 9 +779 6 +3447 3 +3263 1 +1277 9 +1509 1 +3466 7 +2193 7 +1238 10 +482 1 +1026 2 +3504 5 +43 7 +1116 6 +3103 10 +3342 9 +3338 2 +727 9 +623 10 +831 9 +97 3 +926 3 +3812 1 +3470 8 +266 7 +3445 8 +2394 3 +979 7 +1050 4 +2067 2 +3617 3 +412 2 +1346 7 +3277 10 +548 1 +80 5 +3596 9 +3072 1 +2583 5 +1878 4 +307 3 
+225 8 +920 8 +3260 5 +3237 4 +1813 3 +337 7 +85 3 +2357 8 +2327 4 +369 10 +924 10 +4089 1 +2310 9 +3379 2 +591 1 +2988 5 +1490 6 +4028 5 +538 7 +168 4 +2168 4 +350 9 +3798 2 +535 1 +1859 4 +2186 8 +4011 5 +3635 6 +1262 1 +2529 4 +1050 6 +2014 9 +2269 6 +3534 10 +2635 8 +1490 4 +979 4 +2981 4 +3493 9 +3085 2 +107 3 +3336 8 +270 3 +1920 8 +1398 1 +1968 4 +1477 1 +244 6 +3898 1 +1176 2 +1237 7 +3657 4 +3846 1 +3963 1 +1973 9 +223 8 +2640 8 +2148 8 +3957 8 +1940 6 +391 6 +2694 5 +2599 10 +2327 6 +1905 10 +762 5 +1770 1 +1145 4 +833 9 +420 1 +970 3 +551 4 +919 2 +1839 6 +2596 4 +991 10 +1659 10 +1917 10 +1809 5 +1835 5 +197 7 +1199 5 +120 6 +1531 1 +1847 3 +3539 7 +1262 5 +1683 8 +5 1 +3279 6 +1075 1 +199 5 +3986 7 +1648 9 +3929 9 +1898 1 +3873 5 +3550 5 +2803 7 +2429 8 +1000 5 +2265 9 +460 2 +2657 1 +687 3 +61 2 +1399 9 +1496 8 +952 3 +3675 7 +3212 1 +1912 7 +3953 2 +1041 8 +1579 10 +2090 5 +2472 10 +2296 6 +1064 8 +534 6 +669 1 +445 3 +2713 5 +1119 8 +1021 6 +3815 2 +2857 2 +2602 3 +3713 9 +2803 2 +3275 8 +959 5 +1625 9 +2189 4 +248 6 +2983 7 +3182 4 +696 2 +3458 6 +2456 10 +314 5 +3712 7 +2531 3 +3989 3 +1422 5 +1620 7 +170 4 +3562 5 +2963 7 +2518 7 +3555 2 +729 7 +3397 7 +245 7 +200 2 +169 10 +2027 8 +313 2 +386 6 +1107 3 +133 3 +323 3 +1767 2 +1878 8 +2341 2 +2469 2 +3722 4 +497 6 +1572 8 +3332 5 +3172 6 +1846 7 +3105 5 +2239 10 +3140 5 +2168 9 +1318 3 +3639 10 +1989 9 +1165 3 +2288 3 +1654 9 +1272 5 +1434 2 +1465 3 +378 5 +2543 8 +3443 7 +2578 6 +1590 5 +3397 6 +457 10 +2220 8 +3763 7 +3461 5 +87 9 +2351 3 +2952 6 +4072 1 +1095 1 +1502 6 +1006 9 +2466 1 +3924 10 +3303 5 +3884 1 +332 10 +1288 4 +331 3 +1055 1 +3754 5 +2886 8 +2959 8 +4087 6 +2734 2 +1949 10 +1009 4 +4041 4 +1906 3 +1317 7 +363 1 +1212 9 +3142 3 +1817 5 +2246 10 +3563 5 +2756 5 +63 9 +3101 4 +3782 7 +2576 7 +3221 10 +1074 7 +1683 5 +3955 2 +3645 8 +1078 2 +4021 4 +968 6 +4093 4 +1355 2 +2889 8 +1407 4 +2986 7 +864 4 +1861 6 +2654 2 +3886 1 +1707 4 +2580 10 +751 9 +750 10 +445 8 +1055 6 +2636 1 +193 
6 +2010 8 +2950 3 +3717 1 +2744 6 +450 2 +3456 10 +3531 9 +3257 10 +2757 10 +1168 6 +4041 5 +1529 9 +3601 5 +2412 7 +2878 10 +3562 3 +185 5 +2563 8 +1384 7 +513 6 +1563 3 +681 2 +1639 3 +2177 1 +2432 6 +1291 1 +3617 6 +2337 7 +2274 7 +288 6 +3436 5 +3898 2 +56 7 +215 10 +2701 7 +3097 9 +855 1 +1753 5 +1794 10 +2737 4 +3033 7 +2635 3 +1103 7 +4051 5 +2734 3 +2594 8 +3391 4 +1836 10 +3074 1 +418 10 +3174 6 +5 5 +1850 8 +1737 7 +2913 2 +3168 10 +3044 9 +935 5 +3529 1 +3447 10 +658 4 +2834 5 +3690 9 +988 6 +1784 6 +2519 3 +690 7 +2426 4 +3790 9 +2893 10 +3717 3 +3165 6 +1435 9 +3512 10 +3094 6 +2585 6 +586 1 +1464 1 +2347 8 +2402 3 +4045 6 +88 2 +3054 2 +1431 6 +3923 1 +4063 4 +1475 5 +4034 9 +2639 9 +3836 8 +2603 1 +3079 9 +1162 5 +902 8 +3504 9 +3122 10 +1886 10 +1466 2 +512 7 +2840 8 +1431 4 +2923 9 +1925 1 +219 4 +1482 3 +1919 5 +662 10 +308 10 +2537 2 +3087 1 +1711 6 +2778 6 +530 1 +2722 4 +1949 1 +1259 4 +3334 7 +3745 3 +2895 8 +3042 7 +2625 10 +1071 3 +3360 1 +1526 7 +1847 5 +1362 2 +4024 1 +1717 2 +105 5 +3761 3 +3243 8 +346 2 +2754 8 +3591 1 +3572 9 +414 1 +969 1 +2714 9 +3558 3 +2297 5 +1720 4 +3720 8 +3150 4 +4073 8 +3303 2 +2692 1 +3429 5 +701 7 +170 6 +2121 2 +502 7 +2172 1 +3261 4 +1617 6 +2151 6 +778 10 +2683 1 +2626 8 +2822 3 +1594 8 +1728 3 +3762 10 +1846 4 +1900 2 +3599 10 +528 5 +1458 2 +44 3 +1305 8 +1733 5 +88 9 +1782 8 +3755 1 +1702 2 +4083 10 +3911 9 +3894 7 +3036 2 +1522 4 +3683 10 +1559 8 +687 1 +1649 2 +283 9 +3725 1 +1026 9 +234 9 +549 9 +1874 1 +3716 6 +2385 8 +1511 7 +340 10 +329 4 +3227 4 +3500 4 +3021 8 +3928 9 +3675 10 +2745 10 +4024 9 +104 10 +4067 7 +2514 4 +1982 2 +1922 8 +2539 9 +3064 2 +1065 6 +2145 4 +2365 8 +679 3 +631 10 +3391 3 +2604 3 +3610 4 +3968 5 +600 4 +922 1 +802 1 +1838 9 +3124 4 +2142 8 +1262 10 +1685 3 +2353 10 +2134 5 +525 3 +1139 4 +2110 8 +1900 8 +1330 8 +1132 5 +1346 3 +2477 3 +297 4 +2994 9 +709 1 +705 10 +3144 2 +659 6 +3842 1 +355 9 +1783 9 +1655 8 +833 6 +1879 7 +1793 9 +840 2 +2880 7 +1100 8 +1240 5 +28 3 +524 
4 +3320 2 +3918 10 +3232 10 +3721 4 +2752 1 +3469 9 +119 6 +40 3 +1196 2 +153 7 +1412 1 +1023 4 +2199 6 +4020 1 +3339 1 +267 3 +534 5 +1809 10 +443 1 +3047 4 +1530 5 +999 9 +187 3 +682 6 +2101 1 +231 8 +1843 9 +4 7 +1252 7 +2628 6 +2873 7 +3224 9 +3350 2 +2356 3 +3838 10 +2271 8 +154 6 +4091 1 +1366 3 +1692 8 +255 6 +3856 3 +1769 9 +937 4 +2600 4 +1079 10 +2209 4 +1333 4 +838 6 +1543 3 +1424 4 +3972 3 +1069 10 +3741 6 +2895 4 +3091 6 +416 8 +2310 1 +3449 5 +980 1 +1137 4 +3295 1 +2537 10 +358 10 +1877 6 +3183 9 +729 9 +1705 10 +1596 8 +3885 9 +3740 2 +3226 9 +1116 5 +3267 8 +1188 10 +2489 8 +3964 10 +2518 5 +1513 7 +1431 6 +1797 3 +1423 9 +921 10 +3562 2 +1955 10 +1122 7 +3990 3 +3960 5 +1562 4 +1258 4 +490 4 +2236 2 +3664 4 +1782 6 +2973 3 +2473 6 +273 9 +784 9 +2434 2 +494 8 +1196 7 +1416 10 +1631 1 +518 9 +1756 9 +3957 7 +1900 4 +2754 9 +3777 1 +53 10 +2003 5 +4001 8 +268 1 +3237 7 +808 4 +595 5 +1617 1 +1093 1 +2162 10 +2289 8 +134 8 +1671 1 +758 2 +698 4 +1203 9 +1715 8 +2787 5 +3170 4 +3987 8 +4067 10 +1519 7 +2314 4 +1213 2 +3345 2 +3304 1 +3792 9 +3340 5 +2579 6 +307 9 +1753 6 +3547 10 +1761 3 +2886 3 +3110 10 +1389 10 +961 4 +2207 3 +2827 4 +362 4 +816 1 +127 6 +2450 10 +3879 5 +3620 9 +472 2 +946 5 +1408 8 +2322 1 +762 5 +3162 3 +1389 8 +781 6 +1851 3 +3896 10 +790 3 +3365 2 +2820 8 +3210 5 +2584 9 +626 3 +298 2 +1770 1 +219 2 +2076 1 +3885 3 +65 6 +326 6 +4068 3 +2359 7 +1967 10 +3458 8 +2498 5 +3206 6 +1216 1 +196 2 +218 6 +1272 3 +1691 10 +2849 10 +3830 7 +1267 7 +3000 1 +1946 8 +3059 8 +3379 3 +2818 10 +1316 1 +3641 7 +16 4 +633 1 +3907 6 +610 10 +2836 2 +2250 7 +3507 6 +389 9 +3438 2 +1448 7 +1073 9 +3074 8 +3004 1 +3705 6 +3537 2 +2689 8 +2070 8 +2138 7 +2334 3 +3404 2 +1043 1 +3487 2 +908 5 +276 3 +2628 4 +794 3 +2567 2 +135 7 +1559 5 +3642 1 +3973 4 +2905 4 +48 6 +1530 5 +3659 6 +3210 1 +2520 9 +871 1 +1138 3 +1548 3 +336 2 +3684 1 +248 8 +1258 10 +3858 1 +100 8 +3501 10 +3897 10 +295 6 +634 3 +4079 10 +484 5 +1548 7 +3748 9 +1562 3 +0 7 +2139 7 
+4024 2 +3352 6 +2749 3 +791 3 +365 3 +3835 10 +2872 6 +2305 4 +938 8 +207 10 +2934 2 +1847 1 +3662 5 +31 10 +3231 7 +2673 2 +1268 10 +2885 5 +912 10 +1940 4 +3632 6 +690 1 +1182 3 +1392 6 +2486 4 +2463 4 +1059 3 +1403 1 +2056 2 +1248 10 +649 7 +1937 5 +3522 6 +3588 9 +3004 4 +1324 5 +1440 10 +694 9 +325 6 +2231 3 +1159 9 +2821 7 +351 4 +1955 6 +3836 4 +142 9 +3406 8 +3108 7 +2828 4 +2230 1 +3395 10 +1428 6 +3546 2 +1741 9 +2505 3 +869 8 +2601 4 +2991 10 +2413 5 +1260 3 +3700 9 +1916 6 +3677 5 +2240 8 +663 8 +1068 1 +151 9 +2250 8 +1435 6 +3274 10 +3595 1 +939 1 +3649 4 +3862 1 +3945 1 +1515 1 +4066 3 +3597 2 +509 2 +3024 1 +2732 1 +2575 3 +1563 2 +3899 1 +251 8 +3423 8 +1755 5 +222 1 +2286 1 +3037 3 +3884 2 +3108 4 +560 4 +1031 4 +2828 10 +3025 1 +3672 8 +2637 5 +2769 10 +2879 10 +2525 10 +950 8 +3348 9 +3913 3 +1365 8 +583 6 +2070 5 +2147 6 +3622 7 +2350 10 +1 1 +2998 1 +3268 6 +2171 5 +2428 5 +1500 4 +4086 2 +3881 9 +2854 1 +2452 7 +1137 5 +1811 2 +3475 7 +573 4 +499 5 +3365 2 +1496 3 +620 7 +1178 9 +471 7 +3491 9 +3427 4 +3926 2 +1732 3 +3207 3 +3701 8 +3904 6 +584 3 +2269 3 +1809 8 +198 9 +2839 1 +2380 3 +3147 4 +3633 4 +3938 5 +422 1 +2110 7 +938 10 +2953 3 +2375 9 +2152 10 +2116 7 +3214 1 +3381 9 +3935 9 +749 10 +93 5 +375 2 +3235 7 +2273 5 +661 6 +1081 6 +2591 10 +2980 4 +3576 9 +2685 6 +89 7 +3791 5 +1324 3 +799 7 +3817 2 +3597 8 +2069 8 +1208 5 +181 9 +2470 4 +305 3 +3769 7 +684 7 +3530 1 +3045 6 +1786 10 +2674 10 +3354 3 +1024 7 +3725 10 +2067 4 +3786 6 +2834 3 +1481 9 +1026 8 +433 6 +891 9 +2960 9 +2241 2 +3283 10 +3755 5 +3801 2 +2694 8 +2519 8 +3572 8 +929 8 +1920 1 +1490 6 +2965 10 +2134 6 +4094 9 +1676 6 +3291 2 +1468 6 +2697 4 +2374 2 +2226 10 +3168 1 +1341 10 +2267 5 +383 8 +1830 9 +516 5 +3775 6 +2244 5 +1994 8 +322 10 +931 4 +1239 1 +3771 3 +1065 3 +2158 1 +302 9 +1232 2 +27 5 +2198 5 +1175 3 +259 7 +1041 6 +441 9 +2057 6 +4025 7 +2997 6 +2612 3 +1795 10 +1736 6 +470 8 +2139 10 +2292 5 +3877 5 +2182 9 +522 6 +414 4 +3480 9 +2813 6 +3846 9 +2364 
8 +3167 1 +3545 5 +91 10 +3297 3 +1043 5 +1361 6 +3509 10 +169 6 +487 9 +4011 2 +2829 3 +2796 7 +834 7 +1501 6 +2302 2 +678 3 +406 5 +2282 9 +1730 10 +3180 7 +2823 9 +1364 2 +2150 1 +568 9 +504 10 +3665 1 +667 1 +2582 8 +3717 9 +1298 4 +3866 6 +2818 8 +2768 5 +1045 6 +3522 3 +1155 5 +2573 7 +4050 4 +1652 1 +1452 5 +436 5 +3847 3 +3607 4 +3792 10 +97 4 +1770 3 +1013 2 +1344 7 +522 2 +2092 10 +920 10 +22 4 +1869 5 +2956 3 +963 7 +783 7 +612 1 +1417 10 +2938 2 +2513 1 +3969 8 +1965 8 +3341 3 +963 6 +2776 2 +776 6 +3961 10 +1083 5 +352 1 +2796 2 +2380 3 +3073 1 +1922 8 +3254 4 +1611 1 +2675 9 +2014 9 +1642 6 +209 2 +1987 4 +3961 2 +2989 4 +228 2 +3609 6 +3115 2 +1281 10 +1868 7 +3282 9 +105 5 +2738 3 +2689 10 +1562 5 +787 3 +287 6 +1355 1 +2487 4 +2232 8 +444 8 +2134 5 +1247 8 +1801 3 +2057 1 +1704 2 +3722 8 +3354 6 +3517 7 +1958 8 +2941 5 +341 4 +842 6 +648 9 +3942 8 +3992 1 +3825 3 +2476 9 +3122 9 +2830 9 +3434 10 +3584 6 +1944 5 +1334 5 +2300 9 +676 7 +744 7 +3021 8 +868 8 +2813 4 +82 9 +492 1 +1642 1 +3024 1 +2654 5 +727 8 +3168 1 +930 7 +2031 1 +1202 6 +1500 9 +1101 4 +1638 8 +2348 8 +1172 3 +1112 9 +1455 10 +361 7 +169 5 +2766 8 +1046 2 +3022 5 +3446 4 +2985 2 +2579 6 +1243 8 +2563 9 +3524 9 +1332 3 +872 5 +3511 7 +3603 7 +67 1 +3095 2 +1451 6 +2152 9 +1188 6 +155 1 +2701 10 +2184 9 +1547 7 +3630 1 +111 9 +1875 5 +1778 8 +789 9 +1594 5 +2222 2 +682 7 +25 3 +1114 7 +3784 10 +1524 10 +2182 9 +1933 8 +2809 3 +1038 9 +1370 5 +1205 9 +435 10 +3227 7 +1956 9 +1989 8 +3017 10 +1766 5 +19 3 +3860 5 +1692 10 +1392 5 +1466 6 +536 2 +3076 2 +682 6 +123 7 +1928 10 +1195 5 +1706 10 +1416 3 +2377 3 +2701 2 +2497 4 +2006 4 +4042 7 +3047 10 +2885 4 +787 5 +3125 10 +3153 1 +2396 8 +4022 2 +68 3 +471 9 +1151 2 +2424 10 +2315 10 +2647 6 +923 7 +1568 3 +1455 3 +1732 2 +1619 8 +2236 10 +3652 2 +921 3 +435 6 +520 8 +3827 10 +3811 9 +1808 2 +3463 1 +2904 1 +46 6 +3775 9 +1976 7 +1712 4 +180 2 +3792 4 +20 1 +217 4 +1728 2 +1379 6 +2227 7 +319 8 +4018 6 +672 5 +1396 6 +3473 8 +899 9 +801 
2 +1054 10 +2683 10 +2972 4 +1341 2 +1574 2 +2958 9 +670 6 +2150 5 +3907 8 +2075 6 +209 3 +222 6 +1025 1 +1429 8 +1835 2 +138 10 +1879 10 +3717 2 +954 10 +1109 2 +1252 7 +2263 9 +1175 3 +2932 7 +1711 3 +2417 8 +2768 3 +3771 3 +60 5 +636 9 +4044 10 +3915 7 +3548 3 +1739 9 +3539 1 +996 10 +3690 2 +200 1 +944 3 +1825 2 +2821 1 +1539 3 +3258 4 +2918 2 +3429 5 +1695 6 +3019 7 +888 5 +1786 4 +1168 10 +2416 4 +930 9 +3907 1 +784 8 +1125 6 +3627 3 +1924 6 +100 1 +505 8 +1406 10 +1392 1 +2097 3 +1945 2 +3977 9 +3696 1 +3151 6 +1128 8 +1013 8 +3398 10 +3087 1 +3777 3 +1149 8 +463 6 +2299 6 +324 5 +1905 4 +2079 4 +3758 1 +900 4 +2406 7 +1115 9 +19 9 +502 6 +1055 4 +1612 6 +3175 10 +502 10 +952 2 +1090 10 +3677 8 +2921 3 +201 10 +934 2 +687 1 +697 6 +658 10 +1937 9 +1498 6 +3684 5 +2529 7 +2345 6 +2650 5 +756 9 +3051 7 +1827 5 +2805 3 +429 3 +1311 2 +1630 1 +906 10 +3972 2 +2267 9 +2787 3 +2854 8 +969 8 +3208 5 +1617 7 +1257 2 +2686 1 +3185 1 +624 4 +2806 1 +3 9 +2281 10 +1088 7 +3706 8 +86 3 +2751 6 +419 8 +934 4 +735 7 +1050 4 +2650 8 +2974 7 +3507 2 +3378 3 +655 3 +3938 10 +3890 5 +2810 6 +107 1 +402 10 +102 3 +2569 10 +1917 4 +2016 8 +484 1 +849 7 +2184 2 +2664 2 +1443 1 +620 5 +232 7 +1912 2 +3987 3 +2452 10 +1971 3 +3443 3 +1406 3 +1527 8 +3127 9 +3006 2 +1573 8 +2734 6 +2642 6 +3673 4 +3856 7 +1311 9 +3227 3 +2793 10 +104 9 +275 4 +3607 6 +236 10 +1099 5 +2699 9 +1543 3 +3014 4 +2147 10 +263 5 +2195 7 +2457 1 +3089 9 +633 7 +5 8 +1026 7 +1727 6 +3000 7 +2407 7 +2481 4 +969 1 +790 4 +2650 8 +2250 6 +3364 4 +2342 6 +2125 1 +3487 5 +3962 8 +775 10 +120 3 +2409 7 +1693 1 +730 7 +2123 3 +1081 3 +3430 7 +1039 1 +136 5 +1774 1 +1909 6 +3608 2 +2798 9 +2919 10 +1248 10 +3346 3 +1630 4 +2171 8 +3063 10 +2248 7 +446 3 +1885 5 +2906 9 +840 2 +3376 5 +950 7 +1795 7 +3019 7 +3991 5 +3399 2 +2402 8 +1872 9 +2271 9 +2391 4 +3594 3 +3902 1 +2192 10 +759 2 +2296 7 +1765 10 +380 10 +3552 2 +2086 2 +500 9 +1761 10 +3501 4 +3029 1 +89 4 +1115 7 +1058 10 +189 9 +3543 9 +2984 10 +4076 3 
+3110 5 +469 4 +736 5 +3463 1 +2013 10 +3046 4 +3498 2 +1238 1 +522 1 +2127 8 +978 4 +729 1 +377 3 +386 7 +1383 9 +2361 4 +2909 3 +2145 2 +1077 10 +2420 9 +1968 5 +2732 6 +3160 9 +1420 7 +1166 4 +3797 4 +3500 1 +1842 5 +3906 4 +1545 1 +659 7 +1255 8 +2148 5 +2412 5 +4032 9 +3519 7 +2829 3 +3433 4 +1189 8 +2520 9 +699 10 +2471 1 +1493 5 +3088 5 +672 9 +2447 10 +2021 10 +3618 1 +427 8 +1215 8 +1756 1 +1354 8 +1478 4 +991 3 +586 10 +3611 2 +2232 7 +3246 3 +3589 5 +2253 1 +1119 3 +781 1 +2485 6 +2108 7 +3947 10 +2229 6 +868 1 +2127 8 +2896 3 +920 7 +4081 4 +3772 5 +568 6 +1216 3 +3173 4 +1450 3 +4033 1 +2249 1 +3957 10 +3035 1 +1729 1 +3325 5 +1007 10 +2506 3 +3994 2 +823 5 +3192 6 +86 3 +386 3 +4008 1 +2620 4 +1866 2 +3206 3 +3073 10 +825 1 +35 8 +2494 7 +1293 10 +3960 2 +1139 4 +2794 1 +33 6 +115 4 +957 5 +293 3 +2879 8 +309 6 +2931 1 +2406 4 +97 8 +2860 8 +1381 1 +3990 5 +1016 4 +1753 3 +871 4 +3896 7 +930 5 +1331 10 +223 3 +1192 9 +1507 4 +3316 9 +2379 4 +803 1 +1127 3 +2200 9 +1403 2 +3959 9 +926 6 +1050 1 +3988 7 +245 3 +3801 7 +2001 9 +516 6 +1583 8 +3727 7 +1131 5 +722 1 +181 6 +3062 2 +2831 9 +75 3 +1255 4 +2148 5 +573 9 +1622 1 +3778 8 +765 8 +1693 4 +758 4 +2215 9 +2774 2 +2932 1 +1038 10 +992 9 +1914 2 +1493 7 +713 10 +1508 6 +3977 8 +3845 8 +895 10 +2137 3 +1989 6 +3691 7 +1555 4 +2778 4 +1204 3 +2078 8 +2235 9 +956 5 +3698 10 +1343 8 +3365 5 +644 10 +4054 8 +2758 8 +1965 6 +857 6 +1702 9 +2673 2 +1519 8 +1494 4 +747 3 +631 1 +2729 9 +859 7 +1461 7 +3917 1 +2298 10 +3868 4 +1318 10 +2140 5 +2033 7 +1176 8 +2504 1 +2810 4 +2413 8 +1947 7 +3616 10 +2610 4 +738 2 +3708 8 +1749 5 +807 1 +412 6 +562 4 +1296 2 +666 1 +3297 10 +3071 1 +3072 10 +1305 7 +492 9 +88 10 +253 10 +2293 7 +3578 9 +3010 3 +1103 6 +806 2 +1956 8 +1767 7 +3676 9 +3808 10 +1456 10 +1549 1 +3459 6 +2544 6 +3331 5 +3148 4 +3730 1 +1873 2 +3367 2 +14 4 +1136 7 +3946 9 +1644 3 +2633 6 +4002 3 +930 3 +4034 7 +3655 4 +2217 10 +1375 8 +848 8 +2156 9 +1357 10 +2487 10 +818 7 +2854 5 +338 2 +1786 7 
+48 10 +952 6 +478 2 +4026 5 +2973 1 +2365 10 +1676 3 +101 9 +3287 10 +3129 1 +50 6 +2367 5 +329 7 +2130 1 +3216 8 +2411 9 +718 9 +119 9 +1261 2 +2742 4 +2537 7 +2015 3 +568 9 +1695 1 +1033 4 +1387 1 +377 2 +769 9 +3557 7 +3682 4 +2298 10 +2092 8 +2861 9 +4058 9 +3866 8 +2392 5 +730 5 +1367 4 +1270 6 +1394 4 +1280 7 +3716 9 +3628 2 +3724 3 +2152 5 +3150 7 +1816 4 +3361 7 +3881 2 +2687 3 +1196 8 +520 1 +1285 5 +122 3 +1325 8 +2277 7 +1756 2 +1950 4 +943 5 +3063 9 +3271 5 +667 1 +1585 4 +1869 6 +3748 1 +1021 4 +2974 2 +3761 4 +2004 6 +2236 7 +103 4 +3308 3 +3345 7 +1268 9 +3402 9 +2054 5 +3611 7 +1457 8 +2644 5 +3963 1 +460 1 +4003 5 +1750 4 +772 8 +148 2 +543 9 +3123 5 +1880 10 +2289 10 +3171 10 +2273 10 +3375 3 +2209 6 +2851 2 +1316 3 +3785 6 +3668 8 +678 4 +3604 7 +3133 10 +1967 5 +254 6 +2175 9 +2619 10 +3425 8 +1921 2 +1895 7 +2781 7 +747 5 +3027 8 +1582 10 +2156 2 +1705 2 +142 10 +2922 9 +87 9 +2535 6 +2624 3 +2596 3 +3152 3 +1758 1 +1642 5 +1274 5 +2318 3 +2609 9 +1795 10 +993 8 +839 7 +700 10 +2971 2 +3278 1 +3266 1 +2900 4 +1841 5 +2338 4 +2353 7 +2718 8 +2117 4 +955 7 +1663 2 +2930 8 +1405 8 +1751 1 +1847 5 +2888 5 +619 2 +1495 10 +1827 3 +2583 4 +4059 2 +2441 10 +471 8 +3001 5 +489 6 +2922 8 +3143 1 +1190 9 +235 1 +3849 8 +1391 9 +2917 8 +3836 9 +3760 3 +878 4 +1067 9 +3887 7 +2617 9 +2885 6 +1647 5 +776 9 +1986 9 +2081 1 +3772 4 +2516 3 +2760 10 +65 9 +942 2 +223 9 +3817 9 +977 7 +1654 5 +2963 7 +2599 7 +1756 8 +1715 10 +947 2 +1532 6 +65 6 +3133 10 +583 4 +2094 4 +724 9 +2191 10 +1467 10 +3013 9 +1477 3 +382 9 +1461 1 +3658 7 +2626 9 +3200 1 +3371 6 +4079 5 +3058 3 +605 6 +2811 7 +3553 1 +1942 7 +3466 4 +940 7 +660 8 +2888 5 +3090 6 +1810 2 +2963 1 +3239 7 +2303 3 +1670 10 +496 7 +211 3 +1320 5 +3672 10 +2720 2 +3976 4 +1718 7 +3166 5 +3829 1 +215 6 +2918 9 +472 10 +2736 2 +794 1 +2494 1 +1493 3 +261 6 +1956 3 +146 6 +928 9 +3493 10 +2533 8 +1941 9 +2098 1 +4090 2 +1157 5 +3283 5 +2744 1 +1239 9 +3837 2 +1011 2 +1635 5 +30 9 +1449 5 +3137 2 +3188 8 +3621 
6 +1270 9 +148 9 +1486 8 +3255 1 +1833 8 +3170 5 +1359 3 +3614 6 +926 8 +3692 6 +174 8 +3870 8 +3559 9 +1444 3 +1781 8 +994 2 +839 3 +1880 6 +3972 4 +1959 4 +1299 7 +3647 2 +2337 1 +1985 4 +1648 7 +705 1 +1994 6 +1005 5 +811 1 +3310 7 +464 5 +424 6 +385 10 +653 5 +1669 3 +3109 4 +875 8 +1144 2 +954 10 +1703 5 +327 1 +3600 8 +3006 1 +519 6 +1298 8 +3093 5 +1932 3 +1953 7 +10 6 +3606 1 +2383 3 +2947 9 +3537 3 +2803 7 +2514 4 +775 5 +3214 4 +1961 8 +366 3 +1582 9 +1287 6 +2457 3 +1072 2 +2354 4 +2110 3 +1718 8 +1585 6 +3362 7 +875 1 +114 3 +179 6 +174 4 +1479 3 +2347 7 +1574 6 +131 8 +2819 1 +4066 6 +554 5 +3660 1 +3713 8 +1722 4 +2032 7 +4040 4 +2327 1 +3218 9 +2304 4 +1208 2 +1272 7 +3973 2 +2546 8 +1244 7 +167 10 +1252 9 +4012 8 +1738 4 +3182 2 +3331 7 +1971 5 +2011 2 +60 7 +2230 6 +311 9 +3097 3 +3544 1 +396 1 +1450 5 +1281 9 +3761 1 +1315 8 +775 8 +3120 7 +683 1 +2369 7 +245 4 +40 1 +3887 5 +648 6 +3911 8 +1811 9 +2978 10 +2214 6 +1200 3 +662 10 +3517 5 +1484 9 +2694 1 +1649 4 +3097 1 +3759 7 +3353 7 +2757 5 +2043 8 +2335 7 +2178 8 +266 5 +2378 3 +3650 3 +3902 10 +3780 2 +442 3 +1348 5 +3576 4 +3674 1 +5 6 +2134 10 +525 1 +2398 8 +667 6 +1302 3 +2670 4 +3730 7 +3069 1 +1588 8 +2017 3 +3600 5 +847 1 +1333 5 +167 7 +1901 7 +3950 6 +1703 2 +2472 10 +2305 7 +3644 10 +838 9 +3468 2 +1665 7 +1863 2 +2069 10 +803 1 +2941 10 +3930 6 +1134 3 +112 4 +1901 7 +2829 9 +4032 2 +3564 7 +2334 4 +860 3 +549 1 +1721 5 +2537 1 +2876 7 +93 1 +2836 5 +2078 3 +70 5 +722 3 +623 1 +3732 8 +2760 8 +3092 8 +3557 5 +1105 7 +2407 1 +2697 7 +3798 6 +1644 9 +1985 8 +3751 6 +3006 3 +28 9 +2503 3 +3489 10 +14 5 +2102 7 +2773 7 +835 5 +858 7 +3046 6 +2470 7 +2434 4 +784 8 +2623 8 +1409 9 +1491 6 +1584 4 +477 7 +3550 2 +3638 7 +3988 7 +970 8 +1608 4 +2364 3 +2241 4 +3477 3 +3306 1 +1007 9 +3152 7 +1584 1 +1692 1 +3136 7 +1298 9 +1255 1 +1786 3 +300 7 +3535 9 +910 8 +3595 3 +826 1 +2153 8 +556 6 +1466 8 +2361 3 +3294 7 +1322 2 +2067 8 +252 9 +1180 7 +2591 9 +1597 7 +2285 10 +1746 10 +1650 7 +549 2 
+626 8 +3492 6 +331 5 +2286 5 +3405 7 +2605 10 +3475 7 +4 10 +2768 8 +1310 6 +1797 3 +589 3 +1515 5 +3233 9 +2344 7 +2541 2 +1787 7 +4045 7 +2420 1 +1966 4 +1472 2 +1069 1 +1283 7 +858 7 +596 4 +976 10 +1710 7 +333 1 +1013 7 +4034 1 +539 7 +4080 5 +3437 8 +2147 2 +159 6 +2971 3 +2139 9 +1591 8 +53 6 +2390 5 +1148 4 +2909 2 +1482 3 +3832 4 +525 2 +2189 3 +2575 4 +1690 7 +3861 10 +3784 7 +1114 4 +2781 2 +1732 8 +128 6 +1399 2 +3284 2 +2348 3 +3542 9 +1330 9 +1386 4 +1547 7 +2263 4 +1135 6 +1884 1 +3998 5 +1497 7 +2167 3 +368 1 +2138 3 +4037 5 +2597 9 +2724 3 +2630 4 +1723 1 +1748 8 +2450 2 +3249 4 +1424 1 +3584 8 +4089 8 +2332 3 +2750 2 +1749 4 +3349 2 +1757 2 +519 5 +638 10 +294 7 +368 3 +3166 8 +1629 3 +1503 10 +3487 6 +2064 8 +3065 8 +745 5 +291 7 +3601 6 +1104 1 +3720 10 +2689 8 +639 9 +637 10 +3459 6 +684 5 +157 1 +2870 2 +3527 10 +2917 4 +808 8 +3481 3 +3827 7 +2632 10 +1721 7 +3048 8 +680 1 +80 8 +439 2 +2997 9 +2375 5 +3000 7 +23 3 +1671 6 +1170 5 +2412 4 +1315 3 +1559 5 +3466 3 +128 9 +2235 4 +1234 8 +130 7 +2290 5 +1172 3 +988 4 +3293 6 +3955 5 +3742 2 +3341 5 +1981 3 +3863 1 +1455 5 +3057 2 +2747 2 +894 10 +506 9 +3800 3 +3837 1 +3078 5 +1080 2 +2605 8 +2867 3 +2190 8 +3406 10 +1964 3 +1570 3 +3135 6 +273 2 +3114 8 +556 9 +3506 8 +3403 4 +1560 3 +1661 2 +2350 8 +401 2 +800 10 +3005 1 +3493 1 +1726 2 +3423 10 +2471 7 +2887 5 +3444 8 +3666 6 +315 5 +1658 6 +1531 8 +1046 8 +3627 6 +3978 7 +3622 4 +1222 3 +2234 8 +2044 3 +178 2 +783 10 +1162 4 +3791 1 +2718 2 +3112 9 +2532 1 +1030 5 +1084 6 +805 10 +4067 2 +2768 2 +1309 5 +3937 1 +3020 3 +3393 5 +2259 4 +2650 2 +2210 7 +3125 1 +2915 6 +2796 9 +2357 1 +2228 7 +3486 3 +1937 6 +2562 7 +2534 5 +3545 9 +390 8 +695 7 +320 10 +2230 7 +764 4 +1925 6 +2854 7 +1803 7 +2432 5 +44 6 +763 9 +1233 9 +3689 4 +2286 9 +1247 3 +2391 4 +3349 6 +541 3 +3030 5 +2707 9 +2244 5 +2029 7 +3454 3 +1038 6 +2677 7 +3681 6 +2450 6 +2275 8 +1788 6 +3029 6 +2 3 +3667 1 +2126 5 +310 9 +1042 9 +4090 8 +3951 6 +3556 6 +3841 8 +3691 7 +1078 4 
+1289 9 +2909 2 +2206 4 +3091 1 +1624 9 +1681 4 +437 8 +3112 9 +2679 9 +921 7 +1320 7 +2201 8 +425 7 +2930 2 +67 6 +1225 9 +933 5 +3952 5 +3123 1 +615 7 +3958 7 +1579 4 +3453 6 +944 7 +1351 1 +537 3 +1799 4 +2370 1 +2540 7 +1640 9 +3705 3 +1689 1 +302 3 +255 9 +613 2 +2241 9 +465 2 +1907 7 +251 1 +3398 6 +3306 7 +2646 9 +3697 7 +2996 10 +1177 6 +2513 5 +573 2 +383 9 +1723 6 +2759 2 +1603 1 +1701 10 +1969 2 +3900 2 +2828 4 +696 7 +2191 10 +3280 7 +3241 6 +1950 9 +0 1 +3352 5 +3994 8 +2041 4 +1157 10 +1108 1 +1533 5 +3628 6 +402 6 +377 6 +3321 4 +1876 7 +2851 8 +2439 8 +2134 5 +1246 1 +2580 1 +254 3 +276 9 +1739 1 +2001 8 +1303 8 +3666 3 +43 5 +350 9 +1619 1 +2449 3 +3991 4 +3133 4 +2754 2 +2808 2 +1103 7 +1933 1 +66 8 +3431 3 +1685 4 +781 10 +615 5 +1513 5 +230 1 +395 4 +2410 5 +3608 6 +2031 6 +3742 3 +868 2 +1367 6 +3929 6 +714 1 +1885 7 +3334 5 +334 5 +1331 4 +3245 5 +2617 1 +2360 4 +692 6 +2537 1 +2088 2 +2656 9 +607 2 +2924 1 +2619 6 +3043 4 +278 6 +1781 2 +1913 5 +1933 5 +2976 8 +3063 6 +1946 6 +608 6 +1187 7 +4070 8 +199 4 +1766 8 +455 6 +2961 1 +581 8 +2428 8 +3609 7 +3068 5 +3723 10 +3046 9 +227 7 +523 2 +1078 4 +2307 10 +513 8 +3658 1 +2901 4 +34 8 +2467 1 +2915 8 +3072 7 +3147 10 +1228 8 +1023 7 +2446 4 +1128 5 +398 3 +4016 5 +305 5 +274 2 +1020 5 +1036 4 +3663 10 +3575 10 +1579 2 +1479 6 +3604 2 +2575 3 +716 4 +2443 4 +1533 5 +3364 8 +66 2 +2500 3 +3487 9 +2246 10 +150 7 +4006 9 +4040 4 +2430 3 +4087 9 +1824 4 +11 4 +3395 6 +1865 7 +2906 6 +1713 5 +3445 1 +3127 5 +2756 6 +2413 6 +340 1 +3958 4 +2097 10 +428 5 +2381 2 +1517 10 +1242 10 +1686 6 +1966 1 +3688 3 +2135 7 +2223 10 +1379 8 +3244 3 +3215 7 +3005 4 +790 1 +1388 7 +391 7 +2936 9 +1950 7 +1586 3 +210 1 +1433 1 +3135 8 +1670 1 +1243 3 +1335 5 +163 6 +1191 5 +3350 7 +213 6 +4045 9 +3476 10 +462 9 +3248 4 +3436 3 +1127 6 +1658 5 +1347 4 +2932 5 +2007 10 +1002 6 +1304 3 +2334 3 +192 2 +1257 9 +2227 1 +3308 1 +2814 3 +305 3 +4038 7 +2605 8 +209 7 +1887 7 +3522 1 +2492 4 +3894 7 +3459 6 +3142 10 +3991 1 
+3256 3 +220 2 +1541 3 +2844 3 +3940 1 +3425 6 +1313 4 +2499 5 +3559 9 +343 2 +3789 5 +3440 10 +708 10 +1613 5 +4054 10 +729 10 +2120 4 +1730 6 +2600 10 +786 1 +3192 9 +3450 4 +2610 6 +1284 6 +37 5 +2563 4 +2821 6 +2018 1 +1970 4 +3072 10 +1158 6 +904 10 +936 4 +1861 1 +1580 8 +2758 6 +1760 2 +1345 8 +2884 1 +2442 1 +3824 6 +323 3 +3813 10 +3198 2 +3754 10 +3437 6 +3739 5 +3834 8 +2605 10 +2936 2 +1880 5 +3439 3 +2012 2 +2602 9 +2743 6 +1670 7 +1107 9 +577 8 +1446 6 +1641 8 +4044 8 +1785 10 +4063 3 +963 3 +2360 7 +2143 4 +631 5 +2770 8 +2246 1 +2591 7 +1715 7 +2399 7 +865 3 +248 10 +2736 4 +3382 2 +2004 10 +2353 10 +3988 7 +461 4 +3776 6 +3037 8 +3479 2 +2953 9 +431 5 +3361 9 +2087 6 +829 5 +1176 5 +1509 1 +64 9 +1950 6 +70 5 +2499 10 +1530 9 +3704 8 +2965 1 +1674 5 +541 6 +2724 1 +614 1 +2173 9 +528 9 +750 5 +2849 5 +4054 6 +2821 7 +2071 3 +3121 9 +3567 1 +2906 5 +2923 9 +854 6 +3856 3 +782 4 +531 3 +36 10 +1231 4 +1810 3 +3397 8 +3603 2 +3463 4 +1604 1 +3527 9 +3197 3 +1486 10 +2829 5 +4009 1 +1532 7 +1175 9 +2229 4 +758 10 +1525 6 +3036 3 +1694 3 +999 1 +1823 4 +913 8 +3362 6 +2952 9 +3089 7 +753 10 +2687 7 +1754 7 +1881 1 +1237 6 +3456 10 +3011 4 +3430 6 +31 6 +951 9 +3084 8 +2250 6 +448 6 +3423 4 +2852 5 +2908 9 +4023 3 +3381 8 +4050 7 +747 3 +749 6 +1208 9 +2120 4 +2983 2 +446 4 +262 9 +2805 5 +857 8 +2171 4 +1242 8 +3981 7 +2653 6 +2283 10 +1543 10 +23 1 +1594 5 +4005 5 +1599 5 +2883 3 +3549 2 +460 5 +1017 2 +2773 8 +1935 1 +2083 8 +125 6 +1009 7 +2563 1 +254 1 +2960 10 +2676 1 +1954 10 +3727 5 +1390 6 +2767 6 +1238 8 +1064 5 +3526 5 +3394 4 +2459 4 +3292 8 +557 4 +1915 2 +2885 4 +522 5 +1848 5 +2737 3 +3946 7 +1737 5 +2257 7 +3592 4 +2320 1 +3302 10 +3434 4 +3461 7 +3007 8 +2558 10 +1675 5 +2523 1 +723 7 +3009 5 +1337 3 +3338 7 +1106 5 +2530 5 +2830 4 +2189 4 +74 10 +3974 10 +802 6 +3327 9 +982 1 +3260 3 +1319 1 +1198 6 +658 2 +2103 5 +4028 8 +47 4 +3675 2 +3015 10 +2475 10 +2789 1 +3871 8 +4089 6 +2461 5 +63 1 +1527 8 +1007 8 +3740 6 +2447 3 +3136 4 +1291 
2 +975 6 +114 8 +3956 1 +1561 2 +1581 5 +3008 1 +862 5 +3916 9 +2829 2 +3533 9 +859 5 +3800 5 +2568 3 +1853 3 +1491 9 +2359 3 +2750 2 +2781 10 +2605 9 +2696 4 +2885 10 +976 8 +205 5 +1297 9 +2274 1 +1614 8 +1070 1 +780 7 +2903 3 +2126 3 +2811 8 +2572 3 +403 4 +541 3 +3383 2 +596 3 +3481 3 +794 7 +2605 7 +2808 9 +2253 3 +57 5 +3523 9 +649 9 +305 3 +3719 2 +2525 9 +3789 4 +1490 2 +3408 1 +825 4 +1038 4 +752 6 +597 4 +631 8 +3349 5 +3790 6 +3775 6 +393 7 +871 3 +1862 10 +2850 7 +1909 4 +3082 7 +670 4 +191 7 +1737 3 +639 2 +4018 8 +1718 8 +311 7 +4081 7 +176 10 +92 9 +849 2 +3130 5 +1542 9 +2422 5 +3978 9 +2606 3 +2164 1 +2940 10 +1223 8 +1207 7 +2067 4 +1123 6 +1777 1 +1010 4 +2333 4 +3535 1 +1159 2 +3640 10 +3455 10 +870 3 +1666 10 +4002 4 +3374 7 +574 9 +794 10 +1852 1 +3033 9 +3344 7 +1505 9 +1418 7 +1254 2 +1426 6 +1210 5 +1344 7 +3439 2 +190 6 +2310 3 +3417 1 +3218 1 +3767 3 +2740 3 +3469 5 +1222 2 +2083 5 +1295 9 +380 1 +4024 2 +2008 7 +2146 8 +42 3 +742 5 +2040 3 +258 5 +3952 7 +2113 9 +2801 4 +2245 9 +2645 4 +406 10 +11 1 +3805 8 +4021 1 +3852 1 +4009 9 +1355 7 +681 2 +3999 1 +3860 7 +3918 2 +1491 1 +879 3 +79 8 +2761 1 +2495 1 +3212 9 +1934 8 +2688 6 +225 1 +3301 1 +3774 5 +1241 2 +1866 9 +1305 7 +802 6 +873 2 +1863 6 +181 9 +2133 10 +963 4 +2507 9 +3048 10 +10 4 +3178 8 +1307 6 +3644 6 +3295 4 +3342 1 +612 7 +1626 4 +3110 4 +1001 9 +3538 8 +3001 3 +1299 9 +3974 4 +1072 4 +3947 10 +1275 6 +883 2 +1872 8 +2996 8 +1726 1 +2986 9 +3383 10 +3697 10 +2214 7 +1144 1 +3011 10 +122 6 +1989 4 +253 2 +3604 2 +436 7 +3439 9 +3014 9 +1132 5 +2497 5 +1760 7 +3698 5 +3682 8 +2715 8 +2697 6 +2802 3 +274 3 +1324 8 +1397 8 +443 5 +1475 9 +3836 5 +1105 2 +2007 3 +1085 9 +1553 4 +2404 1 +582 6 +955 8 +523 1 +3553 9 +2322 8 +1896 7 +151 8 +2408 5 +1242 2 +3562 4 +1487 4 +1034 4 +1626 2 +1391 6 +341 3 +382 8 +2302 6 +612 8 +2868 8 +3886 9 +564 5 +30 10 +3082 1 +3902 10 +2355 1 +2595 5 +1375 10 +432 10 +2434 1 +2049 2 +3927 6 +2082 10 +3262 6 +2287 7 +1298 8 +2777 8 +2651 9 +2951 
8 +1161 7 +0 2 +2067 9 +1207 9 +933 9 +3419 6 +1057 6 +1544 9 +3706 1 +1799 3 +2420 7 +1256 3 +2686 6 +940 1 +3258 2 +3531 9 +2370 2 +2615 3 +409 3 +3640 1 +170 1 +918 3 +1854 3 +3581 5 +1183 7 +139 10 +2701 5 +3094 8 +2015 8 +2730 10 +3635 8 +3753 1 +1954 8 +2684 3 +874 7 +2279 6 +1426 4 +1043 8 +555 9 +1957 7 +529 2 +150 5 +3874 6 +1143 4 +3684 9 +990 2 +2689 5 +3365 7 +1868 1 +3312 1 +924 6 +2338 8 +502 2 +1681 9 +3819 8 +784 10 +3578 6 +3793 8 +3022 2 +3336 1 +330 3 +1699 1 +1706 3 +467 5 +3085 5 +1614 8 +850 5 +729 5 +1346 9 +2587 9 +3329 8 +931 7 +3438 9 +94 2 +414 10 +1055 9 +2744 9 +2746 3 +3793 3 +3996 3 +459 3 +1391 1 +421 3 +2880 5 +3881 4 +306 6 +3279 6 +238 8 +2838 8 +202 1 +1912 8 +783 10 +1079 8 +3410 3 +3103 3 +780 8 +1387 9 +3247 5 +441 7 +3453 1 +229 10 +4071 5 +351 3 +1242 6 +4071 5 +284 5 +2495 10 +3582 6 +193 7 +3878 7 +1835 7 +3920 10 +366 3 +161 8 +3202 7 +1568 9 +509 3 +2408 7 +1331 5 +1072 4 +3296 8 +2598 2 +759 10 +2490 1 +2180 9 +1852 5 +2030 8 +2465 4 +1911 5 +3244 3 +2681 3 +717 7 +2784 4 +3661 9 +3235 8 +2862 1 +1307 9 +334 1 +1703 4 +106 9 +243 6 +549 4 +1384 1 +339 4 +3729 10 +848 1 +104 7 +1213 6 +2601 5 +1153 4 +1457 2 +126 7 +1842 8 +2111 2 +1553 4 +433 8 +1721 7 +893 9 +2502 3 +4031 7 +3887 2 +3853 6 +3518 8 +1580 8 +1625 9 +3938 1 +2220 10 +1079 6 +3787 4 +3303 4 +3085 2 +1625 4 +4088 9 +147 4 +1678 8 +438 2 +28 6 +2776 6 +3305 10 +55 6 +3237 8 +468 6 +2505 3 +168 5 +2744 7 +3060 5 +1359 7 +1126 5 +1796 2 +3179 2 +2160 7 +2788 6 +741 5 +2774 3 +2626 5 +1023 1 +326 9 +1254 5 +729 7 +497 10 +1630 5 +2799 7 +2377 4 +584 8 +2909 3 +2738 8 +3993 9 +1646 8 +2446 3 +1681 9 +2129 3 +1006 9 +873 4 +2022 7 +3591 10 +3020 6 +1004 8 +122 10 +2016 6 +951 3 +3229 3 +891 1 +1945 5 +2096 6 +3140 8 +146 5 +1885 10 +430 1 +2179 6 +1376 2 +3049 8 +3672 7 +4058 5 +1300 6 +2697 4 +481 3 +1491 5 +3664 2 +2914 6 +2428 1 +2025 10 +3740 5 +3495 5 +3522 6 +204 4 +1433 9 +3559 5 +3491 8 +775 9 +163 8 +4026 3 +1105 2 +2158 8 +2307 4 +3052 8 +1218 7 +1409 9 
+2749 3 +1983 5 +3082 1 +2100 9 +410 8 +3202 2 +2886 2 +2837 5 +2042 6 +1712 9 +1585 7 +831 10 +141 7 +1485 4 +1380 8 +3328 4 +2552 9 +3442 10 +28 4 +3295 5 +448 7 +716 5 +3798 7 +916 8 +4084 7 +617 5 +4088 2 +1303 2 +230 5 +189 2 +2141 10 +2471 7 +3445 7 +3267 9 +3805 2 +1588 9 +113 9 +2365 9 +189 1 +156 5 +3652 10 +3773 8 +67 1 +249 6 +573 7 +3179 8 +4062 5 +2733 6 +1974 9 +3021 9 +3017 5 +279 3 +3550 4 +923 8 +2035 8 +395 4 +4089 8 +2537 5 +1923 6 +890 5 +1996 4 +3414 7 +2303 3 +1100 2 +1671 4 +1092 2 +466 6 +2381 9 +3742 1 +1047 7 +1071 3 +4085 9 +3150 4 +2563 2 +595 2 +3896 8 +3174 8 +3984 2 +1752 10 +531 7 +73 7 +1139 7 +2312 7 +263 8 +1994 10 +1441 9 +2464 10 +2079 4 +3827 8 +820 2 +3448 10 +148 1 +3872 9 +3197 6 +680 9 +3229 3 +1794 8 +3952 6 +3950 6 +2566 5 +2126 4 +1666 2 +3131 2 +2469 9 +2005 3 +1953 3 +3515 2 +1273 6 +648 8 +1925 10 +1655 10 +1907 2 +3675 6 +811 6 +779 2 +1842 1 +2046 1 +3744 3 +1956 8 +529 5 +3925 6 +2731 10 +3582 7 +843 4 +3598 7 +944 6 +879 5 +1180 5 +542 6 +3156 4 +2067 3 +411 10 +1626 6 +3324 5 +4093 7 +2506 7 +2458 8 +2468 10 +2396 8 +2503 9 +2367 10 +3787 6 +2803 2 +4077 2 +1523 5 +2728 1 +446 6 +2513 3 +3613 10 +1775 2 +3457 3 +3930 4 +1573 1 +2969 2 +863 8 +3207 2 +1758 5 +3306 4 +3130 2 +1330 7 +3733 4 +2304 9 +58 6 +1102 10 +2276 4 +1318 10 +72 8 +1817 9 +1224 2 +2639 1 +451 9 +401 9 +2464 6 +560 9 +1965 4 +287 10 +1940 7 +24 6 +1946 10 +3108 9 +778 7 +1854 9 +3398 1 +2151 3 +2923 5 +2725 9 +3378 8 +1374 7 +845 3 +688 5 +983 3 +1179 3 +3101 9 +517 3 +2542 3 +2735 10 +1047 1 +1644 8 +1361 10 +2310 9 +2434 1 +3206 3 +535 7 +102 6 +404 10 +3868 5 +3149 5 +2435 6 +251 7 +2300 10 +1969 7 +598 7 +923 5 +1468 8 +476 10 +2255 4 +828 2 +3250 8 +885 2 +1345 9 +1474 6 +3764 1 +502 8 +71 6 +967 9 +3653 10 +3014 4 +3569 7 +2820 4 +1316 6 +1736 3 +2992 3 +2360 8 +591 2 +832 5 +3902 10 +2303 3 +791 4 +1749 6 +958 8 +2051 10 +2864 3 +2891 4 +241 4 +1918 10 +331 5 +1104 9 +1243 2 +535 10 +2948 8 +2058 8 +2574 5 +2316 9 +2937 5 +1369 2 +1267 6 
+1738 6 +1366 10 +2937 5 +2859 6 +566 8 +3383 4 +3538 2 +1572 9 +62 3 +3980 8 +2111 4 +1024 8 +1804 9 +2077 6 +1541 9 +229 4 +3343 5 +90 7 +945 1 +2381 4 +371 4 +2661 2 +3672 6 +3246 6 +2902 8 +3771 5 +3020 6 +3744 3 +1319 6 +3197 6 +2389 10 +46 6 +1502 9 +28 1 +2857 7 +331 5 +1607 2 +2794 10 +495 8 +2281 6 +880 4 +847 10 +3205 8 +4019 5 +1949 8 +3477 6 +1990 8 +344 5 +2752 8 +2034 3 +3588 7 +1771 5 +505 9 +2026 1 +1222 8 +933 2 +188 1 +2132 5 +3767 9 +3484 4 +2768 5 +1482 6 +1943 10 +1640 8 +2812 5 +3279 6 +3959 7 +2610 1 +2045 9 +433 1 +529 2 +873 10 +1385 1 +1994 8 +744 7 +2665 9 +3311 6 +211 7 +1250 1 +529 6 +759 10 +3624 8 +1505 4 +773 7 +1594 1 +3429 9 +1466 9 +2224 6 +136 3 +3932 4 +4086 8 +32 5 +3534 7 +245 3 +3196 7 +1338 9 +1794 1 +3218 10 +284 4 +1747 6 +3710 7 +3343 8 +2297 5 +2521 4 +3802 10 +3643 10 +591 2 +4093 3 +1801 2 +1185 8 +2421 9 +1381 2 +1205 5 +330 2 +3644 5 +1504 4 +3281 9 +3169 9 +2191 6 +3037 3 +3072 6 +1778 5 +221 8 +362 10 +3549 8 +834 5 +2804 7 +204 10 +3044 6 +3720 1 +3166 8 +1170 2 +3210 2 +444 6 +2219 8 +2214 5 +2229 8 +2406 2 +2538 9 +1531 8 +1341 4 +4000 5 +1662 9 +330 6 +3485 6 +1474 7 +2921 1 +773 10 +3340 8 +432 6 +1283 6 +2487 6 +1041 1 +3626 7 +2177 5 +610 8 +2025 2 +2665 2 +1007 10 +882 9 +421 8 +895 4 +1596 2 +1170 9 +386 1 +863 10 +1216 2 +3614 4 +2822 3 +1816 3 +2434 9 +3923 8 +2717 7 +2002 1 +1745 8 +1417 10 +446 10 +396 7 +517 9 +534 9 +2942 6 +1256 7 +4068 10 +911 5 +2907 2 +1927 4 +776 3 +3477 1 +785 4 +2842 2 +760 9 +3268 6 +3425 1 +1723 9 +1879 5 +660 4 +415 4 +1791 2 +811 6 +248 5 +236 2 +287 10 +1817 4 +2630 2 +2992 2 +1950 6 +3474 5 +1824 1 +3571 2 +2758 5 +3343 7 +1821 2 +2972 6 +1291 2 +2746 7 +408 9 +4042 10 +526 4 +3311 1 +2222 2 +3155 1 +3408 5 +3727 9 +3716 7 +1321 4 +172 6 +534 2 +1827 4 +1560 1 +2654 2 +2937 3 +3102 1 +2640 9 +3527 8 +2810 8 +746 1 +3423 9 +694 9 +41 6 +20 5 +1888 2 +2831 3 +1597 6 +12 9 +2351 4 +550 10 +1688 5 +4070 3 +3345 4 +15 9 +242 6 +2823 4 +2870 6 +3587 3 +612 3 +3067 4 +1665 5 
+3909 7 +3483 9 +710 5 +1307 9 +459 5 +3370 10 +3711 6 +491 3 +1938 2 +2272 2 +2118 2 +255 10 +129 5 +1726 6 +2144 10 +3655 1 +3228 1 +19 7 +608 9 +2167 9 +3599 10 +729 9 +3547 8 +2491 1 +3318 4 +815 7 +3745 8 +1743 3 +3102 5 +3946 7 +289 3 +3352 8 +4042 4 +3943 7 +3786 1 +2910 8 +2412 7 +3851 8 +3896 10 +1297 8 +1075 8 +3520 5 +717 4 +2416 9 +3535 2 +1494 3 +3614 4 +327 3 +3272 7 +3078 7 +1952 3 +928 8 +1322 1 +2563 3 +1412 5 +623 8 +458 6 +3754 8 +2197 10 +481 8 +3081 2 +2712 6 +2057 1 +915 6 +3583 9 +2544 3 +2841 5 +3389 1 +2732 8 +393 4 +2141 6 +2216 1 +2541 6 +1211 5 +3478 10 +525 1 +2292 3 +2483 7 +696 9 +2828 1 +915 5 +1047 1 +1755 6 +2524 6 +2721 10 +1936 8 +764 10 +2789 7 +3012 3 +1266 10 +4085 8 +3797 2 +2110 8 +2170 10 +688 4 +974 5 +2386 8 +1075 7 +3606 7 +3612 2 +2545 5 +1956 7 +3552 5 +3585 1 +110 10 +163 4 +699 1 +798 5 +1452 10 +3588 10 +1014 5 +1249 1 +3817 9 +866 10 +3177 10 +276 7 +2056 1 +1787 8 +4024 4 +3284 10 +2852 9 +994 10 +3106 7 +445 2 +970 9 +1140 10 +493 4 +1433 9 +3762 2 +3608 3 +887 7 +1315 2 +2146 8 +3944 1 +2345 1 +1994 5 +279 6 +784 2 +137 6 +3041 3 +755 6 +2503 4 +2778 3 +3646 9 +2580 4 +2147 4 +1542 3 +2530 6 +2357 7 +1586 10 +503 2 +3471 4 +1166 9 +3133 8 +2226 9 +483 8 +3475 6 +1640 3 +3188 10 +1548 6 +3520 5 +965 1 +3348 1 +189 10 +3796 9 +3653 1 +3804 6 +371 1 +3046 8 +2189 2 +2543 5 +3253 2 +225 3 +2033 7 +2182 10 +1975 10 +373 4 +137 4 +1033 4 +3898 8 +129 6 +101 10 +3114 9 +3741 10 +415 1 +752 1 +1383 10 +3232 3 +3534 6 +2786 6 +1320 7 +3762 9 +3929 9 +1238 1 +3353 7 +3911 7 +189 9 +1872 3 +3941 3 +3292 1 +2412 9 +1105 3 +1231 9 +963 3 +1098 4 +3351 6 +3409 4 +75 9 +365 6 +4088 2 +570 4 +3450 7 +490 6 +3582 3 +1764 5 +1658 9 +1235 5 +389 6 +1015 3 +1108 8 +4009 7 +1420 10 +4007 3 +1191 4 +3350 10 +805 6 +855 3 +2683 6 +564 3 +1640 10 +3632 7 +1769 6 +295 10 +2004 5 +3962 4 +3720 7 +833 6 +2054 9 +351 3 +3162 6 +3564 8 +1557 5 +2737 2 +2530 8 +1694 10 +3637 9 +1107 2 +1243 3 +474 1 +835 10 +3981 4 +3722 8 +52 5 +2942 3 
+3461 9 +3959 10 +4080 1 +3554 6 +1633 7 +1591 7 +2656 7 +540 2 +2305 8 +842 7 +3146 10 +1251 3 +2403 2 +835 5 +773 2 +3458 7 +3165 4 +433 1 +2319 2 +184 10 +3171 4 +1316 2 +3103 5 +195 9 +3694 4 +2688 10 +1936 2 +848 6 +3991 7 +3714 7 +16 10 +2050 4 +1957 4 +1813 7 +3883 3 +3129 10 +1555 7 +882 1 +3957 1 +1613 10 +2381 3 +1205 6 +96 4 +3400 2 +2476 1 +3132 6 +648 5 +2613 9 +307 6 +3069 2 +340 1 +4033 7 +3613 3 +3821 6 +3658 7 +588 10 +3796 5 +1901 1 +2932 8 +533 9 +2864 1 +2976 6 +4058 5 +4000 6 +52 7 +2606 1 +1784 1 +973 9 +1337 6 +1521 6 +2273 9 +50 9 +877 4 +1265 2 +3981 9 +772 3 +2543 10 +2910 10 +148 1 +929 3 +3817 10 +1356 9 +2603 10 +3064 10 +236 3 +1714 4 +2242 6 +2907 4 +1879 10 +2685 8 +2129 1 +495 9 +3688 3 +2593 6 +1157 2 +1048 7 +3763 5 +2224 6 +3561 4 +2035 3 +1208 2 +1515 1 +611 7 +2020 2 +2615 10 +889 2 +3331 2 +2320 2 +2471 4 +3194 7 +2715 2 +3911 3 +2493 3 +2034 4 +2575 8 +2170 3 +1348 6 +1592 5 +3146 3 +1064 1 +1493 3 +724 6 +907 1 +3502 3 +3672 7 +299 4 +2517 3 +3487 6 +3732 2 +964 2 +819 2 +1960 3 +2892 7 +2993 6 +1101 9 +1240 7 +1560 9 +741 6 +1046 9 +2287 4 +502 8 +1311 6 +3071 8 +2469 6 +2760 1 +2553 9 +1073 7 +3543 2 +2323 1 +2572 7 +2027 6 +655 10 +575 7 +2066 10 +1236 3 +1411 1 +684 3 +1738 2 +1257 5 +2553 3 +2663 7 +3251 4 +1204 9 +1806 1 +3003 8 +762 6 +3163 7 +1754 7 +4040 9 +2394 2 +2892 3 +637 1 +1310 6 +697 3 +3016 2 +3237 7 +1357 7 +1590 7 +646 1 +4003 10 +3500 8 +960 6 +1841 7 +1620 7 +1396 3 +137 4 +2583 3 +3340 8 +2116 3 +4047 9 +2384 2 +2503 2 +2827 5 +1135 6 +346 7 +3504 3 +3738 8 +1658 2 +2218 6 +3144 2 +1604 1 +2074 1 +1379 3 +667 4 +1595 2 +2635 8 +992 3 +876 10 +1063 3 +3065 10 +1445 9 +2430 2 +2090 9 +123 3 +3695 1 +3168 5 +2053 8 +281 6 +899 8 +1603 4 +3085 4 +583 9 +3737 8 +1113 1 +3894 10 +781 9 +1529 6 +242 6 +1746 6 +859 7 +557 5 +4039 2 +2021 5 +3493 9 +2449 6 +502 5 +2792 10 +2028 10 +1299 6 +2347 5 +2662 5 +4015 8 +2272 8 +3546 3 +3687 2 +2466 6 +1312 7 +2764 9 +3068 4 +2422 2 +1196 9 +3139 6 +904 7 +1365 6 +214 
2 +700 2 +449 6 +3611 3 +3476 8 +4069 10 +2743 1 +1171 3 +4075 10 +2356 8 +3758 8 +2310 10 +1809 9 +1628 6 +3410 3 +968 9 +3434 6 +314 7 +2523 1 +3429 9 +1426 10 +961 10 +1711 5 +403 3 +3823 7 +554 2 +3537 9 +3062 3 +360 7 +3181 7 +86 4 +3597 10 +3837 3 +3963 4 +3378 10 +2796 2 +2759 9 +273 8 +1666 6 +3315 1 +3729 6 +3574 7 +1220 9 +2887 9 +2860 5 +3324 6 +1048 9 +111 1 +3535 5 +195 3 +1970 7 +1497 10 +1656 8 +2179 8 +625 8 +1339 1 +571 2 +443 2 +1193 2 +309 1 +255 4 +2777 10 +1767 3 +2491 6 +1554 1 +3238 7 +2368 8 +2160 5 +2638 5 +2201 3 +2405 2 +968 8 +224 5 +2132 10 +1030 2 +373 9 +1363 3 +1169 10 +2470 8 +3607 7 +3155 7 +1502 6 +3687 9 +2833 5 +3829 1 +3777 10 +2998 5 +182 1 +1398 1 +3701 6 +1395 4 +341 4 +1627 1 +1747 9 +3265 6 +2489 8 +3944 6 +2359 7 +157 6 +2268 2 +1250 1 +2574 3 +4020 10 +1196 5 +82 10 +1647 2 +4038 10 +1089 3 +492 3 +3633 8 +1657 6 +517 5 +1698 6 +1222 8 +3172 4 +2166 2 +2571 6 +1656 5 +1343 3 +1362 9 +3554 9 +2941 2 +2767 10 +3191 7 +3471 6 +2537 8 +912 2 +1923 7 +685 5 +2697 3 +4048 4 +2929 6 +2271 4 +1786 6 +1470 10 +132 6 +4013 10 +1369 9 +1577 3 +894 6 +1411 2 +2049 6 +3885 7 +3098 8 +3958 8 +2841 3 +3300 4 +2503 10 +2301 7 +2377 2 +1867 9 +3131 9 +485 7 +3578 7 +1263 4 +2950 9 +1461 9 +950 4 +3771 8 +1189 10 +3455 7 +81 2 +1035 6 +3512 10 +3572 6 +2891 5 +2564 4 +1776 7 +3028 4 +829 7 +2937 8 +4088 9 +183 2 +623 2 +675 2 +441 1 +1852 8 +2703 6 +2825 6 +463 3 +303 9 +2953 8 +2093 5 +2215 3 +1619 9 +2906 8 +1180 3 +3956 1 +2573 6 +3032 3 +294 5 +2959 2 +177 7 +2688 7 +2499 1 +4038 1 +3699 3 +3859 7 +1459 6 +1642 1 +3293 2 +109 5 +772 3 +3819 6 +37 1 +1604 8 +1271 6 +3470 1 +2858 10 +2757 10 +1798 1 +992 1 +980 4 +645 7 +1328 5 +4002 10 +2225 10 +1932 7 +537 9 +1114 3 +3522 4 +911 10 +2633 10 +3001 8 +2258 1 +3882 1 +3206 9 +18 8 +3612 2 +1648 10 +1319 2 +3573 4 +359 7 +499 4 +3158 10 +695 6 +3165 10 +2167 2 +3646 4 +2764 2 +2407 9 +2155 7 +1448 6 +1667 1 +3127 1 +135 7 +1264 2 +764 6 +506 5 +3105 8 +937 5 +4010 2 +2231 9 +1652 2 +769 2 
+2574 7 +607 6 +1594 8 +651 9 +338 5 +3642 7 +3371 1 +3527 3 +138 5 +3833 3 +870 7 +2520 4 +3068 3 +1661 9 +43 10 +3234 4 +3111 6 +1625 9 +2898 8 +3525 1 +2530 3 +2917 7 +2001 7 +1175 10 +4027 9 +222 7 +2333 7 +1872 3 +2005 2 +1496 8 +2605 2 +3973 1 +2975 9 +2649 7 +1952 10 +3835 9 +3390 10 +2487 5 +3693 8 +3397 7 +176 7 +2214 3 +3599 2 +2217 1 +57 4 +1659 7 +1751 3 +3714 3 +2875 10 +1594 3 +3245 7 +1577 6 +75 5 +2430 2 +2506 9 +674 3 +1033 6 +2185 3 +1284 10 +2220 6 +3269 7 +1917 1 +2666 8 +2274 4 +3643 8 +1942 9 +3126 3 +2317 7 +2505 8 +1705 1 +854 2 +1642 9 +2639 5 +612 2 +1006 3 +56 9 +1023 2 +384 6 +3366 8 +455 1 +2153 6 +1079 7 +2176 4 +1206 9 +4081 6 +1285 2 +4094 2 +1142 10 +1307 3 +3587 4 +2844 7 +3226 7 +2457 3 +2921 6 +3132 2 +345 1 +649 4 +4065 10 +3693 3 +3563 5 +513 9 +1167 2 +33 2 +153 4 +3185 8 +1873 5 +1702 1 +3799 10 +756 7 +801 9 +3801 2 +827 3 +472 7 +1096 8 +268 3 +2160 8 +2931 4 +3145 5 +555 3 +3863 6 +2106 10 +2336 1 +1444 5 +3832 2 +131 7 +275 7 +679 9 +599 3 +1184 6 +1464 6 +2622 4 +248 6 +1312 4 +2100 8 +3531 7 +1235 6 +342 10 +2477 7 +247 2 +1424 6 +2989 6 +2123 7 +2465 6 +2203 1 +1443 10 +1773 3 +2058 3 +3027 10 +1329 7 +3578 7 +731 4 +632 5 +2656 3 +2901 5 +343 6 +2157 9 +596 3 +163 5 +3700 8 +2955 8 +2670 4 +3695 1 +3428 5 +727 6 +3111 7 +1253 6 +1870 8 +2787 6 +909 9 +1820 9 +3830 3 +3126 6 +3118 5 +3670 7 +3757 8 +3454 7 +2750 5 +2097 4 +3445 4 +1166 7 +3947 4 +3770 5 +2125 4 +2132 10 +3089 7 +250 10 +2423 4 +1737 7 +2687 1 +2502 2 +919 2 +2354 9 +3074 7 +2245 3 +2155 3 +3640 4 +1670 1 +82 1 +116 10 +2480 5 +2174 9 +2497 4 +1910 3 +3481 8 +957 10 +3011 3 +3902 9 +1144 2 +3894 10 +2668 3 +2266 9 +1738 1 +3002 6 +3280 6 +988 10 +3073 8 +1148 5 +3624 8 +3011 3 +442 3 +2771 5 +265 8 +1151 9 +676 3 +110 3 +1421 4 +2040 5 +281 8 +2145 3 +1174 3 +1546 5 +367 6 +413 1 +238 7 +1650 9 +937 6 +1036 10 +905 5 +2108 2 +2969 9 +2356 5 +1495 3 +1575 1 +52 5 +1737 2 +1457 1 +573 2 +3489 1 +3301 5 +2585 5 +3978 4 +3945 4 +2554 8 +1266 6 +1736 6 +2138 
1 +870 4 +4036 10 +924 10 +547 3 +943 3 +3859 4 +1390 5 +2047 8 +1852 2 +2780 3 +2684 5 +1665 10 +613 4 +1398 7 +3509 7 +1605 9 +740 1 +243 7 +2659 2 +899 6 +1406 1 +579 2 +3301 8 +2814 7 +467 1 +2460 3 +3172 7 +3746 5 +3238 2 +1272 2 +3292 9 +796 9 +151 4 +3114 9 +1102 4 +4072 7 +3927 5 +930 1 +3501 3 +3166 2 +571 7 +4062 2 +1367 2 +112 7 +2477 5 +860 4 +1057 9 +2105 10 +3283 5 +47 1 +3477 5 +891 8 +553 4 +2510 7 +285 1 +1484 8 +4022 2 +1414 8 +134 1 +1085 4 +2299 2 +2428 8 +1288 5 +1487 4 +1354 7 +1115 8 +1920 1 +615 8 +2485 5 +2692 9 +709 1 +893 7 +2945 3 +118 9 +1232 8 +3262 7 +1332 5 +2284 5 +2410 7 +3191 5 +3808 6 +3573 2 +2134 1 +1291 8 +2215 8 +4017 2 +13 9 +3263 8 +3875 10 +493 8 +864 2 +179 8 +2933 7 +663 9 +2633 7 +1485 6 +2004 2 +178 9 +3816 3 +678 6 +3019 7 +2792 10 +83 7 +3328 3 +77 2 +2991 6 +1643 4 +780 8 +2627 6 +3422 10 +4085 8 +593 1 +1798 6 +1606 6 +1045 7 +2765 5 +3186 2 +2260 8 +3972 7 +1132 5 +1900 10 +1759 6 +2290 9 +1212 4 +698 7 +511 1 +3331 7 +1185 6 +2565 1 +481 5 +896 7 +3301 7 +3907 7 +1014 5 +3916 1 +3628 3 +897 5 +1626 7 +1935 10 +1200 7 +3970 8 +3287 6 +927 2 +385 5 +1665 7 +2625 3 +1068 5 +3819 1 +2727 1 +1770 10 +3401 4 +1035 5 +3934 7 +1747 10 +3304 5 +1699 3 +739 10 +2396 3 +438 2 +3852 10 +2536 8 +619 8 +3535 3 +3758 3 +3889 1 +2887 6 +1720 9 +906 7 +3930 2 +3424 8 +2388 2 +1193 8 +2670 6 +3415 6 +3748 5 +1005 2 +3621 1 +2117 6 +3173 1 +3138 4 +3527 6 +790 3 +1633 5 +1725 10 +1700 8 +895 4 +3164 10 +3433 1 +165 1 +554 8 +1332 3 +1330 7 +1063 9 +2077 7 +875 9 +1378 1 +3839 9 +1907 3 +3274 8 +1444 4 +3809 1 +1834 7 +447 10 +13 6 +353 1 +2807 10 +3759 2 +1007 10 +3404 7 +1943 4 +1538 5 +1627 5 +2355 7 +1113 6 +578 9 +3056 3 +4034 8 +1812 7 +1388 9 +662 5 +2030 10 +24 7 +1600 10 +3051 7 +1495 1 +3155 4 +2911 7 +3017 3 +3764 7 +3561 7 +2259 8 +1092 9 +1312 5 +2132 10 +1929 10 +1297 3 +164 4 +1759 3 +2554 5 +3570 9 +2073 7 +68 8 +3225 1 +1222 9 +3001 8 +189 10 +3512 8 +3954 1 +4007 10 +498 9 +3559 7 +4052 3 +4066 5 +3914 10 +214 6 
+149 4 +3949 7 +1491 7 +1783 1 +39 9 +1576 2 +3915 6 +1422 3 +2488 3 +3578 5 +939 10 +2467 1 +3742 10 +3990 3 +1156 3 +638 8 +308 5 +414 9 +2119 5 +2310 6 +491 8 +1948 9 +3551 1 +197 8 +2189 4 +2492 4 +2503 10 +3930 9 +3180 3 +1251 3 +1713 6 +203 10 +79 6 +2020 8 +2585 2 +2096 3 +1790 2 +2869 6 +1174 6 +2765 9 +1261 3 +2399 5 +637 10 +2318 5 +2306 5 +3370 7 +3379 1 +1732 5 +1503 10 +3555 8 +2024 8 +3905 6 +3491 5 +197 9 +340 1 +192 10 +1165 6 +3663 2 +2625 4 +2784 5 +3138 10 +3624 2 +3707 4 +2747 3 +96 8 +3822 6 +2740 7 +4083 7 +3339 8 +2041 10 +3050 7 +3165 7 +3096 9 +1375 1 +658 3 +3089 7 +586 9 +737 9 +2962 8 +3511 4 +2051 8 +1653 10 +2080 4 +1883 8 +2251 3 +1934 6 +1480 9 +3874 6 +276 9 +3255 8 +1860 4 +376 1 +71 7 +3753 2 +80 2 +3707 6 +1065 4 +978 2 +34 9 +1967 3 +964 2 +2802 8 +497 2 +793 1 +3976 9 +276 1 +3541 7 +2997 6 +444 10 +1180 10 +3008 1 +4091 10 +2304 4 +2965 6 +3270 5 +2441 4 +2822 5 +657 6 +2631 8 +1358 10 +1783 3 +3165 3 +1865 1 +3323 6 +375 3 +3779 5 +2505 3 +1645 10 +957 3 +1491 3 +1214 5 +3670 3 +2193 1 +720 2 +3241 10 +3819 8 +2112 4 +3301 10 +1264 4 +3937 3 +3991 9 +2233 9 +2788 8 +2477 5 +2449 6 +3996 10 +1614 6 +1843 1 +2732 4 +2658 2 +1930 9 +1400 2 +3464 10 +3043 7 +1099 6 +1698 1 +2485 9 +904 9 +3305 1 +161 10 +3368 3 +2575 3 +2376 2 +3414 10 +2415 2 +2241 3 +1118 3 +672 2 +973 3 +63 2 +3909 10 +2730 10 +2677 8 +2879 7 +434 8 +3328 1 +372 4 +3892 9 +3724 3 +1471 1 +1378 6 +3369 9 +244 7 +3068 4 +864 7 +1521 6 +2038 2 +3124 2 +1781 4 +2580 6 +324 1 +1703 1 +1230 2 +2407 6 +3972 9 +1775 6 +3082 4 +2442 8 +159 1 +971 1 +1686 8 +1022 10 +166 3 +3153 3 +3406 10 +1865 8 +1902 8 +2309 8 +78 1 +1521 7 +3207 10 +3637 2 +2802 7 +2388 4 +2204 2 +1263 9 +3758 7 +210 1 +2319 9 +561 4 +3534 9 +3902 2 +3460 8 +3392 4 +2231 10 +3718 9 +3019 5 +1126 9 +563 4 +1770 1 +1615 8 +2212 3 +3923 4 +745 5 +1638 9 +2814 6 +2652 1 +1114 8 +3194 5 +2302 9 +2308 8 +1040 4 +1210 4 +1632 2 +1359 3 +2478 9 +2613 5 +1037 7 +588 4 +602 3 +4014 7 +2961 4 +2047 9 +2435 1 
+200 7 +1265 3 +278 3 +1610 4 +3825 10 +3239 6 +1101 2 +1300 4 +645 3 +180 5 +987 10 +626 9 +1288 6 +4017 3 +1451 10 +3465 6 +639 9 +830 3 +3332 1 +2983 10 +3702 5 +3877 10 +1450 4 +1003 5 +1545 5 +85 9 +1838 4 +788 8 +3927 10 +1056 8 +2778 6 +3679 3 +1002 8 +3338 5 +796 5 +2418 2 +3877 6 +279 8 +2305 8 +3895 4 +3515 1 +2818 4 +667 8 +2259 1 +2268 1 +2727 8 +1497 2 +777 6 +2200 7 +2456 5 +2856 7 +1571 5 +990 10 +1046 3 +3554 2 +3317 2 +2117 2 +49 4 +3251 5 +1138 4 +1020 6 +359 10 +2453 9 +2468 2 +1970 7 +3781 8 +339 10 +707 9 +1294 7 +3950 1 +846 8 +3362 9 +1275 3 +2627 5 +2665 3 +2785 8 +2626 5 +733 9 +1160 1 +3159 6 +143 9 +2164 2 +3928 2 +1972 2 +3856 7 +3888 7 +3983 8 +1829 10 +37 6 +255 3 +1327 9 +2513 10 +1368 2 +744 8 +709 9 +3809 9 +2173 5 +2777 2 +961 3 +421 1 +875 7 +1552 6 +1624 7 +3938 4 +1100 2 +631 1 +235 10 +1125 1 +168 10 +3547 7 +2353 10 +3006 10 +763 5 +2716 3 +2657 6 +3549 9 +214 6 +3547 7 +3270 6 +436 10 +3474 8 +3223 6 +4019 3 +4083 4 +1913 8 +422 4 +707 9 +2853 3 +1850 4 +596 4 +3455 10 +1307 3 +3706 8 +1441 10 +3879 8 +3858 3 +472 9 +1711 7 +3057 7 +1080 9 +498 5 +2332 9 +1374 2 +1178 1 +1673 7 +3260 5 +2625 8 +1925 7 +1769 8 +100 10 +3527 10 +3042 7 +3425 8 +3027 6 +1279 3 +2027 3 +469 8 +17 2 +2782 9 +341 5 +129 6 +2538 8 +325 8 +3066 3 +4047 6 +90 1 +1170 1 +496 8 +3767 3 +738 6 +978 4 +1727 9 +2483 9 +2017 6 +657 4 +2139 3 +775 10 +2472 9 +2787 8 +1504 3 +543 1 +1331 2 +1313 1 +554 4 +3997 6 +2823 8 +1521 10 +1342 2 +3175 5 +2162 3 +2970 2 +1781 9 +121 5 +1868 10 +1220 5 +1315 7 +3619 1 +729 7 +1148 2 +167 4 +915 10 +2197 9 +1387 1 +558 4 +3475 5 +803 7 +1223 8 +2789 2 +2020 8 +121 2 +926 3 +368 5 +1726 5 +261 4 +3162 3 +2490 10 +3168 3 +3301 10 +3438 5 +1498 8 +1912 8 +2145 9 +3118 4 +3638 1 +1186 10 +734 3 +2438 1 +2923 4 +1900 7 +2894 8 +3372 2 +759 8 +2318 1 +2312 7 +551 2 +2008 7 +3030 8 +960 8 +212 9 +470 9 +4042 1 +115 3 +3981 1 +2901 6 +227 2 +3460 6 +3819 8 +2974 2 +945 4 +3000 9 +2475 1 +2146 10 +1307 6 +1835 4 +3016 9 +111 6 
+1804 3 +1492 10 +213 6 +578 4 +1962 7 +538 2 +3498 7 +1504 5 +3276 1 +29 10 +1751 4 +3691 8 +3940 7 +3590 5 +904 7 +1308 5 +2836 9 +2607 2 +3977 4 +3483 5 +914 7 +3591 8 +2957 2 +1456 6 +1058 4 +156 10 +1229 8 +723 4 +323 10 +1036 8 +1588 7 +1119 2 +2304 2 +1258 6 +2374 3 +1511 6 +3309 8 +2197 4 +1922 1 +2663 6 +1672 7 +3887 5 +3053 6 +1402 1 +548 8 +1584 1 +2087 3 +2285 1 +2296 2 +2219 7 +352 7 +1082 2 +1095 7 +3190 3 +2965 2 +1491 4 +3628 2 +678 1 +989 7 +3992 8 +2804 9 +3427 10 +2437 8 +354 3 +3931 2 +2727 6 +3545 6 +3365 5 +1510 7 +2345 10 +127 9 +3498 10 +636 5 +1057 7 +178 4 +912 10 +1125 9 +3365 5 +84 3 +938 7 +1288 7 +1381 1 +1918 4 +2141 4 +780 8 +3992 8 +588 1 +469 10 +3797 1 +3704 4 +3692 6 +1990 4 +891 1 +4079 7 +547 9 +1882 5 +3816 10 +926 8 +2927 10 +2006 7 +2486 2 +3632 3 +1220 2 +2238 10 +3433 9 +1246 2 +3886 4 +3922 3 +218 8 +2179 2 +3334 1 +193 8 +1378 10 +3579 7 +1791 7 +3787 4 +873 7 +2528 9 +518 6 +212 9 +3299 9 +3114 10 +379 1 +2024 7 +681 2 +3421 8 +399 10 +3187 5 +1665 4 +1808 6 +1987 5 +1748 4 +1625 9 +385 10 +987 9 +3359 7 +2821 6 +2169 4 +3375 9 +3512 9 +3189 7 +1068 8 +3790 4 +3807 2 +22 8 +1287 6 +3718 9 +2858 6 +2126 10 +4011 5 +3800 10 +2661 2 +1947 8 +3834 2 +303 2 +2622 3 +3913 1 +1811 4 +61 5 +3661 5 +2741 6 +3856 9 +1455 8 +1637 6 +3822 1 +849 10 +1107 9 +4017 7 +1863 9 +835 10 +1701 3 +2071 9 +1073 6 +3155 9 +3832 10 +643 4 +530 1 +353 1 +1161 1 +350 1 +2528 8 +3713 9 +880 9 +2421 10 +3781 1 +2390 9 +2151 6 +245 2 +2899 6 +3547 9 +2772 5 +2134 1 +1827 4 +1552 10 +3487 4 +900 3 +273 5 +1946 1 +3128 2 +3301 9 +3175 5 +934 10 +1779 3 +1199 9 +1233 5 +2228 7 +2105 1 +479 8 +3535 1 +1742 2 +2390 7 +3399 2 +1660 7 +849 3 +1652 9 +3332 8 +174 4 +2965 9 +1165 8 +2794 8 +1638 2 +2881 8 +2527 3 +1570 2 +2307 5 +979 2 +2832 6 +3507 8 +3430 1 +3962 7 +140 7 +3207 2 +3306 10 +582 10 +2746 8 +81 4 +2122 4 +1226 6 +1454 7 +354 5 +1664 2 +2109 1 +1697 3 +2452 4 +2398 1 +2224 2 +1679 6 +2330 1 +2358 3 +2942 10 +3842 2 +1411 2 +353 9 +1879 2 
+1117 6 +255 1 +2495 8 +1126 9 +1947 6 +3705 6 +270 10 +1351 2 +2900 1 +3427 7 +742 2 +1158 4 +2501 1 +868 10 +3810 5 +449 2 +2496 5 +972 4 +3187 9 +291 4 +2278 3 +1057 2 +1471 10 +3238 2 +1171 6 +1463 3 +2833 3 +2529 10 +2831 3 +567 10 +2484 4 +973 1 +3606 5 +154 7 +2688 3 +1188 5 +1853 4 +3407 6 +710 1 +1598 10 +6 4 +2315 9 +3218 10 +577 3 +2530 9 +2622 4 +4048 1 +1208 1 +2226 4 +1064 9 +2499 10 +3998 7 +496 5 +1751 7 +4021 7 +2966 9 +684 3 +3805 7 +2747 2 +1818 7 +2879 3 +3599 6 +2593 5 +2186 10 +3511 10 +1100 1 +1821 6 +3472 4 +2858 7 +2920 5 +173 2 +3517 4 +3322 9 +3410 4 +2233 7 +392 9 +2204 7 +3584 3 +356 5 +2406 3 +906 9 +2577 6 +2631 6 +444 3 +2593 9 +2065 8 +53 8 +661 2 +2175 8 +365 9 +1178 9 +2179 5 +2548 2 +4022 7 +1486 2 +3648 5 +1654 3 +2129 1 +3787 1 +3637 2 +980 8 +3142 1 +2176 1 +847 2 +659 7 +2132 1 +3193 6 +70 4 +3333 7 +3145 4 +1512 5 +292 4 +1357 6 +1603 4 +64 10 +4048 3 +1027 8 +3850 2 +3056 4 +1658 8 +3884 7 +2822 10 +2949 6 +1058 1 +2301 8 +3666 1 +1829 3 +3148 8 +2784 4 +281 8 +3434 1 +2237 1 +2413 6 +805 2 +1900 7 +669 5 +2412 5 +2964 8 +3704 3 +468 8 +3184 5 +3394 3 +3059 1 +632 3 +843 8 +1157 2 +2788 3 +1339 7 +2516 9 +650 1 +1764 2 +3082 10 +1718 5 +2034 7 +1360 4 +4023 7 +1123 6 +424 3 +1087 1 +1181 1 +2253 1 +531 2 +1485 6 +572 3 +3615 8 +839 2 +2062 2 +1142 8 +1175 5 +3997 2 +2481 3 +3086 5 +3060 4 +3474 1 +1045 1 +1009 8 +2648 3 +2472 8 +2130 3 +362 3 +1695 4 +3669 8 +3233 8 +1840 7 +3803 3 +3042 3 +882 10 +3123 1 +3752 8 +3475 2 +3648 4 +583 10 +1334 6 +612 6 +163 1 +3764 5 +1912 3 +1816 10 +2696 3 +842 6 +257 1 +4033 6 +3039 3 +2051 7 +1188 5 +2949 7 +255 9 +3385 1 +1189 2 +3189 9 +1669 2 +1227 2 +2908 7 +1812 8 +2435 4 +1842 9 +1452 2 +2649 6 +1876 6 +770 5 +2038 9 +3784 10 +1738 2 +2144 6 +214 4 +618 4 +539 2 +2360 6 +350 4 +307 4 +807 5 +1564 7 +3877 2 +3824 10 +1023 3 +2440 9 +2700 8 +2239 4 +2076 4 +3086 9 +3480 5 +2189 10 +3143 5 +3434 4 +2389 8 +3170 4 +1231 7 +1376 8 +554 7 +2525 10 +2580 8 +4069 5 +319 4 +1771 5 +2893 7 
+3742 6 +1438 7 +1010 1 +726 6 +3146 9 +2214 7 +351 3 +2878 7 +1791 9 +1475 7 +1457 6 +2583 8 +1730 10 +116 9 +2972 6 +3886 9 +1110 6 +1906 10 +1406 8 +2044 2 +1333 1 +3736 5 +1384 10 +1298 3 +2877 3 +1274 4 +1711 5 +3467 9 +925 5 +504 1 +3689 6 +3026 4 +1071 3 +586 10 +2394 2 +315 2 +2946 7 +747 8 +51 4 +2317 3 +692 9 +3653 10 +3718 10 +2106 8 +3031 1 +1970 4 +1763 3 +3037 4 +1116 6 +1784 1 +3486 1 +551 2 +3451 8 +3809 2 +2572 5 +3576 1 +3229 1 +151 5 +723 3 +1748 9 +519 3 +2762 3 +2266 2 +121 7 +1905 10 +2294 9 +629 9 +2232 10 +1590 2 +2437 6 +1092 10 +1153 3 +2067 2 +1825 10 +1631 1 +103 1 +129 8 +2731 10 +1265 5 +2754 10 +3176 2 +2385 8 +1620 3 +444 4 +1231 7 +1496 1 +3681 10 +2951 3 +3148 10 +172 10 +1414 9 +3775 9 +2671 4 +697 1 +3632 5 +2440 5 +3099 2 +350 6 +3080 10 +1314 8 +2759 4 +2801 3 +3304 4 +2912 4 +2351 1 +940 6 +2725 2 +3543 9 +3971 3 +1649 3 +550 7 +125 1 +1696 9 +2743 8 +2277 1 +543 2 +1262 7 +550 7 +920 4 +2277 10 +2466 10 +2648 6 +2442 7 +1983 1 +1438 2 +2167 1 +2256 10 +183 6 +2832 8 +2037 1 +2829 7 +284 3 +138 8 +1758 1 +2109 8 +1146 5 +3817 10 +799 8 +325 4 +706 10 +1790 6 +445 2 +1734 6 +123 8 +2187 2 +1960 7 +75 2 +359 8 +802 5 +1384 3 +1140 4 +2396 5 +4087 7 +2680 7 +3182 8 +3436 6 +899 7 +1437 4 +1502 2 +2046 9 +452 9 +3709 5 +1733 9 +1547 2 +1729 10 +3826 7 +1387 8 +185 3 +513 9 +3068 10 +306 2 +1585 3 +1244 6 +977 1 +1751 8 +1350 7 +1112 8 +2683 2 +3677 6 +1196 2 +100 4 +4058 3 +897 6 +1915 7 +927 2 +480 2 +892 1 +3033 10 +2510 7 +2915 4 +1296 7 +2536 1 +255 6 +2584 1 +98 5 +1922 3 +1547 6 +3939 6 +3795 10 +3628 6 +2484 8 +661 3 +3160 1 +1991 2 +607 9 +1305 1 +1910 6 +3274 4 +2755 4 +2570 2 +2550 5 +3805 3 +3987 3 +1123 5 +1105 3 +3047 9 +3404 1 +684 8 +3036 5 +3368 8 +2208 1 +2049 1 +1761 1 +1416 10 +1559 2 +2246 5 +612 1 +92 10 +1815 5 +926 5 +1552 8 +438 8 +2828 7 +1502 9 +2894 7 +3200 4 +2227 9 +2483 7 +3918 5 +3274 3 +2318 6 +1762 2 +2416 1 +2081 6 +3583 6 +2357 8 +1319 2 +657 3 +4073 7 +1517 5 +3633 10 +1945 8 +2331 5 +3289 6 
+763 5 +3895 6 +1698 3 +1658 3 +31 8 +2042 6 +2543 8 +413 8 +831 3 +2182 2 +3657 2 +3790 4 +2894 9 +1186 9 +3197 2 +1102 4 +1728 5 +689 8 +1189 6 +2347 1 +2034 9 +1046 8 +2342 3 +3731 8 +3407 5 +1307 4 +1156 5 +1946 5 +2779 8 +743 6 +334 8 +1101 9 +1831 4 +1158 8 +3068 2 +954 4 +3810 2 +467 7 +37 8 +339 1 +74 7 +2022 4 +419 1 +615 5 +1498 6 +548 10 +1759 2 +1873 2 +3670 4 +2614 9 +1278 1 +908 9 +1115 6 +2677 5 +1732 3 +3546 4 +3924 1 +2665 1 +1387 2 +3622 6 +1333 8 +1977 10 +4051 5 +2720 5 +2555 3 +607 6 +3498 4 +799 2 +3439 1 +1422 8 +3862 6 +959 1 +4029 2 +47 4 +2013 5 +3339 10 +2797 8 +3463 10 +1923 7 +2693 7 +276 5 +3223 2 +3887 6 +4060 1 +3765 3 +3480 6 +565 5 +3616 10 +3576 5 +2612 9 +4049 9 +762 5 +551 9 +1439 10 +2131 4 +544 10 +2124 7 +896 1 +163 4 +4021 5 +3887 4 +2329 4 +1714 8 +1209 5 +2238 5 +2096 10 +517 10 +2526 4 +2825 7 +2802 6 +3625 2 +255 6 +3419 7 +2404 9 +1538 6 +3235 2 +2416 9 +30 3 +3790 6 +977 10 +590 8 +535 9 +542 9 +553 3 +3670 5 +1373 6 +123 7 +735 9 +1218 9 +2397 8 +2703 9 +2846 9 +827 9 +491 1 +2986 10 +3797 3 +2170 2 +1397 3 +1185 2 +49 3 +1207 9 +3167 1 +466 7 +1659 4 +3479 9 +874 8 +3136 2 +1377 9 +879 2 +2961 4 +4020 10 +642 1 +2826 5 +3641 8 +3631 5 +1084 7 +324 8 +1660 6 +3774 10 +1663 6 +3907 1 +4027 1 +290 5 +963 6 +2344 7 +3325 9 +87 10 +1110 10 +1760 1 +825 9 +3647 9 +1213 5 +849 7 +1494 5 +3980 6 +922 8 +586 10 +1807 1 +3755 6 +2477 9 +302 5 +2174 9 +340 3 +2047 10 +1973 9 +3168 5 +2419 1 +3039 1 +4020 9 +2298 5 +1796 4 +3313 6 +542 4 +2913 2 +2069 4 +2407 1 +3566 7 +2190 10 +381 6 +2826 6 +2811 3 +305 2 +608 5 +3637 10 +617 2 +994 7 +1737 5 +761 4 +3223 2 +4070 3 +897 4 +2223 9 +2796 1 +2449 5 +1933 10 +450 9 +516 6 +1468 4 +2999 2 +3656 1 +3197 5 +2286 1 +3695 7 +3210 6 +2723 10 +930 2 +796 8 +2608 2 +3529 10 +2512 5 +3975 10 +1475 10 +1425 9 +2602 2 +2782 9 +1919 5 +1362 9 +214 3 +1476 4 +3714 4 +47 5 +1776 5 +714 5 +2815 2 +716 8 +1040 9 +415 1 +1683 5 +3396 1 +876 7 +2724 6 +1825 4 +2314 10 +3581 2 +2430 4 +282 6 +862 6 
+2300 10 +2698 8 +3704 9 +1554 6 +939 10 +3315 9 +1561 3 +838 5 +2454 8 +2397 6 +1186 4 +1103 4 +2363 7 +698 5 +684 4 +3117 2 +2500 4 +3798 4 +4080 2 +2324 2 +739 6 +505 7 +2872 4 +476 7 +2891 10 +3213 10 +3634 4 +147 2 +282 3 +25 10 +2759 6 +465 1 +528 4 +2579 8 +2013 5 +3811 3 +694 4 +1180 6 +791 5 +3556 4 +3981 2 +3378 5 +3526 7 +2021 3 +2459 10 +3528 10 +3855 10 +3024 1 +3266 8 +1298 5 +2308 1 +236 7 +3047 5 +1001 7 +3633 3 +105 7 +2072 2 +2751 2 +1806 8 +4014 9 +720 4 +1813 1 +3026 4 +648 6 +2818 5 +1021 9 +1180 9 +1859 4 +1921 5 +1925 3 +477 5 +3051 9 +3474 4 +2718 10 +695 2 +2738 3 +181 9 +2138 4 +1474 9 +3440 10 +2442 10 +3753 7 +541 3 +1271 9 +2280 4 +1212 2 +3028 8 +3066 10 +3241 5 +1439 2 +3323 9 +3958 10 +2619 5 +4056 6 +3306 2 +2598 4 +1865 10 +300 8 +3693 7 +2055 4 +710 7 +2292 7 +3443 1 +498 8 +3295 3 +1591 1 +2208 6 +4032 7 +1800 5 +352 3 +780 2 +1835 2 +65 9 +956 2 +2303 3 +1494 8 +2362 7 +272 3 +2916 3 +2190 1 +633 4 +1862 2 +806 8 +3214 7 +15 10 +789 3 +1854 9 +575 2 +1241 8 +3633 7 +2771 7 +1776 7 +2664 1 +2994 9 +1300 8 +2878 4 +1185 9 +3652 2 +990 3 +205 5 +3316 5 +3237 9 +2604 4 +441 2 +241 8 +805 4 +3357 5 +1179 8 +2796 10 +3949 2 +530 9 +2938 6 +165 8 +3716 1 +3697 6 +3085 1 +29 8 +2242 9 +1622 9 +877 9 +1876 8 +329 2 +508 6 +3600 10 +1514 7 +3301 10 +1829 1 +2099 3 +2960 3 +3851 6 +1275 9 +2714 6 +2747 7 +294 10 +1226 5 +3453 2 +3326 8 +263 2 +2873 10 +3305 2 +417 10 +141 4 +1773 6 +3875 7 +2042 6 +2796 10 +1964 8 +2719 4 +2902 3 +2893 7 +239 10 +344 6 +2385 8 +472 9 +239 5 +2319 5 +2847 2 +2649 8 +3116 1 +347 6 +1848 3 +3705 4 +3340 8 +751 3 +695 9 +1393 7 +2153 1 +2148 1 +1848 4 +659 4 +2177 1 +2038 10 +2754 5 +1465 9 +3122 5 +2960 3 +1113 2 +3649 3 +3225 6 +2647 9 +1474 6 +2094 4 +740 1 +2325 5 +1224 7 +3048 4 +457 6 +2720 4 +3779 6 +2298 10 +1805 7 +1752 7 +3417 3 +3801 4 +2776 4 +2012 6 +3307 3 +2844 7 +2872 1 +957 4 +2252 4 +3174 9 +3675 1 +2599 8 +2037 6 +3173 9 +3304 3 +3000 5 +696 1 +3583 10 +2956 5 +899 7 +1427 7 +2211 10 +3065 2 
+3351 3 +797 10 +1283 7 +120 8 +3194 3 +729 4 +2692 10 +3422 7 +2526 8 +3354 9 +790 3 +259 1 +55 3 +505 5 +68 8 +540 1 +3416 8 +3584 3 +1268 7 +729 2 +1840 3 +2573 2 +3843 3 +3823 9 +2592 8 +3453 4 +2886 3 +1236 5 +1562 6 +2156 7 +613 4 +2763 7 +912 1 +585 7 +2341 2 +754 7 +1028 1 +2006 4 +1767 5 +1965 3 +3078 8 +3587 2 +1418 2 +1086 9 +2082 9 +1415 7 +790 5 +3031 5 +1441 10 +3496 7 +966 1 +3562 5 +2816 8 +938 3 +2216 1 +1150 5 +1925 1 +1068 6 +2860 2 +1014 7 +469 6 +2987 5 +473 1 +3009 9 +917 10 +2700 10 +3394 3 +2324 8 +736 7 +2990 7 +3043 9 +896 8 +1146 1 +1360 10 +3906 6 +348 7 +3786 8 +1004 5 +2974 3 +558 9 +2854 5 +2777 7 +173 7 +3332 4 +450 7 +2464 9 +1195 9 +3235 7 +3336 2 +2254 6 +818 3 +3798 1 +810 8 +3606 2 +3025 6 +778 6 +977 7 +3549 2 +4015 3 +2736 7 +3550 6 +3889 4 +2921 5 +3176 3 +4 6 +1305 4 +4040 1 +1225 1 +3314 6 +1222 9 +1197 8 +3932 5 +3991 6 +493 7 +2644 7 +241 8 +562 5 +1097 3 +2632 6 +2480 3 +3129 5 +3096 2 +3585 8 +655 9 +2600 6 +491 4 +2467 2 +495 9 +3969 6 +3467 10 +45 2 +602 3 +763 6 +1876 10 +1188 8 +3089 4 +2316 1 +3761 10 +228 1 +3596 9 +215 1 +546 2 +1716 7 +1940 4 +2585 2 +1780 7 +262 5 +2560 7 +1845 6 +86 1 +1080 4 +1350 10 +606 9 +1391 7 +2634 8 +127 10 +2256 3 +2794 9 +2617 3 +1509 10 +2103 6 +1893 2 +238 5 +135 10 +3003 2 +2917 10 +3425 2 +2607 2 +2136 3 +2216 5 +1414 2 +1484 9 +3474 6 +3871 3 +78 1 +1613 4 +892 10 +3655 9 +3129 6 +832 9 +2100 5 +4092 3 +2112 1 +2649 2 +676 10 +3347 10 +424 9 +860 4 +3666 4 +1185 1 +1872 1 +1811 10 +213 9 +144 4 +3984 8 +3748 4 +1716 2 +2523 9 +482 4 +2002 1 +1501 6 +3333 5 +1641 9 +1867 7 +3138 10 +330 4 +3154 7 +710 3 +2139 1 +3269 3 +1694 3 +1437 5 +2333 4 +3433 4 +4 5 +2452 7 +3848 10 +944 7 +1822 7 +4025 6 +3936 9 +1309 10 +1496 1 +3341 6 +1435 3 +803 6 +3276 4 +971 8 +774 3 +2286 8 +1316 10 +3276 1 +797 5 +503 3 +4020 8 +2517 1 +452 6 +2644 10 +2338 9 +3013 10 +997 5 +3485 3 +556 2 +1037 5 +3610 9 +211 4 +4015 10 +831 10 +1715 3 +1365 7 +1098 2 +487 10 +111 1 +2022 10 +3957 7 +1276 1 +3879 
9 +3127 2 +1973 9 +3891 10 +2944 6 +3106 2 +3939 3 +386 7 +1665 10 +2078 9 +1125 10 +1577 7 +3543 7 +853 3 +3798 8 +3801 7 +3169 6 +2880 1 +1540 10 +1518 7 +2083 9 +1616 9 +2814 7 +1787 8 +3727 4 +3708 6 +2186 7 +1693 8 +1577 10 +2225 3 +2065 4 +1931 7 +1138 1 +3381 3 +2675 1 +1153 1 +1507 4 +347 7 +1773 9 +2601 3 +133 1 +3813 10 +3061 1 +163 7 +168 3 +2578 3 +4076 2 +734 2 +999 6 +1907 2 +3972 4 +493 10 +870 1 +1613 7 +2118 7 +1742 9 +1165 9 +2074 10 +3320 3 +874 1 +2703 3 +1014 8 +1310 5 +3038 8 +2369 9 +984 9 +3684 4 +961 4 +1278 2 +1791 3 +3968 9 +1462 6 +2801 4 +146 6 +3717 8 +3445 10 +941 4 +1709 8 +3112 2 +3192 7 +3353 4 +2564 8 +639 6 +2140 7 +4056 4 +854 4 +719 9 +780 10 +2091 7 +2748 7 +1123 6 +773 4 +2572 8 +3240 6 +3156 9 +1985 4 +2845 2 +3011 2 +1830 9 +2768 7 +1079 8 +23 5 +1702 8 +3920 7 +2925 6 +2318 7 +3833 2 +1659 4 +164 9 +455 1 +3237 6 +3397 7 +1751 9 +1247 8 +3951 8 +659 5 +2424 6 +894 8 +4082 7 +2904 10 +3148 5 +444 8 +331 3 +3653 4 +166 4 +1331 7 +2053 8 +3411 6 +1266 7 +3971 1 +67 10 +866 9 +479 2 +2452 3 +434 5 +1926 2 +2563 3 +2434 5 +2808 7 +3612 4 +509 1 +146 5 +112 5 +726 1 +5 7 +1767 9 +213 5 +2630 6 +914 3 +2248 3 +295 3 +3251 10 +3771 3 +2556 5 +3851 3 +1227 4 +1444 10 +2455 4 +3500 9 +2382 4 +3745 1 +4040 10 +239 2 +3552 2 +1812 1 +404 9 +879 6 +593 2 +2620 8 +960 9 +2935 5 +3247 10 +923 1 +3362 4 +2746 4 +563 3 +228 9 +3501 6 +699 6 +72 7 +2701 2 +1265 3 +350 3 +213 1 +3267 7 +2167 4 +2325 1 +2896 8 +3789 9 +1296 4 +2459 3 +3485 3 +3459 7 +2028 10 +3655 4 +1965 9 +1673 6 +1843 10 +3491 5 +1532 9 +2204 4 +1427 10 +2541 2 +1947 5 +3718 6 +1105 5 +2498 3 +3322 6 +1985 5 +434 5 +2948 5 +1763 9 +248 2 +1467 7 +1719 4 +263 7 +3514 8 +2057 4 +1461 6 +993 10 +417 4 +1400 7 +1956 8 +3824 6 +964 10 +3822 8 +3459 8 +3676 1 +2537 6 +2853 7 +3629 1 +2855 8 +1975 1 +1607 1 +855 5 +1423 7 +1692 7 +1080 3 +28 9 +86 4 +3955 8 +2773 6 +1108 7 +55 6 +3905 7 +3796 7 +3143 8 +3808 8 +1687 5 +2304 1 +1328 6 +1150 9 +323 10 +2591 9 +2083 8 +1145 9 +3254 
7 +2660 6 +2134 6 +317 2 +3971 6 +268 4 +155 10 +1067 6 +2810 7 +3214 3 +717 2 +3692 4 +3479 2 +2901 5 +2943 6 +1958 2 +3965 10 +1896 2 +1538 1 +2294 5 +3815 7 +1433 8 +2680 7 +1012 9 +191 8 +238 8 +3300 5 +514 10 +1643 8 +3348 8 +3547 7 +2874 8 +3090 3 +305 1 +3842 6 +3085 4 +2127 10 +3843 10 +3473 7 +2005 8 +1809 5 +3217 1 +2968 1 +3422 7 +76 3 +3216 4 +1470 1 +3350 2 +221 2 +382 4 +1982 10 +244 1 +1795 1 +1951 8 +1818 4 +393 9 +1339 2 +442 8 +479 9 +2304 1 +1068 5 +827 7 +2639 6 +2554 6 +1999 6 +4078 6 +1905 10 +3957 3 +2424 7 +1143 5 +486 1 +2832 6 +157 10 +4082 9 +1143 10 +649 5 +2647 9 +3693 2 +3595 4 +1778 9 +2170 9 +3830 2 +259 10 +1417 2 +1061 1 +2146 10 +1642 9 +463 8 +2849 5 +2323 5 +3355 5 +2378 2 +990 8 +2692 10 +879 7 +1674 8 +261 7 +3914 10 +1842 2 +887 4 +4036 7 +227 8 +1592 6 +720 1 +1761 6 +1326 6 +2286 10 +386 9 +2863 6 +78 6 +3986 9 +307 10 +445 9 +3940 7 +529 5 +939 4 +1459 4 +966 4 +3798 9 +683 2 +1323 8 +313 10 +3093 6 +420 2 +1586 10 +1256 5 +1726 5 +1772 1 +1464 6 +3980 10 +2147 2 +3727 8 +641 8 +1577 10 +1207 8 +4035 4 +562 2 +2492 8 +72 4 +1535 6 +2706 1 +2845 9 +1676 4 +730 3 +1964 9 +3894 9 +2393 6 +2790 2 +869 7 +1139 10 +1784 1 +2365 7 +1750 1 +88 1 +3565 5 +1199 6 +2526 4 +3472 9 +1295 1 +2082 1 +2587 5 +3150 3 +1238 1 +2562 8 +3926 5 +2277 10 +1317 10 +764 4 +1292 3 +2153 3 +3582 2 +1921 9 +256 6 +1318 1 +1202 1 +177 4 +1154 9 +2986 9 +3936 6 +3273 5 +290 6 +1024 10 +2780 4 +3986 5 +516 10 +249 3 +2905 10 +2844 8 +2862 7 +2524 1 +2837 5 +3402 8 +3161 10 +2999 9 +2960 10 +3824 3 +2495 10 +1385 2 +1335 8 +813 10 +1090 5 +3901 7 +1055 6 +656 9 +2570 4 +3329 5 +569 9 +2055 2 +2018 5 +306 7 +323 3 +2866 5 +2095 1 +3068 4 +3174 3 +571 4 +1682 3 +1345 2 +2909 1 +656 9 +1484 4 +3164 2 +2571 10 +3966 10 +3340 10 +3728 8 +7 2 +2608 4 +2421 7 +2362 1 +3003 10 +3149 2 +903 4 +3827 6 +1493 7 +1841 2 +858 8 +1451 1 +3172 3 +1973 1 +3439 3 +2296 3 +1634 2 +2457 1 +532 10 +1046 2 +3357 2 +2972 8 +825 9 +3344 4 +3911 4 +1051 4 +574 7 +3352 3 +534 4 
+3882 1 +2328 7 +517 7 +3393 4 +1929 2 +1767 10 +733 5 +2664 1 +1410 5 +444 6 +1540 6 +968 9 +2640 6 +1875 8 +1901 9 +3463 6 +3969 6 +351 2 +3927 9 +909 8 +1050 7 +2546 1 +3510 4 +249 1 +3123 6 +163 3 +549 1 +3607 10 +1638 7 +3195 6 +3973 1 +104 5 +3502 9 +3134 9 +2764 1 +2263 9 +3943 7 +52 3 +849 1 +1057 2 +1287 5 +3156 5 +1769 7 +3908 3 +1059 1 +1455 4 +2934 2 +25 1 +2676 4 +3981 6 +3527 7 +1243 4 +1259 2 +3833 8 +1258 3 +772 6 +1262 7 +1837 7 +3722 5 +1901 7 +3677 10 +613 2 +3232 3 +776 10 +1169 3 +2073 2 +839 2 +617 8 +1811 5 +3395 1 +1528 3 +1681 2 +2428 4 +1405 4 +3810 3 +3260 6 +3019 9 +844 1 +74 10 +102 10 +3149 9 +1048 10 +808 9 +36 2 +2902 6 +2605 5 +1523 2 +2765 6 +1940 6 +3654 5 +3120 9 +2253 5 +1651 5 +757 6 +1246 9 +3442 2 +1811 4 +213 3 +3163 8 +3938 9 +405 2 +3465 10 +2497 9 +3963 4 +2858 3 +2911 1 +2586 4 +4093 9 +283 9 +3429 5 +74 6 +2552 8 +837 6 +3303 5 +727 7 +3844 5 +3646 2 +1480 10 +190 7 +1495 9 +2341 7 +2280 5 +3956 4 +3860 5 +2735 2 +2861 7 +2927 1 +2012 5 +477 7 +99 5 +3191 4 +813 3 +3000 8 +3213 5 +1658 5 +450 8 +869 3 +3025 7 +1170 2 +3437 6 +3514 5 +2433 5 +1333 6 +2050 7 +949 8 +2985 1 +3727 5 +889 9 +1630 3 +3443 4 +737 7 +1991 8 +1580 5 +3192 2 +2548 1 +968 7 +151 2 +535 7 +3856 5 +1164 10 +411 8 +1538 1 +2929 5 +1978 2 +58 10 +844 4 +1501 9 +1059 5 +2496 7 +343 2 +2893 8 +3966 7 +2075 1 +3105 10 +756 8 +1687 1 +3754 4 +3947 3 +3306 10 +1523 7 +3955 9 +6 9 +1945 1 +1488 10 +2653 8 +3688 8 +2749 8 +3167 8 +79 2 +1526 2 +1585 7 +2095 5 +768 1 +1069 4 +3216 6 +1781 3 +2165 3 +2393 5 +1828 4 +2526 1 +1814 2 +1977 9 +313 2 +1930 7 +3803 3 +2629 5 +452 9 +353 8 +961 9 +880 1 +2662 7 +3725 4 +1329 5 +1008 10 +763 5 +2644 8 +4013 5 +2516 7 +3550 5 +3797 7 +833 6 +3309 4 +2095 2 +439 1 +3984 2 +2296 7 +2670 4 +3352 8 +1106 7 +2232 3 +3932 4 +3922 10 +1295 4 +1182 4 +594 9 +815 1 +527 3 +3211 10 +1929 9 +1906 5 +280 1 +464 5 +2700 1 +2133 9 +3273 8 +4053 2 +2384 2 +2509 5 +1247 1 +3745 2 +910 1 +1524 5 +2412 6 +1260 3 +3277 2 +2078 2 +1625 10 
+3767 10 +1966 1 +2711 10 +2454 7 +196 1 +1331 5 +659 1 +118 2 +3428 1 +749 9 +826 2 +2708 6 +1021 5 +407 2 +149 7 +3176 3 +3310 3 +3951 1 +2425 5 +1010 6 +119 9 +2677 8 +3760 8 +3345 8 +2116 6 +1001 5 +730 2 +1085 1 +2347 7 +2704 7 +3235 4 +3178 9 +2172 6 +1846 5 +2144 3 +1166 6 +1492 6 +3283 5 +3655 8 +1124 2 +64 1 +212 2 +1912 1 +1218 9 +1051 1 +996 6 +3157 5 +3308 2 +1891 1 +1235 6 +937 1 +1820 1 +597 1 +3382 1 +1882 1 +4090 3 +1612 2 +1884 1 +1009 4 +2989 7 +196 4 +1635 8 +3632 4 +253 9 +2051 1 +1045 9 +2473 9 +2292 7 +936 5 +1725 4 +327 5 +665 8 +2335 7 +2937 9 +2483 6 +3251 4 +407 3 +1280 2 +3407 10 +3574 10 +3480 7 +238 4 +3999 6 +618 4 +3899 5 +1123 9 +1492 7 +2447 8 +1335 3 +826 9 +2229 4 +3643 10 +2979 5 +4025 1 +2136 2 +2100 2 +1338 8 +2546 7 +3854 10 +1368 9 +2271 5 +2977 5 +1645 2 +1515 9 +236 2 +2812 8 +175 9 +627 6 +2281 2 +1236 2 +36 10 +2909 3 +3086 8 +3846 1 +1818 1 +332 5 +2912 9 +869 9 +3898 4 +1285 3 +172 6 +812 2 +2672 10 +2888 7 +1850 3 +3499 8 +1832 1 +1431 8 +2801 1 +1080 10 +443 6 +3893 5 +185 3 +2316 6 +50 6 +3849 9 +1137 2 +2962 2 +4079 10 +4014 3 +1702 3 +4055 9 +971 7 +2515 7 +2299 3 +1150 9 +2989 9 +626 3 +1572 7 +2233 5 +1392 5 +1257 3 +415 4 +598 3 +109 2 +3403 6 +3411 9 +1894 4 +678 2 +461 6 +2764 10 +3142 5 +2613 8 +2219 3 +3810 8 +3510 9 +1744 7 +3700 8 +1986 2 +2106 3 +756 2 +2888 1 +1485 8 +1857 8 +1187 9 +355 7 +3227 3 +4019 3 +2485 8 +2139 9 +3517 3 +3665 10 +3618 2 +3358 1 +1591 9 +1886 4 +746 5 +1721 10 +2471 2 +3938 9 +2506 2 +257 6 +3861 3 +3588 4 +3619 4 +2627 4 +2528 10 +1881 1 +283 5 +108 5 +583 1 +852 9 +3783 10 +1538 10 +3931 5 +545 8 +3042 1 +1533 9 +1555 2 +3840 5 +1863 9 +530 7 +1842 10 +3966 10 +699 2 +3960 2 +1644 4 +2685 1 +1162 6 +1059 6 +3406 3 +2804 10 +1028 1 +3754 2 +3937 2 +4020 10 +2036 4 +1653 6 +449 10 +739 1 +2067 3 +681 3 +296 3 +1138 9 +474 7 +365 1 +1211 1 +2919 4 +223 8 +3724 7 +3490 6 +3310 4 +1509 2 +2406 5 +3450 9 +3188 3 +2492 4 +3031 3 +3321 1 +2125 10 +4070 4 +2449 6 +1269 7 +3132 1 +733 10 
+120 3 +2009 7 +4013 5 +17 10 +2805 7 +2016 6 +3240 7 +2478 8 +3622 8 +1511 8 +1567 6 +2208 10 +2421 6 +3722 8 +2612 1 +1459 5 +863 6 +3812 10 +2234 2 +1230 7 +3000 2 +1165 9 +472 1 +2724 2 +18 1 +915 10 +3744 8 +2865 4 +520 1 +2297 9 +3469 8 +3748 6 +3301 8 +1674 6 +1260 4 +677 9 +2398 4 +3377 6 +1748 1 +2111 2 +2876 7 +3086 1 +1776 2 +3505 7 +2367 7 +3830 8 +3390 3 +2124 8 +3984 7 +2916 10 +2396 10 +1677 4 +3013 6 +3362 10 +1284 2 +3097 5 +2508 1 +2664 5 +721 5 +3878 2 +3689 9 +1648 7 +2708 4 +3937 7 +3415 10 +2761 3 +1848 4 +3019 4 +2555 3 +111 6 +3243 6 +3377 9 +1007 2 +3769 3 +75 1 +1195 8 +3968 1 +1205 5 +3756 2 +1689 7 +2596 7 +2909 10 +3807 2 +2871 6 +891 5 +3409 10 +1890 9 +3724 9 +3611 6 +1052 1 +1946 7 +1375 3 +3432 3 +3178 8 +2517 1 +3172 3 +3873 8 +1527 4 +1220 5 +85 6 +3112 7 +2539 6 +980 5 +1022 2 +1934 10 +58 10 +1859 7 +143 3 +706 2 +776 6 +4088 7 +2987 8 +1736 2 +501 7 +416 5 +3276 7 +77 7 +133 8 +3566 8 +3177 7 +587 3 +2859 10 +656 4 +2130 8 +2668 9 +1738 1 +2399 10 +2485 4 +3758 8 +1255 3 +2870 8 +3970 9 +2660 1 +2949 8 +582 10 +3207 2 +2460 6 +1037 2 +2300 8 +1438 4 +4064 2 +1513 3 +47 4 +494 6 +206 7 +1883 5 +1907 2 +736 4 +4037 3 +3008 7 +2975 10 +2136 4 +3351 1 +1895 4 +2824 5 +1546 8 +1755 6 +3513 5 +3462 5 +1907 3 +3329 5 +1296 3 +2762 9 +2642 9 +82 6 +2056 5 +3469 9 +605 6 +3834 3 +1662 8 +2204 5 +2231 7 +146 4 +2484 6 +3002 1 +1163 3 +624 6 +3993 2 +2431 3 +1430 9 +1017 7 +3450 2 +3416 3 +3215 4 +2245 4 +2873 1 +2984 9 +439 6 +1604 8 +2761 6 +3029 1 +3048 4 +1137 10 +1633 9 +227 4 +1271 2 +2495 4 +1169 9 +1108 9 +2174 10 +760 1 +1547 4 +3924 9 +604 7 +3079 6 +885 2 +2456 3 +1240 5 +1766 4 +1145 8 +2033 5 +243 9 +741 9 +1280 8 +268 7 +1348 10 +2468 6 +1947 2 +3334 6 +2374 8 +1100 3 +1003 2 +1812 1 +1689 7 +2109 2 +869 10 +2552 9 +2960 5 +2530 9 +1542 8 +136 1 +106 1 +3308 3 +3104 1 +618 8 +2468 3 +274 9 +2516 2 +2462 10 +167 9 +1544 3 +24 3 +3147 10 +1578 3 +1684 10 +1813 3 +5 1 +3684 4 +597 4 +14 4 +3326 2 +3728 1 +3867 3 +1580 3 +2587 
10 +258 10 +669 1 +3150 1 +2015 7 +3335 4 +233 9 +2223 3 +1279 1 +2399 8 +1167 10 +764 6 +243 10 +3235 10 +2591 7 +1599 3 +359 9 +2827 4 +3682 1 +1980 8 +3899 7 +2449 1 +1698 3 +2179 4 +827 1 +725 10 +1837 9 +994 6 +1699 3 +2040 10 +1349 5 +3794 6 +1975 3 +899 9 +1515 2 +2600 10 +786 1 +1387 3 +2082 5 +3476 8 +821 6 +3768 2 +3541 4 +3394 9 +101 1 +1668 4 +3432 2 +1090 6 +2710 7 +2464 4 +783 4 +1648 6 +1163 6 +3060 8 +1299 8 +387 10 +3744 6 +86 1 +3529 8 +1085 2 +478 5 +2557 1 +1208 8 +3767 4 +3163 2 +3179 4 +2419 6 +4022 9 +945 6 +2826 7 +2412 3 +2783 7 +2515 10 +3818 5 +42 8 +2945 10 +659 2 +612 4 +2484 10 +507 4 +2027 10 +509 7 +1775 7 +219 10 +3733 5 +1724 3 +2606 3 +270 5 +3653 1 +446 9 +2719 7 +4095 9 +2103 8 +2007 6 +3257 7 +859 1 +3995 6 +2388 6 +1915 5 +3262 8 +2459 6 +2279 6 +3530 2 +2919 8 +2965 6 +34 2 +2017 5 +1253 7 +3971 4 +2495 7 +2716 9 +1389 4 +4077 1 +1104 4 +1028 4 +3428 5 +1546 4 +299 6 +3312 6 +1072 5 +2479 4 +2192 1 +2238 1 +3105 10 +1571 6 +1337 2 +2908 10 +1875 2 +1750 5 +401 7 +1336 1 +391 3 +2926 6 +1003 8 +4024 3 +3327 10 +1976 7 +83 7 +2317 4 +1943 9 +2391 3 +1602 2 +3199 10 +814 5 +1774 1 +3056 9 +3815 3 +1400 3 +646 5 +1686 4 +490 2 +2353 2 +277 3 +2074 9 +3402 8 +3429 9 +2517 5 +1931 5 +1980 8 +2791 1 +3549 2 +2698 4 +2777 6 +3019 4 +4079 9 +792 5 +1955 5 +3295 9 +1284 7 +3477 1 +1507 8 +1621 2 +392 2 +3275 7 +928 7 +2196 8 +303 3 +1769 5 +1724 1 +3960 1 +3209 6 +3037 1 +1241 10 +3146 4 +782 10 +2661 5 +2943 8 +3586 6 +340 9 +2135 7 +1911 9 +2699 6 +95 8 +3039 2 +2367 7 +2958 5 +3882 9 +3449 8 +243 10 +552 9 +2760 8 +2455 10 +3781 10 +947 3 +2362 3 +1366 5 +2659 1 +1249 6 +2635 6 +1623 7 +3472 1 +2849 5 +1229 4 +2687 10 +1355 2 +1584 9 +3270 2 +3116 9 +2472 2 +2153 9 +3907 7 +621 2 +3047 5 +3107 6 +3363 7 +3685 2 +1547 1 +979 7 +974 3 +2389 3 +2831 8 +2506 3 +1606 3 +1470 5 +1346 4 +591 10 +1795 1 +1332 7 +4040 6 +297 8 +1954 4 +3511 6 +1782 7 +1520 9 +2969 5 +2671 9 +3977 7 +2638 4 +3619 7 +786 3 +3257 3 +763 4 +2652 6 +155 10 +599 1 
+3494 7 +552 3 +3219 3 +1255 3 +1876 6 +3060 1 +900 6 +2407 3 +1507 1 +792 8 +1845 10 +2111 2 +2230 5 +2385 6 +2740 6 +2447 9 +911 1 +2998 4 +1109 4 +869 5 +1662 6 +4048 9 +914 8 +2359 10 +216 6 +378 8 +2837 1 +3926 10 +3501 3 +3393 6 +4007 1 +1902 4 +3258 5 +538 4 +2889 5 +2581 8 +2136 9 +719 5 +2366 6 +1234 6 +2322 10 +3818 3 +1252 1 +1789 6 +1990 9 +2398 1 +1553 6 +2756 9 +473 1 +3485 7 +3505 6 +1284 3 +2581 5 +1242 6 +1747 1 +942 9 +1096 3 +1135 3 +1890 1 +1320 8 +1667 1 +1116 2 +3184 7 +939 9 +3598 3 +526 4 +1118 1 +1665 1 +1227 10 +1265 4 +3687 10 +2978 10 +2239 2 +815 3 +2967 7 +1659 8 +3103 4 +1883 1 +3833 8 +1532 7 +643 7 +617 3 +3370 8 +3932 8 +747 4 +2065 4 +3693 10 +2748 8 +2243 1 +1536 4 +3248 9 +1416 4 +3548 5 +2847 3 +2237 4 +1162 6 +3825 5 +2114 10 +3252 6 +1964 9 +3489 3 +562 8 +202 1 +1575 3 +200 2 +3315 9 +1280 3 +2739 3 +1078 7 +3897 2 +1554 5 +1255 7 +1343 4 +1977 5 +1749 6 +2750 3 +2046 9 +3983 10 +3405 7 +922 5 +1938 2 +3494 6 +3635 2 +3980 3 +2397 8 +3953 1 +60 7 +1387 6 +362 3 +2219 2 +1653 10 +1805 10 +3002 8 +2108 6 +3855 7 +171 5 +3775 4 +1388 4 +709 4 +699 10 +1828 5 +3516 8 +312 4 +2154 10 +2842 7 +1965 3 +1431 9 +1067 8 +3379 9 +3313 4 +1866 9 +886 3 +3556 9 +3018 10 +179 8 +3483 6 +2181 7 +265 8 +2894 4 +760 9 +112 6 +2990 9 +850 4 +2042 10 +3815 5 +2783 10 +675 3 +2881 8 +677 6 +1226 4 +3428 5 +359 5 +579 3 +1254 6 +1816 6 +570 7 +3744 4 +44 9 +3334 5 +3261 8 +1909 6 +2931 1 +1659 3 +492 7 +1073 4 +887 2 +841 2 +2602 2 +3509 8 +603 10 +1714 4 +1821 3 +809 9 +224 1 +3666 3 +3812 6 +3970 3 +3649 6 +50 3 +3019 4 +337 5 +2172 7 +1856 3 +3381 3 +2345 5 +2569 8 +1495 1 +143 4 +822 3 +1152 4 +325 6 +1158 4 +969 1 +2245 7 +4003 2 +1184 8 +1384 7 +2700 5 +638 2 +2678 5 +318 9 +285 4 +266 6 +4054 4 +2122 6 +2459 1 +3677 8 +2581 6 +1368 8 +2160 3 +3780 4 +620 1 +2793 4 +457 7 +3707 3 +857 5 +2506 9 +99 10 +1180 9 +2180 9 +1890 7 +4050 3 +1183 5 +2802 1 +3624 2 +1006 6 +2492 5 +1166 5 +3142 7 +543 7 +2801 3 +2949 10 +1413 8 +2872 6 +2388 10 
+1403 6 +2665 8 +2479 9 +3318 7 +110 8 +3980 6 +738 10 +3142 10 +1171 10 +790 10 +3130 10 +964 8 +606 8 +2039 1 +3452 8 +1297 5 +3460 2 +3782 6 +1166 9 +4016 10 +2143 1 +4041 10 +2028 9 +3978 6 +3559 7 +1250 5 +2541 9 +2820 7 +1870 1 +560 3 +819 7 +1609 7 +2502 9 +390 10 +1708 3 +118 1 +521 6 +3816 6 +3859 4 +1345 6 +2919 9 +2643 7 +1412 5 +1989 10 +2703 6 +2515 1 +2868 3 +3693 7 +455 7 +1093 2 +2679 2 +2363 9 +3000 1 +2765 5 +290 7 +1684 7 +3626 6 +3971 5 +1148 6 +2333 3 +747 9 +2110 2 +3879 6 +2762 9 +2628 2 +1588 3 +1640 5 +2527 8 +1003 6 +3761 8 +2203 1 +941 5 +1764 10 +1998 7 +1486 5 +1778 9 +1418 6 +337 1 +3546 9 +362 5 +2899 7 +3449 4 +3803 8 +950 7 +1249 8 +2378 9 +99 10 +1556 4 +2744 2 +3619 2 +2238 9 +3069 9 +3224 7 +1837 7 +2342 1 +1946 9 +4086 6 +1742 9 +1820 1 +1183 10 +1308 4 +3928 6 +1287 9 +3580 8 +44 1 +2977 6 +1350 9 +1425 7 +1066 2 +2408 9 +1575 2 +2153 5 +3102 4 +135 9 +2758 3 +3540 9 +2125 2 +3796 5 +1795 4 +2676 8 +2096 10 +1415 9 +1715 7 +698 4 +3273 7 +1510 4 +2942 7 +2997 9 +2941 7 +2202 3 +4062 10 +590 1 +3500 5 +627 10 +2489 2 +581 3 +1042 8 +1675 6 +43 7 +131 2 +3194 2 +819 4 +1607 7 +3809 1 +2648 8 +3470 2 +2942 9 +2001 5 +1924 7 +3722 5 +222 9 +3344 5 +3909 2 +2361 2 +2594 4 +1451 9 +3194 6 +1582 4 +120 9 +2885 1 +2690 5 +1055 3 +2236 2 +3249 2 +1360 7 +2533 9 +1395 8 +3741 7 +1236 4 +2317 1 +1469 10 +3676 5 +1420 7 +1500 5 +2717 6 +2934 2 +2777 9 +1271 6 +1889 1 +1360 4 +1969 2 +1 8 +1097 2 +285 2 +3900 9 +98 6 +2889 8 +1734 3 +1370 4 +3999 2 +2008 2 +3511 9 +978 6 +3747 7 +1106 8 +804 3 +2414 6 +1744 1 +3141 4 +2357 5 +2289 7 +628 6 +2054 3 +1367 8 +1695 2 +4061 4 +1786 5 +3531 7 +33 5 +742 1 +2882 7 +2326 9 +3730 8 +2581 5 +309 10 +1523 5 +2461 9 +1090 10 +245 9 +1961 8 +3826 4 +54 4 +1745 10 +505 6 +2734 4 +2879 7 +1429 9 +1780 10 +3763 8 +2085 2 +3185 5 +2030 3 +2534 4 +919 4 +2008 5 +2816 5 +27 10 +416 3 +2021 5 +243 10 +40 8 +2354 7 +1027 7 +4095 2 +2714 8 +470 10 +588 4 +772 2 +3791 7 +3294 5 +835 5 +449 8 +3746 3 +3762 4 +1143 
1 +3125 7 +3422 8 +1590 4 +685 9 +4014 7 +1522 1 +2477 1 +214 7 +1584 3 +519 8 +906 5 +1375 1 +1575 2 +893 9 +3991 2 +4075 3 +2622 7 +153 7 +3756 6 +3697 7 +1795 10 +595 8 +629 9 +2880 9 +1810 5 +588 8 +2662 2 +1139 10 +569 5 +1782 2 +3787 7 +3767 1 +1391 3 +627 8 +2146 8 +2783 6 +2053 9 +1052 3 +1296 7 +634 10 +705 6 +2795 4 +2854 2 +1760 1 +3363 10 +1466 5 +56 5 +851 1 +2764 7 +1497 3 +1736 5 +1941 6 +2446 10 +241 2 +229 10 +3804 6 +3108 5 +1487 9 +3061 1 +858 5 +2141 9 +2349 4 +3767 9 +1256 4 +1550 6 +3940 3 +1370 8 +1105 10 +3710 8 +1315 6 +2278 9 +997 2 +214 7 +2548 6 +2822 7 +1375 9 +2782 7 +3766 9 +581 7 +876 5 +3832 4 +2883 5 +2986 7 +4065 7 +3648 8 +145 1 +1937 4 +4011 3 +1086 10 +3544 8 +1886 10 +237 7 +3133 2 +364 3 +819 1 +781 5 +2542 5 +2604 7 +2559 6 +3899 10 +3298 2 +966 5 +395 9 +3784 1 +4078 8 +2710 3 +4042 7 +3175 10 +2684 9 +3774 7 +383 2 +3091 6 +4046 1 +3959 8 +3781 1 +2175 6 +740 6 +411 5 +1898 6 +2382 8 +547 8 +3019 3 +523 6 +283 9 +3178 3 +1883 7 +2690 1 +3197 8 +1920 4 +146 7 +3725 7 +1329 2 +917 9 +1706 7 +3474 6 +1181 6 +2814 4 +3708 7 +1462 4 +878 7 +269 4 +3182 2 +2670 3 +2691 10 +2122 9 +2636 7 +1210 10 +3383 4 +1149 2 +653 3 +1396 1 +2248 5 +3643 1 +1201 2 +2968 5 +2970 8 +175 5 +1271 10 +2576 10 +2053 1 +1152 4 +2494 4 +1518 8 +3679 3 +41 9 +948 3 +3693 10 +140 9 +1344 2 +4017 4 +1112 4 +1346 7 +715 6 +2235 3 +775 5 +3889 4 +366 5 +1064 2 +890 10 +2363 3 +3281 4 +1309 10 +3842 9 +2127 1 +1367 5 +1636 1 +3201 9 +823 4 +708 9 +1983 9 +1512 3 +2129 2 +501 7 +1491 6 +3694 4 +2763 10 +2142 8 +4078 10 +3497 3 +880 2 +2604 7 +3884 5 +336 2 +2806 2 +1601 10 +1318 8 +189 1 +3017 6 +2059 10 +53 9 +340 1 +804 1 +508 9 +2675 10 +2330 8 +3161 10 +2351 5 +1687 1 +1371 3 +2029 5 +2386 6 +131 3 +986 10 +666 5 +2479 2 +3762 3 +1889 6 +120 8 +171 10 +2181 7 +2300 7 +1117 2 +3836 3 +1859 9 +2446 2 +842 5 +2529 2 +1749 4 +1705 8 +757 7 +664 6 +3193 2 +82 3 +1006 9 +2332 1 +3011 5 +4090 7 +2689 7 +1373 4 +2161 4 +3314 10 +1193 5 +1015 8 +2770 7 +2225 6 
+621 5 +128 4 +137 7 +2432 6 +2231 2 +2693 3 +1964 9 +654 4 +943 10 +995 8 +2439 7 +2169 6 +662 5 +832 7 +1131 1 +1045 5 +3220 9 +506 2 +2067 4 +915 7 +658 6 +3416 6 +1950 8 +2760 3 +2297 7 +4051 10 +1467 1 +2248 2 +2795 2 +1615 9 +772 4 +3245 4 +288 7 +834 5 +3795 4 +2689 2 +2726 3 +2606 10 +1767 5 +72 10 +2680 4 +2656 4 +2325 6 +3643 8 +3035 8 +1738 8 +684 3 +3832 8 +3728 1 +2275 10 +3107 10 +685 3 +3846 4 +1982 3 +1690 1 +2032 6 +1210 8 +1781 2 +3382 2 +2247 1 +690 1 +3735 1 +1611 1 +2958 1 +3543 2 +3484 7 +3383 4 +1395 1 +2098 9 +1474 5 +89 6 +509 7 +1644 4 +600 8 +462 10 +59 4 +946 6 +2324 6 +2871 1 +996 6 +1638 2 +323 7 +3103 6 +2134 7 +1541 5 +2401 4 +1727 6 +3397 8 +1731 5 +3671 1 +3651 8 +3635 6 +2892 2 +2833 8 +2641 8 +3525 2 +3738 2 +3686 10 +3111 2 +278 4 +1384 10 +548 3 +3772 7 +3536 6 +481 5 +3748 10 +4052 7 +572 7 +2653 7 +3797 4 +3867 10 +1799 1 +2206 3 +1947 4 +870 4 +1611 6 +2400 6 +438 10 +2292 2 +2975 2 +2863 3 +3747 10 +3738 2 +1865 4 +2427 6 +3084 6 +4044 4 +1387 6 +3262 1 +693 7 +1125 10 +797 5 +1355 9 +957 6 +3781 10 +2182 1 +1077 10 +70 9 +930 7 +3118 5 +1067 2 +926 7 +3068 5 +2984 3 +2713 7 +3882 1 +3359 4 +2119 6 +692 10 +3093 10 +3144 3 +1783 10 +2775 8 +732 5 +2138 4 +291 5 +830 8 +3752 5 +3154 7 +613 10 +1945 10 +1703 7 +3138 4 +3954 8 +3963 5 +1989 6 +3506 2 +2544 8 +556 8 +3623 6 +1378 1 +1324 9 +21 6 +164 10 +1064 8 +1277 5 +3024 2 +3754 8 +2917 2 +3126 10 +2715 9 +50 3 +495 2 +2961 1 +921 3 +2361 7 +43 8 +2014 10 +568 3 +2542 1 +1475 2 +2515 10 +2829 4 +672 9 +3836 1 +607 4 +744 8 +2107 7 +3118 8 +885 10 +800 10 +1649 8 +772 2 +3713 10 +2800 7 +1421 9 +2111 1 +367 3 +1137 4 +2645 10 +1226 4 +1095 8 +3364 10 +2810 8 +3614 8 +767 4 +3589 1 +340 5 +2647 5 +3762 7 +2526 6 +844 2 +2353 10 +1499 2 +1824 8 +4043 8 +1580 9 +2023 8 +581 7 +2697 9 +3806 7 +3330 2 +2796 4 +106 7 +1667 6 +3121 9 +491 8 +1080 6 +959 9 +961 2 +3875 9 +1256 4 +2327 2 +3024 5 +3579 8 +635 1 +4051 8 +364 9 +737 5 +1404 5 +3039 4 +1559 5 +3169 3 +3517 6 +2128 4 
+3883 4 +1955 1 +983 4 +1682 3 +2348 3 +445 9 +949 1 +1529 1 +3623 8 +836 1 +464 6 +2192 2 +3156 2 +2592 10 +648 6 +763 6 +1012 3 +3458 1 +3242 4 +2700 1 +3724 10 +3058 10 +432 3 +2621 10 +1386 1 +3954 8 +713 3 +3324 5 +3680 9 +3210 9 +3257 4 +2281 4 +2674 1 +3355 8 +3129 1 +1323 3 +3924 10 +337 6 +1993 4 +1410 5 +3095 2 +3873 10 +3867 4 +3841 9 +1699 9 +2316 7 +2441 1 +1398 5 +1372 8 +3995 4 +2726 7 +861 9 +1707 7 +3939 10 +776 1 +1101 9 +2112 6 +2337 1 +1129 3 +109 1 +2993 5 +1271 7 +1855 5 +1510 6 +2564 9 +1272 1 +2118 5 +746 5 +598 6 +1460 1 +3752 4 +2891 2 +841 9 +2446 10 +1140 10 +540 1 +3855 5 +2087 4 +2580 2 +1335 2 +1649 7 +4039 5 +3382 10 +477 10 +1215 5 +2158 1 +1163 9 +614 8 +3517 3 +2429 3 +3744 5 +342 9 +3027 1 +2399 6 +3211 4 +917 8 +513 10 +1910 1 +2413 1 +25 9 +605 7 +689 7 +273 9 +2299 8 +720 2 +1356 4 +1476 8 +3038 8 +1046 1 +638 5 +3954 7 +3113 5 +2904 3 +2826 7 +366 2 +1060 1 +3101 4 +3623 1 +1046 6 +1648 8 +2319 10 +2580 7 +2740 7 +2132 8 +77 8 +1541 7 +431 1 +2630 6 +1872 7 +1048 9 +3717 2 +1889 7 +2224 4 +1570 1 +3920 3 +2350 1 +1044 10 +873 9 +1551 10 +3882 1 +2499 2 +2603 6 +2066 10 +843 8 +3173 1 +2002 7 +1935 3 +1349 6 +2279 9 +3933 8 +4052 6 +1380 3 +3553 5 +3262 1 +1718 10 +2127 5 +3522 8 +218 2 +3081 6 +2212 8 +1414 4 +217 6 +3319 10 +4093 9 +60 2 +1841 9 +2929 3 +465 2 +3793 6 +1815 6 +3592 3 +649 1 +1222 9 +3654 9 +1126 9 +1883 3 +2413 6 +3066 10 +784 2 +403 6 +2530 8 +2289 4 +3445 1 +3904 1 +1237 6 +1146 4 +2062 10 +1773 2 +483 7 +389 10 +3913 9 +3234 4 +2214 5 +3441 4 +3730 3 +1729 5 +20 3 +1698 6 +2113 1 +2545 9 +1532 9 +3480 2 +3967 10 +202 1 +3625 10 +906 1 +2771 9 +1194 4 +3498 10 +2758 2 +1058 1 +2733 3 +1833 8 +3340 8 +2451 3 +597 8 +3409 9 +3920 5 +2411 4 +3056 6 +181 9 +2203 2 +893 2 +2040 9 +1594 1 +1887 6 +2415 7 +2968 8 +1160 4 +54 1 +2270 7 +3801 8 +3270 4 +2981 6 +1512 6 +2325 6 +2816 3 +3150 7 +4048 2 +2805 8 +3535 7 +3593 10 +3500 10 +3281 2 +1360 10 +3597 4 +2721 4 +343 10 +258 4 +1255 9 +1939 8 +2824 1 +1726 3 
+3129 7 +2754 1 +1694 2 +1952 8 +4061 2 +89 3 +1078 9 +2207 1 +771 4 +3160 6 +2529 10 +1275 10 +3569 7 +2421 7 +1878 8 +221 5 +2530 5 +2271 10 +1718 5 +2790 3 +1023 5 +1544 8 +1108 5 +191 1 +2823 8 +3379 8 +289 2 +1688 5 +4060 3 +2126 9 +3701 2 +1720 5 +2772 6 +3700 2 +2136 8 +1689 7 +3815 4 +224 2 +1875 2 +2927 5 +1911 7 +1582 3 +1257 5 +434 1 +457 1 +2981 4 +198 8 +3030 3 +3133 9 +2475 8 +3167 4 +690 6 +1754 8 +2109 6 +35 1 +3007 6 +1491 7 +2420 3 +2540 1 +3714 3 +1454 4 +2217 8 +2945 8 +3523 3 +2892 4 +2897 6 +1730 5 +4003 4 +2276 8 +3587 7 +3226 6 +0 7 +916 4 +903 8 +3079 5 +1591 3 +1633 1 +1316 2 +3577 8 +2644 7 +893 2 +77 4 +140 7 +2672 6 +1022 6 +1499 2 +1639 10 +1104 1 +737 2 +1403 8 +159 2 +1386 9 +1607 8 +277 10 +2007 7 +1950 3 +6 7 +3642 10 +897 2 +2337 8 +2005 9 +1552 10 +2996 9 +2807 3 +3706 3 +2722 7 +738 3 +3131 4 +769 5 +839 9 +579 3 +376 3 +127 7 +2292 9 +2064 6 +293 3 +2664 7 +3159 1 +1316 3 +3741 10 +2200 2 +3235 8 +1615 6 +3673 2 +2027 1 +2041 3 +2158 9 +502 3 +3259 6 +2920 9 +2991 9 +750 5 +595 10 +77 4 +3058 10 +21 2 +2507 2 +1414 7 +2714 7 +2649 1 +2054 8 +2386 10 +2074 4 +3972 9 +1599 3 +984 3 +910 2 +1353 7 +103 8 +1232 2 +1963 3 +3550 5 +1089 3 +83 8 +2172 8 +2716 8 +2012 4 +3828 4 +3398 8 +60 3 +3319 5 +258 3 +2440 10 +1001 6 +1323 7 +3974 5 +3416 9 +2292 3 +3393 7 +3653 10 +826 8 +165 2 +2911 3 +2145 10 +3586 7 +2063 1 +3343 4 +429 1 +1006 10 +3920 10 +3762 8 +3335 5 +911 2 +2266 7 +3226 4 +3291 8 +2664 9 +2491 5 +3306 8 +3442 1 +1825 10 +640 6 +1598 8 +3616 1 +3793 3 +2566 10 +1866 7 +2764 6 +2351 7 +1548 9 +322 4 +2280 1 +3559 8 +1545 9 +3684 3 +1570 7 +3097 8 +858 6 +3959 6 +1860 1 +2740 2 +1148 9 +3830 1 +2356 8 +2609 4 +1264 2 +3457 5 +413 5 +327 4 +1687 1 +749 1 +1883 9 +1180 10 +337 5 +498 7 +28 9 +1865 1 +3618 1 +1249 9 +1827 7 +1126 6 +725 5 +3055 5 +1678 4 +803 4 +1274 4 +892 1 +1335 1 +17 1 +2755 8 +1539 8 +263 10 +3628 2 +1536 6 +3625 2 +2750 8 +1723 3 +754 2 +1215 1 +2468 5 +1915 7 +2581 5 +2083 2 +2500 6 +1408 1 +3553 7 
+491 7 +2703 10 +3716 10 +2080 6 +3910 6 +2597 1 +2884 10 +2393 2 +3050 6 +353 3 +2432 3 +1449 8 +1730 9 +3401 8 +2603 3 +3666 5 +3757 7 +3451 6 +2631 3 +3513 4 +2051 3 +249 6 +100 8 +3249 1 +2676 8 +3349 2 +595 9 +260 3 +1321 2 +613 1 +609 6 +733 9 +3565 3 +2844 5 +1077 4 +1335 5 +56 6 +1635 1 +749 8 +3556 7 +3628 10 +707 3 +1128 9 +4037 8 +2115 9 +500 9 +205 7 +3402 6 +3212 4 +2871 5 +3626 10 +2295 1 +1035 10 +576 2 +804 6 +3995 8 +444 1 +172 7 +426 3 +2358 6 +790 2 +354 6 +707 6 +821 5 +3885 2 +1713 8 +3784 6 +3039 8 +558 8 +3662 4 +1602 6 +3633 5 +3148 7 +2544 1 +2897 2 +1868 8 +2020 5 +4075 9 +3637 8 +3963 6 +2467 10 +2682 8 +86 8 +3390 2 +3339 4 +1037 7 +89 10 +2146 5 +1745 6 +3121 6 +2060 1 +3569 6 +357 4 +1103 1 +111 1 +3413 2 +1193 8 +563 5 +2826 8 +3744 7 +375 6 +1013 6 +1568 6 +2868 3 +1608 7 +1941 10 +968 6 +3423 8 +2918 7 +1782 6 +2209 1 +167 1 +2760 8 +1729 10 +1217 4 +2875 2 +2347 7 +1611 7 +2309 3 +119 2 +723 10 +3352 8 +4079 1 +1694 7 +1800 7 +2296 4 +2053 3 +2343 1 +3538 8 +1106 1 +240 3 +3200 10 +538 6 +2704 9 +3566 4 +3644 7 +3603 3 +2059 10 +1172 1 +3726 8 +2693 2 +1746 9 +220 3 +1058 1 +2733 5 +346 1 +3561 8 +2016 7 +1905 7 +1291 2 +794 3 +2621 1 +2879 7 +1422 8 +3040 5 +966 6 +346 3 +4074 8 +3107 3 +250 6 +1903 7 +1823 7 +1941 3 +1193 8 +656 3 +3856 10 +3578 9 +1671 3 +1408 1 +3973 4 +1335 5 +2952 3 +3572 1 +567 7 +2517 3 +3453 4 +3350 10 +2637 9 +3576 9 +3449 2 +1793 1 +3411 3 +2143 7 +1627 10 +1174 7 +342 6 +850 5 +2827 5 +1367 3 +2783 8 +364 3 +1103 6 +3247 3 +2149 3 +2201 3 +2631 5 +4090 3 +3626 7 +1042 2 +972 6 +1913 4 +143 10 +2251 5 +1762 5 +2310 4 +2592 8 +1443 4 +1123 3 +716 9 +3583 7 +2524 5 +3119 10 +23 9 +1015 6 +3945 4 +4069 3 +3508 4 +3067 9 +693 1 +262 8 +2509 6 +2148 6 +2193 9 +1492 6 +2472 6 +895 8 +2772 10 +400 2 +786 2 +2090 1 +2208 3 +479 9 +356 4 +2267 8 +1695 8 +792 9 +2903 5 +3171 6 +1243 9 +2349 2 +3895 10 +491 10 +3972 7 +713 2 +3522 4 +2937 8 +1718 7 +1770 8 +807 5 +3955 1 +270 9 +2996 1 +647 5 +1867 7 +3583 1 +3476 
10 +1452 10 +3630 10 +1799 4 +711 10 +1450 10 +3530 6 +635 10 +2022 7 +495 8 +3385 10 +1741 7 +1236 2 +643 5 +4066 5 +3752 6 +875 9 +3582 1 +2377 7 +2184 10 +3864 5 +1079 3 +3044 10 +3813 2 +3966 9 +629 5 +2299 6 +1582 2 +1480 5 +3984 1 +1689 1 +1082 2 +3190 8 +646 1 +1348 5 +589 7 +1961 2 +2145 6 +1364 2 +2677 1 +188 1 +2570 4 +2986 4 +3024 1 +223 2 +3571 7 +2605 6 +919 8 +479 6 +1142 2 +199 8 +2507 4 +3255 9 +1555 3 +1862 4 +1569 8 +4003 2 +969 8 +2461 5 +678 7 +1907 9 +1502 3 +2990 4 +1026 5 +3877 6 +2041 9 +3876 1 +301 7 +432 3 +2509 6 +928 6 +1432 2 +3112 6 +3095 4 +1818 5 +2142 5 +3986 1 +32 7 +1349 4 +2403 1 +1060 5 +1802 7 +555 9 +4018 4 +3565 7 +2161 10 +2194 4 +3926 7 +2095 9 +3729 3 +217 7 +352 10 +2548 8 +3679 6 +2553 3 +1063 10 +1533 2 +3447 6 +1027 10 +712 6 +1118 5 +3083 10 +873 9 +3612 4 +2727 5 +729 1 +1895 3 +2700 9 +4078 6 +4089 7 +3265 7 +1583 1 +3546 5 +2689 2 +1640 4 +3344 7 +134 4 +3114 3 +2242 4 +2980 1 +1594 4 +3626 4 +3225 8 +3137 4 +1634 1 +2588 7 +3933 1 +844 1 +1466 2 +3288 9 +3192 1 +1987 2 +2357 6 +16 5 +2817 10 +128 2 +1160 10 +2992 10 +2502 5 +3972 9 +2395 3 +1275 7 +625 4 +907 2 +2265 7 +3172 4 +3225 7 +77 3 +2146 2 +2817 2 +3845 9 +3691 8 +600 8 +611 2 +417 7 +2645 10 +75 9 +711 1 +564 4 +151 4 +3541 8 +3038 3 +3912 9 +3342 3 +9 9 +1553 10 +3576 6 +1170 4 +98 6 +2700 6 +3086 3 +2591 2 +413 7 +2521 8 +3016 7 +1118 7 +4000 10 +1018 8 +722 10 +3952 1 +3646 7 +3920 4 +448 7 +3415 2 +2990 2 +984 5 +1211 5 +1659 8 +1928 3 +1319 2 +774 3 +3266 8 +752 7 +2279 10 +2854 6 +2941 9 +3060 2 +2874 1 +3549 1 +3379 8 +751 7 +3009 5 +3099 2 +233 1 +3321 3 +1889 4 +2192 9 +3140 3 +1338 2 +1980 1 +3517 6 +1434 1 +1576 9 +3398 10 +1951 8 +3785 2 +3229 10 +497 7 +1914 9 +82 7 +2467 10 +1546 10 +3857 9 +1230 9 +266 3 +1664 6 +3702 4 +480 8 +3512 5 +123 5 +3271 7 +3467 3 +3861 9 +455 8 +1743 9 +2004 2 +726 7 +4059 6 +2982 10 +681 3 +3533 6 +2857 3 +2731 10 +3075 10 +217 1 +2691 9 +2311 9 +30 1 +2142 2 +3943 6 +1285 2 +1664 5 +3455 5 +3493 3 +467 4 +1220 
3 +439 7 +2905 8 +718 10 +795 6 +2965 8 +2864 1 +2547 8 +2790 6 +832 9 +1498 1 +1664 10 +1942 8 +878 6 +2726 6 +1088 10 +174 8 +25 10 +3262 8 +1573 6 +3861 3 +2993 1 +3965 1 +2892 6 +1820 6 +339 7 +157 3 +2762 7 +76 5 +3179 9 +1356 9 +686 2 +3302 3 +3262 6 +2467 6 +500 10 +3046 10 +736 2 +649 3 +2925 10 +3501 5 +238 6 +1303 1 +913 9 +693 3 +2173 3 +1814 8 +3080 7 +3560 4 +3904 2 +1921 9 +2389 7 +2600 8 +2192 10 +1275 8 +3306 6 +287 10 +3722 4 +363 3 +240 10 +602 7 +1671 3 +1677 7 +789 10 +2319 1 +2771 1 +585 10 +91 3 +2105 7 +3282 2 +3942 9 +2825 3 +26 9 +3405 8 +3732 1 +1612 10 +983 5 +1469 9 +2819 2 +2995 10 +890 2 +3616 8 +814 3 +2376 4 +3578 4 +3499 8 +3319 8 +2801 3 +3953 6 +3239 2 +870 5 +2468 8 +2992 2 +3429 3 +2117 10 +1945 6 +1143 1 +469 5 +2804 9 +2309 9 +2124 4 +1763 6 +3604 3 +3640 1 +2045 8 +2531 5 +2763 3 +2395 1 +2323 2 +1081 10 +2078 10 +1731 2 +364 9 +1714 9 +578 7 +1469 6 +1905 10 +129 2 +389 1 +94 8 +2873 9 +1124 6 +824 2 +3386 5 +1700 2 +3658 9 +2415 8 +1264 5 +4028 8 +1663 8 +1435 10 +4002 5 +3274 6 +2072 1 +3006 2 +376 10 +3595 9 +3275 10 +1755 1 +548 4 +232 5 +3179 3 +1100 7 +772 8 +3330 5 +3967 3 +1494 7 +1770 4 +2269 8 +896 2 +1058 8 +1698 8 +3801 9 +1633 6 +667 9 +2153 9 +3867 1 +3617 4 +415 2 +671 3 +1993 1 +3368 1 +2161 3 +3957 3 +1938 4 +3215 10 +12 4 +1450 4 +3852 3 +3372 9 +1514 5 +2308 8 +1153 2 +422 5 +1824 10 +1575 1 +3979 9 +3026 2 +3162 6 +3247 3 +1939 5 +191 9 +2677 8 +3849 3 +3871 9 +1269 4 +2867 10 +2521 1 +110 7 +2569 6 +3901 9 +1302 1 +1441 2 +150 3 +4029 10 +1336 6 +243 8 +3365 10 +3901 9 +1297 2 +3664 5 +3507 7 +1263 8 +3142 10 +2079 3 +642 1 +2478 3 +3766 10 +2993 9 +3337 5 +2224 7 +1444 4 +2939 7 +1226 2 +866 1 +957 4 +3813 9 +982 4 +2114 1 +247 7 +2946 3 +744 6 +923 4 +3534 4 +2790 7 +2840 4 +1963 8 +916 6 +592 4 +2187 5 +1236 5 +2522 7 +139 1 +3331 5 +1705 6 +683 10 +3383 3 +2377 10 +523 3 +3815 9 +3822 3 +541 8 +2128 1 +431 3 +1719 4 +3104 6 +2394 1 +1679 6 +1341 1 +1555 10 +2818 6 +1818 2 +3978 6 +3784 5 +211 8 +28 3 
+2160 6 +2290 8 +1029 1 +500 4 +664 9 +964 10 +1349 10 +260 6 +3889 1 +224 9 +3846 1 +3442 3 +1542 4 +1834 10 +137 6 +1918 4 +3657 4 +16 5 +3626 10 +762 5 +1907 10 +1306 6 +976 5 +31 9 +2618 5 +643 2 +2273 4 +1515 8 +196 4 +754 10 +1134 8 +892 3 +1800 9 +1698 8 +2326 8 +4061 4 +370 3 +3209 3 +3852 9 +2499 8 +195 5 +1606 4 +2600 9 +1248 10 +3417 8 +691 6 +3882 9 +2000 5 +3657 10 +1219 8 +1453 5 +1806 8 +2896 6 +10 4 +2545 6 +3165 7 +3882 2 +2855 10 +3935 1 +3647 9 +71 8 +3790 1 +1891 8 +514 2 +3790 6 +2152 5 +3046 2 +594 8 +2930 6 +2927 10 +1739 1 +2559 7 +3408 2 +1081 3 +771 10 +506 3 +3537 6 +2946 8 +3995 7 +1245 7 +2097 5 +3222 9 +137 1 +3918 9 +1969 5 +1077 4 +527 2 +317 7 +1836 3 +1238 9 +582 6 +2554 4 +2292 2 +1106 7 +1709 5 +3980 2 +2134 9 +3310 1 +3476 9 +132 4 +3924 6 +3875 2 +2794 1 +3632 1 +728 2 +3674 1 +3873 9 +3649 4 +1185 7 +2722 3 +485 10 +272 6 +1181 5 +3942 6 +4013 5 +65 10 +3027 7 +2131 2 +390 9 +82 2 +2018 1 +3598 5 +2083 9 +1188 4 +1583 2 +736 8 +2779 7 +2101 3 +1522 8 +674 5 +3751 2 +1947 3 +4044 9 +2536 9 +191 8 +3053 9 +2025 1 +2984 4 +33 1 +1426 2 +514 7 +2972 5 +3109 4 +3106 6 +2568 2 +2309 7 +3966 7 +2344 5 +1173 5 +1885 5 +2939 7 +3867 1 +3595 4 +4083 5 +1132 2 +2868 1 +950 5 +3194 4 +2220 5 +3917 4 +580 10 +1304 10 +1540 5 +3223 10 +825 3 +1436 4 +3907 1 +2911 9 +15 3 +3371 5 +1200 3 +1092 10 +916 8 +668 7 +4035 6 +2062 7 +1581 9 +804 2 +3293 9 +2459 7 +2831 10 +2755 10 +2931 4 +3780 10 +4013 1 +1060 6 +3093 6 +238 6 +440 1 +85 5 +375 8 +622 3 +803 2 +2099 10 +2261 7 +2677 10 +2598 8 +1134 3 +3503 2 +1761 6 +2114 10 +278 4 +2703 8 +1153 7 +818 5 +3369 4 +144 5 +1992 1 +2033 2 +1942 2 +44 9 +3582 10 +354 10 +4004 1 +3363 5 +2129 1 +3445 7 +1353 7 +46 6 +1934 2 +3555 9 +2423 2 +1947 8 +3976 7 +3853 9 +124 4 +3854 9 +1633 5 +102 10 +3302 7 +1801 9 +423 4 +1740 1 +2919 2 +404 10 +2831 9 +2348 10 +2235 1 +1242 7 +2536 10 +2623 10 +655 5 +634 4 +218 9 +757 5 +1516 10 +3683 1 +1746 4 +3826 2 +2137 6 +191 4 +2889 6 +1793 3 +3265 7 +2244 4 +1057 
8 +1190 8 +1085 9 +286 6 +644 10 +3189 6 +3934 1 +1957 3 +34 4 +1837 6 +3480 7 +2206 1 +747 2 +1688 3 +2107 6 +3892 1 +3119 1 +2198 8 +2862 7 +1662 1 +1857 8 +1132 6 +3316 3 +1877 1 +3550 2 +2671 7 +190 10 +1450 1 +2910 10 +1173 1 +3742 6 +1907 7 +2345 8 +580 8 +0 2 +1920 7 +2737 3 +1030 2 +2061 9 +2704 7 +3309 4 +1204 1 +777 1 +81 10 +906 10 +1049 4 +3803 9 +3684 9 +1256 6 +1970 9 +2133 3 +1968 1 +1532 2 +2992 1 +3285 10 +829 2 +156 8 +2882 10 +120 8 +499 10 +1962 3 +2202 7 +4015 8 +2883 6 +327 4 +2502 9 +8 6 +1896 6 +2862 1 +1220 4 +3890 4 +58 3 +2273 1 +2074 10 +3090 5 +200 6 +2522 2 +624 1 +592 5 +1190 4 +3879 9 +84 6 +1193 10 +1612 2 +3239 7 +254 2 +3689 5 +2560 10 +422 3 +3726 10 +617 6 +3673 3 +3806 4 +143 10 +1326 9 +952 5 +211 4 +3346 10 +2984 3 +80 3 +2045 1 +371 9 +2921 4 +1924 5 +2656 8 +3435 9 +3882 6 +1410 4 +967 4 +3102 1 +2018 10 +1122 9 +3656 10 +653 2 +1418 4 +1107 3 +2603 6 +3792 7 +721 6 +3489 10 +1092 9 +1186 8 +3296 5 +2136 2 +2847 5 +1660 3 +417 2 +3312 9 +1811 8 +2537 6 +2928 10 +1383 5 +2939 2 +2065 9 +1781 7 +3544 6 +1042 6 +342 10 +2704 9 +2433 6 +194 4 +2000 9 +2886 9 +1010 2 +2869 2 +1508 9 +157 3 +2606 4 +1790 4 +2353 6 +1723 9 +429 5 +3385 5 +2976 2 +409 7 +585 3 +3346 4 +3500 8 +636 4 +478 5 +921 2 +2642 4 +3195 5 +3676 8 +3798 1 +1651 7 +16 6 +228 4 +1168 8 +2865 7 +726 8 +839 10 +3906 1 +2140 9 +3875 1 +636 9 +4087 1 +1551 6 +3299 6 +1899 9 +3215 9 +2406 1 +3391 2 +4087 1 +1259 4 +3409 7 +450 7 +2905 3 +1733 6 +647 1 +2220 10 +1894 2 +744 8 +189 7 +2138 5 +2569 1 +2941 4 +1627 6 +234 3 +3382 9 +3326 9 +283 4 +3659 10 +3223 3 +1083 2 +21 5 +3083 6 +98 7 +3288 7 +198 2 +3577 3 +1638 1 +2968 7 +251 9 +2460 1 +2706 3 +1224 8 +1773 7 +995 5 +770 7 +1972 6 +1375 8 +3830 9 +754 2 +2173 8 +627 7 +1797 6 +3883 6 +1402 5 +1736 6 +3818 6 +1851 6 +3316 7 +2677 7 +663 6 +593 3 +2773 6 +2694 1 +1355 6 +2838 4 +1222 7 +4049 4 +2128 1 +1802 9 +1112 8 +1812 4 +3774 4 +1166 8 +725 4 +2677 7 +2281 9 +1746 4 +2493 7 +641 8 +11 5 +1462 4 +2250 10 +166 
2 +2528 10 +961 7 +263 10 +3339 3 +2827 4 +1732 9 +2883 7 +859 7 +861 2 +3158 2 +561 2 +12 3 +4009 3 +1000 8 +1035 2 +2937 1 +629 6 +4084 6 +633 2 +2601 6 +2352 6 +1079 3 +438 7 +3352 1 +3240 8 +2414 6 +2520 7 +3806 1 +1134 5 +1567 4 +2601 6 +827 5 +2418 5 +1640 8 +934 6 +2003 2 +1361 9 +977 7 +3833 6 +3506 1 +1192 1 +857 5 +1151 7 +21 10 +3669 7 +3653 8 +2881 1 +1425 2 +3634 8 +2023 2 +1825 4 +3340 9 +377 8 +3265 8 +1108 10 +2393 5 +3781 1 +316 2 +1475 10 +2501 1 +232 3 +3331 4 +1765 3 +2788 1 +3280 3 +2253 10 +2090 7 +3222 7 +2724 1 +1265 2 +3847 3 +855 7 +1994 8 +3149 10 +1469 6 +2450 4 +2419 7 +946 4 +2779 8 +711 10 +3970 3 +229 5 +782 9 +2264 9 +3732 6 +3980 8 +770 4 +2639 7 +2716 10 +3583 8 +3474 1 +2085 5 +1121 2 +2257 2 +3388 2 +1328 7 +916 3 +2169 2 +2166 10 +3003 7 +2230 8 +2713 4 +176 8 +869 8 +1994 1 +912 2 +313 4 +3754 6 +2763 4 +714 6 +3634 5 +3327 4 +1620 9 +2297 1 +231 9 +1057 3 +1101 2 +1041 4 +691 3 +2083 10 +485 7 +3271 10 +1475 4 +3822 1 +3339 7 +3785 9 +3305 10 +1931 3 +3387 10 +3887 7 +3685 10 +3844 3 +1839 9 +2068 5 +170 3 +3234 9 +3413 1 +317 6 +1483 5 +2165 8 +3199 6 +3312 6 +1195 5 +1172 8 +896 1 +58 10 +3517 3 +1279 4 +653 6 +1822 7 +2863 8 +1141 8 +3424 9 +3958 3 +655 6 +404 5 +294 2 +186 6 +3461 4 +1354 7 +3243 1 +3536 5 +3069 3 +3268 6 +1595 10 +2915 9 +3721 9 +2327 4 +3800 9 +2860 7 +2215 2 +3780 7 +3230 4 +46 1 +370 6 +866 5 +2080 3 +3315 1 +1324 5 +1029 3 +3766 4 +1751 6 +1476 2 +986 8 +2803 1 +2111 4 +2338 2 +3724 5 +1879 4 +1931 10 +2497 10 +1532 10 +2527 4 +3131 10 +1814 9 +1523 1 +3901 7 +2319 4 +2899 9 +1437 3 +1660 7 +585 10 +3484 7 +2269 8 +738 6 +2207 9 +2969 4 +1485 4 +211 6 +2838 4 +3531 6 +2513 9 +3864 6 +2400 7 +1134 10 +454 10 +3850 9 +208 9 +1813 1 +3291 10 +3528 6 +3959 8 +1339 8 +3506 3 +3121 6 +761 2 +22 1 +364 1 +2628 8 +3881 2 +2965 1 +2716 4 +2487 7 +2730 9 +3318 6 +629 6 +1007 6 +435 4 +1478 5 +3141 2 +1374 8 +678 2 +1904 1 +1834 1 +1269 7 +3504 10 +2939 8 +2599 2 +1427 3 +3212 8 +3345 2 +3406 7 +1938 1 +989 4 
+3785 1 +934 1 +437 6 +881 4 +122 4 +3791 1 +2000 7 +1631 7 +1329 7 +164 3 +3300 5 +3287 9 +3086 3 +2094 8 +804 7 +2223 3 +595 3 +530 8 +1273 5 +786 5 +2472 7 +740 2 +2264 7 +673 9 +2361 7 +1326 1 +547 1 +2008 3 +1050 8 +1852 3 +2884 3 +602 9 +937 2 +2085 3 +516 7 +2260 5 +2234 5 +1887 10 +2430 4 +2722 7 +1956 4 +2459 1 +2905 9 +3195 6 +3568 2 +597 7 +167 2 +2975 8 +812 7 +2980 8 +2173 1 +1286 9 +414 10 +2575 9 +3431 2 +218 5 +509 4 +599 8 +2253 2 +2425 8 +1903 7 +1882 3 +3459 6 +3750 8 +3879 7 +3658 4 +93 3 +2907 10 +4093 5 +4046 10 +2553 3 +628 5 +353 4 +2955 6 +1148 4 +1622 10 +421 5 +1751 5 +3036 2 +465 6 +771 4 +2380 5 +1939 9 +3015 7 +1858 8 +268 8 +2522 1 +3363 6 +1936 8 +1255 3 +3555 2 +2728 6 +4022 1 +299 2 +3805 10 +2651 5 +1905 4 +1401 5 +454 9 +814 10 +2090 8 +2793 10 +568 9 +3842 5 +2281 3 +2515 4 +1920 8 +1894 5 +1752 8 +306 3 +3519 2 +3708 4 +213 4 +2748 10 +588 8 +1499 2 +2297 5 +3789 2 +126 9 +681 1 +3899 1 +1572 10 +475 7 +625 10 +1258 4 +1460 5 +2488 5 +481 7 +2448 9 +820 3 +2882 10 +3490 1 +2711 3 +644 10 +31 3 +2255 1 +2522 9 +627 8 +22 10 +2711 7 +1282 10 +2480 1 +3949 3 +2798 2 +1383 9 +2992 5 +1491 2 +1989 5 +2155 2 +3580 3 +1215 5 +2340 8 +1715 3 +3344 5 +3397 5 +1089 8 +2778 10 +3895 1 +321 10 +958 6 +3883 5 +1945 1 +3373 3 +1180 6 +1698 4 +3567 7 +3144 9 +783 5 +2923 7 +3221 10 +2758 8 +3915 8 +1535 2 +3194 3 +1792 9 +572 9 +3530 10 +2444 5 +2855 2 +768 7 +1914 7 +821 5 +1860 1 +2994 7 +2926 3 +3594 4 +1054 9 +406 8 +2511 8 +3791 4 +220 1 +2195 6 +242 9 +42 4 +1349 7 +2944 3 +1880 2 +1480 6 +1805 10 +2634 5 +3381 3 +1064 5 +3218 8 +3391 10 +3118 10 +330 1 +2075 1 +2774 10 +3123 9 +983 3 +2024 4 +3016 7 +425 9 +3109 5 +899 1 +2521 1 +4000 1 +2850 2 +3023 7 +2190 2 +3453 9 +4093 7 +3034 7 +747 8 +2485 3 +2066 9 +2052 1 +3465 3 +2692 4 +2116 2 +546 3 +448 4 +2518 2 +3365 1 +1695 9 +253 6 +164 5 +2151 7 +3215 6 +837 7 +553 9 +2582 3 +2285 5 +592 7 +1127 5 +482 8 +2803 9 +652 5 +3119 1 +1567 3 +1987 5 +379 2 +1883 10 +3841 8 +4038 6 +453 6 
+2498 8 +224 8 +629 2 +411 5 +3853 10 +3104 2 +405 3 +1898 4 +1693 3 +109 2 +469 2 +496 4 +217 7 +632 7 +1710 6 +125 10 +1567 2 +2568 7 +2245 9 +3151 7 +2354 2 +1887 5 +1005 2 +2726 7 +1361 7 +1381 3 +1383 3 +3041 6 +2252 1 +346 4 +759 5 +2045 9 +2877 8 +2281 7 +2373 1 +3292 4 +657 4 +988 6 +3893 6 +1043 9 +788 8 +1341 4 +664 9 +1247 10 +3285 7 +2839 10 +670 10 +593 10 +3427 3 +238 7 +3747 8 +2380 5 +146 2 +2775 10 +2790 1 +2458 7 +791 9 +4028 6 +3665 5 +1495 5 +2756 2 +1237 9 +2449 4 +1139 6 +3249 10 +2747 9 +1513 8 +4050 1 +3195 1 +1455 9 +3482 6 +2337 4 +1523 2 +1430 6 +1146 5 +1655 8 +4057 6 +1455 5 +191 7 +1671 7 +2028 5 +3530 10 +395 9 +2020 4 +3583 7 +950 5 +1105 9 +816 10 +2189 7 +2677 4 +9 2 +483 10 +1606 1 +2663 10 +2964 1 +1523 8 +3645 8 +7 1 +729 2 +185 9 +1680 6 +3629 4 +3886 9 +1507 8 +2202 10 +1123 4 +1048 8 +2469 8 +2455 9 +1450 3 +4064 10 +2044 6 +180 9 +2370 7 +3996 10 +398 9 +1462 1 +1442 10 +3583 1 +2750 9 +1643 4 +2951 6 +79 7 +421 3 +2778 4 +3693 2 +1015 8 +773 3 +3014 1 +1025 10 +3488 9 +3026 3 +3108 9 +3945 4 +62 9 +590 7 +2486 7 +1035 6 +3525 3 +1705 2 +2160 4 +873 10 +4040 1 +1300 10 +442 2 +3648 8 +2035 5 +3611 10 +3103 5 +447 7 +1494 7 +1342 8 +3676 6 +1441 2 +2882 3 +3626 7 +3349 2 +979 4 +960 9 +2272 8 +2477 6 +1631 2 +2462 10 +1635 1 +3521 4 +1538 10 +915 2 +1891 1 +356 2 +3373 9 +81 8 +900 7 +3236 4 +3149 6 +83 8 +890 6 +1643 8 +714 1 +4041 6 +365 6 +1457 7 +1521 2 +2580 5 +2290 9 +471 7 +1491 5 +1655 2 +2727 5 +3081 3 +2307 2 +3816 6 +1678 9 +1613 1 +1890 7 +3107 1 +217 9 +863 10 +1852 6 +554 9 +567 2 +3700 3 +3559 4 +3870 4 +3695 2 +276 7 +2593 5 +1009 8 +329 7 +1381 9 +2848 2 +3548 10 +2045 4 +512 3 +2469 3 +791 3 +1518 10 +4088 10 +997 1 +4045 10 +825 10 +1449 7 +3425 2 +2816 10 +3579 7 +1068 9 +653 8 +1616 6 +2336 6 +1459 10 +3783 5 +2128 3 +2882 5 +2405 2 +200 4 +1164 9 +2094 10 +1884 8 +1645 7 +1624 2 +2066 7 +1488 4 +1136 3 +2658 10 +2102 3 +1189 7 +3775 3 +1370 7 +3049 5 +272 10 +2760 10 +954 2 +3127 3 +2438 8 +2670 3 +3395 
4 +274 9 +2558 5 +1144 7 +2557 5 +647 2 +2018 1 +1909 2 +2846 7 +467 10 +2055 8 +3092 7 +1822 3 +3765 8 +336 2 +610 10 +362 8 +3569 3 +1180 1 +3754 9 +1901 5 +1909 6 +884 3 +2760 10 +74 3 +635 4 +1752 10 +2238 3 +663 4 +3229 2 +1013 3 +1376 3 +1501 4 +2606 2 +3462 1 +326 3 +305 8 +846 9 +990 2 +3598 10 +3582 8 +3796 6 +1731 1 +3279 6 +3472 9 +60 7 +1910 8 +2982 7 +3372 10 +2114 10 +541 7 +294 8 +2316 5 +3760 1 +1284 4 +2374 5 +2717 1 +1313 8 +932 5 +3137 2 +1373 7 +4088 5 +1820 4 +2512 7 +2813 6 +2251 4 +1727 10 +704 6 +483 10 +3281 9 +1622 2 +1284 3 +1293 1 +3241 7 +1508 10 +696 2 +2944 4 +3889 5 +1075 10 +1680 8 +1084 9 +2060 10 +2892 7 +900 5 +2589 7 +1025 4 +3950 6 +953 1 +455 2 +1016 7 +1344 7 +2688 8 +467 9 +2597 9 +2859 8 +2643 8 +3544 6 +1000 8 +225 4 +1473 9 +2134 2 +26 10 +623 7 +2449 9 +479 2 +3936 1 +935 7 +1490 7 +885 7 +437 7 +3937 1 +1729 4 +3078 7 +2020 6 +330 9 +4064 10 +1392 10 +2589 2 +4080 5 +2785 9 +2570 9 +3420 7 +2709 2 +261 1 +2595 8 +2383 8 +1986 1 +991 5 +3796 7 +63 6 +2499 6 +2323 2 +3772 3 +960 1 +1186 1 +3358 3 +2414 8 +940 7 +3606 7 +802 1 +1913 5 +2900 10 +2078 1 +864 2 +3210 3 +4023 7 +3678 9 +1792 10 +3996 5 +2024 4 +2605 7 +2645 3 +1420 5 +3328 9 +2147 9 +2813 2 +1841 3 +3458 9 +777 5 +3564 2 From 2e61dad11faeff4b9d2f6d53de46fc1eaacbcbbd Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 28 Jun 2024 09:48:04 -0400 Subject: [PATCH 052/488] Update README.md Update TOOLDIR to vortex-toolchain-2024-6-14/ --- README.md | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index cd517b2b5..aebbed455 100644 --- a/README.md +++ b/README.md @@ -67,13 +67,8 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
mkdir out export OUT_DIR=`pwd`/out cd build - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-05-22 --prefix=$OUT_DIR -### Ignore the commit for ramulator when it is compiled - # Please add ignore = dirty entry on .gitmodules - [submodule "third_party/ramulator"] - path = third_party/ramulator - url = https://github.com/CMU-SAFARI/ramulator.git - ignore = dirty + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-6-14 --prefix=$OUT_DIR + ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory ### set environment variables From 6d480b3da1587ee3fadc958fa0292bec2f86140d Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 30 Jun 2024 00:35:26 -0400 Subject: [PATCH 053/488] satp_ is not set, then we skip VAT --- runtime/simx/vortex.cpp | 24 ++++++++++++---------- sim/common/mem.cpp | 44 +++++++++++++++++++++++++++-------------- sim/common/mem.h | 9 +++++++-- sim/simx/emulator.h | 3 --- sim/simx/processor.cpp | 14 +++++++++++-- sim/simx/processor.h | 1 + 6 files changed, 63 insertions(+), 32 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index ae9fe5bb5..fc686ca76 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -121,21 +121,25 @@ public: } bool need_trans(uint64_t dev_pAddr) { - // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == BARE); + + // Check if the satp is set and BARE mode + if (processor_.is_satp_unset() || get_mode() == BARE) + return 0; + // Check if the address is reserved for system usage // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); - bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); + if (PAGE_TABLE_BASE_ADDR <= dev_pAddr) + return 0; + // Check if the address is reserved for IO usage - bool isIO = (dev_pAddr < USER_BASE_ADDR); + if (dev_pAddr < USER_BASE_ADDR) + return 0; // Check if the address falls within the startup address range - bool 
isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); + if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000))) + return 0; - // Print the boolean results for debugging purposes - // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); - - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + // Now all conditions are not met. Return true because the address needs translation + return 1; } uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index f3c1025a2..a5339be6e 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -596,16 +596,26 @@ void RAM::loadHexImage(const char* filename) { uint64_t MemoryUnit::get_base_ppn() { + assert(satp_!= NULL); return satp_->get_base_ppn(); } uint64_t MemoryUnit::get_satp() { - return satp_->get_satp(); + if (is_satp_unset()) + return 0; + else + return satp_->get_satp(); +} + +uint8_t MemoryUnit::is_satp_unset() +{ + return (satp_==NULL); } uint8_t MemoryUnit::get_mode() { + assert(satp_!= NULL); return satp_->get_mode(); } void MemoryUnit::set_satp(uint64_t satp) @@ -616,22 +626,26 @@ void MemoryUnit::set_satp(uint64_t satp) bool MemoryUnit::need_trans(uint64_t dev_pAddr) { - // Check if the this is the BARE mode - bool isBAREMode = (get_mode() == BARE); - // Check if the address is reserved for system usage - // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); - bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr); - // Check if the address is reserved for IO usage - bool isIO= (dev_pAddr < USER_BASE_ADDR); - // Check if the address falls within the startup address range - bool isStartAddress = (STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)); - - // Print the boolean results for 
debugging purposes - // printf("%p, %u, %u\n", (void *)dev_pAddr, isReserved, isStartAddress); + // Check if the satp is set and BARE mode + if ( is_satp_unset() || (get_mode() == BARE)) + return 0; - // Return true if the address needs translation (i.e., it's not reserved and not a start address) - return (!isBAREMode && !isReserved && !isIO && !isStartAddress); + // Check if the address is reserved for system usage + // bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT); + if (PAGE_TABLE_BASE_ADDR <= dev_pAddr) + return 0; + + // Check if the address is reserved for IO usage + if (dev_pAddr < USER_BASE_ADDR) + return 0; + // Check if the address falls within the startup address range + if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000))) + return 0; + + // Now all conditions are not met. Return true because the address needs translation + return 1; } + uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type) { uint64_t pfn; diff --git a/sim/common/mem.h b/sim/common/mem.h index 9f212e184..7ef13393a 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -186,8 +186,12 @@ public: }; #ifdef VM_ENABLE - MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); - ~MemoryUnit(){delete this->satp_;}; + MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE) :satp_(NULL) + {}; + ~MemoryUnit(){ + if ( this->satp_ != NULL) + delete this->satp_; + }; #else MemoryUnit(uint64_t pageSize = 0); #endif @@ -208,6 +212,7 @@ public: #ifdef VM_ENABLE void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits); + uint8_t is_satp_unset(); uint64_t get_satp(); uint8_t get_mode(); uint64_t get_base_ppn(); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index f5c785581..0b2d6ac03 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -125,9 +125,6 @@ private: uint32_t ipdom_size_; Word csr_mscratch_; wspawn_t wspawn_; -#ifdef VM_ENABLE - Word ptbr_; -#endif }; } diff --git 
a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 751db635e..01023125b 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -145,12 +145,17 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const { Processor::Processor(const Arch& arch) : impl_(new ProcessorImpl(arch)) -{} +{ +#ifdef VM_ENABLE + satp_ = NULL; +#endif +} Processor::~Processor() { delete impl_; #ifdef VM_ENABLE - delete satp_; + if (satp_ != NULL) + delete satp_; #endif } @@ -176,10 +181,15 @@ int16_t Processor::set_satp_by_addr(uint64_t base_addr) { impl_->set_satp(satp); return 0; } +bool Processor::is_satp_unset() { + return (satp_== NULL); +} uint8_t Processor::get_satp_mode() { + assert (satp_!=NULL); return satp_->get_mode(); } uint64_t Processor::get_base_ppn() { + assert (satp_!=NULL); return satp_->get_base_ppn(); } #endif diff --git a/sim/simx/processor.h b/sim/simx/processor.h index a20cfff0b..8315eedba 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -37,6 +37,7 @@ public: void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE + bool is_satp_unset(); uint8_t get_satp_mode(); uint64_t get_base_ppn(); int16_t set_satp_by_addr(uint64_t addr); From 52233fe13a25ef8d3de0e1c1816bcb944193a6ac Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 30 Jun 2024 00:54:22 -0400 Subject: [PATCH 054/488] fixed compile error --- sim/common/mem.cpp | 3 ++- sim/common/mem.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index a5339be6e..e3c9b5cc4 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -144,7 +144,8 @@ MemoryUnit::MemoryUnit(uint64_t pageSize) , TLB_HIT(0) , TLB_MISS(0) , TLB_EVICT(0) - , PTW(0) {}; + , PTW(0) + , satp_(NULL) {}; #else { if (pageSize != 0) diff --git a/sim/common/mem.h b/sim/common/mem.h index 7ef13393a..617e83d69 100644 --- a/sim/common/mem.h +++ b/sim/common/mem.h @@ -186,8 +186,7 @@ public: }; #ifdef VM_ENABLE - MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE) 
:satp_(NULL) - {}; + MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE); ~MemoryUnit(){ if ( this->satp_ != NULL) delete this->satp_; From 5877cfe8aef4479137079d678b84dbf6a6f11db6 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sun, 30 Jun 2024 03:10:36 -0400 Subject: [PATCH 055/488] Change STARTUP_ADDR from 0x40000000 to 0x80000000(32b) and 0x180000000(64b) --- ci/regression.sh.in | 6 +++++- runtime/simx/vortex.cpp | 6 +----- sim/common/mem.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 4c7e9967a..600fffe91 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -219,7 +219,11 @@ config2() # custom program startup address make -C tests/regression/dogfood clean-kernel - STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood + if [ "$XLEN" == "64" ]; then + STARTUP_ADDR=0x180000000 make -C tests/regression/dogfood + else + STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood + fi ./ci/blackbox.sh --driver=simx --app=dogfood ./ci/blackbox.sh --driver=rtlsim --app=dogfood make -C tests/regression/dogfood clean-kernel diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index fc686ca76..08261fcd7 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -144,7 +144,6 @@ public: uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags); DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN); @@ -178,10 +177,8 @@ public: } DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr); // Sanity check - uint64_t pAddr = page_table_walk(init_vAddr); - DBGPRINT(" [RT:PTV_MAP] physical addr from PTW: 0x%lx\n", pAddr); + assert(page_table_walk(init_vAddr) == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address\n"); - assert(pAddr 
== init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address\n"); *dev_pAddr = init_vAddr; // commit vpn to be returned to host DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr); @@ -255,7 +252,6 @@ public: int upload(uint64_t dest_addr, const void *src, uint64_t size) { - // DBGPRINT("====%s====\n", __PRETTY_FUNCTION__); uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE); if (dest_addr + asize > GLOBAL_MEM_SIZE) return -1; diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index e3c9b5cc4..e6e998fce 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -23,11 +23,11 @@ using namespace vortex; #ifdef VM_ENABLE -#ifndef NDEBUG -#define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) -#else +// #ifndef NDEBUG +// #define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0) +// #else #define DBGPRINT(format, ...) ((void)0) -#endif +// #endif #endif From aa45f55126af5f5b08383f8a01c1f7ea679a5e4e Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Mon, 8 Jul 2024 17:07:30 -0400 Subject: [PATCH 056/488] vpn allocator added but doesn't pass any tests --- runtime/simx/vortex.cpp | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 08261fcd7..6f31a7ef6 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -114,10 +114,21 @@ public: } #ifdef VM_ENABLE - // virtual to phycial mapping - uint64_t map_p2v(uint64_t pAddr) + // virtual (vpn) to phycial (ppn) mapping + uint64_t map_p2v(uint64_t ppn, uint32_t flags) { - return pAddr + 0xf000000; + DBGPRINT(" [RT:MAP_P2V] ppn: %x\n", ppn); + // std::cout << std::hex << pAddr << std::endl; + // return pAddr + 0xf000000; + if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; + + // If ppn to vpn mapping doesnt exist, create mapping + DBGPRINT(" [RT:MAP_P2V] 
Not found. Allocate new page table or update a PTE.\n"); + uint64_t vpn; + virtual_mem_->allocate(MEM_PAGE_SIZE, &vpn); + CHECK_ERR(update_page_table(ppn, vpn, flags),); + addr_mapping[ppn] = vpn; + return vpn; } bool need_trans(uint64_t dev_pAddr) { @@ -154,7 +165,7 @@ public: } uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = map_p2v(init_pAddr); + uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) & ((1 << MEM_PAGE_LOG2_SIZE) - 1); uint64_t ppn = 0, vpn = 0; // dev_pAddr can be of size greater than a page, but we have to map and update @@ -162,18 +173,11 @@ public: // FUTURE Work: Super Page for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++) { - vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE) >> MEM_PAGE_LOG2_SIZE; + + vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE, flags) >> MEM_PAGE_LOG2_SIZE; DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); // Currently a 1-1 mapping is used, this can be changed here to support different // mapping schemes - // If ppn to vpn mapping doesnt exist. - if (addr_mapping.find(vpn) == addr_mapping.end()) - { - // Create mapping. - DBGPRINT(" [RT:PTV_MAP] Not found. Allocate new page table or update a PTE.\n"); - CHECK_ERR(update_page_table(ppn, vpn, flags),); - addr_mapping[vpn] = ppn; - } } DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr); // Sanity check @@ -415,6 +419,13 @@ public: return 1; } + // HW: virtual mem allocator has the same address range as global_mem. 
next step is to adjust it + virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + if (virtual_mem_ == nullptr) { + // virtual_mem_ does not intefere with physical mem, so no need to free space + return 1; + } + if (VM_ADDR_MODE == BARE) DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)"); else @@ -606,11 +617,12 @@ private: RAM ram_; Processor processor_; MemoryAllocator global_mem_; + MemoryAllocator* virtual_mem_; DeviceConfig dcrs_; std::future future_; std::unordered_map> mpm_cache_; #ifdef VM_ENABLE - std::unordered_map addr_mapping; + std::unordered_map addr_mapping; // HW: key: ppn; value: vpn MemoryAllocator* page_table_mem_; #endif }; From 7916684c3631148c0a0576dfe98e3ca347b64b3a Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Mon, 8 Jul 2024 17:10:19 -0400 Subject: [PATCH 057/488] vpn allocator debug complete, now pass demo&vecadd tests --- runtime/simx/vortex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 6f31a7ef6..3f82e647d 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -165,7 +165,7 @@ public: } uint64_t init_pAddr = *dev_pAddr; - uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) & ((1 << MEM_PAGE_LOG2_SIZE) - 1); + uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) | (init_pAddr & ((1 << MEM_PAGE_LOG2_SIZE) - 1)); uint64_t ppn = 0, vpn = 0; // dev_pAddr can be of size greater than a page, but we have to map and update From 31133ae6e98ebe5b76552f830a938d116ebb7f55 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Tue, 9 Jul 2024 13:42:57 -0400 Subject: [PATCH 058/488] update destructor of vx_device --- runtime/simx/vortex.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 
3f82e647d..6757bffbf 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -66,6 +66,7 @@ public: global_mem_.release(PAGE_TABLE_BASE_ADDR); // for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++) // page_table_mem_->release(i->second << MEM_PAGE_SIZE); + delete virtual_mem_; delete page_table_mem_; #endif if (future_.valid()) { @@ -114,6 +115,7 @@ public: } #ifdef VM_ENABLE + // virtual (vpn) to phycial (ppn) mapping uint64_t map_p2v(uint64_t ppn, uint32_t flags) { @@ -130,6 +132,7 @@ public: addr_mapping[ppn] = vpn; return vpn; } + bool need_trans(uint64_t dev_pAddr) { @@ -423,6 +426,7 @@ public: virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); if (virtual_mem_ == nullptr) { // virtual_mem_ does not intefere with physical mem, so no need to free space + return 1; } From 49255bfa69c9435e70116f5629e8a90ec61f4c27 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Wed, 10 Jul 2024 22:39:00 -0400 Subject: [PATCH 059/488] add virtual mem allocator addr spacereservation --- runtime/simx/vortex.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 6757bffbf..92ae2362d 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -407,6 +407,21 @@ public: return 0; } + // reserve IO space, startup space, and local mem area + int virtual_mem_reserve(uint64_t dev_addr, uint64_t size, int flags) + { + // uint64_t asize = aligned_size(size, MEM_PAGE_SIZE); + CHECK_ERR(virtual_mem_->reserve(dev_addr, size), { + return err; + }); + DBGPRINT("[RT:mem_reserve] addr: 0x%lx, size:0x%lx, size: 0x%lx\n", dev_addr, size, size); + // CHECK_ERR(this->mem_access(dev_addr, asize, flags), { + // global_mem_.release(dev_addr); + // return err; + // }); + return 0; + } + int16_t init_VM() { uint64_t pt_addr = 0; @@ -424,6 +439,16 @@ public: // HW: virtual mem 
allocator has the same address range as global_mem. next step is to adjust it virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + return err; + }); + CHECK_ERR(virtual_mem_reserve(0, USER_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + return err; + }); + CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR >> MEM_PAGE_LOG2_SIZE, (STARTUP_ADDR + 0x40000) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + return err; + }); + if (virtual_mem_ == nullptr) { // virtual_mem_ does not intefere with physical mem, so no need to free space From 6add1e16f65e437471ab4028df8c260a72fbffd4 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Thu, 11 Jul 2024 14:49:00 -0400 Subject: [PATCH 060/488] debugged virtual memory allocator --- runtime/simx/vortex.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 92ae2362d..01c84fab6 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -119,7 +119,7 @@ public: // virtual (vpn) to phycial (ppn) mapping uint64_t map_p2v(uint64_t ppn, uint32_t flags) { - DBGPRINT(" [RT:MAP_P2V] ppn: %x\n", ppn); + DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); // std::cout << std::hex << pAddr << std::endl; // return pAddr + 0xf000000; if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; @@ -128,6 +128,7 @@ public: DBGPRINT(" [RT:MAP_P2V] Not found. 
Allocate new page table or update a PTE.\n"); uint64_t vpn; virtual_mem_->allocate(MEM_PAGE_SIZE, &vpn); + vpn = vpn >> MEM_PAGE_LOG2_SIZE; CHECK_ERR(update_page_table(ppn, vpn, flags),); addr_mapping[ppn] = vpn; return vpn; @@ -176,8 +177,7 @@ public: // FUTURE Work: Super Page for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++) { - - vpn = map_p2v(ppn << MEM_PAGE_LOG2_SIZE, flags) >> MEM_PAGE_LOG2_SIZE; + vpn = map_p2v(ppn, flags) >> MEM_PAGE_LOG2_SIZE; DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn); // Currently a 1-1 mapping is used, this can be changed here to support different // mapping schemes @@ -438,14 +438,11 @@ public: } // HW: virtual mem allocator has the same address range as global_mem. next step is to adjust it - virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); - CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR), MEM_PAGE_SIZE, CACHE_BLOCK_SIZE); + CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR), VX_MEM_READ_WRITE), { return err; }); - CHECK_ERR(virtual_mem_reserve(0, USER_BASE_ADDR >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { - return err; - }); - CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR >> MEM_PAGE_LOG2_SIZE, (STARTUP_ADDR + 0x40000) >> MEM_PAGE_LOG2_SIZE, VX_MEM_READ_WRITE), { + CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), { return err; }); @@ -646,13 +643,13 @@ private: RAM ram_; Processor processor_; MemoryAllocator global_mem_; - MemoryAllocator* virtual_mem_; DeviceConfig dcrs_; std::future future_; std::unordered_map> mpm_cache_; #ifdef VM_ENABLE 
std::unordered_map addr_mapping; // HW: key: ppn; value: vpn MemoryAllocator* page_table_mem_; + MemoryAllocator* virtual_mem_; #endif }; From 78fc053ad56672fe8622bb55c1fb09800c611053 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Mon, 29 Jul 2024 14:35:11 -0400 Subject: [PATCH 061/488] save work before pull --- .travis.yml | 18 +++++++++++++++++- ci/regression.sh.in | 2 ++ runtime/include/vortex.h | 1 + runtime/simx/vortex.cpp | 11 ++++++++++- 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index d43abb153..57098c8f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -99,4 +99,20 @@ jobs: env: XLEN=32 script: - ./ci/travis_run.py ./ci/regression.sh --debug - - ./ci/travis_run.py ./ci/regression.sh --stress \ No newline at end of file + - ./ci/travis_run.py ./ci/regression.sh --stress + + - stage: test + name: virtual_memory + env: XLEN=32 + env: VM_DISABLE=1 + script: + - ./ci/travis_run.py ./ci/regression.sh --regression + - ./ci/travis_run.py ./ci/regression.sh --opencl + + - stage: test + name: virtual_memory + env: XLEN=64 + env: VM_DISABLE=1 + script: + - ./ci/travis_run.py ./ci/regression.sh --regression + - ./ci/travis_run.py ./ci/regression.sh --opencl \ No newline at end of file diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 600fffe91..b3bf798cb 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -19,6 +19,8 @@ set -e # clear blackbox cache rm -f blackbox.*.cache +# HW: add a test "VM Test" to make sure VM feature is enabled + XLEN=${XLEN:=@XLEN@} echo "Vortex Regression Test: XLEN=$XLEN" diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index 8481002e1..957e5d62a 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -65,6 +65,7 @@ typedef void* vx_buffer_h; #define VX_MEM_READ 0x1 #define VX_MEM_WRITE 0x2 #define VX_MEM_READ_WRITE 0x3 +#define VX_MEM_PIN_MEMORY 0x4 // open the device and connect to it int vx_dev_open(vx_device_h* hdevice); diff --git 
a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 01c84fab6..5e54576a5 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -199,6 +199,7 @@ public: uint64_t addr = 0; DBGPRINT("[RT:mem_alloc] size: 0x%lx, asize, 0x%lx,flag : 0x%d\n", size, asize, flags); + // HW: when vm is supported this global_mem_ should be virtual memory allocator CHECK_ERR(global_mem_.allocate(asize, &addr), { return err; }); @@ -231,7 +232,7 @@ public: int mem_free(uint64_t dev_addr) { #ifdef VM_ENABLE - uint64_t paddr= page_table_walk(dev_addr); + uint64_t paddr = page_table_walk(dev_addr); return global_mem_.release(paddr); #else return global_mem_.release(dev_addr); @@ -264,6 +265,14 @@ public: return -1; #ifdef VM_ENABLE uint64_t pAddr = page_table_walk(dest_addr); + // uint64_t pAddr; + // try { + // pAddr = page_table_walk(dest_addr); + // } catch ( Page_Fault_Exception ) { + // // HW: place holder + // // should be virt_to_phy_map here + // phy_to_virt_map(0, dest_addr, 0); + // } DBGPRINT(" [RT:upload] Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr); dest_addr = pAddr; //Overwirte #endif From 8d978f23ce4eecb7f60411fdc2321df418b064d6 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:25:33 -0400 Subject: [PATCH 062/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 5e54576a5..7faae8bdd 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -120,8 +120,6 @@ public: uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); - // std::cout << std::hex << pAddr << std::endl; - // return pAddr + 0xf000000; if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; // If ppn to vpn mapping doesnt exist, create mapping From 735b71361314683e01986b97f6a8b2e040a2dbf0 Mon Sep 17 
00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:34:24 -0400 Subject: [PATCH 063/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 7faae8bdd..ea110e57a 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -422,10 +422,6 @@ public: return err; }); DBGPRINT("[RT:mem_reserve] addr: 0x%lx, size:0x%lx, size: 0x%lx\n", dev_addr, size, size); - // CHECK_ERR(this->mem_access(dev_addr, asize, flags), { - // global_mem_.release(dev_addr); - // return err; - // }); return 0; } From 34ef5009107e0a1736e8a085840ee6518348511c Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:36:58 -0400 Subject: [PATCH 064/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index ea110e57a..c8b7033e0 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -120,7 +120,7 @@ public: uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); - if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; + if (addr_mapping.contains(ppn)) return addr_mapping[ppn]; // If ppn to vpn mapping doesnt exist, create mapping DBGPRINT(" [RT:MAP_P2V] Not found. 
Allocate new page table or update a PTE.\n"); From 9db3870309740bf26224849bf7f903ad66442293 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 10:39:40 -0400 Subject: [PATCH 065/488] Update runtime/simx/vortex.cpp Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index c8b7033e0..b560d20d1 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -116,7 +116,7 @@ public: #ifdef VM_ENABLE - // virtual (vpn) to phycial (ppn) mapping + // physical (ppn) to virtual (vpn) mapping uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); From 30258c04d216e674963e528fd2a44b752f186ed0 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Sat, 20 Jul 2024 11:20:27 -0400 Subject: [PATCH 066/488] Apply suggestions from code review Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com> --- runtime/simx/vortex.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index b560d20d1..f9143cf0f 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -417,7 +417,6 @@ public: // reserve IO space, startup space, and local mem area int virtual_mem_reserve(uint64_t dev_addr, uint64_t size, int flags) { - // uint64_t asize = aligned_size(size, MEM_PAGE_SIZE); CHECK_ERR(virtual_mem_->reserve(dev_addr, size), { return err; }); From 34f7e3c982929f5c148ff063ac83c234f44458c5 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Tue, 30 Jul 2024 00:18:28 -0400 Subject: [PATCH 067/488] config ramulator2 --- third_party/ramulator | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/ramulator b/third_party/ramulator index 214f63584..e62c84a6f 160000 --- a/third_party/ramulator +++ b/third_party/ramulator @@ -1 +1 @@ -Subproject commit 214f635845214adf030367939655d172ef0fed5f 
+Subproject commit e62c84a6f0e06566ba6e182d308434b4532068a5 From bddf276335bf6671a688e4150457e2f5eed6e231 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 6 Aug 2024 19:05:22 -0700 Subject: [PATCH 068/488] memory request flags refactoring --- hw/rtl/VX_define.vh | 10 ++++---- hw/rtl/Vortex.sv | 2 +- hw/rtl/afu/opae/vortex_afu.sv | 12 ++++----- hw/rtl/cache/VX_cache.sv | 4 +-- hw/rtl/cache/VX_cache_bypass.sv | 20 +++++++-------- hw/rtl/cache/VX_cache_flush.sv | 2 +- hw/rtl/cache/VX_cache_top.sv | 6 ++--- hw/rtl/core/VX_core.sv | 6 ++--- hw/rtl/core/VX_core_top.sv | 6 ++--- hw/rtl/core/VX_dispatch.sv | 3 ++- hw/rtl/core/VX_fetch.sv | 2 +- hw/rtl/core/VX_lmem_unit.sv | 12 ++++----- hw/rtl/core/VX_lsu_adapter.sv | 6 ++--- hw/rtl/core/VX_lsu_slice.sv | 26 +++++++++---------- hw/rtl/core/VX_operands.sv | 6 +++-- hw/rtl/interfaces/VX_lsu_mem_if.sv | 10 ++++---- hw/rtl/libs/VX_mem_coalescer.sv | 32 ++++++++++++------------ hw/rtl/libs/VX_mem_scheduler.sv | 40 +++++++++++++++--------------- hw/rtl/mem/VX_local_mem.sv | 2 +- hw/rtl/mem/VX_local_mem_top.sv | 20 +++++++-------- hw/rtl/mem/VX_mem_arb.sv | 40 +++++++++++++++--------------- hw/rtl/mem/VX_mem_bus_if.sv | 8 +++--- hw/rtl/mem/VX_mem_switch.sv | 2 +- 23 files changed, 140 insertions(+), 137 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 686124c16..438466016 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -303,10 +303,10 @@ `define L1_ENABLE `endif -`define ADDR_TYPE_FLUSH 0 -`define ADDR_TYPE_IO 1 -`define ADDR_TYPE_LOCAL 2 // shoud be last since optional -`define ADDR_TYPE_WIDTH (`ADDR_TYPE_LOCAL + `LMEM_ENABLED) +`define MEM_REQ_FLAG_FLUSH 0 +`define MEM_REQ_FLAG_IO 1 +`define MEM_REQ_FLAG_LOCAL 2 // shoud be last since optional +`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED) `define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE `define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE)) @@ -364,7 +364,7 @@ assign dst.req_data.rw = src.req_data.rw; \ 
assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.addr = src.req_data.addr; \ - assign dst.req_data.atype = src.req_data.atype; \ + assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.data = src.req_data.data; \ if (TD != TS) \ assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \ diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 978259101..b49612047 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -109,7 +109,7 @@ module Vortex import VX_gpu_pkg::*; ( assign mem_req_data = mem_bus_if.req_data.data; assign mem_req_tag = mem_bus_if.req_data.tag; assign mem_bus_if.req_ready = mem_req_ready; - `UNUSED_VAR (mem_bus_if.req_data.atype) + `UNUSED_VAR (mem_bus_if.req_data.flags) assign mem_bus_if.rsp_valid = mem_rsp_valid; assign mem_bus_if.rsp_data.data = mem_rsp_data; diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 93f63c48d..b67cae3a5 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -517,8 +517,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready) ); - assign cci_vx_mem_bus_if[1].req_data.atype = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype) + assign cci_vx_mem_bus_if[1].req_data.flags = '0; + `UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.flags) //-- @@ -570,8 +570,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready) ); - assign cci_vx_mem_bus_if[0].req_data.atype = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype) + assign cci_vx_mem_bus_if[0].req_data.flags = '0; + `UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.flags) //-- VX_mem_bus_if #( @@ -639,8 +639,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .avs_readdatavalid(avs_readdatavalid) ); - assign mem_bus_if[0].req_data.atype = '0; - `UNUSED_VAR (mem_bus_if[0].req_data.atype) 
+ assign mem_bus_if[0].req_data.flags = '0; + `UNUSED_VAR (mem_bus_if[0].req_data.flags) // CCI-P Read Request /////////////////////////////////////////////////////////// diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index ae0747690..1131791bb 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -185,7 +185,7 @@ module VX_cache import VX_gpu_pkg::*; #( .ready_out (mem_bus_if.req_ready) ); - assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; + assign mem_bus_if.req_data.flags = mem_bus_if_flush ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; /////////////////////////////////////////////////////////////////////////// @@ -273,7 +273,7 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_req_addr[i] = core_bus2_if[i].req_data.addr; assign core_req_data[i] = core_bus2_if[i].req_data.data; assign core_req_tag[i] = core_bus2_if[i].req_data.tag; - assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; assign core_bus2_if[i].req_ready = core_req_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 379d33e8a..53d847c4e 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -56,7 +56,7 @@ module VX_cache_bypass #( localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); - localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; + localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE); @@ -80,7 +80,7 @@ module VX_cache_bypass #( if (PASSTHRU != 0) begin assign core_req_nc_idxs[i] = 1'b1; end else if (NC_ENABLE) begin - assign 
core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; + assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO]; end else begin assign core_req_nc_idxs[i] = 1'b0; end @@ -113,7 +113,7 @@ module VX_cache_bypass #( wire mem_req_out_rw; wire [LINE_SIZE-1:0] mem_req_out_byteen; wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr; - wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype; + wire [`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_out_flags; wire [`CS_LINE_WIDTH-1:0] mem_req_out_data; wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; wire mem_req_out_ready; @@ -121,7 +121,7 @@ module VX_cache_bypass #( wire core_req_nc_sel_rw; wire [WORD_SIZE-1:0] core_req_nc_sel_byteen; wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; - wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype; + wire [`MEM_REQ_FLAGS_WIDTH-1:0] core_req_nc_sel_flags; wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data; wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; @@ -131,7 +131,7 @@ module VX_cache_bypass #( core_bus_in_if[i].req_data.rw, core_bus_in_if[i].req_data.byteen, core_bus_in_if[i].req_data.addr, - core_bus_in_if[i].req_data.atype, + core_bus_in_if[i].req_data.flags, core_bus_in_if[i].req_data.data, core_bus_in_if[i].req_data.tag }; @@ -141,7 +141,7 @@ module VX_cache_bypass #( core_req_nc_sel_rw, core_req_nc_sel_byteen, core_req_nc_sel_addr, - core_req_nc_sel_atype, + core_req_nc_sel_flags, core_req_nc_sel_data, core_req_nc_sel_tag } = core_req_nc_mux_in[core_req_nc_idx]; @@ -151,7 +151,7 @@ module VX_cache_bypass #( assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid; assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw; assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH]; - assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype; + assign mem_req_out_flags = mem_bus_in_if.req_valid ? 
mem_bus_in_if.req_data.flags : core_req_nc_sel_flags; wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; @@ -218,7 +218,7 @@ module VX_cache_bypass #( assign mem_bus_in_if.req_ready = mem_req_out_ready; VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -226,8 +226,8 @@ module VX_cache_bypass #( .reset (reset), .valid_in (mem_req_out_valid), .ready_in (mem_req_out_ready), - .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), - .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), + .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_flags, mem_req_out_data, mem_req_out_tag}), + .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.flags, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), .valid_out (mem_bus_out_if.req_valid), .ready_out (mem_bus_out_if.req_ready) ); diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index 7a33565fc..648fbebb3 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -83,7 +83,7 @@ module VX_cache_flush #( wire [NUM_REQS-1:0] flush_req_mask; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; end wire flush_req_enable = (| flush_req_mask); diff --git 
a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index 0959701aa..3fa0e5d65 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -75,7 +75,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( input wire [NUM_REQS-1:0] core_req_rw, input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr, - input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype, + input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] core_req_flags, input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag, output wire [NUM_REQS-1:0] core_req_ready, @@ -117,7 +117,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( assign core_bus_if[i].req_data.rw = core_req_rw[i]; assign core_bus_if[i].req_data.byteen = core_req_byteen[i]; assign core_bus_if[i].req_data.addr = core_req_addr[i]; - assign core_bus_if[i].req_data.atype = core_req_atype[i]; + assign core_bus_if[i].req_data.flags = core_req_flags[i]; assign core_bus_if[i].req_data.data = core_req_data[i]; assign core_bus_if[i].req_data.tag = core_req_tag[i]; assign core_req_ready[i] = core_bus_if[i].req_ready; @@ -139,7 +139,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( assign mem_req_data = mem_bus_if.req_data.data; assign mem_req_tag = mem_bus_if.req_data.tag; assign mem_bus_if.req_ready = mem_req_ready; - `UNUSED_VAR (mem_bus_if.req_data.atype) + `UNUSED_VAR (mem_bus_if.req_data.flags) // Memory response assign mem_bus_if.rsp_valid = mem_rsp_valid; diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 4c82db812..83af50f16 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -250,7 +250,7 @@ module VX_core import VX_gpu_pkg::*; #( .DATA_IN_SIZE (LSU_WORD_SIZE), .DATA_OUT_SIZE (DCACHE_WORD_SIZE), .ADDR_WIDTH (LSU_ADDR_WIDTH), - .ATYPE_WIDTH (`ADDR_TYPE_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .TAG_WIDTH (LSU_TAG_WIDTH), .UUID_WIDTH (`UUID_WIDTH), 
.QUEUE_SIZE (`LSUQ_OUT_SIZE) @@ -264,7 +264,7 @@ module VX_core import VX_gpu_pkg::*; #( .in_req_rw (lsu_dcache_if[i].req_data.rw), .in_req_byteen (lsu_dcache_if[i].req_data.byteen), .in_req_addr (lsu_dcache_if[i].req_data.addr), - .in_req_atype (lsu_dcache_if[i].req_data.atype), + .in_req_flags (lsu_dcache_if[i].req_data.flags), .in_req_data (lsu_dcache_if[i].req_data.data), .in_req_tag (lsu_dcache_if[i].req_data.tag), .in_req_ready (lsu_dcache_if[i].req_ready), @@ -282,7 +282,7 @@ module VX_core import VX_gpu_pkg::*; #( .out_req_rw (dcache_coalesced_if.req_data.rw), .out_req_byteen (dcache_coalesced_if.req_data.byteen), .out_req_addr (dcache_coalesced_if.req_data.addr), - .out_req_atype (dcache_coalesced_if.req_data.atype), + .out_req_flags (dcache_coalesced_if.req_data.flags), .out_req_data (dcache_coalesced_if.req_data.data), .out_req_tag (dcache_coalesced_if.req_data.tag), .out_req_ready (dcache_coalesced_if.req_ready), diff --git a/hw/rtl/core/VX_core_top.sv b/hw/rtl/core/VX_core_top.sv index 420ae7b67..9ade1c28b 100644 --- a/hw/rtl/core/VX_core_top.sv +++ b/hw/rtl/core/VX_core_top.sv @@ -32,7 +32,7 @@ module VX_core_top import VX_gpu_pkg::*; #( output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw, output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen, output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr, - output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype, + output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] dcache_req_flags, output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data, output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_req_tag, input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready, @@ -96,7 +96,7 @@ module VX_core_top import VX_gpu_pkg::*; #( assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw; assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen; assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr; - assign dcache_req_atype[i] = 
dcache_bus_if[i].req_data.atype; + assign dcache_req_flags[i] = dcache_bus_if[i].req_data.flags; assign dcache_req_data[i] = dcache_bus_if[i].req_data.data; assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag; assign dcache_bus_if[i].req_ready = dcache_req_ready[i]; @@ -119,7 +119,7 @@ module VX_core_top import VX_gpu_pkg::*; #( assign icache_req_data = icache_bus_if.req_data.data; assign icache_req_tag = icache_bus_if.req_data.tag; assign icache_bus_if.req_ready = icache_req_ready; - `UNUSED_VAR (icache_bus_if.req_data.atype) + `UNUSED_VAR (icache_bus_if.req_data.flags) assign icache_bus_if.rsp_valid = icache_rsp_valid; assign icache_bus_if.rsp_data.tag = icache_rsp_tag; diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 8ea3a6125..96c947d1e 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -61,7 +61,8 @@ module VX_dispatch import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (2), .OUT_REG (2), // 2-cycle EB for area reduction - .LUTRAM (1) + .LUTRAM (1), + .MAX_FANOUT (`MAX_FANOUT * 64) ) buffer ( .clk (clk), .reset (buffer_reset), diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 043a87939..de622bd1d 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -116,7 +116,7 @@ module VX_fetch import VX_gpu_pkg::*; #( .ready_out (icache_bus_if.req_ready) ); - assign icache_bus_if.req_data.atype = '0; + assign icache_bus_if.req_data.flags = '0; assign icache_bus_if.req_data.rw = 0; assign icache_bus_if.req_data.byteen = 4'b1111; assign icache_bus_if.req_data.data = '0; diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index accb7a586..74da1e114 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -29,7 +29,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) - localparam 
REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `ADDR_TYPE_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); @@ -45,7 +45,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL]; + assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAGE_LOCAL]; end wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); @@ -67,7 +67,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_in_if[i].req_data.rw, lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.atype, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, lsu_mem_in_if[i].req_data.tag }), @@ -78,7 +78,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_out_if[i].req_data.rw, lsu_mem_out_if[i].req_data.byteen, lsu_mem_out_if[i].req_data.addr, - lsu_mem_out_if[i].req_data.atype, + lsu_mem_out_if[i].req_data.flags, lsu_mem_out_if[i].req_data.data, lsu_mem_out_if[i].req_data.tag }), @@ -98,7 +98,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_in_if[i].req_data.rw, lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.atype, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, lsu_mem_in_if[i].req_data.tag }), @@ -109,7 +109,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_switch_if[i].req_data.rw, lsu_switch_if[i].req_data.byteen, lsu_switch_if[i].req_data.addr, - lsu_switch_if[i].req_data.atype, + 
lsu_switch_if[i].req_data.flags, lsu_switch_if[i].req_data.data, lsu_switch_if[i].req_data.tag }), diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/core/VX_lsu_adapter.sv index 21d43d280..48ef23163 100644 --- a/hw/rtl/core/VX_lsu_adapter.sv +++ b/hw/rtl/core/VX_lsu_adapter.sv @@ -29,7 +29,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( VX_mem_bus_if.master mem_bus_if [NUM_LANES] ); localparam REQ_ADDR_WIDTH = `MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE); - localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `ADDR_TYPE_WIDTH + DATA_SIZE * 8; + localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + DATA_SIZE * 8; localparam RSP_DATA_WIDTH = DATA_SIZE * 8; // handle request unpacking @@ -46,7 +46,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( lsu_mem_if.req_data.rw, lsu_mem_if.req_data.byteen[i], lsu_mem_if.req_data.addr[i], - lsu_mem_if.req_data.atype[i], + lsu_mem_if.req_data.flags[i], lsu_mem_if.req_data.data[i] }; end @@ -57,7 +57,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, - mem_bus_if[i].req_data.atype, + mem_bus_if[i].req_data.flags, mem_bus_if[i].req_data.data } = req_data_out[i]; assign mem_bus_if[i].req_data.tag = req_tag_out[i]; diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 8c685fca2..6de901182 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -65,19 +65,19 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( // address type calculation - wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype; + wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags; for (genvar i = 0; i < NUM_LANES; ++i) begin wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW]; // is I/O address wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT); wire [MEM_ADDRW-1:0] io_addr_end = MEM_ADDRW'(`XLEN'(`IO_END_ADDR) >> 
MEM_ASHIFT); - assign mem_req_atype[i][`ADDR_TYPE_FLUSH] = req_is_fence; - assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end); + assign mem_req_flags[i][`MEM_REQ_FLAG_FLUSH] = req_is_fence; + assign mem_req_flags[i][`MEM_REQ_FLAG_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end); `ifdef LMEM_ENABLE // is local memory address wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT); wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT); - assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end); + assign mem_req_flags[i][`MEM_REQ_FLAG_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end); `endif end @@ -300,7 +300,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire [NUM_LANES-1:0] lsu_mem_req_mask; wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_mem_req_byteen; wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_mem_req_addr; - wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] lsu_mem_req_atype; + wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_mem_req_flags; wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_req_data; wire [LSU_TAG_WIDTH-1:0] lsu_mem_req_tag; wire lsu_mem_req_ready; @@ -320,7 +320,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .WORD_SIZE (LSU_WORD_SIZE), .LINE_SIZE (LSU_WORD_SIZE), .ADDR_WIDTH (LSU_ADDR_WIDTH), - .ATYPE_WIDTH (`ADDR_TYPE_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .TAG_WIDTH (TAG_WIDTH), .CORE_QUEUE_SIZE (`LSUQ_IN_SIZE), .MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE), @@ -338,7 +338,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .core_req_mask (mem_req_mask), .core_req_byteen(mem_req_byteen), .core_req_addr (mem_req_addr), - .core_req_atype (mem_req_atype), + .core_req_flags (mem_req_flags), .core_req_data (mem_req_data), .core_req_tag (mem_req_tag), .core_req_ready 
(mem_req_ready), @@ -360,7 +360,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .mem_req_mask (lsu_mem_req_mask), .mem_req_byteen (lsu_mem_req_byteen), .mem_req_addr (lsu_mem_req_addr), - .mem_req_atype (lsu_mem_req_atype), + .mem_req_flags (lsu_mem_req_flags), .mem_req_data (lsu_mem_req_data), .mem_req_tag (lsu_mem_req_tag), .mem_req_ready (lsu_mem_req_ready), @@ -378,7 +378,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( assign lsu_mem_if.req_data.rw = lsu_mem_req_rw; assign lsu_mem_if.req_data.byteen = lsu_mem_req_byteen; assign lsu_mem_if.req_data.addr = lsu_mem_req_addr; - assign lsu_mem_if.req_data.atype = lsu_mem_req_atype; + assign lsu_mem_if.req_data.flags = lsu_mem_req_flags; assign lsu_mem_if.req_data.data = lsu_mem_req_data; assign lsu_mem_if.req_data.tag = lsu_mem_req_tag; assign lsu_mem_req_ready = lsu_mem_if.req_ready; @@ -513,16 +513,16 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( if (mem_req_rw) begin `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid)); end else begin `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); `TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, 
mem_req_tag, execute_if.data.uuid)); end end diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index e3df0c1fa..bd0d122eb 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -183,7 +183,8 @@ module VX_operands import VX_gpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (1 + NUM_SRC_REGS * REGS_DATAW) + .RESETW (1 + NUM_SRC_REGS * REGS_DATAW), + .MAX_FANOUT (`MAX_FANOUT * 64) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), @@ -205,7 +206,8 @@ module VX_operands import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (1) + .LUTRAM (1), + .MAX_FANOUT (`MAX_FANOUT * 64) ) out_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/interfaces/VX_lsu_mem_if.sv b/hw/rtl/interfaces/VX_lsu_mem_if.sv index 661071eb6..4b2c6d4af 100644 --- a/hw/rtl/interfaces/VX_lsu_mem_if.sv +++ b/hw/rtl/interfaces/VX_lsu_mem_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,18 +16,18 @@ interface VX_lsu_mem_if #( parameter NUM_LANES = 1, parameter DATA_SIZE = 1, - parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH, + parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH, parameter TAG_WIDTH = 1, parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE) ) (); typedef struct packed { - logic rw; + logic rw; logic [NUM_LANES-1:0] mask; logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; logic [NUM_LANES-1:0][ADDR_WIDTH-1:0] addr; - logic [NUM_LANES-1:0][ATYPE_WIDTH-1:0] atype; + logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags; logic [NUM_LANES-1:0][DATA_SIZE*8-1:0] data; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index d1ffde09a..db36ac781 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -18,7 +18,7 @@ module VX_mem_coalescer #( parameter `STRING INSTANCE_ID = "", parameter NUM_REQS = 1, parameter ADDR_WIDTH = 32, - parameter ATYPE_WIDTH = 1, + parameter FLAGS_WIDTH = 1, parameter DATA_IN_SIZE = 4, parameter DATA_OUT_SIZE = 64, parameter TAG_WIDTH = 8, @@ -43,7 +43,7 @@ module VX_mem_coalescer #( input wire [NUM_REQS-1:0] in_req_mask, input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr, - input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype, + input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags, input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data, input wire [TAG_WIDTH-1:0] in_req_tag, output wire in_req_ready, @@ -61,7 +61,7 @@ module VX_mem_coalescer #( output wire [OUT_REQS-1:0] out_req_mask, output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen, output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr, - output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype, + output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags, output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data, output wire [OUT_TAG_WIDTH-1:0] 
out_req_tag, input wire out_req_ready, @@ -93,7 +93,7 @@ module VX_mem_coalescer #( logic out_req_rw_r, out_req_rw_n; logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; - logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n; + logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n; logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; @@ -111,7 +111,7 @@ module VX_mem_coalescer #( logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; - logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n; + logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n; logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n; @@ -144,7 +144,7 @@ module VX_mem_coalescer #( for (genvar i = 0; i < OUT_REQS; ++i) begin assign seed_addr_n[i] = in_addr_base[seed_idx[i]]; - assign seed_atype_n[i] = in_req_atype[seed_idx[i]]; + assign seed_flags_n[i] = in_req_flags[seed_idx[i]]; end for (genvar i = 0; i < OUT_REQS; ++i) begin @@ -188,7 +188,7 @@ module VX_mem_coalescer #( out_req_mask_n = out_req_mask_r; out_req_rw_n = out_req_rw_r; out_req_addr_n = out_req_addr_r; - out_req_atype_n = out_req_atype_r; + out_req_flags_n = out_req_flags_r; out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; @@ -211,7 +211,7 @@ module VX_mem_coalescer #( out_req_mask_n = batch_valid_r; out_req_rw_n = in_req_rw; out_req_addr_n = seed_addr_r; - out_req_atype_n = seed_atype_r; + out_req_flags_n = seed_flags_r; out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; @@ 
-230,14 +230,14 @@ module VX_mem_coalescer #( end VX_pipe_register #( - .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), + .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), .RESETW (1 + NUM_REQS + 1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), - .data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) + .data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), + .data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) ); wire out_rsp_fire = out_rsp_valid && out_rsp_ready; @@ -278,7 +278,7 @@ module VX_mem_coalescer #( assign out_req_mask = out_req_mask_r; assign out_req_byteen = out_req_byteen_r; assign out_req_addr = out_req_addr_r; - assign out_req_atype = out_req_atype_r; + assign out_req_flags = out_req_flags_r; assign out_req_data = out_req_data_r; assign out_req_tag = out_req_tag_r; @@ -350,8 +350,8 @@ module VX_mem_coalescer #( if (out_req_rw) begin `TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, 
OUT_REQS); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS); `TRACE(1, (", data=")); @@ -359,8 +359,8 @@ module VX_mem_coalescer #( end else begin `TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); + `TRACE(1, (", flags=")); + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); end `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index aa3ef9b2f..f173d7d0a 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -21,7 +21,7 @@ module VX_mem_scheduler #( parameter WORD_SIZE = 4, parameter LINE_SIZE = WORD_SIZE, parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), - parameter ATYPE_WIDTH = 1, + parameter FLAGS_WIDTH = 1, parameter TAG_WIDTH = 8, parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter CORE_QUEUE_SIZE= 8, @@ -50,7 +50,7 @@ module VX_mem_scheduler #( input wire [CORE_REQS-1:0] core_req_mask, input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr, - input wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] core_req_atype, + input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags, input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data, input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, @@ -72,7 +72,7 @@ module VX_mem_scheduler #( output wire [MEM_CHANNELS-1:0] mem_req_mask, output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen, output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype, + output wire 
[MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags, output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, input wire mem_req_ready, @@ -113,7 +113,7 @@ module VX_mem_scheduler #( wire reqq_rw; wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen; wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr; - wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype; + wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags; wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data; wire [REQQ_TAG_WIDTH-1:0] reqq_tag; wire reqq_ready; @@ -123,7 +123,7 @@ module VX_mem_scheduler #( wire reqq_rw_s; wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s; wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s; - wire [MERGED_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype_s; + wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s; wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s; wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s; wire reqq_ready_s; @@ -133,7 +133,7 @@ module VX_mem_scheduler #( wire mem_req_rw_s; wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_s; + wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s; wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; wire mem_req_ready_s; @@ -168,7 +168,7 @@ module VX_mem_scheduler #( end VX_elastic_buffer #( - .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), + .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), .SIZE (CORE_QUEUE_SIZE), .OUT_REG (1) ) req_queue ( @@ -176,8 +176,8 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (reqq_valid_in), .ready_in (reqq_ready_in), - .data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_atype, core_req_data, reqq_tag_u}), - .data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_atype, 
reqq_data, reqq_tag}), + .data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_flags, core_req_data, reqq_tag_u}), + .data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_flags, reqq_data, reqq_tag}), .valid_out(reqq_valid), .ready_out(reqq_ready) ); @@ -231,7 +231,7 @@ module VX_mem_scheduler #( .DATA_IN_SIZE (WORD_SIZE), .DATA_OUT_SIZE (LINE_SIZE), .ADDR_WIDTH (ADDR_WIDTH), - .ATYPE_WIDTH (ATYPE_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .TAG_WIDTH (REQQ_TAG_WIDTH), .UUID_WIDTH (UUID_WIDTH), .QUEUE_SIZE (MEM_QUEUE_SIZE) @@ -245,7 +245,7 @@ module VX_mem_scheduler #( .in_req_rw (reqq_rw), .in_req_byteen (reqq_byteen), .in_req_addr (reqq_addr), - .in_req_atype (reqq_atype), + .in_req_flags (reqq_flags), .in_req_data (reqq_data), .in_req_tag (reqq_tag), .in_req_ready (reqq_ready), @@ -263,7 +263,7 @@ module VX_mem_scheduler #( .out_req_rw (reqq_rw_s), .out_req_byteen (reqq_byteen_s), .out_req_addr (reqq_addr_s), - .out_req_atype (reqq_atype_s), + .out_req_flags (reqq_flags_s), .out_req_data (reqq_data_s), .out_req_tag (reqq_tag_s), .out_req_ready (reqq_ready_s), @@ -283,7 +283,7 @@ module VX_mem_scheduler #( assign reqq_rw_s = reqq_rw; assign reqq_byteen_s= reqq_byteen; assign reqq_addr_s = reqq_addr; - assign reqq_atype_s = reqq_atype; + assign reqq_flags_s = reqq_flags; assign reqq_data_s = reqq_data; assign reqq_tag_s = reqq_tag; assign reqq_ready = reqq_ready_s; @@ -301,7 +301,7 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b; - wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b; + wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b; wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; @@ -313,13 +313,13 @@ module VX_mem_scheduler #( assign 
mem_req_mask_b[i][j] = reqq_mask_s[r]; assign mem_req_byteen_b[i][j] = reqq_byteen_s[r]; assign mem_req_addr_b[i][j] = reqq_addr_s[r]; - assign mem_req_atype_b[i][j] = reqq_atype_s[r]; + assign mem_req_flags_b[i][j] = reqq_flags_s[r]; assign mem_req_data_b[i][j] = reqq_data_s[r]; end else begin assign mem_req_mask_b[i][j] = 0; assign mem_req_byteen_b[i][j] = '0; assign mem_req_addr_b[i][j] = '0; - assign mem_req_atype_b[i][j] = '0; + assign mem_req_flags_b[i][j] = '0; assign mem_req_data_b[i][j] = '0; end end @@ -329,7 +329,7 @@ module VX_mem_scheduler #( assign mem_req_rw_s = reqq_rw_s; assign mem_req_byteen_s = mem_req_byteen_b[req_batch_idx]; assign mem_req_addr_s = mem_req_addr_b[req_batch_idx]; - assign mem_req_atype_s = mem_req_atype_b[req_batch_idx]; + assign mem_req_flags_s = mem_req_flags_b[req_batch_idx]; assign mem_req_data_s = mem_req_data_b[req_batch_idx]; if (MEM_BATCHES != 1) begin @@ -390,7 +390,7 @@ module VX_mem_scheduler #( assign reqq_ready_s = req_sent_all; VX_elastic_buffer #( - .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), + .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -398,8 +398,8 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (mem_req_valid_s), .ready_in (mem_req_ready_s), - .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_atype_s, mem_req_data_s, mem_req_tag_s}), - .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_atype, mem_req_data, mem_req_tag}), + .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}), + .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}), .valid_out (mem_req_valid), .ready_out 
(mem_req_ready) ); diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 3dce0ec43..5d095b083 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -80,7 +80,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr; for (genvar i = 0; i < NUM_REQS; ++i) begin assign req_bank_addr[i] = mem_bus_if[i].req_data.addr[BANK_SEL_BITS +: BANK_ADDR_WIDTH]; - `UNUSED_VAR (mem_bus_if[i].req_data.atype) + `UNUSED_VAR (mem_bus_if[i].req_data.flags) end // bank requests dispatch diff --git a/hw/rtl/mem/VX_local_mem_top.sv b/hw/rtl/mem/VX_local_mem_top.sv index e576d32ec..d1cac7ebf 100644 --- a/hw/rtl/mem/VX_local_mem_top.sv +++ b/hw/rtl/mem/VX_local_mem_top.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,10 +17,10 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", // Size of cache in bytes - parameter SIZE = (1024*16*8), - + parameter SIZE = (1024*16*8), + // Number of Word requests per cycle - parameter NUM_REQS = 4, + parameter NUM_REQS = 4, // Number of banks parameter NUM_BANKS = 4, @@ -34,7 +34,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( // Request tag size parameter TAG_WIDTH = 16 - ) ( + ) ( input wire clk, input wire reset, @@ -43,7 +43,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( input wire [NUM_REQS-1:0] mem_req_rw, input wire [NUM_REQS-1:0][WORD_SIZE-1:0] mem_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] mem_req_addr, - input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype, + input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags, input wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] mem_req_data, input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] mem_req_tag, output wire [NUM_REQS-1:0] mem_req_ready, @@ -65,7 +65,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_data.rw = mem_req_rw[i]; assign mem_bus_if[i].req_data.byteen = mem_req_byteen[i]; assign mem_bus_if[i].req_data.addr = mem_req_addr[i]; - assign mem_bus_if[i].req_data.atype = mem_req_atype[i]; + assign mem_bus_if[i].req_data.flags = mem_req_flags[i]; assign mem_bus_if[i].req_data.data = mem_req_data[i]; assign mem_bus_if[i].req_data.tag = mem_req_tag[i]; assign mem_req_ready[i] = mem_bus_if[i].req_ready; @@ -86,9 +86,9 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .ADDR_WIDTH (ADDR_WIDTH), - .UUID_WIDTH (UUID_WIDTH), + .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH) - ) local_mem ( + ) local_mem ( .clk (clk), .reset (reset), .mem_bus_if (mem_bus_if) diff --git a/hw/rtl/mem/VX_mem_arb.sv b/hw/rtl/mem/VX_mem_arb.sv index ef51e2387..f45a7ea75 100644 --- a/hw/rtl/mem/VX_mem_arb.sv +++ b/hw/rtl/mem/VX_mem_arb.sv @@ -1,10 +1,10 @@ // Copyright © 
2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,14 +13,14 @@ `include "VX_define.vh" -module VX_mem_arb #( - parameter NUM_INPUTS = 1, +module VX_mem_arb #( + parameter NUM_INPUTS = 1, parameter NUM_OUTPUTS = 1, parameter DATA_SIZE = 1, - parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, + parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE)), - parameter TAG_WIDTH = 1, - parameter TAG_SEL_IDX = 0, + parameter TAG_WIDTH = 1, + parameter TAG_SEL_IDX = 0, parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0, parameter `STRING ARBITER = "R" @@ -30,10 +30,10 @@ module VX_mem_arb #( VX_mem_bus_if.slave bus_in_if [NUM_INPUTS], VX_mem_bus_if.master bus_out_if [NUM_OUTPUTS] -); +); localparam DATA_WIDTH = (8 * DATA_SIZE); localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS); - localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; + localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; `STATIC_ASSERT ((NUM_INPUTS >= NUM_OUTPUTS), ("invalid parameter")) @@ -53,14 +53,14 @@ module VX_mem_arb #( bus_in_if[i].req_data.rw, bus_in_if[i].req_data.byteen, bus_in_if[i].req_data.addr, - bus_in_if[i].req_data.atype, + bus_in_if[i].req_data.flags, bus_in_if[i].req_data.data, bus_in_if[i].req_data.tag }; assign bus_in_if[i].req_ready = req_ready_in[i]; end - VX_stream_arb #( + VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (NUM_OUTPUTS), .DATAW (REQ_DATAW), @@ -80,7 +80,7 @@ 
module VX_mem_arb #( for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin wire [TAG_WIDTH-1:0] req_tag_out; - VX_bits_insert #( + VX_bits_insert #( .N (TAG_WIDTH), .S (LOG_NUM_REQS), .POS (TAG_SEL_IDX) @@ -94,8 +94,8 @@ module VX_mem_arb #( bus_out_if[i].req_data.rw, bus_out_if[i].req_data.byteen, bus_out_if[i].req_data.addr, - bus_out_if[i].req_data.atype, - bus_out_if[i].req_data.data, + bus_out_if[i].req_data.flags, + bus_out_if[i].req_data.data, req_tag_out } = req_data_out[i]; assign req_ready_out[i] = bus_out_if[i].req_ready; @@ -117,7 +117,7 @@ module VX_mem_arb #( for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin wire [TAG_WIDTH-1:0] rsp_tag_out; - VX_bits_remove #( + VX_bits_remove #( .N (TAG_WIDTH + LOG_NUM_REQS), .S (LOG_NUM_REQS), .POS (TAG_SEL_IDX) @@ -135,7 +135,7 @@ module VX_mem_arb #( end else begin assign rsp_sel_in[i] = '0; end - end + end VX_stream_switch #( .NUM_INPUTS (NUM_OUTPUTS), @@ -155,11 +155,11 @@ module VX_mem_arb #( ); end else begin - + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin assign rsp_valid_in[i] = bus_out_if[i].rsp_valid; assign rsp_data_in[i] = { - bus_out_if[i].rsp_data.tag, + bus_out_if[i].rsp_data.tag, bus_out_if[i].rsp_data.data }; assign bus_out_if[i].rsp_ready = rsp_ready_in[i]; @@ -184,11 +184,11 @@ module VX_mem_arb #( ); end - + for (genvar i = 0; i < NUM_INPUTS; ++i) begin assign bus_in_if[i].rsp_valid = rsp_valid_out[i]; assign { - bus_in_if[i].rsp_data.tag, + bus_in_if[i].rsp_data.tag, bus_in_if[i].rsp_data.data } = rsp_data_out[i]; assign rsp_ready_out[i] = bus_in_if[i].rsp_ready; diff --git a/hw/rtl/mem/VX_mem_bus_if.sv b/hw/rtl/mem/VX_mem_bus_if.sv index 1b7fca777..5f341904c 100644 --- a/hw/rtl/mem/VX_mem_bus_if.sv +++ b/hw/rtl/mem/VX_mem_bus_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,7 +15,7 @@ interface VX_mem_bus_if #( parameter DATA_SIZE = 1, - parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH, + parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH, parameter TAG_WIDTH = 1, parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE) @@ -25,7 +25,7 @@ interface VX_mem_bus_if #( logic rw; logic [DATA_SIZE-1:0] byteen; logic [ADDR_WIDTH-1:0] addr; - logic [ATYPE_WIDTH-1:0] atype; + logic [FLAGS_WIDTH-1:0] flags; logic [DATA_SIZE*8-1:0] data; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/mem/VX_mem_switch.sv b/hw/rtl/mem/VX_mem_switch.sv index fd26c2aa8..21ec7278a 100644 --- a/hw/rtl/mem/VX_mem_switch.sv +++ b/hw/rtl/mem/VX_mem_switch.sv @@ -31,7 +31,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #( VX_mem_bus_if.master bus_out_if [NUM_REQS] ); localparam DATA_WIDTH = (8 * DATA_SIZE); - localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; + localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; // handle requests //////////////////////////////////////////////////////// From 0d7012e69e36b0b144fa44f231351a1e8a6784b0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 6 Aug 2024 21:27:08 -0700 Subject: [PATCH 069/488] minor update --- hw/rtl/core/VX_lmem_unit.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 74da1e114..d93befda7 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -45,7 +45,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( wire 
[`NUM_LSU_LANES-1:0] is_addr_local_mask; for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAGE_LOCAL]; + assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAG_LOCAL]; end wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); From 30ebb65fc336ac59323791f35a9e43a071e8d62e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 6 Aug 2024 23:36:37 -0700 Subject: [PATCH 070/488] minor update --- hw/rtl/core/VX_dispatch.sv | 3 +- hw/rtl/core/VX_operands.sv | 6 +-- hw/rtl/libs/VX_elastic_buffer.sv | 40 +------------- hw/rtl/libs/VX_pipe_register.sv | 93 ++++++++++++-------------------- 4 files changed, 39 insertions(+), 103 deletions(-) diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 96c947d1e..8ea3a6125 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -61,8 +61,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (2), .OUT_REG (2), // 2-cycle EB for area reduction - .LUTRAM (1), - .MAX_FANOUT (`MAX_FANOUT * 64) + .LUTRAM (1) ) buffer ( .clk (clk), .reset (buffer_reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index bd0d122eb..e3df0c1fa 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -183,8 +183,7 @@ module VX_operands import VX_gpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (1 + NUM_SRC_REGS * REGS_DATAW), - .MAX_FANOUT (`MAX_FANOUT * 64) + .RESETW (1 + NUM_SRC_REGS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), @@ -206,8 +205,7 @@ module VX_operands import VX_gpu_pkg::*; #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (1), - .MAX_FANOUT (`MAX_FANOUT * 64) + .LUTRAM (1) ) out_buf ( .clk (clk), .reset (reset), diff --git 
a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 9213572d3..ee6f31b58 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -18,8 +18,7 @@ module VX_elastic_buffer #( parameter DATAW = 1, parameter SIZE = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, - parameter MAX_FANOUT = 0 + parameter LUTRAM = 0 ) ( input wire clk, input wire reset, @@ -41,43 +40,6 @@ module VX_elastic_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin - - localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT); - localparam N_DATAW = DATAW / NUM_SLICES; - - for (genvar i = 0; i < NUM_SLICES; ++i) begin - - localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - i * N_DATAW) : N_DATAW; - - wire valid_out_t, ready_in_t; - `UNUSED_VAR (valid_out_t) - `UNUSED_VAR (ready_in_t) - - `RESET_RELAY (slice_reset, reset); - - VX_elastic_buffer #( - .DATAW (S_DATAW), - .SIZE (SIZE), - .OUT_REG (OUT_REG), - .LUTRAM (LUTRAM) - ) buffer_slice ( - .clk (clk), - .reset (slice_reset), - .valid_in (valid_in), - .data_in (data_in[i * N_DATAW +: S_DATAW]), - .ready_in (ready_in_t), - .valid_out (valid_out_t), - .data_out (data_out[i * N_DATAW +: S_DATAW]), - .ready_out (ready_out) - ); - - if (i == 0) begin - assign ready_in = ready_in_t; - assign valid_out = valid_out_t; - end - end - end else if (SIZE == 1) begin VX_pipe_buffer #( diff --git a/hw/rtl/libs/VX_pipe_register.sv b/hw/rtl/libs/VX_pipe_register.sv index 707438abd..2c1cddfd6 100644 --- a/hw/rtl/libs/VX_pipe_register.sv +++ b/hw/rtl/libs/VX_pipe_register.sv @@ -17,8 +17,7 @@ module VX_pipe_register #( parameter DATAW = 1, parameter RESETW = 0, - parameter DEPTH = 1, - parameter MAX_FANOUT = 0 + parameter DEPTH = 1 ) ( input wire clk, input wire reset, @@ -32,67 +31,45 @@ module VX_pipe_register #( `UNUSED_VAR (enable) assign data_out = data_in; end else if (DEPTH == 1) begin - if (MAX_FANOUT != 0 && (DATAW > 
(MAX_FANOUT + MAX_FANOUT/2))) begin - localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT); - localparam N_DATAW = DATAW / NUM_SLICES; - for (genvar i = 0; i < NUM_SLICES; ++i) begin - localparam SLICE_START = i * N_DATAW; - localparam SLICE_END = SLICE_START + S_DATAW - 1; - localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - SLICE_START) : N_DATAW; - localparam S_RESETW = (SLICE_END >= (DATAW - RESETW)) ? - ((SLICE_START >= (DATAW - RESETW)) ? S_DATAW : (SLICE_END - (DATAW - RESETW) + 1)) : 0; - VX_pipe_register #( - .DATAW (S_DATAW), - .RESETW (S_RESETW) - ) pipe_register_slice ( - .clk (clk), - .reset (reset), - .enable (enable), - .data_in (data_in[i * N_DATAW +: S_DATAW]), - .data_out (data_out[i * N_DATAW +: S_DATAW]) - ); + if (RESETW == 0) begin + `UNUSED_VAR (reset) + reg [DATAW-1:0] value; + + always @(posedge clk) begin + if (enable) begin + value <= data_in; + end end + assign data_out = value; + end else if (RESETW == DATAW) begin + reg [DATAW-1:0] value; + + always @(posedge clk) begin + if (reset) begin + value <= RESETW'(0); + end else if (enable) begin + value <= data_in; + end + end + assign data_out = value; end else begin - if (RESETW == 0) begin - `UNUSED_VAR (reset) - reg [DATAW-1:0] value; + reg [DATAW-RESETW-1:0] value_d; + reg [RESETW-1:0] value_r; - always @(posedge clk) begin - if (enable) begin - value <= data_in; - end + always @(posedge clk) begin + if (reset) begin + value_r <= RESETW'(0); + end else if (enable) begin + value_r <= data_in[DATAW-1:DATAW-RESETW]; end - assign data_out = value; - end else if (RESETW == DATAW) begin - reg [DATAW-1:0] value; - - always @(posedge clk) begin - if (reset) begin - value <= RESETW'(0); - end else if (enable) begin - value <= data_in; - end - end - assign data_out = value; - end else begin - reg [DATAW-RESETW-1:0] value_d; - reg [RESETW-1:0] value_r; - - always @(posedge clk) begin - if (reset) begin - value_r <= RESETW'(0); - end else if (enable) begin - value_r <= data_in[DATAW-1:DATAW-RESETW]; - 
end - end - - always @(posedge clk) begin - if (enable) begin - value_d <= data_in[DATAW-RESETW-1:0]; - end - end - assign data_out = {value_r, value_d}; end + + always @(posedge clk) begin + if (enable) begin + value_d <= data_in[DATAW-RESETW-1:0]; + end + end + assign data_out = {value_r, value_d}; end end else begin wire [DEPTH:0][DATAW-1:0] data_delayed; From f1e79f4c0f8e4d140e2b88ee9371f07fbf251472 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 7 Aug 2024 19:44:04 -0700 Subject: [PATCH 071/488] fixed toolchain install on centos/7 --- ci/toolchain_install.sh.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/toolchain_install.sh.in b/ci/toolchain_install.sh.in index 935568ff0..73e27eb55 100755 --- a/ci/toolchain_install.sh.in +++ b/ci/toolchain_install.sh.in @@ -23,7 +23,7 @@ OSVERSION=${OSVERSION:=@OSVERSION@} riscv32() { case $OSVERSION in - "centos/7") parts=$(eval echo {a..h}) ;; + "centos/7") parts=$(eval echo {a..l}) ;; "ubuntu/focal") parts=$(eval echo {a..k}) ;; *) parts=$(eval echo {a..j}) ;; esac @@ -41,7 +41,7 @@ riscv32() riscv64() { case $OSVERSION in - "centos/7") parts=$(eval echo {a..h}) ;; + "centos/7") parts=$(eval echo {a..l}) ;; *) parts=$(eval echo {a..j}) ;; esac rm -f riscv64-gnu-toolchain.tar.bz2.parta* From ab21f76aed67afd251bfc3b6d9b960170d738454 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 7 Aug 2024 19:44:24 -0700 Subject: [PATCH 072/488] minor update --- tests/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index b141fd41d..1068da2ab 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,9 +13,7 @@ regression: $(MAKE) -C regression opencl: -ifneq ($(XLEN),64) $(MAKE) -C opencl -endif riscv: $(MAKE) -C riscv From 455fc8389c029b741c041b5245b9d6feb006a417 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Aug 2024 13:58:19 -0700 Subject: [PATCH 073/488] refactoring priority encoder --- hw/rtl/libs/VX_mem_coalescer.sv | 14 +++++--- 
hw/rtl/libs/VX_priority_arbiter.sv | 8 ++--- hw/rtl/libs/VX_priority_encoder.sv | 54 +++++++++++++++--------------- 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index db36ac781..dbc53336b 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -125,16 +125,20 @@ module VX_mem_coalescer #( end for (genvar i = 0; i < OUT_REQS; ++i) begin - wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO]; + wire [DATA_RATIO-1:0] batch_mask; wire [DATA_RATIO_W-1:0] batch_idx; + + assign batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO]; + VX_priority_encoder #( .N (DATA_RATIO) ) priority_encoder ( - .data_in (batch_mask), - .index (batch_idx), - `UNUSED_PIN (onehot), - .valid_out (batch_valid_n[i]) + .data_in (batch_mask), + .index_out (batch_idx), + `UNUSED_PIN (onehot_out), + .valid_out (batch_valid_n[i]) ); + if (OUT_REQS > 1) begin assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx}; end else begin diff --git a/hw/rtl/libs/VX_priority_arbiter.sv b/hw/rtl/libs/VX_priority_arbiter.sv index cd4844d25..13a940178 100644 --- a/hw/rtl/libs/VX_priority_arbiter.sv +++ b/hw/rtl/libs/VX_priority_arbiter.sv @@ -34,10 +34,10 @@ module VX_priority_arbiter #( VX_priority_encoder #( .N (NUM_REQS) ) priority_encoder ( - .data_in (requests), - .index (grant_index), - .onehot (grant_onehot), - .valid_out (grant_valid) + .data_in (requests), + .index_out (grant_index), + .onehot_out (grant_onehot), + .valid_out (grant_valid) ); end diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 5a08e3412..8bba538b1 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); 
// you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,38 +14,38 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_priority_encoder #( - parameter N = 1, +module VX_priority_encoder #( + parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, parameter LN = `LOG2UP(N) ) ( - input wire [N-1:0] data_in, - output wire [N-1:0] onehot, - output wire [LN-1:0] index, + input wire [N-1:0] data_in, + output wire [N-1:0] onehot_out, + output wire [LN-1:0] index_out, output wire valid_out ); - wire [N-1:0] reversed; + wire [N-1:0] reversed; if (REVERSE != 0) begin for (genvar i = 0; i < N; ++i) begin assign reversed[N-i-1] = data_in[i]; - end + end end else begin assign reversed = data_in; end if (N == 1) begin - assign onehot = reversed; - assign index = '0; - assign valid_out = reversed; + assign onehot_out = reversed; + assign index_out = '0; + assign valid_out = reversed; end else if (N == 2) begin - assign onehot = {~reversed[0], reversed[0]}; - assign index = ~reversed[0]; - assign valid_out = (| reversed); + assign onehot_out = {~reversed[0], reversed[0]}; + assign index_out = ~reversed[0]; + assign valid_out = (| reversed); end else if (MODEL == 1) begin @@ -64,12 +64,12 @@ module VX_priority_encoder #( .REVERSE (1) ) lzc ( .data_in (reversed), - .data_out (index), + .data_out (index_out), `UNUSED_PIN (valid_out) ); - assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - assign valid_out = scan_lo[N-1]; + assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; + assign valid_out = scan_lo[N-1]; end else if (MODEL == 2) begin @@ -78,27 +78,27 @@ module VX_priority_encoder #( `IGNORE_WARNINGS_END assign higher_pri_regs[N-1:1] = 
higher_pri_regs[N-2:0] | reversed[N-2:0]; assign higher_pri_regs[0] = 1'b0; - assign onehot[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; + assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; VX_lzc #( .N (N), .REVERSE (1) ) lzc ( .data_in (reversed), - .data_out (index), + .data_out (index_out), .valid_out (valid_out) ); end else if (MODEL == 3) begin - assign onehot = reversed & -reversed; + assign onehot_out = reversed & -reversed; VX_lzc #( .N (N), .REVERSE (1) ) lzc ( .data_in (reversed), - .data_out (index), + .data_out (index_out), .valid_out (valid_out) ); @@ -117,13 +117,13 @@ module VX_priority_encoder #( onehot_r[i] = 1'b1; end end - end + end - assign index = index_r; - assign onehot = onehot_r; - assign valid_out = (| reversed); + assign index_out = index_r; + assign onehot_out = onehot_r; + assign valid_out = (| reversed); - end + end endmodule `TRACING_ON From 42afa2472f9296a680c6752b49a2942182433edc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Aug 2024 18:11:12 -0700 Subject: [PATCH 074/488] cdiv --- hw/rtl/core/VX_issue.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 1480e6649..45e414865 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -29,6 +29,7 @@ module VX_issue import VX_gpu_pkg::*; #( VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] ); + `STATIC_ASSERT (`ISSUE_WIDTH <= `NUM_WARPS, "invalid parameter"); `ifdef PERF_ENABLE issue_perf_t per_issue_perf [`ISSUE_WIDTH]; From 229641441f0ba5afcc47aa84a5cb740ff0922f8d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 9 Aug 2024 18:13:52 -0700 Subject: [PATCH 075/488] adding static assertion --- hw/rtl/core/VX_issue.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 45e414865..e77a3633a 100644 --- a/hw/rtl/core/VX_issue.sv +++ 
b/hw/rtl/core/VX_issue.sv @@ -29,7 +29,7 @@ module VX_issue import VX_gpu_pkg::*; #( VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] ); - `STATIC_ASSERT (`ISSUE_WIDTH <= `NUM_WARPS, "invalid parameter"); + `STATIC_ASSERT ((`ISSUE_WIDTH <= `NUM_WARPS), ("invalid parameter")) `ifdef PERF_ENABLE issue_perf_t per_issue_perf [`ISSUE_WIDTH]; From c8d0357ac655e06f9036e22ace309f8592db795f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 2024 00:37:56 -0700 Subject: [PATCH 076/488] rtl arbiter fixes --- hw/rtl/libs/VX_fair_arbiter.sv | 25 +++++++-------- hw/rtl/libs/VX_matrix_arbiter.sv | 52 ++++++++++++++------------------ hw/rtl/libs/VX_rr_arbiter.sv | 4 +-- 3 files changed, 36 insertions(+), 45 deletions(-) diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index 82bcfc5c6..d4dba9a3f 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -38,26 +38,27 @@ module VX_fair_arbiter #( end else begin - reg [NUM_REQS-1:0] requests_r; + reg [NUM_REQS-1:0] grant_hist; - wire [NUM_REQS-1:0] requests_sel = requests_r & requests; - wire [NUM_REQS-1:0] requests_qual = (| requests_sel) ? requests_sel : requests; + wire [NUM_REQS-1:0] requests_sel = requests & ~grant_hist; + wire rem_valid = (| requests_sel); + wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_sel : requests; always @(posedge clk) begin if (reset) begin - requests_r <= '0; + grant_hist <= '0; end else if (grant_ready) begin - requests_r <= requests_qual & ~grant_onehot; + grant_hist <= rem_valid ? 
(grant_hist | grant_onehot) : grant_onehot; end end - VX_priority_arbiter #( - .NUM_REQS (NUM_REQS) - ) priority_arbiter ( - .requests (requests_qual), - .grant_index (grant_index), - .grant_onehot (grant_onehot), - .grant_valid (grant_valid) + VX_priority_encoder #( + .N (NUM_REQS) + ) priority_enc ( + .data_in (requests_qual), + .index_out (grant_index), + .onehot_out (grant_onehot), + .valid_out (grant_valid) ); end diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 23f9ea2a0..9f0ead356 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -38,57 +38,49 @@ module VX_matrix_arbiter #( end else begin - reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; + reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; - wire [NUM_REQS-1:0] grant_unqual; + wire [NUM_REQS-1:0] grant; - for (genvar i = 0; i < NUM_REQS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin - if (j > i) begin - assign pri[j][i] = requests[i] && state[i][j]; + for (genvar r = 0; r < NUM_REQS; ++r) begin + for (genvar c = 0; c < NUM_REQS; ++c) begin + if (r > c) begin + assign pri[r][c] = requests[c] && state[c][r]; end - else if (j < i) begin - assign pri[j][i] = requests[i] && !state[j][i]; + else if (r < c) begin + assign pri[r][c] = requests[c] && !state[r][c]; end else begin - assign pri[j][i] = 0; + assign pri[r][c] = 0; end end - assign grant_unqual[i] = requests[i] && !(| pri[i]); end - for (genvar i = 0; i < NUM_REQS; ++i) begin - for (genvar j = i + 1; j < NUM_REQS; ++j) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin + assign grant[r] = requests[r] && ~(| pri[r]); + end + + for (genvar r = 0; r < NUM_REQS; ++r) begin + for (genvar c = r + 1; c < NUM_REQS; ++c) begin always @(posedge clk) begin if (reset) begin - state[i][j] <= '0; - end else begin - state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i]; + state[r][c] <= '0; + end else if (grant_ready) begin + state[r][c] <= 
(state[r][c] || grant[c]) && ~grant[r]; end end end end - reg [NUM_REQS-1:0] grant_unqual_prev; - always @(posedge clk) begin - if (reset) begin - grant_unqual_prev <= '0; - end else if (grant_ready) begin - grant_unqual_prev <= grant_unqual; - end - end - assign grant_onehot = grant_ready ? grant_unqual : grant_unqual_prev; + assign grant_onehot = grant; VX_onehot_encoder #( .N (NUM_REQS) ) encoder ( - .data_in (grant_unqual), - .data_out (grant_index), - `UNUSED_PIN (valid_out) + .data_in (grant_onehot), + .data_out (grant_index), + .valid_out (grant_valid) ); - - assign grant_valid = (| requests); - end endmodule diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 52a981184..adb7c3beb 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -416,14 +416,12 @@ module VX_rr_arbiter #( end end - assign grant_valid = (| requests); - VX_onehot_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), .data_out (grant_index), - `UNUSED_PIN (valid_out) + .valid_out(grant_valid) ); end else begin From eaa7ed7fe206721111fd03033e19ffcf632f088d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 2024 02:38:54 -0700 Subject: [PATCH 077/488] rtl arbiter update --- hw/rtl/libs/VX_fair_arbiter.sv | 2 +- hw/rtl/libs/VX_rr_arbiter.sv | 71 ++++++++++++++++++---------------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index d4dba9a3f..430966aee 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -47,7 +47,7 @@ module VX_fair_arbiter #( always @(posedge clk) begin if (reset) begin grant_hist <= '0; - end else if (grant_ready) begin + end else if (grant_valid && grant_ready) begin grant_hist <= rem_valid ? 
(grant_hist | grant_onehot) : grant_onehot; end end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index adb7c3beb..60cc4813f 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -377,41 +377,38 @@ module VX_rr_arbiter #( end else if (MODEL == 1) begin `IGNORE_UNOPTFLAT_BEGIN - wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs; + wire [NUM_REQS-1:0] masked_pri_reqs, unmasked_pri_reqs; `IGNORE_UNOPTFLAT_END - wire [NUM_REQS-1:0] grant_masked, grant_unmasked; + reg [NUM_REQS-1:0] reqs_mask; - reg [NUM_REQS-1:0] pointer_reg; + wire [NUM_REQS-1:0] masked_reqs = requests & reqs_mask; - wire [NUM_REQS-1:0] req_masked = requests & pointer_reg; - - assign mask_higher_pri_regs[0] = 1'b0; + assign masked_pri_reqs[0] = 1'b0; for (genvar i = 1; i < NUM_REQS; ++i) begin - assign mask_higher_pri_regs[i] = mask_higher_pri_regs[i-1] | req_masked[i-1]; + assign masked_pri_reqs[i] = masked_pri_reqs[i-1] | masked_reqs[i-1]; end - assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0]; - - assign unmask_higher_pri_regs[0] = 1'b0; + assign unmasked_pri_reqs[0] = 1'b0; for (genvar i = 1; i < NUM_REQS; ++i) begin - assign unmask_higher_pri_regs[i] = unmask_higher_pri_regs[i-1] | requests[i-1]; + assign unmasked_pri_reqs[i] = unmasked_pri_reqs[i-1] | requests[i-1]; end - assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0]; + wire [NUM_REQS-1:0] grant_masked = masked_reqs & ~masked_pri_reqs[NUM_REQS-1:0]; + wire [NUM_REQS-1:0] grant_unmasked = requests & ~unmasked_pri_reqs[NUM_REQS-1:0]; - wire no_req_masked = ~(|req_masked); - assign grant_onehot = ({NUM_REQS{no_req_masked}} & grant_unmasked) | grant_masked; + wire has_masked_reqs = (| masked_reqs); + wire has_unmasked_reqs = (| requests); + + assign grant_onehot = ({NUM_REQS{~has_masked_reqs}} & grant_unmasked) | grant_masked; always @(posedge clk) begin if (reset) begin - 
pointer_reg <= {NUM_REQS{1'b1}}; + reqs_mask <= {NUM_REQS{1'b1}}; end else if (grant_ready) begin - if (|req_masked) begin - pointer_reg <= mask_higher_pri_regs; - end else if (|requests) begin - pointer_reg <= unmask_higher_pri_regs; - end else begin - pointer_reg <= pointer_reg; + if (has_masked_reqs) begin + reqs_mask <= masked_pri_reqs; + end else if (has_unmasked_reqs) begin + reqs_mask <= unmasked_pri_reqs; end end end @@ -426,35 +423,41 @@ module VX_rr_arbiter #( end else begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [NUM_REQS-1:0] state; + reg grant_valid_r; + reg [LOG_NUM_REQS-1:0] grant_index_r; + reg [NUM_REQS-1:0] grant_onehot_r; + reg [LOG_NUM_REQS-1:0] next_grant_index; + + wire [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index_qual; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign next_grant_index_qual[i] = LOG_NUM_REQS'(i) + next_grant_index; + end always @(*) begin grant_index_r = 'x; grant_onehot_r = 'x; + grant_valid_r = 0; for (integer i = 0; i < NUM_REQS; ++i) begin - for (integer j = 0; j < NUM_REQS; ++j) begin - if (state[i] && requests[(j + 1) % NUM_REQS]) begin - grant_index_r = LOG_NUM_REQS'((j + 1) % NUM_REQS); - grant_onehot_r = '0; - grant_onehot_r[(j + 1) % NUM_REQS] = 1; - end + if (requests[next_grant_index_qual[i]]) begin + grant_valid_r = 1; + grant_index_r = next_grant_index_qual[i]; + grant_onehot_r = NUM_REQS'(1) << next_grant_index_qual[i]; + break; end end end always @(posedge clk) begin if (reset) begin - state <= '0; - end else if (grant_ready) begin - state <= grant_index_r; + next_grant_index <= '0; + end else if (grant_valid && grant_ready) begin + next_grant_index <= grant_index_r + LOG_NUM_REQS'(1); end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = grant_valid_r; end endmodule From 32a882e26fc20afb5047f833526a64c04d7929c3 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 
2024 18:41:10 -0700 Subject: [PATCH 078/488] arbiters optimization --- hw/rtl/libs/VX_fair_arbiter.sv | 12 ++++++------ hw/rtl/libs/VX_onehot_mux.sv | 10 ++++++++++ hw/rtl/libs/VX_onehot_shift.sv | 30 ++++++++++++++++++++++++++++++ hw/rtl/libs/VX_rr_arbiter.sv | 26 ++++++++++++-------------- hw/rtl/libs/VX_transpose.sv | 29 +++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 hw/rtl/libs/VX_onehot_shift.sv create mode 100644 hw/rtl/libs/VX_transpose.sv diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index 430966aee..9a6ca8459 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -38,17 +38,17 @@ module VX_fair_arbiter #( end else begin - reg [NUM_REQS-1:0] grant_hist; + reg [NUM_REQS-1:0] reqs_mask; - wire [NUM_REQS-1:0] requests_sel = requests & ~grant_hist; - wire rem_valid = (| requests_sel); - wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_sel : requests; + wire [NUM_REQS-1:0] requests_rem = requests & reqs_mask; + wire rem_valid = (| requests_rem); + wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests; always @(posedge clk) begin if (reset) begin - grant_hist <= '0; + reqs_mask <= '1; end else if (grant_valid && grant_ready) begin - grant_hist <= rem_valid ? (grant_hist | grant_onehot) : grant_onehot; + reqs_mask <= rem_valid ? 
(reqs_mask & ~grant_onehot) : ~grant_onehot; end end diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index cc0fffaa6..74e19a41b 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -124,6 +124,16 @@ module VX_onehot_mux #( assign data_out[i] = (| gather); end end else if (MODEL == 2) begin + VX_find_first #( + .N (N), + .DATAW (DATAW) + ) find_first ( + .valid_in (sel_in), + .data_in (data_in), + .data_out (data_out), + `UNUSED_PIN (valid_out) + ); + end else if (MODEL == 3) begin reg [DATAW-1:0] data_out_r; always @(*) begin data_out_r = 'x; diff --git a/hw/rtl/libs/VX_onehot_shift.sv b/hw/rtl/libs/VX_onehot_shift.sv new file mode 100644 index 000000000..950d1f380 --- /dev/null +++ b/hw/rtl/libs/VX_onehot_shift.sv @@ -0,0 +1,30 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +module VX_onehot_shift #( + parameter N = 1, + parameter M = 1 +) ( + input wire [N-1:0] data_in0, + input wire [M-1:0] data_in1, + output wire [N*M-1:0] data_out +); + for (genvar i = 0; i < M; ++i) begin + for (genvar j = 0; j < N; ++j) begin + assign data_out[i*N + j] = data_in1[i] & data_in0[j]; + end + end + +endmodule diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 60cc4813f..d9f5b767f 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -426,32 +426,30 @@ module VX_rr_arbiter #( reg grant_valid_r; reg [LOG_NUM_REQS-1:0] grant_index_r; reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] next_grant_index; - - wire [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index_qual; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign next_grant_index_qual[i] = LOG_NUM_REQS'(i) + next_grant_index; - end + reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index; always @(*) begin grant_index_r = 'x; grant_onehot_r = 'x; grant_valid_r = 0; - for (integer i = 0; i < NUM_REQS; ++i) begin - if (requests[next_grant_index_qual[i]]) begin - grant_valid_r = 1; - grant_index_r = next_grant_index_qual[i]; - grant_onehot_r = NUM_REQS'(1) << next_grant_index_qual[i]; - break; + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (requests[next_grant_index[i]]) begin + grant_valid_r = 1; + grant_index_r = next_grant_index[i]; + grant_onehot_r = NUM_REQS'(1) << next_grant_index[i]; end end end always @(posedge clk) begin if (reset) begin - next_grant_index <= '0; + for (integer i = 0; i < NUM_REQS; ++i) begin + next_grant_index[i] <= LOG_NUM_REQS'(i); + end end else if (grant_valid && grant_ready) begin - next_grant_index <= grant_index_r + LOG_NUM_REQS'(1); + for (integer i = 0; i < NUM_REQS; ++i) begin + next_grant_index[i] <= grant_index_r + LOG_NUM_REQS'(i + 1); + end end end diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv new file mode 100644 index 
000000000..93a8c1683 --- /dev/null +++ b/hw/rtl/libs/VX_transpose.sv @@ -0,0 +1,29 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +module VX_transpose #( + parameter N = 1, + parameter M = 1 +) ( + input wire [N-1:0][M-1:0] data_in, + output wire [M-1:0][N-1:0] data_out +); + for (genvar i = 0; i < N; ++i) begin + for (genvar j = 0; j < M; ++j) begin + assign data_out[j][i] = data_in[i][j]; + end + end + +endmodule From 8fb73b6da7e5bc0824d6e69a91bf4687c9187614 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 10 Aug 2024 22:11:49 -0700 Subject: [PATCH 079/488] fair arbiter optimization --- hw/rtl/libs/VX_fair_arbiter.sv | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index 9a6ca8459..3503ea21e 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -40,15 +40,14 @@ module VX_fair_arbiter #( reg [NUM_REQS-1:0] reqs_mask; - wire [NUM_REQS-1:0] requests_rem = requests & reqs_mask; - wire rem_valid = (| requests_rem); - wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests; + wire [NUM_REQS-1:0] masked_reqs = requests & reqs_mask; + wire [NUM_REQS-1:0] requests_qual = (| masked_reqs) ? masked_reqs : requests; always @(posedge clk) begin if (reset) begin reqs_mask <= '1; end else if (grant_valid && grant_ready) begin - reqs_mask <= rem_valid ? 
(reqs_mask & ~grant_onehot) : ~grant_onehot; + reqs_mask <= (| reqs_mask) ? (reqs_mask & ~grant_onehot) : ~grant_onehot; end end From c94c3651ec71a3e2e1d8ff8485da01c1f85da33d Mon Sep 17 00:00:00 2001 From: sij814 Date: Sun, 11 Aug 2024 14:47:43 -0700 Subject: [PATCH 080/488] configure change 22.04 --- configure | 1 + 1 file changed, 1 insertion(+) diff --git a/configure b/configure index 62975784b..cab5142c5 100755 --- a/configure +++ b/configure @@ -26,6 +26,7 @@ detect_osversion() { case "$VERSION_CODENAME" in bionic) osversion="ubuntu/bionic";; focal) osversion="ubuntu/focal";; + jammy) osversion="ubuntu/focal";; # Add new versions as needed esac ;; From 1fb0691bc74e0909413908cf1b5cfe262c9c3514 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 11 Aug 2024 19:50:31 -0700 Subject: [PATCH 081/488] minor update --- hw/rtl/mem/VX_local_mem_top.sv | 3 ++- hw/syn/xilinx/xrt/Makefile | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/rtl/mem/VX_local_mem_top.sv b/hw/rtl/mem/VX_local_mem_top.sv index d1cac7ebf..5f9b17da0 100644 --- a/hw/rtl/mem/VX_local_mem_top.sv +++ b/hw/rtl/mem/VX_local_mem_top.sv @@ -87,7 +87,8 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( .WORD_SIZE (WORD_SIZE), .ADDR_WIDTH (ADDR_WIDTH), .UUID_WIDTH (UUID_WIDTH), - .TAG_WIDTH (TAG_WIDTH) + .TAG_WIDTH (TAG_WIDTH), + .OUT_BUF (3) ) local_mem ( .clk (clk), .reset (reset), diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 38ae29f36..e1acce8d6 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -4,7 +4,7 @@ include $(ROOT_DIR)/config.mk ifneq ($(findstring Makefile, $(MAKEFILE_LIST)), Makefile) help: $(ECHO) "Makefile Usage:" - $(ECHO) " make all TARGET= PLATFORM=" + $(ECHO) " make all TARGET= PLATFORM=" $(ECHO) " Command to generate the design for specified Target and Device." 
$(ECHO) "" $(ECHO) " make clean" From 6f3add273dacf5a49843ec34264a4a64856b4239 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 11 Aug 2024 20:28:39 -0700 Subject: [PATCH 082/488] elastic buffer lutram refactoring --- hw/rtl/VX_platform.vh | 10 ++++++++-- hw/rtl/core/VX_dispatch.sv | 2 +- hw/rtl/core/VX_operands.sv | 5 ++--- hw/rtl/core/VX_scoreboard.sv | 3 +-- hw/rtl/libs/VX_elastic_buffer.sv | 30 ++++++++++++++++++++++++------ hw/rtl/libs/VX_stream_arb.sv | 25 +++++++++---------------- hw/rtl/libs/VX_stream_xbar.sv | 11 ++++------- 7 files changed, 49 insertions(+), 37 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 59f5ef0f5..cd0550efa 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -239,10 +239,16 @@ `RESET_RELAY_EX (dst, src, 1, 0) // size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2 -`define TO_OUT_BUF_SIZE(s) `MIN(s, 2) +`define TO_OUT_BUF_SIZE(s) `MIN(s & 7, 2) // reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 > 3 -`define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2)) +`define TO_OUT_BUF_REG(s) (((s & 7) < 2) ? (s & 7) : ((s & 7) - 2)) + +// lut(x): (x & 8) != 0 +`define TO_OUT_BUF_LUTRAM(s) ((s & 8) != 0) + +// rbuf(x): (x <= 2) ? 
3 : x +`define TO_OUT_RBUF(s) ((s & 8) | `MAX(s & 7, 3)) `define REPEAT(n,f,s) `_REPEAT_``n(f,s) `define _REPEAT_0(f,s) diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 8ea3a6125..04c3d92bf 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -60,7 +60,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), - .OUT_REG (2), // 2-cycle EB for area reduction + .OUT_REG (2), // 2-cycle LUT EB for area reduction .LUTRAM (1) ) buffer ( .clk (clk), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index e3df0c1fa..bdf8d2cdf 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -23,7 +23,7 @@ module VX_operands import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter NUM_BANKS = 4, - parameter OUT_BUF = 4 // using 2-cycle EB for area reduction + parameter OUT_BUF = 8+4 // using 2-cycle LUT EB for area reduction ) ( input wire clk, input wire reset, @@ -204,8 +204,7 @@ module VX_operands import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (1) + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 9b3a146c6..056e1c165 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -290,8 +290,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), .ARBITER ("F"), - .LUTRAM (1), - .OUT_BUF (4) // using 2-cycle EB for area reduction + .OUT_BUF (8+4) // using 2-cycle LUT EB for area reduction ) out_arb ( .clk (clk), .reset (arb_reset), diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index ee6f31b58..3bfcdeb9c 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -43,7 +43,8 @@ module VX_elastic_buffer #( end else if 
(SIZE == 1) begin VX_pipe_buffer #( - .DATAW (DATAW) + .DATAW (DATAW), + .DEPTH (`MAX(OUT_REG, 1)) ) pipe_buffer ( .clk (clk), .reset (reset), @@ -57,16 +58,33 @@ module VX_elastic_buffer #( end else if (SIZE == 2 && LUTRAM == 0) begin - VX_skid_buffer #( + wire valid_out_t; + wire [DATAW-1:0] data_out_t; + wire ready_out_t; + + VX_stream_buffer #( .DATAW (DATAW), - .HALF_BW (OUT_REG == 2), - .OUT_REG (OUT_REG) - ) skid_buffer ( + .OUT_REG (OUT_REG == 1) + ) stream_buffer ( .clk (clk), .reset (reset), .valid_in (valid_in), .data_in (data_in), .ready_in (ready_in), + .valid_out (valid_out_t), + .data_out (data_out_t), + .ready_out (ready_out_t) + ); + + VX_pipe_buffer #( + .DATAW (DATAW), + .DEPTH ((OUT_REG > 1) ? (OUT_REG-1) : 0) + ) out_buf ( + .clk (clk), + .reset (reset), + .valid_in (valid_out_t), + .data_in (data_out_t), + .ready_in (ready_out_t), .valid_out (valid_out), .data_out (data_out), .ready_out (ready_out) @@ -105,7 +123,7 @@ module VX_elastic_buffer #( VX_pipe_buffer #( .DATAW (DATAW), - .DEPTH ((OUT_REG > 0) ? (OUT_REG-1) : 0) + .DEPTH ((OUT_REG > 1) ? 
(OUT_REG-1) : 0) ) out_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 98fed5859..d5157a8dd 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -21,7 +21,6 @@ module VX_stream_arb #( parameter `STRING ARBITER = "R", parameter MAX_FANOUT = `MAX_FANOUT, parameter OUT_BUF = 0, - parameter LUTRAM = 0, parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS), parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), parameter NUM_REQS_W = `UP(LOG_NUM_REQS) @@ -58,8 +57,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), .reset (slice_reset), @@ -103,8 +101,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3), // registered output - .LUTRAM (LUTRAM) + .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output ) fanout_slice_arb ( .clk (clk), .reset (slice_reset), @@ -130,8 +127,7 @@ module VX_stream_arb #( .DATAW (DATAW + LOG_NUM_REQS2), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) fanout_join_arb ( .clk (clk), .reset (reset), @@ -185,7 +181,7 @@ module VX_stream_arb #( .DATAW (LOG_NUM_REQS + DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), @@ -218,8 +214,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), .reset (slice_reset), @@ -253,8 +248,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3), // registered output - .LUTRAM (LUTRAM) + .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output ) fanout_fork_arb ( .clk (clk), .reset (reset), @@ -281,8 +275,7 
@@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) fanout_slice_arb ( .clk (clk), .reset (slice_reset), @@ -329,7 +322,7 @@ module VX_stream_arb #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), @@ -357,7 +350,7 @@ module VX_stream_arb #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (out_buf_reset[i]), diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index b7bdcbf5e..8cdb9ced6 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -22,7 +22,6 @@ module VX_stream_xbar #( parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS), parameter ARBITER = "R", parameter OUT_BUF = 0, - parameter LUTRAM = 0, parameter MAX_FANOUT = `MAX_FANOUT, parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1) ) ( @@ -67,8 +66,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), .reset (slice_reset), @@ -96,8 +94,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), .reset (reset), @@ -133,7 +130,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (out_buf_reset[i]), @@ -156,7 +153,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset 
(reset), From de81baaabf79b2816339e232dfa2c5006ea8602a Mon Sep 17 00:00:00 2001 From: sij814 Date: Mon, 12 Aug 2024 02:52:47 -0700 Subject: [PATCH 083/488] hbm for vortex 2.2 --- hw/rtl/VX_config.vh | 2 +- hw/rtl/VX_types.vh | 5 +++ runtime/include/vortex.h | 1 + runtime/simx/vortex.cpp | 3 ++ runtime/stub/utils.cpp | 21 ++++++++++ sim/common/dram_sim.cpp | 1 + sim/simx/cache_cluster.h | 4 +- sim/simx/cache_sim.cpp | 82 +++++++++++++++++++++++++++++++-------- sim/simx/cache_sim.h | 4 +- sim/simx/cluster.cpp | 4 +- sim/simx/constants.h | 2 +- sim/simx/emulator.cpp | 3 ++ sim/simx/mem_sim.cpp | 78 ++++++++++++++++++++----------------- sim/simx/mem_sim.h | 18 ++++++--- sim/simx/processor.cpp | 29 ++++++++------ sim/simx/processor_impl.h | 1 + 16 files changed, 180 insertions(+), 78 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 8d1c280fd..615c1ae6d 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -617,7 +617,7 @@ // Number of Banks `ifndef L3_NUM_BANKS -`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS) +`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS) `endif // Core Response Queue Size diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 927ffae96..685051b6c 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -173,6 +173,11 @@ `define VX_CSR_MPM_LMEM_WRITES_H 12'hB9C `define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts `define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D +// PERF: hbm +`define VX_CSR_HBM_BANK_CNTR 12'hB1E // hbm banks +`define VX_CSR_HBM_BANK_CNTR_H 12'hB9E +`define VX_CSR_HBM_BANK_TICK 12'hB1F // hbm ticks +`define VX_CSR_HBM_BANK_TICK_H 12'hB9F // Machine Performance-monitoring memory counters (class 3) /////////////////// // diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index 8481002e1..bf263da09 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -34,6 +34,7 @@ typedef void* vx_buffer_h; #define VX_CAPS_GLOBAL_MEM_SIZE 0x5 #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define 
VX_CAPS_ISA_FLAGS 0x7 +#define VX_CAPS_L3CACHE_NUM_BANKS 0x8 // device isa flags #define VX_ISA_STD_A (1ull << ISA_STD_A) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 89856f3a0..be7173fc3 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -81,6 +81,9 @@ public: case VX_CAPS_ISA_FLAGS: _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; break; + case VX_CAPS_L3CACHE_NUM_BANKS: + _value = L3_NUM_BANKS; + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/stub/utils.cpp b/runtime/stub/utils.cpp index 9826db711..ae894fcbb 100644 --- a/runtime/stub/utils.cpp +++ b/runtime/stub/utils.cpp @@ -211,6 +211,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { uint64_t mem_reads = 0; uint64_t mem_writes = 0; uint64_t mem_lat = 0; + + // PERF: hbm + uint64_t hbm_counter = 0; + uint64_t hbm_ticks = 0; uint64_t num_cores; CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), { @@ -222,6 +226,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { return err; }); + uint64_t l3cache_banks; + CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_L3CACHE_NUM_BANKS, &l3cache_banks), { + return err; + }); + bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE; bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE; bool l2cache_enable = isa_flags & VX_ISA_EXT_L2CACHE; @@ -522,6 +531,14 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MSHR_ST, core_id, &l3cache_mshr_stalls), { return err; }); + + // PERF: HBM + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_CNTR, core_id, &hbm_counter), { + return err; + }); + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_TICK, core_id, &hbm_ticks), { + return err; + }); } // PERF: memory CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_READS, core_id, &mem_reads), { @@ -606,6 +623,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { 
fprintf(stream, "PERF: l3cache write misses=%ld (hit ratio=%d%%)\n", l3cache_write_misses, write_hit_ratio); fprintf(stream, "PERF: l3cache bank stalls=%ld (utilization=%d%%)\n", l3cache_bank_stalls, bank_utilization); fprintf(stream, "PERF: l3cache mshr stalls=%ld (utilization=%d%%)\n", l3cache_mshr_stalls, mshr_utilization); + + // HBM + float util = (float)hbm_counter / (hbm_ticks * l3cache_banks) * 100; + fprintf(stream, "PERF: hbm bank utilization=%f\n", util); } int mem_avg_lat = caclAverage(mem_lat, mem_reads); diff --git a/sim/common/dram_sim.cpp b/sim/common/dram_sim.cpp index f7cfa8a32..c2a9e9ee0 100644 --- a/sim/common/dram_sim.cpp +++ b/sim/common/dram_sim.cpp @@ -41,6 +41,7 @@ public: dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2"; dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb"; dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192; + dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8; dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps"; dram_config["MemorySystem"]["Controller"]["impl"] = "Generic"; dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS"; diff --git a/sim/simx/cache_cluster.h b/sim/simx/cache_cluster.h index 63016577b..2ba26dc21 100644 --- a/sim/simx/cache_cluster.h +++ b/sim/simx/cache_cluster.h @@ -77,8 +77,8 @@ public: caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i)); } - caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i)); - cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort); + caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i)); + cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0)); } cache_arb->ReqOut.at(0).bind(&this->MemReqPort); diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index 65a8da70b..d7d1727f6 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -19,6 +19,7 @@ #include #include #include +#include using namespace vortex; @@ -315,27 +316,74 @@ public: 
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i)); bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i)); } - bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort); - simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0)); + bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0)); + simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0)); return; } - bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2); - bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort); - simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0)); + if (strcmp(simobject->name().c_str(), "l3cache")) { + bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2); + bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0)); + simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0)); - if (config.B != 0) { - snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str()); - bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B)); - for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) { - mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i)); - bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i)); + if (config.B != 0) { + snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str()); + bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B)); + for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) { + mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i)); + bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i)); + } + bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0)); + } else { + mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); } - bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0)); - bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0)); } else { - 
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0)); - bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); + uint32_t max = MAX(2, config_.num_inputs); + //printf("%s connecting\n", simobject_->name().c_str()); + //3 + if (config.B != 0) { + bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max); + for (uint32_t i = 0; i < max; ++i) { + //printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i); + bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B))); + simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i)); + } + } else { + bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2); + bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0)); + simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0)); + } + + if (config.B != 0) + { + snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str()); + bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B)); + for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) + { + //1 + //printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i); + mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i)); + bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i)); + } + //2 + if (config_.num_inputs > 1) { + for (uint32_t i = 0; i < max; ++i) { + //printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i); + bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i)); + bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B))); + } + } else { + bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0)); + } + } + else + { + mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); + } } // calculate cache initialization cycles @@ -673,8 +721,8 @@ 
CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config : SimObject(ctx, name) , CoreReqPorts(config.num_inputs, this) , CoreRspPorts(config.num_inputs, this) - , MemReqPort(this) - , MemRspPort(this) + , MemReqPorts((1 << config.B), this) + , MemRspPorts((1 << config.B), this) , impl_(new Impl(this, config)) {} diff --git a/sim/simx/cache_sim.h b/sim/simx/cache_sim.h index df62bf854..aad489546 100644 --- a/sim/simx/cache_sim.h +++ b/sim/simx/cache_sim.h @@ -75,8 +75,8 @@ public: std::vector> CoreReqPorts; std::vector> CoreRspPorts; - SimPort MemReqPort; - SimPort MemRspPort; + std::vector> MemReqPorts; + std::vector> MemRspPorts; CacheSim(const SimContext& ctx, const char* name, const Config& config); ~CacheSim(); diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index ec5e3f2b6..e23df448b 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -76,8 +76,8 @@ Cluster::Cluster(const SimContext& ctx, 2, // pipeline latency }); - l2cache_->MemReqPort.bind(&this->mem_req_port); - this->mem_rsp_port.bind(&l2cache_->MemRspPort); + l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port); + this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0)); icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0)); l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0)); diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 09a509ce1..81a626b84 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -22,7 +22,7 @@ #endif #ifndef MEMORY_BANKS -#define MEMORY_BANKS 2 +#define MEMORY_BANKS 8 #endif #define LSU_WORD_SIZE (XLEN / 8) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 7ed9a10f9..d76113249 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -455,6 +455,9 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads); CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes); CSR_READ_64(VX_CSR_MPM_LMEM_BANK_ST, 
lmem_perf.bank_stalls); + + CSR_READ_64(VX_CSR_HBM_BANK_CNTR, proc_perf.memsim.counter); + CSR_READ_64(VX_CSR_HBM_BANK_TICK, proc_perf.memsim.ticks); } } break; default: { diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index a12713fea..6d8015d1f 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -33,6 +33,7 @@ private: struct DramCallbackArgs { MemSim* simobject; MemReq request; + uint32_t i; }; public: @@ -56,46 +57,49 @@ public: void tick() { dram_sim_.tick(); + uint32_t counter = 0; - if (simobject_->MemReqPort.empty()) - return; + for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) { + if (simobject_->MemReqPorts.at(i).empty()) + continue; - auto& mem_req = simobject_->MemReqPort.front(); + auto& mem_req = simobject_->MemReqPorts.at(i).front(); - // try to enqueue the request to the memory system - auto req_args = new DramCallbackArgs{simobject_, mem_req}; - auto enqueue_success = dram_sim_.send_request( - mem_req.write, - mem_req.addr, - 0, - [](void* arg) { - auto rsp_args = reinterpret_cast(arg); - // only send a response for read requests - if (!rsp_args->request.write) { - MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; - rsp_args->simobject->MemRspPort.push(mem_rsp, 1); - DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp); - } - delete rsp_args; - }, - req_args - ); + // try to enqueue the request to the memory system + auto req_args = new DramCallbackArgs{simobject_, mem_req, i}; + auto enqueue_success = dram_sim_.send_request( + mem_req.write, + mem_req.addr, + i, + [](void* arg) { + auto rsp_args = reinterpret_cast(arg); + // only send a response for read requests + if (!rsp_args->request.write) { + MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; + rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1); + DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp); + } + delete rsp_args; + }, + req_args + ); - // check if the request was 
enqueued successfully - if (!enqueue_success) { - delete req_args; - return; + // check if the request was enqueued successfully + if (!enqueue_success) { + delete req_args; + continue; + } + + DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i); + + simobject_->MemReqPorts.at(i).pop(); + counter++; } - if (mem_req.write) { - ++perf_stats_.writes; - } else { - ++perf_stats_.reads; + perf_stats_.counter += counter; + if (counter > 0) { + ++perf_stats_.ticks; } - - DT(3, simobject_->name() << " mem-req: " << mem_req); - - simobject_->MemReqPort.pop(); } }; @@ -103,8 +107,8 @@ public: MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) : SimObject(ctx, name) - , MemReqPort(this) - , MemRspPort(this) + , MemReqPorts(L3_NUM_BANKS, this) + , MemRspPorts(L3_NUM_BANKS, this) , impl_(new Impl(this, config)) {} @@ -118,4 +122,8 @@ void MemSim::reset() { void MemSim::tick() { impl_->tick(); +} + +const MemSim::PerfStats &MemSim::perf_stats() const { + return impl_->perf_stats(); } \ No newline at end of file diff --git a/sim/simx/mem_sim.h b/sim/simx/mem_sim.h index 3f4d9801e..2f4f96187 100644 --- a/sim/simx/mem_sim.h +++ b/sim/simx/mem_sim.h @@ -26,17 +26,23 @@ public: }; struct PerfStats { - uint64_t reads; - uint64_t writes; + uint64_t counter; + uint64_t ticks; PerfStats() - : reads(0) - , writes(0) + : counter(0) + , ticks(0) {} + + PerfStats& operator+=(const PerfStats& rhs) { + this->counter += rhs.counter; + this->ticks += rhs.ticks; + return *this; + } }; - SimPort MemReqPort; - SimPort MemRspPort; + std::vector> MemReqPorts; + std::vector> MemRspPorts; MemSim(const SimContext& ctx, const char* name, const Config& config); ~MemSim(); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 3807fa5e8..b3664f3fa 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -47,8 +47,10 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) ); // connect L3 memory ports - 
l3cache_->MemReqPort.bind(&memsim_->MemReqPort); - memsim_->MemRspPort.bind(&l3cache_->MemRspPort); + for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) { + l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i)); + memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i)); + } // create clusters for (uint32_t i = 0; i < arch.num_clusters(); ++i) { @@ -59,16 +61,18 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) } // set up memory profiling - memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){ - __unused (cycle); - perf_mem_reads_ += !req.write; - perf_mem_writes_ += req.write; - perf_mem_pending_reads_ += !req.write; - }); - memsim_->MemRspPort.tx_callback([&](const MemRsp&, uint64_t cycle){ - __unused (cycle); - --perf_mem_pending_reads_; - }); + for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) { + memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){ + __unused (cycle); + perf_mem_reads_ += !req.write; + perf_mem_writes_ += req.write; + perf_mem_pending_reads_ += !req.write; + }); + memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){ + __unused (cycle); + --perf_mem_pending_reads_; + }); + } #ifndef NDEBUG // dump device configuration @@ -131,6 +135,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const { perf.mem_writes = perf_mem_writes_; perf.mem_latency = perf_mem_latency_; perf.l3cache = l3cache_->perf_stats(); + perf.memsim = memsim_->perf_stats(); return perf; } diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h index dcfba84d7..cffeffbfe 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -25,6 +25,7 @@ class ProcessorImpl { public: struct PerfStats { CacheSim::PerfStats l3cache; + MemSim::PerfStats memsim; uint64_t mem_reads; uint64_t mem_writes; uint64_t mem_latency; From bab9496117a2438e54ebc1429969a3eb281f539c Mon Sep 17 00:00:00 2001 From: sij814 Date: Mon, 12 Aug 2024 03:52:48 -0700 Subject: [PATCH 084/488] debugging segmentation fault 
with 8 clusters --- sim/common/dram_sim.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sim/common/dram_sim.cpp b/sim/common/dram_sim.cpp index c2a9e9ee0..aa6f882e1 100644 --- a/sim/common/dram_sim.cpp +++ b/sim/common/dram_sim.cpp @@ -46,7 +46,6 @@ public: dram_config["MemorySystem"]["Controller"]["impl"] = "Generic"; dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS"; dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; - dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy"; { YAML::Node draw_plugin; @@ -67,7 +66,9 @@ public: auto original_buf = std::cout.rdbuf(); std::cout.rdbuf(nullstream.rdbuf()); ramulator_frontend_->finalize(); - ramulator_memorysystem_->finalize(); + ramulator_memorysystem_->finalize(); + delete ramulator_frontend_; + delete ramulator_memorysystem_; std::cout.rdbuf(original_buf); } From ed66ee2806726d31a7773d90f059aae405b679e1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 04:09:56 -0700 Subject: [PATCH 085/488] arbitration update --- hw/rtl/cache/VX_cache.sv | 6 +- hw/rtl/core/VX_alu_muldiv.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/core/VX_scoreboard.sv | 4 +- hw/rtl/fpu/VX_fpu_dpi.sv | 2 +- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- hw/rtl/libs/VX_avs_adapter.sv | 2 +- hw/rtl/libs/VX_axi_adapter.sv | 4 +- hw/rtl/libs/VX_fair_arbiter.sv | 66 ----------- hw/rtl/libs/VX_generic_arbiter.sv | 14 --- hw/rtl/libs/VX_rr_arbiter.sv | 183 +++++++++++++++++------------- hw/rtl/mem/VX_local_mem.sv | 2 +- 13 files changed, 116 insertions(+), 175 deletions(-) delete mode 100644 hw/rtl/libs/VX_fair_arbiter.sv diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 1131791bb..8221c284c 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -317,7 +317,7 @@ module VX_cache import 
VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW (CORE_REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (REQ_XBAR_BUF) ) req_xbar ( .clk (clk), @@ -452,7 +452,7 @@ module VX_cache import VX_gpu_pkg::*; #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), .DATAW (CORE_RSP_DATAW), - .ARBITER ("F") + .ARBITER ("R") ) rsp_xbar ( .clk (clk), .reset (rsp_xbar_reset), @@ -501,7 +501,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), - .ARBITER ("F") + .ARBITER ("R") ) mem_req_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 3beb035f4..650c27833 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -324,7 +324,7 @@ module VX_alu_muldiv #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (1) ) rsp_buf ( .clk (clk), diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 86bcaf05e..70eab1529 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -121,7 +121,7 @@ module VX_alu_unit #( .NUM_INPUTS (RSP_ARB_SIZE), .DATAW (RSP_ARB_DATAW), .OUT_BUF (PARTIAL_BW ? 
1 : 3), - .ARBITER ("F") + .ARBITER ("R") ) rsp_arb ( .clk (clk), .reset (block_reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index bdf8d2cdf..5dbb73791 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -23,7 +23,7 @@ module VX_operands import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter NUM_BANKS = 4, - parameter OUT_BUF = 8+4 // using 2-cycle LUT EB for area reduction + parameter OUT_BUF = 3 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 056e1c165..df25aff26 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -289,8 +289,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), - .ARBITER ("F"), - .OUT_BUF (8+4) // using 2-cycle LUT EB for area reduction + .ARBITER ("R"), + .OUT_BUF (3) ) out_arb ( .clk (clk), .reset (arb_reset), diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 781b5b88e..67022e8fd 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -470,7 +470,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_FPC), .DATAW (RSP_DATAW), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index ad398dcd7..967bbbc29 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -296,7 +296,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_FPC), .DATAW (RSP_DATAW + 2), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 35d329c7b..659114c8d 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -199,7 +199,7 @@ module VX_avs_adapter #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), .DATAW (DATA_WIDTH + 
TAG_WIDTH), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 7fffb9be2..9cd862560 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -203,11 +203,11 @@ module VX_axi_adapter #( `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)); `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)); end - + VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), .DATAW (DATA_WIDTH + TAG_WIDTH), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv deleted file mode 100644 index 3503ea21e..000000000 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_platform.vh" - -`TRACING_OFF -module VX_fair_arbiter #( - parameter NUM_REQS = 1, - parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) -) ( - input wire clk, - input wire reset, - input wire [NUM_REQS-1:0] requests, - output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid, - input wire grant_ready -); - if (NUM_REQS == 1) begin - - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - `UNUSED_VAR (grant_ready) - - assign grant_index = '0; - assign grant_onehot = requests; - assign grant_valid = requests[0]; - - end else begin - - reg [NUM_REQS-1:0] reqs_mask; - - wire [NUM_REQS-1:0] masked_reqs = requests & reqs_mask; - wire [NUM_REQS-1:0] requests_qual = (| masked_reqs) ? masked_reqs : requests; - - always @(posedge clk) begin - if (reset) begin - reqs_mask <= '1; - end else if (grant_valid && grant_ready) begin - reqs_mask <= (| reqs_mask) ? (reqs_mask & ~grant_onehot) : ~grant_onehot; - end - end - - VX_priority_encoder #( - .N (NUM_REQS) - ) priority_enc ( - .data_in (requests_qual), - .index_out (grant_index), - .onehot_out (grant_onehot), - .valid_out (grant_valid) - ); - - end - -endmodule -`TRACING_ON diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index a1f7be4a0..a3c4b71dd 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -56,20 +56,6 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "F") begin - - VX_fair_arbiter #( - .NUM_REQS (NUM_REQS) - ) fair_arbiter ( - .clk (clk), - .reset (reset), - .requests (requests), - .grant_valid (grant_valid), - .grant_index (grant_index), - .grant_onehot (grant_onehot), - .grant_ready (grant_ready) - ); - end else if (TYPE == "M") begin VX_matrix_arbiter #( diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index d9f5b767f..a222022bf 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -16,7 +16,7 @@ `TRACING_OFF 
module VX_rr_arbiter #( parameter NUM_REQS = 1, - parameter MODEL = 1, + parameter MODEL = 2, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS), parameter LUT_OPT = 0 ) ( @@ -41,14 +41,15 @@ module VX_rr_arbiter #( end else if (LUT_OPT && NUM_REQS == 2) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 3'b0_01, - 3'b1_?1: begin grant_onehot_r = 2'b01; grant_index_r = LOG_NUM_REQS'(0); end - default: begin grant_onehot_r = 2'b10; grant_index_r = LOG_NUM_REQS'(1); end + 3'b1_?1: begin grant_index_r = LOG_NUM_REQS'(0); end + 3'b0_1?, + 3'b1_10: begin grant_index_r = LOG_NUM_REQS'(1); end + default: begin grant_index_r = 'x; end endcase end @@ -61,24 +62,26 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, 5'b01_0?1, - 5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end + 5'b10_??1: begin grant_index_r = LOG_NUM_REQS'(0); end 5'b00_?1?, 5'b01_010, - 5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end - default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end + 5'b10_?10: begin grant_index_r = LOG_NUM_REQS'(1); end + 5'b00_10?, + 5'b01_1??, + 5'b10_100: begin grant_index_r = LOG_NUM_REQS'(2); end + default: begin grant_index_r = 'x; end endcase end @@ -91,13 +94,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg 
[NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -105,16 +107,20 @@ module VX_rr_arbiter #( 6'b00_0001, 6'b01_00?1, 6'b10_0??1, - 6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end + 6'b11_???1: begin grant_index_r = LOG_NUM_REQS'(0); end 6'b00_??1?, 6'b01_0010, 6'b10_0?10, - 6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end + 6'b11_??10: begin grant_index_r = LOG_NUM_REQS'(1); end 6'b00_?10?, 6'b01_?1??, 6'b10_0100, - 6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end - default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end + 6'b11_?100: begin grant_index_r = LOG_NUM_REQS'(2); end + 6'b00_100?, + 6'b01_10??, + 6'b10_1???, + 6'b11_1000: begin grant_index_r = LOG_NUM_REQS'(3); end + default: begin grant_index_r = 'x; end endcase end @@ -127,13 +133,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -142,23 +147,28 @@ module VX_rr_arbiter #( 8'b001_000?1, 8'b010_00??1, 8'b011_0???1, - 8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end + 8'b100_????1: begin grant_index_r = LOG_NUM_REQS'(0); end 8'b000_???1?, 8'b001_00010, 8'b010_00?10, 8'b011_0??10, - 8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end + 8'b100_???10: begin grant_index_r = LOG_NUM_REQS'(1); end 8'b000_??10?, 8'b001_??1??, 8'b010_00100, 8'b011_0?100, - 8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end + 8'b100_??100: begin grant_index_r = LOG_NUM_REQS'(2); end 8'b000_?100?, 8'b001_?10??, 8'b010_?1???, 8'b011_01000, - 8'b100_?1000: begin 
grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end - default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end + 8'b100_?1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 8'b000_1000?, + 8'b001_100??, + 8'b010_10???, + 8'b011_1????, + 8'b100_10000: begin grant_index_r = LOG_NUM_REQS'(4); end + default: begin grant_index_r = 'x; end endcase end @@ -171,13 +181,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -187,32 +196,38 @@ module VX_rr_arbiter #( 9'b010_000??1, 9'b011_00???1, 9'b100_0????1, - 9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end + 9'b101_?????1: begin grant_index_r = LOG_NUM_REQS'(0); end 9'b000_????1?, 9'b001_000010, 9'b010_000?10, 9'b011_00??10, 9'b100_0???10, - 9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end + 9'b101_????10: begin grant_index_r = LOG_NUM_REQS'(1); end 9'b000_???10?, 9'b001_???1??, 9'b010_000100, 9'b011_00?100, 9'b100_0??100, - 9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end + 9'b101_???100: begin grant_index_r = LOG_NUM_REQS'(2); end 9'b000_??100?, 9'b001_??10??, 9'b010_??1???, 9'b011_001000, 9'b100_0?1000, - 9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end + 9'b101_??1000: begin grant_index_r = LOG_NUM_REQS'(3); end 9'b000_?1000?, 9'b001_?100??, 9'b010_?10???, 9'b011_?1????, 9'b100_010000, - 9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end - default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end + 9'b101_?10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 
9'b000_10000?, + 9'b001_1000??, + 9'b010_100???, + 9'b011_10????, + 9'b100_1?????, + 9'b101_100000: begin grant_index_r = LOG_NUM_REQS'(5); end + default: begin grant_index_r = 'x; end endcase end @@ -225,60 +240,66 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 10'b000_000001, - 10'b001_0000?1, - 10'b010_000??1, - 10'b011_00???1, - 10'b100_00???1, - 10'b101_0????1, - 10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end + 10'b000_0000001, + 10'b001_00000?1, + 10'b010_0000??1, + 10'b011_000???1, + 10'b100_000???1, + 10'b101_00????1, + 10'b110_??????1: begin grant_index_r = LOG_NUM_REQS'(0); end 10'b000_?????1?, 10'b001_0000010, 10'b010_0000?10, 10'b011_000??10, 10'b100_00???10, 10'b101_0????10, - 10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end + 10'b110_?????10: begin grant_index_r = LOG_NUM_REQS'(1); end 10'b000_????10?, 10'b001_????1??, 10'b010_0000100, 10'b011_000?100, 10'b100_00??100, 10'b101_0???100, - 10'b110_????100: begin grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end + 10'b110_????100: begin grant_index_r = LOG_NUM_REQS'(2); end 10'b000_???100?, 10'b001_???10??, 10'b010_???1???, 10'b011_0001000, 10'b100_00?1000, 10'b101_0??1000, - 10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end + 10'b110_???1000: begin grant_index_r = LOG_NUM_REQS'(3); end 10'b000_??1000?, 10'b001_??100??, 10'b010_??10???, 10'b011_??1????, 10'b100_0010000, 10'b101_0?10000, - 10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end + 10'b110_??10000: begin grant_index_r = 
LOG_NUM_REQS'(4); end 10'b000_?10000?, 10'b001_?1000??, 10'b010_?100???, 10'b011_?10????, 10'b100_?1?????, 10'b101_0100000, - 10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end - default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end + 10'b110_?100000: begin grant_index_r = LOG_NUM_REQS'(5); end + 10'b000_100000?, + 10'b001_10000??, + 10'b010_1000???, + 10'b011_100????, + 10'b100_10?????, + 10'b101_1??????, + 10'b110_1000000: begin grant_index_r = LOG_NUM_REQS'(6); end + default: begin grant_index_r = 'x; end endcase end @@ -291,13 +312,12 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -309,7 +329,7 @@ module VX_rr_arbiter #( 11'b100_000????1, 11'b101_00?????1, 11'b110_0??????1, - 11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end + 11'b111_???????1: begin grant_index_r = LOG_NUM_REQS'(0); end 11'b000_??????1?, 11'b001_00000010, 11'b010_00000?10, @@ -317,7 +337,7 @@ module VX_rr_arbiter #( 11'b100_000???10, 11'b101_00????10, 11'b110_0?????10, - 11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end + 11'b111_??????10: begin grant_index_r = LOG_NUM_REQS'(1); end 11'b000_?????10?, 11'b001_?????1??, 11'b010_00000100, @@ -325,7 +345,7 @@ module VX_rr_arbiter #( 11'b100_000??100, 11'b101_00???100, 11'b110_0????100, - 11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end + 11'b111_?????100: begin grant_index_r = LOG_NUM_REQS'(2); end 11'b000_????100?, 11'b001_????10??, 11'b010_????1???, @@ -333,7 +353,7 @@ module VX_rr_arbiter #( 11'b100_000?1000, 11'b101_00??1000, 
11'b110_0???1000, - 11'b111_????1000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end + 11'b111_????1000: begin grant_index_r = LOG_NUM_REQS'(3); end 11'b000_???1000?, 11'b001_???100??, 11'b010_???10???, @@ -341,7 +361,7 @@ module VX_rr_arbiter #( 11'b100_00010000, 11'b101_00?10000, 11'b110_0??10000, - 11'b111_???10000: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end + 11'b111_???10000: begin grant_index_r = LOG_NUM_REQS'(4); end 11'b000_??10000?, 11'b001_??1000??, 11'b010_??100???, @@ -349,7 +369,7 @@ module VX_rr_arbiter #( 11'b100_??1?????, 11'b101_00100000, 11'b110_0?100000, - 11'b111_??100000: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end + 11'b111_??100000: begin grant_index_r = LOG_NUM_REQS'(5); end 11'b000_?100000?, 11'b001_?10000??, 11'b010_?1000???, @@ -357,8 +377,16 @@ module VX_rr_arbiter #( 11'b100_?10?????, 11'b101_?1??????, 11'b110_01000000, - 11'b111_?1000000: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end - default: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end + 11'b111_?1000000: begin grant_index_r = LOG_NUM_REQS'(6); end + 11'b000_1000000?, + 11'b001_100000??, + 11'b010_10000???, + 11'b011_1000????, + 11'b100_100?????, + 11'b101_10??????, + 11'b110_1???????, + 11'b111_10000000: begin grant_index_r = LOG_NUM_REQS'(7); end + default: begin grant_index_r = 'x; end endcase end @@ -371,7 +399,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_onehot = NUM_REQS'(1) << grant_index_r; assign grant_valid = (| requests); end else if (MODEL == 1) begin @@ -393,8 +421,8 @@ module VX_rr_arbiter #( assign unmasked_pri_reqs[i] = unmasked_pri_reqs[i-1] | requests[i-1]; end - wire [NUM_REQS-1:0] grant_masked = masked_reqs & ~masked_pri_reqs[NUM_REQS-1:0]; - wire [NUM_REQS-1:0] grant_unmasked = requests & ~unmasked_pri_reqs[NUM_REQS-1:0]; + wire 
[NUM_REQS-1:0] grant_masked = masked_reqs & ~masked_pri_reqs; + wire [NUM_REQS-1:0] grant_unmasked = requests & ~unmasked_pri_reqs; wire has_masked_reqs = (| masked_reqs); wire has_unmasked_reqs = (| requests); @@ -421,41 +449,34 @@ module VX_rr_arbiter #( .valid_out(grant_valid) ); - end else begin + end else if (MODEL == 2) begin - reg grant_valid_r; - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] next_grant_index; + reg [LOG_NUM_REQS-1:0] grant_table [NUM_REQS-1:0]; + reg [LOG_NUM_REQS-1:0] state; - always @(*) begin - grant_index_r = 'x; - grant_onehot_r = 'x; - grant_valid_r = 0; - for (integer i = NUM_REQS-1; i >= 0; --i) begin - if (requests[next_grant_index[i]]) begin - grant_valid_r = 1; - grant_index_r = next_grant_index[i]; - grant_onehot_r = NUM_REQS'(1) << next_grant_index[i]; + for (genvar i = 0; i < NUM_REQS; ++i) begin + always @(*) begin + grant_table[i] = 'x; + for (integer j = NUM_REQS-1; j >= 0; --j) begin + if (requests[(i+j+1) % NUM_REQS]) begin + grant_table[i] = LOG_NUM_REQS'(i+j+1); + end end end end always @(posedge clk) begin if (reset) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - next_grant_index[i] <= LOG_NUM_REQS'(i); - end + state <= 0; end else if (grant_valid && grant_ready) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - next_grant_index[i] <= grant_index_r + LOG_NUM_REQS'(i + 1); - end + state <= grant_index; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; - assign grant_valid = grant_valid_r; + assign grant_index = grant_table[state]; + assign grant_onehot = NUM_REQS'(1) << grant_index; + assign grant_valid = (| requests); + end endmodule diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 5d095b083..abd44b564 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -121,7 +121,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW 
(REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), From 9053919e92e36a09a0bb0f3a310c0398fa30e914 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 05:24:46 -0700 Subject: [PATCH 086/488] fixed synthesis warning --- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index a222022bf..85cf96f9a 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -451,7 +451,7 @@ module VX_rr_arbiter #( end else if (MODEL == 2) begin - reg [LOG_NUM_REQS-1:0] grant_table [NUM_REQS-1:0]; + reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] grant_table; reg [LOG_NUM_REQS-1:0] state; for (genvar i = 0; i < NUM_REQS; ++i) begin From 79362dea4b7e2e8779224338d1dc8f2ac5308439 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 14:01:11 -0700 Subject: [PATCH 087/488] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 25 ++++++++++--------------- hw/rtl/libs/VX_stream_unpack.sv | 30 +++++++++++++++--------------- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index dbc53336b..e6ca41e4b 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -84,8 +84,8 @@ module VX_mem_coalescer #( // tag + mask + offest localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W); - localparam STATE_SETUP = 0; - localparam STATE_SEND = 1; + localparam STATE_WAIT = 0; + localparam STATE_SEND = 1; logic state_r, state_n; @@ -179,11 +179,9 @@ module VX_mem_coalescer #( end end - wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask; - for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin - assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i]; - end - wire batch_completed = ~(| 
pending_mask); + wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & ~processed_mask_r)); + + wire out_req_fire = out_req_valid && out_req_ready; always @(*) begin state_n = state_r; @@ -201,9 +199,9 @@ module VX_mem_coalescer #( in_req_ready_n = 0; case (state_r) - STATE_SETUP: begin + STATE_WAIT: begin // wait for pending outgoing request to submit - if (out_req_valid && out_req_ready) begin + if (out_req_fire) begin out_req_valid_n = 0; end if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin @@ -220,15 +218,14 @@ module VX_mem_coalescer #( out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - in_req_ready_n = batch_completed; + in_req_ready_n = is_last_batch; - if (batch_completed) begin + if (is_last_batch) begin processed_mask_n = '0; end else begin processed_mask_n = processed_mask_r | current_pmask; end - - state_n = STATE_SETUP; + state_n = STATE_WAIT; end endcase end @@ -347,8 +344,6 @@ module VX_mem_coalescer #( end end - wire out_req_fire = out_req_valid && out_req_ready; - always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index e8b905cdf..6a6aa0e9e 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,8 +15,8 @@ `TRACING_OFF module VX_stream_unpack #( - parameter NUM_REQS = 1, - parameter DATA_WIDTH = 1, + parameter NUM_REQS = 1, + parameter DATA_WIDTH = 1, parameter TAG_WIDTH = 1, parameter OUT_BUF = 0 ) ( @@ -31,28 +31,28 @@ module VX_stream_unpack #( output wire ready_in, // output - output wire [NUM_REQS-1:0] valid_out, + output wire [NUM_REQS-1:0] valid_out, output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] data_out, output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag_out, input wire [NUM_REQS-1:0] ready_out ); if (NUM_REQS > 1) begin - reg [NUM_REQS-1:0] sent_mask; + reg [NUM_REQS-1:0] rem_mask; wire [NUM_REQS-1:0] ready_out_r; - wire [NUM_REQS-1:0] sent_mask_n = sent_mask | ready_out_r; - wire sent_all = ~(| (mask_in & ~sent_mask_n)); + wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_r; + wire sent_all = ~(| (mask_in & rem_mask_n)); always @(posedge clk) begin if (reset) begin - sent_mask <= '0; + rem_mask <= '1; end else begin if (valid_in) begin if (sent_all) begin - sent_mask <= '0; + rem_mask <= '1; end else begin - sent_mask <= sent_mask_n; + rem_mask <= rem_mask_n; end end end @@ -68,7 +68,7 @@ module VX_stream_unpack #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in && mask_in[i] && ~sent_mask[i]), + .valid_in (valid_in && mask_in[i] && rem_mask[i]), .ready_in (ready_out_r[i]), .data_in ({data_in[i], tag_in}), .data_out ({data_out[i], tag_out[i]}), @@ -76,13 +76,13 @@ module VX_stream_unpack #( .ready_out (ready_out[i]) ); end - + end else begin - + `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (mask_in) - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; assign tag_out = tag_in; assign ready_in = ready_out; From d74ee43a662fbd94a91d45a9836f90bba2e3f061 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 14:19:09 -0700 Subject: [PATCH 088/488] minor update --- hw/rtl/core/VX_dispatch.sv | 3 +-- hw/rtl/core/VX_lmem_unit.sv | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) 
diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 04c3d92bf..dcc15d5e3 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -60,8 +60,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), - .OUT_REG (2), // 2-cycle LUT EB for area reduction - .LUTRAM (1) + .OUT_REG (1) ) buffer ( .clk (clk), .reset (buffer_reset), diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index d93befda7..6b53a7d7d 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -57,7 +57,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (REQ_DATAW), .SIZE (2), - .OUT_REG (1) + .OUT_REG (3) ) req_global_buf ( .clk (clk), .reset (block_reset[i]), From 47427ab22e20984d615f4574afde1a6cdd4f053d Mon Sep 17 00:00:00 2001 From: sij814 Date: Mon, 12 Aug 2024 16:22:30 -0700 Subject: [PATCH 089/488] regression test with source_id 0 --- sim/common/dram_sim.cpp | 2 -- sim/simx/cache_sim.cpp | 1 + sim/simx/mem_sim.cpp | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sim/common/dram_sim.cpp b/sim/common/dram_sim.cpp index aa6f882e1..684dd6f7d 100644 --- a/sim/common/dram_sim.cpp +++ b/sim/common/dram_sim.cpp @@ -67,8 +67,6 @@ public: std::cout.rdbuf(nullstream.rdbuf()); ramulator_frontend_->finalize(); ramulator_memorysystem_->finalize(); - delete ramulator_frontend_; - delete ramulator_memorysystem_; std::cout.rdbuf(original_buf); } diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index d7d1727f6..ca98c1e5f 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -340,6 +340,7 @@ public: bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); } } else { + // TODO: Change this into a crossbar uint32_t max = MAX(2, config_.num_inputs); //printf("%s connecting\n", simobject_->name().c_str()); //3 diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index 6d8015d1f..04395683a 100644 --- a/sim/simx/mem_sim.cpp 
+++ b/sim/simx/mem_sim.cpp @@ -70,7 +70,7 @@ public: auto enqueue_success = dram_sim_.send_request( mem_req.write, mem_req.addr, - i, + 0, [](void* arg) { auto rsp_args = reinterpret_cast(arg); // only send a response for read requests From 2edda834c39dd7f0ce393d6c6e964d9025fe00d2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 18:11:21 -0700 Subject: [PATCH 090/488] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 12 ++---------- hw/rtl/libs/VX_stream_unpack.sv | 6 +----- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index e6ca41e4b..5b646dcb0 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -185,7 +185,6 @@ module VX_mem_coalescer #( always @(*) begin state_n = state_r; - out_req_valid_n = out_req_valid_r; out_req_mask_n = out_req_mask_r; out_req_rw_n = out_req_rw_r; @@ -194,7 +193,6 @@ module VX_mem_coalescer #( out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; - processed_mask_n = processed_mask_r; in_req_ready_n = 0; @@ -209,6 +207,7 @@ module VX_mem_coalescer #( end end default/*STATE_SEND*/: begin + state_n = STATE_WAIT; out_req_valid_n = 1; out_req_mask_n = batch_valid_r; out_req_rw_n = in_req_rw; @@ -217,15 +216,8 @@ module VX_mem_coalescer #( out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - + processed_mask_n= is_last_batch ? 
'0 (processed_mask_r | current_pmask); in_req_ready_n = is_last_batch; - - if (is_last_batch) begin - processed_mask_n = '0; - end else begin - processed_mask_n = processed_mask_r | current_pmask; - end - state_n = STATE_WAIT; end endcase end diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index 6a6aa0e9e..c81b30099 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -49,11 +49,7 @@ module VX_stream_unpack #( rem_mask <= '1; end else begin if (valid_in) begin - if (sent_all) begin - rem_mask <= '1; - end else begin - rem_mask <= rem_mask_n; - end + rem_mask <= sent_all ? '1 : rem_mask_n; end end end From 14ae4b8c13ac225d5ee69904169d761996316f36 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 20:07:50 -0700 Subject: [PATCH 091/488] minor update --- hw/rtl/cache/VX_bank_flush.sv | 7 +------ hw/rtl/libs/VX_mem_coalescer.sv | 10 +++------- hw/rtl/libs/VX_priority_encoder.sv | 11 +++++++---- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 6c02c1e13..2d62e354c 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -114,12 +114,7 @@ module VX_bank_flush #( assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin - reg [NUM_WAYS-1:0] flush_way_r; - always @(*) begin - flush_way_r = '0; - flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1; - end - assign flush_way = flush_way_r; + assign flush_way = NUM_WAYS'(1) << counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]; end else begin assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 5b646dcb0..cd6bcb904 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -168,12 +168,8 @@ module VX_mem_coalescer #( for (integer i = 0; i < OUT_REQS; ++i) begin for (integer j = 0; j < 
DATA_RATIO; ++j) begin if (current_pmask[i * DATA_RATIO + j]) begin - for (integer k = 0; k < DATA_IN_SIZE; ++k) begin - if (in_req_byteen[DATA_RATIO * i + j][k]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; - end - end + req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_byteen[DATA_RATIO * i + j]; + req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_data[DATA_RATIO * i + j]; end end end @@ -216,7 +212,7 @@ module VX_mem_coalescer #( out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - processed_mask_n= is_last_batch ? '0 (processed_mask_r | current_pmask); + processed_mask_n= is_last_batch ? '0 : (processed_mask_r | current_pmask); in_req_ready_n = is_last_batch; end endcase diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 8bba538b1..43d7d80ba 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -73,11 +73,14 @@ module VX_priority_encoder #( end else if (MODEL == 2) begin - `IGNORE_WARNINGS_BEGIN + `IGNORE_UNOPTFLAT_BEGIN wire [N-1:0] higher_pri_regs; - `IGNORE_WARNINGS_END - assign higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | reversed[N-2:0]; - assign higher_pri_regs[0] = 1'b0; + `IGNORE_UNOPTFLAT_END + + assign higher_pri_regs[0] = 1'b0; + for (genvar i = 1; i < N; ++i) begin + assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; + end assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; VX_lzc #( From 6c1ee9bfea39505bec258b0f705aa4f79fb0dbf9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 20:08:08 -0700 Subject: [PATCH 092/488] arbiter fixes --- hw/rtl/libs/VX_cyclic_arbiter.sv | 31 +++++++++++++++++++------------ hw/rtl/libs/VX_generic_arbiter.sv | 2 ++ 
hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index c4a42da14..d721e5130 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -30,6 +30,7 @@ module VX_cyclic_arbiter #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) + `UNUSED_VAR (grant_ready) assign grant_index = '0; assign grant_onehot = requests; @@ -39,29 +40,35 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; + wire [LOG_NUM_REQS-1:0] grant_index_um, grant_index_ql; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; - end else begin - if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin + end else if (grant_valid && grant_ready) begin + if (!IS_POW2 && grant_index_ql == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; - end else if (~grant_valid || grant_ready) begin - grant_index_r <= grant_index_r + LOG_NUM_REQS'(1); + end else begin + grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); end end end - reg [NUM_REQS-1:0] grant_onehot_r; - always @(*) begin - grant_onehot_r = '0; - grant_onehot_r[grant_index_r] = 1'b1; - end + VX_priority_encoder #( + .N (NUM_REQS) + ) priority_encoder ( + .data_in (requests), + `UNUSED_PIN (onehot_out), + .index_out (grant_index_um), + `UNUSED_PIN (valid_out) + ); - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; - assign grant_valid = requests[grant_index_r]; + assign grant_index_ql = requests[grant_index_r] ? 
grant_index_r : grant_index_um; + + assign grant_index = grant_index_ql; + assign grant_onehot = NUM_REQS'(1) << grant_index_ql; + assign grant_valid = (| requests); end diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index a3c4b71dd..f55b866f8 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,5 +90,7 @@ module VX_generic_arbiter #( end + `RUNTIME_ASSERT ((~grant_valid || (requests[grant_index] != 0)), ("invalid arbiter grant!")) + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 85cf96f9a..8c0fa0558 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -459,7 +459,7 @@ module VX_rr_arbiter #( grant_table[i] = 'x; for (integer j = NUM_REQS-1; j >= 0; --j) begin if (requests[(i+j+1) % NUM_REQS]) begin - grant_table[i] = LOG_NUM_REQS'(i+j+1); + grant_table[i] = LOG_NUM_REQS'((i+j+1) % NUM_REQS); end end end From 5126a7c472aecbc6be0a472b486aa6e694e9e63a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 21:32:20 -0700 Subject: [PATCH 093/488] minor update --- hw/rtl/VX_platform.vh | 7 +++++ hw/rtl/cache/VX_cache_data.sv | 2 +- hw/rtl/cache/VX_cache_tags.sv | 2 +- hw/rtl/cache/VX_cache_wrap.sv | 52 +++++++++++++++++------------------ hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_core.sv | 4 +-- hw/rtl/core/VX_fpu_unit.sv | 2 +- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/core/VX_lmem_unit.sv | 4 +-- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_schedule.sv | 1 - hw/rtl/core/VX_split_join.sv | 2 +- 12 files changed, 44 insertions(+), 38 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index cd0550efa..730b3cd7d 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -50,8 +50,15 @@ `define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args `else `ifdef VERILATOR + +`ifndef TRACING_ALL `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* 
verilator tracing_off */ +`else +`define TRACING_ON +`define TRACING_OFF +`endif + `ifndef NDEBUG `define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \ x \ diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index a114e1689..efc873f41 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -75,7 +75,7 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata; wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; - if (WRITEBACK) begin + if (WRITEBACK) begin : dirty_bytes if (DIRTY_BYTES) begin wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 7fef69be6..6c6ac92f2 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -100,7 +100,7 @@ module VX_cache_tags #( wire fill_s = fill && (!WRITEBACK || ~stall); wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin + for (genvar i = 0; i < NUM_WAYS; ++i) begin : ways wire do_fill = fill_s && evict_way[i]; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 37940297f..afae06181 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -103,7 +103,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) ) mem_bus_cache_if(); - if (NC_OR_BYPASS) begin + if (NC_OR_BYPASS) begin : bypass_if `RESET_RELAY (nc_bypass_reset, reset); @@ -148,31 +148,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if); end - if (PASSTHRU != 0) begin - - for (genvar i = 0; i < NUM_REQS; ++i) begin - `UNUSED_VAR (core_bus_cache_if[i].req_valid) - `UNUSED_VAR (core_bus_cache_if[i].req_data) - assign core_bus_cache_if[i].req_ready = 0; - - assign 
core_bus_cache_if[i].rsp_valid = 0; - assign core_bus_cache_if[i].rsp_data = '0; - `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) - end - - assign mem_bus_cache_if.req_valid = 0; - assign mem_bus_cache_if.req_data = '0; - `UNUSED_VAR (mem_bus_cache_if.req_ready) - - `UNUSED_VAR (mem_bus_cache_if.rsp_valid) - `UNUSED_VAR (mem_bus_cache_if.rsp_data) - assign mem_bus_cache_if.rsp_ready = 0; - - `ifdef PERF_ENABLE - assign cache_perf = '0; - `endif - - end else begin + if (PASSTHRU == 0) begin : cache_if `RESET_RELAY (cache_reset, reset); @@ -205,6 +181,30 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_if (mem_bus_cache_if) ); + end else begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin + `UNUSED_VAR (core_bus_cache_if[i].req_valid) + `UNUSED_VAR (core_bus_cache_if[i].req_data) + assign core_bus_cache_if[i].req_ready = 0; + + assign core_bus_cache_if[i].rsp_valid = 0; + assign core_bus_cache_if[i].rsp_data = '0; + `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) + end + + assign mem_bus_cache_if.req_valid = 0; + assign mem_bus_cache_if.req_data = '0; + `UNUSED_VAR (mem_bus_cache_if.req_ready) + + `UNUSED_VAR (mem_bus_cache_if.rsp_valid) + `UNUSED_VAR (mem_bus_cache_if.rsp_data) + assign mem_bus_cache_if.rsp_ready = 0; + + `ifdef PERF_ENABLE + assign cache_perf = '0; + `endif + end `ifdef DBG_TRACE_CACHE diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 70eab1529..72ef74b9c 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -55,7 +55,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alu_blocks `RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1)); diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 83af50f16..d8cd804f9 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -232,7 +232,7 @@ module VX_core 
import VX_gpu_pkg::*; #( `endif - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks VX_lsu_mem_if #( .NUM_LANES (DCACHE_CHANNELS), @@ -240,7 +240,7 @@ module VX_core import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_coalesced_if(); - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if `RESET_RELAY (mem_coalescer_reset, reset); diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 496b24e29..127ba9755 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -53,7 +53,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpu_blocks `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index e8edf64c7..6f068d45f 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : ibuf_slices VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 6b53a7d7d..0b524c540 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -41,7 +41,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; for 
(genvar j = 0; j < `NUM_LSU_LANES; ++j) begin @@ -151,7 +151,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index d40f5fcfb..5e280e48f 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_blocks `RESET_RELAY (slice_reset, reset); diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 71a74c6ac..11a62469b 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -386,7 +386,6 @@ module VX_schedule import VX_gpu_pkg::*; #( `RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT); for (genvar i = 0; i < `NUM_WARPS; ++i) begin - VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 7f887e602..9f47023b0 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -45,7 +45,7 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_push = valid && split.valid && split.is_dvg; wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_slices `RESET_RELAY (ipdom_reset, reset); From 3ae3afc59be69a04b3c3cfae1c26207cd95a7c29 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 12 Aug 2024 21:34:41 -0700 Subject: [PATCH 094/488] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 9 +++++++-- 1 file changed, 7 
insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index cd6bcb904..9ef462223 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -168,8 +168,13 @@ module VX_mem_coalescer #( for (integer i = 0; i < OUT_REQS; ++i) begin for (integer j = 0; j < DATA_RATIO; ++j) begin if (current_pmask[i * DATA_RATIO + j]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_byteen[DATA_RATIO * i + j]; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]] = in_req_data[DATA_RATIO * i + j]; + for (integer k = 0; k < DATA_IN_SIZE; ++k) begin + // perform byte-level merge since each thread may have different bytes enabled + if (in_req_byteen[DATA_RATIO * i + j][k]) begin + req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; + end + end end end end From 76f4cd66d3e3d9fe60b13c929c8287738e4605cc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 03:08:48 -0700 Subject: [PATCH 095/488] minor update --- hw/rtl/core/VX_schedule.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 11a62469b..46fad97be 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -356,7 +356,9 @@ module VX_schedule import VX_gpu_pkg::*; #( `endif VX_elastic_buffer #( - .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH) + .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH), + .SIZE (2), // need a skid buffer to buffer out schedule_ready + .OUT_REG (1) // should be registered for BRAM acces in fetch unit ) out_buf ( .clk (clk), .reset (reset), From 1a9a04ac7602ed7c88f799af5a179bc900ca0efe Mon Sep 17 00:00:00 2001 From: donghanyuan Date: Tue, 13 Aug 2024 18:06:53 +0800 Subject: [PATCH 096/488] replace local static allocator to global static Ensure MemoryPool construct before 
SimPlatform, thus MemoryPool destruct after SimPlatform. Avoid use-after-free issue clearing events_ of SimPlatform after SimPortEvent's allocator is destructed. --- sim/common/simobject.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sim/common/simobject.h b/sim/common/simobject.h index f4c84e3f3..31fc4c0e6 100644 --- a/sim/common/simobject.h +++ b/sim/common/simobject.h @@ -168,23 +168,23 @@ public: {} void* operator new(size_t /*size*/) { - return allocator().allocate(); + return allocator_.allocate(); } void operator delete(void* ptr) { - allocator().deallocate(ptr); + allocator_.deallocate(ptr); } protected: Func func_; Pkt pkt_; - static MemoryPool>& allocator() { - static MemoryPool> instance(64); - return instance; - } + static MemoryPool> allocator_; }; +template +MemoryPool> SimCallEvent::allocator_(64); + /////////////////////////////////////////////////////////////////////////////// template @@ -201,23 +201,23 @@ public: {} void* operator new(size_t /*size*/) { - return allocator().allocate(); + return allocator_.allocate(); } void operator delete(void* ptr) { - allocator().deallocate(ptr); + allocator_.deallocate(ptr); } protected: const SimPort* port_; Pkt pkt_; - static MemoryPool>& allocator() { - static MemoryPool> instance(64); - return instance; - } + static MemoryPool> allocator_; }; +template +MemoryPool> SimPortEvent::allocator_(64); + /////////////////////////////////////////////////////////////////////////////// class SimContext; From ee39da74b4951d829a7bde787bcd23d2b49be948 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 04:14:02 -0700 Subject: [PATCH 097/488] increasing reset delay --- hw/rtl/VX_config.vh | 2 +- hw/rtl/core/VX_lmem_unit.sv | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 8d1c280fd..d46c679e9 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -214,7 
+214,7 @@ `endif `define STACK_SIZE (1 << `STACK_LOG2_SIZE) -`define RESET_DELAY 8 +`define RESET_DELAY 16 `ifndef STALL_TIMEOUT `define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED))) diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 0b524c540..988133cc1 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -37,7 +37,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .NUM_LANES (`NUM_LSU_LANES), .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_switch_if[`NUM_LSU_BLOCKS](); + ) lsu_lmem_if[`NUM_LSU_BLOCKS](); `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1); @@ -103,17 +103,17 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_in_if[i].req_data.tag }), .ready_in (req_local_ready), - .valid_out (lsu_switch_if[i].req_valid), + .valid_out (lsu_lmem_if[i].req_valid), .data_out ({ - lsu_switch_if[i].req_data.mask, - lsu_switch_if[i].req_data.rw, - lsu_switch_if[i].req_data.byteen, - lsu_switch_if[i].req_data.addr, - lsu_switch_if[i].req_data.flags, - lsu_switch_if[i].req_data.data, - lsu_switch_if[i].req_data.tag + lsu_lmem_if[i].req_data.mask, + lsu_lmem_if[i].req_data.rw, + lsu_lmem_if[i].req_data.byteen, + lsu_lmem_if[i].req_data.addr, + lsu_lmem_if[i].req_data.flags, + lsu_lmem_if[i].req_data.data, + lsu_lmem_if[i].req_data.tag }), - .ready_out (lsu_switch_if[i].req_ready) + .ready_out (lsu_lmem_if[i].req_ready) ); assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) @@ -128,15 +128,15 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .clk (clk), .reset (block_reset[i]), .valid_in ({ - lsu_switch_if[i].rsp_valid, + lsu_lmem_if[i].rsp_valid, lsu_mem_out_if[i].rsp_valid }), .ready_in ({ - lsu_switch_if[i].rsp_ready, + lsu_lmem_if[i].rsp_ready, lsu_mem_out_if[i].rsp_ready }), .data_in ({ - lsu_switch_if[i].rsp_data, + lsu_lmem_if[i].rsp_data, lsu_mem_out_if[i].rsp_data }), .data_out (lsu_mem_in_if[i].rsp_data), @@ -168,7 +168,7 @@ module VX_lmem_unit import 
VX_gpu_pkg::*; #( ) lsu_adapter ( .clk (clk), .reset (block_reset[i]), - .lsu_mem_if (lsu_switch_if[i]), + .lsu_mem_if (lsu_lmem_if[i]), .mem_bus_if (lmem_bus_tmp_if) ); From 19b5496f00a31b77aba1e3169e9e68381f390efa Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Tue, 13 Aug 2024 17:54:06 -0400 Subject: [PATCH 098/488] modify makefile to only compile simx --- Makefile.in | 8 ++++++++ ci/toolchain_env.sh.in | 4 ++-- runtime/Makefile | 2 ++ sim/Makefile | 3 +++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 82a2ebdb5..7f594747a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2,6 +2,14 @@ include config.mk .PHONY: build software tests +vm: + $(MAKE) -C $(VORTEX_HOME)/third_party + $(MAKE) -C hw + $(MAKE) -C sim simx + $(MAKE) -C kernel + $(MAKE) -C runtime vm + $(MAKE) -C tests + all: $(MAKE) -C $(VORTEX_HOME)/third_party $(MAKE) -C hw diff --git a/ci/toolchain_env.sh.in b/ci/toolchain_env.sh.in index dc50389a9..be140d28d 100755 --- a/ci/toolchain_env.sh.in +++ b/ci/toolchain_env.sh.in @@ -16,8 +16,8 @@ TOOLDIR=${TOOLDIR:=@TOOLDIR@} -export VERILATOR_ROOT=$TOOLDIR/verilator -export PATH=$VERILATOR_ROOT/bin:$PATH +# export VERILATOR_ROOT=$TOOLDIR/verilator +# export PATH=$VERILATOR_ROOT/bin:$PATH export SV2V_PATH=$TOOLDIR/sv2v export PATH=$SV2V_PATH/bin:$PATH diff --git a/runtime/Makefile b/runtime/Makefile index e5f8af74c..aecac00e1 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk all: stub rtlsim simx opae xrt +vm: stub simx + stub: $(MAKE) -C stub diff --git a/sim/Makefile b/sim/Makefile index e16486e8f..4d5ea89c1 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -1,6 +1,9 @@ ROOT_DIR := $(realpath ..) 
include $(ROOT_DIR)/config.mk +simx: + $(MAKE) -C simx + all: $(MAKE) -C simx $(MAKE) -C rtlsim From 7528dd9c0fc9cd7eb3bff92d427c0f765c88cab7 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Tue, 13 Aug 2024 18:18:54 -0400 Subject: [PATCH 099/488] debug and remove travis.yml --- .travis.yml | 118 ---------------------------------------- runtime/simx/vortex.cpp | 2 +- 2 files changed, 1 insertion(+), 119 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 57098c8f0..000000000 --- a/.travis.yml +++ /dev/null @@ -1,118 +0,0 @@ -language: cpp -dist: focal -os: linux -compiler: gcc - -addons: - apt: - packages: - - build-essential - - valgrind - - libstdc++6 - - binutils - - python - - uuid-dev - -env: - global: - - TOOLDIR=$HOME/tools - -cache: - directories: - - $TOOLDIR - - $HOME/third_party - - $HOME/build32 - - $HOME/build64 - -before_install: - - if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ] || [ "$(cat "$TOOLDIR/version.txt")" != "v0.4" ]; then - rm -rf $TOOLDIR; - mkdir -p $TRAVIS_BUILD_DIR/build && cd $TRAVIS_BUILD_DIR/build; - ../configure --tooldir=$TOOLDIR; - ci/toolchain_install.sh --all; - echo "v0.3" > "$TOOLDIR/version.txt"; - else - echo "using existing tooldir build"; - fi - - if [ ! -d "$HOME/third_party" ] || [ -z "$(ls -A $HOME/third_party)" ] || [ "$(cat "$HOME/third_party/version.txt")" != "v0.2" ]; then - cd $TRAVIS_BUILD_DIR; - make -C third_party > /dev/null; - echo "v0.2" > "third_party/version.txt"; - cp -rf third_party $HOME; - else - echo "using existing third_party build"; - cp -rf $HOME/third_party $TRAVIS_BUILD_DIR; - fi - -install: - - if [ ! 
-d "$HOME/build$XLEN" ] || [ -z "$(ls -A $HOME/build$XLEN)" ] || [ "$(cat "$HOME/build$XLEN/version.txt")" != "$TRAVIS_COMMIT" ]; then - mkdir -p $TRAVIS_BUILD_DIR/build$XLEN && cd $TRAVIS_BUILD_DIR/build$XLEN; - ../configure --tooldir=$TOOLDIR --xlen=$XLEN; - source ci/toolchain_env.sh; - make build -s > /dev/null; - echo "$TRAVIS_COMMIT" > version.txt; - cp -rf $TRAVIS_BUILD_DIR/build$XLEN $HOME; - else - echo "using existing build for commit $TRAVIS_COMMIT"; - cp -rf $HOME/build$XLEN $TRAVIS_BUILD_DIR; - fi - -before_script: - - cd $TRAVIS_BUILD_DIR/build$XLEN - - source ci/toolchain_env.sh - -stages: - - test - -jobs: - include: - - stage: test - name: regression32 - env: XLEN=32 - script: - - ./ci/travis_run.py ./ci/regression.sh --unittest - - ./ci/travis_run.py ./ci/regression.sh --isa - - ./ci/travis_run.py ./ci/regression.sh --kernel - - ./ci/travis_run.py ./ci/regression.sh --synthesis - - ./ci/travis_run.py ./ci/regression.sh --regression - - ./ci/travis_run.py ./ci/regression.sh --opencl - - - stage: test - name: regression64 - env: XLEN=64 - script: - - ./ci/travis_run.py ./ci/regression.sh --isa - - ./ci/travis_run.py ./ci/regression.sh --kernel - - ./ci/travis_run.py ./ci/regression.sh --synthesis - - ./ci/travis_run.py ./ci/regression.sh --regression - - ./ci/travis_run.py ./ci/regression.sh --opencl - - - stage: test - name: config - env: XLEN=32 - script: - - ./ci/travis_run.py ./ci/regression.sh --cluster - - ./ci/travis_run.py ./ci/regression.sh --config - - - stage: test - name: debug - env: XLEN=32 - script: - - ./ci/travis_run.py ./ci/regression.sh --debug - - ./ci/travis_run.py ./ci/regression.sh --stress - - - stage: test - name: virtual_memory - env: XLEN=32 - env: VM_DISABLE=1 - script: - - ./ci/travis_run.py ./ci/regression.sh --regression - - ./ci/travis_run.py ./ci/regression.sh --opencl - - - stage: test - name: virtual_memory - env: XLEN=64 - env: VM_DISABLE=1 - script: - - ./ci/travis_run.py ./ci/regression.sh --regression - - 
./ci/travis_run.py ./ci/regression.sh --opencl \ No newline at end of file diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index f9143cf0f..e5ec36b60 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -120,7 +120,7 @@ public: uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); - if (addr_mapping.contains(ppn)) return addr_mapping[ppn]; + if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; // If ppn to vpn mapping doesnt exist, create mapping DBGPRINT(" [RT:MAP_P2V] Not found. Allocate new page table or update a PTE.\n"); From ea34239b4361c356c93495e2a3a7b0dfd335f9f1 Mon Sep 17 00:00:00 2001 From: sij814 Date: Tue, 13 Aug 2024 16:52:27 -0700 Subject: [PATCH 100/488] changes made for initial feedback --- hw/rtl/VX_config.vh | 9 +++++++++ hw/rtl/VX_types.vh | 9 ++++----- runtime/include/vortex.h | 2 +- runtime/simx/vortex.cpp | 4 ++-- runtime/stub/utils.cpp | 34 ++++++++++++++-------------------- sim/simx/cache_sim.cpp | 4 ++-- sim/simx/constants.h | 4 ---- sim/simx/emulator.cpp | 5 ++--- sim/simx/mem_sim.cpp | 6 +++--- sim/simx/processor.cpp | 4 ++-- 10 files changed, 39 insertions(+), 42 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 615c1ae6d..3ff9e3a54 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -650,6 +650,15 @@ `define L3_WRITEBACK 0 `endif +`ifndef MEMORY_BANKS +`define MEMORY_BANKS 8 +`endif + +// Number of Memory Ports from LLC +`ifndef NUM_MEM_PORTS +`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS) +`endif + // ISA Extensions ///////////////////////////////////////////////////////////// `ifdef EXT_A_ENABLE diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 685051b6c..2eac22a5a 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -166,6 +166,10 @@ `define VX_CSR_MPM_MEM_WRITES_H 12'hB99 `define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency `define VX_CSR_MPM_MEM_LT_H 12'hB9A +`define 
VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests +`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E +`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks +`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F // PERF: lmem `define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads `define VX_CSR_MPM_LMEM_READS_H 12'hB9B @@ -173,11 +177,6 @@ `define VX_CSR_MPM_LMEM_WRITES_H 12'hB9C `define VX_CSR_MPM_LMEM_BANK_ST 12'hB1D // bank conflicts `define VX_CSR_MPM_LMEM_BANK_ST_H 12'hB9D -// PERF: hbm -`define VX_CSR_HBM_BANK_CNTR 12'hB1E // hbm banks -`define VX_CSR_HBM_BANK_CNTR_H 12'hB9E -`define VX_CSR_HBM_BANK_TICK 12'hB1F // hbm ticks -`define VX_CSR_HBM_BANK_TICK_H 12'hB9F // Machine Performance-monitoring memory counters (class 3) /////////////////// // diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index bf263da09..853da5994 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -34,7 +34,7 @@ typedef void* vx_buffer_h; #define VX_CAPS_GLOBAL_MEM_SIZE 0x5 #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_ISA_FLAGS 0x7 -#define VX_CAPS_L3CACHE_NUM_BANKS 0x8 +#define VX_CAPS_NUM_MEM_BANKS 0x8 // device isa flags #define VX_ISA_STD_A (1ull << ISA_STD_A) diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index be7173fc3..70ceb7fc4 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -81,8 +81,8 @@ public: case VX_CAPS_ISA_FLAGS: _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; break; - case VX_CAPS_L3CACHE_NUM_BANKS: - _value = L3_NUM_BANKS; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; break; default: std::cout << "invalid caps id: " << caps_id << std::endl; diff --git a/runtime/stub/utils.cpp b/runtime/stub/utils.cpp index ae894fcbb..c1f75f092 100644 --- a/runtime/stub/utils.cpp +++ b/runtime/stub/utils.cpp @@ -211,10 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { uint64_t mem_reads = 0; uint64_t mem_writes = 0; uint64_t mem_lat = 0; - - // PERF: hbm - 
uint64_t hbm_counter = 0; - uint64_t hbm_ticks = 0; + uint64_t mem_req_counter = 0; + uint64_t mem_ticks = 0; uint64_t num_cores; CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), { @@ -225,9 +223,9 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), { return err; }); - - uint64_t l3cache_banks; - CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_L3CACHE_NUM_BANKS, &l3cache_banks), { + + uint64_t num_mem_bank_ports; + CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), { return err; }); @@ -531,14 +529,6 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_L3CACHE_MSHR_ST, core_id, &l3cache_mshr_stalls), { return err; }); - - // PERF: HBM - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_CNTR, core_id, &hbm_counter), { - return err; - }); - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_HBM_BANK_TICK, core_id, &hbm_ticks), { - return err; - }); } // PERF: memory CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_READS, core_id, &mem_reads), { @@ -550,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), { return err; }); + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), { + return err; + }); + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), { + return err; + }); } } break; default: @@ -616,22 +612,20 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads); int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes); int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls); - int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + 
l3cache_mshr_stalls); + int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls); fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads); fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes); fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio); fprintf(stream, "PERF: l3cache write misses=%ld (hit ratio=%d%%)\n", l3cache_write_misses, write_hit_ratio); fprintf(stream, "PERF: l3cache bank stalls=%ld (utilization=%d%%)\n", l3cache_bank_stalls, bank_utilization); fprintf(stream, "PERF: l3cache mshr stalls=%ld (utilization=%d%%)\n", l3cache_mshr_stalls, mshr_utilization); - - // HBM - float util = (float)hbm_counter / (hbm_ticks * l3cache_banks) * 100; - fprintf(stream, "PERF: hbm bank utilization=%f\n", util); } int mem_avg_lat = caclAverage(mem_lat, mem_reads); + int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports)); fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes); fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat); + fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization); } break; default: break; diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index ca98c1e5f..4f357f195 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -722,8 +722,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config : SimObject(ctx, name) , CoreReqPorts(config.num_inputs, this) , CoreRspPorts(config.num_inputs, this) - , MemReqPorts((1 << config.B), this) - , MemRspPorts((1 << config.B), this) + , MemReqPorts(NUM_MEM_PORTS, this) + , MemRspPorts(NUM_MEM_PORTS, this) , impl_(new Impl(this, config)) {} diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 81a626b84..0c707b55c 100644 --- a/sim/simx/constants.h 
+++ b/sim/simx/constants.h @@ -21,10 +21,6 @@ #define MEM_CLOCK_RATIO 1 #endif -#ifndef MEMORY_BANKS -#define MEMORY_BANKS 8 -#endif - #define LSU_WORD_SIZE (XLEN / 8) #define LSU_CHANNELS NUM_LSU_LANES #define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index d76113249..3dfdf420b 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -451,13 +451,12 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads); CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes); CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency); + CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter); + CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks); CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads); CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes); CSR_READ_64(VX_CSR_MPM_LMEM_BANK_ST, lmem_perf.bank_stalls); - - CSR_READ_64(VX_CSR_HBM_BANK_CNTR, proc_perf.memsim.counter); - CSR_READ_64(VX_CSR_HBM_BANK_TICK, proc_perf.memsim.ticks); } } break; default: { diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index 04395683a..c1ff87680 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -59,7 +59,7 @@ public: dram_sim_.tick(); uint32_t counter = 0; - for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) { + for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) { if (simobject_->MemReqPorts.at(i).empty()) continue; @@ -107,8 +107,8 @@ public: MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) : SimObject(ctx, name) - , MemReqPorts(L3_NUM_BANKS, this) - , MemRspPorts(L3_NUM_BANKS, this) + , MemReqPorts(NUM_MEM_PORTS, this) + , MemRspPorts(NUM_MEM_PORTS, this) , impl_(new Impl(this, config)) {} diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index b3664f3fa..58fabf14c 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -47,7 +47,7 @@ ProcessorImpl::ProcessorImpl(const Arch& 
arch) ); // connect L3 memory ports - for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) { + for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) { l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i)); memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i)); } @@ -61,7 +61,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) } // set up memory profiling - for (uint32_t i = 0; i < L3_NUM_BANKS; ++i) { + for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) { memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){ __unused (cycle); perf_mem_reads_ += !req.write; From d6f1393627daad324c9ebe1e01fa07899bc6b763 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 18:34:06 -0700 Subject: [PATCH 101/488] memory coalescer timing optimization --- hw/rtl/libs/VX_mem_coalescer.sv | 18 ++++++++++-------- hw/rtl/libs/VX_pipe_register.sv | 9 ++++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 9ef462223..17e5923bd 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -113,12 +113,13 @@ module VX_mem_coalescer #( logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n; logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; - logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n; + logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx; wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; + for (genvar i = 0; i < NUM_REQS; i++) begin assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W]; assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; @@ -128,7 +129,7 @@ module VX_mem_coalescer #( wire [DATA_RATIO-1:0] batch_mask; wire [DATA_RATIO_W-1:0] batch_idx; - assign batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * 
DATA_RATIO +: DATA_RATIO]; + assign batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & req_rem_mask_r[i * DATA_RATIO +: DATA_RATIO]; VX_priority_encoder #( .N (DATA_RATIO) @@ -180,7 +181,7 @@ module VX_mem_coalescer #( end end - wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & ~processed_mask_r)); + wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & req_rem_mask_r)); wire out_req_fire = out_req_valid && out_req_ready; @@ -194,7 +195,7 @@ module VX_mem_coalescer #( out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; - processed_mask_n = processed_mask_r; + req_rem_mask_n = req_rem_mask_r; in_req_ready_n = 0; case (state_r) @@ -217,7 +218,7 @@ module VX_mem_coalescer #( out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - processed_mask_n= is_last_batch ? '0 : (processed_mask_r | current_pmask); + req_rem_mask_n = is_last_batch ? '1 : (req_rem_mask_r & ~current_pmask); in_req_ready_n = is_last_batch; end endcase @@ -225,13 +226,14 @@ module VX_mem_coalescer #( VX_pipe_register #( .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), - .RESETW (1 + NUM_REQS + 1) + .RESETW (1 + NUM_REQS + 1), + .INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0}) ) pipe_reg ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), - .data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) + .data_in ({state_n, req_rem_mask_n, 
out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), + .data_out ({state_r, req_rem_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) ); wire out_rsp_fire = out_rsp_valid && out_rsp_ready; diff --git a/hw/rtl/libs/VX_pipe_register.sv b/hw/rtl/libs/VX_pipe_register.sv index 2c1cddfd6..69184898f 100644 --- a/hw/rtl/libs/VX_pipe_register.sv +++ b/hw/rtl/libs/VX_pipe_register.sv @@ -17,6 +17,7 @@ module VX_pipe_register #( parameter DATAW = 1, parameter RESETW = 0, + parameter [`UP(RESETW)-1:0] INIT_VALUE = {`UP(RESETW){1'b0}}, parameter DEPTH = 1 ) ( input wire clk, @@ -46,7 +47,7 @@ module VX_pipe_register #( always @(posedge clk) begin if (reset) begin - value <= RESETW'(0); + value <= INIT_VALUE; end else if (enable) begin value <= data_in; end @@ -58,7 +59,7 @@ module VX_pipe_register #( always @(posedge clk) begin if (reset) begin - value_r <= RESETW'(0); + value_r <= INIT_VALUE; end else if (enable) begin value_r <= data_in[DATAW-1:DATAW-RESETW]; end @@ -74,10 +75,12 @@ module VX_pipe_register #( end else begin wire [DEPTH:0][DATAW-1:0] data_delayed; assign data_delayed[0] = data_in; + for (genvar i = 1; i <= DEPTH; ++i) begin VX_pipe_register #( .DATAW (DATAW), - .RESETW (RESETW) + .RESETW (RESETW), + .INIT_VALUE (INIT_VALUE) ) pipe_reg ( .clk (clk), .reset (reset), From aef1411af5e6f028d090852c108dfdd679888b2c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 21:38:33 -0700 Subject: [PATCH 102/488] scoreboard timing optimization --- hw/rtl/core/VX_operands.sv | 55 +++++++++--------- hw/rtl/core/VX_schedule.sv | 2 +- hw/rtl/core/VX_scoreboard.sv | 105 ++++++++++------------------------- 3 files changed, 58 insertions(+), 104 deletions(-) diff --git 
a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 5dbb73791..3f64caf77 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -37,15 +37,15 @@ module VX_operands import VX_gpu_pkg::*; #( VX_operands_if.master operands_if ); `UNUSED_SPARAM (INSTANCE_ID) - localparam NUM_SRC_REGS = 3; - localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS); + localparam NUM_SRC_OPDS = 3; + localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_OPDS); localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS; localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH; localparam REGS_DATAW = `XLEN * `NUM_THREADS; - localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW; + localparam DATAW = META_DATAW + NUM_SRC_OPDS * REGS_DATAW; localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS); localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS; localparam XLEN_SIZE = `XLEN / 8; @@ -53,10 +53,10 @@ module VX_operands import VX_gpu_pkg::*; #( `UNUSED_VAR (writeback_if.data.sop) - wire [NUM_SRC_REGS-1:0] src_valid; - wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready; - wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data; - wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; + wire [NUM_SRC_OPDS-1:0] src_valid; + wire [NUM_SRC_OPDS-1:0] req_in_valid, req_in_ready; + wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_in_data; + wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; @@ -68,40 +68,39 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n; - wire 
[NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; - reg [NUM_SRC_REGS-1:0] data_fetched_n; - wire [NUM_SRC_REGS-1:0] data_fetched_st1; + reg [NUM_SRC_OPDS-1:0] data_fetched_n; + wire [NUM_SRC_OPDS-1:0] data_fetched_st1; reg has_collision_n; wire has_collision_st1; - wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3, - scoreboard_if.data.rs2, - scoreboard_if.data.rs1}; + wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds; + assign src_opds = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1}; - for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin if (ISSUE_WIS != 0) begin - assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; + assign req_in_data[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; end else begin - assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS]; + assign req_in_data[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; end if (NUM_BANKS != 1) begin - assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0]; + assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; end else begin assign req_bank_idx[i] = '0; end end - for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin - assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i]; + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin + assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i]; end - assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid; + assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; VX_stream_xbar #( - .NUM_INPUTS (NUM_SRC_REGS), + .NUM_INPUTS (NUM_SRC_OPDS), .NUM_OUTPUTS (NUM_BANKS), .DATAW (PER_BANK_ADDRW), .ARBITER ("P"), // use priority arbiter @@ -132,8 +131,8 @@ module VX_operands import VX_gpu_pkg::*; #( always @(*) 
begin has_collision_n = 0; - for (integer i = 0; i < NUM_SRC_REGS; ++i) begin - for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin + for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin + for (integer j = 1; j < (NUM_SRC_OPDS-i); ++j) begin has_collision_n |= src_valid[i] && src_valid[j+i] && (req_bank_idx[i] == req_bank_idx[j+i]); @@ -163,8 +162,8 @@ module VX_operands import VX_gpu_pkg::*; #( }; VX_pipe_register #( - .DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), - .RESETW (1 + NUM_SRC_REGS) + .DATAW (1 + NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), + .RESETW (1 + NUM_SRC_OPDS) ) pipe_reg1 ( .clk (clk), .reset (reset), @@ -182,8 +181,8 @@ module VX_operands import VX_gpu_pkg::*; #( `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW VX_pipe_register #( - .DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (1 + NUM_SRC_REGS * REGS_DATAW) + .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), + .RESETW (1 + NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 46fad97be..5fe81a718 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -357,7 +357,7 @@ module VX_schedule import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH), - .SIZE (2), // need a skid buffer to buffer out schedule_ready + .SIZE (2), // need to buffer out ready_in .OUT_REG (1) // should be registered for BRAM acces in fetch unit ) out_buf ( .clk (clk), diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index df25aff26..cd9f3093d 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -30,6 +30,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( 
VX_scoreboard_if.master scoreboard_if ); `UNUSED_SPARAM (INSTANCE_ID) + localparam NUM_SRC_OPDS = 3; + localparam NUM_OPDS = NUM_SRC_OPDS + 1; localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1; VX_ibuffer_if staging_if [PER_ISSUE_WARPS](); @@ -100,9 +102,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `endif for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin - VX_elastic_buffer #( - .DATAW (DATAW), - .SIZE (1) + VX_pipe_buffer #( + .DATAW (DATAW) ) stanging_buf ( .clk (clk), .reset (reset), @@ -118,7 +119,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin reg [`NUM_REGS-1:0] inuse_regs; - reg [3:0] operands_busy, operands_busy_n; + reg [NUM_OPDS-1:0] operands_busy, operands_busy_n; wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready; @@ -128,6 +129,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #( && (writeback_if.data.wis == ISSUE_WIS_W'(w)) && writeback_if.data.eop; + wire [NUM_OPDS-1:0][`NR_BITS-1:0] ibuf_opds, stg_opds; + assign ibuf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd}; + assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd}; + `ifdef PERF_ENABLE reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units; reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu; @@ -135,29 +140,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #( always @(*) begin perf_inuse_units_per_cycle[w] = '0; perf_inuse_sfu_per_cycle[w] = '0; - if (staging_if[w].valid) begin - if (operands_busy[0]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1; - if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1; - end - end - if (operands_busy[1]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1; - if 
(inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1; - end - end - if (operands_busy[2]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1; - if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1; - end - end - if (operands_busy[3]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1; - if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1; + for (integer i = 0; i < NUM_OPDS; ++i) begin + if (staging_if[w].valid && operands_busy[i]) begin + perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1; + if (inuse_units[stg_opds[i]] == `EX_SFU) begin + perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1; end end end @@ -165,56 +152,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `endif always @(*) begin - operands_busy_n = operands_busy; - if (ibuffer_fire) begin - operands_busy_n = { - inuse_regs[ibuffer_if[w].data.rs3], - inuse_regs[ibuffer_if[w].data.rs2], - inuse_regs[ibuffer_if[w].data.rs1], - inuse_regs[ibuffer_if[w].data.rd] - }; - end - if (writeback_fire) begin + for (integer i = 0; i < NUM_OPDS; ++i) begin + operands_busy_n[i] = operands_busy[i]; if (ibuffer_fire) begin - if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin - operands_busy_n[0] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin - operands_busy_n[1] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin - operands_busy_n[2] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin - operands_busy_n[3] = 0; - end - end else begin - if (writeback_if.data.rd == staging_if[w].data.rd) begin - operands_busy_n[0] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs1) begin - operands_busy_n[1] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs2) 
begin - operands_busy_n[2] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs3) begin - operands_busy_n[3] = 0; + operands_busy_n[i] = inuse_regs[ibuf_opds[i]]; + end + if (writeback_fire) begin + if (ibuffer_fire) begin + if (writeback_if.data.rd == ibuf_opds[i]) begin + operands_busy_n[i] = 0; + end + end else begin + if (writeback_if.data.rd == stg_opds[i]) begin + operands_busy_n[i] = 0; + end end end - end - if (staging_fire && staging_if[w].data.wb) begin - if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin - operands_busy_n[0] = 1; - end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin - operands_busy_n[1] = 1; - end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin - operands_busy_n[2] = 1; - end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin - operands_busy_n[3] = 1; + if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin + operands_busy_n[i] = 1; end end end @@ -289,7 +244,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), - .ARBITER ("R"), + .ARBITER ("C"), .OUT_BUF (3) ) out_arb ( .clk (clk), From cfb5cd5326af65ff6eb3e551e28c6f6b034d4422 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 21:39:08 -0700 Subject: [PATCH 103/488] arbiter runtime assertion --- hw/rtl/libs/VX_generic_arbiter.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index f55b866f8..db0173349 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,7 +90,7 @@ module VX_generic_arbiter #( end - `RUNTIME_ASSERT ((~grant_valid || (requests[grant_index] != 0)), ("invalid arbiter grant!")) + `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("invalid arbiter grant!")) endmodule `TRACING_ON From 
58e5435f0fbf569bc4a48c2d156e68db109b1301 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 13 Aug 2024 22:30:54 -0700 Subject: [PATCH 104/488] a priority arbiter performs better than round-robin during commit arbitration --- hw/rtl/core/VX_commit.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index d78c2ec89..ff3039484 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -58,7 +58,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (`NUM_EX_UNITS), .DATAW (DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (1) ) commit_arb ( .clk (clk), From 9c346dee86798c9d922ef786f44103523b8fcfd7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 01:55:22 -0700 Subject: [PATCH 105/488] read-only cache optimization --- hw/rtl/VX_define.vh | 14 ++++ hw/rtl/cache/VX_cache.sv | 109 ++++++++++++------------------- hw/rtl/cache/VX_cache_cluster.sv | 6 +- hw/rtl/cache/VX_cache_wrap.sv | 15 ++++- 4 files changed, 75 insertions(+), 69 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 438466016..7d5dbb342 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -359,6 +359,20 @@ assign src.rsp_data = dst.rsp_data; \ assign dst.rsp_ready = src.rsp_ready +`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \ + assign dst.req_valid = src.req_valid; \ + assign dst.req_data.rw = 0; \ + assign dst.req_data.byteen = '0; \ + assign dst.req_data.addr = src.req_data.addr; \ + assign dst.req_data.flags = src.req_data.flags; \ + assign dst.req_data.data = '0; \ + assign dst.req_data.tag = src.req_data.tag; \ + assign src.req_ready = dst.req_ready; \ + assign src.rsp_valid = dst.rsp_valid; \ + assign src.rsp_data.data = dst.rsp_data.data; \ + assign src.rsp_data.tag = dst.rsp_data.tag; \ + assign dst.rsp_ready = src.rsp_ready + `define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \ assign dst.req_valid = src.req_valid; \ assign 
dst.req_data.rw = src.req_data.rw; \ diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 8221c284c..ea34beeaa 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -158,37 +158,6 @@ module VX_cache import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - // Memory request buffering - wire mem_req_valid_s; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire mem_req_rw_s; - wire [LINE_SIZE-1:0] mem_req_byteen_s; - wire [`CS_LINE_WIDTH-1:0] mem_req_data_s; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; - wire mem_req_flush_s; - wire mem_req_ready_s; - - wire mem_bus_if_flush; - - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_valid_s), - .ready_in (mem_req_ready_s), - .data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}), - .data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}), - .valid_out (mem_bus_if.req_valid), - .ready_out (mem_bus_if.req_ready) - ); - - assign mem_bus_if.req_data.flags = mem_bus_if_flush ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; - - /////////////////////////////////////////////////////////////////////////// - // Memory response buffering wire mem_rsp_valid_s; wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s; @@ -471,20 +440,18 @@ module VX_cache import VX_gpu_pkg::*; #( assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; end - /////////////////////////////////////////////////////////////////////////// - - wire mem_req_valid_p; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; - wire mem_req_rw_p; - wire [LINE_SIZE-1:0] mem_req_byteen_p; - wire [`CS_LINE_WIDTH-1:0] mem_req_data_p; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p; - wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p; - wire mem_req_flush_p; - wire mem_req_ready_p; - // Memory request arbitration + wire mem_req_valid; + wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr; + wire mem_req_rw; + wire [LINE_SIZE-1:0] mem_req_byteen; + wire [`CS_LINE_WIDTH-1:0] mem_req_data; + wire [MEM_TAG_WIDTH-1:0] mem_req_tag; + wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; + wire mem_req_flush; + wire mem_req_ready; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin @@ -508,39 +475,49 @@ module VX_cache import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), - .valid_out (mem_req_valid_p), - .ready_out (mem_req_ready_p), + .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, mem_req_id, mem_req_flush}), + .valid_out (mem_req_valid), + .ready_out (mem_req_ready), `UNUSED_PIN (sel_out) ); if (NUM_BANKS > 1) begin - wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p); - assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p}); + wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = 
`CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); + assign mem_req_tag = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id}); end else begin - assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); + assign mem_req_tag = MEM_TAG_WIDTH'(mem_req_id); end - // Memory request multi-port handling + // Memory request buffering - assign mem_req_valid_s = mem_req_valid_p; - assign mem_req_addr_s = mem_req_addr_p; - assign mem_req_tag_s = mem_req_tag_p; - assign mem_req_flush_s = mem_req_flush_p; - assign mem_req_ready_p = mem_req_ready_s; + wire mem_req_flush_b; - if (WRITE_ENABLE != 0) begin - assign mem_req_rw_s = mem_req_rw_p; - assign mem_req_byteen_s = mem_req_byteen_p; - assign mem_req_data_s = mem_req_data_p; + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); + + VX_elastic_buffer #( + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + ) mem_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (mem_req_valid), + .ready_in (mem_req_ready), + .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), + .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), + .valid_out (mem_bus_tmp_if.req_valid), + .ready_out (mem_bus_tmp_if.req_ready) + ); + + assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; + + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); end else begin - `UNUSED_VAR (mem_req_byteen_p) - `UNUSED_VAR (mem_req_data_p) - `UNUSED_VAR (mem_req_rw_p) - - assign mem_req_rw_s = 0; - assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; - assign mem_req_data_s = '0; + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 939768b63..17b9b4508 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -197,6 +197,10 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .bus_out_if (mem_bus_tmp_if) ); - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); + end else begin + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if[0]); + end endmodule diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index afae06181..153b68e7d 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -103,6 +103,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) ) mem_bus_cache_if(); + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); + if (NC_OR_BYPASS) begin : bypass_if `RESET_RELAY (nc_bypass_reset, reset); @@ -136,7 +141,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .core_bus_out_if(core_bus_cache_if), .mem_bus_in_if (mem_bus_cache_if), - .mem_bus_out_if (mem_bus_if) + .mem_bus_out_if (mem_bus_tmp_if) ); end else begin @@ -145,7 +150,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); end - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if); + `ASSIGN_VX_MEM_BUS_IF (mem_bus_tmp_if, mem_bus_cache_if); + end + + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); + end 
else begin + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end if (PASSTHRU == 0) begin : cache_if From 98db24950096a1b3609c003f3a5de810f1b2cc46 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 01:56:31 -0700 Subject: [PATCH 106/488] minor updates --- hw/rtl/cache/VX_cache_bank.sv | 10 +++++----- hw/rtl/cache/VX_cache_bypass.sv | 6 +----- hw/rtl/core/VX_schedule.sv | 10 +--------- hw/rtl/libs/VX_priority_encoder.sv | 5 ++--- 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index dbbb4aba3..3dede22d5 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -266,7 +266,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin - assign req_uuid_sel = 0; + assign req_uuid_sel = '0; end VX_pipe_register #( @@ -283,7 +283,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin - assign req_uuid_st0 = 0; + assign req_uuid_st0 = '0; end wire do_init_st0 = valid_st0 && is_init_st0; @@ -365,7 +365,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin - assign req_uuid_st1 = 0; + assign req_uuid_st1 = '0; end wire is_read_st1 = is_creq_st1 && ~rw_st1; @@ -622,8 +622,8 @@ module VX_cache_bank #( assign mreq_queue_byteen = WRITEBACK ? 
dirty_byteen_st1 : write_byteen_st1; end else begin assign mreq_queue_rw = 0; - assign mreq_queue_data = 0; - assign mreq_queue_byteen = 0; + assign mreq_queue_data = '0; + assign mreq_queue_byteen = '0; `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 53d847c4e..f36d542b3 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,11 +268,7 @@ module VX_cache_bypass #( assign rsp_idx = 1'b0; end - reg [NUM_REQS-1:0] rsp_nc_valid_r; - always @(*) begin - rsp_nc_valid_r = '0; - rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc; - end + reg [NUM_REQS-1:0] rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 5fe81a718..4454280c4 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -374,14 +374,6 @@ module VX_schedule import VX_gpu_pkg::*; #( // Track pending instructions per warp - reg [`NUM_WARPS-1:0] per_warp_incr; - always @(*) begin - per_warp_incr = 0; - if (schedule_if_fire) begin - per_warp_incr[schedule_if.data.wid] = 1; - end - end - wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; @@ -394,7 +386,7 @@ module VX_schedule import VX_gpu_pkg::*; #( ) counter ( .clk (clk), .reset (pending_instr_reset[i]), - .incr (per_warp_incr[i]), + .incr (schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))), .decr (commit_sched_if.committed_warps[i]), .empty (pending_warp_empty[i]), .alm_empty (pending_warp_alm_empty[i]), diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 43d7d80ba..27465b414 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -115,9 +115,8 @@ module VX_priority_encoder #( onehot_r = 'x; for 
(integer i = N-1; i >= 0; --i) begin if (reversed[i]) begin - index_r = LN'(i); - onehot_r = '0; - onehot_r[i] = 1'b1; + index_r = LN'(i); + onehot_r = N'(1) << i; end end end From 2b22d47dd99f52c97bb7fb8e5a86b69c54a02850 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 05:11:19 -0700 Subject: [PATCH 107/488] minor update --- hw/rtl/VX_define.vh | 20 ++++++++++---------- hw/rtl/cache/VX_cache_bypass.sv | 8 ++++---- hw/rtl/core/VX_lmem_unit.sv | 16 ++++++++-------- hw/rtl/core/VX_lsu_adapter.sv | 12 ++++++------ hw/rtl/interfaces/VX_lsu_mem_if.sv | 4 ++-- hw/rtl/mem/VX_mem_bus_if.sv | 4 ++-- 6 files changed, 32 insertions(+), 32 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 7d5dbb342..8050ad6fc 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -362,10 +362,10 @@ `define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \ assign dst.req_valid = src.req_valid; \ assign dst.req_data.rw = 0; \ - assign dst.req_data.byteen = '0; \ assign dst.req_data.addr = src.req_data.addr; \ - assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.data = '0; \ + assign dst.req_data.byteen = '0; \ + assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.tag = src.req_data.tag; \ assign src.req_ready = dst.req_ready; \ assign src.rsp_valid = dst.rsp_valid; \ @@ -376,10 +376,10 @@ `define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \ assign dst.req_valid = src.req_valid; \ assign dst.req_data.rw = src.req_data.rw; \ - assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.addr = src.req_data.addr; \ - assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.data = src.req_data.data; \ + assign dst.req_data.byteen = src.req_data.byteen; \ + assign dst.req_data.flags = src.req_data.flags; \ if (TD != TS) \ assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \ else \ @@ -391,12 +391,12 @@ assign dst.rsp_ready = src.rsp_ready `define ASSIGN_VX_LSU_MEM_IF(dst, src) \ - assign 
dst.req_valid = src.req_valid; \ - assign dst.req_data = src.req_data; \ - assign src.req_ready = dst.req_ready; \ - assign src.rsp_valid = dst.rsp_valid; \ - assign src.rsp_data = dst.rsp_data; \ - assign dst.rsp_ready = src.rsp_ready + assign dst.req_valid = src.req_valid; \ + assign dst.req_data = src.req_data; \ + assign src.req_ready = dst.req_ready; \ + assign src.rsp_valid = dst.rsp_valid; \ + assign src.rsp_data = dst.rsp_data; \ + assign dst.rsp_ready = src.rsp_ready `define BUFFER_DCR_BUS_IF(dst, src, enable) \ if (enable) begin \ diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index f36d542b3..5c1a123ef 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -129,20 +129,20 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_req_nc_mux_in[i] = { core_bus_in_if[i].req_data.rw, - core_bus_in_if[i].req_data.byteen, core_bus_in_if[i].req_data.addr, - core_bus_in_if[i].req_data.flags, core_bus_in_if[i].req_data.data, + core_bus_in_if[i].req_data.byteen, + core_bus_in_if[i].req_data.flags, core_bus_in_if[i].req_data.tag }; end assign { core_req_nc_sel_rw, - core_req_nc_sel_byteen, core_req_nc_sel_addr, - core_req_nc_sel_flags, core_req_nc_sel_data, + core_req_nc_sel_byteen, + core_req_nc_sel_flags, core_req_nc_sel_tag } = core_req_nc_mux_in[core_req_nc_idx]; diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 988133cc1..01462dd65 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -65,10 +65,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_in ({ lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, + lsu_mem_in_if[i].req_data.byteen, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.tag }), .ready_in (req_global_ready), @@ -76,10 +76,10 
@@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_out ({ lsu_mem_out_if[i].req_data.mask, lsu_mem_out_if[i].req_data.rw, - lsu_mem_out_if[i].req_data.byteen, lsu_mem_out_if[i].req_data.addr, - lsu_mem_out_if[i].req_data.flags, lsu_mem_out_if[i].req_data.data, + lsu_mem_out_if[i].req_data.byteen, + lsu_mem_out_if[i].req_data.flags, lsu_mem_out_if[i].req_data.tag }), .ready_out (lsu_mem_out_if[i].req_ready) @@ -96,10 +96,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_in ({ lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.byteen, lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.data, + lsu_mem_in_if[i].req_data.byteen, + lsu_mem_in_if[i].req_data.flags, lsu_mem_in_if[i].req_data.tag }), .ready_in (req_local_ready), @@ -107,10 +107,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .data_out ({ lsu_lmem_if[i].req_data.mask, lsu_lmem_if[i].req_data.rw, - lsu_lmem_if[i].req_data.byteen, lsu_lmem_if[i].req_data.addr, - lsu_lmem_if[i].req_data.flags, lsu_lmem_if[i].req_data.data, + lsu_lmem_if[i].req_data.byteen, + lsu_lmem_if[i].req_data.flags, lsu_lmem_if[i].req_data.tag }), .ready_out (lsu_lmem_if[i].req_ready) diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/core/VX_lsu_adapter.sv index 48ef23163..822341692 100644 --- a/hw/rtl/core/VX_lsu_adapter.sv +++ b/hw/rtl/core/VX_lsu_adapter.sv @@ -44,10 +44,10 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin assign req_data_in[i] = { lsu_mem_if.req_data.rw, - lsu_mem_if.req_data.byteen[i], lsu_mem_if.req_data.addr[i], - lsu_mem_if.req_data.flags[i], - lsu_mem_if.req_data.data[i] + lsu_mem_if.req_data.data[i], + lsu_mem_if.req_data.byteen[i], + lsu_mem_if.req_data.flags[i] }; end @@ -55,10 +55,10 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_valid = req_valid_out[i]; assign { mem_bus_if[i].req_data.rw, - mem_bus_if[i].req_data.byteen, 
mem_bus_if[i].req_data.addr, - mem_bus_if[i].req_data.flags, - mem_bus_if[i].req_data.data + mem_bus_if[i].req_data.data, + mem_bus_if[i].req_data.byteen, + mem_bus_if[i].req_data.flags } = req_data_out[i]; assign mem_bus_if[i].req_data.tag = req_tag_out[i]; assign req_ready_out[i] = mem_bus_if[i].req_ready; diff --git a/hw/rtl/interfaces/VX_lsu_mem_if.sv b/hw/rtl/interfaces/VX_lsu_mem_if.sv index 4b2c6d4af..0789bcb13 100644 --- a/hw/rtl/interfaces/VX_lsu_mem_if.sv +++ b/hw/rtl/interfaces/VX_lsu_mem_if.sv @@ -25,10 +25,10 @@ interface VX_lsu_mem_if #( typedef struct packed { logic rw; logic [NUM_LANES-1:0] mask; - logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; logic [NUM_LANES-1:0][ADDR_WIDTH-1:0] addr; - logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags; logic [NUM_LANES-1:0][DATA_SIZE*8-1:0] data; + logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; + logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/mem/VX_mem_bus_if.sv b/hw/rtl/mem/VX_mem_bus_if.sv index 5f341904c..15f226690 100644 --- a/hw/rtl/mem/VX_mem_bus_if.sv +++ b/hw/rtl/mem/VX_mem_bus_if.sv @@ -23,10 +23,10 @@ interface VX_mem_bus_if #( typedef struct packed { logic rw; - logic [DATA_SIZE-1:0] byteen; logic [ADDR_WIDTH-1:0] addr; - logic [FLAGS_WIDTH-1:0] flags; logic [DATA_SIZE*8-1:0] data; + logic [DATA_SIZE-1:0] byteen; + logic [FLAGS_WIDTH-1:0] flags; logic [TAG_WIDTH-1:0] tag; } req_data_t; From aaff18cca259f00ed50ccbb69748f6ef33d2b322 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 05:11:51 -0700 Subject: [PATCH 108/488] bug fix --- hw/rtl/cache/VX_cache_bypass.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 5c1a123ef..b2aeb8791 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,7 +268,7 @@ module VX_cache_bypass #( assign rsp_idx = 1'b0; end - reg [NUM_REQS-1:0] rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) 
<< rsp_idx; + wire [NUM_REQS-1:0] rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; From 48ff4ee4e091981e5b046e6fda2ed16d2e198c41 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Thu, 15 Aug 2024 16:34:36 -0400 Subject: [PATCH 109/488] add VM_ENABLE flag to configure&compilation --- config.mk.in | 4 +++- configure | 9 ++++++++- hw/rtl/VX_config.vh | 4 ---- kernel/Makefile | 4 ++++ runtime/simx/Makefile | 4 ++++ sim/simx/Makefile | 4 ++++ 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/config.mk.in b/config.mk.in index c1f67e5a9..6b20a3050 100644 --- a/config.mk.in +++ b/config.mk.in @@ -34,4 +34,6 @@ RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel -THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party \ No newline at end of file +THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party + +VM_ENABLE ?= @VM_ENABLE@ \ No newline at end of file diff --git a/configure b/configure index 643c27150..2c0811ec3 100755 --- a/configure +++ b/configure @@ -63,7 +63,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then @@ -114,6 +114,7 @@ default_xlen=32 default_tooldir=$HOME/tools default_osversion=$(detect_osversion) default_prefix=$CURRENT_DIR +default_vm=0 # load default configuration parameters from existing config.mk if [ -f "config.mk" ]; then @@ -126,6 +127,7 @@ if [ 
-f "config.mk" ]; then TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;; OSVERSION\ ?*) default_osversion=${value//\?=/} ;; PREFIX\ ?*) default_prefix=${value//\?=/} ;; + VM_ENABLE\ ?*) default_vm=${value//\?=/} ;; esac done < config.mk fi @@ -135,6 +137,7 @@ XLEN=${XLEN:=$default_xlen} TOOLDIR=${TOOLDIR:=$default_tooldir} OSVERSION=${OSVERSION:=$default_osversion} PREFIX=${PREFIX:=$default_prefix} +VM_ENABLE=${VM_ENABLE:=$default_vm} # parse command line arguments usage() { @@ -143,6 +146,7 @@ usage() { echo " --tooldir= Set the TOOLDIR path (default: $HOME/tools)" echo " --osversion= Set the OS Version (default: $(detect_osversion))" echo " --prefix= Set installation directory" + echo " --vm_enable= Enable Virtual Memory support (default: 0)" exit 1 } while [[ "$#" -gt 0 ]]; do @@ -151,6 +155,7 @@ while [[ "$#" -gt 0 ]]; do --tooldir=*) TOOLDIR="${1#*=}" ;; --osversion=*) OSVERSION="${1#*=}" ;; --prefix=*) PREFIX="${1#*=}" ;; + --vm_enable=*) VM_ENABLE="${1#*=}" ;; -h|--help) usage ;; *) echo "Unknown parameter passed: $1"; usage ;; esac @@ -172,3 +177,5 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) THIRD_PARTY_DIR=$SCRIPT_DIR/third_party copy_files "$SCRIPT_DIR" "$CURRENT_DIR" + +echo "VM Enable: "$VM_ENABLE \ No newline at end of file diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index ed2afc900..45041ac4a 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -33,10 +33,6 @@ `endif /////////////////////////////////////////////////////////////////////////////// -`ifndef VM_DISABLE -`define VM_ENABLE -`endif - `ifndef EXT_M_DISABLE `define EXT_M_ENABLE `endif diff --git a/kernel/Makefile b/kernel/Makefile index 201ebc200..16d279fa0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -32,6 +32,10 @@ CFLAGS += -O3 -mcmodel=medany -fno-exceptions -fdata-sections -ffunction-section CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw CFLAGS += -DXLEN_$(XLEN) +ifeq ($(VM_ENABLE), 1) +CFLAGS += -DVM_ENABLE +endif + PROJECT := 
libvortex SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c diff --git a/runtime/simx/Makefile b/runtime/simx/Makefile index 7c73ca66d..7615f72b2 100644 --- a/runtime/simx/Makefile +++ b/runtime/simx/Makefile @@ -10,6 +10,10 @@ CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMM CXXFLAGS += $(CONFIGS) CXXFLAGS += -DXLEN_$(XLEN) +ifeq ($(VM_ENABLE), 1) +CXXFLAGS += -DVM_ENABLE +endif + LDFLAGS += -shared -pthread LDFLAGS += -L$(DESTDIR) -lsimx diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 622f653dd..8520e5191 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -14,6 +14,10 @@ CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += $(CONFIGS) +ifeq ($(VM_ENABLE), 1) +CXXFLAGS += -DVM_ENABLE +endif + LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator From 49738672ec812653cbcdf26df4a536bc7a6500ae Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 19:34:50 -0700 Subject: [PATCH 110/488] minor update --- hw/rtl/VX_cluster.sv | 4 ++-- hw/rtl/VX_socket.sv | 8 ++++---- hw/rtl/Vortex.sv | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 714e69dd4..b9a43f845 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -102,8 +102,8 @@ module VX_cluster import VX_gpu_pkg::*; #( .WRITEBACK (`L2_WRITEBACK), .DIRTY_BYTES (`L2_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), + .CORE_OUT_BUF (3), + .MEM_OUT_BUF (3), .NC_ENABLE (1), .PASSTHRU (!`L2_ENABLED) ) l2cache ( diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 694edfe9c..33c29e515 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -106,7 +106,7 @@ module 
VX_socket import VX_gpu_pkg::*; #( .WRITE_ENABLE (0), .NC_ENABLE (0), .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2) + .MEM_OUT_BUF (0) ) icache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.icache), @@ -153,7 +153,7 @@ module VX_socket import VX_gpu_pkg::*; #( .DIRTY_BYTES (`DCACHE_WRITEBACK), .NC_ENABLE (1), .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2) + .MEM_OUT_BUF (0) ) dcache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.dcache), @@ -185,8 +185,8 @@ module VX_socket import VX_gpu_pkg::*; #( .TAG_WIDTH (L1_MEM_TAG_WIDTH), .TAG_SEL_IDX (0), .ARBITER ("R"), - .REQ_OUT_BUF (2), - .RSP_OUT_BUF (2) + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (3) ) mem_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index b49612047..875faf47e 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -86,8 +86,8 @@ module Vortex import VX_gpu_pkg::*; ( .WRITEBACK (`L3_WRITEBACK), .DIRTY_BYTES (`L3_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), + .CORE_OUT_BUF (3), + .MEM_OUT_BUF (3), .NC_ENABLE (1), .PASSTHRU (!`L3_ENABLED) ) l3cache ( From d7e8fd74ff33046e60c7580592642b8073424d96 Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 15 Aug 2024 19:40:52 -0700 Subject: [PATCH 111/488] source_id = 0 --- sim/simx/mem_sim.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index c1ff87680..a38f4c01c 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -77,7 +77,7 @@ public: if (!rsp_args->request.write) { MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1); - DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp); + DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp << " bank: " << rsp_args->i); } delete rsp_args; }, From 26df47d6e28679a2513b6a11d272b3c3d93bb898 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Thu, 15 Aug 2024 22:55:29 -0400 Subject: [PATCH 
112/488] add a subset of tests for vm and update ci --- .github/workflows/ci.yml | 95 ++++++++++++++++++++++++++++++++++++++++ ci/regression.sh.in | 27 ++++++++++++ 2 files changed, 122 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 386ad0ba1..691226e3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -166,6 +166,101 @@ jobs: ./ci/regression.sh --${{ matrix.name }} fi + build_vm: + runs-on: ubuntu-20.04 + needs: setup + strategy: + matrix: + xlen: [32, 64] + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Install Dependencies + run: | + sudo bash ./ci/system_updates.sh + + - name: Cache Toolchain Directory + id: cache-toolchain + uses: actions/cache@v2 + with: + path: tools + key: ${{ runner.os }}-toolchain-v0.1 + restore-keys: | + ${{ runner.os }}-toolchain- + + - name: Cache Third Party Directory + id: cache-thirdparty + uses: actions/cache@v2 + with: + path: third_party + key: ${{ runner.os }}-thirdparty-v0.1 + restore-keys: | + ${{ runner.os }}-thirdparty- + + - name: Run Build + run: | + TOOLDIR=$PWD/tools + mkdir -p build${{ matrix.xlen }} + cd build${{ matrix.xlen }} + ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1 + source ci/toolchain_env.sh + make software -s > /dev/null + make tests -s > /dev/null + + - name: Upload Build Artifact + uses: actions/upload-artifact@v2 + with: + name: build-${{ matrix.xlen }}-vm + path: build${{ matrix.xlen }}-vm + + test_vm: + runs-on: ubuntu-20.04 + needs: build + strategy: + matrix: + xlen: [32, 64] + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Install Dependencies + run: | + sudo bash ./ci/system_updates.sh + + - name: Cache Toolchain Directory + id: cache-toolchain + uses: actions/cache@v2 + with: + path: tools + key: ${{ runner.os }}-toolchain-v0.1 + restore-keys: | + ${{ runner.os }}-toolchain- + + - name: Cache Third Party Directory + id: cache-thirdparty + uses: actions/cache@v2 
+ with: + path: third_party + key: ${{ runner.os }}-thirdparty-v0.1 + restore-keys: | + ${{ runner.os }}-thirdparty- + + - name: Download Build Artifact + uses: actions/download-artifact@v2 + with: + name: build-${{ matrix.xlen }}-vm + path: build${{ matrix.xlen }}-vm + + - name: Run tests + run: | + cd build${{ matrix.xlen }}-vm + source ci/toolchain_env.sh + chmod -R +x . # Ensure all files have executable permissions + ./ci/regression.sh --vm + complete: runs-on: ubuntu-20.04 needs: tests diff --git a/ci/regression.sh.in b/ci/regression.sh.in index b3bf798cb..57c021a70 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -124,6 +124,30 @@ opencl() echo "opencl tests done!" } +vm(){ + echo "begin vm tests..." + + make -C sim/simx + make -C runtime/simx + + make -C tests/kernel run-simx + + # Regression tests + make -C tests/regression run-simx + + # test global barrier + CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 + + # test local barrier + ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" + + # OpenCL tests + make -C tests/opencl run-simx + ./ci/blackbox.sh --driver=simx --app=lbm --warps=8 + + echo "vm tests done!" +} + test_csv_trace() { # test CSV trace generation @@ -315,6 +339,9 @@ clean=0 while [ "$1" != "" ]; do case $1 in + --vm ) + tests+=("vm") + ;; --clean ) clean=1 ;; From 4a213e7c20c59c5e9906e87676e391fd2af7d386 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Thu, 15 Aug 2024 23:00:14 -0400 Subject: [PATCH 113/488] update readme --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aebbed455..fa1e7d839 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,10 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
mkdir out export OUT_DIR=`pwd`/out cd build - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-6-14 --prefix=$OUT_DIR + # Run the following to disble virtual memory feature in compilation + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR + # Run the following instead to enable virtual memory feature in compilation + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1 ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory @@ -77,7 +80,6 @@ More detailed build instructions can be found [here](docs/install_vortex.md). ### Building Vortex make -s ### Quick demo running vecadd OpenCL kernel on 2 cores -<<<<<<< HEAD $ ./ci/blackbox.sh --cores=2 --app=vecadd ### Common Developer Tips From bc936c67a37ecb4e112dcde2b255abd82633ef90 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Thu, 15 Aug 2024 23:02:03 -0400 Subject: [PATCH 114/488] update ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 691226e3f..52210694e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -217,7 +217,7 @@ jobs: test_vm: runs-on: ubuntu-20.04 - needs: build + needs: build_vm strategy: matrix: xlen: [32, 64] From 54045fa05b070f973071da802abd915e15224467 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Thu, 15 Aug 2024 23:04:08 -0400 Subject: [PATCH 115/488] skip build and tests ci stages for vm_disable due to verilator dependency --- .github/workflows/ci.yml | 182 +++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 52210694e..203612974 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,109 +62,109 @@ jobs: run: | make -C third_party > /dev/null - build: - runs-on: ubuntu-20.04 - needs: setup - 
strategy: - matrix: - xlen: [32, 64] + # build: + # runs-on: ubuntu-20.04 + # needs: setup + # strategy: + # matrix: + # xlen: [32, 64] - steps: - - name: Checkout code - uses: actions/checkout@v2 + # steps: + # - name: Checkout code + # uses: actions/checkout@v2 - - name: Install Dependencies - run: | - sudo bash ./ci/system_updates.sh + # - name: Install Dependencies + # run: | + # sudo bash ./ci/system_updates.sh - - name: Cache Toolchain Directory - id: cache-toolchain - uses: actions/cache@v2 - with: - path: tools - key: ${{ runner.os }}-toolchain-v0.1 - restore-keys: | - ${{ runner.os }}-toolchain- + # - name: Cache Toolchain Directory + # id: cache-toolchain + # uses: actions/cache@v2 + # with: + # path: tools + # key: ${{ runner.os }}-toolchain-v0.1 + # restore-keys: | + # ${{ runner.os }}-toolchain- - - name: Cache Third Party Directory - id: cache-thirdparty - uses: actions/cache@v2 - with: - path: third_party - key: ${{ runner.os }}-thirdparty-v0.1 - restore-keys: | - ${{ runner.os }}-thirdparty- + # - name: Cache Third Party Directory + # id: cache-thirdparty + # uses: actions/cache@v2 + # with: + # path: third_party + # key: ${{ runner.os }}-thirdparty-v0.1 + # restore-keys: | + # ${{ runner.os }}-thirdparty- - - name: Run Build - run: | - TOOLDIR=$PWD/tools - mkdir -p build${{ matrix.xlen }} - cd build${{ matrix.xlen }} - ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} - source ci/toolchain_env.sh - make software -s > /dev/null - make tests -s > /dev/null + # - name: Run Build + # run: | + # TOOLDIR=$PWD/tools + # mkdir -p build${{ matrix.xlen }} + # cd build${{ matrix.xlen }} + # ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} + # source ci/toolchain_env.sh + # make software -s > /dev/null + # make tests -s > /dev/null - - name: Upload Build Artifact - uses: actions/upload-artifact@v2 - with: - name: build-${{ matrix.xlen }} - path: build${{ matrix.xlen }} + # - name: Upload Build Artifact + # uses: actions/upload-artifact@v2 + # 
with: + # name: build-${{ matrix.xlen }} + # path: build${{ matrix.xlen }} - tests: - runs-on: ubuntu-20.04 - needs: build - strategy: - matrix: - name: [regression, opencl, config1, config2, debug, stress] - xlen: [32, 64] + # tests: + # runs-on: ubuntu-20.04 + # needs: build + # strategy: + # matrix: + # name: [regression, opencl, config1, config2, debug, stress] + # xlen: [32, 64] - steps: - - name: Checkout code - uses: actions/checkout@v2 + # steps: + # - name: Checkout code + # uses: actions/checkout@v2 - - name: Install Dependencies - run: | - sudo bash ./ci/system_updates.sh + # - name: Install Dependencies + # run: | + # sudo bash ./ci/system_updates.sh - - name: Cache Toolchain Directory - id: cache-toolchain - uses: actions/cache@v2 - with: - path: tools - key: ${{ runner.os }}-toolchain-v0.1 - restore-keys: | - ${{ runner.os }}-toolchain- + # - name: Cache Toolchain Directory + # id: cache-toolchain + # uses: actions/cache@v2 + # with: + # path: tools + # key: ${{ runner.os }}-toolchain-v0.1 + # restore-keys: | + # ${{ runner.os }}-toolchain- - - name: Cache Third Party Directory - id: cache-thirdparty - uses: actions/cache@v2 - with: - path: third_party - key: ${{ runner.os }}-thirdparty-v0.1 - restore-keys: | - ${{ runner.os }}-thirdparty- + # - name: Cache Third Party Directory + # id: cache-thirdparty + # uses: actions/cache@v2 + # with: + # path: third_party + # key: ${{ runner.os }}-thirdparty-v0.1 + # restore-keys: | + # ${{ runner.os }}-thirdparty- - - name: Download Build Artifact - uses: actions/download-artifact@v2 - with: - name: build-${{ matrix.xlen }} - path: build${{ matrix.xlen }} + # - name: Download Build Artifact + # uses: actions/download-artifact@v2 + # with: + # name: build-${{ matrix.xlen }} + # path: build${{ matrix.xlen }} - - name: Run tests - run: | - cd build${{ matrix.xlen }} - source ci/toolchain_env.sh - chmod -R +x . 
# Ensure all files have executable permissions - if [ "${{ matrix.name }}" == "regression" ]; then - ./ci/regression.sh --unittest - ./ci/regression.sh --isa - ./ci/regression.sh --kernel - ./ci/regression.sh --synthesis - ./ci/regression.sh --regression - else - ./ci/regression.sh --${{ matrix.name }} - fi + # - name: Run tests + # run: | + # cd build${{ matrix.xlen }} + # source ci/toolchain_env.sh + # chmod -R +x . # Ensure all files have executable permissions + # if [ "${{ matrix.name }}" == "regression" ]; then + # ./ci/regression.sh --unittest + # ./ci/regression.sh --isa + # ./ci/regression.sh --kernel + # ./ci/regression.sh --synthesis + # ./ci/regression.sh --regression + # else + # ./ci/regression.sh --${{ matrix.name }} + # fi build_vm: runs-on: ubuntu-20.04 @@ -263,7 +263,7 @@ jobs: complete: runs-on: ubuntu-20.04 - needs: tests + needs: test_vm steps: - name: Check Completion From 65bd9afabb50bb27d642bfe996df8b70a9ec460e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 20:35:07 -0700 Subject: [PATCH 116/488] reset relay cleanup --- hw/rtl/cache/VX_cache.sv | 5 +---- hw/rtl/cache/VX_cache_cluster.sv | 4 +--- hw/rtl/cache/VX_cache_wrap.sv | 10 +++------- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_commit.sv | 4 +--- hw/rtl/core/VX_core.sv | 4 +--- hw/rtl/core/VX_dispatch.sv | 5 +---- hw/rtl/core/VX_dispatch_unit.sv | 6 ++---- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_lmem_unit.sv | 10 ++++------ hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_operands.sv | 20 +++++--------------- hw/rtl/core/VX_schedule.sv | 4 ++-- hw/rtl/core/VX_scoreboard.sv | 4 +--- hw/rtl/core/VX_split_join.sv | 5 +---- hw/rtl/libs/VX_avs_adapter.sv | 10 ++++------ hw/rtl/libs/VX_stream_arb.sv | 20 +++++--------------- hw/rtl/libs/VX_stream_switch.sv | 14 ++++---------- hw/rtl/libs/VX_stream_xbar.sv | 8 ++------ hw/rtl/mem/VX_local_mem.sv | 16 +++++++++------- 20 files changed, 50 insertions(+), 105 deletions(-) diff --git a/hw/rtl/cache/VX_cache.sv 
b/hw/rtl/cache/VX_cache.sv index ea34beeaa..bc5571850 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -136,17 +136,14 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_REQS; ++i) begin - VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), - .reset (core_rsp_reset[i]), + .reset (reset), .valid_in (core_rsp_valid_s[i]), .ready_in (core_rsp_ready_s[i]), .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 17b9b4508..5e0010a8c 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -102,8 +102,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - `RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_REQS; ++i) begin VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), @@ -130,7 +128,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 
2 : 0) ) cache_arb ( .clk (clk), - .reset (cache_arb_reset[i]), + .reset (reset), .bus_in_if (core_bus_tmp_if), .bus_out_if (arb_core_bus_tmp_if) ); diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 153b68e7d..3b1076d46 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -110,8 +110,6 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( if (NC_OR_BYPASS) begin : bypass_if - `RESET_RELAY (nc_bypass_reset, reset); - VX_cache_bypass #( .NUM_REQS (NUM_REQS), .TAG_SEL_IDX (TAG_SEL_IDX), @@ -135,7 +133,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .MEM_OUT_BUF (MEM_OUT_BUF) ) cache_bypass ( .clk (clk), - .reset (nc_bypass_reset), + .reset (reset), .core_bus_in_if (core_bus_if), .core_bus_out_if(core_bus_cache_if), @@ -160,9 +158,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( end if (PASSTHRU == 0) begin : cache_if - - `RESET_RELAY (cache_reset, reset); - + VX_cache #( .INSTANCE_ID (INSTANCE_ID), .CACHE_SIZE (CACHE_SIZE), @@ -184,7 +180,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .MEM_OUT_BUF (NC_OR_BYPASS ? 
1 : MEM_OUT_BUF) ) cache ( .clk (clk), - .reset (cache_reset), + .reset (reset), `ifdef PERF_ENABLE .cache_perf (cache_perf), `endif diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 72ef74b9c..adbc7898b 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -57,7 +57,7 @@ module VX_alu_unit #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alu_blocks - `RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1)); + `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV); diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index ff3039484..7106cc65f 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -53,8 +53,6 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j]; end - `RESET_RELAY (arb_reset, reset); - VX_stream_arb #( .NUM_INPUTS (`NUM_EX_UNITS), .DATAW (DATAW), @@ -62,7 +60,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( .OUT_BUF (1) ) commit_arb ( .clk (clk), - .reset (arb_reset), + .reset (reset), .valid_in (valid_in), .ready_in (ready_in), .data_in (data_in), diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index d8cd804f9..35758824d 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -306,8 +306,6 @@ module VX_core import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_bus_tmp_if[DCACHE_CHANNELS](); - `RESET_RELAY (lsu_adapter_reset, reset); - VX_lsu_adapter #( .NUM_LANES (DCACHE_CHANNELS), .DATA_SIZE (DCACHE_WORD_SIZE), @@ -318,7 +316,7 @@ module VX_core import VX_gpu_pkg::*; #( .RSP_OUT_BUF (0) ) lsu_adapter ( .clk (clk), - .reset (lsu_adapter_reset), + .reset (reset), .lsu_mem_if (dcache_coalesced_if), .mem_bus_if (dcache_bus_tmp_if) ); diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 
dcc15d5e3..0766fd83f 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -54,16 +54,13 @@ module VX_dispatch import VX_gpu_pkg::*; #( assign operands_if.ready = operands_reset[operands_if.data.ex_type]; for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin - - `RESET_RELAY (buffer_reset, reset); - VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), .OUT_REG (1) ) buffer ( .clk (clk), - .reset (buffer_reset), + .reset (reset), .valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))), .ready_in (operands_reset[i]), .data_in ({ diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 618ea1221..3c84649bd 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -85,8 +85,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); assign issue_indices[block_idx] = issue_idx; - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - wire valid_p, ready_p; if (`NUM_THREADS != NUM_LANES) begin @@ -102,7 +100,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire fire_eop = fire_p && is_last_p; always @(posedge clk) begin - if (block_reset) begin + if (reset) begin sent_mask_p <= '0; is_first_p <= 1; end else begin @@ -225,7 +223,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) buf_out ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (valid_p), .ready_in (ready_p), .data_in ({ diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index e77a3633a..5d5af64d9 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -77,7 +77,7 @@ module VX_issue import VX_gpu_pkg::*; #( assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop; `endif - `RESET_RELAY (slice_reset, reset); + `RESET_RELAY_EN (slice_reset, reset, (`ISSUE_WIDTH > 1)); VX_issue_slice #( .INSTANCE_ID 
($sformatf("%s%0d", INSTANCE_ID, issue_id)), diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 01462dd65..e61f626fd 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -39,8 +39,6 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; @@ -60,7 +58,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .OUT_REG (3) ) req_global_buf ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), .data_in ({ lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, @@ -91,7 +89,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .OUT_REG (0) ) req_local_buf ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), .data_in ({ lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, @@ -126,7 +124,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .OUT_BUF (1) ) rsp_arb ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .valid_in ({ lsu_lmem_if[i].rsp_valid, lsu_mem_out_if[i].rsp_valid @@ -167,7 +165,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .RSP_OUT_BUF (0) ) lsu_adapter ( .clk (clk), - .reset (block_reset[i]), + .reset (reset), .lsu_mem_if (lsu_lmem_if[i]), .mem_bus_if (lmem_bus_tmp_if) ); diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 5e280e48f..febaec5aa 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -56,7 +56,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_blocks - `RESET_RELAY (slice_reset, reset); + `RESET_RELAY_EN (slice_reset, reset, (BLOCK_SIZE > 1)); VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) diff --git 
a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 3f64caf77..62e2bb883 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -99,6 +99,8 @@ module VX_operands import VX_gpu_pkg::*; #( assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; + `RESET_RELAY (req_xbar_reset, reset); + VX_stream_xbar #( .NUM_INPUTS (NUM_SRC_OPDS), .NUM_OUTPUTS (NUM_BANKS), @@ -108,7 +110,7 @@ module VX_operands import VX_gpu_pkg::*; #( .OUT_BUF (0) // no output buffering ) req_xbar ( .clk (clk), - .reset (reset), + .reset (req_xbar_reset), `UNUSED_PIN(collisions), .valid_in (req_in_valid), .data_in (req_in_data), @@ -247,25 +249,13 @@ module VX_operands import VX_gpu_pkg::*; #( assign gpr_wr_bank_idx = '0; end - `ifdef GPR_RESET - reg wr_enabled = 0; - always @(posedge clk) begin - if (reset) begin - wr_enabled <= 1; - end - end - `else - wire wr_enabled = 1; - `endif - for (genvar b = 0; b < NUM_BANKS; ++b) begin wire gpr_wr_enabled; if (BANK_SEL_BITS != 0) begin - assign gpr_wr_enabled = wr_enabled - && writeback_if.valid + assign gpr_wr_enabled = writeback_if.valid && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); end else begin - assign gpr_wr_enabled = wr_enabled && writeback_if.valid; + assign gpr_wr_enabled = writeback_if.valid; end wire [BYTEENW-1:0] wren; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 4454280c4..9cdf879eb 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -377,7 +377,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; - `RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT); + `RESET_RELAY (pending_instr_reset, reset); for (genvar i = 0; i < `NUM_WARPS; ++i) begin VX_pending_size #( @@ -385,7 +385,7 @@ module VX_schedule import VX_gpu_pkg::*; #( .ALM_EMPTY (1) ) counter ( .clk (clk), - .reset (pending_instr_reset[i]), + .reset (pending_instr_reset), .incr 
(schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))), .decr (commit_sched_if.committed_warps[i]), .empty (pending_warp_empty[i]), diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index cd9f3093d..503cc22c8 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -239,8 +239,6 @@ module VX_scoreboard import VX_gpu_pkg::*; #( assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w]; end - `RESET_RELAY (arb_reset, reset); - VX_stream_arb #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), @@ -248,7 +246,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( .OUT_BUF (3) ) out_arb ( .clk (clk), - .reset (arb_reset), + .reset (reset), .valid_in (arb_valid_in), .ready_in (arb_ready_in), .data_in (arb_data_in), diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 9f47023b0..4b58ebc26 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -46,15 +46,12 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_slices - - `RESET_RELAY (ipdom_reset, reset); - VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), .DEPTH (`DV_STACK_SIZE) ) ipdom_stack ( .clk (clk), - .reset (ipdom_reset), + .reset (reset), .q0 (ipdom_q0), .q1 (ipdom_q1), .d (ipdom_data[i]), diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 659114c8d..f0941b028 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -81,15 +81,13 @@ module VX_avs_adapter #( assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end - `RESET_RELAY_EX (bank_reset, reset, NUM_BANKS, 1); - for (genvar i = 0; i < NUM_BANKS; ++i) begin VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .incr (req_queue_push[i]), .decr (req_queue_pop[i]), `UNUSED_PIN (empty), @@ -105,7 
+103,7 @@ module VX_avs_adapter #( .DEPTH (RD_QUEUE_SIZE) ) rd_req_queue ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .push (req_queue_push[i]), .pop (req_queue_pop[i]), .data_in (mem_req_tag), @@ -135,7 +133,7 @@ module VX_avs_adapter #( .OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)) ) req_out_buf ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .valid_in (valid_out_w), .ready_in (ready_out_w), .data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}), @@ -177,7 +175,7 @@ module VX_avs_adapter #( .DEPTH (RD_QUEUE_SIZE) ) rd_rsp_queue ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .push (avs_readdatavalid[i]), .pop (req_queue_pop[i]), .data_in (avs_readdata[i]), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index d5157a8dd..ffb56eb26 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -49,8 +49,6 @@ module VX_stream_arb #( localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (SLICE_SIZE), .NUM_OUTPUTS (1), @@ -60,7 +58,7 @@ module VX_stream_arb #( .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), @@ -92,8 +90,6 @@ module VX_stream_arb #( wire [DATAW-1:0] data_tmp_u; wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u; - `RESET_RELAY (slice_reset, reset); - if (MAX_FANOUT != 1) begin VX_stream_arb #( .NUM_INPUTS (SLICE_SIZE), @@ -104,7 +100,7 @@ module VX_stream_arb #( .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output ) fanout_slice_arb ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), @@ -206,8 +202,6 @@ module VX_stream_arb #( 
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (1), .NUM_OUTPUTS (SLICE_SIZE), @@ -217,7 +211,7 @@ module VX_stream_arb #( .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), @@ -267,8 +261,6 @@ module VX_stream_arb #( localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (1), .NUM_OUTPUTS (SLICE_SIZE), @@ -278,7 +270,7 @@ module VX_stream_arb #( .OUT_BUF (OUT_BUF) ) fanout_slice_arb ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_tmp[i]), .ready_in (ready_tmp[i]), .data_in (data_tmp[i]), @@ -342,8 +334,6 @@ module VX_stream_arb #( // #Inputs == #Outputs - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( @@ -353,7 +343,7 @@ module VX_stream_arb #( .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index 3a905cb1d..c379dd7c0 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -72,8 +72,6 @@ module VX_stream_switch #( end end - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( .DATAW (DATAW), @@ -81,7 +79,7 @@ module VX_stream_switch #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_out_r[i]), .ready_in (ready_out_r[i]), .data_in (data_out_r[i]), @@ -103,8 +101,6 @@ module VX_stream_switch 
#( assign ready_in[i] = ready_out_r[i][sel_in[i]]; end - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_INPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin localparam ii = i * NUM_REQS + j; @@ -115,7 +111,7 @@ module VX_stream_switch #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[ii]), + .reset (reset), .valid_in (valid_out_r[i][j]), .ready_in (ready_out_r[i][j]), .data_in (data_in[i]), @@ -124,7 +120,7 @@ module VX_stream_switch #( .ready_out (ready_out[ii]) ); end else begin - `UNUSED_VAR (out_buf_reset[ii]) + `UNUSED_VAR (reset) `UNUSED_VAR (valid_out_r[i][j]) assign ready_out_r[i][j] = '0; end @@ -137,8 +133,6 @@ module VX_stream_switch #( `UNUSED_VAR (sel_in) - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( .DATAW (DATAW), @@ -146,7 +140,7 @@ module VX_stream_switch #( .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 8cdb9ced6..b37c9b676 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -58,8 +58,6 @@ module VX_stream_xbar #( assign valid_in_q[j] = valid_in[j] && (sel_in[j] == i); end - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -69,7 +67,7 @@ module VX_stream_xbar #( .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in_q), .data_in (data_in), .ready_in (per_output_ready_in[i]), @@ -123,8 +121,6 @@ module VX_stream_xbar #( assign data_out_r = {NUM_OUTPUTS{data_in}}; assign ready_in = ready_out_r[sel_in]; - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin 
VX_elastic_buffer #( .DATAW (DATAW), @@ -133,7 +129,7 @@ module VX_stream_xbar #( .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_out_r[i]), .ready_in (ready_out_r[i]), .data_in (data_out_r[i]), diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index abd44b564..67a6dfc48 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -116,6 +116,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_ready = req_ready_in[i]; end + `RESET_RELAY (req_xbar_reset, reset); + VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), @@ -125,7 +127,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), - .reset (reset), + .reset (req_xbar_reset), `ifdef PERF_ENABLE .collisions (perf_collisions), `else @@ -163,8 +165,6 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire bank_rsp_valid, bank_rsp_ready; wire [WORD_WIDTH-1:0] bank_rsp_data; - `RESET_RELAY_EN (bram_reset, reset, (NUM_BANKS > 1)); - VX_sp_ram #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), @@ -172,7 +172,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NO_RWCHECK (1) ) data_store ( .clk (clk), - .reset (bram_reset), + .reset (reset), .read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]), .write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]), .wren (per_bank_req_byteen[i]), @@ -185,7 +185,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( reg [BANK_ADDR_WIDTH-1:0] last_wr_addr; reg last_wr_valid; always @(posedge clk) begin - if (bram_reset) begin + if (reset) begin last_wr_valid <= 0; end else begin last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]; @@ -203,7 +203,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH) ) bram_buf ( .clk (clk), - .reset 
(bram_reset), + .reset (reset), .valid_in (bank_rsp_valid), .ready_in (bank_rsp_ready), .data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}), @@ -225,6 +225,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out; wire [NUM_REQS-1:0] rsp_ready_out; + `RESET_RELAY (rsp_xbar_reset, reset); + VX_stream_xbar #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), @@ -233,7 +235,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (OUT_BUF) ) rsp_xbar ( .clk (clk), - .reset (reset), + .reset (rsp_xbar_reset), `UNUSED_PIN (collisions), .sel_in (per_bank_rsp_idx), .valid_in (per_bank_rsp_valid), From f4983cb380c4b3634cc85f77bd9dcda9979f3e9e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 21:12:28 -0700 Subject: [PATCH 117/488] core memory unit refactoring --- hw/rtl/core/VX_core.sv | 126 ++------------------- hw/rtl/core/VX_lmem_unit.sv | 199 --------------------------------- hw/rtl/core/VX_mem_unit.sv | 217 ++++++++++++++++++++++++++++++++++++ hw/rtl/mem/VX_lmem_demux.sv | 132 ++++++++++++++++++++++ 4 files changed, 356 insertions(+), 318 deletions(-) delete mode 100644 hw/rtl/core/VX_lmem_unit.sv create mode 100644 hw/rtl/core/VX_mem_unit.sv create mode 100644 hw/rtl/mem/VX_lmem_demux.sv diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 35758824d..65ad65c75 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -202,132 +202,20 @@ module VX_core import VX_gpu_pkg::*; #( .commit_sched_if(commit_sched_if) ); - VX_lsu_mem_if #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_dcache_if[`NUM_LSU_BLOCKS](); - -`ifdef LMEM_ENABLE - `RESET_RELAY (lmem_unit_reset, reset); - VX_lmem_unit #( + VX_mem_unit #( .INSTANCE_ID (INSTANCE_ID) - ) lmem_unit ( - .clk (clk), - .reset (lmem_unit_reset), + ) mem_unit ( + .clk (clk), + .reset (lmem_unit_reset), `ifdef PERF_ENABLE - .cache_perf (mem_perf_tmp_if.lmem), + .cache_perf 
(mem_perf_tmp_if.lmem), `endif - .lsu_mem_in_if (lsu_mem_if), - .lsu_mem_out_if (lsu_dcache_if) + .lsu_mem_in_if (lsu_mem_if), + .dcache_bus_if (dcache_bus_if) ); -`else - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); - end - -`endif - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks - - VX_lsu_mem_if #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_coalesced_if(); - - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if - - `RESET_RELAY (mem_coalescer_reset, reset); - - VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), - .NUM_REQS (`NUM_LSU_LANES), - .DATA_IN_SIZE (LSU_WORD_SIZE), - .DATA_OUT_SIZE (DCACHE_WORD_SIZE), - .ADDR_WIDTH (LSU_ADDR_WIDTH), - .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), - .TAG_WIDTH (LSU_TAG_WIDTH), - .UUID_WIDTH (`UUID_WIDTH), - .QUEUE_SIZE (`LSUQ_OUT_SIZE) - ) mem_coalescer ( - .clk (clk), - .reset (mem_coalescer_reset), - - // Input request - .in_req_valid (lsu_dcache_if[i].req_valid), - .in_req_mask (lsu_dcache_if[i].req_data.mask), - .in_req_rw (lsu_dcache_if[i].req_data.rw), - .in_req_byteen (lsu_dcache_if[i].req_data.byteen), - .in_req_addr (lsu_dcache_if[i].req_data.addr), - .in_req_flags (lsu_dcache_if[i].req_data.flags), - .in_req_data (lsu_dcache_if[i].req_data.data), - .in_req_tag (lsu_dcache_if[i].req_data.tag), - .in_req_ready (lsu_dcache_if[i].req_ready), - - // Input response - .in_rsp_valid (lsu_dcache_if[i].rsp_valid), - .in_rsp_mask (lsu_dcache_if[i].rsp_data.mask), - .in_rsp_data (lsu_dcache_if[i].rsp_data.data), - .in_rsp_tag (lsu_dcache_if[i].rsp_data.tag), - .in_rsp_ready (lsu_dcache_if[i].rsp_ready), - - // Output request - .out_req_valid (dcache_coalesced_if.req_valid), - .out_req_mask (dcache_coalesced_if.req_data.mask), - .out_req_rw (dcache_coalesced_if.req_data.rw), - .out_req_byteen (dcache_coalesced_if.req_data.byteen), - 
.out_req_addr (dcache_coalesced_if.req_data.addr), - .out_req_flags (dcache_coalesced_if.req_data.flags), - .out_req_data (dcache_coalesced_if.req_data.data), - .out_req_tag (dcache_coalesced_if.req_data.tag), - .out_req_ready (dcache_coalesced_if.req_ready), - - // Output response - .out_rsp_valid (dcache_coalesced_if.rsp_valid), - .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), - .out_rsp_data (dcache_coalesced_if.rsp_data.data), - .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), - .out_rsp_ready (dcache_coalesced_if.rsp_ready) - ); - - end else begin - - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); - - end - - VX_mem_bus_if #( - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_bus_tmp_if[DCACHE_CHANNELS](); - - VX_lsu_adapter #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH), - .TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH), - .ARBITER ("P"), - .REQ_OUT_BUF (0), - .RSP_OUT_BUF (0) - ) lsu_adapter ( - .clk (clk), - .reset (reset), - .lsu_mem_if (dcache_coalesced_if), - .mem_bus_if (dcache_bus_tmp_if) - ); - - for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin - `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); - end - - end - - `ifdef PERF_ENABLE wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle; diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv deleted file mode 100644 index e61f626fd..000000000 --- a/hw/rtl/core/VX_lmem_unit.sv +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_define.vh" - -module VX_lmem_unit import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "" -) ( - input wire clk, - input wire reset, - -`ifdef PERF_ENABLE - output cache_perf_t cache_perf, -`endif - - VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], - VX_lsu_mem_if.master lsu_mem_out_if [`NUM_LSU_BLOCKS] -); - `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) - `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) - - localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; - localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; - localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); - - VX_lsu_mem_if #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices - - wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAG_LOCAL]; - end - - wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); - wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask); - - wire req_global_ready; - wire req_local_ready; - - VX_elastic_buffer #( - .DATAW (REQ_DATAW), - .SIZE (2), - .OUT_REG (3) - ) req_global_buf ( - .clk 
(clk), - .reset (reset), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), - .data_in ({ - lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, - lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.data, - lsu_mem_in_if[i].req_data.byteen, - lsu_mem_in_if[i].req_data.flags, - lsu_mem_in_if[i].req_data.tag - }), - .ready_in (req_global_ready), - .valid_out (lsu_mem_out_if[i].req_valid), - .data_out ({ - lsu_mem_out_if[i].req_data.mask, - lsu_mem_out_if[i].req_data.rw, - lsu_mem_out_if[i].req_data.addr, - lsu_mem_out_if[i].req_data.data, - lsu_mem_out_if[i].req_data.byteen, - lsu_mem_out_if[i].req_data.flags, - lsu_mem_out_if[i].req_data.tag - }), - .ready_out (lsu_mem_out_if[i].req_ready) - ); - - VX_elastic_buffer #( - .DATAW (REQ_DATAW), - .SIZE (0), - .OUT_REG (0) - ) req_local_buf ( - .clk (clk), - .reset (reset), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), - .data_in ({ - lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, - lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.data, - lsu_mem_in_if[i].req_data.byteen, - lsu_mem_in_if[i].req_data.flags, - lsu_mem_in_if[i].req_data.tag - }), - .ready_in (req_local_ready), - .valid_out (lsu_lmem_if[i].req_valid), - .data_out ({ - lsu_lmem_if[i].req_data.mask, - lsu_lmem_if[i].req_data.rw, - lsu_lmem_if[i].req_data.addr, - lsu_lmem_if[i].req_data.data, - lsu_lmem_if[i].req_data.byteen, - lsu_lmem_if[i].req_data.flags, - lsu_lmem_if[i].req_data.tag - }), - .ready_out (lsu_lmem_if[i].req_ready) - ); - - assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) - || (req_local_ready && is_addr_local); - - VX_stream_arb #( - .NUM_INPUTS (2), - .DATAW (RSP_DATAW), - .ARBITER ("R"), - .OUT_BUF (1) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in ({ - lsu_lmem_if[i].rsp_valid, - lsu_mem_out_if[i].rsp_valid - }), - .ready_in ({ - lsu_lmem_if[i].rsp_ready, - lsu_mem_out_if[i].rsp_ready - }), - 
.data_in ({ - lsu_lmem_if[i].rsp_data, - lsu_mem_out_if[i].rsp_data - }), - .data_out (lsu_mem_in_if[i].rsp_data), - .valid_out (lsu_mem_in_if[i].rsp_valid), - .ready_out (lsu_mem_in_if[i].rsp_ready), - `UNUSED_PIN (sel_out) - ); - end - - VX_mem_bus_if #( - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lmem_bus_if[LSU_NUM_REQS](); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices - VX_mem_bus_if #( - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lmem_bus_tmp_if[`NUM_LSU_LANES](); - - VX_lsu_adapter #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH), - .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), - .ARBITER ("P"), - .REQ_OUT_BUF (3), - .RSP_OUT_BUF (0) - ) lsu_adapter ( - .clk (clk), - .reset (reset), - .lsu_mem_if (lsu_lmem_if[i]), - .mem_bus_if (lmem_bus_tmp_if) - ); - - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]); - end - end - - `RESET_RELAY (lmem_reset, reset); - - VX_local_mem #( - .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), - .SIZE (1 << `LMEM_LOG_SIZE), - .NUM_REQS (LSU_NUM_REQS), - .NUM_BANKS (`LMEM_NUM_BANKS), - .WORD_SIZE (LSU_WORD_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), - .UUID_WIDTH (`UUID_WIDTH), - .TAG_WIDTH (LSU_TAG_WIDTH), - .OUT_BUF (3) - ) local_mem ( - .clk (clk), - .reset (lmem_reset), - `ifdef PERF_ENABLE - .cache_perf (cache_perf), - `endif - .mem_bus_if (lmem_bus_if) - ); - -endmodule diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv new file mode 100644 index 000000000..b960d5ff2 --- /dev/null +++ b/hw/rtl/core/VX_mem_unit.sv @@ -0,0 +1,217 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_mem_unit import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "" +) ( + input wire clk, + input wire reset, + +`ifdef PERF_ENABLE + output cache_perf_t cache_perf, +`endif + + VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], + VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS] +); + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_dcache_if[`NUM_LSU_BLOCKS](); + +`ifdef LMEM_ENABLE + + `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) + `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) + + localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); + + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_lmem_if[`NUM_LSU_BLOCKS](); + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices + VX_lmem_demux #( + .REQ0_OUT_BUF (3), + .REQ1_OUT_BUF (0), + .RSP_OUT_BUF (1) + ) lmem_demux ( + .clk (clk), + .reset (reset), + .lsu_in_if (lsu_mem_in_if[i]), + .cache_out_if (lsu_dcache_if[i]), + .lmem_out_if (lsu_lmem_if[i]) + ); + end + + VX_mem_bus_if #( + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lmem_bus_if[LSU_NUM_REQS](); + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices + VX_mem_bus_if #( + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lmem_bus_tmp_if[`NUM_LSU_LANES](); + + VX_lsu_adapter #( + .NUM_LANES (`NUM_LSU_LANES), + 
.DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH), + .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), + .ARBITER ("P"), + .REQ_OUT_BUF (3), + .RSP_OUT_BUF (0) + ) lmem_adapter ( + .clk (clk), + .reset (reset), + .lsu_mem_if (lsu_lmem_if[i]), + .mem_bus_if (lmem_bus_tmp_if) + ); + + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin + `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]); + end + end + + `RESET_RELAY (lmem_reset, reset); + + VX_local_mem #( + .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), + .SIZE (1 << `LMEM_LOG_SIZE), + .NUM_REQS (LSU_NUM_REQS), + .NUM_BANKS (`LMEM_NUM_BANKS), + .WORD_SIZE (LSU_WORD_SIZE), + .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .UUID_WIDTH (`UUID_WIDTH), + .TAG_WIDTH (LSU_TAG_WIDTH), + .OUT_BUF (3) + ) local_mem ( + .clk (clk), + .reset (lmem_reset), + `ifdef PERF_ENABLE + .cache_perf (cache_perf), + `endif + .mem_bus_if (lmem_bus_if) + ); + +`else + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); + end + +`endif + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks + + VX_lsu_mem_if #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_coalesced_if(); + + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if + + `RESET_RELAY (mem_coalescer_reset, reset); + + VX_mem_coalescer #( + .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), + .NUM_REQS (`NUM_LSU_LANES), + .DATA_IN_SIZE (LSU_WORD_SIZE), + .DATA_OUT_SIZE (DCACHE_WORD_SIZE), + .ADDR_WIDTH (LSU_ADDR_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), + .TAG_WIDTH (LSU_TAG_WIDTH), + .UUID_WIDTH (`UUID_WIDTH), + .QUEUE_SIZE (`LSUQ_OUT_SIZE) + ) mem_coalescer ( + .clk (clk), + .reset (mem_coalescer_reset), + + // Input request + .in_req_valid (lsu_dcache_if[i].req_valid), + .in_req_mask (lsu_dcache_if[i].req_data.mask), + .in_req_rw (lsu_dcache_if[i].req_data.rw), + .in_req_byteen 
(lsu_dcache_if[i].req_data.byteen), + .in_req_addr (lsu_dcache_if[i].req_data.addr), + .in_req_flags (lsu_dcache_if[i].req_data.flags), + .in_req_data (lsu_dcache_if[i].req_data.data), + .in_req_tag (lsu_dcache_if[i].req_data.tag), + .in_req_ready (lsu_dcache_if[i].req_ready), + + // Input response + .in_rsp_valid (lsu_dcache_if[i].rsp_valid), + .in_rsp_mask (lsu_dcache_if[i].rsp_data.mask), + .in_rsp_data (lsu_dcache_if[i].rsp_data.data), + .in_rsp_tag (lsu_dcache_if[i].rsp_data.tag), + .in_rsp_ready (lsu_dcache_if[i].rsp_ready), + + // Output request + .out_req_valid (dcache_coalesced_if.req_valid), + .out_req_mask (dcache_coalesced_if.req_data.mask), + .out_req_rw (dcache_coalesced_if.req_data.rw), + .out_req_byteen (dcache_coalesced_if.req_data.byteen), + .out_req_addr (dcache_coalesced_if.req_data.addr), + .out_req_flags (dcache_coalesced_if.req_data.flags), + .out_req_data (dcache_coalesced_if.req_data.data), + .out_req_tag (dcache_coalesced_if.req_data.tag), + .out_req_ready (dcache_coalesced_if.req_ready), + + // Output response + .out_rsp_valid (dcache_coalesced_if.rsp_valid), + .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), + .out_rsp_data (dcache_coalesced_if.rsp_data.data), + .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), + .out_rsp_ready (dcache_coalesced_if.rsp_ready) + ); + + end else begin + + `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); + + end + + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_bus_tmp_if[DCACHE_CHANNELS](); + + VX_lsu_adapter #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH), + .TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH), + .ARBITER ("P"), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (0) + ) lsu_adapter ( + .clk (clk), + .reset (reset), + .lsu_mem_if (dcache_coalesced_if), + .mem_bus_if (dcache_bus_tmp_if) + ); + + for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin + `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * 
DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); + end + + end + +endmodule diff --git a/hw/rtl/mem/VX_lmem_demux.sv b/hw/rtl/mem/VX_lmem_demux.sv new file mode 100644 index 000000000..47a3912a5 --- /dev/null +++ b/hw/rtl/mem/VX_lmem_demux.sv @@ -0,0 +1,132 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_lmem_demux import VX_gpu_pkg::*; #( + parameter REQ0_OUT_BUF = 0, + parameter REQ1_OUT_BUF = 0, + parameter RSP_OUT_BUF = 0 +) ( + input wire clk, + input wire reset, + VX_lsu_mem_if.slave lsu_in_if, + VX_lsu_mem_if.master cache_out_if, + VX_lsu_mem_if.master lmem_out_if +); + localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + + wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; + for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin + assign is_addr_local_mask[i] = lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL]; + end + + wire is_addr_global = | (lsu_in_if.req_data.mask & ~is_addr_local_mask); + wire is_addr_local = | (lsu_in_if.req_data.mask & is_addr_local_mask); + + wire req_global_ready; + wire req_local_ready; + + VX_elastic_buffer #( + .DATAW (REQ_DATAW), + .SIZE (2), + .OUT_REG (REQ0_OUT_BUF) + ) req_global_buf ( + .clk (clk), + .reset (reset), + .valid_in (lsu_in_if.req_valid && is_addr_global), + 
.data_in ({ + lsu_in_if.req_data.mask & ~is_addr_local_mask, + lsu_in_if.req_data.rw, + lsu_in_if.req_data.addr, + lsu_in_if.req_data.data, + lsu_in_if.req_data.byteen, + lsu_in_if.req_data.flags, + lsu_in_if.req_data.tag + }), + .ready_in (req_global_ready), + .valid_out (cache_out_if.req_valid), + .data_out ({ + cache_out_if.req_data.mask, + cache_out_if.req_data.rw, + cache_out_if.req_data.addr, + cache_out_if.req_data.data, + cache_out_if.req_data.byteen, + cache_out_if.req_data.flags, + cache_out_if.req_data.tag + }), + .ready_out (cache_out_if.req_ready) + ); + + VX_elastic_buffer #( + .DATAW (REQ_DATAW), + .SIZE (0), + .OUT_REG (REQ1_OUT_BUF) + ) req_local_buf ( + .clk (clk), + .reset (reset), + .valid_in (lsu_in_if.req_valid && is_addr_local), + .data_in ({ + lsu_in_if.req_data.mask & is_addr_local_mask, + lsu_in_if.req_data.rw, + lsu_in_if.req_data.addr, + lsu_in_if.req_data.data, + lsu_in_if.req_data.byteen, + lsu_in_if.req_data.flags, + lsu_in_if.req_data.tag + }), + .ready_in (req_local_ready), + .valid_out (lmem_out_if.req_valid), + .data_out ({ + lmem_out_if.req_data.mask, + lmem_out_if.req_data.rw, + lmem_out_if.req_data.addr, + lmem_out_if.req_data.data, + lmem_out_if.req_data.byteen, + lmem_out_if.req_data.flags, + lmem_out_if.req_data.tag + }), + .ready_out (lmem_out_if.req_ready) + ); + + assign lsu_in_if.req_ready = (req_global_ready && is_addr_global) + || (req_local_ready && is_addr_local); + + VX_stream_arb #( + .NUM_INPUTS (2), + .DATAW (RSP_DATAW), + .ARBITER ("R"), + .OUT_BUF (RSP_OUT_BUF) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in ({ + lmem_out_if.rsp_valid, + cache_out_if.rsp_valid + }), + .ready_in ({ + lmem_out_if.rsp_ready, + cache_out_if.rsp_ready + }), + .data_in ({ + lmem_out_if.rsp_data, + cache_out_if.rsp_data + }), + .data_out (lsu_in_if.rsp_data), + .valid_out (lsu_in_if.rsp_valid), + .ready_out (lsu_in_if.rsp_ready), + `UNUSED_PIN (sel_out) + ); + +endmodule From b83190c6e13bae20adda82c76a123964be6c8332 Mon Sep 
17 00:00:00 2001 From: Blaise Tine Date: Thu, 15 Aug 2024 21:29:06 -0700 Subject: [PATCH 118/488] minor update --- hw/rtl/core/VX_core.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 6 ++++-- hw/rtl/mem/VX_local_mem.sv | 18 +++++++++--------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 65ad65c75..30a774ee5 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -210,7 +210,7 @@ module VX_core import VX_gpu_pkg::*; #( .clk (clk), .reset (lmem_unit_reset), `ifdef PERF_ENABLE - .cache_perf (mem_perf_tmp_if.lmem), + .lmem_perf (mem_perf_tmp_if.lmem), `endif .lsu_mem_in_if (lsu_mem_if), .dcache_bus_if (dcache_bus_if) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index b960d5ff2..fef21a81f 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -20,7 +20,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( input wire reset, `ifdef PERF_ENABLE - output cache_perf_t cache_perf, + output cache_perf_t lmem_perf, `endif VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], @@ -106,13 +106,15 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .clk (clk), .reset (lmem_reset), `ifdef PERF_ENABLE - .cache_perf (cache_perf), + .lmem_perf (lmem_perf), `endif .mem_bus_if (lmem_bus_if) ); `else + assign lmem_perf = '0; + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 67a6dfc48..aff058cb9 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -43,7 +43,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( // PERF `ifdef PERF_ENABLE - output cache_perf_t cache_perf, + output cache_perf_t lmem_perf, `endif VX_mem_bus_if.slave mem_bus_if [NUM_REQS] @@ -290,14 +290,14 @@ module VX_local_mem import VX_gpu_pkg::*; #( end end - assign cache_perf.reads = perf_reads; - assign cache_perf.writes = perf_writes; - assign 
cache_perf.read_misses = '0; - assign cache_perf.write_misses = '0; - assign cache_perf.bank_stalls = perf_collisions; - assign cache_perf.mshr_stalls = '0; - assign cache_perf.mem_stalls = '0; - assign cache_perf.crsp_stalls = perf_crsp_stalls; + assign lmem_perf.reads = perf_reads; + assign lmem_perf.writes = perf_writes; + assign lmem_perf.read_misses = '0; + assign lmem_perf.write_misses = '0; + assign lmem_perf.bank_stalls = perf_collisions; + assign lmem_perf.mshr_stalls = '0; + assign lmem_perf.mem_stalls = '0; + assign lmem_perf.crsp_stalls = perf_crsp_stalls; `endif From a523afbebe248f0de5c425a4d0440555722d01e0 Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 15 Aug 2024 22:30:32 -0700 Subject: [PATCH 119/488] removed jammy --- configure | 1 - 1 file changed, 1 deletion(-) diff --git a/configure b/configure index cab5142c5..62975784b 100755 --- a/configure +++ b/configure @@ -26,7 +26,6 @@ detect_osversion() { case "$VERSION_CODENAME" in bionic) osversion="ubuntu/bionic";; focal) osversion="ubuntu/focal";; - jammy) osversion="ubuntu/focal";; # Add new versions as needed esac ;; From d5fa26350c5dfdc5252d67eee8b85df82fe0bae6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 16 Aug 2024 01:35:20 -0700 Subject: [PATCH 120/488] minor update --- hw/rtl/core/VX_mem_unit.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index fef21a81f..8df272439 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -113,8 +113,9 @@ module VX_mem_unit import VX_gpu_pkg::*; #( `else +`ifdef PERF_ENABLE assign lmem_perf = '0; - +`endif for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); end From f6ed49f19c92cf66c6d3a32401ea9bb1789d5643 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 16 Aug 2024 08:19:55 -0700 Subject: [PATCH 121/488] minor update --- hw/rtl/cache/VX_cache.sv | 91 +++++++++++---------- 
hw/rtl/mem/VX_lmem_demux.sv | 10 +-- hw/rtl/{core => mem}/VX_lsu_adapter.sv | 0 hw/rtl/{interfaces => mem}/VX_lsu_mem_if.sv | 0 4 files changed, 51 insertions(+), 50 deletions(-) rename hw/rtl/{core => mem}/VX_lsu_adapter.sv (100%) rename hw/rtl/{interfaces => mem}/VX_lsu_mem_if.sv (100%) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index bc5571850..60493665b 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -155,7 +155,13 @@ module VX_cache import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); + // Memory response buffering + wire mem_rsp_valid_s; wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; @@ -168,14 +174,51 @@ module VX_cache import VX_gpu_pkg::*; #( ) mem_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (mem_bus_if.rsp_valid), - .ready_in (mem_bus_if.rsp_ready), - .data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}), + .valid_in (mem_bus_tmp_if.rsp_valid), + .ready_in (mem_bus_tmp_if.rsp_ready), + .data_in ({mem_bus_tmp_if.rsp_data.tag, mem_bus_tmp_if.rsp_data.data}), .data_out ({mem_rsp_tag_s, mem_rsp_data_s}), .valid_out (mem_rsp_valid_s), .ready_out (mem_rsp_ready_s) ); + // Memory request buffering + + wire mem_req_valid; + wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr; + wire mem_req_rw; + wire [LINE_SIZE-1:0] mem_req_byteen; + wire [`CS_LINE_WIDTH-1:0] mem_req_data; + wire [MEM_TAG_WIDTH-1:0] mem_req_tag; + wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; + wire mem_req_flush; + wire mem_req_ready; + + wire mem_req_flush_b; + + VX_elastic_buffer #( + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .SIZE (MEM_REQ_BUF_ENABLE ? 
`TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + ) mem_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (mem_req_valid), + .ready_in (mem_req_ready), + .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), + .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), + .valid_out (mem_bus_tmp_if.req_valid), + .ready_out (mem_bus_tmp_if.req_ready) + ); + + assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; + + if (WRITE_ENABLE) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); + end else begin + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); + end + /////////////////////////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] per_bank_core_req_valid; @@ -439,16 +482,6 @@ module VX_cache import VX_gpu_pkg::*; #( // Memory request arbitration - wire mem_req_valid; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr; - wire mem_req_rw; - wire [LINE_SIZE-1:0] mem_req_byteen; - wire [`CS_LINE_WIDTH-1:0] mem_req_data; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag; - wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; - wire mem_req_flush; - wire mem_req_ready; - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin @@ -485,38 +518,6 @@ module VX_cache import VX_gpu_pkg::*; #( assign mem_req_tag = MEM_TAG_WIDTH'(mem_req_id); end - // Memory request buffering - - wire mem_req_flush_b; - - VX_mem_bus_if #( - .DATA_SIZE (LINE_SIZE), - .TAG_WIDTH (MEM_TAG_WIDTH) - ) mem_bus_tmp_if(); - - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? 
`TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_valid), - .ready_in (mem_req_ready), - .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), - .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), - .valid_out (mem_bus_tmp_if.req_valid), - .ready_out (mem_bus_tmp_if.req_ready) - ); - - assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; - - if (WRITE_ENABLE) begin - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); - end else begin - `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); - end - `ifdef PERF_ENABLE // per cycle: core_reads, core_writes wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; diff --git a/hw/rtl/mem/VX_lmem_demux.sv b/hw/rtl/mem/VX_lmem_demux.sv index 47a3912a5..b3158ad8a 100644 --- a/hw/rtl/mem/VX_lmem_demux.sv +++ b/hw/rtl/mem/VX_lmem_demux.sv @@ -28,6 +28,9 @@ module VX_lmem_demux import VX_gpu_pkg::*; #( localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; + wire req_global_ready; + wire req_local_ready; + for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin assign is_addr_local_mask[i] = lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL]; end @@ -35,8 +38,8 @@ module VX_lmem_demux import VX_gpu_pkg::*; #( wire is_addr_global = | (lsu_in_if.req_data.mask & ~is_addr_local_mask); wire is_addr_local = | (lsu_in_if.req_data.mask & is_addr_local_mask); - wire req_global_ready; - wire req_local_ready; + assign lsu_in_if.req_ready = (req_global_ready && is_addr_global) + || (req_local_ready && is_addr_local); VX_elastic_buffer #( .DATAW (REQ_DATAW), @@ -100,9 +103,6 @@ module VX_lmem_demux import VX_gpu_pkg::*; #( .ready_out 
(lmem_out_if.req_ready) ); - assign lsu_in_if.req_ready = (req_global_ready && is_addr_global) - || (req_local_ready && is_addr_local); - VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/mem/VX_lsu_adapter.sv similarity index 100% rename from hw/rtl/core/VX_lsu_adapter.sv rename to hw/rtl/mem/VX_lsu_adapter.sv diff --git a/hw/rtl/interfaces/VX_lsu_mem_if.sv b/hw/rtl/mem/VX_lsu_mem_if.sv similarity index 100% rename from hw/rtl/interfaces/VX_lsu_mem_if.sv rename to hw/rtl/mem/VX_lsu_mem_if.sv From 7a61b67170373cb4da487552d7e3607b18290c17 Mon Sep 17 00:00:00 2001 From: sij814 Date: Fri, 16 Aug 2024 15:47:03 -0700 Subject: [PATCH 122/488] added CAPS --- runtime/opae/vortex.cpp | 3 +++ runtime/rtlsim/vortex.cpp | 3 +++ runtime/xrt/vortex.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 390d5acc4..06458fa1f 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -232,6 +232,9 @@ public: case VX_CAPS_ISA_FLAGS: _value = isa_caps_; break; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; + break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); std::abort(); diff --git a/runtime/rtlsim/vortex.cpp b/runtime/rtlsim/vortex.cpp index c75a6c12f..91df7f7e8 100644 --- a/runtime/rtlsim/vortex.cpp +++ b/runtime/rtlsim/vortex.cpp @@ -77,6 +77,9 @@ public: case VX_CAPS_ISA_FLAGS: _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; break; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 408bf23ed..5f4e27ff2 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -404,6 +404,9 @@ public: case VX_CAPS_ISA_FLAGS: _value = isa_caps_; break; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; + break; default: 
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); std::abort(); From e34e4b790a623536d02db13228d570f850a9b564 Mon Sep 17 00:00:00 2001 From: sij814 Date: Fri, 16 Aug 2024 16:53:18 -0700 Subject: [PATCH 123/488] forced memory bank change in opae --- sim/opaesim/opae_sim.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 9d43ea595..7a1bae3e4 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,13 +35,13 @@ #include #include -#ifndef MEMORY_BANKS +//#ifndef MEMORY_BANKS #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS #else #define MEMORY_BANKS 2 #endif -#endif +//#endif #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 From 304761c6fc12a0651485880babfd61f41b48ff3c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 16 Aug 2024 22:32:35 -0700 Subject: [PATCH 124/488] fixed blackbox temp driver mode with --rebuild=3 --- ci/blackbox.sh | 458 +++++++++++------------------ ci/regression.sh.in | 3 + config.mk.in | 3 - hw/syn/xilinx/test/kernel/Makefile | 6 +- tests/kernel/common.mk | 6 +- tests/opencl/common.mk | 19 +- tests/regression/basic/Makefile | 2 +- tests/regression/common.mk | 21 +- tests/unittest/common.mk | 4 +- 9 files changed, 201 insertions(+), 321 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index fe94677aa..5c0dfbde1 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +SCRIPT_DIR=$(dirname "$0") +ROOT_DIR=$SCRIPT_DIR/.. + show_usage() { echo "Vortex BlackBox Test Driver v1.0" @@ -29,302 +32,169 @@ show_help() echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp" } -SCRIPT_DIR=$(dirname "$0") -ROOT_DIR=$SCRIPT_DIR/.. 
- -DRIVER=simx -APP=sgemm -CLUSTERS=1 -CORES=1 -WARPS=4 -THREADS=4 -L2= -L3= -DEBUG=0 -DEBUG_LEVEL=0 -SCOPE=0 -HAS_ARGS=0 -PERF_CLASS=0 -REBUILD=2 -TEMPBUILD=0 -LOGFILE=run.log - -for i in "$@" -do -case $i in - --driver=*) - DRIVER=${i#*=} - shift - ;; - --app=*) - APP=${i#*=} - shift - ;; - --clusters=*) - CLUSTERS=${i#*=} - shift - ;; - --cores=*) - CORES=${i#*=} - shift - ;; - --warps=*) - WARPS=${i#*=} - shift - ;; - --threads=*) - THREADS=${i#*=} - shift - ;; - --l2cache) - L2=-DL2_ENABLE - shift - ;; - --l3cache) - L3=-DL3_ENABLE - shift - ;; - --debug=*) - DEBUG_LEVEL=${i#*=} - DEBUG=1 - shift - ;; - --scope) - SCOPE=1 - CORES=1 - shift - ;; - --perf=*) - PERF_FLAG=-DPERF_ENABLE - PERF_CLASS=${i#*=} - shift - ;; - --args=*) - ARGS=${i#*=} - HAS_ARGS=1 - shift - ;; - --rebuild=*) - REBUILD=${i#*=} - shift - ;; - --log=*) - LOGFILE=${i#*=} - shift - ;; - --help) - show_help - exit 0 - ;; - *) - show_usage - exit -1 - ;; -esac -done - -if [ $REBUILD -eq 3 ]; -then - REBUILD=1 - TEMPBUILD=1 -fi - -case $DRIVER in - gpu) - DRIVER_PATH= - ;; - simx) - DRIVER_PATH=$ROOT_DIR/runtime/simx - ;; - rtlsim) - DRIVER_PATH=$ROOT_DIR/runtime/rtlsim - ;; - opae) - DRIVER_PATH=$ROOT_DIR/runtime/opae - ;; - xrt) - DRIVER_PATH=$ROOT_DIR/runtime/xrt - ;; - *) - echo "invalid driver: $DRIVER" - exit -1 - ;; -esac - -if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; -then - APP_PATH=$ROOT_DIR/tests/opencl/$APP -elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; -then - APP_PATH=$ROOT_DIR/tests/regression/$APP -else - echo "Application folder not found: $APP" - exit -1 -fi - -if [ "$DRIVER" = "gpu" ]; -then - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? +add_option() { + if [ -n "$1" ]; then + echo "$1 $2" else - echo "running: make -C $APP_PATH run-$DRIVER" - make -C $APP_PATH run-$DRIVER - status=$? 
+ echo "$2" + fi +} + +DEFAULTS() { + DRIVER=simx + APP=sgemm + DEBUG=0 + DEBUG_LEVEL=0 + SCOPE=0 + HAS_ARGS=0 + PERF_CLASS=0 + CONFIGS="$CONFIGS" + REBUILD=2 + TEMPBUILD=0 + LOGFILE=run.log +} + +parse_args() { + DEFAULTS + for i in "$@"; do + case $i in + --driver=*) DRIVER=${i#*=} ;; + --app=*) APP=${i#*=} ;; + --clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;; + --cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;; + --warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;; + --threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;; + --l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;; + --l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;; + --perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;; + --debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;; + --scope) SCOPE=1; ;; + --args=*) HAS_ARGS=1; ARGS=${i#*=} ;; + --rebuild=*) REBUILD=${i#*=} ;; + --log=*) LOGFILE=${i#*=} ;; + --help) show_help; exit 0 ;; + *) show_usage; exit 1 ;; + esac + done + + if [ $REBUILD -eq 3 ]; + then + REBUILD=1 + TEMPBUILD=1 + fi +} + +set_driver_path() { + case $DRIVER in + gpu) DRIVER_PATH="" ;; + simx|rtlsim|opae|xrt) DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER" ;; + *) echo "Invalid driver: $DRIVER"; exit 1 ;; + esac +} + +set_app_path() { + if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; then + APP_PATH="$ROOT_DIR/tests/opencl/$APP" + elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; then + APP_PATH="$ROOT_DIR/tests/regression/$APP" + else + echo "Application folder not found: $APP" + exit 1 + fi +} + +build_driver() { + local cmd_opts="" + [ $DEBUG -ne 0 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=$DEBUG_LEVEL") + [ $SCOPE -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "SCOPE=1") + [ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DESTDIR=\"$TEMPDIR\"") + [ -n "$CONFIGS" ] && cmd_opts=$(add_option "$cmd_opts" "CONFIGS=\"$CONFIGS\"") + + if [ -n "$cmd_opts" ]; then + echo "Running: 
$cmd_opts make -C $DRIVER_PATH > /dev/null" + eval "$cmd_opts make -C $DRIVER_PATH > /dev/null" + else + echo "Running: make -C $DRIVER_PATH > /dev/null" + make -C $DRIVER_PATH > /dev/null + fi +} + +run_app() { + local cmd_opts="" + [ $DEBUG -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=1") + [ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"") + [ $HAS_ARGS -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "OPTS=\"$ARGS\"") + + if [ $DEBUG -ne 0 ]; then + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" + eval "$cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" + else + echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" + make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 + fi + else + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER" + eval "$cmd_opts make -C $APP_PATH run-$DRIVER" + else + echo "Running: make -C $APP_PATH run-$DRIVER" + make -C $APP_PATH run-$DRIVER + fi + fi + status=$? 
+ exit $status +} + +main() { + parse_args "$@" + set_driver_path + set_app_path + + # execute on default installed GPU + if [ "$DRIVER" = "gpu" ]; then + run_app + exit $status + fi + + if [ -n "$CONFIGS" ]; then + echo "CONFIGS=$CONFIGS" + fi + + if [ $REBUILD -ne 0 ]; then + BLACKBOX_CACHE=blackbox.$DRIVER.cache + LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "") + + if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then + make -C $DRIVER_PATH clean-driver > /dev/null + echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE" + fi + fi + + export VORTEX_PROFILING=$PERF_CLASS + + make -C "$ROOT_DIR/hw" config > /dev/null + make -C "$ROOT_DIR/runtime/stub" > /dev/null + + if [ $TEMPBUILD -eq 1 ]; then + # setup temp directory + TEMPDIR=$(mktemp -d) + mkdir -p "$TEMPDIR" + # build stub driver + echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub" + DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null + # register tempdir cleanup on exit + trap "rm -rf $TEMPDIR" EXIT + fi + + build_driver + run_app + + if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then + mv -f $APP_PATH/trace.vcd . 
fi exit $status -fi +} -CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" - -echo "CONFIGS=$CONFIGS" - -if [ $REBUILD -ne 0 ] -then - BLACKBOX_CACHE=blackbox.$DRIVER.cache - if [ -f "$BLACKBOX_CACHE" ] - then - LAST_CONFIGS=`cat $BLACKBOX_CACHE` - fi - - if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; - then - make -C $DRIVER_PATH clean-driver > /dev/null - echo "$CONFIGS+$DEBUG+$SCOPE" > $BLACKBOX_CACHE - fi -fi - -# export performance monitor class identifier -export VORTEX_PROFILING=$PERF_CLASS - -status=0 - -# ensure config update -make -C $ROOT_DIR/hw config > /dev/null - -# ensure the stub driver is present -make -C $ROOT_DIR/runtime/stub > /dev/null - -if [ $DEBUG -ne 0 ] -then - # running application - if [ $TEMPBUILD -eq 1 ] - then - # setup temp directory - TEMPDIR=$(mktemp -d) - mkdir -p "$TEMPDIR/$DRIVER" - - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi - - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? - else - echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? 
- fi - - # cleanup temp directory - trap "rm -rf $TEMPDIR" EXIT - else - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi - - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? - else - echo "running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? - fi - fi - - if [ -f "$APP_PATH/trace.vcd" ] - then - mv -f $APP_PATH/trace.vcd . - fi -else - if [ $TEMPBUILD -eq 1 ] - then - # setup temp directory - TEMPDIR=$(mktemp -d) - mkdir -p "$TEMPDIR/$DRIVER" - - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DESTDIR=$TEMPDIR/$DRIVER SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: DESTDIR=$TEMPDIR/$DRIVER CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi - - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? - else - echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER" - VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER - status=$? 
- fi - - # cleanup temp directory - trap "rm -rf $TEMPDIR" EXIT - else - - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: CONFIGS=$CONFIGS make -C $DRIVER_PATH" - CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi - - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? - else - echo "running: make -C $APP_PATH run-$DRIVER" - make -C $APP_PATH run-$DRIVER - status=$? - fi - fi -fi - -exit $status +main "$@" \ No newline at end of file diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 3cd46a463..e0da29e20 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -105,6 +105,9 @@ regression() ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar" + # test temp driver mode for + ./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3 + echo "regression tests done!" 
} diff --git a/config.mk.in b/config.mk.in index 81339f195..12593924f 100644 --- a/config.mk.in +++ b/config.mk.in @@ -31,7 +31,4 @@ RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) -VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime -VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel - THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party \ No newline at end of file diff --git a/hw/syn/xilinx/test/kernel/Makefile b/hw/syn/xilinx/test/kernel/Makefile index 515533689..9f3b95c1a 100644 --- a/hw/syn/xilinx/test/kernel/Makefile +++ b/hw/syn/xilinx/test/kernel/Makefile @@ -19,9 +19,9 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy CFLAGS += -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_HOME)/hw +CFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(VORTEX_HOME)/hw -LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 +LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 PROJECT = kernel @@ -48,4 +48,4 @@ $(PROJECT).elf: $(SRCS) $(CC) $(CFLAGS) -MM $^ > .depend; clean: - rm -rf *.bin *.elf *.hex *.dump *.coe .depend + rm -rf *.bin *.elf *.hex *.dump *.coe .depend diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index e3f6b472b..050b1b48d 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -6,6 +6,8 @@ else CFLAGS += -march=rv32imaf -mabi=ilp32f endif +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-divergence=0 @@ -23,13 +25,13 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy 
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw +CFLAGS += -I$(VORTEX_HOME)/kernel/include -I$(ROOT_DIR)/hw CFLAGS += -DXLEN_$(XLEN) -DNDEBUG LIBC_LIB += -L$(LIBC_VORTEX)/lib -lm -lc LIBC_LIB += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a -LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(ROOT_DIR)/kernel/libvortex.a $(LIBC_LIB) +LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(VORTEX_KN_PATH)/libvortex.a $(LIBC_LIB) all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 2e287a944..dd5af90db 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -15,6 +15,9 @@ STARTUP_ADDR ?= 0x80000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=32 endif +VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + POCL_PATH ?= $(TOOLDIR)/pocl LLVM_POCL ?= $(TOOLDIR)/llvm-vortex @@ -26,14 +29,14 @@ VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a VX_CFLAGS += -O3 -mcmodel=medany --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) VX_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -VX_CFLAGS += -I$(ROOT_DIR)/hw -I$(VORTEX_KN_PATH)/include -DXLEN_$(XLEN) -DNDEBUG +VX_CFLAGS += -I$(ROOT_DIR)/hw -I$(VORTEX_HOME)/kernel/include -DXLEN_$(XLEN) -DNDEBUG VX_CFLAGS += -Xclang -target-feature -Xclang +vortex VX_CFLAGS += -Xclang -target-feature -Xclang +zicond VX_CFLAGS += -mllvm -disable-loop-idiom-all #VX_CFLAGS += -mllvm -vortex-branch-divergence=0 #VX_CFLAGS += -mllvm -print-after-all -VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortex.a 
$(VX_LIBS) +VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) VX_BINTOOL += OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py @@ -80,7 +83,7 @@ all: $(PROJECT) $(CC) $(CXXFLAGS) -c $< -o $@ $(PROJECT): $(OBJS) - $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -L$(ROOT_DIR)/runtime -lvortex -L$(POCL_PATH)/lib -lOpenCL -o $@ + $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -L$(VORTEX_RT_PATH) -lvortex -L$(POCL_PATH)/lib -lOpenCL -o $@ $(PROJECT).host: $(OBJS) $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -lOpenCL -o $@ @@ -89,19 +92,19 @@ run-gpu: $(PROJECT).host $(KERNEL_SRCS) ./$(PROJECT).host $(OPTS) run-simx: $(PROJECT) $(KERNEL_SRCS) - LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) $(KERNEL_SRCS) - LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) run-opae: $(PROJECT) $(KERNEL_SRCS) - SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini 
EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/basic/Makefile b/tests/regression/basic/Makefile index a8e86cc17..5940ca65c 100644 --- a/tests/regression/basic/Makefile +++ b/tests/regression/basic/Makefile @@ -13,7 +13,7 @@ OPTS ?= -n256 include ../common.mk -VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) +VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-g++ diff --git a/tests/regression/common.mk b/tests/regression/common.mk 
index 12b45e848..c4a00bc13 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -5,6 +5,9 @@ TARGET ?= opaesim XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt XRT_DEVICE_INDEX ?= 0 +VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d STARTUP_ADDR ?= 0x180000000 @@ -36,7 +39,7 @@ VX_CP = $(LLVM_VORTEX)/bin/llvm-objcopy #VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy VX_CFLAGS += -O3 -mcmodel=medany -fno-rtti -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -VX_CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw +VX_CFLAGS += -I$(VORTEX_HOME)/kernel/include -I$(ROOT_DIR)/hw VX_CFLAGS += -DXLEN_$(XLEN) VX_CFLAGS += -DNDEBUG @@ -45,12 +48,12 @@ VX_LIBS += -L$(LIBC_VORTEX)/lib -lm -lc VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a #VX_LIBS += -lgcc -VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortex.a $(VX_LIBS) +VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_RT_PATH)/include -I$(ROOT_DIR)/hw +CXXFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(ROOT_DIR)/hw -LDFLAGS += -L$(ROOT_DIR)/runtime -lvortex +LDFLAGS += -L$(VORTEX_RT_PATH) -lvortex # Debugging ifdef DEBUG @@ -86,19 +89,19 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ run-simx: $(PROJECT) kernel.vxbin - LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) kernel.vxbin - LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=rtlsim ./$(PROJECT) 
$(OPTS) + LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) run-opae: $(PROJECT) kernel.vxbin - SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/unittest/common.mk b/tests/unittest/common.mk index a6f6b2794..384a2f02c 100644 --- a/tests/unittest/common.mk +++ b/tests/unittest/common.mk @@ -1,6 +1,8 @@ +ROOT_DIR := $(realpath ../../..) 
+ CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_RT_PATH)/common +CXXFLAGS += -I$(VORTEX_HOME)/runtime/common # Debugging ifdef DEBUG From 9fc9b433073e54b7d4da1a6cadba8786fbe8e27e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 02:18:04 -0700 Subject: [PATCH 125/488] OPAE runtime bug fix --- runtime/opae/Makefile | 5 +++-- runtime/opae/driver.h | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index 1a9810eca..9650915ea 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -1,3 +1,4 @@ +ROOT_DIR := $(realpath ../..) include ../common.mk TARGET ?= opaesim @@ -25,9 +26,9 @@ SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp # set up target types ifeq ($(TARGET), opaesim) OPAESIM = $(DESTDIR)/libopae-c-sim.so - CXXFLAGS += -I$(SIM_DIR)/opaesim + CXXFLAGS += -DOPAESIM -I$(SIM_DIR)/opaesim else - CXXFLAGS += -I$(SYN_DIR) + CXXFLAGS += -I$(SYN_DIR) -I$(ROOT_DIR)/hw/syn/altera/opae endif # Debugging diff --git a/runtime/opae/driver.h b/runtime/opae/driver.h index 0d1d4daa7..0a45b6f67 100644 --- a/runtime/opae/driver.h +++ b/runtime/opae/driver.h @@ -13,7 +13,11 @@ #pragma once +#ifdef OPAESIM #include +#else +#include +#endif typedef fpga_result (*pfn_fpgaGetProperties)(fpga_token token, fpga_properties *prop); typedef fpga_result (*pfn_fpgaPropertiesSetObjectType)(fpga_properties prop, fpga_objtype objtype); From 4b6f8efeaa2178627b390ababcc4d7952c219c11 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:07:10 -0700 Subject: [PATCH 126/488] removing trace_pkg to fix unsupported package dependencies --- hw/rtl/VX_gpu_pkg.sv | 376 +++++++++++++++++++++++++++++++ hw/rtl/afu/opae/vortex_afu.sv | 3 - hw/rtl/core/VX_commit.sv | 2 +- hw/rtl/core/VX_dcr_data.sv | 2 +- hw/rtl/core/VX_decode.sv | 2 +- hw/rtl/core/VX_issue_slice.sv | 2 +- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/core/VX_trace_pkg.sv | 399 
--------------------------------- hw/unittest/core_top/Makefile | 2 +- hw/unittest/issue_top/Makefile | 2 +- sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/xrtsim/Makefile | 2 +- 13 files changed, 386 insertions(+), 412 deletions(-) delete mode 100644 hw/rtl/core/VX_trace_pkg.sv diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 393f2a66f..f29067855 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -308,6 +308,382 @@ package VX_gpu_pkg; `IGNORE_UNUSED_END +////////////////////////////////// Tracing //////////////////////////////////// + +`ifdef SIMULATION + +`ifdef SV_DPI + import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/); +`endif + + task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); + case (ex_type) + `EX_ALU: `TRACE(level, ("ALU")); + `EX_LSU: `TRACE(level, ("LSU")); + `EX_FPU: `TRACE(level, ("FPU")); + `EX_SFU: `TRACE(level, ("SFU")); + default: `TRACE(level, ("?")); + endcase + endtask + + task trace_ex_op(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + case (op_args.alu.xtype) + `ALU_TYPE_ARITH: begin + if (op_args.alu.is_w) begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDIW")); + `INST_ALU_SLL: `TRACE(level, ("SLLIW")); + `INST_ALU_SRL: `TRACE(level, ("SRLIW")); + `INST_ALU_SRA: `TRACE(level, ("SRAIW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDW")); + `INST_ALU_SUB: `TRACE(level, ("SUBW")); + `INST_ALU_SLL: `TRACE(level, ("SLLW")); + `INST_ALU_SRL: `TRACE(level, ("SRLW")); + `INST_ALU_SRA: `TRACE(level, ("SRAW")); + default: `TRACE(level, ("?")); + endcase + end + end else begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDI")); + 
`INST_ALU_SLL: `TRACE(level, ("SLLI")); + `INST_ALU_SRL: `TRACE(level, ("SRLI")); + `INST_ALU_SRA: `TRACE(level, ("SRAI")); + `INST_ALU_SLT: `TRACE(level, ("SLTI")); + `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); + `INST_ALU_XOR: `TRACE(level, ("XORI")); + `INST_ALU_OR: `TRACE(level, ("ORI")); + `INST_ALU_AND: `TRACE(level, ("ANDI")); + `INST_ALU_LUI: `TRACE(level, ("LUI")); + `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADD")); + `INST_ALU_SUB: `TRACE(level, ("SUB")); + `INST_ALU_SLL: `TRACE(level, ("SLL")); + `INST_ALU_SRL: `TRACE(level, ("SRL")); + `INST_ALU_SRA: `TRACE(level, ("SRA")); + `INST_ALU_SLT: `TRACE(level, ("SLT")); + `INST_ALU_SLTU: `TRACE(level, ("SLTU")); + `INST_ALU_XOR: `TRACE(level, ("XOR")); + `INST_ALU_OR: `TRACE(level, ("OR")); + `INST_ALU_AND: `TRACE(level, ("AND")); + `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); + `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); + default: `TRACE(level, ("?")); + endcase + end + end + end + `ALU_TYPE_BRANCH: begin + case (`INST_BR_BITS'(op_type)) + `INST_BR_EQ: `TRACE(level, ("BEQ")); + `INST_BR_NE: `TRACE(level, ("BNE")); + `INST_BR_LT: `TRACE(level, ("BLT")); + `INST_BR_GE: `TRACE(level, ("BGE")); + `INST_BR_LTU: `TRACE(level, ("BLTU")); + `INST_BR_GEU: `TRACE(level, ("BGEU")); + `INST_BR_JAL: `TRACE(level, ("JAL")); + `INST_BR_JALR: `TRACE(level, ("JALR")); + `INST_BR_ECALL: `TRACE(level, ("ECALL")); + `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); + `INST_BR_URET: `TRACE(level, ("URET")); + `INST_BR_SRET: `TRACE(level, ("SRET")); + `INST_BR_MRET: `TRACE(level, ("MRET")); + default: `TRACE(level, ("?")); + endcase + end + `ALU_TYPE_MULDIV: begin + if (op_args.alu.is_w) begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MULW")); + `INST_M_DIV: `TRACE(level, ("DIVW")); + `INST_M_DIVU: `TRACE(level, ("DIVUW")); + `INST_M_REM: `TRACE(level, ("REMW")); + `INST_M_REMU: 
`TRACE(level, ("REMUW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MUL")); + `INST_M_MULH: `TRACE(level, ("MULH")); + `INST_M_MULHSU:`TRACE(level, ("MULHSU")); + `INST_M_MULHU: `TRACE(level, ("MULHU")); + `INST_M_DIV: `TRACE(level, ("DIV")); + `INST_M_DIVU: `TRACE(level, ("DIVU")); + `INST_M_REM: `TRACE(level, ("REM")); + `INST_M_REMU: `TRACE(level, ("REMU")); + default: `TRACE(level, ("?")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_LSU: begin + if (op_args.lsu.is_float) begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LW: `TRACE(level, ("FLW")); + `INST_LSU_LD: `TRACE(level, ("FLD")); + `INST_LSU_SW: `TRACE(level, ("FSW")); + `INST_LSU_SD: `TRACE(level, ("FSD")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LB: `TRACE(level, ("LB")); + `INST_LSU_LH: `TRACE(level, ("LH")); + `INST_LSU_LW: `TRACE(level, ("LW")); + `INST_LSU_LD: `TRACE(level, ("LD")); + `INST_LSU_LBU:`TRACE(level, ("LBU")); + `INST_LSU_LHU:`TRACE(level, ("LHU")); + `INST_LSU_LWU:`TRACE(level, ("LWU")); + `INST_LSU_SB: `TRACE(level, ("SB")); + `INST_LSU_SH: `TRACE(level, ("SH")); + `INST_LSU_SW: `TRACE(level, ("SW")); + `INST_LSU_SD: `TRACE(level, ("SD")); + `INST_LSU_FENCE:`TRACE(level,("FENCE")); + default: `TRACE(level, ("?")); + endcase + end + end + `EX_FPU: begin + case (`INST_FPU_BITS'(op_type)) + `INST_FPU_ADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FADD.D")); + else + `TRACE(level, ("FADD.S")); + end + `INST_FPU_SUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSUB.D")); + else + `TRACE(level, ("FSUB.S")); + end + `INST_FPU_MUL: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMUL.D")); + else + `TRACE(level, ("FMUL.S")); + end + `INST_FPU_DIV: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FDIV.D")); + else + `TRACE(level, ("FDIV.S")); + end + `INST_FPU_SQRT: begin + if (op_args.fpu.fmt[0]) 
+ `TRACE(level, ("FSQRT.D")); + else + `TRACE(level, ("FSQRT.S")); + end + `INST_FPU_MADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMADD.D")); + else + `TRACE(level, ("FMADD.S")); + end + `INST_FPU_MSUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMSUB.D")); + else + `TRACE(level, ("FMSUB.S")); + end + `INST_FPU_NMADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMADD.D")); + else + `TRACE(level, ("FNMADD.S")); + end + `INST_FPU_NMSUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMSUB.D")); + else + `TRACE(level, ("FNMSUB.S")); + end + `INST_FPU_CMP: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.D")); + 1: `TRACE(level, ("FLT.D")); + 2: `TRACE(level, ("FEQ.D")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.S")); + 1: `TRACE(level, ("FLT.S")); + 2: `TRACE(level, ("FEQ.S")); + default: `TRACE(level, ("?")); + endcase + end + end + `INST_FPU_F2F: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FCVT.D.S")); + end else begin + `TRACE(level, ("FCVT.S.D")); + end + end + `INST_FPU_F2I: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.D")); + end else begin + `TRACE(level, ("FCVT.W.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.S")); + end else begin + `TRACE(level, ("FCVT.W.S")); + end + end + end + `INST_FPU_F2U: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.D")); + end else begin + `TRACE(level, ("FCVT.WU.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.S")); + end else begin + `TRACE(level, ("FCVT.WU.S")); + end + end + end + `INST_FPU_I2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.L")); + end else begin + `TRACE(level, ("FCVT.D.W")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + 
`TRACE(level, ("FCVT.S.L")); + end else begin + `TRACE(level, ("FCVT.S.W")); + end + end + end + `INST_FPU_U2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.LU")); + end else begin + `TRACE(level, ("FCVT.D.WU")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.LU")); + end else begin + `TRACE(level, ("FCVT.S.WU")); + end + end + end + `INST_FPU_MISC: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.D")); + 1: `TRACE(level, ("FSGNJN.D")); + 2: `TRACE(level, ("FSGNJX.D")); + 3: `TRACE(level, ("FCLASS.D")); + 4: `TRACE(level, ("FMV.X.D")); + 5: `TRACE(level, ("FMV.D.X")); + 6: `TRACE(level, ("FMIN.D")); + 7: `TRACE(level, ("FMAX.D")); + endcase + end else begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.S")); + 1: `TRACE(level, ("FSGNJN.S")); + 2: `TRACE(level, ("FSGNJX.S")); + 3: `TRACE(level, ("FCLASS.S")); + 4: `TRACE(level, ("FMV.X.S")); + 5: `TRACE(level, ("FMV.S.X")); + 6: `TRACE(level, ("FMIN.S")); + 7: `TRACE(level, ("FMAX.S")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_SFU: begin + case (`INST_SFU_BITS'(op_type)) + `INST_SFU_TMC: `TRACE(level, ("TMC")); + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")); + `INST_SFU_BAR: `TRACE(level, ("BAR")); + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end + `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end + default: `TRACE(level, ("?")); + endcase + end + default: 
`TRACE(level, ("?")); + endcase + endtask + + task trace_op_args(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); + end + `EX_LSU: begin + `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); + end + `EX_FPU: begin + `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + end + `EX_SFU: begin + if (`INST_SFU_IS_CSR(op_type)) begin + `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); + end + end + default:; + endcase + endtask + + task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); + case (addr) + `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); + `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); + `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); + `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); + `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); + default: `TRACE(level, ("?")); + endcase + endtask + +`endif + endpackage `endif // VX_GPU_PKG_VH diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index b67cae3a5..cb5725e78 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -518,7 +518,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ ); assign cci_vx_mem_bus_if[1].req_data.flags = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.flags) //-- @@ -571,7 +570,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ ); assign cci_vx_mem_bus_if[0].req_data.flags = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.flags) //-- VX_mem_bus_if #( @@ -639,7 +637,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ 
.avs_readdatavalid(avs_readdatavalid) ); - assign mem_bus_if[0].req_data.flags = '0; `UNUSED_VAR (mem_bus_if[0].req_data.flags) // CCI-P Read Request /////////////////////////////////////////////////////////// diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index 7106cc65f..f945c7903 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_commit import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 4ac137547..b20d95fc7 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; ( +module VX_dcr_data import VX_gpu_pkg::*; ( input wire clk, input wire reset, diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 9660859ce..4f6ffe100 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -27,7 +27,7 @@ use_``x = 1 `endif -module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_decode import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 03b91b5fe..4b4e168a2 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_issue_slice import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter ISSUE_ID = 0 ) ( diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 6de901182..f83b23fb3 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_lsu_slice import VX_gpu_pkg::*; #( 
parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL diff --git a/hw/rtl/core/VX_trace_pkg.sv b/hw/rtl/core/VX_trace_pkg.sv deleted file mode 100644 index b4eae96fe..000000000 --- a/hw/rtl/core/VX_trace_pkg.sv +++ /dev/null @@ -1,399 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`ifndef VX_TRACE_PKG_VH -`define VX_TRACE_PKG_VH - -`include "VX_define.vh" - -package VX_trace_pkg; - -`ifdef SIMULATION - -`ifdef SV_DPI - import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/); -`endif - - import VX_gpu_pkg::*; - - task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); - case (ex_type) - `EX_ALU: `TRACE(level, ("ALU")); - `EX_LSU: `TRACE(level, ("LSU")); - `EX_FPU: `TRACE(level, ("FPU")); - `EX_SFU: `TRACE(level, ("SFU")); - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_ex_op(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - case (op_args.alu.xtype) - `ALU_TYPE_ARITH: begin - if (op_args.alu.is_w) begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDIW")); - `INST_ALU_SLL: `TRACE(level, ("SLLIW")); - `INST_ALU_SRL: `TRACE(level, ("SRLIW")); - `INST_ALU_SRA: `TRACE(level, ("SRAIW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - 
`INST_ALU_ADD: `TRACE(level, ("ADDW")); - `INST_ALU_SUB: `TRACE(level, ("SUBW")); - `INST_ALU_SLL: `TRACE(level, ("SLLW")); - `INST_ALU_SRL: `TRACE(level, ("SRLW")); - `INST_ALU_SRA: `TRACE(level, ("SRAW")); - default: `TRACE(level, ("?")); - endcase - end - end else begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDI")); - `INST_ALU_SLL: `TRACE(level, ("SLLI")); - `INST_ALU_SRL: `TRACE(level, ("SRLI")); - `INST_ALU_SRA: `TRACE(level, ("SRAI")); - `INST_ALU_SLT: `TRACE(level, ("SLTI")); - `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); - `INST_ALU_XOR: `TRACE(level, ("XORI")); - `INST_ALU_OR: `TRACE(level, ("ORI")); - `INST_ALU_AND: `TRACE(level, ("ANDI")); - `INST_ALU_LUI: `TRACE(level, ("LUI")); - `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADD")); - `INST_ALU_SUB: `TRACE(level, ("SUB")); - `INST_ALU_SLL: `TRACE(level, ("SLL")); - `INST_ALU_SRL: `TRACE(level, ("SRL")); - `INST_ALU_SRA: `TRACE(level, ("SRA")); - `INST_ALU_SLT: `TRACE(level, ("SLT")); - `INST_ALU_SLTU: `TRACE(level, ("SLTU")); - `INST_ALU_XOR: `TRACE(level, ("XOR")); - `INST_ALU_OR: `TRACE(level, ("OR")); - `INST_ALU_AND: `TRACE(level, ("AND")); - `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); - `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); - default: `TRACE(level, ("?")); - endcase - end - end - end - `ALU_TYPE_BRANCH: begin - case (`INST_BR_BITS'(op_type)) - `INST_BR_EQ: `TRACE(level, ("BEQ")); - `INST_BR_NE: `TRACE(level, ("BNE")); - `INST_BR_LT: `TRACE(level, ("BLT")); - `INST_BR_GE: `TRACE(level, ("BGE")); - `INST_BR_LTU: `TRACE(level, ("BLTU")); - `INST_BR_GEU: `TRACE(level, ("BGEU")); - `INST_BR_JAL: `TRACE(level, ("JAL")); - `INST_BR_JALR: `TRACE(level, ("JALR")); - `INST_BR_ECALL: `TRACE(level, ("ECALL")); - `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); - `INST_BR_URET: `TRACE(level, ("URET")); - `INST_BR_SRET: 
`TRACE(level, ("SRET")); - `INST_BR_MRET: `TRACE(level, ("MRET")); - default: `TRACE(level, ("?")); - endcase - end - `ALU_TYPE_MULDIV: begin - if (op_args.alu.is_w) begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MULW")); - `INST_M_DIV: `TRACE(level, ("DIVW")); - `INST_M_DIVU: `TRACE(level, ("DIVUW")); - `INST_M_REM: `TRACE(level, ("REMW")); - `INST_M_REMU: `TRACE(level, ("REMUW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MUL")); - `INST_M_MULH: `TRACE(level, ("MULH")); - `INST_M_MULHSU:`TRACE(level, ("MULHSU")); - `INST_M_MULHU: `TRACE(level, ("MULHU")); - `INST_M_DIV: `TRACE(level, ("DIV")); - `INST_M_DIVU: `TRACE(level, ("DIVU")); - `INST_M_REM: `TRACE(level, ("REM")); - `INST_M_REMU: `TRACE(level, ("REMU")); - default: `TRACE(level, ("?")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_LSU: begin - if (op_args.lsu.is_float) begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LW: `TRACE(level, ("FLW")); - `INST_LSU_LD: `TRACE(level, ("FLD")); - `INST_LSU_SW: `TRACE(level, ("FSW")); - `INST_LSU_SD: `TRACE(level, ("FSD")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LB: `TRACE(level, ("LB")); - `INST_LSU_LH: `TRACE(level, ("LH")); - `INST_LSU_LW: `TRACE(level, ("LW")); - `INST_LSU_LD: `TRACE(level, ("LD")); - `INST_LSU_LBU:`TRACE(level, ("LBU")); - `INST_LSU_LHU:`TRACE(level, ("LHU")); - `INST_LSU_LWU:`TRACE(level, ("LWU")); - `INST_LSU_SB: `TRACE(level, ("SB")); - `INST_LSU_SH: `TRACE(level, ("SH")); - `INST_LSU_SW: `TRACE(level, ("SW")); - `INST_LSU_SD: `TRACE(level, ("SD")); - `INST_LSU_FENCE:`TRACE(level,("FENCE")); - default: `TRACE(level, ("?")); - endcase - end - end - `EX_FPU: begin - case (`INST_FPU_BITS'(op_type)) - `INST_FPU_ADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); - else - `TRACE(level, ("FADD.S")); - end - `INST_FPU_SUB: begin - if 
(op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); - else - `TRACE(level, ("FSUB.S")); - end - `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMUL.D")); - else - `TRACE(level, ("FMUL.S")); - end - `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FDIV.D")); - else - `TRACE(level, ("FDIV.S")); - end - `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSQRT.D")); - else - `TRACE(level, ("FSQRT.S")); - end - `INST_FPU_MADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); - else - `TRACE(level, ("FMADD.S")); - end - `INST_FPU_MSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); - else - `TRACE(level, ("FMSUB.S")); - end - `INST_FPU_NMADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); - else - `TRACE(level, ("FNMADD.S")); - end - `INST_FPU_NMSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); - else - `TRACE(level, ("FNMSUB.S")); - end - `INST_FPU_CMP: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.D")); - 1: `TRACE(level, ("FLT.D")); - 2: `TRACE(level, ("FEQ.D")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.S")); - 1: `TRACE(level, ("FLT.S")); - 2: `TRACE(level, ("FEQ.S")); - default: `TRACE(level, ("?")); - endcase - end - end - `INST_FPU_F2F: begin - if (op_args.fpu.fmt[0]) begin - `TRACE(level, ("FCVT.D.S")); - end else begin - `TRACE(level, ("FCVT.S.D")); - end - end - `INST_FPU_F2I: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.D")); - end else begin - `TRACE(level, ("FCVT.W.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.S")); - end else begin - `TRACE(level, ("FCVT.W.S")); - end - end - end - `INST_FPU_F2U: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.D")); - end else begin - `TRACE(level, 
("FCVT.WU.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.S")); - end else begin - `TRACE(level, ("FCVT.WU.S")); - end - end - end - `INST_FPU_I2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.L")); - end else begin - `TRACE(level, ("FCVT.D.W")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.L")); - end else begin - `TRACE(level, ("FCVT.S.W")); - end - end - end - `INST_FPU_U2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.LU")); - end else begin - `TRACE(level, ("FCVT.D.WU")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.LU")); - end else begin - `TRACE(level, ("FCVT.S.WU")); - end - end - end - `INST_FPU_MISC: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.D")); - 1: `TRACE(level, ("FSGNJN.D")); - 2: `TRACE(level, ("FSGNJX.D")); - 3: `TRACE(level, ("FCLASS.D")); - 4: `TRACE(level, ("FMV.X.D")); - 5: `TRACE(level, ("FMV.D.X")); - 6: `TRACE(level, ("FMIN.D")); - 7: `TRACE(level, ("FMAX.D")); - endcase - end else begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.S")); - 1: `TRACE(level, ("FSGNJN.S")); - 2: `TRACE(level, ("FSGNJX.S")); - 3: `TRACE(level, ("FCLASS.S")); - 4: `TRACE(level, ("FMV.X.S")); - 5: `TRACE(level, ("FMV.S.X")); - 6: `TRACE(level, ("FMIN.S")); - 7: `TRACE(level, ("FMAX.S")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_SFU: begin - case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - 
`INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); - endcase - end - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_op_args(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); - end - `EX_LSU: begin - `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); - end - `EX_FPU: begin - `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); - end - `EX_SFU: begin - if (`INST_SFU_IS_CSR(op_type)) begin - `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); - end - end - default:; - endcase - endtask - - task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); - case (addr) - `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); - `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); - `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); - `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); - `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); - default: `TRACE(level, ("?")); - endcase - endtask - -`endif - -endpackage - -`endif // VX_TRACE_PKG_VH diff --git a/hw/unittest/core_top/Makefile b/hw/unittest/core_top/Makefile index d9fbf40f6..f9d037999 100644 --- a/hw/unittest/core_top/Makefile +++ b/hw/unittest/core_top/Makefile @@ -16,7 +16,7 @@ SRCS += $(SRC_DIR)/main.cpp DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv 
$(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core diff --git a/hw/unittest/issue_top/Makefile b/hw/unittest/issue_top/Makefile index 7e298849c..b6a8b0527 100644 --- a/hw/unittest/issue_top/Makefile +++ b/hw/unittest/issue_top/Makefile @@ -16,7 +16,7 @@ SRCS += $(SRC_DIR)/main.cpp DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 2e549ca74..9c6314ecf 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -54,7 +54,7 @@ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv -RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 3deffc759..638d7403f 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -26,7 +26,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) -RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 765e3e268..1e0d11b66 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -53,7 
+53,7 @@ SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp -RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) From 20b82fd34d24675debc14a5eda0baebba45331a0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:09:50 -0700 Subject: [PATCH 127/488] update configure to deep-copy syn directory tree --- configure | 2 +- hw/syn/altera/opae/Makefile | 10 ++++------ hw/syn/altera/quartus/common.mk | 4 +--- hw/syn/xilinx/xrt/Makefile | 6 ++---- hw/syn/yosys/Makefile | 8 +++----- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/configure b/configure index 62975784b..37e95a2bd 100755 --- a/configure +++ b/configure @@ -164,7 +164,7 @@ if [ "$OSVERSION" == "unsupported" ]; then fi # project subdirectories to build -SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*") +SUBDIRS=("." 
"!ci" "!perf" "hw*" "!hw/syn*" "kernel*" "runtime*" "sim*" "tests*") # Get the directory of the script SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 62a9bb72c..4e031ea69 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -7,8 +7,6 @@ PREFIX ?= build$(XLEN) TARGET ?= fpga NUM_CORES ?= 1 -SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae - RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae @@ -105,17 +103,17 @@ $(IP_CACHE_DIR)/ip-gen.log: $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) swconfig: vortex_afu.h -vortex_afu.h: $(SRC_DIR)/vortex_afu.json +vortex_afu.h: vortex_afu.json afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ $(BUILD_DIR)/setup.cfg: - mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/setup.cfg $(BUILD_DIR)/setup.cfg + mkdir -p $(BUILD_DIR); cp setup.cfg $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/vortex_afu.qsf: - mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf + mkdir -p $(BUILD_DIR); cp vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.json: - mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.json $(BUILD_DIR)/vortex_afu.json + mkdir -p $(BUILD_DIR); cp vortex_afu.json $(BUILD_DIR)/vortex_afu.json gen-sources: $(BUILD_DIR)/sources.txt $(BUILD_DIR)/sources.txt: diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/quartus/common.mk index 3890dcfe8..d84797d5a 100644 --- a/hw/syn/altera/quartus/common.mk +++ b/hw/syn/altera/quartus/common.mk @@ -1,8 +1,6 @@ ROOT_DIR := $(realpath ../../../../../..) 
include $(ROOT_DIR)/config.mk -SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus - RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts @@ -79,7 +77,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): gen-sources - quartus_sh -t $(SRC_DIR)/project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc $(SRC_DIR)/project.sdc -inc "src" + quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc project.sdc -inc "src" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index e1acce8d6..e5cab8a08 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -19,8 +19,6 @@ NUM_CORES ?= 1 PREFIX ?= build$(XLEN) MAX_JOBS ?= 8 -SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/xrt - RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/xrt @@ -94,7 +92,7 @@ VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] endif VPP_FLAGS += --report_level 2 -VPP_FLAGS += --config $(SRC_DIR)/vitis.ini +VPP_FLAGS += --config vitis.ini # Enable perf counters ifdef PERF @@ -163,7 +161,7 @@ $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 80bfdae02..493c7ba6b 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -1,8 +1,6 @@ 
ROOT_DIR := $(realpath ../../..) include $(ROOT_DIR)/config.mk -SRC_DIR := $(VORTEX_HOME)/hw/syn/yosys - TOP_LEVEL_ENTITY ?= Vortex PREFIX ?= build NUM_CORES ?= 1 @@ -84,13 +82,13 @@ $(BUILD_DIR)/project.v: gen-sources cd $(BUILD_DIR); $(SCRIPT_DIR)/sv2v.sh -t$(TOP_LEVEL_ENTITY) -Isrc -oproject.v build: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v + cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v elaborate: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" + cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" synthesis: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" + cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" clean: $(RMDIR) $(BUILD_DIR) From 8fe02093e2b3f01e360b15506df40316cab8f0a5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:11:16 -0700 Subject: [PATCH 128/488] minor udpate --- hw/scripts/gen_sources.sh | 84 +++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/hw/scripts/gen_sources.sh b/hw/scripts/gen_sources.sh index 0748b3632..8a12a6c56 100755 --- a/hw/scripts/gen_sources.sh +++ b/hw/scripts/gen_sources.sh @@ -1,18 +1,20 @@ #!/bin/bash # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + defines=() includes=() externs=() @@ -21,40 +23,47 @@ output_file="" define_header="" top_module="" copy_folder="" -prepropressor=0 +preprocessor=0 defines_str="" params_str="" includes_str="" -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# Helper function to append options +add_option() { + if [ -n "$1" ]; then + echo "$1 $2" + else + echo "$2" + fi +} -# parse command arguments +# Parse command arguments while getopts D:G:T:I:J:O:H:C:Ph flag do case "${flag}" in D) defines+=( ${OPTARG} ) - defines_str+="-D${OPTARG} " + defines_str=$(add_option "$defines_str" "-D${OPTARG}") ;; - G) params_str+="-G${OPTARG} " + G) params_str=$(add_option "$params_str" "-G${OPTARG}") ;; - T) top_module=( ${OPTARG} ) + T) top_module="${OPTARG}" ;; I) includes+=( ${OPTARG} ) - includes_str+="-I${OPTARG} " + includes_str=$(add_option "$includes_str" "-I${OPTARG}") ;; J) externs+=( ${OPTARG} ) - includes_str+="-I${OPTARG} " + includes_str=$(add_option "$includes_str" "-I${OPTARG}") ;; - O) output_file=( ${OPTARG} ) + O) output_file="${OPTARG}" ;; - H) define_header=( ${OPTARG} ) + H) define_header="${OPTARG}" ;; - C) copy_folder=( ${OPTARG} ) + C) copy_folder="${OPTARG}" ;; - P) prepropressor=1 + P) preprocessor=1 ;; - h) echo "Usage: [-D] [-G=] [-T] [-I] [-J] [-O] [-C: copy to] [-H] [-P: macro prepropressing] [-h help]" + h) echo "Usage: [-D] [-G=] [-T] [-I] [-J] [-O] [-C: copy to] [-H] [-P: macro preprocessing] [-h help]" exit 0 ;; \?) 
echo "Invalid option: -$OPTARG" 1>&2 @@ -70,33 +79,32 @@ if [ "$define_header" != "" ]; then # dump defines into a header file for value in ${defines[@]}; do arrNV=(${value//=/ }) - if (( ${#arrNV[@]} > 1 )); - then + if (( ${#arrNV[@]} > 1 )); then echo "\`define ${arrNV[0]} ${arrNV[1]}" else echo "\`define $value" - fi + fi done - } > $define_header + } > "$define_header" fi if [ "$copy_folder" != "" ]; then - # copy source files - mkdir -p $copy_folder + # copy source files + mkdir -p "$copy_folder" for dir in ${includes[@]}; do find "$dir" -maxdepth 1 -type f | while read -r file; do file_ext="${file##*.}" - file_name=$(basename -- $file) - if [ $prepropressor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then + file_name=$(basename -- "$file") + if [ $preprocessor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then if [[ -n "$params_str" && $file_name == "$top_module."* ]]; then temp_file=$(mktemp) - $script_dir/repl_params.py $params_str -T$top_module $file > $temp_file - verilator $defines_str $includes_str -E -P $temp_file > $copy_folder/$file_name + "$SCRIPT_DIR/repl_params.py" "$params_str" -T"$top_module" "$file" > "$temp_file" + verilator "$defines_str" "$includes_str" -E -P "$temp_file" > "$copy_folder/$file_name" else - verilator $defines_str $includes_str -E -P $file > $copy_folder/$file_name - fi + verilator "$defines_str" "$includes_str" -E -P "$file" > "$copy_folder/$file_name" + fi else - cp $file $copy_folder + cp "$file" "$copy_folder" fi done done @@ -112,7 +120,7 @@ if [ "$output_file" != "" ]; then fi for dir in ${externs[@]}; do - echo "+incdir+$(realpath $dir)" + echo "+incdir+$(realpath "$dir")" done for dir in ${externs[@]}; do @@ -124,24 +132,24 @@ if [ "$output_file" != "" ]; then if [ "$copy_folder" != "" ]; then # dump include directories - echo "+incdir+$(realpath $copy_folder)" + echo "+incdir+$(realpath "$copy_folder")" # dump source files - find "$(realpath $copy_folder)" -maxdepth 1 -type f 
-name "*_pkg.sv" -print - find "$(realpath $copy_folder)" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print + find "$(realpath "$copy_folder")" -maxdepth 1 -type f -name "*_pkg.sv" -print + find "$(realpath "$copy_folder")" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print else # dump include directories for dir in ${includes[@]}; do - echo "+incdir+$(realpath $dir)" + echo "+incdir+$(realpath "$dir")" done - + # dump source files for dir in ${includes[@]}; do - find "$(realpath $dir)" -maxdepth 1 -type f -name "*_pkg.sv" -print + find "$(realpath "$dir")" -maxdepth 1 -type f -name "*_pkg.sv" -print done for dir in ${includes[@]}; do - find "$(realpath $dir)" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print + find "$(realpath "$dir")" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print done fi - } > $output_file -fi + } > "$output_file" +fi \ No newline at end of file From 1f43d4a2fce89ba012cbae8c4f8fecb2c421b021 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 04:55:32 -0700 Subject: [PATCH 129/488] ASE simulation fixes + docs update --- docs/altera_fpga_guide.md | 19 ++++++++++-- hw/syn/altera/README | 16 ++++------ .../altera/opae/{run_ase.sh => start_ase.sh} | 18 ----------- hw/syn/altera/opae/stop_ase.sh | 31 +++++++++++++++++++ 4 files changed, 53 insertions(+), 31 deletions(-) rename hw/syn/altera/opae/{run_ase.sh => start_ase.sh} (74%) create mode 100755 hw/syn/altera/opae/stop_ase.sh diff --git a/docs/altera_fpga_guide.md b/docs/altera_fpga_guide.md index 61d1ae26e..e8070beb2 100644 --- a/docs/altera_fpga_guide.md +++ b/docs/altera_fpga_guide.md @@ -34,7 +34,7 @@ The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware - `NUM_THREADS`: Number of threads per warps - `PERF_ENABLE`: enable the use of all profile counters -You configure the syntesis build from the command line: +You can configure the 
synthesis build from the command line: $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make @@ -43,7 +43,7 @@ OPAE Build Progress You could check the last 10 lines in the build log for possible errors until build completion. - $ tail -n 10 /build.log + $ tail -n 10 /synth/build.log Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. @@ -70,10 +70,23 @@ Sample FPGA Run Test Ensure you have the correct opae runtime for the FPGA target - $ make -C runtime/opae clean $ TARGET=FPGA make -C runtime/opae Run the following from your Vortex build directory $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" +Testing OPAE Synthesis using Intel ASE Simulation +------------------------------------------------- + +Building ASE synthesis + + $ TARGET=asesim make -C runtime/opae + +Building ASE runtime + + $ TARGET=asesim make -C runtime/opae + +Running ASE simulation + + $ ASE_LOG=0 ASE_WORKDIR=/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n16" \ No newline at end of file diff --git a/hw/syn/altera/README b/hw/syn/altera/README index 11d048442..3f9168d5c 100644 --- a/hw/syn/altera/README +++ b/hw/syn/altera/README @@ -10,10 +10,10 @@ cd build_fpga && qsub-synth # check last 10 lines in build log for possible errors tail -n 10 ./build_arria10_fpga_1c/build.log -# Check if the job is submitted to the queue and running. Status should be R +# Check if the job is submitted to the queue and running. Status should be R qstat | grep -# Constantly monitoring the job submitted to the queue. Stop this using Ctrl+C +# Constantly monitoring the job submitted to the queue. 
Stop this using Ctrl+C watch ‘qstat | grep ’ # @@ -35,7 +35,7 @@ fpgaconf --bus 0xaf /synth/vortex_afu.gbs # get portid fpgainfo port -# Running the Test case +# Running the Test case cd /driver/tests/basic make run-fpga @@ -54,13 +54,9 @@ TARGET=asesim make -C runtime/opae PREFIX=build_base CONFIGS="-DEXT_F_DISABLE -DL1_DISABLE -DSM_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" TARGET=asesim make # ASE test runs -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n1 -t0 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n1 -t1 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/demo/demo -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/dogfood/dogfood -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/opencl/vecadd/vecadd -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/opencl/sgemm/sgemm -n4 +start_ase.sh +ASE_LOG=0 ASE_WORKDIR=/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=vecadd +stop_ase.sh # modify "vsim_run.tcl" to dump VCD trace vcd file trace.vcd diff --git a/hw/syn/altera/opae/run_ase.sh b/hw/syn/altera/opae/start_ase.sh similarity index 74% rename from hw/syn/altera/opae/run_ase.sh rename to hw/syn/altera/opae/start_ase.sh index 04fd27540..d408b2170 100755 --- a/hw/syn/altera/opae/run_ase.sh +++ b/hw/syn/altera/opae/start_ase.sh @@ -17,12 +17,6 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" BUILD_DIR=$(realpath $1) -PROGRAM=$(basename "$2") -PROGRAM_DIR=`dirname $2` - -POCL_PATH=$TOOLDIR/pocl -VORTEX_RT_PATH=$SCRIPT_DIR/../../../../runtime - # Export ASE_WORKDIR variable export ASE_WORKDIR=$BUILD_DIR/synth/work @@ -35,7 +29,6 @@ rm -f $BUILD_DIR/synth/nohup.out pushd $BUILD_DIR/synth echo " [DBG] starting ASE simnulator (stdout saved to '$BUILD_DIR/synth/nohup.out')" setsid make sim 
&> /dev/null & -SIM_PID=$! popd # Wait for simulator readiness @@ -44,14 +37,3 @@ while [ ! -f $ASE_WORKDIR/.ase_ready.pid ] do sleep 1 done - -# run application -pushd $PROGRAM_DIR -shift 2 -echo " [DBG] running ./$PROGRAM $*" -ASE_LOG=0 LD_LIBRARY_PATH=$POCL_PATH/lib:$VORTEX_RT_PATH/opae:$LD_LIBRARY_PATH ./$PROGRAM $* -popd - -# stop the simulator (kill process group) -kill -- -$(ps -o pgid= $SIM_PID | grep -o '[0-9]*') -wait $SIM_PID 2> /dev/null \ No newline at end of file diff --git a/hw/syn/altera/opae/stop_ase.sh b/hw/syn/altera/opae/stop_ase.sh new file mode 100755 index 000000000..caee290db --- /dev/null +++ b/hw/syn/altera/opae/stop_ase.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +BUILD_DIR=$(realpath $1) + +# Export ASE_WORKDIR variable +export ASE_WORKDIR=$BUILD_DIR/synth/work + +# stop the simulator (kill process group) +if [ -f "$ASE_WORKDIR/.ase_ready.pid" ]; then + SIM_PID=$(grep '^pid' "$ASE_WORKDIR/.ase_ready.pid" | cut -d'=' -f2 | tr -d ' ') + echo " [DBG] stopping ASE simulator (pid=$SIM_PID)" + kill -- -$(ps -o pgid= $SIM_PID | grep -o '[0-9]*') + wait $SIM_PID 2> /dev/null +else + echo "ASE PID file does not exist." 
+fi \ No newline at end of file From 62a4ee7a3e75ed6ea05816df07f6673a69a77ece Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 05:32:21 -0700 Subject: [PATCH 130/488] minor update --- hw/scripts/gen_sources.sh | 6 +++--- hw/syn/yosys/Makefile | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/scripts/gen_sources.sh b/hw/scripts/gen_sources.sh index 8a12a6c56..ed9143eb3 100755 --- a/hw/scripts/gen_sources.sh +++ b/hw/scripts/gen_sources.sh @@ -98,10 +98,10 @@ if [ "$copy_folder" != "" ]; then if [ $preprocessor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then if [[ -n "$params_str" && $file_name == "$top_module."* ]]; then temp_file=$(mktemp) - "$SCRIPT_DIR/repl_params.py" "$params_str" -T"$top_module" "$file" > "$temp_file" - verilator "$defines_str" "$includes_str" -E -P "$temp_file" > "$copy_folder/$file_name" + $script_dir/repl_params.py $params_str -T$top_module "$file" > "$temp_file" + verilator $defines_str $includes_str -E -P "$temp_file" > "$copy_folder/$file_name" else - verilator "$defines_str" "$includes_str" -E -P "$file" > "$copy_folder/$file_name" + verilator $defines_str $includes_str -E -P "$file" > "$copy_folder/$file_name" fi else cp "$file" "$copy_folder" diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 493c7ba6b..80bfdae02 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -1,6 +1,8 @@ ROOT_DIR := $(realpath ../../..) 
include $(ROOT_DIR)/config.mk +SRC_DIR := $(VORTEX_HOME)/hw/syn/yosys + TOP_LEVEL_ENTITY ?= Vortex PREFIX ?= build NUM_CORES ?= 1 @@ -82,13 +84,13 @@ $(BUILD_DIR)/project.v: gen-sources cd $(BUILD_DIR); $(SCRIPT_DIR)/sv2v.sh -t$(TOP_LEVEL_ENTITY) -Isrc -oproject.v build: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v + cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v elaborate: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" + cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" synthesis: $(BUILD_DIR)/project.v - cd $(BUILD_DIR); synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" + cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" clean: $(RMDIR) $(BUILD_DIR) From 9638f5a6e63c742ef28df490b67363615d0fe60e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 06:05:26 -0700 Subject: [PATCH 131/488] minor update --- hw/syn/altera/opae/Makefile | 10 ++++++---- hw/syn/altera/quartus/common.mk | 4 +++- hw/syn/xilinx/xrt/Makefile | 6 ++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 4e031ea69..62a9bb72c 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -7,6 +7,8 @@ PREFIX ?= build$(XLEN) TARGET ?= fpga NUM_CORES ?= 1 +SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae + RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae @@ -103,17 +105,17 @@ $(IP_CACHE_DIR)/ip-gen.log: $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) swconfig: vortex_afu.h -vortex_afu.h: vortex_afu.json +vortex_afu.h: $(SRC_DIR)/vortex_afu.json afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ $(BUILD_DIR)/setup.cfg: - mkdir -p $(BUILD_DIR); cp setup.cfg $(BUILD_DIR)/setup.cfg + mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/setup.cfg $(BUILD_DIR)/setup.cfg 
$(BUILD_DIR)/vortex_afu.qsf: - mkdir -p $(BUILD_DIR); cp vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf + mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.qsf $(BUILD_DIR)/vortex_afu.json: - mkdir -p $(BUILD_DIR); cp vortex_afu.json $(BUILD_DIR)/vortex_afu.json + mkdir -p $(BUILD_DIR); cp $(SRC_DIR)/vortex_afu.json $(BUILD_DIR)/vortex_afu.json gen-sources: $(BUILD_DIR)/sources.txt $(BUILD_DIR)/sources.txt: diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/quartus/common.mk index d84797d5a..3890dcfe8 100644 --- a/hw/syn/altera/quartus/common.mk +++ b/hw/syn/altera/quartus/common.mk @@ -1,6 +1,8 @@ ROOT_DIR := $(realpath ../../../../../..) include $(ROOT_DIR)/config.mk +SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus + RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts @@ -77,7 +79,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): gen-sources - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc project.sdc -inc "src" + quartus_sh -t $(SRC_DIR)/project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc $(SRC_DIR)/project.sdc -inc "src" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index e5cab8a08..e1acce8d6 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -19,6 +19,8 @@ NUM_CORES ?= 1 PREFIX ?= build$(XLEN) MAX_JOBS ?= 8 +SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/xrt + RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/xrt @@ -92,7 +94,7 @@ VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] endif VPP_FLAGS += --report_level 2 -VPP_FLAGS += --config vitis.ini +VPP_FLAGS += --config $(SRC_DIR)/vitis.ini # Enable perf counters ifdef PERF @@ -161,7 +163,7 @@ $(BUILD_DIR)/scope.json: 
$(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) From a03471837cbec94917f804efdb2dec716baa593a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 15:21:13 -0700 Subject: [PATCH 132/488] minor update --- hw/rtl/VX_config.vh | 2 +- hw/rtl/core/VX_mem_unit.sv | 4 ++-- hw/rtl/mem/{VX_lmem_demux.sv => VX_lmem_switch.sv} | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename hw/rtl/mem/{VX_lmem_demux.sv => VX_lmem_switch.sv} (98%) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index d46c679e9..ea036959d 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -214,7 +214,7 @@ `endif `define STACK_SIZE (1 << `STACK_LOG2_SIZE) -`define RESET_DELAY 16 +`define RESET_DELAY 8 `ifndef STALL_TIMEOUT `define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED))) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 8df272439..5bfbf311f 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -46,11 +46,11 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ) lsu_lmem_if[`NUM_LSU_BLOCKS](); for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices - VX_lmem_demux #( + VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), .RSP_OUT_BUF (1) - ) lmem_demux ( + ) lmem_switch ( .clk (clk), .reset (reset), .lsu_in_if (lsu_mem_in_if[i]), diff --git a/hw/rtl/mem/VX_lmem_demux.sv b/hw/rtl/mem/VX_lmem_switch.sv similarity index 98% rename from hw/rtl/mem/VX_lmem_demux.sv rename to hw/rtl/mem/VX_lmem_switch.sv index b3158ad8a..da2a190a2 
100644 --- a/hw/rtl/mem/VX_lmem_demux.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_lmem_demux import VX_gpu_pkg::*; #( +module VX_lmem_switch import VX_gpu_pkg::*; #( parameter REQ0_OUT_BUF = 0, parameter REQ1_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 From b6663eaff907994ffb286952dd806cda2081c3d4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 15:49:49 -0700 Subject: [PATCH 133/488] output register fix --- hw/rtl/core/VX_mem_unit.sv | 3 ++- hw/rtl/mem/VX_lmem_switch.sv | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 5bfbf311f..841707da1 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -49,7 +49,8 @@ module VX_mem_unit import VX_gpu_pkg::*; #( VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), - .RSP_OUT_BUF (1) + .RSP_OUT_BUF (1), + .ARBITER ("R") ) lmem_switch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/mem/VX_lmem_switch.sv b/hw/rtl/mem/VX_lmem_switch.sv index da2a190a2..628190a8d 100644 --- a/hw/rtl/mem/VX_lmem_switch.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -16,7 +16,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( parameter REQ0_OUT_BUF = 0, parameter REQ1_OUT_BUF = 0, - parameter RSP_OUT_BUF = 0 + parameter RSP_OUT_BUF = 0, + parameter `STRING ARBITER = "R" ) ( input wire clk, input wire reset, @@ -43,8 +44,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (REQ_DATAW), - .SIZE (2), - .OUT_REG (REQ0_OUT_BUF) + .SIZE (`TO_OUT_BUF_SIZE(REQ0_OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(REQ0_OUT_BUF)) ) req_global_buf ( .clk (clk), .reset (reset), @@ -74,8 +75,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (REQ_DATAW), - .SIZE (0), - .OUT_REG (REQ1_OUT_BUF) + .SIZE (`TO_OUT_BUF_SIZE(REQ1_OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(REQ1_OUT_BUF)) ) req_local_buf ( .clk (clk), .reset (reset), @@ -106,7 +107,7 @@ module 
VX_lmem_switch import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), - .ARBITER ("R"), + .ARBITER (ARBITER), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), From 9d3d35c6b4541fa550e68d00f29526f93e3fd56e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 16:03:02 -0700 Subject: [PATCH 134/488] operands timing optimization --- hw/rtl/core/VX_operands.sv | 52 ++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 62e2bb883..1b9c6f010 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -59,17 +59,18 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; - wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; + wire [NUM_BANKS-1:0] gpr_rd_valid_st1; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1; wire pipe_valid_st1, pipe_ready_st1; wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2, src_data_m_st2; reg [NUM_SRC_OPDS-1:0] data_fetched_n; wire [NUM_SRC_OPDS-1:0] data_fetched_st1; @@ -176,32 +177,34 @@ module VX_operands import VX_gpu_pkg::*; #( 
assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2; - assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_n; + always @(*) begin + gpr_rd_data_st1 = '0; + for (integer b = 0; b < NUM_BANKS; ++b) begin + if (gpr_rd_valid_st1[b]) begin + gpr_rd_data_st1[gpr_rd_req_idx_st1[b]] = gpr_rd_data[b]; + end + end + end + + assign src_data_m_st2 = src_data_st2 | gpr_rd_data_st2; + + assign src_data_st1 = pipe_fire_st2 ? '0 : src_data_m_st2; wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW VX_pipe_register #( - .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), + .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_SRC_OPDS * REGS_DATAW + META_DATAW), .RESETW (1 + NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), .enable (pipe_ready_st1), - .data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}), - .data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2}) + .data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_data_st1, pipe_data_st1}), + .data_out ({pipe_valid_st2, src_data_st2, gpr_rd_data_st2, pipe_data_st2}) ); - always @(*) begin - src_data_n = src_data_st2; - for (integer b = 0; b < NUM_BANKS; ++b) begin - if (gpr_rd_valid_st2[b]) begin - src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b]; - end - end - end - VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -211,12 +214,7 @@ module VX_operands import VX_gpu_pkg::*; #( .reset (reset), .valid_in (pipe_valid_st2), .ready_in (pipe_ready_st2), - .data_in ({ - pipe_data_st2, - src_data_n[0], - src_data_n[1], - src_data_n[2] - }), + .data_in ({pipe_data_st2, src_data_m_st2}), .data_out ({ operands_if.data.wis, operands_if.data.tmask, @@ -227,9 +225,9 @@ module VX_operands import VX_gpu_pkg::*; #( operands_if.data.op_args, 
operands_if.data.rd, operands_if.data.uuid, - operands_if.data.rs1_data, + operands_if.data.rs3_data, operands_if.data.rs2_data, - operands_if.data.rs3_data + operands_if.data.rs1_data }), .valid_out (operands_if.valid), .ready_out (operands_if.ready) @@ -280,7 +278,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data_st1[b]) + .rdata (gpr_rd_data[b]) ); end From 51862dbc06ddd8f3a6f24a611f1420c234d09eea Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 19:05:47 -0700 Subject: [PATCH 135/488] doc update --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7cafd498d..22635bab3 100644 --- a/README.md +++ b/README.md @@ -88,19 +88,19 @@ make -s make -s make install ``` -- Building Vortex 64-bit simply requires using --xlen=64 configure option. +- Building Vortex 64-bit requires setting --xlen=64 configure option. ```sh -../configure --xlen=32 --tooldir=$HOME/tools +../configure --xlen=64 --tooldir=$HOME/tools ``` - Sourcing "./ci/toolchain_env.sh" is required everytime you start a new terminal. we recommend adding "source /ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login. ```sh echo "source /ci/toolchain_env.sh" >> ~/.bashrc ``` -- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder. +- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder. ```sh ../configure ``` -- To debug the GPU, you can generate a "run.log" trace. see /docs/debugging.md for more information. +- To debug the GPU, the simulation can generate a runtime trace for analysis. See /docs/debugging.md for more information. 
```sh ./ci/blackbox.sh --app=demo --debug=3 ``` From adcad92a7332260f88a178e9a1504bcb6c39c770 Mon Sep 17 00:00:00 2001 From: tinebp Date: Sat, 17 Aug 2024 19:09:02 -0700 Subject: [PATCH 136/488] extending OS support --- README.md | 2 +- ci/toolchain_install.sh.in | 4 ++-- configure | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7cafd498d..40446187c 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Vortex is a full-stack open-source RISC-V GPGPU. ## Build Instructions More detailed build instructions can be found [here](docs/install_vortex.md). ### Supported OS Platforms -- Ubuntu 18.04, 20.04 +- Ubuntu 18.04, 20.04, 22.04, 24.04 - Centos 7 ### Toolchain Dependencies - [POCL](http://portablecl.org/) diff --git a/ci/toolchain_install.sh.in b/ci/toolchain_install.sh.in index 73e27eb55..01ebe889b 100755 --- a/ci/toolchain_install.sh.in +++ b/ci/toolchain_install.sh.in @@ -24,8 +24,8 @@ riscv32() { case $OSVERSION in "centos/7") parts=$(eval echo {a..l}) ;; - "ubuntu/focal") parts=$(eval echo {a..k}) ;; - *) parts=$(eval echo {a..j}) ;; + "ubuntu/bionic") parts=$(eval echo {a..j}) ;; + *) parts=$(eval echo {a..k}) ;; esac rm -f riscv32-gnu-toolchain.tar.bz2.parta* for x in $parts diff --git a/configure b/configure index 37e95a2bd..de04b648b 100755 --- a/configure +++ b/configure @@ -26,6 +26,8 @@ detect_osversion() { case "$VERSION_CODENAME" in bionic) osversion="ubuntu/bionic";; focal) osversion="ubuntu/focal";; + jammy) osversion="ubuntu/focal";; + noble) osversion="ubuntu/focal";; # Add new versions as needed esac ;; From 06ef53025dd6bb8e62da8f17df7eb8d56316c979 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 17 Aug 2024 21:19:10 -0700 Subject: [PATCH 137/488] minor update --- docs/altera_fpga_guide.md | 4 ++-- docs/xilinx_fpga_guide.md | 18 +++++++++++++++++- hw/rtl/libs/VX_mem_scheduler.sv | 6 ++---- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/docs/altera_fpga_guide.md 
b/docs/altera_fpga_guide.md index e8070beb2..ba95d942a 100644 --- a/docs/altera_fpga_guide.md +++ b/docs/altera_fpga_guide.md @@ -76,8 +76,8 @@ Run the following from your Vortex build directory $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" -Testing OPAE Synthesis using Intel ASE Simulation -------------------------------------------------- +Testing Vortex using OPAE with Intel ASE Simulation +--------------------------------------------------- Building ASE synthesis diff --git a/docs/xilinx_fpga_guide.md b/docs/xilinx_fpga_guide.md index f2960deb6..959ca6773 100644 --- a/docs/xilinx_fpga_guide.md +++ b/docs/xilinx_fpga_guide.md @@ -33,4 +33,20 @@ Ensure you have the correct opae runtime for the FPGA target Run the following from your Vortex build directory - $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" \ No newline at end of file + $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" + +Testing Vortex using XRT Hardware Emulation +------------------------------------------- + +Building XRT's hw_emu target + + $ cd hw/syn/xilinx/xrt + $ PREFIX=test2 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw_emu make + +Building XRT hw_meu runtime + + $ TARGET=hw_emu make -C runtime/xrt + +Running XRT hw_emu simulation + + $ TARGET=hw_emu FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm \ No newline at end of file diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f173d7d0a..5324d7ffa 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -223,8 +223,6 @@ module VX_mem_scheduler #( if (COALESCE_ENABLE) begin - `RESET_RELAY (coalescer_reset, reset); - VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), .NUM_REQS (CORE_REQS), @@ -236,8 +234,8 @@ module VX_mem_scheduler #( .UUID_WIDTH (UUID_WIDTH), .QUEUE_SIZE (MEM_QUEUE_SIZE) ) coalescer ( - .clk (clk), - .reset 
(coalescer_reset), + .clk (clk), + .reset (reset), // Input request .in_req_valid (reqq_valid), From de47307428f1e1c5c195bed067788687ec31f07b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 01:57:36 -0700 Subject: [PATCH 138/488] minor update --- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 8c0fa0558..bbfd8269d 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -16,7 +16,7 @@ `TRACING_OFF module VX_rr_arbiter #( parameter NUM_REQS = 1, - parameter MODEL = 2, + parameter MODEL = 1, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS), parameter LUT_OPT = 0 ) ( From a2b24b4ed0d8bf3c35eb7f557be1544cf0b28bf3 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 02:10:34 -0700 Subject: [PATCH 139/488] xilinx non-xrt synthesis fixes --- hw/syn/xilinx/test/Makefile | 10 ++----- hw/syn/xilinx/test/project.tcl.in | 43 +++++++++++++++---------------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/test/Makefile index e15789516..bf950b4ed 100644 --- a/hw/syn/xilinx/test/Makefile +++ b/hw/syn/xilinx/test/Makefile @@ -28,10 +28,7 @@ CFLAGS += -DEXT_F_DISABLE # update memory layout for 2MB RAM CFLAGS += -DSTARTUP_ADDR=32\'h80000 -CFLAGS += -DIO_BASE_ADDR=32\'hFF000 - -COE_FILE := $(SRC_DIR)/project_1_files/kernel.bin.coe -ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') +CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build @@ -40,9 +37,6 @@ project_1/sources.txt: mkdir -p project_1 $(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt -project.tcl: project.tcl.in - sed -e 's/%COE_FILE%/$(ESCAPED_COE_FILE)/g' < $< > $@ - build: project_1/vortex.xpr project_1/vortex.xpr: project_1/sources.txt project.tcl $(VIVADO) -mode batch -source project.tcl -tclargs project_1/sources.txt project_1/src $(SCRIPT_DIR) @@ -51,4 
+45,4 @@ run: project_1/vortex.xpr $(VIVADO) project_1/vortex.xpr & clean: - rm -rf project_1 project.tcl + rm -rf project_1 diff --git a/hw/syn/xilinx/test/project.tcl.in b/hw/syn/xilinx/test/project.tcl.in index a2692f637..61ee63464 100644 --- a/hw/syn/xilinx/test/project.tcl.in +++ b/hw/syn/xilinx/test/project.tcl.in @@ -46,7 +46,6 @@ set proj_dir [get_property directory [current_project]] # Set project properties set obj [current_project] -set_property -name "board_part" -value "xilinx.com:au280:part0:1.1" -objects $obj set_property -name "compxlib.activehdl_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/activehdl" -objects $obj set_property -name "compxlib.funcsim" -value "1" -objects $obj set_property -name "compxlib.ies_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/ies" -objects $obj @@ -260,7 +259,7 @@ set_property -name "name" -value "utils_1" -objects $obj # Proc to create BD design_1 proc cr_bd_design_1 { parentCell } { -# The design that will be created by this Tcl proc contains the following +# The design that will be created by this Tcl proc contains the following # module references: # Vortex_top @@ -277,7 +276,7 @@ set bCheckIPsPassed 1 ################################################################## set bCheckIPs 1 if { $bCheckIPs == 1 } { - set list_check_ips "\ + set list_check_ips "\ xilinx.com:ip:axi_bram_ctrl:4.1\ xilinx.com:ip:blk_mem_gen:8.4\ " @@ -304,7 +303,7 @@ if { $bCheckIPs == 1 } { ################################################################## set bCheckModules 1 if { $bCheckModules == 1 } { - set list_check_mods "\ + set list_check_mods "\ Vortex_top\ " @@ -369,7 +368,7 @@ set vx_reset [ create_bd_port -dir I -type rst vx_reset ] set_property -dict [ list \ CONFIG.POLARITY {ACTIVE_HIGH} \ ] $vx_reset - + set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ] set dcr_wr_addr [ create_bd_port -dir I -from 11 -to 0 dcr_wr_addr ] set dcr_wr_data [ create_bd_port -dir I 
-from 31 -to 0 dcr_wr_data ] @@ -384,7 +383,7 @@ if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_nam catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} return 1 } - + # Create instance: axi_bram_ctrl_0, and set properties set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ] set_property -dict [ list \ @@ -399,7 +398,7 @@ set_property -dict [ list \ CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {%COE_FILE%} \ + CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/project_1_files/kernel.bin.coe} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ @@ -475,24 +474,24 @@ pagesize -pg 1 -db -bbox -sgen -180 0 1060 240 validate_bd_design save_bd_design - close_bd_design $design_name + close_bd_design $design_name } # End of cr_bd_design_1() cr_bd_design_1 "" -set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] -set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] -set_property IS_ENABLED "1" [get_files design_1.bd ] -set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] -#set_property IS_LOCKED "0" [get_files design_1.bd ] -set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] -set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] -set_property PFM_NAME "" [get_files design_1.bd ] -set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] -set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] -set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] -set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] -set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] -set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd 
] +set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] +set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] +set_property IS_ENABLED "1" [get_files design_1.bd ] +set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] +#set_property IS_LOCKED "0" [get_files design_1.bd ] +set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] +set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] +set_property PFM_NAME "" [get_files design_1.bd ] +set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] +set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] +set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] +set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] +set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] +set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] #call make_wrapper to create wrapper files set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] From 3612ceda805fe7381574fd33cc539f3b0b608719 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 02:13:43 -0700 Subject: [PATCH 140/488] minor update --- hw/rtl/libs/VX_cyclic_arbiter.sv | 15 ++++++--------- hw/rtl/libs/VX_priority_encoder.sv | 2 +- hw/rtl/libs/VX_scan.sv | 26 +++++++++++++------------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index d721e5130..dc4de1300 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -40,17 +40,17 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; - wire [LOG_NUM_REQS-1:0] grant_index_um, grant_index_ql; + wire [LOG_NUM_REQS-1:0] grant_index_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; end else if (grant_valid && grant_ready) begin - if (!IS_POW2 && grant_index_ql == 
LOG_NUM_REQS'(NUM_REQS-1)) begin + if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; end else begin - grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); + grant_index_r <= grant_index + LOG_NUM_REQS'(1); end end end @@ -61,14 +61,11 @@ module VX_cyclic_arbiter #( .data_in (requests), `UNUSED_PIN (onehot_out), .index_out (grant_index_um), - `UNUSED_PIN (valid_out) + .valid_out (grant_valid) ); - assign grant_index_ql = requests[grant_index_r] ? grant_index_r : grant_index_um; - - assign grant_index = grant_index_ql; - assign grant_onehot = NUM_REQS'(1) << grant_index_ql; - assign grant_valid = (| requests); + assign grant_index = requests[grant_index_r] ? grant_index_r : grant_index_um; + assign grant_onehot = NUM_REQS'(1) << grant_index; end diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 27465b414..1d34f0e51 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -53,7 +53,7 @@ module VX_priority_encoder #( VX_scan #( .N (N), - .OP (2) + .OP ("|") ) scan ( .data_in (reversed), .data_out (scan_lo) diff --git a/hw/rtl/libs/VX_scan.sv b/hw/rtl/libs/VX_scan.sv index f263dd218..48de2964a 100644 --- a/hw/rtl/libs/VX_scan.sv +++ b/hw/rtl/libs/VX_scan.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,8 +19,8 @@ `TRACING_OFF module VX_scan #( parameter N = 1, - parameter OP = 0, // 0: XOR, 1: AND, 2: OR - parameter REVERSE = 0 // 0: LO->HI, 1: HI->LO + parameter `STRING OP = "^", // ^: XOR, &: AND, |: OR + parameter REVERSE = 0 // 0: LO->HI, 1: HI->LO ) ( input wire [N-1:0] data_in, output wire [N-1:0] data_out @@ -28,7 +28,7 @@ module VX_scan #( localparam LOGN = `CLOG2(N); `IGNORE_UNOPTFLAT_BEGIN - wire [LOGN:0][N-1:0] t; + wire [LOGN:0][N-1:0] t; `IGNORE_UNOPTFLAT_END // reverses bits @@ -39,29 +39,29 @@ module VX_scan #( end // optimize for the common case of small and-scans - if ((N == 2) && (OP == 1)) begin + if ((N == 2) && (OP == "&")) begin assign t[LOGN] = {t[0][1], &t[0][1:0]}; - end else if ((N == 3) && (OP == 1)) begin + end else if ((N == 3) && (OP == "&")) begin assign t[LOGN] = {t[0][2], &t[0][2:1], &t[0][2:0]}; - end else if ((N == 4) && (OP == 1)) begin + end else if ((N == 4) && (OP == "&")) begin assign t[LOGN] = {t[0][3], &t[0][3:2], &t[0][3:1], &t[0][3:0]}; end else begin // general case wire [N-1:0] fill; for (genvar i = 0; i < LOGN; ++i) begin wire [N-1:0] shifted = N'({fill, t[i]} >> (1< Date: Sun, 18 Aug 2024 16:03:59 -0700 Subject: [PATCH 141/488] synthesis of the memory unit and local memory --- hw/rtl/core/VX_mem_unit_top.sv | 123 ++++++++++++++++++++++++ hw/rtl/mem/VX_local_mem_top.sv | 13 ++- hw/syn/altera/quartus/Makefile | 7 +- hw/syn/altera/quartus/mem_unit/Makefile | 7 ++ hw/unittest/Makefile | 8 +- hw/unittest/local_mem_top/Makefile | 26 +++++ hw/unittest/local_mem_top/main.cpp | 49 ++++++++++ hw/unittest/mem_unit_top/Makefile | 26 +++++ hw/unittest/mem_unit_top/main.cpp | 49 ++++++++++ 9 files changed, 302 insertions(+), 6 deletions(-) create mode 100644 hw/rtl/core/VX_mem_unit_top.sv create mode 100755 hw/syn/altera/quartus/mem_unit/Makefile create mode 100644 hw/unittest/local_mem_top/Makefile create mode 100644 hw/unittest/local_mem_top/main.cpp create mode 100644 hw/unittest/mem_unit_top/Makefile create mode 100644 
hw/unittest/mem_unit_top/main.cpp diff --git a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv new file mode 100644 index 000000000..6c7e2ff66 --- /dev/null +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -0,0 +1,123 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_mem_unit_top import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + parameter LSU_WORD_WIDTH = LSU_WORD_SIZE * 8 +) ( + // Clock + input wire clk, + input wire reset, + + // LSU memory request + input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_valid, + input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_rw, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_req_byteen, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_req_addr, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_req_flags, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_req_data, + input wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_req_tag, + output wire [`NUM_LSU_BLOCKS-1:0] lsu_req_ready, + + // LSU memory response + output wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_valid, + output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_rsp_data, + output wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_rsp_tag, + input wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_ready, + + // Memory request + output wire [DCACHE_NUM_REQS-1:0] mem_req_valid, + 
output wire [DCACHE_NUM_REQS-1:0] mem_req_rw, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] mem_req_addr, + output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_req_data, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_req_tag, + input wire [DCACHE_NUM_REQS-1:0] mem_req_ready, + + // Memory response + input wire [DCACHE_NUM_REQS-1:0] mem_rsp_valid, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_rsp_data, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_rsp_tag, + output wire [DCACHE_NUM_REQS-1:0] mem_rsp_ready +); + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_mem_if[`NUM_LSU_BLOCKS](); + + // LSU memory request + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + assign lsu_mem_if[i].req_valid = lsu_req_valid[i]; + assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i]; + assign lsu_mem_if[i].req_data.byteen = lsu_req_byteen[i]; + assign lsu_mem_if[i].req_data.addr = lsu_req_addr[i]; + assign lsu_mem_if[i].req_data.flags = lsu_req_flags[i]; + assign lsu_mem_if[i].req_data.data = lsu_req_data[i]; + assign lsu_mem_if[i].req_data.tag = lsu_req_tag[i]; + assign lsu_req_ready[i] = lsu_mem_if[i].req_ready; + end + + // LSU memory response + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid; + assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data; + assign lsu_rsp_tag[i] = lsu_mem_if[i].rsp_data.tag; + assign lsu_mem_if[i].rsp_ready = lsu_rsp_ready[i]; + end + + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) mem_bus_if[DCACHE_NUM_REQS](); + + // memory request + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + assign mem_req_valid[i] = mem_bus_if[i].req_valid; + assign mem_req_rw[i] = 
mem_bus_if[i].req_data.rw; + assign mem_req_byteen[i] = mem_bus_if[i].req_data.byteen; + assign mem_req_addr[i] = mem_bus_if[i].req_data.addr; + assign mem_req_flags[i] = mem_bus_if[i].req_data.flags; + assign mem_req_data[i] = mem_bus_if[i].req_data.data; + assign mem_req_tag[i] = mem_bus_if[i].req_data.tag; + assign mem_bus_if[i].req_ready = mem_req_ready[i]; + end + + // memory response + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; + assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; + assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; + assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready; + end + +`ifdef PERF_ENABLE + cache_perf_t lmem_perf = '0; +`endif + + VX_mem_unit #( + .INSTANCE_ID (INSTANCE_ID) + ) mem_unit ( + .clk (clk), + .reset (reset), + `ifdef PERF_ENABLE + .lmem_perf (lmem_perf), + `endif + .lsu_mem_in_if (lsu_mem_if), + .dcache_bus_if (mem_bus_if) + ); + +endmodule diff --git a/hw/rtl/mem/VX_local_mem_top.sv b/hw/rtl/mem/VX_local_mem_top.sv index 5f9b17da0..fda15cde2 100644 --- a/hw/rtl/mem/VX_local_mem_top.sv +++ b/hw/rtl/mem/VX_local_mem_top.sv @@ -24,8 +24,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( // Number of banks parameter NUM_BANKS = 4, - // Address width - parameter ADDR_WIDTH = `CLOG2(SIZE), // Size of a word in bytes parameter WORD_SIZE = `XLEN/8, @@ -33,7 +31,13 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( parameter UUID_WIDTH = 0, // Request tag size - parameter TAG_WIDTH = 16 + parameter TAG_WIDTH = 16, + + // Address width + parameter NUM_WORDS = SIZE / WORD_SIZE, + parameter WORDS_PER_BANK = NUM_WORDS / NUM_BANKS, + parameter BANK_ADDR_WIDTH = `CLOG2(WORDS_PER_BANK), + parameter ADDR_WIDTH = BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS) ) ( input wire clk, input wire reset, @@ -56,7 +60,8 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( ); VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) + .TAG_WIDTH (TAG_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH) ) 
mem_bus_if[NUM_REQS](); // memory request diff --git a/hw/syn/altera/quartus/Makefile b/hw/syn/altera/quartus/Makefile index d0a2999bd..f8993bf87 100644 --- a/hw/syn/altera/quartus/Makefile +++ b/hw/syn/altera/quartus/Makefile @@ -9,7 +9,7 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline lmem cache fpu core issue vortex top test +.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top test ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: @@ -30,6 +30,11 @@ pipeline: cp pipeline/Makefile pipeline/$(BUILD_DIR) $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & +mem_unit: + mkdir -p mem_unit/$(BUILD_DIR) + cp mem_unit/Makefile mem_unit/$(BUILD_DIR) + $(MAKE) -C mem_unit/$(BUILD_DIR) clean && $(MAKE) -C mem_unit/$(BUILD_DIR) > mem_unit/$(BUILD_DIR)/build.log 2>&1 & + lmem: mkdir -p lmem/$(BUILD_DIR) cp lmem/Makefile lmem/$(BUILD_DIR) diff --git a/hw/syn/altera/quartus/mem_unit/Makefile b/hw/syn/altera/quartus/mem_unit/Makefile new file mode 100755 index 000000000..585e5fc34 --- /dev/null +++ b/hw/syn/altera/quartus/mem_unit/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_mem_init_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core diff --git a/hw/unittest/Makefile b/hw/unittest/Makefile index 5722ec9bc..f37d6ae1b 100644 --- a/hw/unittest/Makefile +++ b/hw/unittest/Makefile @@ -5,6 +5,8 @@ all: $(MAKE) -C cache_top $(MAKE) -C core_top $(MAKE) -C issue_top + $(MAKE) -C local_mem_top + $(MAKE) -C mem_unit_top run: $(MAKE) -C cache run @@ -13,6 +15,8 @@ run: $(MAKE) -C cache_top run $(MAKE) -C core_top run $(MAKE) -C issue_top run + $(MAKE) -C local_mem_top run + $(MAKE) -C mem_unit_top run clean: $(MAKE) -C cache clean @@ -20,4 +24,6 @@ clean: 
$(MAKE) -C mem_streamer clean $(MAKE) -C cache_top clean $(MAKE) -C core_top clean - $(MAKE) -C issue_top clean \ No newline at end of file + $(MAKE) -C issue_top clean + $(MAKE) -C local_mem_top clean + $(MAKE) -C mem_unit_top clean \ No newline at end of file diff --git a/hw/unittest/local_mem_top/Makefile b/hw/unittest/local_mem_top/Makefile new file mode 100644 index 000000000..22a8adfae --- /dev/null +++ b/hw/unittest/local_mem_top/Makefile @@ -0,0 +1,26 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := local_mem_top + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +DPI_DIR := $(VORTEX_HOME)/hw/dpi + +SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) + +CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common +CXXFLAGS += -I$(ROOT_DIR)/hw + +SRCS := $(DPI_DIR)/util_dpi.cpp +SRCS += $(SRC_DIR)/main.cpp + +DBG_TRACE_FLAGS := + +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv + +RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem + +TOP := VX_local_mem_top + +include ../common.mk \ No newline at end of file diff --git a/hw/unittest/local_mem_top/main.cpp b/hw/unittest/local_mem_top/main.cpp new file mode 100644 index 000000000..5191b4433 --- /dev/null +++ b/hw/unittest/local_mem_top/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file diff --git a/hw/unittest/mem_unit_top/Makefile b/hw/unittest/mem_unit_top/Makefile new file mode 100644 index 000000000..a44befbce --- /dev/null +++ b/hw/unittest/mem_unit_top/Makefile @@ -0,0 +1,26 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := mem_unit_top + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +DPI_DIR := $(VORTEX_HOME)/hw/dpi + +SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) + +CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common +CXXFLAGS += -I$(ROOT_DIR)/hw + +SRCS := $(DPI_DIR)/util_dpi.cpp +SRCS += $(SRC_DIR)/main.cpp + +DBG_TRACE_FLAGS := + +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv + +RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core + +TOP := VX_mem_unit_top + +include ../common.mk \ No newline at end of file diff --git a/hw/unittest/mem_unit_top/main.cpp b/hw/unittest/mem_unit_top/main.cpp new file mode 100644 index 000000000..5191b4433 --- /dev/null +++ b/hw/unittest/mem_unit_top/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file From 2762bd53ff60e086177081face5e039382f13af5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 18:56:17 -0700 Subject: [PATCH 142/488] minor updates --- hw/rtl/core/VX_mem_unit.sv | 2 +- hw/rtl/core/VX_mem_unit_top.sv | 4 ++++ hw/syn/altera/quartus/mem_unit/Makefile | 4 ++-- hw/unittest/mem_unit_top/Makefile | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 841707da1..4f94c2765 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -205,7 +205,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .ARBITER ("P"), .REQ_OUT_BUF (0), .RSP_OUT_BUF (0) - ) lsu_adapter ( + ) dcache_adapter ( .clk (clk), .reset (reset), .lsu_mem_if (dcache_coalesced_if), diff --git a/hw/rtl/core/VX_mem_unit_top.sv 
b/hw/rtl/core/VX_mem_unit_top.sv index 6c7e2ff66..c1acb6382 100644 --- a/hw/rtl/core/VX_mem_unit_top.sv +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -24,6 +24,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( // LSU memory request input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_valid, input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_rw, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_req_mask, input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_req_byteen, input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_req_addr, input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_req_flags, @@ -33,6 +34,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( // LSU memory response output wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_valid, + output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_rsp_mask, output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_rsp_data, output wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_rsp_tag, input wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_ready, @@ -63,6 +65,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin assign lsu_mem_if[i].req_valid = lsu_req_valid[i]; assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i]; + assign lsu_mem_if[i].req_data.mask = lsu_req_mask[i]; assign lsu_mem_if[i].req_data.byteen = lsu_req_byteen[i]; assign lsu_mem_if[i].req_data.addr = lsu_req_addr[i]; assign lsu_mem_if[i].req_data.flags = lsu_req_flags[i]; @@ -74,6 +77,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( // LSU memory response for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid; + assign lsu_rsp_mask[i] = lsu_mem_if[i].rsp_data.mask; assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data; assign lsu_rsp_tag[i] = lsu_mem_if[i].rsp_data.tag; assign lsu_mem_if[i].rsp_ready = lsu_rsp_ready[i]; diff --git a/hw/syn/altera/quartus/mem_unit/Makefile 
b/hw/syn/altera/quartus/mem_unit/Makefile index 585e5fc34..209492265 100755 --- a/hw/syn/altera/quartus/mem_unit/Makefile +++ b/hw/syn/altera/quartus/mem_unit/Makefile @@ -1,7 +1,7 @@ -PROJECT = VX_mem_init_top +PROJECT = VX_mem_unit_top TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv include ../../common.mk -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu diff --git a/hw/unittest/mem_unit_top/Makefile b/hw/unittest/mem_unit_top/Makefile index a44befbce..8809551f4 100644 --- a/hw/unittest/mem_unit_top/Makefile +++ b/hw/unittest/mem_unit_top/Makefile @@ -19,7 +19,7 @@ DBG_TRACE_FLAGS := RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu TOP := VX_mem_unit_top From 1814ff6d403568b39963853e7f7a5b9f033c0c6b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 18 Aug 2024 22:02:37 -0700 Subject: [PATCH 143/488] xilinx standalone synthesis fixes --- hw/scripts/bin2coe.py | 91 ++++++++ hw/syn/xilinx/test/Makefile | 22 +- hw/syn/xilinx/test/kernel/Makefile | 51 ----- hw/syn/xilinx/test/kernel/kernel.dat | 3 - hw/syn/xilinx/test/kernel/main.c | 36 --- hw/syn/xilinx/test/kernel/start.S | 23 -- hw/syn/xilinx/test/project.tcl.in | 2 +- .../xilinx/test/project_1_files/Vortex_top.v | 192 +++++----------- .../test/project_1_files/Vortex_wrap.sv | 208 ++++++++++++++++++ tests/kernel/common.mk | 4 +- 10 files changed, 373 insertions(+), 259 deletions(-) create mode 100755 hw/scripts/bin2coe.py delete mode 100644 hw/syn/xilinx/test/kernel/Makefile delete mode 100644 hw/syn/xilinx/test/kernel/kernel.dat delete mode 100644 hw/syn/xilinx/test/kernel/main.c delete mode 100644 
hw/syn/xilinx/test/kernel/start.S create mode 100644 hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv diff --git a/hw/scripts/bin2coe.py b/hw/scripts/bin2coe.py new file mode 100755 index 000000000..95b3bcbeb --- /dev/null +++ b/hw/scripts/bin2coe.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +g_memory = {} + +def hex2bin(ch): + return int(ch, 16) if ch.isdigit() or ch in 'abcdefABCDEF' else 0 + +def process_binary(binfname, wordsize, binaddr): + with open(binfname, 'rb') as f: + buffer = list(f.read()) + g_memory[binaddr] = buffer + return (len(buffer) + wordsize - 1) // wordsize + +def process_data(datfname, wordsize): + offset, buffer = 0, [] + with open(datfname, 'r') as f: + for line in f: + line = line.strip() + if line.startswith("#"): + continue + if line.startswith("@"): + if buffer: + g_memory[offset] = buffer + offset = int(line[1:], 16) + buffer = [] + else: + for i in range(0, len(line), 2): + byte = hex2bin(line[i]) << 4 | hex2bin(line[i+1]) + buffer.append(byte) + if len(buffer) % wordsize: + buffer.extend([0] * (wordsize - len(buffer) % wordsize)) + offset += 1 + if buffer: + g_memory[offset] = buffer + return offset + +def write_coe(outfname, wordsize, depth, defval): + with open(outfname, 'w') as f: + f.write("MEMORY_INITIALIZATION_RADIX=16;\nMEMORY_INITIALIZATION_VECTOR=\n") + i = 0 + for addr in sorted(g_memory): + while i < addr: + f.write(f"{defval},\n") + 
i += 1 + data = g_memory[addr] + for j in range(0, len(data), wordsize): + f.write(",".join([f"{byte:02x}" for byte in data[j:j+wordsize][::-1]]) + ",\n") + i += 1 + while i < depth: + f.write(f"{defval},\n") + i += 1 + f.seek(f.tell() - 2, 0) # Remove the last comma + f.write(";\n") + +def main(): + parser = argparse.ArgumentParser(description="Binary to Xilinx COE File Converter") + parser.add_argument("--binary", help="Input binary file.") + parser.add_argument("--data", help="Input data file.") + parser.add_argument("--out", default="output.coe", help="Output file (optional).") + parser.add_argument("--wordsize", type=int, default=4, help="Word size in bytes (default 4).") + parser.add_argument("--depth", type=int, default=0, help="Address size (optional).") + parser.add_argument("--binaddr", type=int, default=0, help="Binary address (optional).") + parser.add_argument("--default", default="00", help="Default hex value as string (optional).") + + args = parser.parse_args() + + depth = max( + process_binary(args.binary, args.wordsize, args.binaddr) if args.binary else 0, + process_data(args.data, args.wordsize) if args.data else 0, + args.depth + ) + + write_coe(args.out, args.wordsize, depth, args.default) + +if __name__ == "__main__": + main() diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/test/Makefile index bf950b4ed..5b6a76da3 100644 --- a/hw/syn/xilinx/test/Makefile +++ b/hw/syn/xilinx/test/Makefile @@ -10,6 +10,8 @@ DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts +KERNEL ?= fibonacci + # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) @@ -32,17 +34,27 @@ CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build +$(KERNEL).bin: + $(MAKE) -C $(ROOT_DIR)/kernel clean + STACK_BASE_ADDR=0xFF000 $(MAKE) -C $(ROOT_DIR)/kernel + $(MAKE) -C $(ROOT_DIR)/tests/kernel/$(KERNEL) clean + STARTUP_ADDR=0x8000 $(MAKE) -C $(ROOT_DIR)/tests/kernel/$(KERNEL) + cp 
$(ROOT_DIR)/tests/kernel/$(KERNEL)/$(KERNEL).bin $(KERNEL).bin + +kernel.bin.coe: $(KERNEL).bin + $(SCRIPT_DIR)/bin2coe.py --out=$@ --binary=$(KERNEL).bin --binaddr=8192 --depth=16384 --wordsize=64 + gen-sources: project_1/sources.txt project_1/sources.txt: mkdir -p project_1 $(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt -build: project_1/vortex.xpr -project_1/vortex.xpr: project_1/sources.txt project.tcl +build: project_1/project_1.xpr +project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project.tcl $(VIVADO) -mode batch -source project.tcl -tclargs project_1/sources.txt project_1/src $(SCRIPT_DIR) -run: project_1/vortex.xpr - $(VIVADO) project_1/vortex.xpr & +run: project_1/project_1.xpr + $(VIVADO) project_1/project_1.xpr & clean: - rm -rf project_1 + rm -rf project_1 $(KERNEL).bin kernel.bin.coe diff --git a/hw/syn/xilinx/test/kernel/Makefile b/hw/syn/xilinx/test/kernel/Makefile deleted file mode 100644 index 9f3b95c1a..000000000 --- a/hw/syn/xilinx/test/kernel/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -ROOT_DIR := $(realpath ../../../../..) 
-include $(ROOT_DIR)/config.mk - -ifeq ($(XLEN),64) -CFLAGS += -march=rv64imafd -mabi=lp64d -else -CFLAGS += -march=rv32imaf -mabi=ilp32f -endif - -SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/test/kernel - -SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts - -BIN2COE_PATH ?= $(SCRIPT_DIR)/bin2coe - -CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc -AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar -DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump -CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy - -CFLAGS += -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(VORTEX_HOME)/hw - -LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 - -PROJECT = kernel - -SRCS = $(SRC_DIR)/main.c $(SRC_DIR)/start.S - -all: $(PROJECT).elf $(PROJECT).hex $(PROJECT).bin $(PROJECT).dump $(PROJECT).bin.coe - -$(PROJECT).dump: $(PROJECT).elf - $(DP) -D $< > $@ - -$(PROJECT).hex: $(PROJECT).elf - $(CP) -O ihex $< $@ - -$(PROJECT).bin: $(PROJECT).elf - $(CP) -O binary $< $@ - -$(PROJECT).bin.coe: $(PROJECT).bin - $(BIN2COE_PATH)/bin2coe $< --out=$@ --binary=$(PROJECT).bin --data=$(PROJECT).dat --binaddr=8192 --depth=16384 --wordsize=64 - -$(PROJECT).elf: $(SRCS) - $(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -.depend: $(SRCS) - $(CC) $(CFLAGS) -MM $^ > .depend; - -clean: - rm -rf *.bin *.elf *.hex *.dump *.coe .depend diff --git a/hw/syn/xilinx/test/kernel/kernel.dat b/hw/syn/xilinx/test/kernel/kernel.dat deleted file mode 100644 index 6e197b719..000000000 --- a/hw/syn/xilinx/test/kernel/kernel.dat +++ /dev/null @@ -1,3 +0,0 @@ -@1 -000000C00000008000000002, -00000003000000020000000100000000, \ No newline at end of file diff --git a/hw/syn/xilinx/test/kernel/main.c b/hw/syn/xilinx/test/kernel/main.c deleted file mode 100644 index 4fcfd99c0..000000000 --- a/hw/syn/xilinx/test/kernel/main.c +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright © 2019-2023 -// 
-// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -typedef struct { - uint32_t count; - uint32_t src_addr; - uint32_t dst_addr; -} kernel_arg_t; - -int main() { - kernel_arg_t* arg = (kernel_arg_t*)csr_read(VX_CSR_MSCRATCH); - uint32_t count = arg->count; - int32_t* src_ptr = (int32_t*)arg->src_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - uint32_t offset = vx_core_id() * count; - - for (uint32_t i = 0; i < count; ++i) { - dst_ptr[offset + i] = src_ptr[offset + i]; - } - - return 0; -} diff --git a/hw/syn/xilinx/test/kernel/start.S b/hw/syn/xilinx/test/kernel/start.S deleted file mode 100644 index e9295d643..000000000 --- a/hw/syn/xilinx/test/kernel/start.S +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -.section .init, "ax" -.global _start -.type _start, @function -_start: - # call main routine - call main - - # end execution - .insn r 0x0b, 0, 0, x0, x0, x0 -.size _start, .-_start \ No newline at end of file diff --git a/hw/syn/xilinx/test/project.tcl.in b/hw/syn/xilinx/test/project.tcl.in index 61ee63464..45f9a9104 100644 --- a/hw/syn/xilinx/test/project.tcl.in +++ b/hw/syn/xilinx/test/project.tcl.in @@ -398,7 +398,7 @@ set_property -dict [ list \ CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/project_1_files/kernel.bin.coe} \ + CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/kernel.bin.coe} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_top.v b/hw/syn/xilinx/test/project_1_files/Vortex_top.v index a7adf71bc..cd634b9b6 100644 --- a/hw/syn/xilinx/test/project_1_files/Vortex_top.v +++ b/hw/syn/xilinx/test/project_1_files/Vortex_top.v @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,7 +22,7 @@ module Vortex_top #( input wire clk, input wire reset, - // AXI4 memory interface + // AXI4 memory interface output wire m_axi_mem_awvalid, input wire m_axi_mem_awready, output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr, @@ -68,141 +68,55 @@ module Vortex_top #( output wire busy ); - wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_awid_a [C_M_AXI_MEM_NUM_BANKS]; - wire [7:0] m_axi_mem_awlen_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_awsize_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_awburst_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_awlock_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_awcache_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_awprot_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_awqos_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_wvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_wready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_mem_wdata_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_arvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_arready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_mem_arid_a [C_M_AXI_MEM_NUM_BANKS]; - wire [7:0] m_axi_mem_arlen_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_arsize_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_arburst_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_arlock_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_arcache_a [C_M_AXI_MEM_NUM_BANKS]; - wire [2:0] m_axi_mem_arprot_a [C_M_AXI_MEM_NUM_BANKS]; - wire [3:0] m_axi_mem_arqos_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_rvalid_a [C_M_AXI_MEM_NUM_BANKS]; 
- wire m_axi_mem_rready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_mem_rdata_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_rlast_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_rid_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_bvalid_a [C_M_AXI_MEM_NUM_BANKS]; - wire m_axi_mem_bready_a [C_M_AXI_MEM_NUM_BANKS]; - wire [1:0] m_axi_mem_bresp_a [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_bid_a [C_M_AXI_MEM_NUM_BANKS]; - - assign m_axi_mem_awvalid = m_axi_mem_awvalid_a[0]; - assign m_axi_mem_awready_a[0] = m_axi_mem_awready; - assign m_axi_mem_awaddr = m_axi_mem_awaddr_a[0]; - assign m_axi_mem_awid = m_axi_mem_awid_a[0]; - assign m_axi_mem_awlen = m_axi_mem_awlen_a[0]; - assign m_axi_mem_awsize = m_axi_mem_awsize_a[0]; - assign m_axi_mem_awburst = m_axi_mem_awburst_a[0]; - assign m_axi_mem_awlock = m_axi_mem_awlock_a[0]; - assign m_axi_mem_awcache = m_axi_mem_awcache_a[0]; - assign m_axi_mem_awprot = m_axi_mem_awprot_a[0]; - assign m_axi_mem_awqos = m_axi_mem_awqos_a[0]; - - assign m_axi_mem_wvalid = m_axi_mem_wvalid_a[0]; - assign m_axi_mem_wready_a[0] = m_axi_mem_wready; - assign m_axi_mem_wdata = m_axi_mem_wdata_a[0]; - assign m_axi_mem_wstrb = m_axi_mem_wstrb_a[0]; - assign m_axi_mem_wlast = m_axi_mem_wlast_a[0]; - - assign m_axi_mem_arvalid = m_axi_mem_arvalid_a[0]; - assign m_axi_mem_arready_a[0] = m_axi_mem_arready; - assign m_axi_mem_araddr = m_axi_mem_araddr_a[0]; - assign m_axi_mem_arid = m_axi_mem_arid_a[0]; - assign m_axi_mem_arlen = m_axi_mem_arlen_a[0]; - assign m_axi_mem_arsize = m_axi_mem_arsize_a[0]; - assign m_axi_mem_arburst = m_axi_mem_arburst_a[0]; - assign m_axi_mem_arlock = m_axi_mem_arlock_a[0]; - assign m_axi_mem_arcache = m_axi_mem_arcache_a[0]; - assign m_axi_mem_arprot = m_axi_mem_arprot_a[0]; - assign m_axi_mem_arqos = m_axi_mem_arqos_a[0]; - - assign m_axi_mem_rvalid_a[0] = m_axi_mem_rvalid; - assign 
m_axi_mem_rready = m_axi_mem_rready_a[0]; - assign m_axi_mem_rdata_a[0] = m_axi_mem_rdata; - assign m_axi_mem_rlast_a[0] = m_axi_mem_rlast; - assign m_axi_mem_rid_a[0] = m_axi_mem_rid; - assign m_axi_mem_rresp_a[0] = m_axi_mem_rresp; - - assign m_axi_mem_bvalid_a[0] = m_axi_mem_bvalid; - assign m_axi_mem_bready = m_axi_mem_bready_a[0]; - assign m_axi_mem_bresp_a[0] = m_axi_mem_bresp; - assign m_axi_mem_bid_a[0] = m_axi_mem_bid; - - Vortex_axi #( - .AXI_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH), - .AXI_ADDR_WIDTH (C_M_AXI_GMEM_ADDR_WIDTH), - .AXI_TID_WIDTH (C_M_AXI_GMEM_ID_WIDTH) - ) inst ( - .clk (clk), - .reset (reset), - - .m_axi_awvalid (m_axi_mem_awvalid_a), - .m_axi_awready (m_axi_mem_awready_a), - .m_axi_awaddr (m_axi_mem_awaddr_a), - .m_axi_awid (m_axi_mem_awid_a), - .m_axi_awlen (m_axi_mem_awlen_a), - .m_axi_awsize (m_axi_mem_awsize_a), - .m_axi_awburst (m_axi_mem_awburst_a), - .m_axi_awlock (m_axi_mem_awlock_a), - .m_axi_awcache (m_axi_mem_awcache_a), - .m_axi_awprot (m_axi_mem_awprot_a), - .m_axi_awqos (m_axi_mem_awqos_a), - - .m_axi_wvalid (m_axi_mem_wvalid_a), - .m_axi_wready (m_axi_mem_wready_a), - .m_axi_wdata (m_axi_mem_wdata_a), - .m_axi_wstrb (m_axi_mem_wstrb_a), - .m_axi_wlast (m_axi_mem_wlast_a), - - .m_axi_bvalid (m_axi_mem_bvalid_a), - .m_axi_bready (m_axi_mem_bready_a), - .m_axi_bid (m_axi_mem_bid_a), - .m_axi_bresp (m_axi_mem_bresp_a), - - .m_axi_arvalid (m_axi_mem_arvalid_a), - .m_axi_arready (m_axi_mem_arready_a), - .m_axi_araddr (m_axi_mem_araddr_a), - .m_axi_arid (m_axi_mem_arid_a), - .m_axi_arlen (m_axi_mem_arlen_a), - .m_axi_arsize (m_axi_mem_arsize_a), - .m_axi_arburst (m_axi_mem_arburst_a), - .m_axi_arlock (m_axi_mem_arlock_a), - .m_axi_arcache (m_axi_mem_arcache_a), - .m_axi_arprot (m_axi_mem_arprot_a), - .m_axi_arqos (m_axi_mem_arqos_a), - - .m_axi_rvalid (m_axi_mem_rvalid_a), - .m_axi_rready (m_axi_mem_rready_a), - .m_axi_rdata (m_axi_mem_rdata_a), - .m_axi_rid (m_axi_mem_rid_a), - .m_axi_rresp (m_axi_mem_rresp_a), - .m_axi_rlast 
(m_axi_mem_rlast_a), - - .dcr_wr_valid (dcr_wr_valid), - .dcr_wr_addr (dcr_wr_addr), - .dcr_wr_data (dcr_wr_data), - - .busy (busy) + Vortex_wrap #( + .C_M_AXI_GMEM_DATA_WIDTH(C_M_AXI_GMEM_DATA_WIDTH), + .C_M_AXI_GMEM_ADDR_WIDTH(C_M_AXI_GMEM_ADDR_WIDTH), + .C_M_AXI_GMEM_ID_WIDTH(C_M_AXI_GMEM_ID_WIDTH), + .C_M_AXI_MEM_NUM_BANKS(C_M_AXI_MEM_NUM_BANKS) + ) wrapper ( + .clk(clk), + .reset(reset), + .m_axi_mem_awvalid(m_axi_mem_awvalid), + .m_axi_mem_awready(m_axi_mem_awready), + .m_axi_mem_awaddr(m_axi_mem_awaddr), + .m_axi_mem_awid(m_axi_mem_awid), + .m_axi_mem_awlen(m_axi_mem_awlen), + .m_axi_mem_awsize(m_axi_mem_awsize), + .m_axi_mem_awburst(m_axi_mem_awburst), + .m_axi_mem_awlock(m_axi_mem_awlock), + .m_axi_mem_awcache(m_axi_mem_awcache), + .m_axi_mem_awprot(m_axi_mem_awprot), + .m_axi_mem_awqos(m_axi_mem_awqos), + .m_axi_mem_wvalid(m_axi_mem_wvalid), + .m_axi_mem_wready(m_axi_mem_wready), + .m_axi_mem_wdata(m_axi_mem_wdata), + .m_axi_mem_wstrb(m_axi_mem_wstrb), + .m_axi_mem_wlast(m_axi_mem_wlast), + .m_axi_mem_arvalid(m_axi_mem_arvalid), + .m_axi_mem_arready(m_axi_mem_arready), + .m_axi_mem_araddr(m_axi_mem_araddr), + .m_axi_mem_arid(m_axi_mem_arid), + .m_axi_mem_arlen(m_axi_mem_arlen), + .m_axi_mem_arsize(m_axi_mem_arsize), + .m_axi_mem_arburst(m_axi_mem_arburst), + .m_axi_mem_arlock(m_axi_mem_arlock), + .m_axi_mem_arcache(m_axi_mem_arcache), + .m_axi_mem_arprot(m_axi_mem_arprot), + .m_axi_mem_arqos(m_axi_mem_arqos), + .m_axi_mem_rvalid(m_axi_mem_rvalid), + .m_axi_mem_rready(m_axi_mem_rready), + .m_axi_mem_rdata(m_axi_mem_rdata), + .m_axi_mem_rlast(m_axi_mem_rlast), + .m_axi_mem_rid(m_axi_mem_rid), + .m_axi_mem_rresp(m_axi_mem_rresp), + .m_axi_mem_bvalid(m_axi_mem_bvalid), + .m_axi_mem_bready(m_axi_mem_bready), + .m_axi_mem_bresp(m_axi_mem_bresp), + .m_axi_mem_bid(m_axi_mem_bid), + .dcr_wr_valid(dcr_wr_valid), + .dcr_wr_addr(dcr_wr_addr), + .dcr_wr_data(dcr_wr_data), + .busy(busy) ); - + endmodule diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv 
b/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv new file mode 100644 index 000000000..5ec7a868e --- /dev/null +++ b/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv @@ -0,0 +1,208 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module Vortex_wrap #( + parameter C_M_AXI_GMEM_DATA_WIDTH = 512, + parameter C_M_AXI_GMEM_ADDR_WIDTH = `XLEN, + parameter C_M_AXI_GMEM_ID_WIDTH = 32, + parameter C_M_AXI_MEM_NUM_BANKS = 1 +) ( + input wire clk, + input wire reset, + + // AXI4 memory interface + output wire m_axi_mem_awvalid, + input wire m_axi_mem_awready, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr, + output wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_awid, + output wire [7:0] m_axi_mem_awlen, + output wire [2:0] m_axi_mem_awsize, + output wire [1:0] m_axi_mem_awburst, + output wire [1:0] m_axi_mem_awlock, + output wire [3:0] m_axi_mem_awcache, + output wire [2:0] m_axi_mem_awprot, + output wire [3:0] m_axi_mem_awqos, + output wire m_axi_mem_wvalid, + input wire m_axi_mem_wready, + output wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_mem_wdata, + output wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb, + output wire m_axi_mem_wlast, + output wire m_axi_mem_arvalid, + input wire m_axi_mem_arready, + output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_araddr, + output wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_mem_arid, + output wire [7:0] m_axi_mem_arlen, + output wire [2:0] m_axi_mem_arsize, + 
output wire [1:0] m_axi_mem_arburst, + output wire [1:0] m_axi_mem_arlock, + output wire [3:0] m_axi_mem_arcache, + output wire [2:0] m_axi_mem_arprot, + output wire [3:0] m_axi_mem_arqos, + input wire m_axi_mem_rvalid, + output wire m_axi_mem_rready, + input wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_mem_rdata, + input wire m_axi_mem_rlast, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_rid, + input wire [1:0] m_axi_mem_rresp, + input wire m_axi_mem_bvalid, + output wire m_axi_mem_bready, + input wire [1:0] m_axi_mem_bresp, + input wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_bid, + + input wire dcr_wr_valid, + input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, + input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data, + + output wire busy +); + + wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_awid_a [C_M_AXI_MEM_NUM_BANKS]; + wire [7:0] m_axi_mem_awlen_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_awsize_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_awburst_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_awlock_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_awcache_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_awprot_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_awqos_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_wvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_wready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_DATA_WIDTH-1:0] m_axi_mem_wdata_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0] m_axi_mem_wstrb_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_wlast_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_arvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_arready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH-1:0] m_axi_mem_arid_a [C_M_AXI_MEM_NUM_BANKS]; + wire 
[7:0] m_axi_mem_arlen_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_arsize_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_arburst_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_arlock_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_arcache_a [C_M_AXI_MEM_NUM_BANKS]; + wire [2:0] m_axi_mem_arprot_a [C_M_AXI_MEM_NUM_BANKS]; + wire [3:0] m_axi_mem_arqos_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_rvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_rready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0] m_axi_mem_rdata_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_rlast_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_rid_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_bvalid_a [C_M_AXI_MEM_NUM_BANKS]; + wire m_axi_mem_bready_a [C_M_AXI_MEM_NUM_BANKS]; + wire [1:0] m_axi_mem_bresp_a [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_GMEM_ID_WIDTH - 1:0] m_axi_mem_bid_a [C_M_AXI_MEM_NUM_BANKS]; + + assign m_axi_mem_awvalid = m_axi_mem_awvalid_a[0]; + assign m_axi_mem_awready_a[0] = m_axi_mem_awready; + assign m_axi_mem_awaddr = m_axi_mem_awaddr_a[0]; + assign m_axi_mem_awid = m_axi_mem_awid_a[0]; + assign m_axi_mem_awlen = m_axi_mem_awlen_a[0]; + assign m_axi_mem_awsize = m_axi_mem_awsize_a[0]; + assign m_axi_mem_awburst = m_axi_mem_awburst_a[0]; + assign m_axi_mem_awlock = m_axi_mem_awlock_a[0]; + assign m_axi_mem_awcache = m_axi_mem_awcache_a[0]; + assign m_axi_mem_awprot = m_axi_mem_awprot_a[0]; + assign m_axi_mem_awqos = m_axi_mem_awqos_a[0]; + + assign m_axi_mem_wvalid = m_axi_mem_wvalid_a[0]; + assign m_axi_mem_wready_a[0] = m_axi_mem_wready; + assign m_axi_mem_wdata = m_axi_mem_wdata_a[0]; + assign m_axi_mem_wstrb = m_axi_mem_wstrb_a[0]; + assign m_axi_mem_wlast = m_axi_mem_wlast_a[0]; + + assign m_axi_mem_arvalid = m_axi_mem_arvalid_a[0]; + assign m_axi_mem_arready_a[0] = m_axi_mem_arready; + assign m_axi_mem_araddr = m_axi_mem_araddr_a[0]; + assign m_axi_mem_arid = 
m_axi_mem_arid_a[0]; + assign m_axi_mem_arlen = m_axi_mem_arlen_a[0]; + assign m_axi_mem_arsize = m_axi_mem_arsize_a[0]; + assign m_axi_mem_arburst = m_axi_mem_arburst_a[0]; + assign m_axi_mem_arlock = m_axi_mem_arlock_a[0]; + assign m_axi_mem_arcache = m_axi_mem_arcache_a[0]; + assign m_axi_mem_arprot = m_axi_mem_arprot_a[0]; + assign m_axi_mem_arqos = m_axi_mem_arqos_a[0]; + + assign m_axi_mem_rvalid_a[0] = m_axi_mem_rvalid; + assign m_axi_mem_rready = m_axi_mem_rready_a[0]; + assign m_axi_mem_rdata_a[0] = m_axi_mem_rdata; + assign m_axi_mem_rlast_a[0] = m_axi_mem_rlast; + assign m_axi_mem_rid_a[0] = m_axi_mem_rid; + assign m_axi_mem_rresp_a[0] = m_axi_mem_rresp; + + assign m_axi_mem_bvalid_a[0] = m_axi_mem_bvalid; + assign m_axi_mem_bready = m_axi_mem_bready_a[0]; + assign m_axi_mem_bresp_a[0] = m_axi_mem_bresp; + assign m_axi_mem_bid_a[0] = m_axi_mem_bid; + + Vortex_axi #( + .AXI_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH), + .AXI_ADDR_WIDTH (C_M_AXI_GMEM_ADDR_WIDTH), + .AXI_TID_WIDTH (C_M_AXI_GMEM_ID_WIDTH) + ) inst ( + .clk (clk), + .reset (reset), + + .m_axi_awvalid (m_axi_mem_awvalid_a), + .m_axi_awready (m_axi_mem_awready_a), + .m_axi_awaddr (m_axi_mem_awaddr_a), + .m_axi_awid (m_axi_mem_awid_a), + .m_axi_awlen (m_axi_mem_awlen_a), + .m_axi_awsize (m_axi_mem_awsize_a), + .m_axi_awburst (m_axi_mem_awburst_a), + .m_axi_awlock (m_axi_mem_awlock_a), + .m_axi_awcache (m_axi_mem_awcache_a), + .m_axi_awprot (m_axi_mem_awprot_a), + .m_axi_awqos (m_axi_mem_awqos_a), + + .m_axi_wvalid (m_axi_mem_wvalid_a), + .m_axi_wready (m_axi_mem_wready_a), + .m_axi_wdata (m_axi_mem_wdata_a), + .m_axi_wstrb (m_axi_mem_wstrb_a), + .m_axi_wlast (m_axi_mem_wlast_a), + + .m_axi_bvalid (m_axi_mem_bvalid_a), + .m_axi_bready (m_axi_mem_bready_a), + .m_axi_bid (m_axi_mem_bid_a), + .m_axi_bresp (m_axi_mem_bresp_a), + + .m_axi_arvalid (m_axi_mem_arvalid_a), + .m_axi_arready (m_axi_mem_arready_a), + .m_axi_araddr (m_axi_mem_araddr_a), + .m_axi_arid (m_axi_mem_arid_a), + .m_axi_arlen 
(m_axi_mem_arlen_a), + .m_axi_arsize (m_axi_mem_arsize_a), + .m_axi_arburst (m_axi_mem_arburst_a), + .m_axi_arlock (m_axi_mem_arlock_a), + .m_axi_arcache (m_axi_mem_arcache_a), + .m_axi_arprot (m_axi_mem_arprot_a), + .m_axi_arqos (m_axi_mem_arqos_a), + + .m_axi_rvalid (m_axi_mem_rvalid_a), + .m_axi_rready (m_axi_mem_rready_a), + .m_axi_rdata (m_axi_mem_rdata_a), + .m_axi_rid (m_axi_mem_rid_a), + .m_axi_rresp (m_axi_mem_rresp_a), + .m_axi_rlast (m_axi_mem_rlast_a), + + .dcr_wr_valid (dcr_wr_valid), + .dcr_wr_addr (dcr_wr_addr), + .dcr_wr_data (dcr_wr_data), + + .busy (busy) + ); + +endmodule diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index 050b1b48d..b02454412 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -8,6 +8,8 @@ endif VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel +STARTUP_ADDR ?= 0x80000000 + LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-divergence=0 @@ -31,7 +33,7 @@ CFLAGS += -DXLEN_$(XLEN) -DNDEBUG LIBC_LIB += -L$(LIBC_VORTEX)/lib -lm -lc LIBC_LIB += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a -LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(VORTEX_KN_PATH)/libvortex.a $(LIBC_LIB) +LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(LIBC_LIB) all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump From 693a9f648d41a5ee76755f924e0576b215816f7d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 19 Aug 2024 18:25:38 -0700 Subject: [PATCH 144/488] Ci script update --- ci/system_updates.sh | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/ci/system_updates.sh b/ci/system_updates.sh index 43abbe5ab..a62ed253b 100755 --- a/ci/system_updates.sh +++ 
b/ci/system_updates.sh @@ -16,12 +16,31 @@ set -e +# Function to check if GCC version is less than 11 +check_gcc_version() { + local gcc_version + gcc_version=$(gcc -dumpversion) + if dpkg --compare-versions "$gcc_version" lt 11; then + return 0 # GCC version is less than 11 + else + return 1 # GCC version is 11 or greater + fi +} + +# Update package list apt-get update -y -add-apt-repository -y ppa:ubuntu-toolchain-r/test -apt-get update -apt-get install -y g++-11 gcc-11 -update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 -update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 +# install system dependencies +apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache -apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache +# Check and install GCC 11 if necessary +if check_gcc_version; then + echo "GCC version is less than 11. Installing GCC 11..." + add-apt-repository -y ppa:ubuntu-toolchain-r/test + apt-get update + apt-get install -y g++-11 gcc-11 + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 +else + echo "GCC version is 11 or greater. No need to install GCC 11." 
+fi From 5e241c153c9e0d7e3e29eae596c47ed5805c7fbd Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 19 Aug 2024 18:36:37 -0700 Subject: [PATCH 145/488] Ci script update --- .github/workflows/ci.yml | 6 +++--- README.md | 13 +++++-------- ci/{system_updates.sh => install_dependencies.sh} | 0 miscs/docker/Dockerfile.ubuntu | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) rename ci/{system_updates.sh => install_dependencies.sh} (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f49dd42bf..724ec2a13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,7 +46,7 @@ jobs: - name: Install Dependencies if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true' run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Setup Toolchain if: steps.cache-toolchain.outputs.cache-hit != 'true' @@ -75,7 +75,7 @@ jobs: - name: Install Dependencies run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Cache Toolchain Directory id: cache-toolchain @@ -126,7 +126,7 @@ jobs: - name: Install Dependencies run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Cache Toolchain Directory id: cache-toolchain diff --git a/README.md b/README.md index d789d00bd..d4ed68a59 100644 --- a/README.md +++ b/README.md @@ -44,19 +44,16 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
- [Ramulator](https://github.com/CMU-SAFARI/ramulator.git) - [Yosys](https://github.com/YosysHQ/yosys) - [Sv2v](https://github.com/zachjs/sv2v) -### Install development tools -```sh -sudo apt-get install build-essential -sudo apt-get install binutils -sudo apt-get install python -sudo apt-get install uuid-dev -sudo apt-get install git -``` ### Install Vortex codebase ```sh git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git cd vortex ``` +### Install system dependencies +```sh +# ensure dependent libraries are present +sudo ./ci/install_dependencies.sh +``` ### Configure your build folder ```sh mkdir build diff --git a/ci/system_updates.sh b/ci/install_dependencies.sh similarity index 100% rename from ci/system_updates.sh rename to ci/install_dependencies.sh diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.ubuntu index c3e72a0f4..f3a864ce5 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.ubuntu @@ -39,7 +39,7 @@ RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /v WORKDIR /vortex # install system dependencies -RUN ./ci/system_updates.sh +RUN ./ci/install_dependencies.sh # Configure the build folder RUN mkdir build && cd build && ../configure From 005d480bb459a6dda8419918caf921a49ac3a07e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 Aug 2024 23:30:44 -0700 Subject: [PATCH 146/488] minor updates --- hw/rtl/fpu/VX_fpu_dsp.sv | 32 ++++++++++---------- hw/rtl/libs/VX_pe_serializer.sv | 2 +- hw/rtl/libs/VX_pipe_buffer.sv | 7 +++-- hw/rtl/libs/VX_sp_ram.sv | 2 ++ hw/rtl/mem/VX_lmem_switch.sv | 52 ++++++++++++++++----------------- 5 files changed, 49 insertions(+), 46 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 967bbbc29..2e479976a 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -51,20 +51,20 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam FPU_DIVSQRT = 1; localparam FPU_CVT = 2; localparam 
FPU_NCP = 3; - localparam NUM_FPC = 4; - localparam FPC_BITS = `LOG2UP(NUM_FPC); + localparam NUM_FPCORES = 4; + localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) - wire [NUM_FPC-1:0] per_core_ready_in; - wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result; - wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out; - wire [NUM_FPC-1:0] per_core_ready_out; - wire [NUM_FPC-1:0] per_core_valid_out; - wire [NUM_FPC-1:0] per_core_has_fflags; - fflags_t [NUM_FPC-1:0] per_core_fflags; + wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; + wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; + wire [NUM_FPCORES-1:0] per_core_ready_out; + wire [NUM_FPCORES-1:0] per_core_valid_out; + wire [NUM_FPCORES-1:0] per_core_has_fflags; + fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire div_ready_in, sqrt_ready_in; wire [NUM_LANES-1:0][31:0] div_result, sqrt_result; @@ -74,7 +74,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire div_has_fflags, sqrt_has_fflags; fflags_t div_fflags, sqrt_fflags; - reg [FPC_BITS-1:0] core_select; + reg [FPCORES_BITS-1:0] core_select; reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed; always @(*) begin @@ -122,6 +122,9 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (datab) `UNUSED_VAR (datac) + // can accept new request? 
+ assign ready_in = per_core_ready_in[core_select]; + VX_fpu_fma #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) @@ -272,10 +275,10 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out; + reg [NUM_FPCORES-1:0][RSP_DATAW+2-1:0] per_core_data_out; always @(*) begin - for (integer i = 0; i < NUM_FPC; ++i) begin + for (integer i = 0; i < NUM_FPCORES; ++i) begin per_core_data_out[i][RSP_DATAW+1:2] = { per_core_result[i], per_core_has_fflags[i], @@ -294,7 +297,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (op_ret_int_out) VX_stream_arb #( - .NUM_INPUTS (NUM_FPC), + .NUM_INPUTS (NUM_FPCORES), .DATAW (RSP_DATAW + 2), .ARBITER ("R"), .OUT_BUF (OUT_BUF) @@ -326,9 +329,6 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `endif end - // can accept new request? - assign ready_in = per_core_ready_in[core_select]; - endmodule `endif diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index eac1eddcb..4e3a29132 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -137,7 +137,7 @@ module VX_pe_serializer #( assign pe_data_in_s = data_in; - assign enable = ready_out_u || ~valid_out_u; + assign enable = ready_out_u || ~valid_out_s; assign ready_in = enable; assign pe_enable = enable; diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv index 167235c17..6ed6cf8ec 100644 --- a/hw/rtl/libs/VX_pipe_buffer.sv +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -24,8 +24,9 @@ `TRACING_OFF module VX_pipe_buffer #( - parameter DATAW = 1, - parameter DEPTH = 1 + parameter DATAW = 1, + parameter RESETW = 0, + parameter DEPTH = 1 ) ( input wire clk, input wire reset, @@ -57,7 +58,7 @@ module VX_pipe_buffer #( assign ready[i] = (ready[i+1] || ~valid[i+1]); VX_pipe_register #( .DATAW (1 + DATAW), - .RESETW (1) + .RESETW (1 + RESETW) ) pipe_register ( .clk (clk), .reset (reset), diff --git 
a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 4ab2a9b7a..3e73a013f 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -24,6 +24,7 @@ module VX_sp_ram #( parameter RW_ASSERT = 0, parameter LUTRAM = 0, parameter RESET_RAM = 0, + parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -48,6 +49,7 @@ module VX_sp_ram #( .RW_ASSERT (RW_ASSERT), .LUTRAM (LUTRAM), .RESET_RAM (RESET_RAM), + .READ_ENABLE (READ_ENABLE), .INIT_ENABLE (INIT_ENABLE), .INIT_FILE (INIT_FILE), .INIT_VALUE (INIT_VALUE), diff --git a/hw/rtl/mem/VX_lmem_switch.sv b/hw/rtl/mem/VX_lmem_switch.sv index 628190a8d..642907785 100644 --- a/hw/rtl/mem/VX_lmem_switch.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -22,8 +22,8 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( input wire clk, input wire reset, VX_lsu_mem_if.slave lsu_in_if, - VX_lsu_mem_if.master cache_out_if, - VX_lsu_mem_if.master lmem_out_if + VX_lsu_mem_if.master global_out_if, + VX_lsu_mem_if.master local_out_if ); localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; @@ -60,17 +60,17 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( lsu_in_if.req_data.tag }), .ready_in (req_global_ready), - .valid_out (cache_out_if.req_valid), + .valid_out (global_out_if.req_valid), .data_out ({ - cache_out_if.req_data.mask, - cache_out_if.req_data.rw, - cache_out_if.req_data.addr, - cache_out_if.req_data.data, - cache_out_if.req_data.byteen, - cache_out_if.req_data.flags, - cache_out_if.req_data.tag + global_out_if.req_data.mask, + global_out_if.req_data.rw, + global_out_if.req_data.addr, + global_out_if.req_data.data, + global_out_if.req_data.byteen, + global_out_if.req_data.flags, + global_out_if.req_data.tag }), - .ready_out (cache_out_if.req_ready) + .ready_out 
(global_out_if.req_ready) ); VX_elastic_buffer #( @@ -91,17 +91,17 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( lsu_in_if.req_data.tag }), .ready_in (req_local_ready), - .valid_out (lmem_out_if.req_valid), + .valid_out (local_out_if.req_valid), .data_out ({ - lmem_out_if.req_data.mask, - lmem_out_if.req_data.rw, - lmem_out_if.req_data.addr, - lmem_out_if.req_data.data, - lmem_out_if.req_data.byteen, - lmem_out_if.req_data.flags, - lmem_out_if.req_data.tag + local_out_if.req_data.mask, + local_out_if.req_data.rw, + local_out_if.req_data.addr, + local_out_if.req_data.data, + local_out_if.req_data.byteen, + local_out_if.req_data.flags, + local_out_if.req_data.tag }), - .ready_out (lmem_out_if.req_ready) + .ready_out (local_out_if.req_ready) ); VX_stream_arb #( @@ -113,16 +113,16 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in ({ - lmem_out_if.rsp_valid, - cache_out_if.rsp_valid + local_out_if.rsp_valid, + global_out_if.rsp_valid }), .ready_in ({ - lmem_out_if.rsp_ready, - cache_out_if.rsp_ready + local_out_if.rsp_ready, + global_out_if.rsp_ready }), .data_in ({ - lmem_out_if.rsp_data, - cache_out_if.rsp_data + local_out_if.rsp_data, + global_out_if.rsp_data }), .data_out (lsu_in_if.rsp_data), .valid_out (lsu_in_if.rsp_valid), From 771a10ea0ccef0090a0d223fc73ad38a4cdfc30d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 20 Aug 2024 23:31:16 -0700 Subject: [PATCH 147/488] minor update --- hw/rtl/core/VX_mem_unit.sv | 65 ++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 4f94c2765..6569c1d47 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -55,8 +55,8 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .lsu_in_if (lsu_mem_in_if[i]), - .cache_out_if (lsu_dcache_if[i]), - .lmem_out_if (lsu_lmem_if[i]) + .global_out_if(lsu_dcache_if[i]), + .local_out_if 
(lsu_lmem_if[i]) ); end @@ -65,7 +65,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapter_slices VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) @@ -123,15 +123,15 @@ module VX_mem_unit import VX_gpu_pkg::*; #( `endif - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks + VX_lsu_mem_if #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); - VX_lsu_mem_if #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_coalesced_if(); + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks `RESET_RELAY (mem_coalescer_reset, reset); @@ -168,30 +168,35 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .in_rsp_ready (lsu_dcache_if[i].rsp_ready), // Output request - .out_req_valid (dcache_coalesced_if.req_valid), - .out_req_mask (dcache_coalesced_if.req_data.mask), - .out_req_rw (dcache_coalesced_if.req_data.rw), - .out_req_byteen (dcache_coalesced_if.req_data.byteen), - .out_req_addr (dcache_coalesced_if.req_data.addr), - .out_req_flags (dcache_coalesced_if.req_data.flags), - .out_req_data (dcache_coalesced_if.req_data.data), - .out_req_tag (dcache_coalesced_if.req_data.tag), - .out_req_ready (dcache_coalesced_if.req_ready), + .out_req_valid (dcache_coalesced_if[i].req_valid), + .out_req_mask (dcache_coalesced_if[i].req_data.mask), + .out_req_rw (dcache_coalesced_if[i].req_data.rw), + .out_req_byteen (dcache_coalesced_if[i].req_data.byteen), + .out_req_addr (dcache_coalesced_if[i].req_data.addr), + .out_req_flags (dcache_coalesced_if[i].req_data.flags), + .out_req_data 
(dcache_coalesced_if[i].req_data.data), + .out_req_tag (dcache_coalesced_if[i].req_data.tag), + .out_req_ready (dcache_coalesced_if[i].req_ready), // Output response - .out_rsp_valid (dcache_coalesced_if.rsp_valid), - .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), - .out_rsp_data (dcache_coalesced_if.rsp_data.data), - .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), - .out_rsp_ready (dcache_coalesced_if.rsp_ready) + .out_rsp_valid (dcache_coalesced_if[i].rsp_valid), + .out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask), + .out_rsp_data (dcache_coalesced_if[i].rsp_data.data), + .out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag), + .out_rsp_ready (dcache_coalesced_if[i].rsp_ready) ); - - end else begin - - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); - end + end else begin + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); + end + + end + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapter_slices + VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), .TAG_WIDTH (DCACHE_TAG_WIDTH) @@ -208,7 +213,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ) dcache_adapter ( .clk (clk), .reset (reset), - .lsu_mem_if (dcache_coalesced_if), + .lsu_mem_if (dcache_coalesced_if[i]), .mem_bus_if (dcache_bus_tmp_if) ); From 9797c6c48aee4496498252426a1e79a02d8ac127 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 03:38:15 -0700 Subject: [PATCH 148/488] minor udpate --- hw/rtl/libs/VX_cyclic_arbiter.sv | 12 +++++++----- hw/rtl/libs/VX_priority_encoder.sv | 3 +-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index dc4de1300..0b8fcedfe 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -40,17 +40,17 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; - wire [LOG_NUM_REQS-1:0] grant_index_um; + wire [LOG_NUM_REQS-1:0] 
grant_index_um, grant_index_ql; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; end else if (grant_valid && grant_ready) begin - if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) begin + if (!IS_POW2 && grant_index_ql == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; end else begin - grant_index_r <= grant_index + LOG_NUM_REQS'(1); + grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); end end end @@ -64,8 +64,10 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); - assign grant_index = requests[grant_index_r] ? grant_index_r : grant_index_um; - assign grant_onehot = NUM_REQS'(1) << grant_index; + assign grant_index_ql = requests[grant_index_r] ? grant_index_r : grant_index_um; + + assign grant_index = grant_index_ql; + assign grant_onehot = NUM_REQS'(1) << grant_index_ql; end diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 1d34f0e51..f96a07bb7 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -65,11 +65,10 @@ module VX_priority_encoder #( ) lzc ( .data_in (reversed), .data_out (index_out), - `UNUSED_PIN (valid_out) + .valid_out(valid_out) ); assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - assign valid_out = scan_lo[N-1]; end else if (MODEL == 2) begin From 177f0efc597850e229ffe0f3ca9c120463a27770 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 03:39:09 -0700 Subject: [PATCH 149/488] minor update --- hw/rtl/core/VX_operands.sv | 81 +++++++++++++++++++------------------- hw/rtl/mem/VX_local_mem.sv | 13 +++--- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 1b9c6f010..f47b4964f 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -59,18 +59,17 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, 
gpr_rd_ready; - wire [NUM_BANKS-1:0] gpr_rd_valid_st1; + wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; - wire pipe_valid_st1, pipe_ready_st1; + wire pipe_valid_st1, pipe_ready_st1, pipe_in_ready; wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2, src_data_m_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_m_st2; + wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; reg [NUM_SRC_OPDS-1:0] data_fetched_n; wire [NUM_SRC_OPDS-1:0] data_fetched_st1; @@ -123,15 +122,8 @@ module VX_operands import VX_gpu_pkg::*; #( .ready_out (gpr_rd_ready) ); - wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1; - assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}}; - assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; - - wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1; - wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; - always @(*) begin has_collision_n = 0; for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin @@ -164,47 +156,54 @@ module VX_operands import VX_gpu_pkg::*; #( scoreboard_if.data.uuid }; - VX_pipe_register #( - .DATAW (1 + NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), - .RESETW (1 + NUM_SRC_OPDS) + assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; + + wire pipe_fire_st1 = pipe_valid_st1 && 
pipe_ready_st1; + wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; + + VX_pipe_buffer #( + .DATAW (NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), + .RESETW (NUM_SRC_OPDS) ) pipe_reg1 ( .clk (clk), .reset (reset), - .enable (pipe_in_ready), - .data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), - .data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}) + .valid_in (scoreboard_if.valid), + .ready_in (pipe_in_ready), + .data_in ({data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), + .data_out ({data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), + .valid_out(pipe_valid_st1), + .ready_out(pipe_ready_st1) ); - assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2; - - always @(*) begin - gpr_rd_data_st1 = '0; - for (integer b = 0; b < NUM_BANKS; ++b) begin - if (gpr_rd_valid_st1[b]) begin - gpr_rd_data_st1[gpr_rd_req_idx_st1[b]] = gpr_rd_data[b]; - end - end - end - - assign src_data_m_st2 = src_data_st2 | gpr_rd_data_st2; - assign src_data_st1 = pipe_fire_st2 ? 
'0 : src_data_m_st2; wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW - VX_pipe_register #( - .DATAW (1 + NUM_SRC_OPDS * REGS_DATAW + NUM_SRC_OPDS * REGS_DATAW + META_DATAW), - .RESETW (1 + NUM_SRC_OPDS * REGS_DATAW) + VX_pipe_buffer #( + .DATAW (NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), + .RESETW (NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), .reset (pipe2_reset), - .enable (pipe_ready_st1), - .data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_data_st1, pipe_data_st1}), - .data_out ({pipe_valid_st2, src_data_st2, gpr_rd_data_st2, pipe_data_st2}) + .valid_in (pipe_valid2_st1), + .ready_in (pipe_ready_st1), + .data_in ({src_data_st1, gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), + .data_out ({src_data_st2, gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), + .valid_out(pipe_valid_st2), + .ready_out(pipe_ready_st2) ); + always @(*) begin + src_data_m_st2 = src_data_st2; + for (integer b = 0; b < NUM_BANKS; ++b) begin + if (gpr_rd_valid_st2[b]) begin + src_data_m_st2[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b]; + end + end + end + VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -264,6 +263,8 @@ module VX_operands import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), + .READ_ENABLE (1), + .OUT_REG (1), .WRENW (BYTEENW), `ifdef GPR_RESET .RESET_RAM (1), @@ -278,7 +279,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data[b]) + .rdata (gpr_rd_data_st2[b]) ); end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index aff058cb9..72e55fe8b 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -163,12 +163,13 @@ module VX_local_mem import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_BANKS; ++i) begin wire bank_rsp_valid, 
bank_rsp_ready; - wire [WORD_WIDTH-1:0] bank_rsp_data; VX_sp_ram #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), + .READ_ENABLE (1), + .OUT_REG (1), .NO_RWCHECK (1) ) data_store ( .clk (clk), @@ -178,7 +179,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .wren (per_bank_req_byteen[i]), .addr (per_bank_req_addr[i]), .wdata (per_bank_req_data[i]), - .rdata (bank_rsp_data) + .rdata (per_bank_rsp_data[i]) ); // read-during-write hazard detection @@ -194,20 +195,20 @@ module VX_local_mem import VX_gpu_pkg::*; #( end wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr); - // drop write response and stall on read-during-write hazard + // drop write response assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard; assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard; // register BRAM output VX_pipe_buffer #( - .DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH) + .DATAW (REQ_SEL_WIDTH + TAG_WIDTH) ) bram_buf ( .clk (clk), .reset (reset), .valid_in (bank_rsp_valid), .ready_in (bank_rsp_ready), - .data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}), - .data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}), + .data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}), + .data_out ({per_bank_rsp_idx[i], per_bank_rsp_tag[i]}), .valid_out (per_bank_rsp_valid[i]), .ready_out (per_bank_rsp_ready[i]) ); From 811ceb5dc086fc0041dd13ec7a0afcc37550810e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 13:00:05 -0700 Subject: [PATCH 150/488] minor update --- hw/rtl/libs/VX_priority_encoder.sv | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index f96a07bb7..2138ea457 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -49,6 +49,27 @@ module 
VX_priority_encoder #( end else if (MODEL == 1) begin + `IGNORE_UNOPTFLAT_BEGIN + wire [N-1:0] higher_pri_regs; + `IGNORE_UNOPTFLAT_END + + assign higher_pri_regs[0] = 1'b0; + for (genvar i = 1; i < N; ++i) begin + assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; + end + assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; + + VX_lzc #( + .N (N), + .REVERSE (1) + ) lzc ( + .data_in (reversed), + .data_out (index_out), + .valid_out (valid_out) + ); + + end else if (MODEL == 2) begin + wire [N-1:0] scan_lo; VX_scan #( @@ -70,27 +91,6 @@ module VX_priority_encoder #( assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - end else if (MODEL == 2) begin - - `IGNORE_UNOPTFLAT_BEGIN - wire [N-1:0] higher_pri_regs; - `IGNORE_UNOPTFLAT_END - - assign higher_pri_regs[0] = 1'b0; - for (genvar i = 1; i < N; ++i) begin - assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; - end - assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; - - VX_lzc #( - .N (N), - .REVERSE (1) - ) lzc ( - .data_in (reversed), - .data_out (index_out), - .valid_out (valid_out) - ); - end else if (MODEL == 3) begin assign onehot_out = reversed & -reversed; From ca3499f3dfe3864c8cdc13119989b4f085565cab Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 21 Aug 2024 17:54:30 -0700 Subject: [PATCH 151/488] minor update --- hw/syn/altera/{quartus => dut}/Makefile | 0 hw/syn/altera/{quartus => dut}/cache/Makefile | 0 hw/syn/altera/{quartus => dut}/common.mk | 0 hw/syn/altera/{quartus => dut}/core/Makefile | 0 hw/syn/altera/{quartus => dut}/fpu/Makefile | 0 hw/syn/altera/{quartus => dut}/issue/Makefile | 0 hw/syn/altera/{quartus => dut}/lmem/Makefile | 0 hw/syn/altera/{quartus => dut}/mem_unit/Makefile | 0 hw/syn/altera/{quartus => dut}/project.sdc | 0 hw/syn/altera/{quartus => dut}/project.tcl | 0 hw/syn/altera/{quartus => dut}/test/Makefile | 0 hw/syn/altera/{quartus => dut}/timing-html.tcl | 0 hw/syn/altera/{quartus => dut}/top/Makefile 
| 0 hw/syn/altera/{quartus => dut}/unittest/Makefile | 0 hw/syn/altera/{quartus => dut}/vortex/Makefile | 0 hw/syn/xilinx/{test => sandbox}/Makefile | 0 hw/syn/xilinx/{test => sandbox}/project.tcl.in | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_top.v | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_wrap.sv | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/kernel.bin.coe | 0 hw/syn/xilinx/{test => sandbox}/project_1_files/testbench.v | 0 21 files changed, 0 insertions(+), 0 deletions(-) rename hw/syn/altera/{quartus => dut}/Makefile (100%) rename hw/syn/altera/{quartus => dut}/cache/Makefile (100%) rename hw/syn/altera/{quartus => dut}/common.mk (100%) rename hw/syn/altera/{quartus => dut}/core/Makefile (100%) rename hw/syn/altera/{quartus => dut}/fpu/Makefile (100%) rename hw/syn/altera/{quartus => dut}/issue/Makefile (100%) rename hw/syn/altera/{quartus => dut}/lmem/Makefile (100%) rename hw/syn/altera/{quartus => dut}/mem_unit/Makefile (100%) rename hw/syn/altera/{quartus => dut}/project.sdc (100%) rename hw/syn/altera/{quartus => dut}/project.tcl (100%) rename hw/syn/altera/{quartus => dut}/test/Makefile (100%) rename hw/syn/altera/{quartus => dut}/timing-html.tcl (100%) rename hw/syn/altera/{quartus => dut}/top/Makefile (100%) rename hw/syn/altera/{quartus => dut}/unittest/Makefile (100%) rename hw/syn/altera/{quartus => dut}/vortex/Makefile (100%) rename hw/syn/xilinx/{test => sandbox}/Makefile (100%) rename hw/syn/xilinx/{test => sandbox}/project.tcl.in (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_top.v (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/Vortex_wrap.sv (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/kernel.bin.coe (100%) rename hw/syn/xilinx/{test => sandbox}/project_1_files/testbench.v (100%) diff --git a/hw/syn/altera/quartus/Makefile b/hw/syn/altera/dut/Makefile similarity index 100% rename from hw/syn/altera/quartus/Makefile rename to hw/syn/altera/dut/Makefile diff 
--git a/hw/syn/altera/quartus/cache/Makefile b/hw/syn/altera/dut/cache/Makefile similarity index 100% rename from hw/syn/altera/quartus/cache/Makefile rename to hw/syn/altera/dut/cache/Makefile diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/dut/common.mk similarity index 100% rename from hw/syn/altera/quartus/common.mk rename to hw/syn/altera/dut/common.mk diff --git a/hw/syn/altera/quartus/core/Makefile b/hw/syn/altera/dut/core/Makefile similarity index 100% rename from hw/syn/altera/quartus/core/Makefile rename to hw/syn/altera/dut/core/Makefile diff --git a/hw/syn/altera/quartus/fpu/Makefile b/hw/syn/altera/dut/fpu/Makefile similarity index 100% rename from hw/syn/altera/quartus/fpu/Makefile rename to hw/syn/altera/dut/fpu/Makefile diff --git a/hw/syn/altera/quartus/issue/Makefile b/hw/syn/altera/dut/issue/Makefile similarity index 100% rename from hw/syn/altera/quartus/issue/Makefile rename to hw/syn/altera/dut/issue/Makefile diff --git a/hw/syn/altera/quartus/lmem/Makefile b/hw/syn/altera/dut/lmem/Makefile similarity index 100% rename from hw/syn/altera/quartus/lmem/Makefile rename to hw/syn/altera/dut/lmem/Makefile diff --git a/hw/syn/altera/quartus/mem_unit/Makefile b/hw/syn/altera/dut/mem_unit/Makefile similarity index 100% rename from hw/syn/altera/quartus/mem_unit/Makefile rename to hw/syn/altera/dut/mem_unit/Makefile diff --git a/hw/syn/altera/quartus/project.sdc b/hw/syn/altera/dut/project.sdc similarity index 100% rename from hw/syn/altera/quartus/project.sdc rename to hw/syn/altera/dut/project.sdc diff --git a/hw/syn/altera/quartus/project.tcl b/hw/syn/altera/dut/project.tcl similarity index 100% rename from hw/syn/altera/quartus/project.tcl rename to hw/syn/altera/dut/project.tcl diff --git a/hw/syn/altera/quartus/test/Makefile b/hw/syn/altera/dut/test/Makefile similarity index 100% rename from hw/syn/altera/quartus/test/Makefile rename to hw/syn/altera/dut/test/Makefile diff --git a/hw/syn/altera/quartus/timing-html.tcl 
b/hw/syn/altera/dut/timing-html.tcl similarity index 100% rename from hw/syn/altera/quartus/timing-html.tcl rename to hw/syn/altera/dut/timing-html.tcl diff --git a/hw/syn/altera/quartus/top/Makefile b/hw/syn/altera/dut/top/Makefile similarity index 100% rename from hw/syn/altera/quartus/top/Makefile rename to hw/syn/altera/dut/top/Makefile diff --git a/hw/syn/altera/quartus/unittest/Makefile b/hw/syn/altera/dut/unittest/Makefile similarity index 100% rename from hw/syn/altera/quartus/unittest/Makefile rename to hw/syn/altera/dut/unittest/Makefile diff --git a/hw/syn/altera/quartus/vortex/Makefile b/hw/syn/altera/dut/vortex/Makefile similarity index 100% rename from hw/syn/altera/quartus/vortex/Makefile rename to hw/syn/altera/dut/vortex/Makefile diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/sandbox/Makefile similarity index 100% rename from hw/syn/xilinx/test/Makefile rename to hw/syn/xilinx/sandbox/Makefile diff --git a/hw/syn/xilinx/test/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in similarity index 100% rename from hw/syn/xilinx/test/project.tcl.in rename to hw/syn/xilinx/sandbox/project.tcl.in diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_top.v b/hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v similarity index 100% rename from hw/syn/xilinx/test/project_1_files/Vortex_top.v rename to hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv b/hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv similarity index 100% rename from hw/syn/xilinx/test/project_1_files/Vortex_wrap.sv rename to hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv diff --git a/hw/syn/xilinx/test/project_1_files/kernel.bin.coe b/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe similarity index 100% rename from hw/syn/xilinx/test/project_1_files/kernel.bin.coe rename to hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe diff --git a/hw/syn/xilinx/test/project_1_files/testbench.v 
b/hw/syn/xilinx/sandbox/project_1_files/testbench.v similarity index 100% rename from hw/syn/xilinx/test/project_1_files/testbench.v rename to hw/syn/xilinx/sandbox/project_1_files/testbench.v From e4bfa47895964cbbcb50bb7f9bae9239f0877594 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 22 Aug 2024 02:51:17 -0700 Subject: [PATCH 152/488] adding test coverage for xilinx synthesis --- hw/scripts/bin2coe.py | 119 +- hw/syn/altera/dut/Makefile | 9 +- hw/syn/altera/dut/common.mk | 3 +- hw/syn/xilinx/dut/Makefile | 63 + hw/syn/xilinx/dut/cache/Makefile | 7 + hw/syn/xilinx/dut/common.mk | 37 + hw/syn/xilinx/dut/core/Makefile | 14 + hw/syn/xilinx/dut/fpu/Makefile | 11 + hw/syn/xilinx/dut/issue/Makefile | 14 + hw/syn/xilinx/dut/lmem/Makefile | 7 + hw/syn/xilinx/dut/mem_unit/Makefile | 7 + hw/syn/xilinx/dut/project.tcl | 82 + hw/syn/xilinx/dut/project.xdc | 1 + hw/syn/xilinx/dut/top/Makefile | 32 + .../dut/test => xilinx/dut/unittest}/Makefile | 4 +- hw/syn/xilinx/dut/vortex/Makefile | 16 + hw/syn/xilinx/sandbox/Makefile | 24 +- .../{project_1_files => }/Vortex_top.v | 0 .../{project_1_files => }/Vortex_wrap.sv | 0 hw/syn/xilinx/sandbox/project.tcl.in | 1899 +- .../sandbox/project_1_files/kernel.bin.coe | 16386 ---------------- .../sandbox/{project_1_files => }/testbench.v | 0 22 files changed, 415 insertions(+), 18320 deletions(-) create mode 100644 hw/syn/xilinx/dut/Makefile create mode 100644 hw/syn/xilinx/dut/cache/Makefile create mode 100644 hw/syn/xilinx/dut/common.mk create mode 100644 hw/syn/xilinx/dut/core/Makefile create mode 100644 hw/syn/xilinx/dut/fpu/Makefile create mode 100644 hw/syn/xilinx/dut/issue/Makefile create mode 100644 hw/syn/xilinx/dut/lmem/Makefile create mode 100644 hw/syn/xilinx/dut/mem_unit/Makefile create mode 100644 hw/syn/xilinx/dut/project.tcl create mode 100644 hw/syn/xilinx/dut/project.xdc create mode 100644 hw/syn/xilinx/dut/top/Makefile rename hw/syn/{altera/dut/test => xilinx/dut/unittest}/Makefile (79%) create mode 100644 
hw/syn/xilinx/dut/vortex/Makefile rename hw/syn/xilinx/sandbox/{project_1_files => }/Vortex_top.v (100%) rename hw/syn/xilinx/sandbox/{project_1_files => }/Vortex_wrap.sv (100%) delete mode 100644 hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe rename hw/syn/xilinx/sandbox/{project_1_files => }/testbench.v (100%) diff --git a/hw/scripts/bin2coe.py b/hw/scripts/bin2coe.py index 95b3bcbeb..eaaa3619e 100755 --- a/hw/scripts/bin2coe.py +++ b/hw/scripts/bin2coe.py @@ -14,78 +14,83 @@ # limitations under the License. import argparse +import os -g_memory = {} +def parse_binfile_option(option): + addr, path = option.split(':') + return int(addr, 0), path -def hex2bin(ch): - return int(ch, 16) if ch.isdigit() or ch in 'abcdefABCDEF' else 0 +def parse_value_option(option): + addr, value = option.split(':') + return int(addr, 0), value -def process_binary(binfname, wordsize, binaddr): - with open(binfname, 'rb') as f: - buffer = list(f.read()) - g_memory[binaddr] = buffer - return (len(buffer) + wordsize - 1) // wordsize +def load_binary_data(addr, path, word_size, memory, little_endian): + with open(path, 'rb') as f: + binary_data = f.read() -def process_data(datfname, wordsize): - offset, buffer = 0, [] - with open(datfname, 'r') as f: - for line in f: - line = line.strip() - if line.startswith("#"): - continue - if line.startswith("@"): - if buffer: - g_memory[offset] = buffer - offset = int(line[1:], 16) - buffer = [] - else: - for i in range(0, len(line), 2): - byte = hex2bin(line[i]) << 4 | hex2bin(line[i+1]) - buffer.append(byte) - if len(buffer) % wordsize: - buffer.extend([0] * (wordsize - len(buffer) % wordsize)) - offset += 1 - if buffer: - g_memory[offset] = buffer - return offset + word_count = len(binary_data) // word_size + if len(binary_data) % word_size != 0: + word_count += 1 -def write_coe(outfname, wordsize, depth, defval): - with open(outfname, 'w') as f: - f.write("MEMORY_INITIALIZATION_RADIX=16;\nMEMORY_INITIALIZATION_VECTOR=\n") - i = 0 - for addr 
in sorted(g_memory): - while i < addr: - f.write(f"{defval},\n") - i += 1 - data = g_memory[addr] - for j in range(0, len(data), wordsize): - f.write(",".join([f"{byte:02x}" for byte in data[j:j+wordsize][::-1]]) + ",\n") - i += 1 - while i < depth: - f.write(f"{defval},\n") - i += 1 - f.seek(f.tell() - 2, 0) # Remove the last comma - f.write(";\n") + for i in range(word_count): + word_data = binary_data[i * word_size: (i + 1) * word_size] + if little_endian: + word_data = word_data[::-1] # Reverse the byte order for little-endian + hex_value = word_data.hex().zfill(word_size * 2) + memory[addr + i] = hex_value + +def add_value_data(addr, value, memory, word_size): + value = value.zfill(word_size * 2) + memory[addr] = value + +def binary_to_coe(output_file, word_size, depth, default_value, memory): + if depth == 0: + depth = max(memory.keys()) + 1 + + with open(output_file, 'w') as coe_file: + coe_file.write("; This file was generated from binary blobs and/or values\n") + coe_file.write("memory_initialization_radix=16;\n") + coe_file.write("memory_initialization_vector=\n") + + for addr in range(depth): + hex_value = memory.get(addr, default_value) + coe_file.write(f"{hex_value},\n") + + coe_file.seek(coe_file.tell() - 2) + coe_file.write(";\n") def main(): - parser = argparse.ArgumentParser(description="Binary to Xilinx COE File Converter") - parser.add_argument("--binary", help="Input binary file.") - parser.add_argument("--data", help="Input data file.") + parser = argparse.ArgumentParser(description="Convert binaries and values to a Xilinx COE file.") + parser.add_argument("--binfile", action='append', help="Binary file with starting address in the format :") + parser.add_argument("--value", action='append', help="Hex value with starting address in the format :") parser.add_argument("--out", default="output.coe", help="Output file (optional).") parser.add_argument("--wordsize", type=int, default=4, help="Word size in bytes (default 4).") 
parser.add_argument("--depth", type=int, default=0, help="Address size (optional).") - parser.add_argument("--binaddr", type=int, default=0, help="Binary address (optional).") parser.add_argument("--default", default="00", help="Default hex value as string (optional).") + parser.add_argument("--little_endian", action='store_true', help="Interpret binary files as little-endian (default is big-endian).") args = parser.parse_args() - depth = max( - process_binary(args.binary, args.wordsize, args.binaddr) if args.binary else 0, - process_data(args.data, args.wordsize) if args.data else 0, - args.depth - ) + if args.binfile is None and args.value is None: + raise ValueError("At least one --binfile or --value must be provided.") - write_coe(args.out, args.wordsize, depth, args.default) + # Initialize memory dictionary + memory = {} + + # Process binary files + if args.binfile: + for option in args.binfile: + addr, path = parse_binfile_option(option) + load_binary_data(addr, path, args.wordsize, memory, args.little_endian) + + # Process individual values + if args.value: + for option in args.value: + addr, value = parse_value_option(option) + add_value_data(addr, value, memory, args.wordsize) + + # Generate the COE file + binary_to_coe(args.out, args.wordsize, args.depth, args.default.zfill(args.wordsize * 2), memory) if __name__ == "__main__": main() diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index f8993bf87..924b7602b 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -9,7 +9,7 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top test +.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: @@ -68,9 +68,4 @@ vortex: ip-gen top: ip-gen mkdir -p top/$(BUILD_DIR) cp top/Makefile top/$(BUILD_DIR) - 
$(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & - -test: ip-gen - mkdir -p test/$(BUILD_DIR) - cp test/Makefile test/$(BUILD_DIR) - $(MAKE) -C test/$(BUILD_DIR) clean && $(MAKE) -C test/$(BUILD_DIR) > test/$(BUILD_DIR)/build.log 2>&1 & + $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file diff --git a/hw/syn/altera/dut/common.mk b/hw/syn/altera/dut/common.mk index 3890dcfe8..1adcb3d49 100644 --- a/hw/syn/altera/dut/common.mk +++ b/hw/syn/altera/dut/common.mk @@ -1,7 +1,7 @@ ROOT_DIR := $(realpath ../../../../../..) include $(ROOT_DIR)/config.mk -SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus +SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/dut RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae @@ -21,7 +21,6 @@ endif CONFIGS += -DNDEBUG CONFIGS += -DQUARTUS CONFIGS += -DSYNTHESIS -CONFIGS += -DNOGLOBALS PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf diff --git a/hw/syn/xilinx/dut/Makefile b/hw/syn/xilinx/dut/Makefile new file mode 100644 index 000000000..b8f67b8a5 --- /dev/null +++ b/hw/syn/xilinx/dut/Makefile @@ -0,0 +1,63 @@ +ROOT_DIR := $(realpath ../../../..) 
+include $(ROOT_DIR)/config.mk + +PREFIX ?= build + +BUILD_DIR := $(PREFIX) + +.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top + +dogfood: + mkdir -p dogfood/$(BUILD_DIR) + cp dogfood/Makefile dogfood/$(BUILD_DIR) + $(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 & + +unittest: + mkdir -p unittest/$(BUILD_DIR) + cp unittest/Makefile unittest/$(BUILD_DIR) + $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & + +pipeline: + mkdir -p pipeline/$(BUILD_DIR) + cp pipeline/Makefile pipeline/$(BUILD_DIR) + $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & + +mem_unit: + mkdir -p mem_unit/$(BUILD_DIR) + cp mem_unit/Makefile mem_unit/$(BUILD_DIR) + $(MAKE) -C mem_unit/$(BUILD_DIR) clean && $(MAKE) -C mem_unit/$(BUILD_DIR) > mem_unit/$(BUILD_DIR)/build.log 2>&1 & + +lmem: + mkdir -p lmem/$(BUILD_DIR) + cp lmem/Makefile lmem/$(BUILD_DIR) + $(MAKE) -C lmem/$(BUILD_DIR) clean && $(MAKE) -C lmem/$(BUILD_DIR) > lmem/$(BUILD_DIR)/build.log 2>&1 & + +cache: + mkdir -p cache/$(BUILD_DIR) + cp cache/Makefile cache/$(BUILD_DIR) + $(MAKE) -C cache/$(BUILD_DIR) clean && $(MAKE) -C cache/$(BUILD_DIR) > cache/$(BUILD_DIR)/build.log 2>&1 & + +fpu: + mkdir -p fpu/$(BUILD_DIR) + cp fpu/Makefile fpu/$(BUILD_DIR) + $(MAKE) -C fpu/$(BUILD_DIR) clean && $(MAKE) -C fpu/$(BUILD_DIR) > fpu/$(BUILD_DIR)/build.log 2>&1 & + +core: + mkdir -p core/$(BUILD_DIR) + cp core/Makefile core/$(BUILD_DIR) + $(MAKE) -C core/$(BUILD_DIR) clean && $(MAKE) -C core/$(BUILD_DIR) > core/$(BUILD_DIR)/build.log 2>&1 & + +issue: + mkdir -p issue/$(BUILD_DIR) + cp issue/Makefile issue/$(BUILD_DIR) + $(MAKE) -C issue/$(BUILD_DIR) clean && $(MAKE) -C issue/$(BUILD_DIR) > issue/$(BUILD_DIR)/build.log 2>&1 & + +vortex: + mkdir -p vortex/$(BUILD_DIR) + cp vortex/Makefile vortex/$(BUILD_DIR) + $(MAKE) -C 
vortex/$(BUILD_DIR) clean && $(MAKE) -C vortex/$(BUILD_DIR) > vortex/$(BUILD_DIR)/build.log 2>&1 & + +top: + mkdir -p top/$(BUILD_DIR) + cp top/Makefile top/$(BUILD_DIR) + $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file diff --git a/hw/syn/xilinx/dut/cache/Makefile b/hw/syn/xilinx/dut/cache/Makefile new file mode 100644 index 000000000..f96a76142 --- /dev/null +++ b/hw/syn/xilinx/dut/cache/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_cache_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk new file mode 100644 index 000000000..b435b1409 --- /dev/null +++ b/hw/syn/xilinx/dut/common.mk @@ -0,0 +1,37 @@ +ROOT_DIR := $(realpath ../../../../../..) +include $(ROOT_DIR)/config.mk + +DEVICE ?= xcu55c-fsvh2892-2L-e + +VIVADO := $(XILINX_VIVADO)/bin/vivado + +SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/dut + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +AFU_DIR := $(RTL_DIR)/afu/xrt +SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts + +CONFIGS += -DNDEBUG +CONFIGS += -DVIVADO +CONFIGS += -DSYNTHESIS + +# Build targets +all: $(PROJECT).xpr + +gen-sources: project_1/sources.txt +project_1/sources.txt: + mkdir -p project_1 + $(SCRIPT_DIR)/gen_sources.sh $(CONFIGS) $(RTL_INCLUDE) -T$(TOP_LEVEL_ENTITY) -P -Cproject_1/src -Oproject_1/sources.txt + +build: $(PROJECT).xpr +$(PROJECT).xpr: project_1/sources.txt + $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) + +clean: + rm -rf project_1 + rm -rf .Xil + rm -f *.rpt + rm -f vivado*.log + rm -f vivado*.jou + +.PHONY: all gen-sources build clean \ No newline at end of file diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile new file mode 100644 
index 000000000..eeeaa5233 --- /dev/null +++ b/hw/syn/xilinx/dut/core/Makefile @@ -0,0 +1,14 @@ +PROJECT = VX_core_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile new file mode 100644 index 000000000..b7826dc68 --- /dev/null +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -0,0 +1,11 @@ +PROJECT = VX_fpu_dsp +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile new file mode 100644 index 000000000..c1804a398 --- /dev/null +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -0,0 +1,14 @@ +PROJECT = VX_issue_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src 
-J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/lmem/Makefile b/hw/syn/xilinx/dut/lmem/Makefile new file mode 100644 index 000000000..b3ba57c8d --- /dev/null +++ b/hw/syn/xilinx/dut/lmem/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_local_mem_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem diff --git a/hw/syn/xilinx/dut/mem_unit/Makefile b/hw/syn/xilinx/dut/mem_unit/Makefile new file mode 100644 index 000000000..209492265 --- /dev/null +++ b/hw/syn/xilinx/dut/mem_unit/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_mem_unit_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl new file mode 100644 index 000000000..bee841d79 --- /dev/null +++ b/hw/syn/xilinx/dut/project.tcl @@ -0,0 +1,82 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +if { $::argc != 5 } { + puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" + puts "Usage: $::argv0 \n" + exit +} + +# Set the project name +set project_name "project_1" + +set top_module [lindex $::argv 0] +set device_part [lindex $::argv 1] +set vcs_file [lindex $::argv 2] +set xdc_file [lindex $::argv 3] +set tool_dir [lindex $::argv 4] + +#puts top_module +#puts $device_part +#puts $vcs_file +#puts xdc_file +#puts $tool_dir + +source "${tool_dir}/parse_vcs_list.tcl" +set vlist [parse_vcs_list "${vcs_file}"] + +set vsources_list [lindex $vlist 0] +set vincludes_list [lindex $vlist 1] +set vdefines_list [lindex $vlist 2] + +#puts $vsources_list +#puts $vincludes_list +#puts $vdefines_list + +# Create project +create_project $project_name $project_name -force -part $device_part + +# Add constrains file +read_xdc $xdc_file + +# Add the design sources +add_files -norecurse -verbose $vsources_list + +# process defines +set obj [current_fileset] +foreach def $vdefines_list { + set_property verilog_define $def $obj +} + +# Synthesis +synth_design -top $top_module -include_dirs $vincludes_list -flatten_hierarchy none +write_checkpoint -force post_synth.dcp +report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages + +# Optimize +opt_design + +# Place +place_design +write_checkpoint -force post_place.dcp +report_place_status -file place.rpt + +# Route +route_design +write_checkpoint -force post_route.dcp +report_route_status -file route.rpt + +# Generate the synthesis report +report_timing -file timing.rpt +report_power -file power.rpt +report_drc -file drc.rpt \ No newline at end of file diff --git a/hw/syn/xilinx/dut/project.xdc b/hw/syn/xilinx/dut/project.xdc new file mode 100644 index 000000000..8c74ebb4a --- /dev/null +++ b/hw/syn/xilinx/dut/project.xdc @@ -0,0 +1 @@ +## empty \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile new file mode 100644 index 000000000..341690206 --- 
/dev/null +++ b/hw/syn/xilinx/dut/top/Makefile @@ -0,0 +1,32 @@ +PROJECT = vortex_afu +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +# AFU parameters +CONFIGS += -DNOPAE +CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +endif + +#CONFIGS += -DNUM_CORES=2 +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 +#CONFIGS += -DL2_ENABLE + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/dut/test/Makefile b/hw/syn/xilinx/dut/unittest/Makefile similarity index 79% rename from hw/syn/altera/dut/test/Makefile rename to hw/syn/xilinx/dut/unittest/Makefile index 0c4a7ae4e..2bfb18e4e 100644 --- a/hw/syn/altera/dut/test/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -1,4 +1,4 @@ -PROJECT = Vortex +PROJECT = Unittest TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv @@ -8,4 +8,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include 
-J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile new file mode 100644 index 000000000..7429df414 --- /dev/null +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -0,0 +1,16 @@ +PROJECT = Vortex +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_CORES=2 +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 +#CONFIGS += -DL2_ENABLE + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index 5b6a76da3..bcfd91f9c 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -1,17 +1,22 @@ ROOT_DIR := $(realpath ../../../..) 
include $(ROOT_DIR)/config.mk +DEVICE ?= xcu55c-fsvh2892-2L-e + VIVADO := $(XILINX_VIVADO)/bin/vivado -SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/test +SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/sandbox RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi -AFU_DIR := $(RTL_DIR)/afu/opae +AFU_DIR := $(RTL_DIR)/afu/xrt SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts KERNEL ?= fibonacci +COE_FILE := $(shell realpath kernel.bin.coe) +ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') + # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) @@ -19,14 +24,13 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) -RTL_INCLUDE += -I$(SRC_DIR)/project_1_files +RTL_INCLUDE += -I$(SRC_DIR) # compilation flags CFLAGS += -DNDEBUG -DSYNTHESIS -DVIVADO CFLAGS += $(CONFIGS) CFLAGS += $(RTL_INCLUDE) CFLAGS += -DEXT_F_DISABLE -#CFLAGS += -DNUM_CORES 4 # update memory layout for 2MB RAM CFLAGS += -DSTARTUP_ADDR=32\'h80000 @@ -34,6 +38,9 @@ CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build +project2.tcl: project.tcl + @sed -e "s/@COE_FILE@/$(ESCAPED_COE_FILE)/g" $< > $@ + $(KERNEL).bin: $(MAKE) -C $(ROOT_DIR)/kernel clean STACK_BASE_ADDR=0xFF000 $(MAKE) -C $(ROOT_DIR)/kernel @@ -42,7 +49,7 @@ $(KERNEL).bin: cp $(ROOT_DIR)/tests/kernel/$(KERNEL)/$(KERNEL).bin $(KERNEL).bin kernel.bin.coe: $(KERNEL).bin - $(SCRIPT_DIR)/bin2coe.py --out=$@ --binary=$(KERNEL).bin --binaddr=8192 --depth=16384 --wordsize=64 + $(SCRIPT_DIR)/bin2coe.py --out=$@ --binfile=8192:$(KERNEL).bin --depth=16384 --wordsize=64 --little_endian gen-sources: project_1/sources.txt project_1/sources.txt: @@ -50,11 +57,12 @@ project_1/sources.txt: $(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt build: project_1/project_1.xpr -project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe 
project.tcl - $(VIVADO) -mode batch -source project.tcl -tclargs project_1/sources.txt project_1/src $(SCRIPT_DIR) +project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project2.tcl + $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) run: project_1/project_1.xpr $(VIVADO) project_1/project_1.xpr & clean: - rm -rf project_1 $(KERNEL).bin kernel.bin.coe + rm -rf project_1 project2.tcl $(KERNEL).bin kernel.bin.coe + rm -rf .Xil *.log *.jou diff --git a/hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v b/hw/syn/xilinx/sandbox/Vortex_top.v similarity index 100% rename from hw/syn/xilinx/sandbox/project_1_files/Vortex_top.v rename to hw/syn/xilinx/sandbox/Vortex_top.v diff --git a/hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv b/hw/syn/xilinx/sandbox/Vortex_wrap.sv similarity index 100% rename from hw/syn/xilinx/sandbox/project_1_files/Vortex_wrap.sv rename to hw/syn/xilinx/sandbox/Vortex_wrap.sv diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 45f9a9104..e92e31a44 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -1,15 +1,28 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ if { $::argc != 3 } { puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n" - puts "Usage: $::argv0 \n" + puts "Usage: $::argv0 \n" exit } -set vcs_file [lindex $::argv 0] -set files_dir [lindex $::argv 1] +set device_part [lindex $::argv 0] +set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] +#puts $device_part #puts $vcs_file -#puts $files_dir #puts $tool_dir set origin_dir [file normalize "."] @@ -39,81 +52,11 @@ set vdefines_list [lindex $vlist 2] #puts ${vdefines_list} # Create project -create_project ${project_name} ./${project_name} -force -part xcu280-fsvh2892-2L-e +create_project $project_name $project_name -force -part $device_part # Set the directory path for the new project set proj_dir [get_property directory [current_project]] -# Set project properties -set obj [current_project] -set_property -name "compxlib.activehdl_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/activehdl" -objects $obj -set_property -name "compxlib.funcsim" -value "1" -objects $obj -set_property -name "compxlib.ies_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/ies" -objects $obj -set_property -name "compxlib.modelsim_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/modelsim" -objects $obj -set_property -name "compxlib.overwrite_libs" -value "0" -objects $obj -set_property -name "compxlib.questa_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/questa" -objects $obj -set_property -name "compxlib.riviera_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/riviera" -objects $obj -set_property -name "compxlib.timesim" -value "1" -objects $obj -set_property -name "compxlib.vcs_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/vcs" -objects $obj -set_property -name "compxlib.xsim_compiled_library_dir" -value "" -objects $obj -set_property -name "corecontainer.enable" -value "0" -objects $obj 
-set_property -name "default_lib" -value "xil_defaultlib" -objects $obj -set_property -name "enable_optional_runs_sta" -value "0" -objects $obj -set_property -name "enable_vhdl_2008" -value "1" -objects $obj -set_property -name "generate_ip_upgrade_log" -value "1" -objects $obj -set_property -name "ip_cache_permissions" -value "read write" -objects $obj -set_property -name "ip_interface_inference_priority" -value "" -objects $obj -set_property -name "ip_output_repo" -value "$proj_dir/${project_name}.cache/ip" -objects $obj -set_property -name "legacy_ip_repo_paths" -value "" -objects $obj -set_property -name "mem.enable_memory_map_generation" -value "1" -objects $obj -set_property -name "platform.board_id" -value "au280" -objects $obj -set_property -name "platform.default_output_type" -value "undefined" -objects $obj -set_property -name "platform.design_intent.datacenter" -value "undefined" -objects $obj -set_property -name "platform.design_intent.embedded" -value "undefined" -objects $obj -set_property -name "platform.design_intent.external_host" -value "undefined" -objects $obj -set_property -name "platform.design_intent.server_managed" -value "undefined" -objects $obj -set_property -name "platform.rom.debug_type" -value "0" -objects $obj -set_property -name "platform.rom.prom_type" -value "0" -objects $obj -set_property -name "platform.slrconstraintmode" -value "0" -objects $obj -set_property -name "preferred_sim_model" -value "rtl" -objects $obj -set_property -name "project_type" -value "Default" -objects $obj -set_property -name "pr_flow" -value "0" -objects $obj -set_property -name "sim.central_dir" -value "$proj_dir/${project_name}.ip_user_files" -objects $obj -set_property -name "sim.ip.auto_export_scripts" -value "1" -objects $obj -set_property -name "sim.use_ip_compiled_libs" -value "1" -objects $obj -set_property -name "simulator.activehdl_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.activehdl_install_dir" -value "" -objects 
$obj -set_property -name "simulator.ies_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.ies_install_dir" -value "" -objects $obj -set_property -name "simulator.modelsim_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.modelsim_install_dir" -value "" -objects $obj -set_property -name "simulator.questa_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.riviera_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.riviera_install_dir" -value "" -objects $obj -set_property -name "simulator.vcs_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.vcs_install_dir" -value "" -objects $obj -set_property -name "simulator.xcelium_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.xcelium_install_dir" -value "" -objects $obj -set_property -name "simulator_language" -value "Verilog" -objects $obj -set_property -name "source_mgmt_mode" -value "All" -objects $obj -set_property -name "target_language" -value "Verilog" -objects $obj -set_property -name "target_simulator" -value "XSim" -objects $obj -set_property -name "tool_flow" -value "Vivado" -objects $obj -set_property -name "webtalk.activehdl_export_sim" -value "27" -objects $obj -set_property -name "webtalk.ies_export_sim" -value "27" -objects $obj -set_property -name "webtalk.modelsim_export_sim" -value "27" -objects $obj -set_property -name "webtalk.questa_export_sim" -value "27" -objects $obj -set_property -name "webtalk.riviera_export_sim" -value "27" -objects $obj -set_property -name "webtalk.vcs_export_sim" -value "27" -objects $obj -set_property -name "webtalk.xcelium_export_sim" -value "5" -objects $obj -set_property -name "webtalk.xsim_export_sim" -value "27" -objects $obj -set_property -name "webtalk.xsim_launch_sim" -value "91" -objects $obj -set_property -name "xpm_libraries" -value "XPM_CDC XPM_MEMORY" -objects $obj -set_property -name "xsim.array_display_limit" -value "1024" -objects 
$obj -set_property -name "xsim.radix" -value "hex" -objects $obj -set_property -name "xsim.time_unit" -value "ns" -objects $obj -set_property -name "xsim.trace_limit" -value "65536" -objects $obj - # Create 'sources_1' fileset (if not found) if {[string equal [get_filesets -quiet sources_1] ""]} { create_fileset -srcset sources_1 @@ -131,21 +74,8 @@ foreach def $vdefines_list { # Set 'sources_1' fileset properties set obj [get_filesets sources_1] -set_property -name "design_mode" -value "RTL" -objects $obj -set_property -name "edif_extra_search_paths" -value "" -objects $obj -set_property -name "elab_link_dcps" -value "1" -objects $obj -set_property -name "elab_load_timing_constraints" -value "1" -objects $obj -set_property -name "generic" -value "" -objects $obj -set_property -name "include_dirs" -value "" -objects $obj -set_property -name "lib_map_file" -value "" -objects $obj -set_property -name "loop_count" -value "1000" -objects $obj set_property -name "name" -value "sources_1" -objects $obj set_property -name "top" -value "design_1_wrapper" -objects $obj -set_property -name "top_auto_set" -value "0" -objects $obj -set_property -name "verilog_define" -value "" -objects $obj -set_property -name "verilog_uppercase" -value "1" -objects $obj -set_property -name "verilog_version" -value "verilog_2001" -objects $obj -set_property -name "vhdl_version" -value "vhdl_2k" -objects $obj # Create 'constrs_1' fileset (if not found) if {[string equal [get_filesets -quiet constrs_1] ""]} { @@ -172,7 +102,7 @@ if {[string equal [get_filesets -quiet sim_1] ""]} { set obj [get_filesets sim_1] # Import local files from the original project set files [list \ - [file normalize "$files_dir/testbench.v" ]\ + [file normalize "testbench.v" ]\ ] set imported_files [import_files -fileset sim_1 $files] @@ -202,52 +132,14 @@ set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $o set_property -name "include_dirs" -value "" -objects $obj set_property -name 
"incremental" -value "1" -objects $obj set_property -name "name" -value "sim_1" -objects $obj -set_property -name "nl.cell" -value "" -objects $obj -set_property -name "nl.incl_unisim_models" -value "0" -objects $obj -set_property -name "nl.mode" -value "funcsim" -objects $obj -set_property -name "nl.process_corner" -value "slow" -objects $obj -set_property -name "nl.rename_top" -value "" -objects $obj -set_property -name "nl.sdf_anno" -value "1" -objects $obj -set_property -name "nl.write_all_overrides" -value "0" -objects $obj set_property -name "source_set" -value "sources_1" -objects $obj set_property -name "systemc_include_dirs" -value "" -objects $obj set_property -name "top" -value "testbench" -objects $obj set_property -name "top_auto_set" -value "0" -objects $obj set_property -name "top_lib" -value "xil_defaultlib" -objects $obj -set_property -name "transport_int_delay" -value "0" -objects $obj -set_property -name "transport_path_delay" -value "0" -objects $obj -set_property -name "unifast" -value "0" -objects $obj set_property -name "verilog_define" -value "" -objects $obj set_property -name "verilog_uppercase" -value "0" -objects $obj -set_property -name "xelab.dll" -value "0" -objects $obj -set_property -name "xsim.compile.tcl.pre" -value "" -objects $obj -set_property -name "xsim.compile.xsc.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvhdl.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvhdl.nosort" -value "1" -objects $obj -set_property -name "xsim.compile.xvhdl.relax" -value "1" -objects $obj -set_property -name "xsim.compile.xvlog.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvlog.nosort" -value "1" -objects $obj -set_property -name "xsim.compile.xvlog.relax" -value "1" -objects $obj -set_property -name "xsim.elaborate.debug_level" -value "typical" -objects $obj -set_property -name "xsim.elaborate.load_glbl" -value "1" -objects $obj -set_property -name 
"xsim.elaborate.mt_level" -value "auto" -objects $obj -set_property -name "xsim.elaborate.rangecheck" -value "0" -objects $obj -set_property -name "xsim.elaborate.relax" -value "1" -objects $obj -set_property -name "xsim.elaborate.sdf_delay" -value "sdfmax" -objects $obj -set_property -name "xsim.elaborate.snapshot" -value "" -objects $obj -set_property -name "xsim.elaborate.xelab.more_options" -value "" -objects $obj -set_property -name "xsim.elaborate.xsc.more_options" -value "" -objects $obj -set_property -name "xsim.simulate.add_positional" -value "0" -objects $obj -set_property -name "xsim.simulate.custom_tcl" -value "" -objects $obj -set_property -name "xsim.simulate.log_all_signals" -value "0" -objects $obj -set_property -name "xsim.simulate.no_quit" -value "0" -objects $obj -set_property -name "xsim.simulate.runtime" -value "4000ns" -objects $obj -set_property -name "xsim.simulate.saif" -value "" -objects $obj -set_property -name "xsim.simulate.saif_all_signals" -value "0" -objects $obj -set_property -name "xsim.simulate.saif_scope" -value "" -objects $obj -set_property -name "xsim.simulate.tcl.post" -value "" -objects $obj -set_property -name "xsim.simulate.wdb" -value "" -objects $obj -set_property -name "xsim.simulate.xsim.more_options" -value "" -objects $obj + # Set 'utils_1' fileset object set obj [get_filesets utils_1] @@ -398,7 +290,7 @@ set_property -dict [ list \ CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@VORTEX_HOME@/hw/syn/xilinx/test/kernel.bin.coe} \ + CONFIG.Coe_File {@COE_FILE@} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ @@ -493,1735 +385,26 @@ set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] -#call make_wrapper to create wrapper files +# Call make_wrapper to 
create wrapper files set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] add_files -norecurse -fileset sources_1 $wrapper_path -# Create 'synth_1' run (if not found) -if {[string equal [get_runs -quiet synth_1] ""]} { - create_run -name synth_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Synthesis 2020} -strategy "Vivado Synthesis Defaults" -report_strategy {No Reports} -constrset constrs_1 -} else { - set_property strategy "Vivado Synthesis Defaults" [get_runs synth_1] - set_property flow "Vivado Synthesis 2020" [get_runs synth_1] -} -set obj [get_runs synth_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Synthesis Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'synth_1_synth_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs synth_1] synth_1_synth_report_utilization_0] "" ] } { - create_report_config -report_name synth_1_synth_report_utilization_0 -report_type report_utilization:1.0 -steps synth_design -runs synth_1 -} -set obj [get_report_configs -of_objects [get_runs synth_1] synth_1_synth_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Synth Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs synth_1] -set_property -name "constrset" -value "constrs_1" -objects $obj 
-set_property -name "description" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "flow" -value "Vivado Synthesis 2020" -objects $obj -set_property -name "name" -value "synth_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "write_incremental_synth_checkpoint" -value "0" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/synth_1" -objects $obj -set_property -name "strategy" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "steps.synth_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.synth_design.tcl.post" -value "" -objects $obj -set_property -name "steps.synth_design.args.flatten_hierarchy" -value "rebuilt" -objects $obj -set_property -name "steps.synth_design.args.gated_clock_conversion" -value "off" -objects $obj -set_property -name "steps.synth_design.args.bufg" -value "12" -objects $obj -set_property -name "steps.synth_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.synth_design.args.retiming" -value "0" -objects $obj -set_property -name "steps.synth_design.args.fsm_extraction" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.keep_equivalent_registers" -value "0" -objects $obj -set_property -name "steps.synth_design.args.resource_sharing" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.control_set_opt_threshold" -value "auto" -objects 
$obj -set_property -name "steps.synth_design.args.no_lc" -value "0" -objects $obj -set_property -name "steps.synth_design.args.no_srlextract" -value "0" -objects $obj -set_property -name "steps.synth_design.args.shreg_min_size" -value "3" -objects $obj -set_property -name "steps.synth_design.args.max_bram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_dsp" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_bram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.cascade_dsp" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.assert" -value "0" -objects $obj -set_property -name "steps.synth_design.args.more options" -value "" -objects $obj - -# Create 'synth_1_copy_1' run (if not found) -if {[string equal [get_runs -quiet synth_1_copy_1] ""]} { - create_run -name synth_1_copy_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Synthesis 2020} -strategy "Vivado Synthesis Defaults" -report_strategy {No Reports} -constrset constrs_1 -} else { - set_property strategy "Vivado Synthesis Defaults" [get_runs synth_1_copy_1] - set_property flow "Vivado Synthesis 2020" [get_runs synth_1_copy_1] -} -set obj [get_runs synth_1_copy_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Synthesis Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'synth_1_copy_1_synth_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs synth_1_copy_1] synth_1_copy_1_synth_report_utilization_0] "" ] } { - create_report_config -report_name synth_1_copy_1_synth_report_utilization_0 -report_type report_utilization:1.0 -steps synth_design -runs synth_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs 
synth_1_copy_1] synth_1_copy_1_synth_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Synth Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs synth_1_copy_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "flow" -value "Vivado Synthesis 2020" -objects $obj -set_property -name "name" -value "synth_1_copy_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "write_incremental_synth_checkpoint" -value "0" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/synth_1" -objects $obj -set_property -name "strategy" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "steps.synth_design.tcl.pre" -value "" -objects $obj -set_property 
-name "steps.synth_design.tcl.post" -value "" -objects $obj -set_property -name "steps.synth_design.args.flatten_hierarchy" -value "rebuilt" -objects $obj -set_property -name "steps.synth_design.args.gated_clock_conversion" -value "off" -objects $obj -set_property -name "steps.synth_design.args.bufg" -value "12" -objects $obj -set_property -name "steps.synth_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.synth_design.args.retiming" -value "0" -objects $obj -set_property -name "steps.synth_design.args.fsm_extraction" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.keep_equivalent_registers" -value "0" -objects $obj -set_property -name "steps.synth_design.args.resource_sharing" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.control_set_opt_threshold" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.no_lc" -value "0" -objects $obj -set_property -name "steps.synth_design.args.no_srlextract" -value "0" -objects $obj -set_property -name "steps.synth_design.args.shreg_min_size" -value "3" -objects $obj -set_property -name "steps.synth_design.args.max_bram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_dsp" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_bram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.cascade_dsp" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.assert" -value "0" -objects $obj -set_property -name "steps.synth_design.args.more options" -value "" -objects $obj - -# set the current synth run -current_run -synthesis [get_runs synth_1] - -# preserve signal names -set_property STEPS.SYNTH_DESIGN.ARGS.FLATTEN_HIERARCHY none [get_runs synth_1] - -# Create 
'impl_1' run (if not found) -if {[string equal [get_runs -quiet impl_1] ""]} { - create_run -name impl_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1] - set_property flow "Vivado Implementation 2020" [get_runs impl_1] -} -set obj [get_runs impl_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_init_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps init_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name 
"options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value 
"0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects 
$obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_control_sets_0] "" ] } { - 
create_report_config -report_name impl_1_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" -value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value 
"0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_place_power_opt_report_timing_summary_0' report 
(if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_post_place_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] 
impl_1_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 
'impl_1_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_route_report_methodology_0 -report_type report_methodology:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_methodology_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Route 
Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj -set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 
'impl_1_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_route_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps 
route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name 
"options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name 
"options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" 
-value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name 
"steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# Create 'impl_1_copy_1' run (if not found) -if {[string equal [get_runs -quiet impl_1_copy_1] ""]} { - create_run -name impl_1_copy_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1_copy_1] - set_property flow "Vivado Implementation 2020" [get_runs impl_1_copy_1] -} -set obj [get_runs impl_1_copy_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_copy_1_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_init_report_timing_summary_0 
-report_type report_timing_summary:1.0 -steps init_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_1_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" 
-objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name 
impl_1_copy_1_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" 
-objects $obj - -} -# Create 'impl_1_copy_1_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_control_sets_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" -value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value 
"" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_timing_summary_0' report (if not found) -if { [ string 
equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_place_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_place_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] 
impl_1_copy_1_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" 
-objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_methodology_0 -report_type report_methodology:1.0 -steps 
route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_methodology_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Route Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj 
-set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs 
-of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_bus_skew_0 -report_type 
report_bus_skew:1.1 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" 
-objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name 
"options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1_copy_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1_copy_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value 
"Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value 
"" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# Create 'impl_1_copy_2' run (if not found) -if {[string equal [get_runs -quiet impl_1_copy_2] ""]} { - create_run -name impl_1_copy_2 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1_copy_2] - set_property flow "Vivado Implementation 2020" [get_runs impl_1_copy_2] -} -set obj [get_runs impl_1_copy_2] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_copy_2_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] 
impl_1_copy_2_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_init_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps init_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_2_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt 
Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs 
-of_objects [get_runs impl_1_copy_2] impl_1_copy_2_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj 
-set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_control_sets_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" 
-value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name 
"options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_place_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_place_power_opt_report_timing_summary_0 -report_type 
report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt 
Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] 
impl_1_copy_2_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_methodology_0 -report_type report_methodology:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_methodology_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value 
"Route Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj -set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name 
"options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects 
[get_runs impl_1_copy_2] impl_1_copy_2_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route 
Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property 
-name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1_copy_2] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1_copy_2" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj 
-set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name 
"steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# set the current impl run -current_run -implementation [get_runs impl_1] - -puts "INFO: Project created:${project_name}" -# Create 'drc_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "drc_1" ] ] ""]} { -create_dashboard_gadget -name {drc_1} -type drc -} -set obj [get_dashboard_gadgets [ list "drc_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value 
"impl_1#impl_1_route_report_drc_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.critical_warning" -value "1" -objects $obj -set_property -name "statistics.error" -value "1" -objects $obj -set_property -name "statistics.info" -value "1" -objects $obj -set_property -name "statistics.warning" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'methodology_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "methodology_1" ] ] ""]} { -create_dashboard_gadget -name {methodology_1} -type methodology -} -set obj [get_dashboard_gadgets [ list "methodology_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_methodology_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.critical_warning" -value "1" -objects $obj -set_property -name "statistics.error" -value "1" -objects $obj -set_property -name "statistics.info" -value "1" -objects $obj -set_property -name "statistics.warning" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'power_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "power_1" ] ] ""]} { -create_dashboard_gadget -name {power_1} -type power -} -set obj [get_dashboard_gadgets [ list 
"power_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_power_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.clocks" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.gth" -value "1" -objects $obj -set_property -name "statistics.gtp" -value "1" -objects $obj -set_property -name "statistics.gtx" -value "1" -objects $obj -set_property -name "statistics.gtz" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.logic" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.phaser" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.pl_static" -value "1" -objects $obj -set_property -name "statistics.ps7" -value "1" -objects $obj -set_property -name "statistics.ps" -value "1" -objects $obj -set_property -name "statistics.ps_static" -value "1" -objects $obj -set_property -name "statistics.signals" -value "1" -objects $obj -set_property -name "statistics.total_power" -value "1" -objects $obj -set_property -name "statistics.transceiver" -value "1" -objects $obj -set_property -name "statistics.xadc" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name 
"view.type" -value "Graph" -objects $obj - -# Create 'timing_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "timing_1" ] ] ""]} { -create_dashboard_gadget -name {timing_1} -type timing -} -set obj [get_dashboard_gadgets [ list "timing_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_timing_summary_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.ths" -value "1" -objects $obj -set_property -name "statistics.tns" -value "1" -objects $obj -set_property -name "statistics.tpws" -value "1" -objects $obj -set_property -name "statistics.whs" -value "1" -objects $obj -set_property -name "statistics.wns" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Table" -objects $obj - -# Create 'utilization_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "utilization_1" ] ] ""]} { -create_dashboard_gadget -name {utilization_1} -type utilization -} -set obj [get_dashboard_gadgets [ list "utilization_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "synth_1#synth_1_synth_report_utilization_0" -objects $obj -set_property -name "run.step" -value "synth_design" -objects $obj 
-set_property -name "run.type" -value "synthesis" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.bufg" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.ff" -value "1" -objects $obj -set_property -name "statistics.gt" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.lut" -value "1" -objects $obj -set_property -name "statistics.lutram" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.uram" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'utilization_2' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "utilization_2" ] ] ""]} { -create_dashboard_gadget -name {utilization_2} -type utilization -} -set obj [get_dashboard_gadgets [ list "utilization_2" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_place_report_utilization_0" -objects $obj -set_property -name "run.step" -value "place_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.bufg" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.ff" -value "1" -objects $obj 
-set_property -name "statistics.gt" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.lut" -value "1" -objects $obj -set_property -name "statistics.lutram" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.uram" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -move_dashboard_gadget -name {utilization_1} -row 0 -col 0 -move_dashboard_gadget -name {power_1} -row 1 -col 0 -move_dashboard_gadget -name {drc_1} -row 2 -col 0 -move_dashboard_gadget -name {timing_1} -row 0 -col 1 -move_dashboard_gadget -name {utilization_2} -row 1 -col 1 -move_dashboard_gadget -name {methodology_1} -row 2 -col 1 +update_compile_order -fileset sources_1 + +# Synthesis +launch_runs synth_1 +wait_on_run synth_1 +open_run synth_1 + +# Implementation +launch_runs impl_1 +wait_on_run impl_1 +open_run impl_1 + +# Generate reports +report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages +report_place_status -file place.rpt +report_route_status -file route.rpt +report_timing -file timing.rpt +report_power -file power.rpt +report_drc -file drc.rpt \ No newline at end of file diff --git a/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe b/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe deleted file mode 100644 index a316d82b5..000000000 --- a/hw/syn/xilinx/sandbox/project_1_files/kernel.bin.coe +++ /dev/null @@ -1,16386 +0,0 @@ -MEMORY_INITIALIZATION_RADIX=16; -MEMORY_INITIALIZATION_VECTOR= -0, -000000C00000008000000002, -00000003000000020000000100000000, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -00f586b30007a60340d585b300d7073300d787b3002797930027171300f707330207086302e787b3cc5027f30480258304402683040027030000000b008000ef, -00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008067fef718e300c6a02300478793, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0; diff --git a/hw/syn/xilinx/sandbox/project_1_files/testbench.v b/hw/syn/xilinx/sandbox/testbench.v similarity index 100% rename from hw/syn/xilinx/sandbox/project_1_files/testbench.v rename to hw/syn/xilinx/sandbox/testbench.v From 7ae7ffa007e6c83207e44905f77a7f1495c20477 Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 22 Aug 2024 18:37:34 +0200 Subject: 
[PATCH 153/488] pulled master and made initial changes --- hw/rtl/Vortex_hbm.sv | 229 +++++++++++++++++++++++++ hw/rtl/cache/VX_cache_wrap_l3.sv | 286 +++++++++++++++++++++++++++++++ sim/rtlsim/Makefile | 2 +- sim/rtlsim/processor.cpp | 244 ++++++++++++++------------ 4 files changed, 650 insertions(+), 111 deletions(-) create mode 100644 hw/rtl/Vortex_hbm.sv create mode 100644 hw/rtl/cache/VX_cache_wrap_l3.sv diff --git a/hw/rtl/Vortex_hbm.sv b/hw/rtl/Vortex_hbm.sv new file mode 100644 index 000000000..253c325bb --- /dev/null +++ b/hw/rtl/Vortex_hbm.sv @@ -0,0 +1,229 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module Vortex_hbm import VX_gpu_pkg::*; ( + `SCOPE_IO_DECL + + // Clock + input wire clk, + input wire reset, + + // Memory request + output wire mem_req_valid [`NUM_MEM_PORTS], + output wire mem_req_rw [`NUM_MEM_PORTS], + output wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen [`NUM_MEM_PORTS], + output wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr [`NUM_MEM_PORTS], + output wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data [`NUM_MEM_PORTS], + output wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag [`NUM_MEM_PORTS], + input wire mem_req_ready [`NUM_MEM_PORTS], + + // Memory response + input wire mem_rsp_valid [`NUM_MEM_PORTS], + input wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data [`NUM_MEM_PORTS], + input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag [`NUM_MEM_PORTS], + output wire mem_rsp_ready [`NUM_MEM_PORTS], + + // DCR write request + input wire dcr_wr_valid, + input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, + input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data, + + // Status + output wire busy +); + +`ifdef SCOPE + localparam scope_cluster = 0; + `SCOPE_IO_SWITCH (`NUM_CLUSTERS); +`endif + +`ifdef PERF_ENABLE + VX_mem_perf_if mem_perf_if(); + assign mem_perf_if.icache = 'x; + assign mem_perf_if.dcache = 'x; + assign mem_perf_if.l2cache = 'x; + assign mem_perf_if.lmem = 'x; +`endif + + VX_mem_bus_if #( + .DATA_SIZE (`L2_LINE_SIZE), + .TAG_WIDTH (L2_MEM_TAG_WIDTH) + ) per_cluster_mem_bus_if[`NUM_CLUSTERS](); + + VX_mem_bus_if #( + .DATA_SIZE (`L3_LINE_SIZE), + .TAG_WIDTH (L3_MEM_TAG_WIDTH) + ) mem_bus_if[`NUM_MEM_PORTS](); + + `RESET_RELAY (l3_reset, reset); + + VX_cache_wrap_l3 #( + .INSTANCE_ID ("l3cache"), + .CACHE_SIZE (`L3_CACHE_SIZE), + .LINE_SIZE (`L3_LINE_SIZE), + .NUM_BANKS (`L3_NUM_BANKS), + .NUM_WAYS (`L3_NUM_WAYS), + .WORD_SIZE (L3_WORD_SIZE), + .NUM_MEM_PORTS (`NUM_MEM_PORTS), + .NUM_REQS (L3_NUM_REQS), + .CRSQ_SIZE (`L3_CRSQ_SIZE), + .MSHR_SIZE (`L3_MSHR_SIZE), + .MRSQ_SIZE (`L3_MRSQ_SIZE), + .MREQ_SIZE (`L3_WRITEBACK ? 
`L3_MSHR_SIZE : `L3_MREQ_SIZE), + .TAG_WIDTH (L2_MEM_TAG_WIDTH), + .WRITE_ENABLE (1), + .WRITEBACK (`L3_WRITEBACK), + .DIRTY_BYTES (`L3_WRITEBACK), + .UUID_WIDTH (`UUID_WIDTH), + .CORE_OUT_BUF (2), + .MEM_OUT_BUF (2), + .NC_ENABLE (1), + .PASSTHRU (!`L3_ENABLED) + ) l3cache ( + .clk (clk), + .reset (l3_reset), + + `ifdef PERF_ENABLE + .cache_perf (mem_perf_if.l3cache), + `endif + + .core_bus_if (per_cluster_mem_bus_if), + .mem_bus_if (mem_bus_if) + ); + + wire mem_req_fire[`NUM_MEM_PORTS-1:0]; + wire mem_rsp_fire[`NUM_MEM_PORTS-1:0]; + + for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin + assign mem_req_valid[i] = mem_bus_if[i].req_valid; + assign mem_req_rw[i] = mem_bus_if[i].req_data.rw; + assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen; + assign mem_req_addr[i] = mem_bus_if[i].req_data.addr; + assign mem_req_data[i] = mem_bus_if[i].req_data.data; + assign mem_req_tag[i] = mem_bus_if[i].req_data.tag; + assign mem_bus_if[i].req_ready = mem_req_ready[i]; + `UNUSED_VAR (mem_bus_if[i].req_data.atype) + + assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; + assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; + assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; + assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready; + + assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i]; + assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i]; + `UNUSED_VAR (mem_req_fire[i]) + `UNUSED_VAR (mem_rsp_fire[i]) + end + + VX_dcr_bus_if dcr_bus_if(); + assign dcr_bus_if.write_valid = dcr_wr_valid; + assign dcr_bus_if.write_addr = dcr_wr_addr; + assign dcr_bus_if.write_data = dcr_wr_data; + + wire [`NUM_CLUSTERS-1:0] per_cluster_busy; + + // Generate all clusters + for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters + + `RESET_RELAY (cluster_reset, reset); + + VX_dcr_bus_if cluster_dcr_bus_if(); + `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); + + VX_cluster #( + .CLUSTER_ID (cluster_id), + .INSTANCE_ID 
($sformatf("cluster%0d", cluster_id)) + ) cluster ( + `SCOPE_IO_BIND (scope_cluster + cluster_id) + + .clk (clk), + .reset (cluster_reset), + + `ifdef PERF_ENABLE + .mem_perf_if (mem_perf_if), + `endif + + .dcr_bus_if (cluster_dcr_bus_if), + + .mem_bus_if (per_cluster_mem_bus_if[cluster_id]), + + .busy (per_cluster_busy[cluster_id]) + ); + end + + `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1)); + +`ifdef PERF_ENABLE + + reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads; + mem_perf_t mem_perf; + + for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin + always @(posedge clk) begin + if (reset) begin + perf_mem_pending_reads <= '0; + end else begin + perf_mem_pending_reads <= $signed(perf_mem_pending_reads) + + `PERF_CTR_BITS'($signed(2'(mem_req_fire[i] && ~mem_bus_if[i].req_data.rw) - 2'(mem_rsp_fire[i]))); + end + end + end + + wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0]; + wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0]; + + for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin + assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw; + assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw; + end + + always @(posedge clk) begin + if (reset) begin + mem_perf <= '0; + end else begin + for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin + mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(mem_rd_req_fire[i]); + mem_perf.writes <= mem_perf.writes + `PERF_CTR_BITS'(mem_wr_req_fire[i]); + end + mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads; + end + end + assign mem_perf_if.mem = mem_perf; + +`endif + +`ifdef DBG_TRACE_MEM + always @(posedge clk) begin + for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin + if (mem_req_fire[i]) begin + if (mem_req_rw[i]) + `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i])); + else + `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, 
`TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i])); + end + if (mem_rsp_fire[i]) begin + `TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); + end + end + end +`endif + +`ifdef SIMULATION + always @(posedge clk) begin + $fflush(); // flush stdout buffer + end +`endif + +endmodule diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv new file mode 100644 index 000000000..9a8f1688f --- /dev/null +++ b/hw/rtl/cache/VX_cache_wrap_l3.sv @@ -0,0 +1,286 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_cache_define.vh" + +module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + + parameter TAG_SEL_IDX = 0, + + // Number of Word requests per cycle + parameter NUM_REQS = 4, + + + // Size of cache in bytes + parameter CACHE_SIZE = 4096, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 64, + // Number of banks + parameter NUM_BANKS = 1, + // Number of associative ways + parameter NUM_WAYS = 1, + // Size of a word in bytes + parameter WORD_SIZE = 4, + // Number of memory ports + parameter NUM_MEM_PORTS = 4, + + // Core Response Queue Size + parameter CRSQ_SIZE = 2, + // Miss Reserv Queue Knob + parameter MSHR_SIZE = 8, + // Memory Response Queue Size + parameter MRSQ_SIZE = 0, + // Memory Request Queue Size + parameter MREQ_SIZE = 4, + + // Enable cache writeable + parameter WRITE_ENABLE = 1, + + // Enable cache writeback + parameter WRITEBACK = 0, + + // Enable dirty bytes on writeback + parameter DIRTY_BYTES = 0, + + // Request debug identifier + parameter UUID_WIDTH = 0, + + // core request tag size + parameter TAG_WIDTH = UUID_WIDTH + 1, + + // enable bypass for non-cacheable addresses + parameter NC_ENABLE = 0, + + // Force bypass for all requests + parameter PASSTHRU = 0, + + // Core response output buffer + parameter CORE_OUT_BUF = 0, + + // Memory request output buffer + parameter MEM_OUT_BUF = 0 + ) ( + + input wire clk, + input wire reset, + + // PERF +`ifdef PERF_ENABLE + output cache_perf_t cache_perf, +`endif + + VX_mem_bus_if.slave core_bus_if [NUM_REQS], + VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS] +); + + `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) + + localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); + localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + + localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : + (NC_ENABLE ? 
`CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : + `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + + localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); + + VX_mem_bus_if #( + .DATA_SIZE (WORD_SIZE), + .TAG_WIDTH (TAG_WIDTH) + ) core_bus_cache_if[NUM_REQS](); + + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) + ) mem_bus_cache_if[NUM_MEM_PORTS](); + + if (NC_OR_BYPASS) begin + + `RESET_RELAY (nc_bypass_reset, reset); + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + VX_cache_bypass #( + .NUM_REQS (NUM_REQS), + .TAG_SEL_IDX (TAG_SEL_IDX), + + .PASSTHRU (PASSTHRU), + .NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE), + + .WORD_SIZE (WORD_SIZE), + .LINE_SIZE (LINE_SIZE), + + .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), + .CORE_TAG_WIDTH (TAG_WIDTH), + + .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), + .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), + .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), + + .UUID_WIDTH (UUID_WIDTH), + + .CORE_OUT_BUF (CORE_OUT_BUF), + .MEM_OUT_BUF (MEM_OUT_BUF) + ) cache_bypass ( + .clk (clk), + .reset (nc_bypass_reset), + + .core_bus_in_if (core_bus_if), + .core_bus_out_if(core_bus_cache_if), + + .mem_bus_in_if (mem_bus_cache_if[i]), + .mem_bus_out_if (mem_bus_if[i]) + ); + end + + end else begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin + `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); + end + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if[i], mem_bus_cache_if[i]); + end + end + + if (PASSTHRU != 0) begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin + `UNUSED_VAR (core_bus_cache_if[i].req_valid) + `UNUSED_VAR (core_bus_cache_if[i].req_data) + assign core_bus_cache_if[i].req_ready = 0; + + assign core_bus_cache_if[i].rsp_valid = 0; + assign core_bus_cache_if[i].rsp_data = '0; + `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) + end + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + assign mem_bus_cache_if[i].req_valid = 0; + assign 
mem_bus_cache_if[i].req_data = '0; + `UNUSED_VAR (mem_bus_cache_if[i].req_ready) + + `UNUSED_VAR (mem_bus_cache_if[i].rsp_valid) + `UNUSED_VAR (mem_bus_cache_if[i].rsp_data) + assign mem_bus_cache_if[i].rsp_ready = 0; + end + + `ifdef PERF_ENABLE + assign cache_perf = '0; + `endif + + end else begin + + `RESET_RELAY (cache_reset, reset); + + VX_cache #( + .INSTANCE_ID (INSTANCE_ID), + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .WORD_SIZE (WORD_SIZE), + .NUM_REQS (NUM_REQS), + .CRSQ_SIZE (CRSQ_SIZE), + .MSHR_SIZE (MSHR_SIZE), + .MRSQ_SIZE (MRSQ_SIZE), + .MREQ_SIZE (MREQ_SIZE), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + .DIRTY_BYTES (DIRTY_BYTES), + .UUID_WIDTH (UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), + .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), + .MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF) + ) cache ( + .clk (clk), + .reset (cache_reset), + `ifdef PERF_ENABLE + .cache_perf (cache_perf), + `endif + .core_bus_if (core_bus_cache_if), + .mem_bus_if (mem_bus_cache_if[0]) + ); + + end + +`ifdef DBG_TRACE_CACHE + + for (genvar i = 0; i < NUM_REQS; ++i) begin + wire [`UP(UUID_WIDTH)-1:0] core_req_uuid; + wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid; + + if (UUID_WIDTH != 0) begin + assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; + assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin + assign core_req_uuid = 0; + assign core_rsp_uuid = 0; + end + + wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready; + wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready; + + always @(posedge clk) begin + if (core_req_fire) begin + if (core_bus_if[i].req_data.rw) + `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, 
core_bus_if[i].req_data.data, core_req_uuid)); + else + `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); + end + if (core_rsp_fire) begin + `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); + end + end + end + + wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_req_uuid; + wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin + assign mem_req_uuid[i] = mem_bus_if[i].req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; + assign mem_rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin + assign mem_req_uuid[i] = 0; + assign mem_rsp_uuid[i] = 0; + end + end + + wire mem_req_fire [NUM_MEM_PORTS-1:0]; + wire mem_rsp_fire [NUM_MEM_PORTS-1:0]; + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + assign mem_req_fire[i] = mem_bus_if[i].req_valid && mem_bus_if[i].req_ready; + assign mem_rsp_fire[i] = mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready; + end + + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + always @(posedge clk) begin + if (mem_req_fire[i]) begin + if (mem_bus_if[i].req_data.rw) + `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d) bank=%d\n", + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_req_uuid[i], i)); + else + `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d) bank=%d\n", + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_req_uuid[i], i)); + end + if (mem_rsp_fire[i]) begin + `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", + $time, 
INSTANCE_ID, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data, mem_rsp_uuid[i])); + end + end + end +`endif + +endmodule diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 3deffc759..197078813 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -43,7 +43,7 @@ ifdef AXI_BUS TOP = Vortex_axi CXXFLAGS += -DAXI_BUS else - TOP = Vortex + TOP = Vortex_hbm endif VL_FLAGS = --exe diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index e5e00f49e..7c812f7e8 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -17,8 +17,8 @@ #include "VVortex_axi.h" typedef VVortex_axi Device; #else -#include "VVortex.h" -typedef VVortex Device; +#include "VVortex_hbm.h" +typedef VVortex_hbm Device; #endif #ifdef VCD_OUTPUT @@ -123,6 +123,15 @@ public: tfp_->open("trace.vcd"); #endif + pending_mem_reqs_.resize(NUM_MEM_PORTS); + dram_queue_.resize(NUM_MEM_PORTS); + + mem_rd_rsp_active_.resize(NUM_MEM_PORTS); + mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); + + mem_wr_rsp_active_.resize(NUM_MEM_PORTS); + mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); + ram_ = nullptr; #ifndef NDEBUG @@ -210,16 +219,19 @@ private: print_bufs_.clear(); - pending_mem_reqs_.clear(); + for (int i = 0; i < NUM_MEM_PORTS; ++i) { - { - std::queue empty; - std::swap(dram_queue_, empty); + pending_mem_reqs_.at(i).clear(); + + { + std::queue empty; + std::swap(dram_queue_.at(i), empty); + } + + mem_rd_rsp_active_.at(i) = false; + mem_wr_rsp_active_.at(i) = false; } - mem_rd_rsp_active_ = false; - mem_wr_rsp_active_ = false; - this->mem_bus_reset(); this->dcr_bus_reset(); @@ -250,17 +262,19 @@ private: dram_sim_.tick(); - if (!dram_queue_.empty()) { - auto mem_req = dram_queue_.front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (!dram_queue_.at(i).empty()) { + auto 
mem_req = dram_queue_.at(i).front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { + dram_queue_.at(i).pop(); } - }, mem_req)) { - dram_queue_.pop(); } } @@ -437,116 +451,126 @@ private: #else void mem_bus_reset() { - device_->mem_req_ready = 0; - device_->mem_rsp_valid = 0; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + device_->mem_req_ready[i] = 0; + device_->mem_rsp_valid[i] = 0; + } } void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->mem_rsp_ready; - return; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (!clk) { + mem_rd_rsp_ready_.at(i) = device_->mem_rsp_ready[i]; + return; + } } - if (ram_ == nullptr) { - device_->mem_req_ready = 0; - return; + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (ram_ == nullptr) { + device_->mem_req_ready[i] = 0; + return; + } } // process memory read responses - if (mem_rd_rsp_active_ - && device_->mem_rsp_valid && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready) { - device_->mem_rsp_valid = 1; - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (mem_rd_rsp_active_.at(i) + && device_->mem_rsp_valid[i] && mem_rd_rsp_ready_.at(i)) { + mem_rd_rsp_active_.at(i) = false; + } + if (!mem_rd_rsp_active_.at(i)) { + if (!pending_mem_reqs_.at(i).empty() + && (*pending_mem_reqs_.at(i).begin())->ready) { + device_->mem_rsp_valid[i] = 1; + auto mem_rsp_it = pending_mem_reqs_.at(i).begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: 
tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); + } + printf("\n"); + */ + memcpy(VDataCast::get(device_->mem_rsp_data[i]), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_tag[i] = mem_rsp->tag; + pending_mem_reqs_.at(i).erase(mem_rsp_it); + mem_rd_rsp_active_.at(i) = true; + delete mem_rsp; + } else { + device_->mem_rsp_valid[i] = 0; } - printf("\n"); - */ - memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE); - device_->mem_rsp_tag = mem_rsp->tag; - pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - delete mem_rsp; - } else { - device_->mem_rsp_valid = 0; } } // process memory requests - if (device_->mem_req_valid && running_) { - uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); - if (device_->mem_req_rw) { - auto byteen = device_->mem_req_byteen; - auto data = VDataCast::get(device_->mem_req_data); + for (int j = 0; j < NUM_MEM_PORTS; ++j) { + if (device_->mem_req_valid[j] && running_) { + uint64_t byte_addr = (device_->mem_req_addr[j] * MEM_BLOCK_SIZE); + if (device_->mem_req_rw[j]) { + auto byteen = device_->mem_req_byteen[j]; + auto data = VDataCast::get(device_->mem_req_data[j]); - if (byte_addr >= uint64_t(IO_COUT_ADDR) - && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < IO_COUT_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); + if (byte_addr >= uint64_t(IO_COUT_ADDR) + && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < IO_COUT_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i 
<< ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } } } + } else { + // process writes + /* + printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); + } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%d=%02x,", i, data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag[j]; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = true; + + // send dram request + dram_queue_.at(j).push(mem_req); } } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%d=%02x,", i, data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; - } - } - + // process reads auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag; + mem_req->tag = device_->mem_req_tag[j]; mem_req->addr = byte_addr; - mem_req->write = true; - mem_req->ready = true; + mem_req->write = false; + mem_req->ready = false; + ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + pending_mem_reqs_.at(j).emplace_back(mem_req); + + //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); // send dram request - dram_queue_.push(mem_req); + dram_queue_.at(j).push(mem_req); } - } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag; - mem_req->addr = byte_addr; 
- mem_req->write = false; - mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); - pending_mem_reqs_.emplace_back(mem_req); - - //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); - - // send dram request - dram_queue_.push(mem_req); } - } - device_->mem_req_ready = running_; + device_->mem_req_ready[j] = running_; + } } #endif @@ -583,9 +607,9 @@ private: std::unordered_map print_bufs_; - std::list pending_mem_reqs_; + std::vector> pending_mem_reqs_; - std::queue dram_queue_; + std::vector> dram_queue_; DramSim dram_sim_; @@ -597,11 +621,11 @@ private: RAM* ram_; - bool mem_rd_rsp_active_; - bool mem_rd_rsp_ready_; + std::vector mem_rd_rsp_active_; + std::vector mem_rd_rsp_ready_; - bool mem_wr_rsp_active_; - bool mem_wr_rsp_ready_; + std::vector mem_wr_rsp_active_; + std::vector mem_wr_rsp_ready_; bool running_; }; From df99b9da0e28dd07ed9c7f47d939d052b0a9fed8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 22 Aug 2024 16:29:27 -0700 Subject: [PATCH 154/488] minor update --- hw/syn/xilinx/dut/project.tcl | 2 +- hw/syn/yosys/synth.sh | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index bee841d79..05b76d21f 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -59,7 +59,7 @@ foreach def $vdefines_list { } # Synthesis -synth_design -top $top_module -include_dirs $vincludes_list -flatten_hierarchy none +synth_design -top $top_module -include_dirs $vincludes_list -mode out_of_context -flatten_hierarchy none write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index 79708b189..b44f16e6b 100755 --- a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -20,13 +20,15 @@ # exit when any command fails set -e 
+library="" +sdc_file="" source="" top_level="" dir_list=() inc_args="" macro_args="" no_warnings=1 -process="elaborate,netlist,techmap,verilog" +process="elaborate,netlist,techmap,verilog,link" declare -a excluded_warnings=("Resizing cell port") @@ -66,8 +68,14 @@ checkErrors() usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; } [ $# -eq 0 ] && usage -while getopts "s:t:I:D:P:Wh" arg; do +while getopts "c:l:s:t:I:D:P:Wh" arg; do case $arg in + l) # library + library=${OPTARG} + ;; + c) # SDC constraints + sdc_file=${OPTARG} + ;; s) # source source=${OPTARG} ;; @@ -95,6 +103,16 @@ while getopts "s:t:I:D:P:Wh" arg; do done { + # read device library + if [ -n "$library" ]; then + echo "read_liberty $library" + fi + + # read design constraints + if [ -n "$sdc_file" ]; then + echo "read_sdc $sdc_file" + fi + # read design sources for dir in "${dir_list[@]}" do @@ -117,6 +135,11 @@ done echo "synth -top $top_level" fi + # link design + if echo "$process" | grep -q "link"; then + echo "link_design -top $top_level" + fi + # convert to netlist if echo "$process" | grep -q "netlist"; then echo "proc; opt" From 6eeb8eac0f3ed3a8242bf7b76feb19ec7b6402b2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 23 Aug 2024 00:54:48 -0700 Subject: [PATCH 155/488] minor update --- hw/rtl/libs/VX_stream_buffer.sv | 84 ++++++++++++++--------------- hw/syn/xilinx/dut/core/Makefile | 2 +- hw/syn/xilinx/dut/fpu/Makefile | 2 +- hw/syn/xilinx/dut/issue/Makefile | 2 +- hw/syn/xilinx/dut/project.xdc | 5 +- hw/syn/xilinx/dut/top/Makefile | 2 +- hw/syn/xilinx/dut/unittest/Makefile | 2 +- hw/syn/xilinx/dut/vortex/Makefile | 2 +- 8 files changed, 49 insertions(+), 52 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index bebe8ec71..ea4561933 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -1,18 +1,18 @@ // Copyright 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may 
not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// A stream elastic buffer operates at full-bandwidth where push and pop can happen simultaneously +// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously // It has the following benefits: // + full-bandwidth throughput // + ready_in and ready_out are decoupled @@ -27,21 +27,21 @@ module VX_stream_buffer #( parameter DATAW = 1, parameter OUT_REG = 0, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); +); if (PASSTHRU != 0) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; end else begin if (OUT_REG != 0) begin @@ -49,77 +49,71 @@ module VX_stream_buffer #( reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] buffer; reg valid_out_r; - reg use_buffer; - - wire push = valid_in && ready_in; - wire stall_out = valid_out_r && ~ready_out; - + reg no_buffer; + + wire fire_in = valid_in && ready_in; + wire flow_out = ready_out || ~valid_out_r; + always @(posedge clk) begin if (reset) begin - valid_out_r <= 0; - use_buffer <= 0; + valid_out_r <= 0; + no_buffer <= 1; end else begin if (ready_out) begin - use_buffer <= 0; + no_buffer <= 1; end else if (valid_in && valid_out) begin - use_buffer <= 1; + no_buffer <= 0; end - if (~stall_out) begin - 
valid_out_r <= valid_in || use_buffer; + if (flow_out) begin + valid_out_r <= valid_in || ~no_buffer; end end end always @(posedge clk) begin - if (push) begin + if (fire_in) begin buffer <= data_in; end - if (~stall_out) begin - data_out_r <= use_buffer ? buffer : data_in; + if (flow_out) begin + data_out_r <= no_buffer ? data_in : buffer; end end - assign ready_in = ~use_buffer; + assign ready_in = no_buffer; assign valid_out = valid_out_r; assign data_out = data_out_r; end else begin - reg [1:0][DATAW-1:0] shift_reg; - reg valid_out_r, ready_in_r, rd_ptr_r; + reg [DATAW-1:0] shift_reg [1:0]; + reg [1:0] fifo_state; - wire push = valid_in && ready_in; - wire pop = valid_out_r && ready_out; + wire fire_in = valid_in && ready_in; + wire fire_out = valid_out && ready_out; always @(posedge clk) begin if (reset) begin - valid_out_r <= 0; - ready_in_r <= 1; - rd_ptr_r <= 1; + fifo_state <= 2'b00; end else begin - if (push) begin - if (!pop) begin - ready_in_r <= rd_ptr_r; - valid_out_r <= 1; - end - end else if (pop) begin - ready_in_r <= 1; - valid_out_r <= rd_ptr_r; - end - rd_ptr_r <= rd_ptr_r ^ (push ^ pop); - end + case ({fire_in, fire_out}) + 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 + 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 + default: fifo_state <= fifo_state; + endcase + end end always @(posedge clk) begin - if (push) begin + if (fire_in) begin shift_reg[1] <= shift_reg[0]; shift_reg[0] <= data_in; end end - assign ready_in = ready_in_r; - assign valid_out = valid_out_r; - assign data_out = shift_reg[rd_ptr_r]; + assign ready_in = ~fifo_state[1]; + assign valid_out = fifo_state[0]; + assign data_out = shift_reg[fifo_state[1]]; + end end diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index eeeaa5233..86bb0b53c 100644 --- a/hw/syn/xilinx/dut/core/Makefile +++ b/hw/syn/xilinx/dut/core/Makefile @@ -11,4 +11,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring 
FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index b7826dc68..133a8a4e9 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -8,4 +8,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) +RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile index c1804a398..bb93f44d2 100644 --- a/hw/syn/xilinx/dut/issue/Makefile +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -11,4 +11,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces 
-I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/project.xdc b/hw/syn/xilinx/dut/project.xdc index 8c74ebb4a..f786e7837 100644 --- a/hw/syn/xilinx/dut/project.xdc +++ b/hw/syn/xilinx/dut/project.xdc @@ -1 +1,4 @@ -## empty \ No newline at end of file +set CLK_FREQ_MHZ 300 +set clk_port_name clk +set clk_port [get_ports $clk_port_name] +create_clock -name core_clock -period [expr 1000.0 / $CLK_FREQ_MHZ] $clk_port \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index 341690206..bc55224f6 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -29,4 +29,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile index 2bfb18e4e..061e75441 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -8,4 +8,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache 
-I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile index 7429df414..ee49be436 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -13,4 +13,4 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) From 66fd2d4e2d19dde471f2089d644fc882e7e68014 Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Fri, 23 Aug 2024 16:42:31 -0400 Subject: [PATCH 156/488] update ci --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 203612974..c51abf4e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -202,8 +202,8 @@ jobs: - name: Run Build run: | TOOLDIR=$PWD/tools - mkdir -p build${{ matrix.xlen }} - cd build${{ matrix.xlen }} + mkdir -p build${{ matrix.xlen }}-vm + cd build${{ matrix.xlen }}-vm ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1 source ci/toolchain_env.sh make software -s > /dev/null From ea9560b33b863918f5cc3f1f6309d9b54baebdbf Mon Sep 17 00:00:00 2001 From: Hanran Wu Date: Fri, 23 Aug 2024 17:44:24 -0400 Subject: [PATCH 157/488] merge --- .github/workflows/ci.yml | 270 + .gitignore | 1 + .gitmodules | 3 +- .travis.yml | 
118 - Makefile.in | 23 +- README.md | 10 +- ci/regression.sh.in | 232 +- ci/system_updates.sh | 27 + ci/toolchain_env.sh.in | 4 +- ci/trace_csv.py | 310 +- config.mk.in | 6 +- configure | 15 +- docs/altera_fpga_guide.md | 79 + docs/index.md | 5 +- docs/xilinx_fpga_guide.md | 36 + hw/Makefile | 5 +- hw/dpi/float_dpi.vh | 6 +- hw/dpi/util_dpi.vh | 2 - hw/rtl/VX_cluster.sv | 20 +- hw/rtl/VX_config.vh | 29 +- hw/rtl/VX_define.vh | 70 +- hw/rtl/VX_gpu_pkg.sv | 136 +- hw/rtl/VX_platform.vh | 10 +- hw/rtl/VX_socket.sv | 33 +- hw/rtl/VX_types.vh | 47 +- hw/rtl/Vortex.sv | 20 +- hw/rtl/afu/opae/ccip_std_afu.sv | 9 +- hw/rtl/afu/opae/vortex_afu.sv | 12 +- hw/rtl/afu/xrt/VX_afu_wrap.sv | 37 +- hw/rtl/cache/VX_bank_flush.sv | 109 + hw/rtl/cache/VX_cache.sv | 348 +- hw/rtl/cache/VX_cache_bank.sv | 355 +- hw/rtl/cache/VX_cache_bypass.sv | 91 +- hw/rtl/cache/VX_cache_cluster.sv | 35 +- hw/rtl/cache/VX_cache_data.sv | 128 +- hw/rtl/cache/VX_cache_define.vh | 26 +- hw/rtl/cache/VX_cache_flush.sv | 154 + hw/rtl/cache/VX_cache_init.sv | 1 + hw/rtl/cache/VX_cache_mshr.sv | 27 +- hw/rtl/cache/VX_cache_tags.sv | 90 +- hw/rtl/cache/VX_cache_wrap.sv | 54 +- hw/rtl/core/VX_alu_int.sv | 14 +- hw/rtl/core/VX_alu_muldiv.sv | 8 +- hw/rtl/core/VX_alu_unit.sv | 38 +- hw/rtl/core/VX_commit.sv | 53 +- hw/rtl/core/VX_core.sv | 92 +- hw/rtl/core/VX_core_top.sv | 1 + hw/rtl/core/VX_csr_data.sv | 28 +- hw/rtl/core/VX_csr_unit.sv | 6 +- hw/rtl/core/VX_dcr_data.sv | 3 +- hw/rtl/core/VX_decode.sv | 15 +- hw/rtl/core/VX_dispatch.sv | 136 +- hw/rtl/core/VX_execute.sv | 8 +- hw/rtl/core/VX_fetch.sv | 82 +- hw/rtl/core/VX_fpu_unit.sv | 12 +- hw/rtl/core/VX_ibuffer.sv | 48 +- hw/rtl/core/VX_issue.sv | 193 +- hw/rtl/core/VX_issue_slice.sv | 159 + hw/rtl/core/VX_issue_top.sv | 132 + hw/rtl/core/VX_lmem_unit.sv | 143 +- hw/rtl/core/VX_lsu_adapter.sv | 24 +- hw/rtl/core/VX_lsu_slice.sv | 84 +- hw/rtl/core/VX_lsu_unit.sv | 26 +- hw/rtl/core/VX_operands.sv | 293 +- hw/rtl/core/VX_schedule.sv | 64 +- 
hw/rtl/core/VX_scoreboard.sv | 286 +- hw/rtl/core/VX_sfu_unit.sv | 6 +- hw/rtl/core/VX_split_join.sv | 4 +- hw/rtl/core/VX_trace_pkg.sv | 399 + hw/rtl/core/VX_wctl_unit.sv | 4 +- hw/rtl/fpu/VX_fpu_dpi.sv | 160 +- hw/rtl/fpu/VX_fpu_dsp.sv | 4 +- hw/rtl/fpu/VX_fpu_fma.sv | 48 +- hw/rtl/fpu/VX_fpu_fpnew.sv | 2 +- hw/rtl/interfaces/VX_commit_sched_if.sv | 13 +- hw/rtl/interfaces/VX_decode_if.sv | 9 +- hw/rtl/interfaces/VX_pipeline_perf_if.sv | 42 +- hw/rtl/libs/VX_avs_adapter.sv | 50 +- hw/rtl/libs/VX_cyclic_arbiter.sv | 25 +- hw/rtl/libs/VX_dp_ram.sv | 57 +- hw/rtl/libs/VX_dp_ram_rst.sv | 115 + hw/rtl/libs/VX_elastic_buffer.sv | 24 +- hw/rtl/libs/VX_fair_arbiter.sv | 41 +- hw/rtl/libs/VX_fifo_queue.sv | 84 +- hw/rtl/libs/VX_generic_arbiter.sv | 52 +- hw/rtl/libs/VX_matrix_arbiter.sv | 56 +- hw/rtl/libs/VX_mem_coalescer.sv | 222 +- hw/rtl/libs/VX_mem_scheduler.sv | 119 +- hw/rtl/libs/VX_onehot_mux.sv | 141 +- hw/rtl/libs/VX_pe_serializer.sv | 65 +- hw/rtl/libs/VX_pending_size.sv | 96 +- hw/rtl/libs/VX_priority_arbiter.sv | 10 +- hw/rtl/libs/VX_rr_arbiter.sv | 319 +- hw/rtl/libs/VX_sp_ram.sv | 14 +- hw/rtl/libs/VX_stream_arb.sv | 96 +- hw/rtl/libs/VX_stream_pack.sv | 73 +- hw/rtl/libs/VX_stream_xbar.sv | 29 +- hw/rtl/mem/VX_local_mem.sv | 65 +- hw/rtl/mem/VX_mem_switch.sv | 27 +- hw/syn/altera/opae/Makefile | 15 +- hw/syn/altera/opae/run_ase.sh | 12 +- hw/syn/altera/quartus/Makefile | 7 +- hw/syn/altera/quartus/common.mk | 5 +- hw/syn/altera/quartus/issue/Makefile | 14 + hw/syn/altera/quartus/project.sdc | 2 +- hw/syn/altera/quartus/top/Makefile | 14 + hw/syn/xilinx/README | 12 +- hw/syn/xilinx/test/Makefile | 1 - hw/syn/xilinx/xrt/Makefile | 16 +- hw/syn/yosys/Makefile | 12 +- hw/syn/yosys/synth.sh | 26 +- hw/unittest/Makefile | 5 +- hw/unittest/cache/cachesim.cpp | 103 +- hw/unittest/cache/cachesim.h | 37 +- hw/unittest/common.mk | 10 +- hw/unittest/core_top/Makefile | 2 +- hw/unittest/issue_top/Makefile | 26 + hw/unittest/issue_top/main.cpp | 49 + 
hw/unittest/mem_streamer/memsim.cpp | 25 +- hw/unittest/mem_streamer/memsim.h | 8 +- kernel/Makefile | 4 + miscs/docker/Dockerfile.ubuntu | 11 +- runtime/Makefile | 2 + runtime/include/vortex.h | 38 +- runtime/opae/Makefile | 8 +- runtime/opae/driver.cpp | 8 +- runtime/opae/driver.h | 23 +- runtime/opae/vortex.cpp | 4 +- runtime/rtlsim/vortex.cpp | 4 + runtime/simx/Makefile | 4 + runtime/simx/vortex.cpp | 2 +- runtime/stub/utils.cpp | 102 +- runtime/stub/vortex.cpp | 19 +- runtime/xrt/vortex.cpp | 10 +- sim/Makefile | 3 + sim/common.mk | 5 +- sim/common/bitvector.h | 314 + sim/common/dram_sim.cpp | 120 + sim/common/dram_sim.h | 36 + sim/common/util.h | 9 +- sim/opaesim/Makefile | 17 +- sim/opaesim/opae_sim.cpp | 175 +- sim/rtlsim/Makefile | 14 +- sim/rtlsim/main.cpp | 4 +- sim/rtlsim/processor.cpp | 231 +- sim/simx/Makefile | 16 +- sim/simx/arch.h | 24 - sim/simx/cache_sim.cpp | 33 +- sim/simx/cache_sim.h | 14 +- sim/simx/cluster.cpp | 28 +- sim/simx/constants.h | 18 +- sim/simx/core.cpp | 174 +- sim/simx/core.h | 8 +- sim/simx/decode.cpp | 2 +- sim/simx/emulator.cpp | 10 +- sim/simx/emulator.h | 1 + sim/simx/execute.cpp | 153 +- sim/simx/func_unit.cpp | 149 +- sim/simx/func_unit.h | 28 +- sim/simx/instr_trace.h | 41 +- sim/simx/local_mem.cpp | 4 +- sim/simx/main.cpp | 4 +- sim/simx/mem_coalescer.cpp | 185 +- sim/simx/mem_coalescer.h | 25 +- sim/simx/mem_sim.cpp | 103 +- sim/simx/operand.h | 52 +- sim/simx/processor.cpp | 15 +- sim/simx/scoreboard.h | 102 +- sim/simx/socket.cpp | 16 +- sim/simx/types.cpp | 155 +- sim/simx/types.h | 108 +- sim/xrtsim/Makefile | 17 +- sim/xrtsim/vortex_afu_shim.sv | 1 - sim/xrtsim/xrt_sim.cpp | 160 +- tests/opencl/Makefile | 21 - tests/opencl/bfs/Makefile | 2 +- tests/opencl/bfs/graph32.txt | 225 + tests/opencl/bfs/graph4k.txt | 28677 +++++++++++++++++++++ tests/opencl/lbm/main.cc | 11 +- tests/regression/Makefile | 21 +- tests/regression/stencil3d/Makefile | 14 + tests/regression/stencil3d/common.h | 18 + 
tests/regression/stencil3d/kernel.cpp | 58 + tests/regression/stencil3d/main.cpp | 328 + third_party/Makefile | 8 +- third_party/ramulator | 2 +- 186 files changed, 36003 insertions(+), 4008 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml create mode 100755 ci/system_updates.sh create mode 100644 docs/altera_fpga_guide.md create mode 100644 docs/xilinx_fpga_guide.md create mode 100644 hw/rtl/cache/VX_bank_flush.sv create mode 100644 hw/rtl/cache/VX_cache_flush.sv create mode 100644 hw/rtl/core/VX_issue_slice.sv create mode 100644 hw/rtl/core/VX_issue_top.sv create mode 100644 hw/rtl/core/VX_trace_pkg.sv create mode 100644 hw/rtl/libs/VX_dp_ram_rst.sv create mode 100644 hw/syn/altera/quartus/issue/Makefile create mode 100644 hw/unittest/issue_top/Makefile create mode 100644 hw/unittest/issue_top/main.cpp create mode 100644 sim/common/bitvector.h create mode 100644 sim/common/dram_sim.cpp create mode 100644 sim/common/dram_sim.h create mode 100644 tests/opencl/bfs/graph32.txt create mode 100755 tests/opencl/bfs/graph4k.txt create mode 100644 tests/regression/stencil3d/Makefile create mode 100644 tests/regression/stencil3d/common.h create mode 100644 tests/regression/stencil3d/kernel.cpp create mode 100644 tests/regression/stencil3d/main.cpp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..c51abf4e3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,270 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +name: CI + +on: [push, pull_request] + +jobs: + setup: + runs-on: ubuntu-20.04 + + steps: + - name: Checkout code + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Cache Toolchain Directory + id: cache-toolchain + uses: actions/cache@v2 + with: + path: tools + key: ${{ runner.os }}-toolchain-v0.1 + restore-keys: | + ${{ runner.os }}-toolchain- + + - name: Cache Third Party Directory + id: cache-thirdparty + uses: actions/cache@v2 + with: + path: third_party + key: ${{ runner.os }}-thirdparty-v0.1 + restore-keys: | + ${{ runner.os }}-thirdparty- + + - name: Install Dependencies + if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true' + run: | + sudo bash ./ci/system_updates.sh + + - name: Setup Toolchain + if: steps.cache-toolchain.outputs.cache-hit != 'true' + run: | + TOOLDIR=$PWD/tools + mkdir -p build + cd build + ../configure --tooldir=$TOOLDIR + ci/toolchain_install.sh --all + + - name: Setup Third Party + if: steps.cache-thirdparty.outputs.cache-hit != 'true' + run: | + make -C third_party > /dev/null + + # build: + # runs-on: ubuntu-20.04 + # needs: setup + # strategy: + # matrix: + # xlen: [32, 64] + + # steps: + # - name: Checkout code + # uses: actions/checkout@v2 + + # - name: Install Dependencies + # run: | + # sudo bash ./ci/system_updates.sh + + # - name: Cache Toolchain Directory + # id: cache-toolchain + # uses: actions/cache@v2 + # with: + # path: tools + # key: ${{ runner.os }}-toolchain-v0.1 + # restore-keys: | + # ${{ runner.os }}-toolchain- + + # - name: Cache Third Party Directory + # id: cache-thirdparty + # uses: actions/cache@v2 + # with: + # path: third_party + # key: ${{ runner.os }}-thirdparty-v0.1 + # restore-keys: | + # ${{ runner.os }}-thirdparty- + + # - name: Run Build + # run: | + # TOOLDIR=$PWD/tools + # mkdir -p build${{ matrix.xlen }} + # cd 
build${{ matrix.xlen }} + # ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} + # source ci/toolchain_env.sh + # make software -s > /dev/null + # make tests -s > /dev/null + + # - name: Upload Build Artifact + # uses: actions/upload-artifact@v2 + # with: + # name: build-${{ matrix.xlen }} + # path: build${{ matrix.xlen }} + + # tests: + # runs-on: ubuntu-20.04 + # needs: build + # strategy: + # matrix: + # name: [regression, opencl, config1, config2, debug, stress] + # xlen: [32, 64] + + # steps: + # - name: Checkout code + # uses: actions/checkout@v2 + + # - name: Install Dependencies + # run: | + # sudo bash ./ci/system_updates.sh + + # - name: Cache Toolchain Directory + # id: cache-toolchain + # uses: actions/cache@v2 + # with: + # path: tools + # key: ${{ runner.os }}-toolchain-v0.1 + # restore-keys: | + # ${{ runner.os }}-toolchain- + + # - name: Cache Third Party Directory + # id: cache-thirdparty + # uses: actions/cache@v2 + # with: + # path: third_party + # key: ${{ runner.os }}-thirdparty-v0.1 + # restore-keys: | + # ${{ runner.os }}-thirdparty- + + # - name: Download Build Artifact + # uses: actions/download-artifact@v2 + # with: + # name: build-${{ matrix.xlen }} + # path: build${{ matrix.xlen }} + + # - name: Run tests + # run: | + # cd build${{ matrix.xlen }} + # source ci/toolchain_env.sh + # chmod -R +x . 
# Ensure all files have executable permissions + # if [ "${{ matrix.name }}" == "regression" ]; then + # ./ci/regression.sh --unittest + # ./ci/regression.sh --isa + # ./ci/regression.sh --kernel + # ./ci/regression.sh --synthesis + # ./ci/regression.sh --regression + # else + # ./ci/regression.sh --${{ matrix.name }} + # fi + + build_vm: + runs-on: ubuntu-20.04 + needs: setup + strategy: + matrix: + xlen: [32, 64] + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Install Dependencies + run: | + sudo bash ./ci/system_updates.sh + + - name: Cache Toolchain Directory + id: cache-toolchain + uses: actions/cache@v2 + with: + path: tools + key: ${{ runner.os }}-toolchain-v0.1 + restore-keys: | + ${{ runner.os }}-toolchain- + + - name: Cache Third Party Directory + id: cache-thirdparty + uses: actions/cache@v2 + with: + path: third_party + key: ${{ runner.os }}-thirdparty-v0.1 + restore-keys: | + ${{ runner.os }}-thirdparty- + + - name: Run Build + run: | + TOOLDIR=$PWD/tools + mkdir -p build${{ matrix.xlen }}-vm + cd build${{ matrix.xlen }}-vm + ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1 + source ci/toolchain_env.sh + make software -s > /dev/null + make tests -s > /dev/null + + - name: Upload Build Artifact + uses: actions/upload-artifact@v2 + with: + name: build-${{ matrix.xlen }}-vm + path: build${{ matrix.xlen }}-vm + + test_vm: + runs-on: ubuntu-20.04 + needs: build_vm + strategy: + matrix: + xlen: [32, 64] + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Install Dependencies + run: | + sudo bash ./ci/system_updates.sh + + - name: Cache Toolchain Directory + id: cache-toolchain + uses: actions/cache@v2 + with: + path: tools + key: ${{ runner.os }}-toolchain-v0.1 + restore-keys: | + ${{ runner.os }}-toolchain- + + - name: Cache Third Party Directory + id: cache-thirdparty + uses: actions/cache@v2 + with: + path: third_party + key: ${{ runner.os }}-thirdparty-v0.1 + restore-keys: | + ${{ 
runner.os }}-thirdparty- + + - name: Download Build Artifact + uses: actions/download-artifact@v2 + with: + name: build-${{ matrix.xlen }}-vm + path: build${{ matrix.xlen }}-vm + + - name: Run tests + run: | + cd build${{ matrix.xlen }}-vm + source ci/toolchain_env.sh + chmod -R +x . # Ensure all files have executable permissions + ./ci/regression.sh --vm + + complete: + runs-on: ubuntu-20.04 + needs: test_vm + + steps: + - name: Check Completion + run: echo "All matrix jobs passed" \ No newline at end of file diff --git a/.gitignore b/.gitignore index ca68f0eb2..43388e9cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /build* /.vscode +*.cache *.code-workspace diff --git a/.gitmodules b/.gitmodules index 1a002355f..df3ca47e2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,5 +6,4 @@ url = https://github.com/ucb-bar/berkeley-softfloat-3.git [submodule "third_party/ramulator"] path = third_party/ramulator - url = https://github.com/CMU-SAFARI/ramulator.git - ignore = dirty + url = https://github.com/CMU-SAFARI/ramulator2.git diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 57098c8f0..000000000 --- a/.travis.yml +++ /dev/null @@ -1,118 +0,0 @@ -language: cpp -dist: focal -os: linux -compiler: gcc - -addons: - apt: - packages: - - build-essential - - valgrind - - libstdc++6 - - binutils - - python - - uuid-dev - -env: - global: - - TOOLDIR=$HOME/tools - -cache: - directories: - - $TOOLDIR - - $HOME/third_party - - $HOME/build32 - - $HOME/build64 - -before_install: - - if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ] || [ "$(cat "$TOOLDIR/version.txt")" != "v0.4" ]; then - rm -rf $TOOLDIR; - mkdir -p $TRAVIS_BUILD_DIR/build && cd $TRAVIS_BUILD_DIR/build; - ../configure --tooldir=$TOOLDIR; - ci/toolchain_install.sh --all; - echo "v0.3" > "$TOOLDIR/version.txt"; - else - echo "using existing tooldir build"; - fi - - if [ ! 
-d "$HOME/third_party" ] || [ -z "$(ls -A $HOME/third_party)" ] || [ "$(cat "$HOME/third_party/version.txt")" != "v0.2" ]; then - cd $TRAVIS_BUILD_DIR; - make -C third_party > /dev/null; - echo "v0.2" > "third_party/version.txt"; - cp -rf third_party $HOME; - else - echo "using existing third_party build"; - cp -rf $HOME/third_party $TRAVIS_BUILD_DIR; - fi - -install: - - if [ ! -d "$HOME/build$XLEN" ] || [ -z "$(ls -A $HOME/build$XLEN)" ] || [ "$(cat "$HOME/build$XLEN/version.txt")" != "$TRAVIS_COMMIT" ]; then - mkdir -p $TRAVIS_BUILD_DIR/build$XLEN && cd $TRAVIS_BUILD_DIR/build$XLEN; - ../configure --tooldir=$TOOLDIR --xlen=$XLEN; - source ci/toolchain_env.sh; - make build -s > /dev/null; - echo "$TRAVIS_COMMIT" > version.txt; - cp -rf $TRAVIS_BUILD_DIR/build$XLEN $HOME; - else - echo "using existing build for commit $TRAVIS_COMMIT"; - cp -rf $HOME/build$XLEN $TRAVIS_BUILD_DIR; - fi - -before_script: - - cd $TRAVIS_BUILD_DIR/build$XLEN - - source ci/toolchain_env.sh - -stages: - - test - -jobs: - include: - - stage: test - name: regression32 - env: XLEN=32 - script: - - ./ci/travis_run.py ./ci/regression.sh --unittest - - ./ci/travis_run.py ./ci/regression.sh --isa - - ./ci/travis_run.py ./ci/regression.sh --kernel - - ./ci/travis_run.py ./ci/regression.sh --synthesis - - ./ci/travis_run.py ./ci/regression.sh --regression - - ./ci/travis_run.py ./ci/regression.sh --opencl - - - stage: test - name: regression64 - env: XLEN=64 - script: - - ./ci/travis_run.py ./ci/regression.sh --isa - - ./ci/travis_run.py ./ci/regression.sh --kernel - - ./ci/travis_run.py ./ci/regression.sh --synthesis - - ./ci/travis_run.py ./ci/regression.sh --regression - - ./ci/travis_run.py ./ci/regression.sh --opencl - - - stage: test - name: config - env: XLEN=32 - script: - - ./ci/travis_run.py ./ci/regression.sh --cluster - - ./ci/travis_run.py ./ci/regression.sh --config - - - stage: test - name: debug - env: XLEN=32 - script: - - ./ci/travis_run.py ./ci/regression.sh --debug - - 
./ci/travis_run.py ./ci/regression.sh --stress - - - stage: test - name: virtual_memory - env: XLEN=32 - env: VM_DISABLE=1 - script: - - ./ci/travis_run.py ./ci/regression.sh --regression - - ./ci/travis_run.py ./ci/regression.sh --opencl - - - stage: test - name: virtual_memory - env: XLEN=64 - env: VM_DISABLE=1 - script: - - ./ci/travis_run.py ./ci/regression.sh --regression - - ./ci/travis_run.py ./ci/regression.sh --opencl \ No newline at end of file diff --git a/Makefile.in b/Makefile.in index ea572e70e..7f594747a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,5 +1,15 @@ include config.mk +.PHONY: build software tests + +vm: + $(MAKE) -C $(VORTEX_HOME)/third_party + $(MAKE) -C hw + $(MAKE) -C sim simx + $(MAKE) -C kernel + $(MAKE) -C runtime vm + $(MAKE) -C tests + all: $(MAKE) -C $(VORTEX_HOME)/third_party $(MAKE) -C hw @@ -15,13 +25,24 @@ build: $(MAKE) -C runtime $(MAKE) -C tests -clean: +software: + $(MAKE) -C hw + $(MAKE) -C kernel + $(MAKE) -C runtime/stub + +tests: + $(MAKE) -C tests + +clean-build: $(MAKE) -C hw clean $(MAKE) -C sim clean $(MAKE) -C kernel clean $(MAKE) -C runtime clean $(MAKE) -C tests clean +clean: clean-build + $(MAKE) -C $(VORTEX_HOME)/third_party clean + # Install setup KERNEL_INC_DST = $(PREFIX)/kernel/include KERNEL_LIB_DST = $(PREFIX)/kernel/lib$(XLEN) diff --git a/README.md b/README.md index 704883e30..6cabdeee5 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md). ``` ### Install Vortex codebase ``` - git clone --depth=1 --recursive git@github.com:vortexgpgpu/vortex.git -b vortex_vm + git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git -b vortex_vm cd vortex ``` @@ -68,18 +68,18 @@ More detailed build instructions can be found [here](docs/install_vortex.md). 
mkdir out export OUT_DIR=`pwd`/out cd build - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-6-14 --prefix=$OUT_DIR + # Run the following to disble virtual memory feature in compilation + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR + # Run the following instead to enable virtual memory feature in compilation + ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1 ### Install prebuilt toolchain # We will use the precomipled tools in volvo toolchanin directory - ### set environment variables # should always run before using the toolchain! source ./ci/toolchain_env.sh - ### Building Vortex make -s - ### Quick demo running vecadd OpenCL kernel on 2 cores $ ./ci/blackbox.sh --cores=2 --app=vecadd diff --git a/ci/regression.sh.in b/ci/regression.sh.in index dffd91502..57c021a70 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -25,37 +25,6 @@ XLEN=${XLEN:=@XLEN@} echo "Vortex Regression Test: XLEN=$XLEN" -split_file() { - if [[ $# -ne 2 ]]; then - echo "Usage: $0 " - return 1 - fi - input_file="$1" - start_with="$2" - if [[ ! -r "$input_file" ]]; then - echo "Error: File '$input_file' is not readable or does not exist." - return 1 - fi - count=0 - output_file="" - while IFS= read -r line; do - if [[ $line == $start_with* ]]; then - count=$((count + 1)) - output_file="$input_file.part$count" - > "$output_file" # ensure empty - fi - if [[ -n "$output_file" ]]; then - echo "$line" >> "$output_file" - fi - done < "$input_file" - - if [[ $count -eq 0 ]]; then - echo "No lines starting with '$start_with' were found in '$input_file'." - fi -} - -############################################################################### - unittest() { make -C tests/unittest run @@ -66,6 +35,9 @@ isa() { echo "begin isa tests..." 
+ make -C sim/simx + make -C sim/rtlsim + make -C tests/riscv/isa run-simx make -C tests/riscv/isa run-rtlsim @@ -96,8 +68,8 @@ isa() make -C tests/riscv/isa run-rtlsim-64fx fi - # restore default prebuilt configuration - make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null + # clean build + make -C sim/rtlsim clean echo "isa tests done!" } @@ -106,6 +78,9 @@ kernel() { echo "begin kernel tests..." + make -C sim/simx + make -C sim/rtlsim + make -C tests/kernel run-simx make -C tests/kernel run-rtlsim @@ -116,6 +91,9 @@ regression() { echo "begin regression tests..." + make -C runtime/simx + make -C runtime/rtlsim + make -C tests/regression run-simx make -C tests/regression run-rtlsim @@ -134,6 +112,9 @@ opencl() { echo "begin opencl tests..." + make -C runtime/simx + make -C runtime/rtlsim + make -C tests/opencl run-simx make -C tests/opencl run-rtlsim @@ -143,24 +124,28 @@ opencl() echo "opencl tests done!" } -cluster() -{ - echo "begin clustering tests..." +vm(){ + echo "begin vm tests..." 
- # cores clustering - ./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1" - ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1" - ./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1" - ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1" + make -C sim/simx + make -C runtime/simx - # L2/L3 - ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1" - ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1" - ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1" - ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1" - ./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1" + make -C tests/kernel run-simx + + # Regression tests + make -C tests/regression run-simx - echo "clustering tests done!" + # test global barrier + CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 + + # test local barrier + ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" + + # OpenCL tests + make -C tests/opencl run-simx + ./ci/blackbox.sh --driver=simx --app=lbm --warps=8 + + echo "vm tests done!" } test_csv_trace() @@ -170,29 +155,20 @@ test_csv_trace() make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-simx-32im > run_simx.log make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log - split_file run_simx.log "Running " - split_file run_rtlsim.log "Running " - for file in ./run_simx.log.part*; do - if [[ -f "$file" ]]; then - file2="${file//simx/rtlsim}" - if [[ -f "$file2" ]]; then - ./ci/trace_csv.py -tsimx $file -otrace_simx.csv - ./ci/trace_csv.py -trtlsim $file2 -otrace_rtlsim.csv - diff trace_rtlsim.csv trace_simx.csv - else - echo "File $file2 not found." 
- fi - fi - done - # restore default prebuilt configuration - make -C sim/simx clean && make -C sim/simx > /dev/null - make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null + ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv + ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv + diff trace_rtlsim.csv trace_simx.csv + # clean build + make -C sim/simx clean + make -C sim/rtlsim clean } debug() { echo "begin debugging tests..." + test_csv_trace + ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1" @@ -200,21 +176,23 @@ debug() echo "debugging tests done!" } -config() +config1() { - echo "begin configuration tests..." + echo "begin configuration-1 tests..." - # warp/threads configurations - ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=1 --threads=1 --app=diverge - ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=diverge - ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=diverge - ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=diverge - ./ci/blackbox.sh --driver=simx --cores=1 --warps=1 --threads=1 --app=diverge - ./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=diverge + # warp/threads + ./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge + ./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge + ./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge + ./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge + ./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge + ./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge - # disable DPI - CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood - CONFIGS="-DDPI_DISABLE 
-DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + # cores clustering + ./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=diverge --args="-n1" + ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1" + ./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=diverge --args="-n1" + ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=diverge --args="-n1" # issue width CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge @@ -240,6 +218,31 @@ config() CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx + # L2/L3 + ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=diverge --args="-n1" + ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=diverge --args="-n1" + ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1" + ./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=diverge --args="-n1" + ./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=diverge --args="-n1" + + # multiple L1 caches per socket + CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2 + + echo "configuration-1 tests done!" +} + +config2() +{ + echo "begin configuration-2 tests..." 
+ + # test opaesim + ./ci/blackbox.sh --driver=opae --app=printf + ./ci/blackbox.sh --driver=opae --app=diverge + + # disable DPI + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + # custom program startup address make -C tests/regression/dogfood clean-kernel if [ "$XLEN" == "64" ]; then @@ -249,55 +252,57 @@ config() fi ./ci/blackbox.sh --driver=simx --app=dogfood ./ci/blackbox.sh --driver=rtlsim --app=dogfood + make -C tests/regression/dogfood clean-kernel # disabling M & F extensions make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null make -C tests/riscv/isa run-rtlsim-32i - make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null + make -C sim/rtlsim clean # disabling ZICOND extension CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo # disable local memory - CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --perf=1 - CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --cores=1 --app=demo --perf=1 - - # disable L1 cache - CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx - CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx - CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx - CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx - - # multiple L1 caches per socket - CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=8 --warps=1 --threads=2 + CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1 + CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1 # test AXI bus - AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo + AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=demo + + # disable L1 
cache + CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx # reduce l1 line size - CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=rtlsim --app=io_addr - CONFIGS="-DL1_LINE_SIZE=4" ./ci/blackbox.sh --driver=simx --app=io_addr - CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx - CONFIGS="-DL1_LINE_SIZE=4 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx + CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=rtlsim --app=io_addr + CONFIGS="-DL1_LINE_SIZE=$XLEN/8" ./ci/blackbox.sh --driver=simx --app=io_addr + CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DL1_LINE_SIZE=$XLEN/8 -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx + + # test cache ways + CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx # test cache banking CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx - CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx - CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemmx - CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=sgemmx + CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DDCACHE_NUM_BANKS=2" 
./ci/blackbox.sh --driver=simx --app=sgemmx # test 128-bit MEM block - CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo + CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=demo # test single-bank DRAM - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo + CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=demo # test 27-bit DRAM address - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --cores=1 --app=demo + CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=demo - echo "configuration tests done!" + echo "configuration-2 tests done!" } stress() @@ -306,9 +311,7 @@ stress() # test verilator reset values CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood - CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr - CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=opae --app=printf - ./ci/blackbox.sh --driver=rtlsim --app=sgemm --args="-n128" --l2cache + CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache echo "stress tests done!" } @@ -318,7 +321,7 @@ synthesis() echo "begin synthesis tests..." PREFIX=build_base make -C hw/syn/yosys clean - PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys elaborate + PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis echo "synthesis tests done!" 
} @@ -326,7 +329,7 @@ synthesis() show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cluster] [--debug] [--config] [--stress] [--synthesis] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]" } start=$SECONDS @@ -336,6 +339,9 @@ clean=0 while [ "$1" != "" ]; do case $1 in + --vm ) + tests+=("vm") + ;; --clean ) clean=1 ;; @@ -354,15 +360,15 @@ while [ "$1" != "" ]; do --opencl ) tests+=("opencl") ;; - --cluster ) - tests+=("cluster") + --config1 ) + tests+=("config1") + ;; + --config2 ) + tests+=("config2") ;; --debug ) tests+=("debug") ;; - --config ) - tests+=("config") - ;; --stress ) tests+=("stress") ;; @@ -376,9 +382,9 @@ while [ "$1" != "" ]; do tests+=("kernel") tests+=("regression") tests+=("opencl") - tests+=("cluster") + tests+=("config1") + tests+=("config2") tests+=("debug") - tests+=("config") tests+=("stress") tests+=("synthesis") ;; diff --git a/ci/system_updates.sh b/ci/system_updates.sh new file mode 100755 index 000000000..43abbe5ab --- /dev/null +++ b/ci/system_updates.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# Copyright 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +apt-get update -y + +add-apt-repository -y ppa:ubuntu-toolchain-r/test +apt-get update +apt-get install -y g++-11 gcc-11 +update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 +update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 + +apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache diff --git a/ci/toolchain_env.sh.in b/ci/toolchain_env.sh.in index dc50389a9..be140d28d 100755 --- a/ci/toolchain_env.sh.in +++ b/ci/toolchain_env.sh.in @@ -16,8 +16,8 @@ TOOLDIR=${TOOLDIR:=@TOOLDIR@} -export VERILATOR_ROOT=$TOOLDIR/verilator -export PATH=$VERILATOR_ROOT/bin:$PATH +# export VERILATOR_ROOT=$TOOLDIR/verilator +# export PATH=$VERILATOR_ROOT/bin:$PATH export SV2V_PATH=$TOOLDIR/sv2v export PATH=$SV2V_PATH/bin:$PATH diff --git a/ci/trace_csv.py b/ci/trace_csv.py index b8cafe379..c3113de85 100755 --- a/ci/trace_csv.py +++ b/ci/trace_csv.py @@ -26,7 +26,7 @@ def parse_args(): parser.add_argument('log', help='Input log file') return parser.parse_args() -def parse_simx(log_filename): +def parse_simx(log_lines): pc_pattern = r"PC=(0x[0-9a-fA-F]+)" instr_pattern = r"Instr (0x[0-9a-fA-F]+):" opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)" @@ -37,32 +37,31 @@ def parse_simx(log_filename): destination_pattern = r"Dest Reg: (.+)" uuid_pattern = r"#(\d+)" entries = [] - with open(log_filename, 'r') as log_file: - instr_data = None - for lineno, line in enumerate(log_file, start=1): - try: - if line.startswith("DEBUG Fetch:"): - if instr_data: - entries.append(instr_data) - instr_data = {} - instr_data["lineno"] = lineno - instr_data["PC"] = re.search(pc_pattern, line).group(1) - instr_data["core_id"] = re.search(core_id_pattern, line).group(1) - instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1) - instr_data["tmask"] = re.search(tmask_pattern, line).group(1) - instr_data["uuid"] = re.search(uuid_pattern, line).group(1) - elif line.startswith("DEBUG Instr"): - instr_data["instr"] = 
re.search(instr_pattern, line).group(1) - instr_data["opcode"] = re.search(opcode_pattern, line).group(1) - elif line.startswith("DEBUG Src"): - src_reg = re.search(operands_pattern, line).group(1) - instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg - elif line.startswith("DEBUG Dest"): - instr_data["destination"] = re.search(destination_pattern, line).group(1) - except Exception as e: - print("Error at line {}: {}".format(lineno, e)) - if instr_data: - entries.append(instr_data) + instr_data = None + for lineno, line in enumerate(log_lines, start=1): + try: + if line.startswith("DEBUG Fetch:"): + if instr_data: + entries.append(instr_data) + instr_data = {} + instr_data["lineno"] = lineno + instr_data["PC"] = re.search(pc_pattern, line).group(1) + instr_data["core_id"] = re.search(core_id_pattern, line).group(1) + instr_data["warp_id"] = re.search(warp_id_pattern, line).group(1) + instr_data["tmask"] = re.search(tmask_pattern, line).group(1) + instr_data["uuid"] = re.search(uuid_pattern, line).group(1) + elif line.startswith("DEBUG Instr"): + instr_data["instr"] = re.search(instr_pattern, line).group(1) + instr_data["opcode"] = re.search(opcode_pattern, line).group(1) + elif line.startswith("DEBUG Src"): + src_reg = re.search(operands_pattern, line).group(1) + instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg + elif line.startswith("DEBUG Dest"): + instr_data["destination"] = re.search(destination_pattern, line).group(1) + except Exception as e: + print("Error at line {}: {}".format(lineno, e)) + if instr_data: + entries.append(instr_data) return entries def reverse_binary(bin_str): @@ -95,8 +94,9 @@ def append_value(text, reg, value, tmask_arr, sep): text += "}" return text, sep -def parse_rtlsim(log_filename): - line_pattern = r"\d+: core(\d+)-(decode|issue|commit)" +def parse_rtlsim(log_lines): + config_pattern = r"CONFIGS: num_threads=(\d+), 
num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)" + line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)" pc_pattern = r"PC=(0x[0-9a-fA-F]+)" instr_pattern = r"instr=(0x[0-9a-fA-F]+)" ex_pattern = r"ex=([a-zA-Z]+)" @@ -116,124 +116,166 @@ def parse_rtlsim(log_filename): eop_pattern = r"eop=(\d)" uuid_pattern = r"#(\d+)" entries = [] - with open(log_filename, 'r') as log_file: - instr_data = {} - for lineno, line in enumerate(log_file, start=1): - try: - line_match = re.search(line_pattern, line) - if line_match: - PC = re.search(pc_pattern, line).group(1) - warp_id = re.search(warp_id_pattern, line).group(1) - tmask = re.search(tmask_pattern, line).group(1) - uuid = re.search(uuid_pattern, line).group(1) - core_id = line_match.group(1) - stage = line_match.group(2) - if stage == "decode": - trace = {} - trace["uuid"] = uuid - trace["PC"] = PC - trace["core_id"] = core_id - trace["warp_id"] = warp_id - trace["tmask"] = reverse_binary(tmask) - trace["instr"] = re.search(instr_pattern, line).group(1) - trace["opcode"] = re.search(op_pattern, line).group(1) - trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1)) - trace["rd"] = re.search(rd_pattern, line).group(1) - trace["rs1"] = re.search(rs1_pattern, line).group(1) - trace["rs2"] = re.search(rs2_pattern, line).group(1) - trace["rs3"] = re.search(rs3_pattern, line).group(1) + instr_data = {} + num_threads = 0 + num_warps = 0 + num_cores = 0 + num_clusters = 0 + socket_size = 0 + local_mem_base = 0 + num_barriers = 0 + num_sockets = 0 + for lineno, line in enumerate(log_lines, start=1): + try: + config_match = re.search(config_pattern, line) + if config_match: + num_threads = int(config_match.group(1)) + num_warps = int(config_match.group(2)) + num_cores = int(config_match.group(3)) + num_clusters = int(config_match.group(4)) + socket_size = int(config_match.group(5)) + local_mem_base = int(config_match.group(6)) 
+ num_barriers = int(config_match.group(7)) + num_sockets = (num_cores + socket_size - 1) // socket_size + continue + line_match = re.search(line_pattern, line) + if line_match: + PC = re.search(pc_pattern, line).group(1) + warp_id = re.search(warp_id_pattern, line).group(1) + tmask = re.search(tmask_pattern, line).group(1) + uuid = re.search(uuid_pattern, line).group(1) + cluster_id = line_match.group(1) + socket_id = line_match.group(2) + core_id = line_match.group(3) + stage = line_match.group(4) + if stage == "decode": + trace = {} + trace["uuid"] = uuid + trace["PC"] = PC + trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id) + trace["warp_id"] = warp_id + trace["tmask"] = reverse_binary(tmask) + trace["instr"] = re.search(instr_pattern, line).group(1) + trace["opcode"] = re.search(op_pattern, line).group(1) + trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1)) + trace["rd"] = re.search(rd_pattern, line).group(1) + trace["rs1"] = re.search(rs1_pattern, line).group(1) + trace["rs2"] = re.search(rs2_pattern, line).group(1) + trace["rs3"] = re.search(rs3_pattern, line).group(1) + instr_data[uuid] = trace + elif stage == "issue": + if uuid in instr_data: + trace = instr_data[uuid] + trace["lineno"] = lineno + opds = trace["opds"] + if opds[1]: + trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1] + if opds[2]: + trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1] + if opds[3]: + trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1] + trace["issued"] = True instr_data[uuid] = trace - elif stage == "issue": - if uuid in instr_data: - trace = instr_data[uuid] - trace["lineno"] = lineno + elif stage == "commit": + if uuid in instr_data: + trace = instr_data[uuid] + if "issued" in trace: opds = trace["opds"] - if opds[1]: - trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1] - if opds[2]: - 
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1] - if opds[3]: - trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1] - trace["issued"] = True + dst_tmask_arr = bin_to_array(tmask)[::-1] + wb = re.search(wb_pattern, line).group(1) == "1" + if wb: + rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1] + if 'rd_data' in trace: + merged_rd_data = trace['rd_data'] + for i in range(len(dst_tmask_arr)): + if dst_tmask_arr[i] == 1: + merged_rd_data[i] = rd_data[i] + trace['rd_data'] = merged_rd_data + else: + trace['rd_data'] = rd_data instr_data[uuid] = trace - elif stage == "commit": - if uuid in instr_data: - trace = instr_data[uuid] - if "issued" in trace: - opds = trace["opds"] - dst_tmask_arr = bin_to_array(tmask)[::-1] - wb = re.search(wb_pattern, line).group(1) == "1" + eop = re.search(eop_pattern, line).group(1) == "1" + if eop: + tmask_arr = bin_to_array(trace["tmask"]) + destination = '' if wb: - rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1] - if 'rd_data' in trace: - merged_rd_data = trace['rd_data'] - for i in range(len(dst_tmask_arr)): - if dst_tmask_arr[i] == 1: - merged_rd_data[i] = rd_data[i] - trace['rd_data'] = merged_rd_data - else: - trace['rd_data'] = rd_data - instr_data[uuid] = trace - eop = re.search(eop_pattern, line).group(1) == "1" - if eop: - tmask_arr = bin_to_array(trace["tmask"]) - destination = '' - if wb: - destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False) - del trace['rd_data'] - trace["destination"] = destination - operands = '' - sep = False - if opds[1]: - operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep) - del trace["rs1_data"] - if opds[2]: - operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep) - del trace["rs2_data"] - if opds[3]: - operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, 
sep) - del trace["rs3_data"] - trace["operands"] = operands - del trace["opds"] - del trace["rd"] - del trace["rs1"] - del trace["rs2"] - del trace["rs3"] - del trace["issued"] - del instr_data[uuid] - entries.append(trace) - except Exception as e: - print("Error at line {}: {}".format(lineno, e)) + destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False) + del trace['rd_data'] + trace["destination"] = destination + operands = '' + sep = False + if opds[1]: + operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep) + del trace["rs1_data"] + if opds[2]: + operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep) + del trace["rs2_data"] + if opds[3]: + operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep) + del trace["rs3_data"] + trace["operands"] = operands + del trace["opds"] + del trace["rd"] + del trace["rs1"] + del trace["rs2"] + del trace["rs3"] + del trace["issued"] + del instr_data[uuid] + entries.append(trace) + except Exception as e: + print("Error at line {}: {}".format(lineno, e)) return entries -def write_csv(log_filename, csv_filename, log_type): - entries = None - - # parse log file - if log_type == "rtlsim": - entries = parse_rtlsim(log_filename) - elif log_type == "simx": - entries = parse_simx(log_filename) - else: - print('Error: invalid log type') - sys.exit() - - # sort entries by uuid - entries.sort(key=lambda x: (int(x['uuid']))) - for entry in entries: - del entry['lineno'] - - # write to CSV +def write_csv(sublogs, csv_filename, log_type): with open(csv_filename, 'w', newline='') as csv_file: fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"] writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() - for entry in entries: - writer.writerow(entry) + + for sublog in sublogs: + entries = None + + # parse sublog + if log_type == "rtlsim": + 
entries = parse_rtlsim(sublog) + elif log_type == "simx": + entries = parse_simx(sublog) + else: + print('Error: invalid log type') + sys.exit() + + # sort entries by uuid + entries.sort(key=lambda x: (int(x['uuid']))) + for entry in entries: + del entry['lineno'] + + for entry in entries: + writer.writerow(entry) + +def split_log_file(log_filename): + with open(log_filename, 'r') as log_file: + log_lines = log_file.readlines() + + sublogs = [] + current_sublog = None + + for line in log_lines: + if line.startswith("[VXDRV] START"): + if current_sublog is not None: + sublogs.append(current_sublog) + current_sublog = [line] + elif current_sublog is not None: + current_sublog.append(line) + + if current_sublog is not None: + sublogs.append(current_sublog) + + return sublogs def main(): args = parse_args() - write_csv(args.log, args.csv, args.type) + sublogs = split_log_file(args.log) + write_csv(sublogs, args.csv, args.type) if __name__ == "__main__": main() diff --git a/config.mk.in b/config.mk.in index 534f47ed6..6b20a3050 100644 --- a/config.mk.in +++ b/config.mk.in @@ -32,4 +32,8 @@ RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime -VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel \ No newline at end of file +VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel + +THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party + +VM_ENABLE ?= @VM_ENABLE@ \ No newline at end of file diff --git a/configure b/configure index 5e96ab59e..2c0811ec3 100755 --- a/configure +++ b/configure @@ -63,7 +63,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@PREFIX@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" 
"$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then @@ -111,9 +111,10 @@ copy_files() { # default configuration parameters default_xlen=32 -default_tooldir=/opt +default_tooldir=$HOME/tools default_osversion=$(detect_osversion) default_prefix=$CURRENT_DIR +default_vm=0 # load default configuration parameters from existing config.mk if [ -f "config.mk" ]; then @@ -126,6 +127,7 @@ if [ -f "config.mk" ]; then TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;; OSVERSION\ ?*) default_osversion=${value//\?=/} ;; PREFIX\ ?*) default_prefix=${value//\?=/} ;; + VM_ENABLE\ ?*) default_vm=${value//\?=/} ;; esac done < config.mk fi @@ -135,14 +137,16 @@ XLEN=${XLEN:=$default_xlen} TOOLDIR=${TOOLDIR:=$default_tooldir} OSVERSION=${OSVERSION:=$default_osversion} PREFIX=${PREFIX:=$default_prefix} +VM_ENABLE=${VM_ENABLE:=$default_vm} # parse command line arguments usage() { echo "Usage: $0 [--xlen=] [--tooldir=] [--osversion=]" echo " --xlen= Set the XLEN value (default: 32)" - echo " --tooldir= Set the TOOLDIR path (default: /opt)" - echo " --osversion= Set the OS Version (default: $(detect_os))" + echo " --tooldir= Set the TOOLDIR path (default: $HOME/tools)" + echo " --osversion= Set the OS Version (default: $(detect_osversion))" echo " --prefix= Set installation directory" + echo " --vm_enable= Enable Virtual Memory support (default: 0)" exit 1 } while [[ "$#" -gt 0 ]]; do @@ -151,6 +155,7 @@ while [[ "$#" -gt 0 ]]; do --tooldir=*) TOOLDIR="${1#*=}" ;; --osversion=*) OSVERSION="${1#*=}" ;; --prefix=*) PREFIX="${1#*=}" ;; + --vm_enable=*) VM_ENABLE="${1#*=}" ;; -h|--help) usage ;; *) echo "Unknown parameter passed: $1"; usage ;; esac @@ -172,3 +177,5 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) THIRD_PARTY_DIR=$SCRIPT_DIR/third_party copy_files "$SCRIPT_DIR" "$CURRENT_DIR" + +echo "VM Enable: "$VM_ENABLE \ No newline at end of file diff --git 
a/docs/altera_fpga_guide.md b/docs/altera_fpga_guide.md new file mode 100644 index 000000000..61d1ae26e --- /dev/null +++ b/docs/altera_fpga_guide.md @@ -0,0 +1,79 @@ +# FPGA Startup and Configuration Guide + +OPAE Environment Setup +---------------------- + + $ source /opt/inteldevstack/init_env_user.sh + $ export OPAE_HOME=/opt/opae/1.1.2 + $ export PATH=$OPAE_HOME/bin:$PATH + $ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH + $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH + $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH + +OPAE Build +------------------ + +The FPGA has the following configuration options: +- DEVICE_FAMILY=arria10 | stratix10 +- NUM_CORES=#n + +Command line: + + $ cd hw/syn/altera/opae + $ PREFIX=test1 TARGET=fpga NUM_CORES=4 make + +A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete. +Setting TARGET=ase will build the project for simulation using Intel ASE. + + +OPAE Build Configuration +------------------------ + +The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when building the processor. For example, the following parameters can be configured: +- `NUM_WARPS`: Number of warps per core +- `NUM_THREADS`: Number of threads per warp +- `PERF_ENABLE`: enable the use of all profile counters + +You configure the synthesis build from the command line: + + $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make + +OPAE Build Progress +------------------- + +You could check the last 10 lines in the build log for possible errors until build completion. + + $ tail -n 10 /build.log + +Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. 
+ + $ ps -u + +If the build fails and you need to restart it, clean up the build folder using the following command: + + $ make clean + +The bitstream file `vortex_afu.gbs` should exist when the build is done: + + $ ls -lsa /synth/vortex_afu.gbs + + +Signing the bitstream and Programming the FPGA +---------------------------------------------- + + $ cd + $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs + $ fpgasupdate vortex_afu_unsigned_ssl.gbs + +Sample FPGA Run Test +-------------------- + +Ensure you have the correct opae runtime for the FPGA target + + $ make -C runtime/opae clean + $ TARGET=FPGA make -C runtime/opae + +Run the following from your Vortex build directory + + $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" + diff --git a/docs/index.md b/docs/index.md index 07faa2927..14a45f335 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,7 +7,8 @@ - [Cache Subsystem](cache_subsystem.md) - [Software](software.md) - [Simulation](simulation.md) -- [FPGA Setup Guide](fpga_setup.md) +- [Altera FPGA Setup Guide](altera_fpga_guide.md) +- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md) - [Debugging](debugging.md) - [Useful Links](references.md) @@ -27,6 +28,6 @@ Running Vortex simulators with different configurations: $ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo -- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads +- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads $ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood diff --git a/docs/xilinx_fpga_guide.md b/docs/xilinx_fpga_guide.md new file mode 100644 index 000000000..f2960deb6 --- /dev/null +++ b/docs/xilinx_fpga_guide.md @@ -0,0 +1,36 @@ +# FPGA Startup and Configuration Guide + +XRT Environment Setup +---------------------- + + $ source 
/opt/xilinx/Vitis/2023.1/settings64.sh + $ source /opt/xilinx/xrt/setup.sh + + +Check Installed FPGA Platforms +------------------------------ + + $ platforminfo -l + + +Build FPGA image +---------------- + + $ cd hw/syn/xilinx/xrt + $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make + +Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" + +The generated bitstream will be located under /bin/vortex_afu.xclbin + +Sample FPGA Run Test +-------------------- + +Ensure you have the correct xrt runtime for the FPGA target + + $ make -C runtime/xrt clean + $ TARGET=hw make -C runtime/xrt + +Run the following from your Vortex build directory + + $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" \ No newline at end of file diff --git a/hw/Makefile b/hw/Makefile index f3aa5b651..6db654202 100644 --- a/hw/Makefile +++ b/hw/Makefile @@ -9,13 +9,14 @@ all: config config: VX_config.h VX_types.h -VX_config.h: $(RTL_DIR)/VX_config.vh +VX_config.h: $(RTL_DIR)/VX_config.vh $(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h -VX_types.h: $(RTL_DIR)/VX_types.vh +VX_types.h: $(RTL_DIR)/VX_types.vh $(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h clean: + $(MAKE) -C unittest clean rm -f VX_config.h VX_types.h .PHONY: VX_config.h VX_types.h \ No newline at end of file diff --git a/hw/dpi/float_dpi.vh b/hw/dpi/float_dpi.vh index 135807650..26bd10933 100644 --- a/hw/dpi/float_dpi.vh +++ b/hw/dpi/float_dpi.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,8 +14,6 @@ `ifndef FLOAT_DPI_VH `define FLOAT_DPI_VH -`include "VX_config.vh" - import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags); import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags); import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags); diff --git a/hw/dpi/util_dpi.vh b/hw/dpi/util_dpi.vh index dfd411c94..0da62b041 100644 --- a/hw/dpi/util_dpi.vh +++ b/hw/dpi/util_dpi.vh @@ -14,8 +14,6 @@ `ifndef UTIL_DPI_VH `define UTIL_DPI_VH -`include "VX_config.vh" - `ifdef XLEN_64 `define INT_TYPE longint `else diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 98ca3e96a..108e95073 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -14,7 +14,8 @@ `include "VX_define.vh" module VX_cluster import VX_gpu_pkg::*; #( - parameter CLUSTER_ID = 0 + parameter CLUSTER_ID = 0, + parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL @@ -85,7 +86,7 @@ module VX_cluster import VX_gpu_pkg::*; #( `RESET_RELAY (l2_reset, reset); VX_cache_wrap #( - .INSTANCE_ID ("l2cache"), + .INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)), .CACHE_SIZE (`L2_CACHE_SIZE), .LINE_SIZE (`L2_LINE_SIZE), .NUM_BANKS (`L2_NUM_BANKS), @@ -98,6 +99,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .MREQ_SIZE (`L2_MREQ_SIZE), .TAG_WIDTH (L2_TAG_WIDTH), .WRITE_ENABLE (1), + .WRITEBACK (`L2_WRITEBACK), 
.UUID_WIDTH (`UUID_WIDTH), .CORE_OUT_BUF (2), .MEM_OUT_BUF (2), @@ -122,17 +124,19 @@ module VX_cluster import VX_gpu_pkg::*; #( wire [`NUM_SOCKETS-1:0] per_socket_busy; + VX_dcr_bus_if socket_dcr_bus_if(); `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1)); // Generate all sockets - for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin + for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets `RESET_RELAY (socket_reset, reset); VX_socket #( - .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i) + .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id), + .INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id)) ) socket ( - `SCOPE_IO_BIND (scope_socket+i) + `SCOPE_IO_BIND (scope_socket+socket_id) .clk (clk), .reset (socket_reset), @@ -143,13 +147,13 @@ module VX_cluster import VX_gpu_pkg::*; #( .dcr_bus_if (socket_dcr_bus_if), - .mem_bus_if (per_socket_mem_bus_if[i]), + .mem_bus_if (per_socket_mem_bus_if[socket_id]), `ifdef GBAR_ENABLE - .gbar_bus_if (per_socket_gbar_bus_if[i]), + .gbar_bus_if (per_socket_gbar_bus_if[socket_id]), `endif - .busy (per_socket_busy[i]) + .busy (per_socket_busy[socket_id]) ); end diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 5dbcb96b4..45041ac4a 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -33,10 +33,6 @@ `endif /////////////////////////////////////////////////////////////////////////////// -`ifndef VM_DISABLE -`define VM_ENABLE -`endif - `ifndef EXT_M_DISABLE `define EXT_M_ENABLE `endif @@ -114,7 +110,6 @@ `ifndef SOCKET_SIZE `define SOCKET_SIZE `MIN(4, `NUM_CORES) `endif -`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE) `ifdef L2_ENABLE `define L2_ENABLED 1 @@ -357,7 +352,7 @@ // Number of SFU units `ifndef NUM_SFU_LANES -`define NUM_SFU_LANES `MIN(`NUM_THREADS, 4) +`define NUM_SFU_LANES `NUM_THREADS `endif `ifndef NUM_SFU_BLOCKS `define NUM_SFU_BLOCKS 1 @@ -481,22 +476,27 @@ `define LATENCY_FCVT 5 `endif +// FMA Bandwidth ratio `ifndef FMA_PE_RATIO 
`define FMA_PE_RATIO 1 `endif +// FDIV Bandwidth ratio `ifndef FDIV_PE_RATIO `define FDIV_PE_RATIO 8 `endif +// FSQRT Bandwidth ratio `ifndef FSQRT_PE_RATIO `define FSQRT_PE_RATIO 8 `endif +// FCVT Bandwidth ratio `ifndef FCVT_PE_RATIO `define FCVT_PE_RATIO 8 `endif +// FNCP Bandwidth ratio `ifndef FNCP_PE_RATIO `define FNCP_PE_RATIO 2 `endif @@ -603,7 +603,12 @@ `define DCACHE_NUM_WAYS 1 `endif -// SM Configurable Knobs ////////////////////////////////////////////////////// +// Enable Cache Writeback +`ifndef DCACHE_WRITEBACK +`define DCACHE_WRITEBACK 0 +`endif + +// LMEM Configurable Knobs //////////////////////////////////////////////////// `ifndef LMEM_DISABLE `define LMEM_ENABLE @@ -662,6 +667,11 @@ `define L2_NUM_WAYS 2 `endif +// Enable Cache Writeback +`ifndef L2_WRITEBACK +`define L2_WRITEBACK 0 +`endif + // L3cache Configurable Knobs ///////////////////////////////////////////////// // Cache Size @@ -703,6 +713,11 @@ `define L3_NUM_WAYS 4 `endif +// Enable Cache Writeback +`ifndef L3_WRITEBACK +`define L3_WRITEBACK 0 +`endif + // ISA Extensions ///////////////////////////////////////////////////////////// `ifdef EXT_A_ENABLE diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index d7d38a930..686124c16 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -59,6 +59,8 @@ `define OFFSET_BITS 12 `define IMM_BITS `XLEN +`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE) + /////////////////////////////////////////////////////////////////////////////// `define EX_ALU 0 @@ -296,6 +298,7 @@ `ifdef ICACHE_ENABLE `define L1_ENABLE `endif + `ifdef DCACHE_ENABLE `define L1_ENABLE `endif @@ -322,7 +325,7 @@ .DATAW ($bits(dst)), \ .RESETW ($bits(dst)), \ .DEPTH (latency) \ - ) __``dst ( \ + ) __``dst``__ ( \ .clk (clk), \ .reset (reset), \ .enable (ena), \ @@ -336,13 +339,18 @@ VX_popcount #( \ .N ($bits(in)), \ .MODEL (model) \ - ) __``out ( \ + ) __``out``__ ( \ .data_in (in), \ .data_out (out) \ ) `define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1) 
+`define ASSIGN_VX_IF(dst, src) \ + assign dst.valid = src.valid; \ + assign dst.data = src.data; \ + assign src.ready = dst.ready + `define ASSIGN_VX_MEM_BUS_IF(dst, src) \ assign dst.req_valid = src.req_valid; \ assign dst.req_data = src.req_data; \ @@ -377,42 +385,42 @@ assign dst.rsp_ready = src.rsp_ready `define BUFFER_DCR_BUS_IF(dst, src, enable) \ - logic [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __``dst; \ if (enable) begin \ + reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ always @(posedge clk) begin \ - __``dst <= {src.write_valid, src.write_addr, src.write_data}; \ + __dst <= {src.write_valid, src.write_addr, src.write_data}; \ end \ + assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \ end else begin \ - assign __``dst = {src.write_valid, src.write_addr, src.write_data}; \ - end \ - VX_dcr_bus_if dst(); \ - assign {dst.write_valid, dst.write_addr, dst.write_data} = __``dst + assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \ + end -`define PERF_COUNTER_ADD(dst, src, field, width, dst_count, src_count, reg_enable) \ - for (genvar __d = 0; __d < dst_count; ++__d) begin \ - localparam __count = ((src_count > dst_count) ? 
`CDIV(src_count, dst_count) : 1); \ - wire [__count-1:0][width-1:0] __reduce_add_i_``src``field; \ - wire [width-1:0] __reduce_add_o_``dst``field; \ - for (genvar __i = 0; __i < __count; ++__i) begin \ - assign __reduce_add_i_``src``field[__i] = ``src[__d * __count + __i].``field; \ +`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \ + if (count > 1) begin \ + wire [count-1:0][width-1:0] __reduce_add_i_field; \ + wire [width-1:0] __reduce_add_o_field; \ + for (genvar __i = 0; __i < count; ++__i) begin \ + assign __reduce_add_i_field[__i] = src[__i].``field; \ end \ - VX_reduce #(.DATAW_IN(width), .N(__count), .OP("+")) __reduce_add_``dst``field ( \ - __reduce_add_i_``src``field, \ - __reduce_add_o_``dst``field \ + VX_reduce #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \ + __reduce_add_i_field, \ + __reduce_add_o_field \ ); \ if (reg_enable) begin \ - reg [width-1:0] __reduce_add_r_``dst``field; \ + reg [width-1:0] __reduce_add_r_field; \ always @(posedge clk) begin \ if (reset) begin \ - __reduce_add_r_``dst``field <= '0; \ + __reduce_add_r_field <= '0; \ end else begin \ - __reduce_add_r_``dst``field <= __reduce_add_o_``dst``field; \ + __reduce_add_r_field <= __reduce_add_o_field; \ end \ end \ - assign ``dst[__d].``field = __reduce_add_r_``dst``field; \ + assign dst.``field = __reduce_add_r_field; \ end else begin \ - assign ``dst[__d].``field = __reduce_add_o_``dst``field; \ + assign dst.``field = __reduce_add_o_field; \ end \ + end else begin \ + assign dst.``field = src[0].``field; \ end `define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \ @@ -426,20 +434,4 @@ assign dst = src; \ end -`define TO_DISPATCH_DATA(data, tid) { \ - data.uuid, \ - data.wis, \ - data.tmask, \ - data.PC, \ - data.op_type, \ - data.op_args, \ - data.wb, \ - data.rd, \ - tid, \ - data.rs1_data, \ - data.rs2_data, \ - data.rs3_data} - -/////////////////////////////////////////////////////////////////////////////// - `endif // VX_DEFINE_VH diff 
--git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 6de25f139..393f2a66f 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -60,6 +60,8 @@ package VX_gpu_pkg; logic [7:0] mpm_class; } base_dcrs_t; + //////////////////////////// Perf counter types /////////////////////////// + typedef struct packed { logic [`PERF_CTR_BITS-1:0] reads; logic [`PERF_CTR_BITS-1:0] writes; @@ -77,48 +79,63 @@ package VX_gpu_pkg; logic [`PERF_CTR_BITS-1:0] latency; } mem_perf_t; + typedef struct packed { + logic [`PERF_CTR_BITS-1:0] idles; + logic [`PERF_CTR_BITS-1:0] stalls; + } sched_perf_t; + + typedef struct packed { + logic [`PERF_CTR_BITS-1:0] ibf_stalls; + logic [`PERF_CTR_BITS-1:0] scb_stalls; + logic [`PERF_CTR_BITS-1:0] opd_stalls; + logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] units_uses; + logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses; + } issue_perf_t; + + //////////////////////// instruction arguments //////////////////////////// + typedef struct packed { logic use_PC; logic use_imm; logic is_w; logic [`ALU_TYPE_BITS-1:0] xtype; logic [`IMM_BITS-1:0] imm; - } alu_mod_t; + } alu_args_t; typedef struct packed { - logic [($bits(alu_mod_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding; + logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding; logic [`INST_FRM_BITS-1:0] frm; logic [`INST_FMT_BITS-1:0] fmt; - } fpu_mod_t; + } fpu_args_t; typedef struct packed { - logic [($bits(alu_mod_t)-1-1-`OFFSET_BITS)-1:0] __padding; + logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding; logic is_store; logic is_float; logic [`OFFSET_BITS-1:0] offset; - } lsu_mod_t; + } lsu_args_t; typedef struct packed { - logic [($bits(alu_mod_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding; + logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding; logic use_imm; logic [`VX_CSR_ADDR_BITS-1:0] addr; logic [4:0] imm; - } csr_mod_t; + } csr_args_t; typedef struct packed { - logic [($bits(alu_mod_t)-1)-1:0] __padding; + logic [($bits(alu_args_t)-1)-1:0] 
__padding; logic is_neg; - } wctl_mod_t; + } wctl_args_t; typedef union packed { - alu_mod_t alu; - fpu_mod_t fpu; - lsu_mod_t lsu; - csr_mod_t csr; - wctl_mod_t wctl; + alu_args_t alu; + fpu_args_t fpu; + lsu_args_t lsu; + csr_args_t csr; + wctl_args_t wctl; } op_args_t; - /* verilator lint_off UNUSED */ +`IGNORE_UNUSED_BEGIN ///////////////////////// LSU memory Parameters /////////////////////////// @@ -129,6 +146,31 @@ package VX_gpu_pkg; localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS); localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES; + ////////////////////////// Icache Parameters ////////////////////////////// + + // Word size in bytes + localparam ICACHE_WORD_SIZE = 4; + localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE)); + + // Block size in bytes + localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE; + + // Core request tag Id bits + localparam ICACHE_TAG_ID_BITS = `NW_WIDTH; + + // Core request tag bits + localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS); + + // Memory request data bits + localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8); + + // Memory request tag bits +`ifdef ICACHE_ENABLE + localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES); +`else + localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES); +`endif + ////////////////////////// Dcache Parameters ////////////////////////////// // Word size in bytes @@ -154,36 +196,11 @@ package VX_gpu_pkg; localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8); // Memory request tag bits - `ifdef DCACHE_ENABLE +`ifdef DCACHE_ENABLE localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES); - `else +`else localparam DCACHE_MEM_TAG_WIDTH = 
`CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES); - `endif - - ////////////////////////// Icache Parameters ////////////////////////////// - - // Word size in bytes - localparam ICACHE_WORD_SIZE = 4; - localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE)); - - // Block size in bytes - localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE; - - // Core request tag Id bits - localparam ICACHE_TAG_ID_BITS = `NW_WIDTH; - - // Core request tag bits - localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS); - - // Memory request data bits - localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8); - - // Memory request tag bits - `ifdef ICACHE_ENABLE - localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES); - `else - localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES); - `endif +`endif /////////////////////////////// L1 Parameters ///////////////////////////// @@ -208,11 +225,11 @@ package VX_gpu_pkg; localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8); // Memory request tag bits - `ifdef L2_ENABLE +`ifdef L2_ENABLE localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); - `else +`else localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); - `endif +`endif /////////////////////////////// L3 Parameters ///////////////////////////// @@ -229,23 +246,20 @@ package VX_gpu_pkg; localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8); // Memory request tag bits - `ifdef L3_ENABLE +`ifdef L3_ENABLE localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); - `else +`else localparam L3_MEM_TAG_WIDTH = 
`CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); - `endif - - /* verilator lint_on UNUSED */ +`endif /////////////////////////////// Issue parameters ////////////////////////// localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH); localparam ISSUE_ISW_W = `UP(ISSUE_ISW); - localparam ISSUE_RATIO = `NUM_WARPS / `ISSUE_WIDTH; - localparam ISSUE_WIS = `CLOG2(ISSUE_RATIO); + localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH; + localparam ISSUE_WIS = `CLOG2(PER_ISSUE_WARPS); localparam ISSUE_WIS_W = `UP(ISSUE_WIS); -`IGNORE_UNUSED_BEGIN function logic [`NW_WIDTH-1:0] wis_to_wid( input logic [ISSUE_WIS_W-1:0] wis, input logic [ISSUE_ISW_W-1:0] isw @@ -278,6 +292,20 @@ package VX_gpu_pkg; wid_to_wis = 0; end endfunction + + ///////////////////////// Miscaellaneous functions //////////////////////// + + function logic [`SFU_WIDTH-1:0] op_to_sfu_type( + input logic [`INST_OP_BITS-1:0] op_type + ); + case (op_type) + `INST_SFU_CSRRW, + `INST_SFU_CSRRS, + `INST_SFU_CSRRC: op_to_sfu_type = `SFU_CSRS; + default: op_to_sfu_type = `SFU_WCTL; + endcase + endfunction + `IGNORE_UNUSED_END endpackage diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 9769d81f7..73a6edd78 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -47,7 +47,7 @@ `define UNUSED_VAR(x) `define UNUSED_PIN(x) . 
x () `define UNUSED_ARG(x) x -`define TRACE(level, args) $write args +`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args `else `ifdef VERILATOR `define TRACING_ON /* verilator tracing_on */ @@ -112,8 +112,14 @@ `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ -`define TRACE(level, args) dpi_trace(level, $sformatf args) `endif + +`ifdef SV_DPI +`define TRACE(level, args) dpi_trace(level, $sformatf args) +`else +`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args +`endif + `endif `ifdef SIMULATION diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 0f329a72b..abdf67612 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -14,7 +14,8 @@ `include "VX_define.vh" module VX_socket import VX_gpu_pkg::*; #( - parameter SOCKET_ID = 0 + parameter SOCKET_ID = 0, + parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL @@ -40,6 +41,11 @@ module VX_socket import VX_gpu_pkg::*; #( output wire busy ); +`ifdef SCOPE + localparam scope_core = 0; + `SCOPE_IO_SWITCH (`SOCKET_SIZE); +`endif + `ifdef GBAR_ENABLE VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE](); @@ -81,7 +87,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (icache_reset, reset); VX_cache_cluster #( - .INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)), + .INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)), .NUM_UNITS (`NUM_ICACHES), .NUM_INPUTS (`SOCKET_SIZE), .TAG_SEL_IDX (0), @@ -126,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (dcache_reset, reset); VX_cache_cluster #( - .INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)), + .INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)), .NUM_UNITS (`NUM_DCACHES), .NUM_INPUTS (`SOCKET_SIZE), .TAG_SEL_IDX (0), @@ -143,8 +149,9 @@ module VX_socket import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH), .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (1), + .WRITEBACK (`DCACHE_WRITEBACK), .NC_ENABLE (1), - .CORE_OUT_BUF (`LMEM_ENABLED ? 
2 : 1), + .CORE_OUT_BUF (2), .MEM_OUT_BUF (2) ) dcache ( `ifdef PERF_ENABLE @@ -194,19 +201,19 @@ module VX_socket import VX_gpu_pkg::*; #( wire [`SOCKET_SIZE-1:0] per_core_busy; + VX_dcr_bus_if core_dcr_bus_if(); `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1)); - `SCOPE_IO_SWITCH (`SOCKET_SIZE) - // Generate all cores - for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin + for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores `RESET_RELAY (core_reset, reset); VX_core #( - .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i) + .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), + .INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id)) ) core ( - `SCOPE_IO_BIND (i) + `SCOPE_IO_BIND (scope_core + core_id) .clk (clk), .reset (core_reset), @@ -217,15 +224,15 @@ module VX_socket import VX_gpu_pkg::*; #( .dcr_bus_if (core_dcr_bus_if), - .dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]), + .dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]), - .icache_bus_if (per_core_icache_bus_if[i]), + .icache_bus_if (per_core_icache_bus_if[core_id]), `ifdef GBAR_ENABLE - .gbar_bus_if (per_core_gbar_bus_if[i]), + .gbar_bus_if (per_core_gbar_bus_if[core_id]), `endif - .busy (per_core_busy[i]) + .busy (per_core_busy[core_id]) ); end diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index e744a26f9..927ffae96 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -85,30 +85,31 @@ `define VX_CSR_MPM_IBUF_ST_H 12'hB85 `define VX_CSR_MPM_SCRB_ST 12'hB06 `define VX_CSR_MPM_SCRB_ST_H 12'hB86 -`define VX_CSR_MPM_SCRB_ALU 12'hB07 -`define VX_CSR_MPM_SCRB_ALU_H 12'hB87 -`define VX_CSR_MPM_SCRB_FPU 12'hB08 -`define VX_CSR_MPM_SCRB_FPU_H 12'hB88 -`define VX_CSR_MPM_SCRB_LSU 12'hB09 -`define VX_CSR_MPM_SCRB_LSU_H 12'hB89 -`define VX_CSR_MPM_SCRB_SFU 12'hB0A -`define VX_CSR_MPM_SCRB_SFU_H 12'hB8A +`define VX_CSR_MPM_OPDS_ST 12'hB07 +`define VX_CSR_MPM_OPDS_ST_H 12'hB87 +`define VX_CSR_MPM_SCRB_ALU 
12'hB08 +`define VX_CSR_MPM_SCRB_ALU_H 12'hB88 +`define VX_CSR_MPM_SCRB_FPU 12'hB09 +`define VX_CSR_MPM_SCRB_FPU_H 12'hB89 +`define VX_CSR_MPM_SCRB_LSU 12'hB0A +`define VX_CSR_MPM_SCRB_LSU_H 12'hB8A +`define VX_CSR_MPM_SCRB_SFU 12'hB0B +`define VX_CSR_MPM_SCRB_SFU_H 12'hB8B +`define VX_CSR_MPM_SCRB_CSRS 12'hB0C +`define VX_CSR_MPM_SCRB_CSRS_H 12'hB8C +`define VX_CSR_MPM_SCRB_WCTL 12'hB0D +`define VX_CSR_MPM_SCRB_WCTL_H 12'hB8D // PERF: memory -`define VX_CSR_MPM_IFETCHES 12'hB0B -`define VX_CSR_MPM_IFETCHES_H 12'hB8B -`define VX_CSR_MPM_LOADS 12'hB0C -`define VX_CSR_MPM_LOADS_H 12'hB8C -`define VX_CSR_MPM_STORES 12'hB0D -`define VX_CSR_MPM_STORES_H 12'hB8D -`define VX_CSR_MPM_IFETCH_LT 12'hB0E -`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E -`define VX_CSR_MPM_LOAD_LT 12'hB0F -`define VX_CSR_MPM_LOAD_LT_H 12'hB8F -// SFU: scoreboard -`define VX_CSR_MPM_SCRB_WCTL 12'hB10 -`define VX_CSR_MPM_SCRB_WCTL_H 12'hB90 -`define VX_CSR_MPM_SCRB_CSRS 12'hB11 -`define VX_CSR_MPM_SCRB_CSRS_H 12'hB91 +`define VX_CSR_MPM_IFETCHES 12'hB0E +`define VX_CSR_MPM_IFETCHES_H 12'hB8E +`define VX_CSR_MPM_LOADS 12'hB0F +`define VX_CSR_MPM_LOADS_H 12'hB8F +`define VX_CSR_MPM_STORES 12'hB10 +`define VX_CSR_MPM_STORES_H 12'hB90 +`define VX_CSR_MPM_IFETCH_LT 12'hB11 +`define VX_CSR_MPM_IFETCH_LT_H 12'hB91 +`define VX_CSR_MPM_LOAD_LT 12'hB12 +`define VX_CSR_MPM_LOAD_LT_H 12'hB92 // Machine Performance-monitoring memory counters (class 2) /////////////////// diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 2c8b3389f..d3ef57c72 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -44,6 +44,11 @@ module Vortex import VX_gpu_pkg::*; ( output wire busy ); +`ifdef SCOPE + localparam scope_cluster = 0; + `SCOPE_IO_SWITCH (`NUM_CLUSTERS); +`endif + `ifdef PERF_ENABLE VX_mem_perf_if mem_perf_if(); assign mem_perf_if.icache = 'x; @@ -78,6 +83,7 @@ module Vortex import VX_gpu_pkg::*; ( .MREQ_SIZE (`L3_MREQ_SIZE), .TAG_WIDTH (L2_MEM_TAG_WIDTH), .WRITE_ENABLE (1), + .WRITEBACK (`L3_WRITEBACK), .UUID_WIDTH 
(`UUID_WIDTH), .CORE_OUT_BUF (2), .MEM_OUT_BUF (2), @@ -121,19 +127,19 @@ module Vortex import VX_gpu_pkg::*; ( wire [`NUM_CLUSTERS-1:0] per_cluster_busy; - `SCOPE_IO_SWITCH (`NUM_CLUSTERS) - // Generate all clusters - for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin + for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters `RESET_RELAY (cluster_reset, reset); + VX_dcr_bus_if cluster_dcr_bus_if(); `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); VX_cluster #( - .CLUSTER_ID (i) + .CLUSTER_ID (cluster_id), + .INSTANCE_ID ($sformatf("cluster%0d", cluster_id)) ) cluster ( - `SCOPE_IO_BIND (i) + `SCOPE_IO_BIND (scope_cluster + cluster_id) .clk (clk), .reset (cluster_reset), @@ -144,9 +150,9 @@ module Vortex import VX_gpu_pkg::*; ( .dcr_bus_if (cluster_dcr_bus_if), - .mem_bus_if (per_cluster_mem_bus_if[i]), + .mem_bus_if (per_cluster_mem_bus_if[cluster_id]), - .busy (per_cluster_busy[i]) + .busy (per_cluster_busy[cluster_id]) ); end diff --git a/hw/rtl/afu/opae/ccip_std_afu.sv b/hw/rtl/afu/opae/ccip_std_afu.sv index 2adea591f..b042ba61d 100644 --- a/hw/rtl/afu/opae/ccip_std_afu.sv +++ b/hw/rtl/afu/opae/ccip_std_afu.sv @@ -5,6 +5,7 @@ // To be done: // Check how to run this with OPAE. 
Looks like setup issue +`ifndef NOPAE `include "platform_if.vh" @@ -85,7 +86,7 @@ module ccip_std_afu #( t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS]; t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS]; logic avs_write [NUM_LOCAL_MEM_BANKS]; - logic avs_read [NUM_LOCAL_MEM_BANKS]; + logic avs_read [NUM_LOCAL_MEM_BANKS]; for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin assign local_mem[b].burstcount = avs_burstcount[b]; @@ -94,7 +95,7 @@ module ccip_std_afu #( assign local_mem[b].byteenable = avs_byteenable[b]; assign local_mem[b].write = avs_write[b]; assign local_mem[b].read = avs_read[b]; - + assign avs_waitrequest[b] = local_mem[b].waitrequest; assign avs_readdata[b] = local_mem[b].readdata; assign avs_readdatavalid[b] = local_mem[b].readdatavalid; @@ -107,7 +108,7 @@ module ccip_std_afu #( .reset (reset_T1), .cp2af_sRxPort (cp2af_sRx_T1), - .af2cp_sTxPort (af2cp_sTx_T0), + .af2cp_sTxPort (af2cp_sTx_T0), .avs_writedata (avs_writedata), .avs_readdata (avs_readdata), @@ -121,3 +122,5 @@ module ccip_std_afu #( ); endmodule + +`endif diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index e7b63e731..cd49e7ddd 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -587,7 +587,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .DATA_SIZE (LMEM_DATA_SIZE), .ADDR_WIDTH (LMEM_ADDR_WIDTH), .TAG_WIDTH (AVS_REQ_TAGW), - .ARBITER ("P"), + .ARBITER ("P"), // prioritize VX requests .REQ_OUT_BUF (0), .RSP_OUT_BUF (0) ) mem_arb ( @@ -692,9 +692,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .reset (reset), .incr (cci_rd_req_fire), .decr (cci_rdq_pop), + `UNUSED_PIN (empty), + `UNUSED_PIN (alm_empty), .full (cci_pending_reads_full), - .size (cci_pending_reads), - `UNUSED_PIN (empty) + `UNUSED_PIN (alm_full), + .size (cci_pending_reads) ); `UNUSED_VAR (cci_pending_reads) @@ -852,7 +854,9 @@ module vortex_afu import ccip_if_pkg::*; import 
local_mem_cfg_pkg::*; import VX_ .incr (cci_mem_rd_rsp_fire), .decr (cci_wr_rsp_fire), .empty (cci_pending_writes_empty), + `UNUSED_PIN (alm_empty), .full (cci_pending_writes_full), + `UNUSED_PIN (alm_full), .size (cci_pending_writes) ); @@ -1010,7 +1014,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef DBG_SCOPE_AFU -`ifdef SCOPE wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready; wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready; wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; @@ -1080,7 +1083,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .bus_in(scope_bus_in_w[0]), .bus_out(scope_bus_out_w[0]) ); -`endif `else `SCOPE_IO_UNUSED_W(0) `endif diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 69dbbe5f5..15be69007 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -311,7 +311,6 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef DBG_SCOPE_AFU -`ifdef SCOPE `define TRIGGERS { \ reset, \ ap_start, \ @@ -330,35 +329,17 @@ module VX_afu_wrap #( VX_scope_tap #( .SCOPE_ID (0), .TRIGGERW ($bits(`TRIGGERS)), - .PROBEW ($bits(`PROBES)) + .PROBEW ($bits(`PROBES)) ) scope_tap ( - .clk(clk), - .reset(scope_reset_w[0]), - .start(1'b0), - .stop(1'b0), - .triggers(`TRIGGERS), - .probes(`PROBES), - .bus_in(scope_bus_in_w[0]), - .bus_out(scope_bus_out_w[0]) + .clk (clk), + .reset (scope_reset_w[0]), + .start (1'b0), + .stop (1'b0), + .triggers (`TRIGGERS), + .probes (`PROBES), + .bus_in (scope_bus_in_w[0]), + .bus_out (scope_bus_out_w[0]) ); -`endif -`ifdef CHIPSCOPE - ila_afu ila_afu_inst ( - .clk (ap_clk), - .probe0 ({ - ap_start, - ap_done, - ap_idle, - interrupt - }), - .probe1 ({ - vx_pending_writes, - vx_busy_wait, - vx_busy, - vx_running - }) - ); 
-`endif `else `SCOPE_IO_UNUSED_W(0) `endif diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv new file mode 100644 index 000000000..15d1e8379 --- /dev/null +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -0,0 +1,109 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_cache_define.vh" + +module VX_bank_flush #( + // Size of cache in bytes + parameter CACHE_SIZE = 1024, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 64, + // Number of banks + parameter NUM_BANKS = 1, + // Number of associative ways + parameter NUM_WAYS = 1, + // Enable cache writeback + parameter WRITEBACK = 0 +) ( + input wire clk, + input wire reset, + input wire flush_in_valid, + output wire flush_in_ready, + output wire flush_out_init, + output wire flush_out_valid, + output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line, + output wire [NUM_WAYS-1:0] flush_out_way, + input wire flush_out_ready, + input wire mshr_empty +); + parameter CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? 
`CS_WAY_SEL_BITS : 0); + + parameter STATE_IDLE = 2'd0; + parameter STATE_INIT = 2'd1; + parameter STATE_FLUSH = 2'd2; + + reg [CTR_WIDTH-1:0] counter_r; + reg [1:0] state_r, state_n; + reg flush_in_ready_r, flush_in_ready_n; + + always @(*) begin + state_n = state_r; + flush_in_ready_n = 0; + case (state_r) + // STATE_IDLE + default: begin + if (flush_in_valid && mshr_empty) begin + state_n = STATE_FLUSH; + end + end + STATE_INIT: begin + if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin + state_n = STATE_IDLE; + end + end + STATE_FLUSH: begin + if (counter_r == ((2 ** CTR_WIDTH)-1)) begin + state_n = STATE_IDLE; + flush_in_ready_n = 1; + end + end + endcase + end + + always @(posedge clk) begin + if (reset) begin + state_r <= STATE_INIT; + counter_r <= '0; + flush_in_ready_r <= '0; + end else begin + state_r <= state_n; + flush_in_ready_r <= flush_in_ready_n; + if (state_r != STATE_IDLE) begin + if ((state_r == STATE_INIT) || flush_out_ready) begin + counter_r <= counter_r + CTR_WIDTH'(1); + end + end else begin + counter_r <= '0; + end + end + end + + assign flush_in_ready = flush_in_ready_r; + + assign flush_out_init = (state_r == STATE_INIT); + + assign flush_out_valid = (state_r == STATE_FLUSH); + assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0]; + + if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin + reg [NUM_WAYS-1:0] flush_out_way_r; + always @(*) begin + flush_out_way_r = '0; + flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1; + end + assign flush_out_way = flush_out_way_r; + end else begin + assign flush_out_way = {NUM_WAYS{1'b1}}; + end + +endmodule diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 298efe2f9..acaa1dac3 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,15 +14,15 @@ `include "VX_cache_define.vh" module VX_cache import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "", + parameter `STRING INSTANCE_ID = "", // Number of Word requests per cycle parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 4096, + parameter CACHE_SIZE = 4096, // Size of line inside a bank in bytes - parameter LINE_SIZE = 64, + parameter LINE_SIZE = 64, // Number of banks parameter NUM_BANKS = 1, // Number of associative ways @@ -33,7 +33,7 @@ module VX_cache import VX_gpu_pkg::*; #( // Core Response Queue Size parameter CRSQ_SIZE = 2, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 8, // Memory Response Queue Size parameter MRSQ_SIZE = 0, // Memory Request Queue Size @@ -42,6 +42,9 @@ module VX_cache import VX_gpu_pkg::*; #( // Enable cache writeable parameter WRITE_ENABLE = 1, + // Enable cache writeback + parameter WRITEBACK = 0, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -53,12 +56,12 @@ module VX_cache import VX_gpu_pkg::*; #( // Memory request output register parameter MEM_OUT_BUF = 0 - ) ( + ) ( // PERF `ifdef PERF_ENABLE output cache_perf_t cache_perf, `endif - + input wire clk, input wire reset, @@ -67,6 +70,7 @@ module VX_cache import VX_gpu_pkg::*; #( ); `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) + `STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter")) localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); @@ -78,36 +82,46 @@ module VX_cache import VX_gpu_pkg::*; #( localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); 
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); - localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH; + localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); + localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0; + `ifdef PERF_ENABLE wire [NUM_BANKS-1:0] perf_read_miss_per_bank; wire [NUM_BANKS-1:0] perf_write_miss_per_bank; wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank; `endif - wire [NUM_REQS-1:0] core_req_valid; - wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr; - wire [NUM_REQS-1:0] core_req_rw; - wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; - wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data; - wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag; - wire [NUM_REQS-1:0] core_req_ready; + VX_mem_bus_if #( + .DATA_SIZE (WORD_SIZE), + .TAG_WIDTH (TAG_WIDTH) + ) core_bus2_if[NUM_REQS](); - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_valid[i] = core_bus_if[i].req_valid; - assign core_req_rw[i] = core_bus_if[i].req_data.rw; - assign core_req_byteen[i] = core_bus_if[i].req_data.byteen; - assign core_req_addr[i] = core_bus_if[i].req_data.addr; - assign core_req_data[i] = core_bus_if[i].req_data.data; - assign core_req_tag[i] = core_bus_if[i].req_data.tag; - assign core_bus_if[i].req_ready = core_req_ready[i]; - `UNUSED_VAR (core_bus_if[i].req_data.atype) - end + wire [NUM_BANKS-1:0] per_bank_flush_valid; + wire [NUM_BANKS-1:0] per_bank_flush_ready; + + wire [NUM_BANKS-1:0] per_bank_core_req_fire; + + // this reset relay is required to sync with bank initialization + `RESET_RELAY (flush_reset, reset); + + VX_cache_flush #( + .NUM_REQS (NUM_REQS), + .NUM_BANKS (NUM_BANKS), + .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank 
xbar latency + ) flush_unit ( + .clk (clk), + .reset (flush_reset), + .core_bus_in_if (core_bus_if), + .core_bus_out_if (core_bus2_if), + .bank_req_fire (per_bank_core_req_fire), + .flush_valid (per_bank_flush_valid), + .flush_ready (per_bank_flush_ready) + ); /////////////////////////////////////////////////////////////////////////// @@ -117,10 +131,10 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - `RESET_RELAY (core_rsp_reset, reset); - for (genvar i = 0; i < NUM_REQS; ++i) begin + `RESET_RELAY (core_rsp_reset, reset); + VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), @@ -131,9 +145,9 @@ module VX_cache import VX_gpu_pkg::*; #( .valid_in (core_rsp_valid_s[i]), .ready_in (core_rsp_ready_s[i]), .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), - .data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}), - .valid_out (core_bus_if[i].rsp_valid), - .ready_out (core_bus_if[i].rsp_ready) + .data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}), + .valid_out (core_bus2_if[i].rsp_valid), + .ready_out (core_bus2_if[i].rsp_ready) ); end @@ -146,24 +160,29 @@ module VX_cache import VX_gpu_pkg::*; #( wire [LINE_SIZE-1:0] mem_req_byteen_s; wire [`CS_LINE_WIDTH-1:0] mem_req_data_s; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; + wire mem_req_flush_s; wire mem_req_ready_s; + wire mem_bus_if_flush; + + `RESET_RELAY (mem_req_reset, reset); + VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH), + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), .SIZE (MEM_REQ_BUF_ENABLE ? 
`TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( .clk (clk), - .reset (reset), - .valid_in (mem_req_valid_s), - .ready_in (mem_req_ready_s), - .data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}), - .data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}), - .valid_out (mem_bus_if.req_valid), + .reset (mem_req_reset), + .valid_in (mem_req_valid_s), + .ready_in (mem_req_ready_s), + .data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}), + .data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}), + .valid_out (mem_bus_if.req_valid), .ready_out (mem_bus_if.req_ready) ); - - assign mem_bus_if.req_data.atype = '0; + + assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; /////////////////////////////////////////////////////////////////////////// @@ -172,44 +191,26 @@ module VX_cache import VX_gpu_pkg::*; #( wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; wire mem_rsp_ready_s; - + + `RESET_RELAY (mem_rsp_reset, reset); + VX_elastic_buffer #( - .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH), + .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH), .SIZE (MRSQ_SIZE), .OUT_REG (MRSQ_SIZE > 2) ) mem_rsp_queue ( .clk (clk), - .reset (reset), + .reset (mem_rsp_reset), .valid_in (mem_bus_if.rsp_valid), .ready_in (mem_bus_if.rsp_ready), - .data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}), - .data_out ({mem_rsp_tag_s, mem_rsp_data_s}), + .data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}), + .data_out ({mem_rsp_tag_s, mem_rsp_data_s}), .valid_out (mem_rsp_valid_s), .ready_out (mem_rsp_ready_s) ); - /////////////////////////////////////////////////////////////////////// + 
/////////////////////////////////////////////////////////////////////////// - wire [`CS_LINE_SEL_BITS-1:0] init_line_sel; - wire init_enable; - - // this reset relay is required to sync with bank initialization - `RESET_RELAY (init_reset, reset); - - VX_cache_init #( - .CACHE_SIZE (CACHE_SIZE), - .LINE_SIZE (LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .NUM_WAYS (NUM_WAYS) - ) cache_init ( - .clk (clk), - .reset (init_reset), - .addr_out (init_line_sel), - .valid_out (init_enable) - ); - - /////////////////////////////////////////////////////////////////////// - wire [NUM_BANKS-1:0] per_bank_core_req_valid; wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; wire [NUM_BANKS-1:0] per_bank_core_req_rw; @@ -218,25 +219,28 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx; + wire [NUM_BANKS-1:0] per_bank_core_req_flush; wire [NUM_BANKS-1:0] per_bank_core_req_ready; - + wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx; wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; - wire [NUM_BANKS-1:0] per_bank_mem_req_valid; + wire [NUM_BANKS-1:0] per_bank_mem_req_valid; wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr; wire [NUM_BANKS-1:0] per_bank_mem_req_rw; - wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel; - wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen; - wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_mem_req_data; + wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; + wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; + wire [NUM_BANKS-1:0] per_bank_mem_req_flush; wire 
[NUM_BANKS-1:0] per_bank_mem_req_ready; wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; - + + assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready; + if (NUM_BANKS == 1) begin assign mem_rsp_ready_s = per_bank_mem_rsp_ready; end else begin @@ -245,12 +249,33 @@ module VX_cache import VX_gpu_pkg::*; #( // Bank requests dispatch - wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; - wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; + wire [NUM_REQS-1:0] core_req_valid; + wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr; + wire [NUM_REQS-1:0] core_req_rw; + wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; + wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data; + wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag; + wire [NUM_REQS-1:0] core_req_flush; + wire [NUM_REQS-1:0] core_req_ready; + wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr; wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid; wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel; + wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; + wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_req_valid[i] = core_bus2_if[i].req_valid; + assign core_req_rw[i] = core_bus2_if[i].req_data.rw; + assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen; + assign core_req_addr[i] = core_bus2_if[i].req_data.addr; + assign core_req_data[i] = core_bus2_if[i].req_data.data; + assign core_req_tag[i] = core_bus2_if[i].req_data.tag; + assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + assign core_bus2_if[i].req_ready = core_req_ready[i]; + end + for (genvar i = 0; i < NUM_REQS; ++i) begin if (WORDS_PER_LINE > 1) begin assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; @@ -273,9 +298,11 @@ module VX_cache import VX_gpu_pkg::*; #( core_req_line_addr[i], core_req_rw[i], core_req_wsel[i], - core_req_byteen[i], + core_req_byteen[i], 
core_req_data[i], - core_req_tag[i]}; + core_req_tag[i], + core_req_flush[i] + }; end `ifdef PERF_ENABLE @@ -284,12 +311,12 @@ module VX_cache import VX_gpu_pkg::*; #( `RESET_RELAY (req_xbar_reset, reset); - VX_stream_xbar #( + VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), .DATAW (CORE_REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .OUT_BUF ((NUM_REQS > 4) ? 2 : 0) + .OUT_BUF (REQ_XBAR_BUF) ) req_xbar ( .clk (clk), .reset (req_xbar_reset), @@ -313,27 +340,29 @@ module VX_cache import VX_gpu_pkg::*; #( per_bank_core_req_addr[i], per_bank_core_req_rw[i], per_bank_core_req_wsel[i], - per_bank_core_req_byteen[i], + per_bank_core_req_byteen[i], per_bank_core_req_data[i], - per_bank_core_req_tag[i]} = core_req_data_out[i]; + per_bank_core_req_tag[i], + per_bank_core_req_flush[i] + } = core_req_data_out[i]; end - + // Banks access - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; wire curr_bank_mem_rsp_valid; if (NUM_BANKS == 1) begin assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; end else begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == i); + assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id); end `RESET_RELAY (bank_reset, reset); - - VX_cache_bank #( - .BANK_ID (i), - .INSTANCE_ID (INSTANCE_ID), + + VX_cache_bank #( + .BANK_ID (bank_id), + .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -344,65 +373,66 @@ module VX_cache import VX_gpu_pkg::*; #( .MSHR_SIZE (MSHR_SIZE), .MREQ_SIZE (MREQ_SIZE), .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 
0 : MEM_OUT_BUF) - ) bank ( + ) bank ( .clk (clk), .reset (bank_reset), `ifdef PERF_ENABLE - .perf_read_misses (perf_read_miss_per_bank[i]), - .perf_write_misses (perf_write_miss_per_bank[i]), - .perf_mshr_stalls (perf_mshr_stall_per_bank[i]), + .perf_read_misses (perf_read_miss_per_bank[bank_id]), + .perf_write_misses (perf_write_miss_per_bank[bank_id]), + .perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]), `endif - - // Core request - .core_req_valid (per_bank_core_req_valid[i]), - .core_req_addr (per_bank_core_req_addr[i]), - .core_req_rw (per_bank_core_req_rw[i]), - .core_req_wsel (per_bank_core_req_wsel[i]), - .core_req_byteen (per_bank_core_req_byteen[i]), - .core_req_data (per_bank_core_req_data[i]), - .core_req_tag (per_bank_core_req_tag[i]), - .core_req_idx (per_bank_core_req_idx[i]), - .core_req_ready (per_bank_core_req_ready[i]), - // Core response - .core_rsp_valid (per_bank_core_rsp_valid[i]), - .core_rsp_data (per_bank_core_rsp_data[i]), - .core_rsp_tag (per_bank_core_rsp_tag[i]), - .core_rsp_idx (per_bank_core_rsp_idx[i]), - .core_rsp_ready (per_bank_core_rsp_ready[i]), + // Core request + .core_req_valid (per_bank_core_req_valid[bank_id]), + .core_req_addr (per_bank_core_req_addr[bank_id]), + .core_req_rw (per_bank_core_req_rw[bank_id]), + .core_req_wsel (per_bank_core_req_wsel[bank_id]), + .core_req_byteen (per_bank_core_req_byteen[bank_id]), + .core_req_data (per_bank_core_req_data[bank_id]), + .core_req_tag (per_bank_core_req_tag[bank_id]), + .core_req_idx (per_bank_core_req_idx[bank_id]), + .core_req_flush (per_bank_core_req_flush[bank_id]), + .core_req_ready (per_bank_core_req_ready[bank_id]), + + // Core response + .core_rsp_valid (per_bank_core_rsp_valid[bank_id]), + .core_rsp_data (per_bank_core_rsp_data[bank_id]), + .core_rsp_tag (per_bank_core_rsp_tag[bank_id]), + .core_rsp_idx (per_bank_core_rsp_idx[bank_id]), + .core_rsp_ready (per_bank_core_rsp_ready[bank_id]), // Memory request - .mem_req_valid (per_bank_mem_req_valid[i]), + 
.mem_req_valid (per_bank_mem_req_valid[bank_id]), .mem_req_addr (curr_bank_mem_req_addr), - .mem_req_rw (per_bank_mem_req_rw[i]), - .mem_req_wsel (per_bank_mem_req_wsel[i]), - .mem_req_byteen (per_bank_mem_req_byteen[i]), - .mem_req_data (per_bank_mem_req_data[i]), - .mem_req_id (per_bank_mem_req_id[i]), - .mem_req_ready (per_bank_mem_req_ready[i]), + .mem_req_rw (per_bank_mem_req_rw[bank_id]), + .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), + .mem_req_data (per_bank_mem_req_data[bank_id]), + .mem_req_id (per_bank_mem_req_id[bank_id]), + .mem_req_flush (per_bank_mem_req_flush[bank_id]), + .mem_req_ready (per_bank_mem_req_ready[bank_id]), // Memory response .mem_rsp_valid (curr_bank_mem_rsp_valid), .mem_rsp_data (mem_rsp_data_s), .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)), - .mem_rsp_ready (per_bank_mem_rsp_ready[i]), + .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), - // initialization - .init_enable (init_enable), - .init_line_sel (init_line_sel) + .flush_valid (per_bank_flush_valid[bank_id]), + .flush_ready (per_bank_flush_ready[bank_id]) ); if (NUM_BANKS == 1) begin - assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr; + assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; end else begin - assign per_bank_mem_req_addr[i] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i); + assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); end - end + end // Bank responses gather @@ -442,37 +472,41 @@ module VX_cache import VX_gpu_pkg::*; #( wire mem_req_valid_p; wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; wire mem_req_rw_p; - wire [WORD_SEL_WIDTH-1:0] mem_req_wsel_p; - wire [WORD_SIZE-1:0] mem_req_byteen_p; - wire [`CS_WORD_WIDTH-1:0] mem_req_data_p; + wire [LINE_SIZE-1:0] mem_req_byteen_p; + wire [`CS_LINE_WIDTH-1:0] mem_req_data_p; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p; wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p; + wire mem_req_flush_p; wire mem_req_ready_p; // Memory request arbitration - wire 
[NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH)-1:0] data_in; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign data_in[i] = {per_bank_mem_req_addr[i], - per_bank_mem_req_rw[i], - per_bank_mem_req_wsel[i], - per_bank_mem_req_byteen[i], - per_bank_mem_req_data[i], - per_bank_mem_req_id[i]}; + assign data_in[i] = { + per_bank_mem_req_addr[i], + per_bank_mem_req_rw[i], + per_bank_mem_req_byteen[i], + per_bank_mem_req_data[i], + per_bank_mem_req_id[i], + per_bank_mem_req_flush[i] + }; end + `RESET_RELAY (mem_arb_reset, reset); + VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH), - .ARBITER ("R") + .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), + .ARBITER ("F") ) mem_req_arb ( .clk (clk), - .reset (reset), + .reset (mem_arb_reset), .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}), + .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), .valid_out (mem_req_valid_p), .ready_out (mem_req_ready_p), `UNUSED_PIN (sel_out) @@ -480,44 +514,28 @@ module VX_cache import VX_gpu_pkg::*; #( if (NUM_BANKS > 1) begin wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p); - assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p}); + assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p}); end else begin assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); - end + end // Memory request multi-port handling assign mem_req_valid_s = mem_req_valid_p; assign mem_req_addr_s = mem_req_addr_p; assign mem_req_tag_s = mem_req_tag_p; + 
assign mem_req_flush_s = mem_req_flush_p; assign mem_req_ready_p = mem_req_ready_s; if (WRITE_ENABLE != 0) begin - if (`CS_WORDS_PER_LINE > 1) begin - reg [LINE_SIZE-1:0] mem_req_byteen_r; - reg [`CS_LINE_WIDTH-1:0] mem_req_data_r; - - always @(*) begin - mem_req_byteen_r = '0; - mem_req_data_r = 'x; - mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p; - mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p; - end - assign mem_req_rw_s = mem_req_rw_p; - assign mem_req_byteen_s = mem_req_byteen_r; - assign mem_req_data_s = mem_req_data_r; - end else begin - `UNUSED_VAR (mem_req_wsel_p) - assign mem_req_rw_s = mem_req_rw_p; - assign mem_req_byteen_s = mem_req_byteen_p; - assign mem_req_data_s = mem_req_data_p; - end + assign mem_req_rw_s = mem_req_rw_p; + assign mem_req_byteen_s = mem_req_byteen_p; + assign mem_req_data_s = mem_req_data_p; end else begin `UNUSED_VAR (mem_req_byteen_p) - `UNUSED_VAR (mem_req_wsel_p) `UNUSED_VAR (mem_req_data_p) `UNUSED_VAR (mem_req_rw_p) - + assign mem_req_rw_s = 0; assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; assign mem_req_data_s = '0; @@ -527,10 +545,10 @@ module VX_cache import VX_gpu_pkg::*; #( // per cycle: core_reads, core_writes wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle; - + wire [NUM_REQS-1:0] perf_core_reads_per_req; wire [NUM_REQS-1:0] perf_core_writes_per_req; - + // per cycle: read misses, write misses, msrq stalls, pipeline stalls wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle; wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle; @@ -539,16 +557,16 @@ module VX_cache import VX_gpu_pkg::*; #( `BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw); `BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw); - + `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req); `POP_COUNT(perf_core_writes_per_cycle, 
perf_core_writes_per_req); `POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank); `POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank); `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); - + wire [NUM_REQS-1:0] perf_crsp_stall_per_req; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready; + assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready; end `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req); @@ -561,7 +579,7 @@ module VX_cache import VX_gpu_pkg::*; #( reg [`PERF_CTR_BITS-1:0] perf_write_misses; reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls; reg [`PERF_CTR_BITS-1:0] perf_mem_stalls; - reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls; + reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls; always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 0db783066..03f3efd41 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -41,6 +41,9 @@ module VX_cache_bank #( // Enable cache writeable parameter WRITE_ENABLE = 1, + // Enable cache writeback + parameter WRITEBACK = 0, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -69,12 +72,13 @@ module VX_cache_bank #( // Core Request input wire core_req_valid, input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr, - input wire core_req_rw, - input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, - input wire [WORD_SIZE-1:0] core_req_byteen, - input wire [`CS_WORD_WIDTH-1:0] core_req_data, - input wire [TAG_WIDTH-1:0] core_req_tag, - input wire [REQ_SEL_WIDTH-1:0] core_req_idx, + input wire core_req_rw, // write enable + input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, // select the word in a cacheline, e.g. 
word size = 4 bytes, cacheline size = 64 bytes, it should have log(64/4)= 4 bits + input wire [WORD_SIZE-1:0] core_req_byteen,// which bytes in data to write + input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written + input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id) + input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array + input wire core_req_flush, // flush enable output wire core_req_ready, // Core Response @@ -88,10 +92,10 @@ module VX_cache_bank #( output wire mem_req_valid, output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr, output wire mem_req_rw, - output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel, - output wire [WORD_SIZE-1:0] mem_req_byteen, - output wire [`CS_WORD_WIDTH-1:0] mem_req_data, - output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, + output wire [LINE_SIZE-1:0] mem_req_byteen, + output wire [`CS_LINE_WIDTH-1:0] mem_req_data, + output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr + output wire mem_req_flush, input wire mem_req_ready, // Memory response @@ -100,9 +104,9 @@ module VX_cache_bank #( input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id, output wire mem_rsp_ready, - // initialization - input wire init_enable, - input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel + // flush + input wire flush_valid, + output wire flush_ready ); localparam PIPELINE_STAGES = 2; @@ -128,23 +132,56 @@ module VX_cache_bank #( wire [MSHR_ADDR_WIDTH-1:0] replay_id; wire replay_ready; + wire is_init_st0; + wire is_flush_st0, is_flush_st1; + wire [NUM_WAYS-1:0] flush_way_st0; + wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire rw_st0, rw_st1; - wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1; - wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; - wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1; - wire [TAG_WIDTH-1:0] tag_st0, tag_st1; + wire rw_sel, rw_st0, rw_st1; + wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1; + wire [WORD_SIZE-1:0] 
byteen_sel, byteen_st0, byteen_st1; + wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; + wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1; wire valid_sel, valid_st0, valid_st1; - wire is_init_st0; wire is_creq_st0, is_creq_st1; wire is_fill_st0, is_fill_st1; wire is_replay_st0, is_replay_st1; + wire creq_flush_st0, creq_flush_st1; + wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1; + wire [NUM_WAYS-1:0] tag_matches_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1; wire mshr_pending_st0, mshr_pending_st1; + wire mshr_empty; + + wire line_flush_valid; + wire line_flush_init; + wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel; + wire [NUM_WAYS-1:0] line_flush_way; + wire line_flush_ready; + + // flush unit + VX_bank_flush #( + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .WRITEBACK (WRITEBACK) + ) flush_unit ( + .clk (clk), + .reset (reset), + .flush_in_valid (flush_valid), + .flush_in_ready (flush_ready), + .flush_out_init (line_flush_init), + .flush_out_valid (line_flush_valid), + .flush_out_line (line_flush_sel), + .flush_out_way (line_flush_way), + .flush_out_ready (line_flush_ready), + .mshr_empty (mshr_empty) + ); wire rdw_hazard_st0; reg rdw_hazard_st1; @@ -154,76 +191,77 @@ module VX_cache_bank #( // inputs arbitration: // mshr replay has highest priority to maximize utilization since there is no miss. // handle memory responses next to prevent deadlock with potential memory request from a miss. - wire replay_grant = ~init_enable; + // flush has precedence over core requests to ensure that the cache is in a consistent state. 
+ wire replay_grant = ~line_flush_init; wire replay_enable = replay_grant && replay_valid; - wire fill_grant = ~init_enable && ~replay_enable; + wire fill_grant = ~line_flush_init && ~replay_enable; wire fill_enable = fill_grant && mem_rsp_valid; - wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable; + wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable; + wire flush_enable = flush_grant && line_flush_valid; + + wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable; wire creq_enable = creq_grant && core_req_valid; assign replay_ready = replay_grant - && ~rdw_hazard_st0 - && ~pipe_stall; + && ~rdw_hazard_st0 + && ~pipe_stall; assign mem_rsp_ready = fill_grant && ~pipe_stall; - assign core_req_ready = creq_grant - && ~mreq_queue_alm_full - && ~mshr_alm_full - && ~pipe_stall; + assign line_flush_ready = flush_grant + && ~mreq_queue_alm_full + && ~pipe_stall; - wire init_fire = init_enable; + assign core_req_ready = creq_grant + && ~mreq_queue_alm_full + && ~mshr_alm_full + && ~pipe_stall; + + wire init_fire = line_flush_init; wire replay_fire = replay_valid && replay_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; + wire flush_fire = line_flush_valid && line_flush_ready; wire core_req_fire = core_req_valid && core_req_ready; - wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag; + assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; + assign rw_sel = replay_valid ? replay_rw : core_req_rw; + assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen; + assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel; + assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; + assign tag_sel = replay_valid ? replay_tag : core_req_tag; + + assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) : + (replay_valid ? replay_addr : (mem_rsp_valid ? 
mem_rsp_addr : core_req_addr)); + + if (WRITE_ENABLE) begin + assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); + end else begin + assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; + `UNUSED_VAR (core_req_data) + `UNUSED_VAR (replay_data) + end + for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin + assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel + end if (UUID_WIDTH != 0) begin - assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH]; + assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin assign req_uuid_sel = 0; end - `UNUSED_VAR (mshr_creq_tag) - - assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire; - - assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) : - (replay_valid ? replay_addr : - (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - - assign data_sel[`CS_WORD_WIDTH-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : (replay_valid ? replay_data : core_req_data); - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin - assign data_sel[i] = mem_rsp_data[i]; - end - VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({ - valid_sel, - init_enable, - replay_enable, - fill_enable, - creq_enable, - addr_sel, - data_sel, - replay_valid ? replay_rw : core_req_rw, - replay_valid ? replay_byteen : core_req_byteen, - replay_valid ? replay_wsel : core_req_wsel, - replay_valid ? replay_idx : core_req_idx, - replay_valid ? 
replay_tag : core_req_tag, - replay_id - }), - .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) + .data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}), + .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) ); if (UUID_WIDTH != 0) begin @@ -232,20 +270,24 @@ module VX_cache_bank #( assign req_uuid_st0 = 0; end - wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0; - wire do_fill_st0 = valid_st0 && is_fill_st0; wire do_init_st0 = valid_st0 && is_init_st0; + wire do_flush_st0 = valid_st0 && is_flush_st0; + wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0; + wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0; + wire do_fill_st0 = valid_st0 && is_fill_st0; wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0); + wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0; + wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; - wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1; - wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1; + wire [NUM_WAYS-1:0] repl_way_st0; + wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0; `RESET_RELAY (tag_reset, reset); VX_cache_tags #( - .INSTANCE_ID(INSTANCE_ID), + .INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)), .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), @@ -261,30 +303,37 @@ module VX_cache_bank #( .stall (pipe_stall), - // read/Fill + // init/fill/lookup/flush + .init (do_init_st0 || do_flush_st0), + .fill (do_fill_st0), .lookup (do_lookup_st0), .line_addr (addr_st0), - .fill (do_fill_st0), - .init (do_init_st0), - .way_sel (way_sel_st0), - 
.tag_matches(tag_matches_st0) + .tag_matches(tag_matches_st0), + + // replacement + .repl_way (repl_way_st0), + .repl_tag (repl_tag_st0) ); assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0; + assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : (is_flush_st0 ? flush_way_st0 : tag_matches_st0); + + wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0; + VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, mshr_pending_st1}) ); // we have a tag hit - wire is_hit_st1 = (| tag_matches_st1); + wire is_hit_st1 = (| way_sel_st1); if (UUID_WIDTH != 0) begin assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; 
@@ -292,37 +341,62 @@ module VX_cache_bank #( assign req_uuid_st1 = 0; end - wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1; - wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1; + wire is_read_st1 = is_creq_st1 && ~rw_st1; + wire is_write_st1 = is_creq_st1 && rw_st1; + wire do_creq_rd_st1 = valid_st1 && is_read_st1; + wire do_creq_wr_st1 = valid_st1 && is_write_st1; wire do_fill_st1 = valid_st1 && is_fill_st1; wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1; wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1; + wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1; + wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1; + wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1; wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1; wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1; wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1; + wire do_flush_st1 = valid_st1 && is_flush_st1; + `UNUSED_VAR (do_write_miss_st1) // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay")); // detect BRAM's read-during-write hazard - assign rdw_hazard_st0 = do_fill_st0; // after a fill - always @(posedge clk) begin - rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1)) - && ~rdw_hazard_st1; // after a write to same address + assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill + wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access + wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict preceeding write + always @(posedge clk) begin // after a write to same address + rdw_hazard_st1 <= (rdw_case1 || rdw_case2) + && ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats end - wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; + wire [`CS_LINE_WIDTH-1:0] write_data_st1 = 
{`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}}; wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1; + wire [LINE_SIZE-1:0] write_byteen_st1; + + wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; + wire [LINE_SIZE-1:0] dirty_byteen_st1; + wire dirty_valid_st1; + + if (`CS_WORDS_PER_LINE > 1) begin + reg [LINE_SIZE-1:0] write_byteen_r; + always @(*) begin + write_byteen_r = '0; + write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1; + end + assign write_byteen_st1 = write_byteen_r; + end else begin + assign write_byteen_st1 = byteen_st1; + end `RESET_RELAY (data_reset, reset); VX_cache_data #( - .INSTANCE_ID (INSTANCE_ID), + .INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)), .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), @@ -330,6 +404,7 @@ module VX_cache_bank #( .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH) ) cache_data ( .clk (clk), @@ -339,23 +414,38 @@ module VX_cache_bank #( .stall (pipe_stall), - .read (do_read_hit_st1 || do_replay_rd_st1), - .fill (do_fill_st1), - .write (do_write_hit_st1 || do_replay_wr_st1), - .way_sel (way_sel_st1 | tag_matches_st1), + .read (do_cache_rd_st1), + .fill (do_fill_st1 && ~rdw_hazard_st1), + .flush (do_flush_st1), + .write (do_cache_wr_st1), + .way_sel (way_sel_st1), .line_addr (addr_st1), .wsel (wsel_st1), - .byteen (byteen_st1), .fill_data (fill_data_st1), .write_data (write_data_st1), - .read_data (read_data_st1) + .write_byteen(write_byteen_st1), + .read_data (read_data_st1), + .dirty_valid(dirty_valid_st1), + .dirty_data (dirty_data_st1), + .dirty_byteen(dirty_byteen_st1) ); - wire [MSHR_SIZE-1:0] mshr_matches_st0; + wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0; + wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0; wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall; wire mshr_lookup_st0 = mshr_allocate_st0; wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall; - wire mshr_release_st1 = is_hit_st1 || 
(rw_st1 && ~mshr_pending_st1); + + // release allocated mshr entry if we had a hit + wire mshr_release_st1; + if (WRITEBACK) begin + assign mshr_release_st1 = is_hit_st1; + end else begin + // we need to keep missed write requests in MSHR if there is already a pending entry to the same address + // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content + // this can happen when writes are sent late, when the fill was already in flight. + assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1); + end VX_pending_size #( .SIZE (MSHR_SIZE) @@ -364,15 +454,17 @@ module VX_cache_bank #( .reset (reset), .incr (core_req_fire), .decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)), + .empty (mshr_empty), + `UNUSED_PIN (alm_empty), .full (mshr_alm_full), - `UNUSED_PIN (size), - `UNUSED_PIN (empty) + `UNUSED_PIN (alm_full), + `UNUSED_PIN (size) ); `RESET_RELAY (mshr_reset, reset); VX_cache_mshr #( - .INSTANCE_ID (INSTANCE_ID), + .INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)), .BANK_ID (BANK_ID), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -412,7 +504,8 @@ module VX_cache_bank #( // lookup .lookup_valid (mshr_lookup_st0), .lookup_addr (addr_st0), - .lookup_matches (mshr_matches_st0), + .lookup_pending (mshr_lookup_pending_st0), + .lookup_rw (mshr_lookup_rw_st0), // finalize .finalize_valid (mshr_finalize_st1), @@ -422,10 +515,12 @@ module VX_cache_bank #( .finalize_prev (mshr_prev_st1) ); - // ignore allocated id from mshr matches + // check if there are pending requests to same line in the MSHR wire [MSHR_SIZE-1:0] lookup_matches; for (genvar i = 0; i < MSHR_SIZE; ++i) begin - assign lookup_matches[i] = (i != mshr_alloc_id_st0) && mshr_matches_st0[i]; + assign lookup_matches[i] = mshr_lookup_pending_st0[i] + && (i != mshr_alloc_id_st0) // exclude current mshr id + && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough end assign mshr_pending_st0 = (| 
lookup_matches); @@ -436,7 +531,7 @@ module VX_cache_bank #( wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx; wire [TAG_WIDTH-1:0] crsp_queue_tag; - assign crsp_queue_valid = do_read_hit_st1 || do_replay_rd_st1; + assign crsp_queue_valid = do_cache_rd_st1; assign crsp_queue_idx = req_idx_st1; assign crsp_queue_data = read_data_st1; assign crsp_queue_tag = tag_st1; @@ -463,29 +558,40 @@ module VX_cache_bank #( // schedule memory request wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty; - wire [`CS_WORD_WIDTH-1:0] mreq_queue_data; - wire [WORD_SIZE-1:0] mreq_queue_byteen; - wire [WORD_SEL_WIDTH-1:0] mreq_queue_wsel; + wire [`CS_LINE_WIDTH-1:0] mreq_queue_data; + wire [LINE_SIZE-1:0] mreq_queue_byteen; wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id; wire mreq_queue_rw; + wire mreq_queue_flush; - assign mreq_queue_push = (do_read_miss_st1 && ~mshr_pending_st1) - || do_creq_wr_st1; + wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1; + wire do_writeback_st1 = valid_st1 && is_evict_st1; + `UNUSED_VAR (do_writeback_st1) + + if (WRITEBACK) begin + assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) + || do_writeback_st1) + && ~rdw_hazard_st1; + end else begin + `UNUSED_VAR (dirty_valid_st1) + assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) + || do_creq_wr_st1) + && ~rdw_hazard_st1; + end assign mreq_queue_pop = mem_req_valid && mem_req_ready; - - assign mreq_queue_rw = WRITE_ENABLE && rw_st1; + assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1); assign mreq_queue_addr = addr_st1; assign mreq_queue_id = mshr_id_st1; - assign mreq_queue_wsel = wsel_st1; - assign mreq_queue_byteen = byteen_st1; - assign mreq_queue_data = write_data_st1; + assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1; + assign mreq_queue_byteen = is_write_st1 ? 
write_byteen_st1 : dirty_byteen_st1; + assign mreq_queue_flush = creq_flush_st1; `RESET_RELAY (mreq_queue_reset, reset); VX_fifo_queue #( - .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH), + .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1), .DEPTH (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) @@ -494,8 +600,8 @@ module VX_cache_bank #( .reset (mreq_queue_reset), .push (mreq_queue_push), .pop (mreq_queue_pop), - .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_wsel, mreq_queue_data}), - .data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}), + .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}), + .data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}), .empty (mreq_queue_empty), .alm_full (mreq_queue_alm_full), `UNUSED_PIN (full), @@ -515,35 +621,34 @@ module VX_cache_bank #( `ifdef DBG_TRACE_CACHE wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready; - wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid) - && ~(replay_fire || mem_rsp_fire || core_req_fire); + wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid) + && ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_valid); always @(posedge clk) begin if (pipeline_stall) begin - `TRACE(3, ("%d: *** %s-bank%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full)); - end - if (init_enable) begin - `TRACE(2, ("%d: %s-bank%0d init: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID))); + `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, 
rdw_hazard_st0)); end if (mem_rsp_fire) begin - `TRACE(2, ("%d: %s-bank%0d fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)); + `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)); end if (replay_fire) begin - `TRACE(2, ("%d: %s-bank%0d mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)); + `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)); end if (core_req_fire) begin if (core_req_rw) - `TRACE(2, ("%d: %s-bank%0d core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); + `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); else - `TRACE(2, ("%d: %s-bank%0d core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); + `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); end if (crsp_queue_fire) begin - `TRACE(2, ("%d: %s-bank%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, 
crsp_queue_data, req_uuid_st1)); + `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)); end if (mreq_queue_push) begin - if (do_creq_wr_st1) - `TRACE(2, ("%d: %s-bank%0d writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); + if (do_creq_wr_st1 && !WRITEBACK) + `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); + else if (do_writeback_st1) + `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); else - `TRACE(2, ("%d: %s-bank%0d fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); + `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); end end `endif diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 6b211830f..379d33e8a 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,16 +18,16 @@ module VX_cache_bypass #( parameter TAG_SEL_IDX = 0, parameter PASSTHRU = 0, - parameter NC_ENABLE = 0, + parameter NC_ENABLE = 0, parameter WORD_SIZE = 1, - parameter LINE_SIZE = 1, + parameter LINE_SIZE = 1, parameter CORE_ADDR_WIDTH = 1, - + parameter CORE_TAG_WIDTH = 1, - - parameter MEM_ADDR_WIDTH = 1, + + parameter MEM_ADDR_WIDTH = 1, parameter MEM_TAG_IN_WIDTH = 1, parameter MEM_TAG_OUT_WIDTH = 1, @@ -35,9 +35,9 @@ module VX_cache_bypass #( parameter CORE_OUT_BUF = 0, parameter MEM_OUT_BUF = 0, - + parameter CORE_DATA_WIDTH = WORD_SIZE * 8 - ) ( + ) ( input wire clk, input wire reset, @@ -71,40 +71,39 @@ module VX_cache_bypass #( wire core_req_nc_valid; wire [NUM_REQS-1:0] core_req_nc_valids; - wire [NUM_REQS-1:0] core_req_nc_idxs; + wire [NUM_REQS-1:0] core_req_nc_idxs; wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; - wire [NUM_REQS-1:0] core_req_nc_sel; + wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_ready; - - for (genvar i = 0; i < NUM_REQS; ++i) begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin if (PASSTHRU != 0) begin assign core_req_nc_idxs[i] = 1'b1; end else if (NC_ENABLE) begin assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; end else begin assign core_req_nc_idxs[i] = 1'b0; - end + end assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; - end + end VX_generic_arbiter #( .NUM_REQS (NUM_REQS), - .TYPE (PASSTHRU ? "R" : "P"), - .LOCK_ENABLE (1) + .TYPE (PASSTHRU ? 
"R" : "P") ) core_req_nc_arb ( .clk (clk), - .reset (reset), - .requests (core_req_nc_valids), + .reset (reset), + .requests (core_req_nc_valids), .grant_index (core_req_nc_idx), .grant_onehot (core_req_nc_sel), .grant_valid (core_req_nc_valid), - .grant_unlock (core_req_nc_ready) + .grant_ready (core_req_nc_ready) ); for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; - assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) + assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) : core_bus_out_if[i].req_ready; end @@ -118,7 +117,7 @@ module VX_cache_bypass #( wire [`CS_LINE_WIDTH-1:0] mem_req_out_data; wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; wire mem_req_out_ready; - + wire core_req_nc_sel_rw; wire [WORD_SIZE-1:0] core_req_nc_sel_byteen; wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; @@ -129,22 +128,22 @@ module VX_cache_bypass #( wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_req_nc_mux_in[i] = { - core_bus_in_if[i].req_data.rw, + core_bus_in_if[i].req_data.rw, core_bus_in_if[i].req_data.byteen, core_bus_in_if[i].req_data.addr, core_bus_in_if[i].req_data.atype, core_bus_in_if[i].req_data.data, - core_bus_in_if[i].req_data.tag + core_bus_in_if[i].req_data.tag }; end - + assign { core_req_nc_sel_rw, core_req_nc_sel_byteen, core_req_nc_sel_addr, core_req_nc_sel_atype, core_req_nc_sel_data, - core_req_nc_sel_tag + core_req_nc_sel_tag } = core_req_nc_mux_in[core_req_nc_idx]; assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready; @@ -157,11 +156,11 @@ module VX_cache_bypass #( wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; - + if (WORDS_PER_LINE > 1) 
begin reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; - + wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; always @(*) begin @@ -176,7 +175,7 @@ module VX_cache_bypass #( assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; if (NUM_REQS > 1) begin assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); - end else begin + end else begin assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); end end else begin @@ -189,7 +188,7 @@ module VX_cache_bypass #( end end - wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; + wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; if (UUID_WIDTH != 0) begin assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; @@ -202,7 +201,7 @@ module VX_cache_bypass #( `UNUSED_VAR (mem_bus_in_if.req_data.tag) end else begin if (NC_ENABLE) begin - VX_bits_insert #( + VX_bits_insert #( .N (MEM_TAG_OUT_WIDTH-1), .S (1), .POS (TAG_SEL_IDX) @@ -213,8 +212,8 @@ module VX_cache_bypass #( ); end else begin assign mem_req_out_tag = mem_bus_in_if.req_data.tag; - end - end + end + end assign mem_bus_in_if.req_ready = mem_req_out_ready; @@ -225,11 +224,11 @@ module VX_cache_bypass #( ) mem_req_buf ( .clk (clk), .reset (reset), - .valid_in (mem_req_out_valid), - .ready_in (mem_req_out_ready), + .valid_in (mem_req_out_valid), + .ready_in (mem_req_out_ready), .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), - .valid_out (mem_bus_out_if.req_valid), + .valid_out (mem_bus_out_if.req_valid), .ready_out (mem_bus_out_if.req_ready) ); @@ -253,7 +252,7 
@@ module VX_cache_bypass #( wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; - VX_bits_remove #( + VX_bits_remove #( .N (MEM_TAG_OUT_WIDTH), .S (NC_ENABLE), .POS (TAG_SEL_IDX) @@ -265,10 +264,10 @@ module VX_cache_bypass #( wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; if (NUM_REQS > 1) begin assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; - end else begin + end else begin assign rsp_idx = 1'b0; end - + reg [NUM_REQS-1:0] rsp_nc_valid_r; always @(*) begin rsp_nc_valid_r = '0; @@ -277,13 +276,13 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; - assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; + assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end - + if (WORDS_PER_LINE > 1) begin - wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; + wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? + assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; end end else begin @@ -306,7 +305,7 @@ module VX_cache_bypass #( assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? 
core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; end else begin assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; - end + end end for (genvar i = 0; i < NUM_REQS; ++i) begin @@ -320,7 +319,7 @@ module VX_cache_bypass #( .valid_in (core_rsp_in_valid[i]), .ready_in (core_rsp_in_ready[i]), .data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}), - .data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}), + .data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}), .valid_out (core_bus_in_if[i].rsp_valid), .ready_out (core_bus_in_if[i].rsp_ready) ); @@ -341,7 +340,7 @@ module VX_cache_bypass #( assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc; end - + wire [NUM_REQS-1:0] core_rsp_out_valid; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index a132f9b67..c567ddbc5 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,20 +24,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 16384, + parameter CACHE_SIZE = 16384, // Size of line inside a bank in bytes - parameter LINE_SIZE = 64, + parameter LINE_SIZE = 64, // Number of banks parameter NUM_BANKS = 1, // Number of associative ways parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 4, // Core Response Queue Size parameter CRSQ_SIZE = 2, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 8, // Memory Response Queue Size parameter MRSQ_SIZE = 0, // Memory Request Queue Size @@ -46,6 +46,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( // Enable cache writeable parameter WRITE_ENABLE = 1, + // Enable cache writeback + parameter WRITEBACK = 0, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -60,7 +63,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( // Memory request output buffer parameter MEM_OUT_BUF = 0 - ) ( + ) ( input wire clk, input wire reset, @@ -74,17 +77,16 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( ); localparam NUM_CACHES = `UP(NUM_UNITS); localparam PASSTHRU = (NUM_UNITS == 0); - localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES); - localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : + localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES); + localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : (NC_ENABLE ? 
`CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); `STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter")) `ifdef PERF_ENABLE - cache_perf_t perf_cache_tmp[1], perf_cache_unit[NUM_CACHES]; - `PERF_CACHE_ADD (perf_cache_tmp, perf_cache_unit, 1, NUM_CACHES) - assign cache_perf = perf_cache_tmp[0]; + cache_perf_t perf_cache_unit[NUM_CACHES]; + `PERF_CACHE_ADD (cache_perf, perf_cache_unit, NUM_CACHES) `endif VX_mem_bus_if #( @@ -97,8 +99,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - `RESET_RELAY (arb_reset, reset); - for (genvar i = 0; i < NUM_REQS; ++i) begin VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), @@ -114,6 +114,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]); end + `RESET_RELAY (arb_reset, reset); + VX_mem_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (NUM_CACHES), @@ -135,9 +137,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( end end - `RESET_RELAY (cache_reset, reset); + for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches - for (genvar i = 0; i < NUM_CACHES; ++i) begin + `RESET_RELAY (cache_reset, reset); VX_cache_wrap #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), @@ -152,6 +154,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .MRSQ_SIZE (MRSQ_SIZE), .MREQ_SIZE (MREQ_SIZE), .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (ARB_TAG_WIDTH), .TAG_SEL_IDX (TAG_SEL_IDX), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 08df15e4c..6bf8f1c3e 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,17 +17,19 @@ module VX_cache_data #( parameter `STRING INSTANCE_ID= "", parameter BANK_ID = 0, // Size of cache in bytes - parameter CACHE_SIZE = 1024, + parameter CACHE_SIZE = 1024, // Size of line inside a bank in bytes - parameter LINE_SIZE = 16, + parameter LINE_SIZE = 16, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Number of associative ways parameter NUM_WAYS = 1, // Size of a word in bytes parameter WORD_SIZE = 1, // Enable cache writeable parameter WRITE_ENABLE = 1, + // Enable cache writeback + parameter WRITEBACK = 0, // Request debug identifier parameter UUID_WIDTH = 0 ) ( @@ -41,59 +43,100 @@ module VX_cache_data #( input wire stall, input wire read, - input wire fill, + input wire fill, + input wire flush, input wire write, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel, - input wire [WORD_SIZE-1:0] byteen, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, - input wire [`CS_WORD_WIDTH-1:0] write_data, + input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data, + input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen, input wire [NUM_WAYS-1:0] way_sel, - - output wire [`CS_WORD_WIDTH-1:0] read_data + output wire [`CS_WORD_WIDTH-1:0] read_data, + output wire dirty_valid, + output wire [`CS_LINE_WIDTH-1:0] dirty_data, + output wire [LINE_SIZE-1:0] dirty_byteen ); `UNUSED_SPARAM (INSTANCE_ID) `UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (reset) + `UNUSED_VAR (stall) `UNUSED_VAR (line_addr) `UNUSED_VAR (read) + `UNUSED_VAR (flush) localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? 
(LINE_SIZE * NUM_WAYS) : 1; + wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; + wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; + + if (WRITEBACK) begin + reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r; + reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r; + + wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr; + if (NUM_WAYS > 1) begin + assign way_addr = {line_sel, way_idx}; + end else begin + assign way_addr = line_sel; + end + + always @(posedge clk) begin + if (fill) begin + dirty_bytes_r[way_addr] <= '0; + end else if (write) begin + dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen; + end + end + + always @(posedge clk) begin + if (reset) begin + for (integer i = 0; i < `CS_LINES_PER_BANK * NUM_WAYS; ++i) begin + dirty_blocks_r[i] <= 0; + end + end else begin + if (fill) begin + dirty_blocks_r[way_addr] <= 0; + end else if (write) begin + dirty_blocks_r[way_addr] <= 1; + end + end + end + + assign dirty_byteen = dirty_bytes_r[way_addr]; + assign dirty_valid = dirty_blocks_r[way_addr]; + end else begin + assign dirty_byteen = '0; + assign dirty_valid = 0; + end + + // order the data layout to perform ways multiplexing last. + // this allows converting way index to binary in parallel with BRAM read. + wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata; wire [BYTEENW-1:0] wren; if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin - reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r; - reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r; - - always @(*) begin - wdata_r = {`CS_WORDS_PER_LINE{write_data}}; - wren_r = '0; - wren_r[wsel] = byteen; + for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin + assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}}; end - - // order the data layout to perform ways multiplexing last - // this allows performing onehot encoding of the way index in parallel with BRAM read. 
+ wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin - assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}}; for (genvar j = 0; j < NUM_WAYS; ++j) begin - assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i]) - & {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}}; + assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i]) + & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}}; end end assign wren = wren_w; end else begin `UNUSED_VAR (write) - `UNUSED_VAR (byteen) + `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_data) assign wdata = fill_data; assign wren = fill; end - - wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; VX_onehot_encoder #( .N (NUM_WAYS) @@ -105,8 +148,6 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata; - wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; - VX_sp_ram #( .DATAW (`CS_LINE_WIDTH * NUM_WAYS), .SIZE (`CS_LINES_PER_BANK), @@ -119,34 +160,41 @@ module VX_cache_data #( .wren (wren), .addr (line_sel), .wdata (wdata), - .rdata (rdata) + .rdata (rdata) ); wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; - if (`CS_WORDS_PER_LINE > 1) begin assign per_way_rdata = rdata[wsel]; end else begin `UNUSED_VAR (wsel) assign per_way_rdata = rdata; - end - + end assign read_data = per_way_rdata[way_idx]; - `UNUSED_VAR (stall) + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w; + for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin + for (genvar j = 0; j < NUM_WAYS; ++j) begin + assign dirty_data_w[j][i] = rdata[i][j]; + end + end + assign dirty_data = dirty_data_w[way_idx]; `ifdef DBG_TRACE_CACHE - always @(posedge clk) begin + always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s-bank%0d data-fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, 
line_sel, fill_data)); + `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); + end + if (flush && ~stall) begin + `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen)); end if (read && ~stall) begin - `TRACE(3, ("%d: %s-bank%0d data-read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid)); - end + `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)); + end if (write && ~stall) begin - `TRACE(3, ("%d: %s-bank%0d data-write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid)); - end - end + `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); + end + end `endif endmodule diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index a9c10f3fb..e6d7da167 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,7 +14,7 @@ `ifndef VX_CACHE_DEFINE_VH `define VX_CACHE_DEFINE_VH -`include "VX_define.vh" +`include "VX_define.vh" `define CS_REQ_SEL_BITS `CLOG2(NUM_REQS) @@ -50,7 +50,7 @@ `define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END) `define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1) -`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS] +`define CS_LINE_ADDR_TAG(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS] /////////////////////////////////////////////////////////////////////////////// @@ -64,14 +64,14 @@ /////////////////////////////////////////////////////////////////////////////// -`define PERF_CACHE_ADD(dst, src, dcount, scount) \ - `PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ - `PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ - `PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ - `PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ - `PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ - `PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ - `PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) \ - `PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, dcount, scount, (`CDIV(scount, dcount) > 1)) +`define PERF_CACHE_ADD(dst, src, count) \ + `PERF_COUNTER_ADD (dst, src, reads, `PERF_CTR_BITS, count, (count > 1)) \ + `PERF_COUNTER_ADD (dst, src, writes, `PERF_CTR_BITS, 
count, (count > 1)) \ + `PERF_COUNTER_ADD (dst, src, read_misses, `PERF_CTR_BITS, count, (count > 1)) \ + `PERF_COUNTER_ADD (dst, src, write_misses, `PERF_CTR_BITS, count, (count > 1)) \ + `PERF_COUNTER_ADD (dst, src, bank_stalls, `PERF_CTR_BITS, count, (count > 1)) \ + `PERF_COUNTER_ADD (dst, src, mshr_stalls, `PERF_CTR_BITS, count, (count > 1)) \ + `PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \ + `PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1)) `endif // VX_CACHE_DEFINE_VH diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv new file mode 100644 index 000000000..7c46a48f0 --- /dev/null +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -0,0 +1,154 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_cache_define.vh" + +module VX_cache_flush #( + // Number of Word requests per cycle + parameter NUM_REQS = 4, + // Number of banks + parameter NUM_BANKS = 1, + // Bank select latency + parameter BANK_SEL_LATENCY = 1 +) ( + input wire clk, + input wire reset, + VX_mem_bus_if.slave core_bus_in_if [NUM_REQS], + VX_mem_bus_if.master core_bus_out_if [NUM_REQS], + input wire [NUM_BANKS-1:0] bank_req_fire, + output wire [NUM_BANKS-1:0] flush_valid, + input wire [NUM_BANKS-1:0] flush_ready +); + localparam STATE_IDLE = 0; + localparam STATE_WAIT = 1; + localparam STATE_FLUSH = 2; + localparam STATE_DONE = 3; + + // track in-flight core requests + + wire no_inflight_reqs; + + if (BANK_SEL_LATENCY != 0) begin + + localparam NUM_REQS_W = `CLOG2(NUM_REQS+1); + localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1); + + wire [NUM_REQS-1:0] core_bus_out_fire; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready; + end + + wire [NUM_REQS_W-1:0] core_bus_out_cnt; + wire [NUM_BANKS_W-1:0] bank_req_cnt; + + `POP_COUNT(core_bus_out_cnt, core_bus_out_fire); + `POP_COUNT(bank_req_cnt, bank_req_fire); + `UNUSED_VAR (core_bus_out_cnt) + + VX_pending_size #( + .SIZE (BANK_SEL_LATENCY * NUM_BANKS), + .INCRW (NUM_BANKS_W), + .DECRW (NUM_BANKS_W) + ) pending_size ( + .clk (clk), + .reset (reset), + .incr (NUM_BANKS_W'(core_bus_out_cnt)), + .decr (bank_req_cnt), + .empty (no_inflight_reqs), + `UNUSED_PIN (alm_empty), + `UNUSED_PIN (full), + `UNUSED_PIN (alm_full), + `UNUSED_PIN (size) + ); + + end else begin + assign no_inflight_reqs = 0; + `UNUSED_VAR (bank_req_fire) + end + + + reg [1:0] state, state_n; + reg [NUM_BANKS-1:0] flush_done, flush_done_n; + + wire [NUM_REQS-1:0] flush_req_mask; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + end + wire flush_req_enable = (| flush_req_mask); + + 
reg [NUM_REQS-1:0] lock_released, lock_released_n; + + for (genvar i = 0; i < NUM_REQS; ++i) begin + wire input_enable = ~flush_req_enable || lock_released[i]; + assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable; + assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; + assign core_bus_in_if[i].req_ready = core_bus_out_if[i].req_ready && input_enable; + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_bus_in_if[i].rsp_valid = core_bus_out_if[i].rsp_valid; + assign core_bus_in_if[i].rsp_data = core_bus_out_if[i].rsp_data; + assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready; + end + + wire [NUM_REQS-1:0] core_bus_out_ready; + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready; + end + + always @(*) begin + state_n = state; + flush_done_n = flush_done; + lock_released_n = lock_released; + case (state) + STATE_IDLE: begin + if (flush_req_enable) begin + state_n = (BANK_SEL_LATENCY != 0) ? 
STATE_WAIT : STATE_FLUSH; + end + end + STATE_WAIT: begin + if (no_inflight_reqs) begin + state_n = STATE_FLUSH; + end + end + STATE_FLUSH: begin + flush_done_n = flush_done | flush_ready; + if (flush_done_n == 0) begin + state_n = STATE_DONE; + lock_released_n = flush_req_mask; + end + end + STATE_DONE: begin + lock_released_n = lock_released & ~core_bus_out_ready; + if (lock_released_n == 0) begin + state_n = STATE_IDLE; + end + end + endcase + end + + always @(posedge clk) begin + if (reset) begin + state <= STATE_IDLE; + flush_done <= '0; + lock_released <= '0; + end else begin + state <= state_n; + flush_done <= flush_done_n; + lock_released <= lock_released_n; + end + end + + assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}}; + +endmodule diff --git a/hw/rtl/cache/VX_cache_init.sv b/hw/rtl/cache/VX_cache_init.sv index 7aa4b3ae4..3cccdcdae 100644 --- a/hw/rtl/cache/VX_cache_init.sv +++ b/hw/rtl/cache/VX_cache_init.sv @@ -13,6 +13,7 @@ `include "VX_cache_define.vh" +// cache flush unit module VX_cache_init #( // Size of cache in bytes parameter CACHE_SIZE = 1024, diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 912bd4d7f..b0e577283 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -104,7 +104,8 @@ module VX_cache_mshr #( // lookup input wire lookup_valid, input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr, - output wire [MSHR_SIZE-1:0] lookup_matches, + output wire [MSHR_SIZE-1:0] lookup_pending, + output wire [MSHR_SIZE-1:0] lookup_rw, // finalize input wire finalize_valid, @@ -216,13 +217,13 @@ module VX_cache_mshr #( next_table <= next_table_n; end - `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s-bank%0d inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID, + `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, 
BANK_ID), allocate_id_r, lkp_req_uuid)) - `RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s-bank%0d invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, BANK_ID, + `RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) - `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s-bank%0d invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID, + `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) VX_dp_ram #( @@ -251,7 +252,9 @@ module VX_cache_mshr #( assign dequeue_rw = write_table[dequeue_id_r]; assign dequeue_id = dequeue_id_r; - assign lookup_matches = addr_matches & ~write_table; + // return pending entries for the given cache line + assign lookup_pending = addr_matches; + assign lookup_rw = write_table; `UNUSED_VAR (lookup_valid) @@ -264,22 +267,22 @@ module VX_cache_mshr #( show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; end if (allocate_fire) - `TRACE(3, ("%d: %s-bank%0d mshr-allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, + `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)); if (lookup_valid) - `TRACE(3, ("%d: %s-bank%0d mshr-lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, BANK_ID, - `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_matches, lkp_req_uuid)); + `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)); if (finalize_valid) - `TRACE(3, ("%d: %s-bank%0d mshr-finalize 
release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, + `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)); if (fill_valid) - `TRACE(3, ("%d: %s-bank%0d mshr-fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, BANK_ID, + `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)); if (dequeue_fire) - `TRACE(3, ("%d: %s-bank%0d mshr-dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, + `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)); if (show_table) begin - `TRACE(3, ("%d: %s-bank%0d mshr-table", $time, INSTANCE_ID, BANK_ID)); + `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)); for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))); diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 274f55136..4595bdbcf 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,15 +17,15 @@ module VX_cache_tags #( parameter `STRING INSTANCE_ID = "", parameter BANK_ID = 0, // Size of cache in bytes - parameter CACHE_SIZE = 1024, + parameter CACHE_SIZE = 1024, // Size of line inside a bank in bytes - parameter LINE_SIZE = 16, + parameter LINE_SIZE = 16, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Number of associative ways - parameter NUM_WAYS = 1, + parameter NUM_WAYS = 1, // Size of a word in bytes - parameter WORD_SIZE = 1, + parameter WORD_SIZE = 1, // Request debug identifier parameter UUID_WIDTH = 0 ) ( @@ -38,45 +38,63 @@ module VX_cache_tags #( input wire stall, - // read/fill + // init/fill/lookup + input wire init, + input wire fill, input wire lookup, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire fill, - input wire init, - output wire [NUM_WAYS-1:0] way_sel, - output wire [NUM_WAYS-1:0] tag_matches + output wire [NUM_WAYS-1:0] tag_matches, + + // replacement + output wire [NUM_WAYS-1:0] repl_way, + output wire [`CS_TAG_SEL_BITS-1:0] repl_tag ); `UNUSED_SPARAM (INSTANCE_ID) `UNUSED_PARAM (BANK_ID) `UNUSED_VAR (reset) `UNUSED_VAR (lookup) + // valid, tag localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; - wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr); + wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); + + wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; + wire [NUM_WAYS-1:0] read_valid; if (NUM_WAYS > 1) begin - reg [NUM_WAYS-1:0] repl_way; + reg [NUM_WAYS-1:0] repl_way_r; // cyclic assignment of replacement way always @(posedge clk) begin if (reset) begin - repl_way <= 1; + repl_way_r <= 1; end else if (~stall) begin // hold the value on stalls prevent filling different slots twice - repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]}; + repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]}; end - end - for (genvar i = 0; i < NUM_WAYS; ++i) begin - 
assign way_sel[i] = fill && repl_way[i]; end + + assign repl_way = repl_way_r; + + VX_onehot_mux #( + .DATAW (`CS_TAG_SEL_BITS), + .N (NUM_WAYS) + ) repl_tag_sel ( + .data_in (read_tag), + .sel_in (repl_way_r), + .data_out (repl_tag) + ); end else begin `UNUSED_VAR (stall) - assign way_sel = fill; + assign repl_way = 1'b1; + assign repl_tag = read_tag; end for (genvar i = 0; i < NUM_WAYS; ++i) begin - wire [`CS_TAG_SEL_BITS-1:0] read_tag; - wire read_valid; + + wire do_fill = fill && repl_way[i]; + wire do_write = init || do_fill; + wire line_valid = ~init; VX_sp_ram #( .DATAW (TAG_WIDTH), @@ -85,32 +103,34 @@ module VX_cache_tags #( ) tag_store ( .clk (clk), .read (1'b1), - .write (way_sel[i] || init), - `UNUSED_PIN (wren), + .write (do_write), + `UNUSED_PIN (wren), .addr (line_sel), - .wdata ({~init, line_tag}), - .rdata ({read_valid, read_tag}) + .wdata ({line_valid, line_tag}), + .rdata ({read_valid[i], read_tag[i]}) ); - - assign tag_matches[i] = read_valid && (line_tag == read_tag); end - + + for (genvar i = 0; i < NUM_WAYS; ++i) begin + assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); + end + `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s-bank%0d tag-fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag)); + `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag)); end if (init) begin - `TRACE(3, ("%d: %s-bank%0d tag-init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)); + `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)); end if (lookup && ~stall) begin if (tag_matches != 0) begin - `TRACE(3, ("%d: %s-bank%0d tag-hit: addr=0x%0h, way=%b, 
blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); end else begin - `TRACE(3, ("%d: %s-bank%0d tag-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); end - end - end + end + end `endif endmodule diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 019a213a1..082d8b4e1 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,20 +23,20 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // Size of cache in bytes - parameter CACHE_SIZE = 4096, + parameter CACHE_SIZE = 4096, // Size of line inside a bank in bytes - parameter LINE_SIZE = 64, + parameter LINE_SIZE = 64, // Number of banks parameter NUM_BANKS = 1, // Number of associative ways parameter NUM_WAYS = 1, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 4, // Core Response Queue Size parameter CRSQ_SIZE = 2, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 8, // Memory Response Queue Size parameter MRSQ_SIZE = 0, // Memory Request Queue Size @@ -45,6 +45,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // Enable cache writeable parameter WRITE_ENABLE = 1, + // Enable cache writeback + parameter WRITEBACK = 0, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -63,7 +66,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // Memory request output buffer parameter MEM_OUT_BUF = 0 ) ( - + input wire clk, input wire reset, @@ -80,7 +83,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; - + localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); @@ -98,7 +101,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( ) mem_bus_cache_if(); if (NC_OR_BYPASS) begin - + `RESET_RELAY (nc_bypass_reset, reset); VX_cache_bypass #( @@ -108,13 +111,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .PASSTHRU (PASSTHRU), .NC_ENABLE (PASSTHRU ? 
0 : NC_ENABLE), - .WORD_SIZE (WORD_SIZE), + .WORD_SIZE (WORD_SIZE), .LINE_SIZE (LINE_SIZE), - .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), + .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), .CORE_TAG_WIDTH (TAG_WIDTH), - - .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), + + .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), @@ -132,15 +135,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_in_if (mem_bus_cache_if), .mem_bus_out_if (mem_bus_if) ); - + end else begin for (genvar i = 0; i < NUM_REQS; ++i) begin - `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); + `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); end `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if); - end + end if (PASSTHRU != 0) begin @@ -152,7 +155,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( assign core_bus_cache_if[i].rsp_valid = 0; assign core_bus_cache_if[i].rsp_data = '0; `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) - end + end assign mem_bus_cache_if.req_valid = 0; assign mem_bus_cache_if.req_data = '0; @@ -183,6 +186,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .MRSQ_SIZE (MRSQ_SIZE), .MREQ_SIZE (MREQ_SIZE), .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .CORE_OUT_BUF (NC_OR_BYPASS ? 
1 : CORE_OUT_BUF), @@ -195,8 +199,8 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `endif .core_bus_if (core_bus_cache_if), .mem_bus_if (mem_bus_cache_if) - ); - + ); + end `ifdef DBG_TRACE_CACHE @@ -225,9 +229,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( end if (core_rsp_fire) begin `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); - end + end end - end + end wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid; wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; @@ -246,17 +250,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin if (mem_bus_if.req_data.rw) - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); else - `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)); end if (mem_rsp_fire) begin - `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)); end - end + end `endif endmodule diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index d84ff0a07..47bfcc6bf 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -14,7 +14,7 @@ `include "VX_define.vh" module VX_alu_int #( - parameter CORE_ID = 0, + parameter `STRING INSTANCE_ID = "", parameter BLOCK_IDX = 0, parameter NUM_LANES = 1 ) ( @@ -29,7 +29,7 @@ module VX_alu_int #( VX_branch_ctl_if.master 
branch_ctl_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam LANE_BITS = `CLOG2(NUM_LANES); localparam LANE_WIDTH = `UP(LANE_BITS); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); @@ -121,7 +121,7 @@ module VX_alu_int #( case ({is_alu_w, op_class}) 3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC 3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR* - 3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO* + 3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO* 3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI 3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW 3'b101: alu_result[i] = sub_result_w[i]; // SUBW @@ -181,7 +181,7 @@ module VX_alu_int #( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({br_enable, br_wid, br_taken, br_dest}), + .data_in ({br_enable, br_wid, br_taken, br_dest}), .data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest}) ); @@ -193,9 +193,9 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin - if (branch_ctl_if.valid) begin - `TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", - $time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid)); + if (br_enable) begin + `TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)); end end `endif diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 492625874..460295463 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -14,7 +14,7 @@ `include "VX_define.vh" module VX_alu_muldiv #( - parameter CORE_ID = 0, + parameter `STRING INSTANCE_ID = "", parameter NUM_LANES = 1 ) ( input wire clk, @@ -26,7 +26,7 @@ module VX_alu_muldiv #( // Outputs 
VX_commit_if.master commit_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1; @@ -69,7 +69,7 @@ module VX_alu_muldiv #( wire mul_fire_in = mul_valid_in && mul_ready_in; for (genvar i = 0; i < NUM_LANES; ++i) begin - wire [`XLEN-1:0] mul_resultl, mul_resulth; + reg [`XLEN-1:0] mul_resultl, mul_resulth; wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i]; wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i]; always @(*) begin @@ -235,7 +235,7 @@ module VX_alu_muldiv #( wire div_fire_in = div_valid_in && div_ready_in; for (genvar i = 0; i < NUM_LANES; ++i) begin - wire [`XLEN-1:0] div_quotient, div_remainder; + reg [`XLEN-1:0] div_quotient, div_remainder; always @(*) begin dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder); end diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index f3e0a39b2..d8c131838 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -14,7 +14,7 @@ `include "VX_define.vh" module VX_alu_unit #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, @@ -27,7 +27,7 @@ module VX_alu_unit #( VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS] ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam BLOCK_SIZE = `NUM_ALU_BLOCKS; localparam NUM_LANES = `NUM_ALU_LANES; localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); @@ -75,7 +75,7 @@ module VX_alu_unit #( `RESET_RELAY (int_reset, block_reset); VX_alu_int #( - .CORE_ID (CORE_ID), + .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)), .BLOCK_IDX (block_idx), .NUM_LANES (NUM_LANES) ) alu_int ( @@ -90,59 +90,61 @@ 
module VX_alu_unit #( VX_execute_if #( .NUM_LANES (NUM_LANES) - ) mdv_execute_if(); + ) muldiv_execute_if(); VX_commit_if #( .NUM_LANES (NUM_LANES) - ) mdv_commit_if(); + ) muldiv_commit_if(); - assign mdv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op; - assign mdv_execute_if.data = per_block_execute_if[block_idx].data; + assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op; + assign muldiv_execute_if.data = per_block_execute_if[block_idx].data; - `RESET_RELAY (mdv_reset, block_reset); + `RESET_RELAY (muldiv_reset, block_reset); VX_alu_muldiv #( - .CORE_ID (CORE_ID), + .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)), .NUM_LANES (NUM_LANES) - ) mdv_unit ( + ) muldiv_unit ( .clk (clk), - .reset (mdv_reset), - .execute_if (mdv_execute_if), - .commit_if (mdv_commit_if) + .reset (muldiv_reset), + .execute_if (muldiv_execute_if), + .commit_if (muldiv_commit_if) ); `endif assign per_block_execute_if[block_idx].ready = `ifdef EXT_M_ENABLE - is_muldiv_op ? mdv_execute_if.ready : + is_muldiv_op ? muldiv_execute_if.ready : `endif int_execute_if.ready; // send response + `RESET_RELAY (arb_reset, block_reset); + VX_stream_arb #( .NUM_INPUTS (RSP_ARB_SIZE), .DATAW (RSP_ARB_DATAW), .OUT_BUF (PARTIAL_BW ? 
1 : 3) ) rsp_arb ( .clk (clk), - .reset (block_reset), + .reset (arb_reset), .valid_in ({ `ifdef EXT_M_ENABLE - mdv_commit_if.valid, + muldiv_commit_if.valid, `endif int_commit_if.valid }), .ready_in ({ `ifdef EXT_M_ENABLE - mdv_commit_if.ready, + muldiv_commit_if.ready, `endif int_commit_if.ready }), .data_in ({ `ifdef EXT_M_ENABLE - mdv_commit_if.data, + muldiv_commit_if.data, `endif int_commit_if.data }), diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index 3726622aa..d78c2ec89 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -13,8 +13,8 @@ `include "VX_define.vh" -module VX_commit import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 +module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, @@ -27,7 +27,7 @@ module VX_commit import VX_gpu_pkg::*; #( VX_commit_csr_if.master commit_csr_if, VX_commit_sched_if.master commit_sched_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1; localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1); localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1; @@ -36,12 +36,10 @@ module VX_commit import VX_gpu_pkg::*; #( VX_commit_if commit_arb_if[`ISSUE_WIDTH](); - wire [`ISSUE_WIDTH-1:0] commit_fire; - wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] commit_wid; - wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] commit_tmask; - wire [`ISSUE_WIDTH-1:0] commit_eop; - - `RESET_RELAY (arb_reset, reset); + wire [`ISSUE_WIDTH-1:0] per_issue_commit_fire; + wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] per_issue_commit_wid; + wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask; + wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop; for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin @@ -55,6 +53,8 @@ module VX_commit import VX_gpu_pkg::*; #( assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j]; end + `RESET_RELAY 
(arb_reset, reset); + VX_stream_arb #( .NUM_INPUTS (`NUM_EX_UNITS), .DATAW (DATAW), @@ -72,10 +72,10 @@ module VX_commit import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - assign commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready; - assign commit_tmask[i]= {`NUM_THREADS{commit_fire[i]}} & commit_arb_if[i].data.tmask; - assign commit_wid[i] = commit_arb_if[i].data.wid; - assign commit_eop[i] = commit_arb_if[i].data.eop; + assign per_issue_commit_fire[i] = commit_arb_if[i].valid && commit_arb_if[i].ready; + assign per_issue_commit_tmask[i]= {`NUM_THREADS{per_issue_commit_fire[i]}} & commit_arb_if[i].data.tmask; + assign per_issue_commit_wid[i] = commit_arb_if[i].data.wid; + assign per_issue_commit_eop[i] = commit_arb_if[i].data.eop; end // CSRs update @@ -84,11 +84,11 @@ module VX_commit import VX_gpu_pkg::*; #( wire [COMMIT_ALL_SIZEW-1:0] commit_size_all_r, commit_size_all_rr; wire commit_fire_any, commit_fire_any_r, commit_fire_any_rr; - assign commit_fire_any = (| commit_fire); + assign commit_fire_any = (| per_issue_commit_fire); for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin wire [COMMIT_SIZEW-1:0] count; - `POP_COUNT(count, commit_tmask[i]); + `POP_COUNT(count, per_issue_commit_tmask[i]); assign commit_size[i] = count; end @@ -136,19 +136,28 @@ module VX_commit import VX_gpu_pkg::*; #( end assign commit_csr_if.instret = instret; - // Committed instructions + // Track committed instructions - wire [`ISSUE_WIDTH-1:0] committed = commit_fire & commit_eop; + reg [`NUM_WARPS-1:0] committed_warps; + + always @(*) begin + committed_warps = 0; + for (integer i = 0; i < `ISSUE_WIDTH; ++i) begin + if (per_issue_commit_fire[i] && per_issue_commit_eop[i]) begin + committed_warps[per_issue_commit_wid[i]] = 1; + end + end + end VX_pipe_register #( - .DATAW (`ISSUE_WIDTH * (1 + `NW_WIDTH)), - .RESETW (`ISSUE_WIDTH) + .DATAW (`NUM_WARPS), + .RESETW (`NUM_WARPS) ) committed_pipe_reg ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({committed, 
commit_wid}), - .data_out ({commit_sched_if.committed, commit_sched_if.committed_wid}) + .data_in (committed_warps), + .data_out ({commit_sched_if.committed_warps}) ); // Writeback @@ -171,7 +180,7 @@ module VX_commit import VX_gpu_pkg::*; #( for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin - `TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})); trace_ex_type(1, j); `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)); `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS); diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index c923e2ef0..090f47199 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -18,7 +18,8 @@ `endif module VX_core import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter CORE_ID = 0, + parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL @@ -94,13 +95,14 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (3) VX_schedule #( + .INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)), .CORE_ID (CORE_ID) ) schedule ( .clk (clk), .reset (schedule_reset), `ifdef PERF_ENABLE - .perf_schedule_if (pipeline_perf_if.schedule), + .sched_perf (pipeline_perf_if.sched), `endif .base_dcrs (base_dcrs), @@ -121,7 +123,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_fetch #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID)) ) fetch ( `SCOPE_IO_BIND (0) .clk (clk), @@ 
-132,7 +134,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_decode #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID)) ) decode ( .clk (clk), .reset (decode_reset), @@ -142,7 +144,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_issue #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID)) ) issue ( `SCOPE_IO_BIND (1) @@ -150,7 +152,7 @@ module VX_core import VX_gpu_pkg::*; #( .reset (issue_reset), `ifdef PERF_ENABLE - .perf_issue_if (pipeline_perf_if.issue), + .issue_perf (pipeline_perf_if.issue), `endif .decode_if (decode_if), @@ -159,6 +161,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_execute #( + .INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)), .CORE_ID (CORE_ID) ) execute ( `SCOPE_IO_BIND (2) @@ -186,7 +189,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_commit #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID)) ) commit ( .clk (clk), .reset (commit_reset), @@ -210,7 +213,7 @@ module VX_core import VX_gpu_pkg::*; #( `RESET_RELAY (lmem_unit_reset, reset); VX_lmem_unit #( - .CORE_ID (CORE_ID) + .INSTANCE_ID (INSTANCE_ID) ) lmem_unit ( .clk (clk), .reset (lmem_unit_reset), @@ -229,20 +232,20 @@ module VX_core import VX_gpu_pkg::*; #( `endif - VX_lsu_mem_if #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin + VX_lsu_mem_if #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_coalesced_if(); - `RESET_RELAY (coalescer_reset, reset); + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + `RESET_RELAY (mem_coalescer_reset, reset); VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("core%0d-coalescer", CORE_ID)), + .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), .NUM_REQS 
(`NUM_LSU_LANES), .DATA_IN_SIZE (LSU_WORD_SIZE), .DATA_OUT_SIZE (DCACHE_WORD_SIZE), @@ -251,9 +254,9 @@ module VX_core import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH), .UUID_WIDTH (`UUID_WIDTH), .QUEUE_SIZE (`LSUQ_OUT_SIZE) - ) coalescer ( + ) mem_coalescer ( .clk (clk), - .reset (coalescer_reset), + .reset (mem_coalescer_reset), // Input request .in_req_valid (lsu_dcache_if[i].req_valid), @@ -274,42 +277,37 @@ module VX_core import VX_gpu_pkg::*; #( .in_rsp_ready (lsu_dcache_if[i].rsp_ready), // Output request - .out_req_valid (dcache_coalesced_if[i].req_valid), - .out_req_mask (dcache_coalesced_if[i].req_data.mask), - .out_req_rw (dcache_coalesced_if[i].req_data.rw), - .out_req_byteen (dcache_coalesced_if[i].req_data.byteen), - .out_req_addr (dcache_coalesced_if[i].req_data.addr), - .out_req_atype (dcache_coalesced_if[i].req_data.atype), - .out_req_data (dcache_coalesced_if[i].req_data.data), - .out_req_tag (dcache_coalesced_if[i].req_data.tag), - .out_req_ready (dcache_coalesced_if[i].req_ready), + .out_req_valid (dcache_coalesced_if.req_valid), + .out_req_mask (dcache_coalesced_if.req_data.mask), + .out_req_rw (dcache_coalesced_if.req_data.rw), + .out_req_byteen (dcache_coalesced_if.req_data.byteen), + .out_req_addr (dcache_coalesced_if.req_data.addr), + .out_req_atype (dcache_coalesced_if.req_data.atype), + .out_req_data (dcache_coalesced_if.req_data.data), + .out_req_tag (dcache_coalesced_if.req_data.tag), + .out_req_ready (dcache_coalesced_if.req_ready), // Output response - .out_rsp_valid (dcache_coalesced_if[i].rsp_valid), - .out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask), - .out_rsp_data (dcache_coalesced_if[i].rsp_data.data), - .out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag), - .out_rsp_ready (dcache_coalesced_if[i].rsp_ready) + .out_rsp_valid (dcache_coalesced_if.rsp_valid), + .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), + .out_rsp_data (dcache_coalesced_if.rsp_data.data), + .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), + 
.out_rsp_ready (dcache_coalesced_if.rsp_ready) ); + + end else begin + + `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); + end - end else begin - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); - end - - end - - `RESET_RELAY (lsu_adapter_reset, reset); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_bus_tmp_if[DCACHE_CHANNELS](); + `RESET_RELAY (lsu_adapter_reset, reset); + VX_lsu_adapter #( .NUM_LANES (DCACHE_CHANNELS), .DATA_SIZE (DCACHE_WORD_SIZE), @@ -320,15 +318,17 @@ module VX_core import VX_gpu_pkg::*; #( ) lsu_adapter ( .clk (clk), .reset (lsu_adapter_reset), - .lsu_mem_if (dcache_coalesced_if[i]), + .lsu_mem_if (dcache_coalesced_if), .mem_bus_if (dcache_bus_tmp_if) ); for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); end + end + `ifdef PERF_ENABLE wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle; diff --git a/hw/rtl/core/VX_core_top.sv b/hw/rtl/core/VX_core_top.sv index db6485564..420ae7b67 100644 --- a/hw/rtl/core/VX_core_top.sv +++ b/hw/rtl/core/VX_core_top.sv @@ -144,6 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #( `endif VX_core #( + .INSTANCE_ID ($sformatf("core")), .CORE_ID (CORE_ID) ) core ( `SCOPE_IO_BIND (0) diff --git a/hw/rtl/core/VX_csr_data.sv b/hw/rtl/core/VX_csr_data.sv index 7f781cbf5..a2b0741ad 100644 --- a/hw/rtl/core/VX_csr_data.sv +++ b/hw/rtl/core/VX_csr_data.sv @@ -26,13 +26,13 @@ addr+12'h80 : dst = 32'(src[$bits(src)-1:32]) `endif - module VX_csr_data import VX_gpu_pkg::*; `ifdef EXT_F_ENABLE import VX_fpu_pkg::*; `endif #( + parameter `STRING INSTANCE_ID = "", parameter CORE_ID = 0 ) ( input wire clk, @@ -147,7 +147,7 @@ import VX_fpu_pkg::*; mscratch <= write_data; end default: begin - `ASSERT(0, ("%t: *** invalid CSR write address: %0h 
(#%0d)", $time, write_addr, write_uuid)); + `ASSERT(0, ("%t: *** %s invalid CSR write address: %0h (#%0d)", $time, INSTANCE_ID, write_addr, write_uuid)); end endcase end @@ -212,21 +212,21 @@ import VX_fpu_pkg::*; `VX_DCR_MPM_CLASS_CORE: begin case (read_addr) // PERF: pipeline - `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched_idles); - `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched_stalls); - `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.ibf_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.scb_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_ALU]); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls); + `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls); + `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls); + `CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls); + `CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]); `ifdef EXT_F_ENABLE - `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_FPU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]); `else - `VX_CSR_MPM_SCRB_FPU : read_data_ro_r = '0; - `VX_CSR_MPM_SCRB_FPU_H : read_data_ro_r = '0; + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0)); `endif - `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_LSU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.units_uses[`EX_SFU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_CSRS]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.sfu_uses[`SFU_WCTL]); + 
`CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]); // PERF: memory `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches); `CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads); diff --git a/hw/rtl/core/VX_csr_unit.sv b/hw/rtl/core/VX_csr_unit.sv index 4bc552374..999c9c416 100644 --- a/hw/rtl/core/VX_csr_unit.sv +++ b/hw/rtl/core/VX_csr_unit.sv @@ -14,6 +14,7 @@ `include "VX_define.vh" module VX_csr_unit import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", parameter CORE_ID = 0, parameter NUM_LANES = 1 ) ( @@ -36,7 +37,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #( VX_execute_if.slave execute_if, VX_commit_if.master commit_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; @@ -72,7 +73,8 @@ module VX_csr_unit import VX_gpu_pkg::*; #( wire csr_write_enable = (execute_if.data.op_type == `INST_SFU_CSRRW); VX_csr_data #( - .CORE_ID (CORE_ID) + .INSTANCE_ID (INSTANCE_ID), + .CORE_ID (CORE_ID) ) csr_data ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 5218fb86a..58e51efc5 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -12,9 +12,8 @@ // limitations under the License. 
`include "VX_define.vh" -`include "VX_trace.vh" -module VX_dcr_data import VX_gpu_pkg::*; ( +module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; ( input wire clk, input wire reset, diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 3fad20b75..9660859ce 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -12,7 +12,6 @@ // limitations under the License. `include "VX_define.vh" -`include "VX_trace.vh" `ifdef EXT_F_ENABLE `define USED_IREG(x) \ @@ -28,8 +27,8 @@ use_``x = 1 `endif -module VX_decode import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 +module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, @@ -44,7 +43,7 @@ module VX_decode import VX_gpu_pkg::*; #( localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -145,6 +144,12 @@ module VX_decode import VX_gpu_pkg::*; #( end `endif + `STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t))); + `STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t))); + `STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t))); + `STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t))); + `STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t))); + always @(*) begin ex_type = '0; @@ -552,7 +557,7 @@ module VX_decode import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) 
begin if (decode_if.valid && decode_if.ready) begin - `TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)); trace_ex_type(1, decode_if.data.ex_type); `TRACE(1, (", op=")); trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 77a5ed449..8ea3a6125 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -12,10 +12,9 @@ // limitations under the License. `include "VX_define.vh" -`include "VX_trace.vh" module VX_dispatch import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, @@ -24,12 +23,12 @@ module VX_dispatch import VX_gpu_pkg::*; #( output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS], `endif // inputs - VX_operands_if.slave operands_if [`ISSUE_WIDTH], + VX_operands_if.slave operands_if, // outputs - VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] + VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS] ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH; @@ -38,104 +37,71 @@ module VX_dispatch import VX_gpu_pkg::*; #( assign tids[i] = `NT_WIDTH'(i); end - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + wire [`NT_WIDTH-1:0] last_active_tid; - wire [`NT_WIDTH-1:0] last_active_tid; + VX_find_first #( + .N (`NUM_THREADS), + .DATAW (`NT_WIDTH), + .REVERSE (1) + ) last_tid_select ( + .valid_in (operands_if.data.tmask), + .data_in (tids), + .data_out (last_active_tid), + `UNUSED_PIN (valid_out) + ); - VX_find_first #( - .N (`NUM_THREADS), - .DATAW (`NT_WIDTH), - 
.REVERSE (1) - ) last_tid_select ( - .valid_in (operands_if[i].data.tmask), - .data_in (tids), - .data_out (last_active_tid), - `UNUSED_PIN (valid_out) + wire [`NUM_EX_UNITS-1:0] operands_reset; + assign operands_if.ready = operands_reset[operands_if.data.ex_type]; + + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + + `RESET_RELAY (buffer_reset, reset); + + VX_elastic_buffer #( + .DATAW (DATAW), + .SIZE (2), + .OUT_REG (2), // 2-cycle EB for area reduction + .LUTRAM (1) + ) buffer ( + .clk (clk), + .reset (buffer_reset), + .valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))), + .ready_in (operands_reset[i]), + .data_in ({ + operands_if.data.uuid, + operands_if.data.wis, + operands_if.data.tmask, + operands_if.data.PC, + operands_if.data.op_type, + operands_if.data.op_args, + operands_if.data.wb, + operands_if.data.rd, + last_active_tid, + operands_if.data.rs1_data, + operands_if.data.rs2_data, + operands_if.data.rs3_data + }), + .data_out (dispatch_if[i].data), + .valid_out (dispatch_if[i].valid), + .ready_out (dispatch_if[i].ready) ); - - wire [`NUM_EX_UNITS-1:0] operands_reset; - - `RESET_RELAY (buf_reset, reset); - - for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin - VX_elastic_buffer #( - .DATAW (DATAW), - .SIZE (2), - .OUT_REG (2) - ) buffer ( - .clk (clk), - .reset (buf_reset), - .valid_in (operands_if[i].valid && (operands_if[i].data.ex_type == j)), - .ready_in (operands_reset[j]), - .data_in (`TO_DISPATCH_DATA(operands_if[i].data, last_active_tid)), - .data_out (dispatch_if[j * `ISSUE_WIDTH + i].data), - .valid_out (dispatch_if[j * `ISSUE_WIDTH + i].valid), - .ready_out (dispatch_if[j * `ISSUE_WIDTH + i].ready) - ); - end - - assign operands_if[i].ready = operands_reset[operands_if[i].data.ex_type]; end `ifdef PERF_ENABLE - wire [`NUM_EX_UNITS-1:0] perf_unit_stalls_per_cycle, perf_unit_stalls_per_cycle_r; - reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle; reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] 
perf_stalls_r; - for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin - always @(*) begin - perf_issue_unit_stalls_per_cycle[i] = '0; - if (operands_if[i].valid && ~operands_if[i].ready) begin - perf_issue_unit_stalls_per_cycle[i][operands_if[i].data.ex_type] = 1; - end - end - end - - VX_reduce #( - .DATAW_IN (`NUM_EX_UNITS), - .N (`ISSUE_WIDTH), - .OP ("|") - ) reduce ( - .data_in (perf_issue_unit_stalls_per_cycle), - .data_out (perf_unit_stalls_per_cycle) - ); - - `BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle); + wire operands_if_stall = operands_if.valid && ~operands_if.ready; for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin always @(posedge clk) begin if (reset) begin perf_stalls_r[i] <= '0; end else begin - perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]); + perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(operands_if_stall && operands_if.data.ex_type == `EX_BITS'(i)); end end - end - - for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin assign perf_stalls[i] = perf_stalls_r[i]; end `endif -`ifdef DBG_TRACE_PIPELINE - for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin - always @(posedge clk) begin - if (operands_if[i].valid && operands_if[i].ready) begin - `TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, i), {operands_if[i].data.PC, 1'b0})); - trace_ex_type(1, operands_if[i].data.ex_type); - `TRACE(1, (", op=")); - trace_ex_op(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args); - `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd)); - `TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs1_data, `NUM_THREADS); - `TRACE(1, (", rs2_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs2_data, `NUM_THREADS); - `TRACE(1, (", rs3_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if[i].data.rs3_data, `NUM_THREADS); - trace_op_args(1, 
operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_args); - `TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid)); - end - end - end -`endif - endmodule diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index 5bc9b5566..ded25918c 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -14,6 +14,7 @@ `include "VX_define.vh" module VX_execute import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", parameter CORE_ID = 0 ) ( `SCOPE_IO_DECL @@ -55,7 +56,7 @@ module VX_execute import VX_gpu_pkg::*; #( `RESET_RELAY (sfu_reset, reset); VX_alu_unit #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID)) ) alu_unit ( .clk (clk), .reset (alu_reset), @@ -67,7 +68,7 @@ module VX_execute import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (1) VX_lsu_unit #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID)) ) lsu_unit ( `SCOPE_IO_BIND (0) .clk (clk), @@ -81,7 +82,7 @@ module VX_execute import VX_gpu_pkg::*; #( `RESET_RELAY (fpu_reset, reset); VX_fpu_unit #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID)) ) fpu_unit ( .clk (clk), .reset (fpu_reset), @@ -92,6 +93,7 @@ module VX_execute import VX_gpu_pkg::*; #( `endif VX_sfu_unit #( + .INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)), .CORE_ID (CORE_ID) ) sfu_unit ( .clk (clk), diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 7f3dc61ba..59c419a83 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -14,7 +14,7 @@ `include "VX_define.vh" module VX_fetch import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL @@ -30,7 +30,7 @@ module VX_fetch import VX_gpu_pkg::*; #( // outputs VX_fetch_if.master fetch_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) `UNUSED_VAR (reset) wire icache_req_valid; @@ -78,9 +78,11 @@ module VX_fetch import VX_gpu_pkg::*; #( .reset (reset), .incr (icache_req_fire && schedule_if.data.wid == i), 
.decr (fetch_if.ibuf_pop[i]), + `UNUSED_PIN (empty), + `UNUSED_PIN (alm_empty), .full (pending_ibuf_full[i]), - `UNUSED_PIN (size), - `UNUSED_PIN (empty) + `UNUSED_PIN (alm_full), + `UNUSED_PIN (size) ); end wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid]; @@ -89,7 +91,7 @@ module VX_fetch import VX_gpu_pkg::*; #( `endif `RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0), - ("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid)) + ("%t: *** %s invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, INSTANCE_ID, {schedule_if.data.PC, 1'b0}, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid)) // Icache Request @@ -129,45 +131,33 @@ module VX_fetch import VX_gpu_pkg::*; #( assign icache_bus_if.rsp_ready = fetch_if.ready; `ifdef DBG_SCOPE_FETCH - if (CORE_ID == 0) begin - `ifdef SCOPE - wire schedule_fire = schedule_if.valid && schedule_if.ready; - wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; - VX_scope_tap #( - .SCOPE_ID (1), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + - ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + - (ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH) - ) scope_tap ( - .clk(clk), - .reset(scope_reset), - .start(1'b0), - .stop(1'b0), - .triggers({ - reset, - schedule_fire, - icache_req_fire, - icache_rsp_fire - }), - .probes({ - schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, - icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, - icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag - }), - .bus_in(scope_bus_in), - .bus_out(scope_bus_out) - ); - `endif - `ifdef CHIPSCOPE - ila_fetch ila_fetch_inst ( - .clk (clk), - .probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}), - 
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}), - .probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid}) - ); - `endif - end + wire schedule_fire = schedule_if.valid && schedule_if.ready; + wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; + VX_scope_tap #( + .SCOPE_ID (1), + .TRIGGERW (4), + .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + + (ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH) + ) scope_tap ( + .clk (clk), + .reset (scope_reset), + .start (1'b0), + .stop (1'b0), + .triggers ({ + reset, + schedule_fire, + icache_req_fire, + icache_rsp_fire + }), + .probes ({ + schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, + icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, + icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag + }), + .bus_in (scope_bus_in), + .bus_out (scope_bus_out) + ); `else `SCOPE_IO_UNUSED() `endif @@ -177,10 +167,10 @@ module VX_fetch import VX_gpu_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin if (schedule_fire) begin - `TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)); + `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)); end if (fetch_fire) begin - `TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)); + `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, 
instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)); end end `endif diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index b90208719..8622db490 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -14,7 +14,7 @@ `include "VX_fpu_define.vh" module VX_fpu_unit import VX_fpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, @@ -26,7 +26,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( VX_commit_if.master commit_if [`ISSUE_WIDTH], VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS] ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam BLOCK_SIZE = `NUM_FPU_BLOCKS; localparam NUM_LANES = `NUM_FPU_LANES; localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); @@ -84,12 +84,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( wire execute_fire = per_block_execute_if[block_idx].valid && per_block_execute_if[block_idx].ready; wire fpu_rsp_fire = fpu_rsp_valid && fpu_rsp_ready; + `RESET_RELAY (ibuf_reset, block_reset); + VX_index_buffer #( .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + PID_WIDTH + 1 + 1), .SIZE (`FPUQ_SIZE) ) tag_store ( .clk (clk), - .reset (block_reset), + .reset (ibuf_reset), .acquire_en (execute_fire), .write_addr (fpu_req_tag), .write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), @@ -226,12 +228,14 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( // send response + `RESET_RELAY (rsp_reset, block_reset); + VX_elastic_buffer #( .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 
1), .SIZE (0) ) rsp_buf ( .clk (clk), - .reset (block_reset), + .reset (rsp_reset), .valid_in (fpu_rsp_valid), .ready_in (fpu_rsp_ready), .data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index cf10e688c..e8edf64c7 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -14,33 +14,36 @@ `include "VX_define.vh" module VX_ibuffer import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, +`ifdef PERF_ENABLE + output wire [`PERF_CTR_BITS-1:0] perf_stalls, +`endif + // inputs VX_decode_if.slave decode_if, // outputs - VX_ibuffer_if.master ibuffer_if [`NUM_WARPS] + VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS] ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4); - wire [`NUM_WARPS-1:0] ibuf_ready_in; - + wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), - .OUT_REG (2) // use a 2-cycle FIFO + .OUT_REG (2) // 2-cycle EB for area reduction ) instr_buf ( .clk (clk), .reset (reset), - .valid_in (decode_if.valid && decode_if.data.wid == i), + .valid_in (decode_if.valid && decode_if.data.wid == ISSUE_WIS_W'(w)), .data_in ({ decode_if.data.uuid, decode_if.data.tmask, @@ -52,15 +55,32 @@ module VX_ibuffer import VX_gpu_pkg::*; #( decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, - decode_if.data.rs3}), - .ready_in (ibuf_ready_in[i]), - .valid_out(ibuffer_if[i].valid), - .data_out (ibuffer_if[i].data), - .ready_out(ibuffer_if[i].ready) + decode_if.data.rs3 + }), + .ready_in (ibuf_ready_in[w]), + 
.valid_out(ibuffer_if[w].valid), + .data_out (ibuffer_if[w].data), + .ready_out(ibuffer_if[w].ready) ); `ifndef L1_ENABLE - assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready; + assign decode_if.ibuf_pop[w] = ibuffer_if[w].valid && ibuffer_if[w].ready; `endif end +`ifdef PERF_ENABLE + reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls; + + wire decode_if_stall = decode_if.valid && ~decode_if.ready; + + always @(posedge clk) begin + if (reset) begin + perf_ibf_stalls <= '0; + end else begin + perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_if_stall); + end + end + + assign perf_stalls = perf_ibf_stalls; +`endif + endmodule diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index c999502de..1480e6649 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -12,10 +12,9 @@ // limitations under the License. `include "VX_define.vh" -`include "VX_trace.vh" -module VX_issue #( - parameter CORE_ID = 0 +module VX_issue import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL @@ -23,137 +22,81 @@ module VX_issue #( input wire reset, `ifdef PERF_ENABLE - VX_pipeline_perf_if.issue perf_issue_if, + output issue_perf_t issue_perf, `endif VX_decode_if.slave decode_if, VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] ); - VX_ibuffer_if ibuffer_if [`NUM_WARPS](); - VX_scoreboard_if scoreboard_if [`ISSUE_WIDTH](); - VX_operands_if operands_if [`ISSUE_WIDTH](); - - `RESET_RELAY (ibuf_reset, reset); - `RESET_RELAY (scoreboard_reset, reset); - `RESET_RELAY (operands_reset, reset); - `RESET_RELAY (dispatch_reset, reset); - - VX_ibuffer #( - .CORE_ID (CORE_ID) - ) ibuffer ( - .clk (clk), - .reset (ibuf_reset), - .decode_if (decode_if), - .ibuffer_if (ibuffer_if) - ); - - VX_scoreboard #( - .CORE_ID (CORE_ID) - ) scoreboard ( - .clk (clk), - .reset (scoreboard_reset), - `ifdef PERF_ENABLE - .perf_scb_stalls(perf_issue_if.scb_stalls), - 
.perf_units_uses(perf_issue_if.units_uses), - .perf_sfu_uses (perf_issue_if.sfu_uses), - `endif - .writeback_if (writeback_if), - .ibuffer_if (ibuffer_if), - .scoreboard_if (scoreboard_if) - ); - - VX_operands #( - .CORE_ID (CORE_ID) - ) operands ( - .clk (clk), - .reset (operands_reset), - .writeback_if (writeback_if), - .scoreboard_if (scoreboard_if), - .operands_if (operands_if) - ); - - VX_dispatch #( - .CORE_ID (CORE_ID) - ) dispatch ( - .clk (clk), - .reset (dispatch_reset), - `ifdef PERF_ENABLE - `UNUSED_PIN (perf_stalls), - `endif - .operands_if (operands_if), - .dispatch_if (dispatch_if) - ); - -`ifdef DBG_SCOPE_ISSUE - if (CORE_ID == 0) begin - `ifdef SCOPE - wire operands_if_fire = operands_if[0].valid && operands_if[0].ready; - wire operands_if_not_ready = ~operands_if[0].ready; - wire writeback_if_valid = writeback_if[0].valid; - VX_scope_tap #( - .SCOPE_ID (2), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + - 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) - ) scope_tap ( - .clk(clk), - .reset(scope_reset), - .start(1'b0), - .stop(1'b0), - .triggers({ - reset, - operands_if_fire, - operands_if_not_ready, - writeback_if_valid - }), - .probes({ - operands_if[0].data.uuid, - operands_if[0].data.tmask, - operands_if[0].data.ex_type, - operands_if[0].data.op_type, - operands_if[0].data.wb, - operands_if[0].data.rd, - operands_if[0].data.rs1_data, - operands_if[0].data.rs2_data, - operands_if[0].data.rs3_data, - writeback_if[0].data.uuid, - writeback_if[0].data.tmask, - writeback_if[0].data.rd, - writeback_if[0].data.data, - writeback_if[0].data.eop - }), - .bus_in(scope_bus_in), - .bus_out(scope_bus_out) - ); - `endif - `ifdef CHIPSCOPE - ila_issue ila_issue_inst ( - .clk (clk), - .probe0 ({operands_if.uuid, ibuffer.rs3, ibuffer.rs2, ibuffer.rs1, operands_if.PC, operands_if.tmask, operands_if.wid, operands_if.ex_type, operands_if.op_type, operands_if.ready, 
operands_if.valid}), - .probe1 ({writeback_if.uuid, writeback_if.data[0], writeback_if.PC, writeback_if.tmask, writeback_if.wid, writeback_if.eop, writeback_if.valid}) - ); - `endif - end -`else - `SCOPE_IO_UNUSED() -`endif `ifdef PERF_ENABLE - reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls; - - wire decode_stall = decode_if.valid && ~decode_if.ready; - - always @(posedge clk) begin - if (reset) begin - perf_ibf_stalls <= '0; - end else begin - perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'(decode_stall); - end + issue_perf_t per_issue_perf [`ISSUE_WIDTH]; + `PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) + `PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) + `PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + `PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) + end + for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin + `PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) end - - assign perf_issue_if.ibf_stalls = perf_ibf_stalls; `endif + wire [ISSUE_ISW_W-1:0] decode_isw = wid_to_isw(decode_if.data.wid); + wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid); + + wire [`ISSUE_WIDTH-1:0] decode_ready_in; + assign decode_if.ready = decode_ready_in[decode_isw]; + + `SCOPE_IO_SWITCH (`ISSUE_WIDTH) + + for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices + VX_decode_if #( + .NUM_WARPS (PER_ISSUE_WARPS) + ) per_issue_decode_if(); + + VX_dispatch_if per_issue_dispatch_if[`NUM_EX_UNITS](); + + assign per_issue_decode_if.valid = decode_if.valid && (decode_isw == ISSUE_ISW_W'(issue_id)); + assign per_issue_decode_if.data.uuid = decode_if.data.uuid; + assign per_issue_decode_if.data.wid = 
decode_wis; + assign per_issue_decode_if.data.tmask = decode_if.data.tmask; + assign per_issue_decode_if.data.PC = decode_if.data.PC; + assign per_issue_decode_if.data.ex_type = decode_if.data.ex_type; + assign per_issue_decode_if.data.op_type = decode_if.data.op_type; + assign per_issue_decode_if.data.op_args = decode_if.data.op_args; + assign per_issue_decode_if.data.wb = decode_if.data.wb; + assign per_issue_decode_if.data.rd = decode_if.data.rd; + assign per_issue_decode_if.data.rs1 = decode_if.data.rs1; + assign per_issue_decode_if.data.rs2 = decode_if.data.rs2; + assign per_issue_decode_if.data.rs3 = decode_if.data.rs3; + assign decode_ready_in[issue_id] = per_issue_decode_if.ready; + `ifndef L1_ENABLE + assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop; + `endif + + `RESET_RELAY (slice_reset, reset); + + VX_issue_slice #( + .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)), + .ISSUE_ID (issue_id) + ) issue_slice ( + `SCOPE_IO_BIND(issue_id) + .clk (clk), + .reset (slice_reset), + `ifdef PERF_ENABLE + .issue_perf (per_issue_perf[issue_id]), + `endif + .decode_if (per_issue_decode_if), + .writeback_if (writeback_if[issue_id]), + .dispatch_if (per_issue_dispatch_if) + ); + + // Assign transposed dispatch_if + for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin + `ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]); + end + end + endmodule diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv new file mode 100644 index 000000000..03b91b5fe --- /dev/null +++ b/hw/rtl/core/VX_issue_slice.sv @@ -0,0 +1,159 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + parameter ISSUE_ID = 0 +) ( + `SCOPE_IO_DECL + + input wire clk, + input wire reset, + +`ifdef PERF_ENABLE + output issue_perf_t issue_perf, +`endif + + VX_decode_if.slave decode_if, + VX_writeback_if.slave writeback_if, + VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS] +); + `UNUSED_PARAM (ISSUE_ID) + + VX_ibuffer_if ibuffer_if [PER_ISSUE_WARPS](); + VX_scoreboard_if scoreboard_if(); + VX_operands_if operands_if(); + + `RESET_RELAY (ibuf_reset, reset); + `RESET_RELAY (scoreboard_reset, reset); + `RESET_RELAY (operands_reset, reset); + `RESET_RELAY (dispatch_reset, reset); + + VX_ibuffer #( + .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) + ) ibuffer ( + .clk (clk), + .reset (ibuf_reset), + `ifdef PERF_ENABLE + .perf_stalls (issue_perf.ibf_stalls), + `endif + .decode_if (decode_if), + .ibuffer_if (ibuffer_if) + ); + + VX_scoreboard #( + .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID)) + ) scoreboard ( + .clk (clk), + .reset (scoreboard_reset), + `ifdef PERF_ENABLE + .perf_stalls (issue_perf.scb_stalls), + .perf_units_uses(issue_perf.units_uses), + .perf_sfu_uses (issue_perf.sfu_uses), + `endif + .writeback_if (writeback_if), + .ibuffer_if (ibuffer_if), + .scoreboard_if (scoreboard_if) + ); + + VX_operands #( + .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID)) + ) operands ( + .clk (clk), + .reset (operands_reset), + `ifdef PERF_ENABLE + .perf_stalls (issue_perf.opd_stalls), + `endif + .writeback_if 
(writeback_if), + .scoreboard_if (scoreboard_if), + .operands_if (operands_if) + ); + + VX_dispatch #( + .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID)) + ) dispatch ( + .clk (clk), + .reset (dispatch_reset), + `ifdef PERF_ENABLE + `UNUSED_PIN (perf_stalls), + `endif + .operands_if (operands_if), + .dispatch_if (dispatch_if) + ); + +`ifdef DBG_SCOPE_ISSUE + wire operands_if_fire = operands_if.valid && operands_if.ready; + wire operands_if_not_ready = ~operands_if.ready; + wire writeback_if_valid = writeback_if.valid; + VX_scope_tap #( + .SCOPE_ID (2), + .TRIGGERW (4), + .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) + ) scope_tap ( + .clk (clk), + .reset (scope_reset), + .start (1'b0), + .stop (1'b0), + .triggers ({ + reset, + operands_if_fire, + operands_if_not_ready, + writeback_if_valid + }), + .probes ({ + operands_if.data.uuid, + operands_if.data.tmask, + operands_if.data.ex_type, + operands_if.data.op_type, + operands_if.data.wb, + operands_if.data.rd, + operands_if.data.rs1_data, + operands_if.data.rs2_data, + operands_if.data.rs3_data, + writeback_if.data.uuid, + writeback_if.data.tmask, + writeback_if.data.rd, + writeback_if.data.data, + writeback_if.data.eop + }), + .bus_in (scope_bus_in), + .bus_out (scope_bus_out) + ); +`else + `SCOPE_IO_UNUSED() +`endif + +`ifdef DBG_TRACE_PIPELINE + always @(posedge clk) begin + if (operands_if.valid && operands_if.ready) begin + `TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})); + trace_ex_type(1, operands_if.data.ex_type); + `TRACE(1, (", op=")); + trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); + `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)); + `TRACE_ARRAY1D(1, "0x%0h", 
operands_if.data.rs1_data, `NUM_THREADS); + `TRACE(1, (", rs2_data=")); + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS); + `TRACE(1, (", rs3_data=")); + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS); + trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); + `TRACE(1, (" (#%0d)\n", operands_if.data.uuid)); + end + end +`endif + +endmodule diff --git a/hw/rtl/core/VX_issue_top.sv b/hw/rtl/core/VX_issue_top.sv new file mode 100644 index 000000000..0166cf770 --- /dev/null +++ b/hw/rtl/core/VX_issue_top.sv @@ -0,0 +1,132 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module VX_issue_top import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "issue" +) ( + // Clock + input wire clk, + input wire reset, + + input wire decode_valid, + input wire [`UUID_WIDTH-1:0] decode_uuid, + input wire [`NW_WIDTH-1:0] decode_wid, + input wire [`NUM_THREADS-1:0] decode_tmask, + input wire [`PC_BITS-1:0] decode_PC, + input wire [`EX_BITS-1:0] decode_ex_type, + input wire [`INST_OP_BITS-1:0] decode_op_type, + input op_args_t decode_op_args, + input wire decode_wb, + input wire [`NR_BITS-1:0] decode_rd, + input wire [`NR_BITS-1:0] decode_rs1, + input wire [`NR_BITS-1:0] decode_rs2, + input wire [`NR_BITS-1:0] decode_rs3, + output wire decode_ready, + + input wire writeback_valid[`ISSUE_WIDTH], + input wire [`UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH], + input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH], + input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH], + input wire [`PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH], + input wire [`NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH], + input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH], + input wire writeback_sop[`ISSUE_WIDTH], + input wire writeback_eop[`ISSUE_WIDTH], + + output wire dispatch_valid[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`UUID_WIDTH-1:0] dispatch_uuid[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [ISSUE_WIS_W-1:0] dispatch_wis[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`NUM_THREADS-1:0] dispatch_tmask[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`PC_BITS-1:0] dispatch_PC[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`INST_ALU_BITS-1:0] dispatch_op_type[`NUM_EX_UNITS * `ISSUE_WIDTH], + output op_args_t dispatch_op_args[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire dispatch_wb[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`NR_BITS-1:0] dispatch_rd[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`NT_WIDTH-1:0] dispatch_tid[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`NUM_THREADS-1:0][`XLEN-1:0] 
dispatch_rs1_data[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[`NUM_EX_UNITS * `ISSUE_WIDTH], + output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[`NUM_EX_UNITS * `ISSUE_WIDTH], + input wire dispatch_ready[`NUM_EX_UNITS * `ISSUE_WIDTH] +); + + VX_decode_if decode_if(); + VX_dispatch_if dispatch_if[`NUM_EX_UNITS * `ISSUE_WIDTH](); + VX_writeback_if writeback_if[`ISSUE_WIDTH](); + + assign decode_if.valid = decode_valid; + assign decode_if.data.uuid = decode_uuid; + assign decode_if.data.wid = decode_wid; + assign decode_if.data.tmask = decode_tmask; + assign decode_if.data.PC = decode_PC; + assign decode_if.data.ex_type = decode_ex_type; + assign decode_if.data.op_type = decode_op_type; + assign decode_if.data.op_args = decode_op_args; + assign decode_if.data.wb = decode_wb; + assign decode_if.data.rd = decode_rd; + assign decode_if.data.rs1 = decode_rs1; + assign decode_if.data.rs2 = decode_rs2; + assign decode_if.data.rs3 = decode_rs3; + assign decode_ready = decode_if.ready; + + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + assign writeback_if[i].valid = writeback_valid[i]; + assign writeback_if[i].data.uuid = writeback_uuid[i]; + assign writeback_if[i].data.wis = writeback_wis[i]; + assign writeback_if[i].data.tmask = writeback_tmask[i]; + assign writeback_if[i].data.PC = writeback_PC[i]; + assign writeback_if[i].data.rd = writeback_rd[i]; + assign writeback_if[i].data.data = writeback_data[i]; + assign writeback_if[i].data.sop = writeback_sop[i]; + assign writeback_if[i].data.eop = writeback_eop[i]; + end + + for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin + assign dispatch_valid[i] = dispatch_if[i].valid; + assign dispatch_uuid[i] = dispatch_if[i].data.uuid; + assign dispatch_wis[i] = dispatch_if[i].data.wis; + assign dispatch_tmask[i] = dispatch_if[i].data.tmask; + assign dispatch_PC[i] = dispatch_if[i].data.PC; + assign dispatch_op_type[i] = dispatch_if[i].data.op_type; + assign 
dispatch_op_args[i] = dispatch_if[i].data.op_args; + assign dispatch_wb[i] = dispatch_if[i].data.wb; + assign dispatch_rd[i] = dispatch_if[i].data.rd; + assign dispatch_tid[i] = dispatch_if[i].data.tid; + assign dispatch_rs1_data[i] = dispatch_if[i].data.rs1_data; + assign dispatch_rs2_data[i] = dispatch_if[i].data.rs2_data; + assign dispatch_rs3_data[i] = dispatch_if[i].data.rs3_data; + assign dispatch_if[i].ready = dispatch_ready[i]; + end + +`ifdef PERF_ENABLE + issue_perf_t issue_perf = '0; +`endif + + VX_issue #( + .INSTANCE_ID (INSTANCE_ID) + ) issue ( + `SCOPE_IO_BIND (0) + .clk (clk), + .reset (reset), + + `ifdef PERF_ENABLE + .issue_perf (issue_perf), + `endif + + .decode_if (decode_if), + .writeback_if (writeback_if), + .dispatch_if (dispatch_if) + ); + +endmodule diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv index 8c1f3993e..e896b4000 100644 --- a/hw/rtl/core/VX_lmem_unit.sv +++ b/hw/rtl/core/VX_lmem_unit.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -14,11 +14,11 @@ `include "VX_define.vh" module VX_lmem_unit import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, - + `ifdef PERF_ENABLE output cache_perf_t cache_perf, `endif @@ -37,31 +37,31 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .NUM_LANES (`NUM_LSU_LANES), .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) - ) lmem_lsu_if[`NUM_LSU_BLOCKS](); - - `RESET_RELAY (req_reset, reset); + ) lsu_switch_if[`NUM_LSU_BLOCKS](); for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - + wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL]; end - + wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); - wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask); + wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask); wire req_global_ready; wire req_local_ready; + `RESET_RELAY (switch_reset, reset); + VX_elastic_buffer #( .DATAW (REQ_DATAW), .SIZE (2), .OUT_REG (1) ) req_global_buf ( .clk (clk), - .reset (req_reset), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), + .reset (switch_reset), + .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), .data_in ({ lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, lsu_mem_in_if[i].req_data.rw, @@ -81,7 +81,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_out_if[i].req_data.atype, lsu_mem_out_if[i].req_data.data, lsu_mem_out_if[i].req_data.tag - }), + }), .ready_out (lsu_mem_out_if[i].req_ready) ); @@ -91,8 +91,8 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .OUT_REG (0) ) req_local_buf ( .clk (clk), - .reset (req_reset), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), + .reset (switch_reset), + .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), .data_in ({ lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, 
lsu_mem_in_if[i].req_data.rw, @@ -103,73 +103,47 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( lsu_mem_in_if[i].req_data.tag }), .ready_in (req_local_ready), - .valid_out (lmem_lsu_if[i].req_valid), + .valid_out (lsu_switch_if[i].req_valid), .data_out ({ - lmem_lsu_if[i].req_data.mask, - lmem_lsu_if[i].req_data.rw, - lmem_lsu_if[i].req_data.byteen, - lmem_lsu_if[i].req_data.addr, - lmem_lsu_if[i].req_data.atype, - lmem_lsu_if[i].req_data.data, - lmem_lsu_if[i].req_data.tag - }), - .ready_out (lmem_lsu_if[i].req_ready) + lsu_switch_if[i].req_data.mask, + lsu_switch_if[i].req_data.rw, + lsu_switch_if[i].req_data.byteen, + lsu_switch_if[i].req_data.addr, + lsu_switch_if[i].req_data.atype, + lsu_switch_if[i].req_data.data, + lsu_switch_if[i].req_data.tag + }), + .ready_out (lsu_switch_if[i].req_ready) ); - assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) + assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) || (req_local_ready && is_addr_local); - end - `RESET_RELAY (rsp_reset, reset); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - - wire rsp_arb_valid; - wire rsp_arb_index; - wire rsp_arb_ready; - - VX_generic_arbiter #( - .NUM_REQS (2), - .LOCK_ENABLE (1), - .TYPE ("R") - ) arbiter ( - .clk (clk), - .reset (rsp_reset), - .requests ({ - lmem_lsu_if[i].rsp_valid, + VX_stream_arb #( + .NUM_INPUTS (2), + .DATAW (RSP_DATAW), + .ARBITER ("R"), + .OUT_BUF (1) + ) rsp_arb ( + .clk (clk), + .reset (switch_reset), + .valid_in ({ + lsu_switch_if[i].rsp_valid, lsu_mem_out_if[i].rsp_valid }), - .grant_valid (rsp_arb_valid), - .grant_index (rsp_arb_index), - `UNUSED_PIN (grant_onehot), - .grant_unlock(rsp_arb_ready) - ); - - VX_elastic_buffer #( - .DATAW (RSP_DATAW), - .SIZE (2), - .OUT_REG (0) - ) rsp_buf ( - .clk (clk), - .reset (rsp_reset), - .valid_in (rsp_arb_valid), - .data_in ({ - rsp_arb_index ? lmem_lsu_if[i].rsp_data.mask : lsu_mem_out_if[i].rsp_data.mask, - rsp_arb_index ? 
lmem_lsu_if[i].rsp_data.data : lsu_mem_out_if[i].rsp_data.data, - rsp_arb_index ? lmem_lsu_if[i].rsp_data.tag : lsu_mem_out_if[i].rsp_data.tag + .ready_in ({ + lsu_switch_if[i].rsp_ready, + lsu_mem_out_if[i].rsp_ready }), - .ready_in (rsp_arb_ready), + .data_in ({ + lsu_switch_if[i].rsp_data, + lsu_mem_out_if[i].rsp_data + }), + .data_out (lsu_mem_in_if[i].rsp_data), .valid_out (lsu_mem_in_if[i].rsp_valid), - .data_out ({ - lsu_mem_in_if[i].rsp_data.mask, - lsu_mem_in_if[i].rsp_data.data, - lsu_mem_in_if[i].rsp_data.tag - }), - .ready_out (lsu_mem_in_if[i].rsp_ready) + .ready_out (lsu_mem_in_if[i].rsp_ready), + `UNUSED_PIN (sel_out) ); - - assign lsu_mem_out_if[i].rsp_ready = rsp_arb_ready && ~rsp_arb_index; - assign lmem_lsu_if[i].rsp_ready = rsp_arb_ready && rsp_arb_index; end VX_mem_bus_if #( @@ -177,25 +151,25 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - `RESET_RELAY (adapter_reset, reset); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_tmp_if[`NUM_LSU_LANES](); + `RESET_RELAY (adapter_reset, reset); + VX_lsu_adapter #( .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), + .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH), .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), - .REQ_OUT_BUF (2), - .RSP_OUT_BUF (1) + .REQ_OUT_BUF (3), + .RSP_OUT_BUF (0) ) lsu_adapter ( .clk (clk), .reset (adapter_reset), - .lsu_mem_if (lmem_lsu_if[i]), + .lsu_mem_if (lsu_switch_if[i]), .mem_bus_if (lmem_bus_tmp_if) ); @@ -205,17 +179,18 @@ module VX_lmem_unit import VX_gpu_pkg::*; #( end `RESET_RELAY (lmem_reset, reset); - + VX_local_mem #( - .INSTANCE_ID($sformatf("core%0d-lmem", CORE_ID)), + .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), .SIZE (1 << `LMEM_LOG_SIZE), .NUM_REQS (LSU_NUM_REQS), .NUM_BANKS (`LMEM_NUM_BANKS), .WORD_SIZE (LSU_WORD_SIZE), .ADDR_WIDTH 
(LMEM_ADDR_WIDTH), - .UUID_WIDTH (`UUID_WIDTH), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) local_mem ( + .UUID_WIDTH (`UUID_WIDTH), + .TAG_WIDTH (LSU_TAG_WIDTH), + .OUT_BUF (3) + ) local_mem ( .clk (clk), .reset (lmem_reset), `ifdef PERF_ENABLE diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/core/VX_lsu_adapter.sv index a981b27b0..21d43d280 100644 --- a/hw/rtl/core/VX_lsu_adapter.sv +++ b/hw/rtl/core/VX_lsu_adapter.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,10 +14,10 @@ `include "VX_define.vh" module VX_lsu_adapter import VX_gpu_pkg::*; #( - parameter NUM_LANES = 1, - parameter DATA_SIZE = 1, - parameter TAG_WIDTH = 1, - parameter TAG_SEL_BITS = 0, + parameter NUM_LANES = 1, + parameter DATA_SIZE = 1, + parameter TAG_WIDTH = 1, + parameter TAG_SEL_BITS = 0, parameter `STRING ARBITER = "P", parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 @@ -63,12 +63,12 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_data.tag = req_tag_out[i]; assign req_ready_out[i] = mem_bus_if[i].req_ready; end - + VX_stream_unpack #( - .NUM_REQS (NUM_LANES), - .DATA_WIDTH (REQ_DATA_WIDTH), - .TAG_WIDTH (TAG_WIDTH), - .OUT_BUF (REQ_OUT_BUF) + .NUM_REQS (NUM_LANES), + .DATA_WIDTH (REQ_DATA_WIDTH), + .TAG_WIDTH (TAG_WIDTH), + .OUT_BUF (REQ_OUT_BUF) ) stream_unpack ( .clk (clk), .reset (reset), @@ -77,7 +77,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( .data_in (req_data_in), .tag_in (lsu_mem_if.req_data.tag), .ready_in (lsu_mem_if.req_ready), - .valid_out (req_valid_out), + .valid_out (req_valid_out), 
.data_out (req_data_out), .tag_out (req_tag_out), .ready_out (req_ready_out) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 2425cdc28..120dc9f8e 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -13,9 +13,8 @@ `include "VX_define.vh" -module VX_lsu_slice import VX_gpu_pkg::*; #( - parameter CORE_ID = 0, - parameter BLOCK_ID = 0 +module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( + parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL @@ -88,7 +87,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0] mem_req_mask; wire mem_req_rw; wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] mem_req_addr; - reg [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen; + wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] mem_req_byteen; reg [NUM_LANES-1:0][LSU_WORD_SIZE*8-1:0] mem_req_data; wire [TAG_WIDTH-1:0] mem_req_tag; wire mem_req_ready; @@ -159,27 +158,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( // byte enable formatting for (genvar i = 0; i < NUM_LANES; ++i) begin + reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r; always @(*) begin - mem_req_byteen[i] = '0; + mem_req_byteen_r = '0; case (`INST_LSU_WSIZE(execute_if.data.op_type)) 0: begin // 8-bit - mem_req_byteen[i][req_align[i]] = 1'b1; + mem_req_byteen_r[req_align[i]] = 1'b1; end 1: begin // 16 bit - mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; - mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; + mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; + mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; end `ifdef XLEN_64 2: begin // 32 bit - mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; - mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; - mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; - mem_req_byteen[i][{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; + mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; + 
mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; + mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; + mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; end `endif - default : mem_req_byteen[i] = {LSU_WORD_SIZE{1'b1}}; + // 3: 64 bit + default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}}; endcase end + assign mem_req_byteen[i] = mem_req_byteen_r; end // memory misalignment not supported! @@ -312,7 +314,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `RESET_RELAY (mem_scheduler_reset, reset); VX_mem_scheduler #( - .INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)), + .INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)), .CORE_REQS (NUM_LANES), .MEM_CHANNELS(NUM_LANES), .WORD_SIZE (LSU_WORD_SIZE), @@ -504,11 +506,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID)); + `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)); end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); + `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); `TRACE(1, (", atype=")); `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); @@ -516,7 +518,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid)); end else begin - `TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); + `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, 
{execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); `TRACE(1, (", atype=")); `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); @@ -524,8 +526,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", - $time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)); + `TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", + $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)); `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES); `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)); end @@ -533,36 +535,20 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `endif `ifdef DBG_SCOPE_LSU - if (CORE_ID == 0 && BLOCK_ID == 0) begin - `ifdef SCOPE - VX_scope_tap #( - .SCOPE_ID (3), - .TRIGGERW (3), - .PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN) - ) scope_tap ( - .clk(clk), - .reset(scope_reset), - .start(1'b0), - .stop(1'b0), - .triggers({reset, mem_req_fire, mem_rsp_fire}), - .probes({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}), - .bus_in(scope_bus_in), - .bus_out(scope_bus_out) - ); - `endif - `ifdef CHIPSCOPE - wire [31:0] full_addr_0 = full_addr[0]; - wire [31:0] mem_req_data_0 = mem_req_data[0]; - wire [31:0] rsp_data_0 = rsp_data[0]; - ila_lsu ila_lsu_inst ( - .clk (clk), - .probe0 ({mem_req_data_0, execute_if.data.uuid, execute_if.data.wid, execute_if.data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}), - .probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}), - .probe2 ({lsu_mem_if.req_data.data, lsu_mem_if.req_data.tag, lsu_mem_if.req_data.byteen, lsu_mem_if.req_data.addr, lsu_mem_if.req_data.rw, lsu_mem_if.req_ready, 
lsu_mem_if.req_valid}), - .probe3 ({lsu_mem_if.rsp_data.data, lsu_mem_if.rsp_data.tag, lsu_mem_if.rsp_ready, lsu_mem_if.rsp_valid}) - ); - `endif - end + VX_scope_tap #( + .SCOPE_ID (3), + .TRIGGERW (3), + .PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH) + ) scope_tap ( + .clk (clk), + .reset (scope_reset), + .start (1'b0), + .stop (1'b0), + .triggers({reset, mem_req_fire, mem_rsp_fire}), + .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), + .bus_in (scope_bus_in), + .bus_out(scope_bus_out) + ); `else `SCOPE_IO_UNUSED() `endif diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index e7dbe602b..d40f5fcfb 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -14,8 +14,8 @@ `include "VX_define.vh" module VX_lsu_unit import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 -) ( + parameter `STRING INSTANCE_ID = "" +) ( `SCOPE_IO_DECL input wire clk, @@ -24,7 +24,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( // Inputs VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH], - // Outputs + // Outputs VX_commit_if.master commit_if [`ISSUE_WIDTH], VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS] ); @@ -32,10 +32,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( localparam NUM_LANES = `NUM_LSU_LANES; `ifdef SCOPE - localparam scope_lsu = 0; `SCOPE_IO_SWITCH (BLOCK_SIZE); `endif - + VX_execute_if #( .NUM_LANES (NUM_LANES) ) per_block_execute_if[BLOCK_SIZE](); @@ -55,17 +54,16 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices - `RESET_RELAY (block_reset, reset); + `RESET_RELAY (slice_reset, reset); VX_lsu_slice #( - .CORE_ID (CORE_ID), - .BLOCK_ID (block_idx) + .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) lsu_slice( - `SCOPE_IO_BIND (scope_lsu+block_idx) + `SCOPE_IO_BIND (block_idx) .clk (clk), - .reset (block_reset), + .reset (slice_reset), .execute_if (per_block_execute_if[block_idx]), .commit_if (per_block_commit_if[block_idx]), .lsu_mem_if (lsu_mem_if[block_idx]) @@ -82,5 +80,5 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .commit_in_if (per_block_commit_if), .commit_out_if (commit_if) ); - + endmodule diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 811237195..17d8a9d0c 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,29 +14,288 @@ `include "VX_define.vh" module VX_operands import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "", + parameter NUM_BANKS = 4, + parameter OUT_BUF = 4 // using 2-cycle EB for area reduction ) ( input wire clk, input wire reset, - VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], - VX_scoreboard_if.slave scoreboard_if [`ISSUE_WIDTH], - VX_operands_if.master operands_if [`ISSUE_WIDTH] +`ifdef PERF_ENABLE + output wire [`PERF_CTR_BITS-1:0] perf_stalls, +`endif + + VX_writeback_if.slave writeback_if, + VX_scoreboard_if.slave scoreboard_if, + VX_operands_if.master operands_if ); + `UNUSED_SPARAM (INSTANCE_ID) + localparam NUM_SRC_REGS = 3; + localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS); + localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); + localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS; + localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS; + localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN; + localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS); + localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS; + localparam XLEN_SIZE = `XLEN / 8; + localparam BYTEENW = `NUM_THREADS * XLEN_SIZE; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin - - `RESET_RELAY (slice_reset, reset); + `UNUSED_VAR (writeback_if.data.sop) - VX_gpr_slice #( - .CORE_ID (CORE_ID) - ) gpr_slice ( - .clk (clk), - .reset (slice_reset), - .writeback_if (writeback_if[i]), - .scoreboard_if(scoreboard_if[i]), - .operands_if (operands_if[i]) + wire 
[NUM_SRC_REGS-1:0] src_valid; + wire [NUM_SRC_REGS-1:0] req_in_valid; + wire [NUM_SRC_REGS-1:0] req_in_ready; + wire [NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data; + wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; + + wire [NUM_BANKS-1:0] gpr_rd_valid_n, gpr_rd_ready; + reg [NUM_BANKS-1:0] gpr_rd_valid; + wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr_n; + reg [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx_n; + reg [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx; + + wire pipe_in_ready; + reg pipe_out_valid; + wire pipe_out_ready; + reg [`UUID_WIDTH-1:0] pipe_out_uuid; + reg [METADATAW-1:0] pipe_out_data; + + reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n; + reg [NUM_SRC_REGS-1:0] data_fetched; + reg has_collision, has_collision_n; + + wire stg_in_valid, stg_in_ready; + + wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3, + scoreboard_if.data.rs2, + scoreboard_if.data.rs1}; + + for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin + if (ISSUE_WIS != 0) begin + assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; + end else begin + assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS]; + end + if (NUM_BANKS != 1) begin + assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0]; + end else begin + assign req_bank_idx[i] = '0; + end + end + + for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin + assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched[i]; + end + + assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid; + + VX_stream_xbar #( + .NUM_INPUTS (NUM_SRC_REGS), + .NUM_OUTPUTS (NUM_BANKS), + .DATAW (PER_BANK_ADDRW), + .ARBITER ("P"), // use priority arbiter + .PERF_CTR_BITS(`PERF_CTR_BITS), + .OUT_BUF (0) // no output buffering + ) req_xbar ( + .clk (clk), + .reset (reset), + `UNUSED_PIN(collisions), + .valid_in 
(req_in_valid), + .data_in (req_in_data), + .sel_in (req_bank_idx), + .ready_in (req_in_ready), + .valid_out (gpr_rd_valid_n), + .data_out (gpr_rd_addr_n), + .sel_out (gpr_rd_req_idx_n), + .ready_out (gpr_rd_ready) + ); + + assign gpr_rd_ready = {NUM_BANKS{stg_in_ready}}; + + always @(*) begin + has_collision_n = 0; + for (integer i = 0; i < NUM_SRC_REGS; ++i) begin + for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin + has_collision_n |= src_valid[i] + && src_valid[j+i] + && (req_bank_idx[i] == req_bank_idx[j+i]); + end + end + end + + always @(*) begin + src_data_n = src_data; + for (integer b = 0; b < NUM_BANKS; ++b) begin + if (gpr_rd_valid[b]) begin + src_data_n[gpr_rd_req_idx[b]] = gpr_rd_data[b]; + end + end + end + + wire pipe_stall = pipe_out_valid && ~pipe_out_ready; + assign pipe_in_ready = ~pipe_stall; + + assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; + + wire stg_in_fire = stg_in_valid && stg_in_ready; + + always @(posedge clk) begin + if (reset) begin + pipe_out_valid <= 0; + gpr_rd_valid <= '0; + data_fetched <= '0; + src_data <= '0; + end else begin + if (~pipe_stall) begin + pipe_out_valid <= scoreboard_if.valid; + gpr_rd_valid <= gpr_rd_valid_n; + if (scoreboard_if.ready) begin + data_fetched <= '0; + end else begin + data_fetched <= data_fetched | req_in_ready; + end + if (stg_in_fire) begin + src_data <= '0; + end else begin + src_data <= src_data_n; + end + end + end + if (~pipe_stall) begin + pipe_out_uuid <= scoreboard_if.data.uuid; + pipe_out_data <= { + scoreboard_if.data.wis, + scoreboard_if.data.tmask, + scoreboard_if.data.PC, + scoreboard_if.data.wb, + scoreboard_if.data.ex_type, + scoreboard_if.data.op_type, + scoreboard_if.data.op_args, + scoreboard_if.data.rd + }; + has_collision <= has_collision_n; + gpr_rd_addr <= gpr_rd_addr_n; + gpr_rd_req_idx <= gpr_rd_req_idx_n; + end + end + + assign pipe_out_ready = stg_in_ready; + assign stg_in_valid = pipe_out_valid && ~has_collision; + + VX_elastic_buffer #( + .DATAW 
(DATAW), + .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), + .LUTRAM (1) + ) out_buffer ( + .clk (clk), + .reset (reset), + .valid_in (stg_in_valid), + .ready_in (stg_in_ready), + .data_in ({ + pipe_out_uuid, + pipe_out_data, + src_data_n[0], + src_data_n[1], + src_data_n[2] + }), + .data_out ({ + operands_if.data.uuid, + operands_if.data.wis, + operands_if.data.tmask, + operands_if.data.PC, + operands_if.data.wb, + operands_if.data.ex_type, + operands_if.data.op_type, + operands_if.data.op_args, + operands_if.data.rd, + operands_if.data.rs1_data, + operands_if.data.rs2_data, + operands_if.data.rs3_data + }), + .valid_out (operands_if.valid), + .ready_out (operands_if.ready) + ); + + wire [PER_BANK_ADDRW-1:0] gpr_wr_addr; + if (ISSUE_WIS != 0) begin + assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis}; + end else begin + assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS]; + end + + wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx; + if (NUM_BANKS != 1) begin + assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0]; + end else begin + assign gpr_wr_bank_idx = '0; + end + + `ifdef GPR_RESET + reg wr_enabled = 0; + always @(posedge clk) begin + if (reset) begin + wr_enabled <= 1; + end + end + `else + wire wr_enabled = 1; + `endif + + for (genvar b = 0; b < NUM_BANKS; ++b) begin + wire gpr_wr_enabled; + if (BANK_SEL_BITS != 0) begin + assign gpr_wr_enabled = wr_enabled + && writeback_if.valid + && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); + end else begin + assign gpr_wr_enabled = wr_enabled && writeback_if.valid; + end + + wire [BYTEENW-1:0] wren; + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}}; + end + + `ifdef GPR_RESET + VX_dp_ram_rst #( + `else + VX_dp_ram #( + `endif + .DATAW (`XLEN * `NUM_THREADS), + .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), + .WRENW (BYTEENW), + .NO_RWCHECK (1) + ) gpr_ram ( + 
.clk (clk), + `ifdef GPR_RESET + .reset (reset), + `endif + .read (1'b1), + .wren (wren), + .write (gpr_wr_enabled), + .waddr (gpr_wr_addr), + .wdata (writeback_if.data.data), + .raddr (gpr_rd_addr[b]), + .rdata (gpr_rd_data[b]) ); end +`ifdef PERF_ENABLE + reg [`PERF_CTR_BITS-1:0] collisions_r; + always @(posedge clk) begin + if (reset) begin + collisions_r <= '0; + end else begin + collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n); + end + end + assign perf_stalls = collisions_r; +`endif + endmodule diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index b76610a80..6bc748745 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -14,13 +14,14 @@ `include "VX_define.vh" module VX_schedule import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", parameter CORE_ID = 0 ) ( input wire clk, input wire reset, `ifdef PERF_ENABLE - VX_pipeline_perf_if.schedule perf_schedule_if, + output sched_perf_t sched_perf, `endif // configuration @@ -42,6 +43,7 @@ module VX_schedule import VX_gpu_pkg::*; #( // status output wire busy ); + `UNUSED_SPARAM (INSTANCE_ID) `UNUSED_PARAM (CORE_ID) reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled @@ -290,7 +292,7 @@ module VX_schedule import VX_gpu_pkg::*; #( `RESET_RELAY (split_join_reset, reset); VX_split_join #( - .CORE_ID (CORE_ID) + .INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID)) ) split_join ( .clk (clk), .reset (split_join_reset), @@ -368,24 +370,42 @@ module VX_schedule import VX_gpu_pkg::*; #( assign schedule_if.data.uuid = instr_uuid; - `RESET_RELAY (pending_instr_reset, reset); + // Track pending instructions per warp - wire no_pending_instr; - VX_pending_instr #( - .CTR_WIDTH (12), - .DECR_COUNT (`ISSUE_WIDTH), - .ALM_EMPTY (1) - ) pending_instr( - .clk (clk), - .reset (pending_instr_reset), - .incr (schedule_if_fire), - .incr_wid (schedule_if.data.wid), - .decr 
(commit_sched_if.committed), - .decr_wid (commit_sched_if.committed_wid), - .alm_empty_wid (sched_csr_if.alm_empty_wid), - .alm_empty (sched_csr_if.alm_empty), - .empty (no_pending_instr) - ); + reg [`NUM_WARPS-1:0] per_warp_incr; + always @(*) begin + per_warp_incr = 0; + if (schedule_if_fire) begin + per_warp_incr[schedule_if.data.wid] = 1; + end + end + + wire [`NUM_WARPS-1:0] pending_warp_empty; + wire [`NUM_WARPS-1:0] pending_warp_alm_empty; + + for (genvar i = 0; i < `NUM_WARPS; ++i) begin + + `RESET_RELAY (pending_instr_reset, reset); + + VX_pending_size #( + .SIZE (4096), + .ALM_EMPTY (1) + ) counter ( + .clk (clk), + .reset (pending_instr_reset), + .incr (per_warp_incr[i]), + .decr (commit_sched_if.committed_warps[i]), + .empty (pending_warp_empty[i]), + .alm_empty (pending_warp_alm_empty[i]), + `UNUSED_PIN (full), + `UNUSED_PIN (alm_full), + `UNUSED_PIN (size) + ); + end + + assign sched_csr_if.alm_empty = pending_warp_alm_empty[sched_csr_if.alm_empty_wid]; + + wire no_pending_instr = (& pending_warp_empty); `BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1); @@ -412,7 +432,7 @@ module VX_schedule import VX_gpu_pkg::*; #( end end end - `RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps)) + `RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps)) `ifdef PERF_ENABLE reg [`PERF_CTR_BITS-1:0] perf_sched_idles; @@ -431,8 +451,8 @@ module VX_schedule import VX_gpu_pkg::*; #( end end - assign perf_schedule_if.sched_idles = perf_sched_idles; - assign perf_schedule_if.sched_stalls = perf_sched_stalls; + assign sched_perf.idles = perf_sched_idles; + assign sched_perf.stalls = perf_sched_stalls; `endif endmodule diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 770ad7c1c..9b3a146c6 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -14,39 +14,37 @@ 
`include "VX_define.vh" module VX_scoreboard import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, `ifdef PERF_ENABLE - output reg [`PERF_CTR_BITS-1:0] perf_scb_stalls, - output reg [`PERF_CTR_BITS-1:0] perf_units_uses [`NUM_EX_UNITS], - output reg [`PERF_CTR_BITS-1:0] perf_sfu_uses [`NUM_SFU_UNITS], + output reg [`PERF_CTR_BITS-1:0] perf_stalls, + output reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_units_uses, + output reg [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_sfu_uses, `endif - VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], - VX_ibuffer_if.slave ibuffer_if [`NUM_WARPS], - VX_scoreboard_if.master scoreboard_if [`ISSUE_WIDTH] + VX_writeback_if.slave writeback_if, + VX_ibuffer_if.slave ibuffer_if [PER_ISSUE_WARPS], + VX_scoreboard_if.master scoreboard_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1; + VX_ibuffer_if staging_if [PER_ISSUE_WARPS](); + reg [PER_ISSUE_WARPS-1:0] operands_ready; + `ifdef PERF_ENABLE - reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle; + reg [PER_ISSUE_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle; wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r; - reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle; + reg [PER_ISSUE_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle; wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r; - wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle; - wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r; - - `POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle); - VX_reduce #( .DATAW_IN (`NUM_EX_UNITS), - .N (`NUM_WARPS), + .N (PER_ISSUE_WARPS), .OP ("|") ) perf_units_reduce ( .data_in (perf_inuse_units_per_cycle), @@ -55,22 +53,28 @@ module VX_scoreboard import VX_gpu_pkg::*; #( 
VX_reduce #( .DATAW_IN (`NUM_SFU_UNITS), - .N (`NUM_WARPS), + .N (PER_ISSUE_WARPS), .OP ("|") ) perf_sfu_reduce ( .data_in (perf_inuse_sfu_per_cycle), .data_out (perf_sfu_per_cycle) ); - `BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle); - `BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT)); - `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT)); + `BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); + `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); + + wire [PER_ISSUE_WARPS-1:0] stg_valid_in; + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + assign stg_valid_in[w] = staging_if[w].valid; + end + + wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready)); always @(posedge clk) begin if (reset) begin - perf_scb_stalls <= '0; + perf_stalls <= '0; end else begin - perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'(perf_stalls_per_cycle_r); + perf_stalls <= perf_stalls + `PERF_CTR_BITS'(perf_stall_per_cycle); end end @@ -95,138 +99,121 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `endif - VX_ibuffer_if staging_if [`NUM_WARPS](); - wire [`NUM_WARPS-1:0][3:0] staging_opds_busy; - - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin VX_elastic_buffer #( .DATAW (DATAW), .SIZE (1) ) stanging_buf ( .clk (clk), .reset (reset), - .valid_in (ibuffer_if[i].valid), - .data_in (ibuffer_if[i].data), - .ready_in (ibuffer_if[i].ready), - .valid_out(staging_if[i].valid), - .data_out (staging_if[i].data), - .ready_out(staging_if[i].ready) + .valid_in (ibuffer_if[w].valid), + .data_in (ibuffer_if[w].data), + .ready_in (ibuffer_if[w].ready), + .valid_out(staging_if[w].valid), + .data_out (staging_if[w].data), + .ready_out(staging_if[w].ready) ); end - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar w = 0; w < 
PER_ISSUE_WARPS; ++w) begin reg [`NUM_REGS-1:0] inuse_regs; - reg [3:0] operands_busy_r, operands_busy_n; + reg [3:0] operands_busy, operands_busy_n; - localparam iw = i % `ISSUE_WIDTH; - localparam wis = i / `ISSUE_WIDTH; + wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready; - wire ibuffer_fire = ibuffer_if[i].valid && ibuffer_if[i].ready; + wire staging_fire = staging_if[w].valid && staging_if[w].ready; - wire staging_fire = staging_if[i].valid && staging_if[i].ready; - - wire writeback_fire = writeback_if[iw].valid - && (writeback_if[iw].data.wis == ISSUE_WIS_W'(wis)) - && writeback_if[iw].data.eop; + wire writeback_fire = writeback_if.valid + && (writeback_if.data.wis == ISSUE_WIS_W'(w)) + && writeback_if.data.eop; `ifdef PERF_ENABLE reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units; reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu; - reg [`SFU_WIDTH-1:0] sfu_type; always @(*) begin - case (staging_if[i].data.op_type) - `INST_SFU_CSRRW, - `INST_SFU_CSRRS, - `INST_SFU_CSRRC: sfu_type = `SFU_CSRS; - default: sfu_type = `SFU_WCTL; - endcase - end - - always @(*) begin - perf_inuse_units_per_cycle[i] = '0; - perf_inuse_sfu_per_cycle[i] = '0; - if (staging_if[i].valid) begin - if (operands_busy_r[0]) begin - perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rd]] = 1; - if (inuse_units[staging_if[i].data.rd] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rd]] = 1; + perf_inuse_units_per_cycle[w] = '0; + perf_inuse_sfu_per_cycle[w] = '0; + if (staging_if[w].valid) begin + if (operands_busy[0]) begin + perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1; + if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin + perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1; end end - if (operands_busy_r[1]) begin - perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs1]] = 1; - if (inuse_units[staging_if[i].data.rs1] == `EX_SFU) begin - 
perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs1]] = 1; + if (operands_busy[1]) begin + perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1; + if (inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin + perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1; end end - if (operands_busy_r[2]) begin - perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs2]] = 1; - if (inuse_units[staging_if[i].data.rs2] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs2]] = 1; + if (operands_busy[2]) begin + perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1; + if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin + perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1; end end - if (operands_busy_r[3]) begin - perf_inuse_units_per_cycle[i][inuse_units[staging_if[i].data.rs3]] = 1; - if (inuse_units[staging_if[i].data.rs3] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[i][inuse_sfu[staging_if[i].data.rs3]] = 1; + if (operands_busy[3]) begin + perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1; + if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin + perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1; end end end end - assign perf_issue_stalls_per_cycle[i] = staging_if[i].valid && ~staging_if[i].ready; `endif always @(*) begin - operands_busy_n = operands_busy_r; + operands_busy_n = operands_busy; if (ibuffer_fire) begin operands_busy_n = { - inuse_regs[ibuffer_if[i].data.rs3], - inuse_regs[ibuffer_if[i].data.rs2], - inuse_regs[ibuffer_if[i].data.rs1], - inuse_regs[ibuffer_if[i].data.rd] + inuse_regs[ibuffer_if[w].data.rs3], + inuse_regs[ibuffer_if[w].data.rs2], + inuse_regs[ibuffer_if[w].data.rs1], + inuse_regs[ibuffer_if[w].data.rd] }; end if (writeback_fire) begin if (ibuffer_fire) begin - if (writeback_if[iw].data.rd == ibuffer_if[i].data.rd) begin + if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin operands_busy_n[0] 
= 0; end - if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs1) begin + if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin operands_busy_n[1] = 0; end - if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs2) begin + if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin operands_busy_n[2] = 0; end - if (writeback_if[iw].data.rd == ibuffer_if[i].data.rs3) begin + if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin operands_busy_n[3] = 0; end end else begin - if (writeback_if[iw].data.rd == staging_if[i].data.rd) begin + if (writeback_if.data.rd == staging_if[w].data.rd) begin operands_busy_n[0] = 0; end - if (writeback_if[iw].data.rd == staging_if[i].data.rs1) begin + if (writeback_if.data.rd == staging_if[w].data.rs1) begin operands_busy_n[1] = 0; end - if (writeback_if[iw].data.rd == staging_if[i].data.rs2) begin + if (writeback_if.data.rd == staging_if[w].data.rs2) begin operands_busy_n[2] = 0; end - if (writeback_if[iw].data.rd == staging_if[i].data.rs3) begin + if (writeback_if.data.rd == staging_if[w].data.rs3) begin operands_busy_n[3] = 0; end end end - if (staging_fire && staging_if[i].data.wb) begin - if (staging_if[i].data.rd == ibuffer_if[i].data.rd) begin + if (staging_fire && staging_if[w].data.wb) begin + if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin operands_busy_n[0] = 1; end - if (staging_if[i].data.rd == ibuffer_if[i].data.rs1) begin + if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin operands_busy_n[1] = 1; end - if (staging_if[i].data.rd == ibuffer_if[i].data.rs2) begin + if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin operands_busy_n[2] = 1; end - if (staging_if[i].data.rd == ibuffer_if[i].data.rs3) begin + if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin operands_busy_n[3] = 1; end end @@ -237,25 +224,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #( inuse_regs <= '0; end else begin if (writeback_fire) begin - inuse_regs[writeback_if[iw].data.rd] <= 0; + 
inuse_regs[writeback_if.data.rd] <= 0; end - if (staging_fire && staging_if[i].data.wb) begin - inuse_regs[staging_if[i].data.rd] <= 1; + if (staging_fire && staging_if[w].data.wb) begin + inuse_regs[staging_if[w].data.rd] <= 1; end end - operands_busy_r <= operands_busy_n; + operands_busy <= operands_busy_n; + operands_ready[w] <= ~(| operands_busy_n); `ifdef PERF_ENABLE - if (staging_fire && staging_if[i].data.wb) begin - inuse_units[staging_if[i].data.rd] <= staging_if[i].data.ex_type; - if (staging_if[i].data.ex_type == `EX_SFU) begin - inuse_sfu[staging_if[i].data.rd] <= sfu_type; + if (staging_fire && staging_if[w].data.wb) begin + inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type; + if (staging_if[w].data.ex_type == `EX_SFU) begin + inuse_sfu[staging_if[w].data.rd] <= op_to_sfu_type(staging_if[w].data.op_type); end end `endif end - assign staging_opds_busy[i] = operands_busy_r; - `ifdef SIMULATION reg [31:0] timeout_ctr; @@ -263,11 +249,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #( if (reset) begin timeout_ctr <= '0; end else begin - if (staging_if[i].valid && ~staging_if[i].ready) begin + if (staging_if[w].valid && ~staging_if[w].ready) begin `ifdef DBG_TRACE_PIPELINE - `TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", - $time, CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr, - operands_busy_r, staging_if[i].data.uuid)); + `TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", + $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, + operands_busy, staging_if[w].data.uuid)); `endif timeout_ctr <= timeout_ctr + 1; end else if (ibuffer_fire) begin @@ -277,59 +263,57 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT), - ("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", - $time, 
CORE_ID, i, {staging_if[i].data.PC, 1'b0}, staging_if[i].data.tmask, timeout_ctr, - operands_busy_r, staging_if[i].data.uuid)); + ("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", + $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, + operands_busy, staging_if[w].data.uuid)); - `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[iw].data.rd] != 0, - ("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", - $time, CORE_ID, i, {writeback_if[iw].data.PC, 1'b0}, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid)); + `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0, + ("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", + $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid)); `endif end - `RESET_RELAY (arb_reset, reset); + wire [PER_ISSUE_WARPS-1:0] arb_valid_in; + wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in; + wire [PER_ISSUE_WARPS-1:0] arb_ready_in; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin - wire [ISSUE_RATIO-1:0] valid_in; - wire [ISSUE_RATIO-1:0][DATAW-1:0] data_in; - wire [ISSUE_RATIO-1:0] ready_in; - - for (genvar j = 0; j < ISSUE_RATIO; ++j) begin - wire operands_ready = ~(| staging_opds_busy[j * `ISSUE_WIDTH + i]); - assign valid_in[j] = staging_if[j * `ISSUE_WIDTH + i].valid && operands_ready; - assign data_in[j] = staging_if[j * `ISSUE_WIDTH + i].data; - assign staging_if[j * `ISSUE_WIDTH + i].ready = ready_in[j] && operands_ready; - end - - VX_stream_arb #( - .NUM_INPUTS (ISSUE_RATIO), - .DATAW (DATAW), - .ARBITER ("R"), - .OUT_BUF (2) - ) out_arb ( - .clk (clk), - .reset (arb_reset), - .valid_in (valid_in), - .ready_in (ready_in), - .data_in (data_in), - .data_out ({ - scoreboard_if[i].data.uuid, - scoreboard_if[i].data.tmask, - scoreboard_if[i].data.PC, - 
scoreboard_if[i].data.ex_type, - scoreboard_if[i].data.op_type, - scoreboard_if[i].data.op_args, - scoreboard_if[i].data.wb, - scoreboard_if[i].data.rd, - scoreboard_if[i].data.rs1, - scoreboard_if[i].data.rs2, - scoreboard_if[i].data.rs3 - }), - .valid_out (scoreboard_if[i].valid), - .ready_out (scoreboard_if[i].ready), - .sel_out (scoreboard_if[i].data.wis) - ); + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w]; + assign arb_data_in[w] = staging_if[w].data; + assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w]; end + `RESET_RELAY (arb_reset, reset); + + VX_stream_arb #( + .NUM_INPUTS (PER_ISSUE_WARPS), + .DATAW (DATAW), + .ARBITER ("F"), + .LUTRAM (1), + .OUT_BUF (4) // using 2-cycle EB for area reduction + ) out_arb ( + .clk (clk), + .reset (arb_reset), + .valid_in (arb_valid_in), + .ready_in (arb_ready_in), + .data_in (arb_data_in), + .data_out ({ + scoreboard_if.data.uuid, + scoreboard_if.data.tmask, + scoreboard_if.data.PC, + scoreboard_if.data.ex_type, + scoreboard_if.data.op_type, + scoreboard_if.data.op_args, + scoreboard_if.data.wb, + scoreboard_if.data.rd, + scoreboard_if.data.rs1, + scoreboard_if.data.rs2, + scoreboard_if.data.rs3 + }), + .valid_out (scoreboard_if.valid), + .ready_out (scoreboard_if.ready), + .sel_out (scoreboard_if.data.wis) + ); + endmodule diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 6dc52c883..add229893 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -14,6 +14,7 @@ `include "VX_define.vh" module VX_sfu_unit import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", parameter CORE_ID = 0 ) ( input wire clk, @@ -39,7 +40,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( VX_commit_if.master commit_if [`ISSUE_WIDTH], VX_warp_ctl_if.master warp_ctl_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam BLOCK_SIZE = 1; localparam NUM_LANES = `NUM_SFU_LANES; localparam PID_BITS 
= `CLOG2(`NUM_THREADS / NUM_LANES); @@ -83,7 +84,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( `RESET_RELAY (wctl_reset, reset); VX_wctl_unit #( - .CORE_ID (CORE_ID), + .INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)), .NUM_LANES (NUM_LANES) ) wctl_unit ( .clk (clk), @@ -111,6 +112,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( `RESET_RELAY (csr_reset, reset); VX_csr_unit #( + .INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)), .CORE_ID (CORE_ID), .NUM_LANES (NUM_LANES) ) csr_unit ( diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 077ba61c6..7f887e602 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -14,7 +14,7 @@ `include "VX_define.vh" module VX_split_join import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter `STRING INSTANCE_ID = "" ) ( input wire clk, input wire reset, @@ -31,7 +31,7 @@ module VX_split_join import VX_gpu_pkg::*; #( input wire [`NW_WIDTH-1:0] stack_wid, output wire [`DV_STACK_SIZEW-1:0] stack_ptr ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0]; wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0]; diff --git a/hw/rtl/core/VX_trace_pkg.sv b/hw/rtl/core/VX_trace_pkg.sv new file mode 100644 index 000000000..b4eae96fe --- /dev/null +++ b/hw/rtl/core/VX_trace_pkg.sv @@ -0,0 +1,399 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`ifndef VX_TRACE_PKG_VH +`define VX_TRACE_PKG_VH + +`include "VX_define.vh" + +package VX_trace_pkg; + +`ifdef SIMULATION + +`ifdef SV_DPI + import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/); +`endif + + import VX_gpu_pkg::*; + + task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); + case (ex_type) + `EX_ALU: `TRACE(level, ("ALU")); + `EX_LSU: `TRACE(level, ("LSU")); + `EX_FPU: `TRACE(level, ("FPU")); + `EX_SFU: `TRACE(level, ("SFU")); + default: `TRACE(level, ("?")); + endcase + endtask + + task trace_ex_op(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + case (op_args.alu.xtype) + `ALU_TYPE_ARITH: begin + if (op_args.alu.is_w) begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDIW")); + `INST_ALU_SLL: `TRACE(level, ("SLLIW")); + `INST_ALU_SRL: `TRACE(level, ("SRLIW")); + `INST_ALU_SRA: `TRACE(level, ("SRAIW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDW")); + `INST_ALU_SUB: `TRACE(level, ("SUBW")); + `INST_ALU_SLL: `TRACE(level, ("SLLW")); + `INST_ALU_SRL: `TRACE(level, ("SRLW")); + `INST_ALU_SRA: `TRACE(level, ("SRAW")); + default: `TRACE(level, ("?")); + endcase + end + end else begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDI")); + `INST_ALU_SLL: `TRACE(level, ("SLLI")); + `INST_ALU_SRL: `TRACE(level, ("SRLI")); + `INST_ALU_SRA: `TRACE(level, ("SRAI")); + `INST_ALU_SLT: `TRACE(level, ("SLTI")); + `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); + `INST_ALU_XOR: `TRACE(level, ("XORI")); + `INST_ALU_OR: `TRACE(level, ("ORI")); + `INST_ALU_AND: `TRACE(level, ("ANDI")); + `INST_ALU_LUI: `TRACE(level, ("LUI")); + `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); + default: `TRACE(level, ("?")); + 
endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADD")); + `INST_ALU_SUB: `TRACE(level, ("SUB")); + `INST_ALU_SLL: `TRACE(level, ("SLL")); + `INST_ALU_SRL: `TRACE(level, ("SRL")); + `INST_ALU_SRA: `TRACE(level, ("SRA")); + `INST_ALU_SLT: `TRACE(level, ("SLT")); + `INST_ALU_SLTU: `TRACE(level, ("SLTU")); + `INST_ALU_XOR: `TRACE(level, ("XOR")); + `INST_ALU_OR: `TRACE(level, ("OR")); + `INST_ALU_AND: `TRACE(level, ("AND")); + `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); + `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); + default: `TRACE(level, ("?")); + endcase + end + end + end + `ALU_TYPE_BRANCH: begin + case (`INST_BR_BITS'(op_type)) + `INST_BR_EQ: `TRACE(level, ("BEQ")); + `INST_BR_NE: `TRACE(level, ("BNE")); + `INST_BR_LT: `TRACE(level, ("BLT")); + `INST_BR_GE: `TRACE(level, ("BGE")); + `INST_BR_LTU: `TRACE(level, ("BLTU")); + `INST_BR_GEU: `TRACE(level, ("BGEU")); + `INST_BR_JAL: `TRACE(level, ("JAL")); + `INST_BR_JALR: `TRACE(level, ("JALR")); + `INST_BR_ECALL: `TRACE(level, ("ECALL")); + `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); + `INST_BR_URET: `TRACE(level, ("URET")); + `INST_BR_SRET: `TRACE(level, ("SRET")); + `INST_BR_MRET: `TRACE(level, ("MRET")); + default: `TRACE(level, ("?")); + endcase + end + `ALU_TYPE_MULDIV: begin + if (op_args.alu.is_w) begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MULW")); + `INST_M_DIV: `TRACE(level, ("DIVW")); + `INST_M_DIVU: `TRACE(level, ("DIVUW")); + `INST_M_REM: `TRACE(level, ("REMW")); + `INST_M_REMU: `TRACE(level, ("REMUW")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MUL")); + `INST_M_MULH: `TRACE(level, ("MULH")); + `INST_M_MULHSU:`TRACE(level, ("MULHSU")); + `INST_M_MULHU: `TRACE(level, ("MULHU")); + `INST_M_DIV: `TRACE(level, ("DIV")); + `INST_M_DIVU: `TRACE(level, ("DIVU")); + `INST_M_REM: `TRACE(level, ("REM")); + `INST_M_REMU: `TRACE(level, ("REMU")); + default: 
`TRACE(level, ("?")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_LSU: begin + if (op_args.lsu.is_float) begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LW: `TRACE(level, ("FLW")); + `INST_LSU_LD: `TRACE(level, ("FLD")); + `INST_LSU_SW: `TRACE(level, ("FSW")); + `INST_LSU_SD: `TRACE(level, ("FSD")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LB: `TRACE(level, ("LB")); + `INST_LSU_LH: `TRACE(level, ("LH")); + `INST_LSU_LW: `TRACE(level, ("LW")); + `INST_LSU_LD: `TRACE(level, ("LD")); + `INST_LSU_LBU:`TRACE(level, ("LBU")); + `INST_LSU_LHU:`TRACE(level, ("LHU")); + `INST_LSU_LWU:`TRACE(level, ("LWU")); + `INST_LSU_SB: `TRACE(level, ("SB")); + `INST_LSU_SH: `TRACE(level, ("SH")); + `INST_LSU_SW: `TRACE(level, ("SW")); + `INST_LSU_SD: `TRACE(level, ("SD")); + `INST_LSU_FENCE:`TRACE(level,("FENCE")); + default: `TRACE(level, ("?")); + endcase + end + end + `EX_FPU: begin + case (`INST_FPU_BITS'(op_type)) + `INST_FPU_ADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FADD.D")); + else + `TRACE(level, ("FADD.S")); + end + `INST_FPU_SUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSUB.D")); + else + `TRACE(level, ("FSUB.S")); + end + `INST_FPU_MUL: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMUL.D")); + else + `TRACE(level, ("FMUL.S")); + end + `INST_FPU_DIV: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FDIV.D")); + else + `TRACE(level, ("FDIV.S")); + end + `INST_FPU_SQRT: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSQRT.D")); + else + `TRACE(level, ("FSQRT.S")); + end + `INST_FPU_MADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMADD.D")); + else + `TRACE(level, ("FMADD.S")); + end + `INST_FPU_MSUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMSUB.D")); + else + `TRACE(level, ("FMSUB.S")); + end + `INST_FPU_NMADD: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMADD.D")); + else + `TRACE(level, ("FNMADD.S")); + end + 
`INST_FPU_NMSUB: begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMSUB.D")); + else + `TRACE(level, ("FNMSUB.S")); + end + `INST_FPU_CMP: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.D")); + 1: `TRACE(level, ("FLT.D")); + 2: `TRACE(level, ("FEQ.D")); + default: `TRACE(level, ("?")); + endcase + end else begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.S")); + 1: `TRACE(level, ("FLT.S")); + 2: `TRACE(level, ("FEQ.S")); + default: `TRACE(level, ("?")); + endcase + end + end + `INST_FPU_F2F: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FCVT.D.S")); + end else begin + `TRACE(level, ("FCVT.S.D")); + end + end + `INST_FPU_F2I: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.D")); + end else begin + `TRACE(level, ("FCVT.W.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.S")); + end else begin + `TRACE(level, ("FCVT.W.S")); + end + end + end + `INST_FPU_F2U: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.D")); + end else begin + `TRACE(level, ("FCVT.WU.D")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.S")); + end else begin + `TRACE(level, ("FCVT.WU.S")); + end + end + end + `INST_FPU_I2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.L")); + end else begin + `TRACE(level, ("FCVT.D.W")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.L")); + end else begin + `TRACE(level, ("FCVT.S.W")); + end + end + end + `INST_FPU_U2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.LU")); + end else begin + `TRACE(level, ("FCVT.D.WU")); + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.LU")); + end else begin + `TRACE(level, ("FCVT.S.WU")); + end + end + end + `INST_FPU_MISC: begin + if 
(op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.D")); + 1: `TRACE(level, ("FSGNJN.D")); + 2: `TRACE(level, ("FSGNJX.D")); + 3: `TRACE(level, ("FCLASS.D")); + 4: `TRACE(level, ("FMV.X.D")); + 5: `TRACE(level, ("FMV.D.X")); + 6: `TRACE(level, ("FMIN.D")); + 7: `TRACE(level, ("FMAX.D")); + endcase + end else begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.S")); + 1: `TRACE(level, ("FSGNJN.S")); + 2: `TRACE(level, ("FSGNJX.S")); + 3: `TRACE(level, ("FCLASS.S")); + 4: `TRACE(level, ("FMV.X.S")); + 5: `TRACE(level, ("FMV.S.X")); + 6: `TRACE(level, ("FMIN.S")); + 7: `TRACE(level, ("FMAX.S")); + endcase + end + end + default: `TRACE(level, ("?")); + endcase + end + `EX_SFU: begin + case (`INST_SFU_BITS'(op_type)) + `INST_SFU_TMC: `TRACE(level, ("TMC")); + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")); + `INST_SFU_BAR: `TRACE(level, ("BAR")); + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end + `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end + default: `TRACE(level, ("?")); + endcase + end + default: `TRACE(level, ("?")); + endcase + endtask + + task trace_op_args(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); + end + `EX_LSU: begin + `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); + end + `EX_FPU: begin + 
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + end + `EX_SFU: begin + if (`INST_SFU_IS_CSR(op_type)) begin + `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); + end + end + default:; + endcase + endtask + + task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); + case (addr) + `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); + `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); + `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); + `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); + `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); + default: `TRACE(level, ("?")); + endcase + endtask + +`endif + +endpackage + +`endif // VX_TRACE_PKG_VH diff --git a/hw/rtl/core/VX_wctl_unit.sv b/hw/rtl/core/VX_wctl_unit.sv index c59260809..132f679d4 100644 --- a/hw/rtl/core/VX_wctl_unit.sv +++ b/hw/rtl/core/VX_wctl_unit.sv @@ -14,7 +14,7 @@ `include "VX_define.vh" module VX_wctl_unit import VX_gpu_pkg::*; #( - parameter CORE_ID = 0, + parameter `STRING INSTANCE_ID = "", parameter NUM_LANES = 1 ) ( input wire clk, @@ -27,7 +27,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( VX_warp_ctl_if.master warp_ctl_if, VX_commit_if.master commit_if ); - `UNUSED_PARAM (CORE_ID) + `UNUSED_SPARAM (INSTANCE_ID) localparam LANE_BITS = `CLOG2(NUM_LANES); localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 0d5dbb1fb..781b5b88e 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,7 +15,7 @@ `ifdef FPU_DPI -module VX_fpu_dpi import VX_fpu_pkg::*; #( +module VX_fpu_dpi import VX_fpu_pkg::*; #( parameter NUM_LANES = 1, parameter TAG_WIDTH = 1, parameter OUT_BUF = 0 @@ -29,7 +29,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( input wire [NUM_LANES-1:0] mask_in, input wire [TAG_WIDTH-1:0] tag_in, - + input wire [`INST_FPU_BITS-1:0] op_type, input wire [`INST_FMT_BITS-1:0] fmt, input wire [`INST_FRM_BITS-1:0] frm, @@ -37,7 +37,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( input wire [NUM_LANES-1:0][`XLEN-1:0] dataa, input wire [NUM_LANES-1:0][`XLEN-1:0] datab, input wire [NUM_LANES-1:0][`XLEN-1:0] datac, - output wire [NUM_LANES-1:0][`XLEN-1:0] result, + output wire [NUM_LANES-1:0][`XLEN-1:0] result, output wire has_fflags, output wire [`FP_FLAGS_BITS-1:0] fflags, @@ -55,31 +55,31 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( localparam FPC_BITS = `LOG2UP(NUM_FPC); localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH; - + wire [NUM_FPC-1:0] per_core_ready_in; wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result; wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out; reg [NUM_FPC-1:0] per_core_ready_out; - wire [NUM_FPC-1:0] per_core_valid_out; - wire [NUM_FPC-1:0] per_core_has_fflags; - fflags_t [NUM_FPC-1:0] per_core_fflags; + wire [NUM_FPC-1:0] per_core_valid_out; + wire [NUM_FPC-1:0] per_core_has_fflags; + fflags_t [NUM_FPC-1:0] per_core_fflags; wire div_ready_in, sqrt_ready_in; wire [NUM_LANES-1:0][`XLEN-1:0] div_result, sqrt_result; wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out; wire div_ready_out, sqrt_ready_out; - wire div_valid_out, sqrt_valid_out; - wire div_has_fflags, sqrt_has_fflags; + 
wire div_valid_out, sqrt_valid_out; + wire div_has_fflags, sqrt_has_fflags; fflags_t div_fflags, sqrt_fflags; reg [FPC_BITS-1:0] core_select; reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub; - reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f; + reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f; reg dst_fmt, int_fmt; reg [NUM_LANES-1:0][63:0] operands [3]; - + always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin operands[0][i] = 64'(dataa[i]); @@ -92,23 +92,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin is_fadd = 0; - is_fsub = 0; - is_fmul = 0; + is_fsub = 0; + is_fmul = 0; is_fmadd = 0; is_fmsub = 0; - is_fnmadd = 0; - is_fnmsub = 0; - is_div = 0; + is_fnmadd = 0; + is_fnmsub = 0; + is_div = 0; is_fcmp = 0; is_itof = 0; is_utof = 0; is_ftoi = 0; is_ftou = 0; is_f2f = 0; - + dst_fmt = 0; int_fmt = 0; - + `ifdef FLEN_64 dst_fmt = fmt[0]; `endif @@ -132,23 +132,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( `INST_FPU_F2U: begin core_select = FPU_CVT; is_ftou = 1; end `INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; end `INST_FPU_U2F: begin core_select = FPU_CVT; is_utof = 1; end - `INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end + `INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end default: begin core_select = FPU_NCP; end endcase end - generate + generate begin : fma - + reg [NUM_LANES-1:0][`XLEN-1:0] result_fma; - wire [NUM_LANES-1:0][63:0] result_fadd; - wire [NUM_LANES-1:0][63:0] result_fsub; - wire [NUM_LANES-1:0][63:0] result_fmul; - wire [NUM_LANES-1:0][63:0] result_fmadd; - wire [NUM_LANES-1:0][63:0] result_fmsub; - wire [NUM_LANES-1:0][63:0] result_fnmadd; - wire [NUM_LANES-1:0][63:0] result_fnmsub; - + reg [NUM_LANES-1:0][63:0] result_fadd; + reg [NUM_LANES-1:0][63:0] result_fsub; + reg [NUM_LANES-1:0][63:0] result_fmul; + reg [NUM_LANES-1:0][63:0] result_fmadd; + reg [NUM_LANES-1:0][63:0] result_fmsub; + reg [NUM_LANES-1:0][63:0] result_fnmadd; 
+ reg [NUM_LANES-1:0][63:0] result_fnmsub; + fflags_t [NUM_LANES-1:0] fflags_fma; fflags_t [NUM_LANES-1:0] fflags_fadd; fflags_t [NUM_LANES-1:0] fflags_fsub; @@ -162,7 +162,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA]; wire fma_fire = fma_valid && fma_ready; - always @(*) begin + always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); @@ -175,20 +175,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] : is_fsub ? result_fsub[i][`XLEN-1:0] : is_fmul ? result_fmul[i][`XLEN-1:0] : - is_fmadd ? result_fmadd[i][`XLEN-1:0] : + is_fmadd ? result_fmadd[i][`XLEN-1:0] : is_fmsub ? result_fmsub[i][`XLEN-1:0] : - is_fnmadd ? result_fnmadd[i][`XLEN-1:0] : + is_fnmadd ? result_fnmadd[i][`XLEN-1:0] : is_fnmsub ? result_fnmsub[i][`XLEN-1:0] : '0; fflags_fma[i] = is_fadd ? fflags_fadd[i] : is_fsub ? fflags_fsub[i] : is_fmul ? fflags_fmul[i] : - is_fmadd ? fflags_fmadd[i] : + is_fmadd ? fflags_fmadd[i] : is_fmsub ? fflags_fmsub[i] : - is_fnmadd ? fflags_fnmadd[i] : - is_fnmsub ? fflags_fnmsub[i] : - '0; + is_fnmadd ? fflags_fnmadd[i] : + is_fnmsub ? 
fflags_fnmsub[i] : + '0; end end @@ -213,19 +213,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end endgenerate - generate + generate begin : fdiv reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r; - wire [NUM_LANES-1:0][63:0] result_fdiv; + reg [NUM_LANES-1:0][63:0] result_fdiv; fflags_t [NUM_LANES-1:0] fflags_fdiv; wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div; wire fdiv_ready = div_ready_out || ~div_valid_out; wire fdiv_fire = fdiv_valid && fdiv_ready; - - always @(*) begin - for (integer i = 0; i < NUM_LANES; ++i) begin + + always @(*) begin + for (integer i = 0; i < NUM_LANES; ++i) begin dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0]; end @@ -252,18 +252,18 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end endgenerate - generate + generate begin : fsqrt reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r; - wire [NUM_LANES-1:0][63:0] result_fsqrt; + reg [NUM_LANES-1:0][63:0] result_fsqrt; fflags_t [NUM_LANES-1:0] fflags_fsqrt; wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div; - wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out; + wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out; wire fsqrt_fire = fsqrt_valid && fsqrt_ready; - - always @(*) begin + + always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0]; @@ -295,12 +295,12 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( begin : fcvt reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt; - wire [NUM_LANES-1:0][63:0] result_itof; - wire [NUM_LANES-1:0][63:0] result_utof; - wire [NUM_LANES-1:0][63:0] result_ftoi; - wire [NUM_LANES-1:0][63:0] result_ftou; - wire [NUM_LANES-1:0][63:0] result_f2f; - + reg [NUM_LANES-1:0][63:0] result_itof; + reg [NUM_LANES-1:0][63:0] result_utof; + reg [NUM_LANES-1:0][63:0] result_ftoi; + reg 
[NUM_LANES-1:0][63:0] result_ftou; + reg [NUM_LANES-1:0][63:0] result_f2f; + fflags_t [NUM_LANES-1:0] fflags_fcvt; fflags_t [NUM_LANES-1:0] fflags_itof; fflags_t [NUM_LANES-1:0] fflags_utof; @@ -310,20 +310,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( wire fcvt_valid = (valid_in && core_select == FPU_CVT); wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT]; wire fcvt_fire = fcvt_valid && fcvt_ready; - - always @(*) begin + + always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); - dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]); + dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]); result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] : is_utof ? result_utof[i][`XLEN-1:0] : is_ftoi ? result_ftoi[i][`XLEN-1:0] : - is_ftou ? result_ftou[i][`XLEN-1:0] : - is_f2f ? result_f2f[i][`XLEN-1:0] : + is_ftou ? result_ftou[i][`XLEN-1:0] : + is_f2f ? result_f2f[i][`XLEN-1:0] : '0; fflags_fcvt[i] = is_itof ? 
fflags_itof[i] : @@ -355,19 +355,19 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end endgenerate - generate + generate begin : fncp reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp; - wire [NUM_LANES-1:0][63:0] result_fclss; - wire [NUM_LANES-1:0][63:0] result_flt; - wire [NUM_LANES-1:0][63:0] result_fle; - wire [NUM_LANES-1:0][63:0] result_feq; - wire [NUM_LANES-1:0][63:0] result_fmin; - wire [NUM_LANES-1:0][63:0] result_fmax; - wire [NUM_LANES-1:0][63:0] result_fsgnj; - wire [NUM_LANES-1:0][63:0] result_fsgnjn; - wire [NUM_LANES-1:0][63:0] result_fsgnjx; + reg [NUM_LANES-1:0][63:0] result_fclss; + reg [NUM_LANES-1:0][63:0] result_flt; + reg [NUM_LANES-1:0][63:0] result_fle; + reg [NUM_LANES-1:0][63:0] result_feq; + reg [NUM_LANES-1:0][63:0] result_fmin; + reg [NUM_LANES-1:0][63:0] result_fmax; + reg [NUM_LANES-1:0][63:0] result_fsgnj; + reg [NUM_LANES-1:0][63:0] result_fsgnjn; + reg [NUM_LANES-1:0][63:0] result_fsgnjx; reg [NUM_LANES-1:0][63:0] result_fmvx; reg [NUM_LANES-1:0][63:0] result_fmvf; @@ -381,15 +381,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( wire fncp_valid = (valid_in && core_select == FPU_NCP); wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP]; wire fncp_fire = fncp_valid && fncp_ready; - - always @(*) begin + + always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]); dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); - dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); + dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); - dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], 
fflags_fmax[i]); + dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); @@ -431,7 +431,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( .data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_merged}), .data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]}) ); - + assign per_core_ready_in[FPU_NCP] = fncp_ready; end @@ -443,15 +443,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), - .DATAW (RSP_DATAW), + .DATAW (RSP_DATAW), .ARBITER ("R"), .OUT_BUF (0) ) div_sqrt_arb ( .clk (clk), .reset (reset), - .valid_in ({sqrt_valid_out, div_valid_out}), + .valid_in ({sqrt_valid_out, div_valid_out}), .ready_in ({sqrt_ready_out, div_ready_out}), - .data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out}, + .data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out}, {div_result, div_has_fflags, div_fflags, div_tag_out}}), .data_out ({per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], per_core_fflags[FPU_DIVSQRT], per_core_tag_out[FPU_DIVSQRT]}), .valid_out (per_core_valid_out[FPU_DIVSQRT]), @@ -469,13 +469,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_FPC), - .DATAW (RSP_DATAW), - .ARBITER ("R"), + .DATAW (RSP_DATAW), + .ARBITER ("F"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), .reset (reset), - .valid_in (per_core_valid_out), + .valid_in (per_core_valid_out), .ready_in (per_core_ready_out), .data_in (per_core_data_out), .data_out ({result, has_fflags, fflags, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index a219ce771..ad398dcd7 
100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -289,14 +289,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( end wire [NUM_LANES-1:0][31:0] result_s; - + wire [1:0] op_ret_int_out; `UNUSED_VAR (op_ret_int_out) VX_stream_arb #( .NUM_INPUTS (NUM_FPC), .DATAW (RSP_DATAW + 2), - .ARBITER ("R"), + .ARBITER ("F"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index bfbb6458c..8151fbf55 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -21,7 +21,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( parameter TAG_WIDTH = 1 ) ( input wire clk, - input wire reset, + input wire reset, output wire ready_in, input wire valid_in, @@ -29,7 +29,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( input wire [NUM_LANES-1:0] mask_in, input wire [TAG_WIDTH-1:0] tag_in, - + input wire [`INST_FRM_BITS-1:0] frm, input wire is_madd, @@ -39,7 +39,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( input wire [NUM_LANES-1:0][31:0] dataa, input wire [NUM_LANES-1:0][31:0] datab, input wire [NUM_LANES-1:0][31:0] datac, - output wire [NUM_LANES-1:0][31:0] result, + output wire [NUM_LANES-1:0][31:0] result, output wire has_fflags, output wire [`FP_FLAGS_BITS-1:0] fflags, @@ -52,11 +52,11 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `UNUSED_VAR (frm) wire [NUM_LANES-1:0][3*32-1:0] data_in; - wire [NUM_LANES-1:0] mask_out; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; - wire pe_enable; + wire pe_enable; wire [NUM_PES-1:0][3*32-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; @@ -66,7 +66,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( always @(*) begin if (is_madd) begin // MADD / MSUB / NMADD / NMSUB - a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i]; + a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i]; b[i] = datab[i]; c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i]; end else begin @@ -81,7 +81,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( b[i] = dataa[i]; c[i] = is_sub ? 
{~datab[i][31], datab[i][30:0]} : datab[i]; end - end + end end end @@ -90,15 +90,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( assign data_in[i][32 +: 32] = b[i]; assign data_in[i][64 +: 32] = c[i]; end - + VX_pe_serializer #( - .NUM_LANES (NUM_LANES), - .NUM_PES (NUM_PES), + .NUM_LANES (NUM_LANES), + .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FMA), .DATA_IN_WIDTH(3*32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG (1) + .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0) ) pe_serializer ( .clk (clk), .reset (reset), @@ -123,7 +123,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( fflags_t [NUM_LANES-1:0] per_lane_fflags; `ifdef QUARTUS - + for (genvar i = 0; i < NUM_PES; ++i) begin acl_fmadd fmadd ( .clk (clk), @@ -136,7 +136,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( ); assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x; end - + assign has_fflags = 0; assign per_lane_fflags = 'x; @@ -144,7 +144,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin wire [2:0] tuser; - + xil_fma fma ( .aclk (clk), .aclken (pe_enable), @@ -172,15 +172,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `UNUSED_VAR (r) fflags_t f; - always @(*) begin + always @(*) begin dpi_fmadd ( - pe_enable, - int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, - {32'hffffffff, pe_data_in[i][32 +: 32]}, - {32'hffffffff, pe_data_in[i][64 +: 32]}, - frm, - r, + pe_enable, + int'(0), + {32'hffffffff, pe_data_in[i][0 +: 32]}, + {32'hffffffff, pe_data_in[i][32 +: 32]}, + {32'hffffffff, pe_data_in[i][64 +: 32]}, + frm, + r, f ); end diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index cb8dddfb0..9ee7f1a2c 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -105,7 +105,7 @@ module VX_fpu_fpnew `UNUSED_VAR (fmt) always @(*) begin - fpu_op = 'x; + fpu_op = fpnew_pkg::operation_e'('x); fpu_rnd = frm; fpu_op_mod = 0; fpu_has_fflags = 1; diff --git a/hw/rtl/interfaces/VX_commit_sched_if.sv 
b/hw/rtl/interfaces/VX_commit_sched_if.sv index 487a3c6c0..eab794c93 100644 --- a/hw/rtl/interfaces/VX_commit_sched_if.sv +++ b/hw/rtl/interfaces/VX_commit_sched_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,17 +15,14 @@ interface VX_commit_sched_if (); - wire [`ISSUE_WIDTH-1:0] committed; - wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] committed_wid; + wire [`NUM_WARPS-1:0] committed_warps; modport master ( - output committed, - output committed_wid + output committed_warps ); modport slave ( - input committed, - input committed_wid + input committed_warps ); endinterface diff --git a/hw/rtl/interfaces/VX_decode_if.sv b/hw/rtl/interfaces/VX_decode_if.sv index b35fe002f..e99e073fd 100644 --- a/hw/rtl/interfaces/VX_decode_if.sv +++ b/hw/rtl/interfaces/VX_decode_if.sv @@ -13,11 +13,14 @@ `include "VX_define.vh" -interface VX_decode_if import VX_gpu_pkg::*; (); +interface VX_decode_if import VX_gpu_pkg::*; #( + parameter NUM_WARPS = `NUM_WARPS, + parameter NW_WIDTH = `LOG2UP(NUM_WARPS) +); typedef struct packed { logic [`UUID_WIDTH-1:0] uuid; - logic [`NW_WIDTH-1:0] wid; + logic [NW_WIDTH-1:0] wid; logic [`NUM_THREADS-1:0] tmask; logic [`PC_BITS-1:0] PC; logic [`EX_BITS-1:0] ex_type; @@ -34,7 +37,7 @@ interface VX_decode_if import VX_gpu_pkg::*; (); data_t data; logic ready; `ifndef L1_ENABLE - wire [`NUM_WARPS-1:0] ibuf_pop; + wire [NUM_WARPS-1:0] ibuf_pop; `endif modport master ( diff --git a/hw/rtl/interfaces/VX_pipeline_perf_if.sv b/hw/rtl/interfaces/VX_pipeline_perf_if.sv index 7d4218759..840630353 100644 --- 
a/hw/rtl/interfaces/VX_pipeline_perf_if.sv +++ b/hw/rtl/interfaces/VX_pipeline_perf_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,39 +13,29 @@ `include "VX_define.vh" -interface VX_pipeline_perf_if (); - wire [`PERF_CTR_BITS-1:0] sched_idles; - wire [`PERF_CTR_BITS-1:0] sched_stalls; - wire [`PERF_CTR_BITS-1:0] ibf_stalls; - wire [`PERF_CTR_BITS-1:0] scb_stalls; - wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS]; - wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS]; +interface VX_pipeline_perf_if import VX_gpu_pkg::*; (); + sched_perf_t sched; + issue_perf_t issue; wire [`PERF_CTR_BITS-1:0] ifetches; wire [`PERF_CTR_BITS-1:0] loads; - wire [`PERF_CTR_BITS-1:0] stores; + wire [`PERF_CTR_BITS-1:0] stores; wire [`PERF_CTR_BITS-1:0] ifetch_latency; wire [`PERF_CTR_BITS-1:0] load_latency; - modport schedule ( - output sched_idles, - output sched_stalls - ); - - modport issue ( - output ibf_stalls, - output scb_stalls, - output units_uses, - output sfu_uses + modport master ( + output sched, + output issue, + output ifetches, + output loads, + output stores, + output ifetch_latency, + output load_latency ); modport slave ( - input sched_idles, - input sched_stalls, - input ibf_stalls, - input scb_stalls, - input units_uses, - input sfu_uses, + input sched, + input issue, input ifetches, input loads, input stores, diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 6c70770cb..6e9abf597 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -1,10 +1,10 @@ 
// Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,11 +14,11 @@ `include "VX_define.vh" `TRACING_OFF -module VX_avs_adapter #( - parameter DATA_WIDTH = 1, - parameter ADDR_WIDTH = 1, +module VX_avs_adapter #( + parameter DATA_WIDTH = 1, + parameter ADDR_WIDTH = 1, parameter BURST_WIDTH = 1, - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, parameter TAG_WIDTH = 1, parameter RD_QUEUE_SIZE = 1, parameter REQ_OUT_BUF = 0, @@ -29,15 +29,15 @@ module VX_avs_adapter #( // Memory request input wire mem_req_valid, - input wire mem_req_rw, - input wire [DATA_WIDTH/8-1:0] mem_req_byteen, + input wire mem_req_rw, + input wire [DATA_WIDTH/8-1:0] mem_req_byteen, input wire [ADDR_WIDTH-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, - // Memory response - output wire mem_rsp_valid, + // Memory response + output wire mem_rsp_valid, output wire [DATA_WIDTH-1:0] mem_rsp_data, output wire [TAG_WIDTH-1:0] mem_rsp_tag, input wire mem_rsp_ready, @@ -60,7 +60,7 @@ module VX_avs_adapter #( localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; // Requests handling ////////////////////////////////////////////////////// - + wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; @@ -70,38 +70,40 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0] bank_req_ready; if (NUM_BANKS > 1) begin - assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; + assign req_bank_sel = 
mem_req_addr[BANK_ADDRW-1:0]; end else begin assign req_bank_sel = '0; end assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS]; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end for (genvar i = 0; i < NUM_BANKS; ++i) begin - VX_pending_size #( + VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( .clk (clk), .reset (reset), .incr (req_queue_push[i]), - .decr (req_queue_pop[i]), + .decr (req_queue_pop[i]), + `UNUSED_PIN (empty), + `UNUSED_PIN (alm_empty), .full (req_queue_going_full[i]), - .size (req_queue_size[i]), - `UNUSED_PIN (empty) - ); + `UNUSED_PIN (alm_full), + .size (req_queue_size[i]) + ); `UNUSED_VAR (req_queue_size) - + VX_fifo_queue #( .DATAW (TAG_WIDTH), .DEPTH (RD_QUEUE_SIZE) ) rd_req_queue ( .clk (clk), .reset (reset), - .push (req_queue_push[i]), + .push (req_queue_push[i]), .pop (req_queue_pop[i]), .data_in (mem_req_tag), .data_out (req_queue_tag_out[i]), @@ -111,9 +113,9 @@ module VX_avs_adapter #( `UNUSED_PIN (alm_full), `UNUSED_PIN (size) ); - end + end - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin wire valid_out; wire rw_out; wire [DATA_SIZE-1:0] byteen_out; @@ -174,7 +176,7 @@ module VX_avs_adapter #( .reset (reset), .push (avs_readdatavalid[i]), .pop (req_queue_pop[i]), - .data_in (avs_readdata[i]), + .data_in (avs_readdata[i]), .data_out (rsp_queue_data_out[i]), .empty (rsp_queue_empty[i]), `UNUSED_PIN (full), @@ -183,7 +185,7 @@ module VX_avs_adapter #( `UNUSED_PIN (size) ); end - + for (genvar i = 0; i < NUM_BANKS; ++i) begin assign rsp_arb_valid_in[i] = !rsp_queue_empty[i]; assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]}; diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 63b621361..c4a42da14 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ 
b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,22 +16,21 @@ `TRACING_OFF module VX_cyclic_arbiter #( parameter NUM_REQS = 1, - parameter LOCK_ENABLE = 0, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, input wire reset, - input wire [NUM_REQS-1:0] requests, + input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, + output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid, - input wire grant_unlock + input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - + `UNUSED_VAR (reset) + assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; @@ -45,10 +44,10 @@ module VX_cyclic_arbiter #( always @(posedge clk) begin if (reset) begin grant_index_r <= '0; - end else begin + end else begin if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; - end else if (!LOCK_ENABLE || ~grant_valid || grant_unlock) begin + end else if (~grant_valid || grant_ready) begin grant_index_r <= grant_index_r + LOG_NUM_REQS'(1); end end @@ -60,11 +59,11 @@ module VX_cyclic_arbiter #( grant_onehot_r[grant_index_r] = 1'b1; end - assign grant_index = grant_index_r; + assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; assign grant_valid = requests[grant_index_r]; end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 
8ecfd8378..fa11a541f 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,20 +17,21 @@ module VX_dp_ram #( parameter DATAW = 1, parameter SIZE = 1, + parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, parameter NO_RWCHECK = 0, - parameter LUTRAM = 0, + parameter LUTRAM = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, parameter ADDRW = `LOG2UP(SIZE) -) ( +) ( input wire clk, input wire read, input wire write, input wire [WRENW-1:0] wren, - input wire [ADDRW-1:0] waddr, + input wire [ADDRW-1:0] waddr, input wire [DATAW-1:0] wdata, input wire [ADDRW-1:0] raddr, output wire [DATAW-1:0] rdata @@ -48,16 +49,16 @@ module VX_dp_ram #( ram[i] = INIT_VALUE; \ end \ end - + `UNUSED_VAR (read) `ifdef SYNTHESIS if (WRENW > 1) begin `ifdef QUARTUS if (LUTRAM != 0) begin - if (OUT_REG != 0) begin + if (OUT_REG != 0) begin reg [DATAW-1:0] rdata_r; - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -72,7 +73,7 @@ module VX_dp_ram #( end assign rdata = rdata_r; end else begin - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -87,7 +88,7 @@ module VX_dp_ram #( end else begin if (OUT_REG != 0) begin reg [DATAW-1:0] rdata_r; - 
reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; + reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -103,7 +104,7 @@ module VX_dp_ram #( assign rdata = rdata_r; end else begin if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -115,7 +116,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end else begin - reg [WRENW-1:0][WSELW-1:0] ram [SIZE-1:0]; + reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -132,9 +133,9 @@ module VX_dp_ram #( `else // default synthesis if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0]; + `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION - if (OUT_REG != 0) begin + if (OUT_REG != 0) begin reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (write) begin @@ -161,7 +162,7 @@ module VX_dp_ram #( end end else begin if (OUT_REG != 0) begin - reg [DATAW-1:0] ram [SIZE-1:0]; + reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; reg [DATAW-1:0] rdata_r; `RAM_INITIALIZATION always @(posedge clk) begin @@ -178,7 +179,7 @@ module VX_dp_ram #( assign rdata = rdata_r; end else begin if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -190,7 +191,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end else begin - reg [DATAW-1:0] ram [SIZE-1:0]; + reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -208,9 +209,9 @@ module VX_dp_ram #( end else begin // (WRENW == 1) if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [SIZE-1:0]; + `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; 
`RAM_INITIALIZATION - if (OUT_REG != 0) begin + if (OUT_REG != 0) begin reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (write) begin @@ -231,7 +232,7 @@ module VX_dp_ram #( end end else begin if (OUT_REG != 0) begin - reg [DATAW-1:0] ram [SIZE-1:0]; + reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; reg [DATAW-1:0] rdata_r; `RAM_INITIALIZATION always @(posedge clk) begin @@ -245,7 +246,7 @@ module VX_dp_ram #( assign rdata = rdata_r; end else begin if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -254,7 +255,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end else begin - reg [DATAW-1:0] ram [SIZE-1:0]; + reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -265,10 +266,10 @@ module VX_dp_ram #( end end end - end + end `else // RAM emulation - reg [DATAW-1:0] ram [SIZE-1:0]; + reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; `RAM_INITIALIZATION wire [DATAW-1:0] ram_n; @@ -276,8 +277,8 @@ module VX_dp_ram #( assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; end - if (OUT_REG != 0) begin - reg [DATAW-1:0] rdata_r; + if (OUT_REG != 0) begin + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (write) begin ram[waddr] <= ram_n; @@ -287,7 +288,7 @@ module VX_dp_ram #( end end assign rdata = rdata_r; - end else begin + end else begin reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -298,7 +299,7 @@ module VX_dp_ram #( prev_write <= (| wren); prev_data <= ram[waddr]; prev_waddr <= waddr; - end + end if (LUTRAM || !NO_RWCHECK) begin `UNUSED_VAR (prev_write) `UNUSED_VAR (prev_data) diff --git a/hw/rtl/libs/VX_dp_ram_rst.sv b/hw/rtl/libs/VX_dp_ram_rst.sv new file mode 100644 index 000000000..e7598dbe6 --- /dev/null +++ b/hw/rtl/libs/VX_dp_ram_rst.sv @@ -0,0 +1,115 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +`TRACING_OFF +module VX_dp_ram_rst #( + parameter DATAW = 1, + parameter SIZE = 1, + parameter ADDR_MIN = 0, + parameter WRENW = 1, + parameter OUT_REG = 0, + parameter NO_RWCHECK = 0, + parameter LUTRAM = 0, + parameter INIT_ENABLE = 0, + parameter INIT_FILE = "", + parameter [DATAW-1:0] INIT_VALUE = 0, + parameter ADDRW = `LOG2UP(SIZE) +) ( + input wire clk, + input wire reset, + input wire read, + input wire write, + input wire [WRENW-1:0] wren, + input wire [ADDRW-1:0] waddr, + input wire [DATAW-1:0] wdata, + input wire [ADDRW-1:0] raddr, + output wire [DATAW-1:0] rdata +); + localparam WSELW = DATAW / WRENW; + `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) + +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin \ + if (INIT_FILE != "") begin \ + initial $readmemh(INIT_FILE, ram); \ + end else begin \ + initial \ + for (integer i = 0; i < SIZE; ++i) \ + ram[i] = INIT_VALUE; \ + end \ + end + + `UNUSED_VAR (read) + + // RAM emulation + reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; + `RAM_INITIALIZATION + + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin + assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; + end + + if (OUT_REG != 0) begin + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + rdata_r <= '0; + end else begin + if (write) begin + ram[waddr] <= ram_n; + end + if (read) begin + rdata_r <= ram[raddr]; + end + end + end + assign rdata = rdata_r; + end else begin + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; + always @(posedge clk) begin + if (reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + if (write) begin + ram[waddr] <= ram_n; + end + prev_write <= (| wren); + prev_data <= ram[waddr]; + prev_waddr <= waddr; + end + end + if (LUTRAM || !NO_RWCHECK) begin + `UNUSED_VAR (prev_write) + `UNUSED_VAR (prev_data) + `UNUSED_VAR (prev_waddr) + assign rdata = ram[raddr]; + end else begin + assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; + end + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index b2153b6f3..01464840c 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,14 +19,14 @@ module VX_elastic_buffer #( parameter SIZE = 1, parameter OUT_REG = 0, parameter LUTRAM = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, - + output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out @@ -55,7 +55,7 @@ module VX_elastic_buffer #( .ready_out (ready_out) ); - end else if (SIZE == 2) begin + end else if (SIZE == 2 && LUTRAM == 0) begin VX_skid_buffer #( .DATAW (DATAW), @@ -71,9 +71,9 @@ module VX_elastic_buffer #( .data_out (data_out), .ready_out (ready_out) ); - + end else begin - + wire empty, full; wire [DATAW-1:0] data_out_t; @@ -93,7 +93,7 @@ module VX_elastic_buffer #( .push (push), .pop (pop), .data_in(data_in), - .data_out(data_out_t), + .data_out(data_out_t), .empty (empty), .full (full), `UNUSED_PIN (alm_empty), @@ -105,15 +105,15 @@ module VX_elastic_buffer #( VX_elastic_buffer #( .DATAW (DATAW), - .SIZE (OUT_REG == 2) + .SIZE ((OUT_REG == 2) ? 1 : 0) ) out_buf ( .clk (clk), .reset (reset), .valid_in (~empty), .data_in (data_out_t), - .ready_in (ready_out_t), + .ready_in (ready_out_t), .valid_out (valid_out), - .data_out (data_out), + .data_out (data_out), .ready_out (ready_out) ); diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv index c063b2fbc..838563dd8 100644 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ b/hw/rtl/libs/VX_fair_arbiter.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,53 +16,52 @@ `TRACING_OFF module VX_fair_arbiter #( parameter NUM_REQS = 1, - parameter LOCK_ENABLE = 0, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, input wire reset, - input wire [NUM_REQS-1:0] requests, + input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, + output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid, - input wire grant_unlock + input wire grant_ready ); - if (NUM_REQS == 1) begin - + if (NUM_REQS == 1) begin + `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - `UNUSED_VAR (grant_unlock) + `UNUSED_VAR (reset) + `UNUSED_VAR (grant_ready) assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin - reg [NUM_REQS-1:0] buffer; + reg [NUM_REQS-1:0] grant_mask; - wire [NUM_REQS-1:0] buffer_qual = buffer & requests; - wire [NUM_REQS-1:0] requests_qual = (| buffer) ? buffer_qual : requests; - wire [NUM_REQS-1:0] buffer_n = requests_qual & ~grant_onehot; + wire [NUM_REQS-1:0] requests_rem = requests & ~grant_mask; + wire rem_valid = (| requests_rem); + wire [NUM_REQS-1:0] requests_qual = rem_valid ? requests_rem : requests; always @(posedge clk) begin if (reset) begin - buffer <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin - buffer <= buffer_n; + grant_mask <= '0; + end else if (grant_ready) begin + grant_mask <= rem_valid ? 
(grant_mask | grant_onehot) : grant_onehot; end end - + VX_priority_arbiter #( .NUM_REQS (NUM_REQS) ) priority_arbiter ( - .requests (requests_qual), + .requests (requests_qual), .grant_index (grant_index), .grant_onehot (grant_onehot), .grant_valid (grant_valid) ); end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 3a29410a9..a430d32f7 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,28 +22,28 @@ module VX_fifo_queue #( parameter OUT_REG = 0, parameter LUTRAM = 1, parameter SIZEW = `CLOG2(DEPTH+1) -) ( +) ( input wire clk, - input wire reset, + input wire reset, input wire push, - input wire pop, + input wire pop, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, - output wire empty, + output wire empty, output wire alm_empty, - output wire full, + output wire full, output wire alm_full, output wire [SIZEW-1:0] size -); - - localparam ADDRW = `CLOG2(DEPTH); +); + + localparam ADDRW = `CLOG2(DEPTH); `STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!")) `STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!")) `STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!")) `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!")) - + if (DEPTH == 1) begin reg [DATAW-1:0] head_r; @@ -52,7 +52,7 @@ module VX_fifo_queue #( always @(posedge clk) begin if 
(reset) begin head_r <= '0; - size_r <= '0; + size_r <= '0; end else begin `ASSERT(~push || ~full, ("runtime error: writing to a full queue")); `ASSERT(~pop || ~empty, ("runtime error: reading an empty queue")); @@ -63,11 +63,11 @@ module VX_fifo_queue #( end else if (pop) begin size_r <= '0; end - if (push) begin + if (push) begin head_r <= data_in; end end - end + end assign data_out = head_r; assign empty = (size_r == 0); @@ -77,7 +77,7 @@ module VX_fifo_queue #( assign size = size_r; end else begin - + reg empty_r, alm_empty_r; reg full_r, alm_full_r; reg [ADDRW-1:0] used_r; @@ -86,8 +86,8 @@ module VX_fifo_queue #( always @(posedge clk) begin if (reset) begin empty_r <= 1; - alm_empty_r <= 1; - full_r <= 0; + alm_empty_r <= 1; + full_r <= 0; alm_full_r <= 0; used_r <= '0; end else begin @@ -106,21 +106,21 @@ module VX_fifo_queue #( end else if (pop) begin full_r <= 0; if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; + alm_full_r <= 0; if (used_r == ADDRW'(1)) empty_r <= 1; if (used_r == ADDRW'(ALM_EMPTY+1)) alm_empty_r <= 1; - end - used_r <= used_n; - end + end + used_r <= used_n; + end end - if (DEPTH == 2) begin + if (DEPTH == 2 && LUTRAM == 0) begin assign used_n = used_r ^ (push ^ pop); - if (0 == OUT_REG) begin + if (0 == OUT_REG) begin reg [1:0][DATAW-1:0] shift_reg; @@ -131,8 +131,8 @@ module VX_fifo_queue #( end end - assign data_out = shift_reg[!used_r[0]]; - + assign data_out = shift_reg[!used_r[0]]; + end else begin reg [DATAW-1:0] data_out_r; @@ -152,16 +152,16 @@ module VX_fifo_queue #( assign data_out = data_out_r; end - + end else begin - + assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop))); - if (0 == OUT_REG) begin + if (0 == OUT_REG) begin reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] wr_ptr_r; - + always @(posedge clk) begin if (reset) begin rd_ptr_r <= '0; @@ -169,7 +169,7 @@ module VX_fifo_queue #( end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); rd_ptr_r <= rd_ptr_r + ADDRW'(pop); - end + end end VX_dp_ram #( @@ 
-179,8 +179,8 @@ module VX_fifo_queue #( ) dp_ram ( .clk(clk), .read (1'b1), - .write (push), - `UNUSED_PIN (wren), + .write (push), + `UNUSED_PIN (wren), .waddr (wr_ptr_r), .wdata (data_in), .raddr (rd_ptr_r), @@ -196,18 +196,18 @@ module VX_fifo_queue #( reg [ADDRW-1:0] rd_ptr_n_r; always @(posedge clk) begin - if (reset) begin + if (reset) begin wr_ptr_r <= '0; rd_ptr_r <= '0; rd_ptr_n_r <= 1; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - if (DEPTH > 2) begin + rd_ptr_r <= rd_ptr_n_r; + if (DEPTH > 2) begin rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); end else begin // (DEPTH == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; + rd_ptr_n_r <= ~rd_ptr_n_r; end end end @@ -227,13 +227,13 @@ module VX_fifo_queue #( ) dp_ram ( .clk (clk), .read (1'b1), - .write (push), - `UNUSED_PIN (wren), + .write (push), + `UNUSED_PIN (wren), .waddr (wr_ptr_r), .wdata (data_in), .raddr (rd_ptr_n_r), .rdata (dout) - ); + ); always @(posedge clk) begin if (push && (empty_r || (going_empty && pop))) begin @@ -246,12 +246,12 @@ module VX_fifo_queue #( assign data_out = dout_r; end end - - assign empty = empty_r; + + assign empty = empty_r; assign alm_empty = alm_empty_r; assign full = full_r; assign alm_full = alm_full_r; - assign size = {full_r, used_r}; + assign size = {full_r, used_r}; end endmodule diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 4573efb3e..a1f7be4a0 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,29 +16,27 @@ `TRACING_OFF module VX_generic_arbiter #( parameter NUM_REQS = 1, - parameter LOCK_ENABLE = 0, - parameter `STRING TYPE = "P", + parameter `STRING TYPE = "P", parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, - input wire reset, + input wire reset, input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, + output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid, - input wire grant_unlock + input wire grant_ready ); if (TYPE == "P") begin - `UNUSED_PARAM (LOCK_ENABLE) `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR (grant_unlock) + `UNUSED_VAR (grant_ready) VX_priority_arbiter #( .NUM_REQS (NUM_REQS) ) priority_arbiter ( - .requests (requests), + .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), .grant_onehot (grant_onehot) @@ -47,68 +45,64 @@ module VX_generic_arbiter #( end else if (TYPE == "R") begin VX_rr_arbiter #( - .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (LOCK_ENABLE) + .NUM_REQS (NUM_REQS) ) rr_arbiter ( .clk (clk), - .reset (reset), - .requests (requests), + .reset (reset), + .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), .grant_onehot (grant_onehot), - .grant_unlock (grant_unlock) + .grant_ready (grant_ready) ); end else if (TYPE == "F") begin VX_fair_arbiter #( - .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (LOCK_ENABLE) + .NUM_REQS (NUM_REQS) ) fair_arbiter ( .clk (clk), .reset (reset), - .requests (requests), + .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), .grant_onehot (grant_onehot), - .grant_unlock (grant_unlock) + .grant_ready (grant_ready) ); end else 
if (TYPE == "M") begin VX_matrix_arbiter #( - .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (LOCK_ENABLE) + .NUM_REQS (NUM_REQS) ) matrix_arbiter ( .clk (clk), .reset (reset), - .requests (requests), + .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), .grant_onehot (grant_onehot), - .grant_unlock (grant_unlock) + .grant_ready (grant_ready) ); end else if (TYPE == "C") begin VX_cyclic_arbiter #( - .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (LOCK_ENABLE) + .NUM_REQS (NUM_REQS) ) cyclic_arbiter ( .clk (clk), .reset (reset), - .requests (requests), + .requests (requests), .grant_valid (grant_valid), .grant_index (grant_index), .grant_onehot (grant_onehot), - .grant_unlock (grant_unlock) + .grant_ready (grant_ready) ); end else begin `ERROR(("invalid parameter")); - + end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 9333c1ac5..23f9ea2a0 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,52 +16,51 @@ `TRACING_OFF module VX_matrix_arbiter #( parameter NUM_REQS = 1, - parameter LOCK_ENABLE = 0, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, - input wire reset, + input wire reset, input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, + output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid, - input wire grant_unlock + input wire grant_ready ); if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR (grant_unlock) - + `UNUSED_VAR (grant_ready) + assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; end else begin - reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; + reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; wire [NUM_REQS-1:0] grant_unqual; - - for (genvar i = 0; i < NUM_REQS; ++i) begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin if (j > i) begin assign pri[j][i] = requests[i] && state[i][j]; - end + end else if (j < i) begin assign pri[j][i] = requests[i] && !state[j][i]; - end + end else begin - assign pri[j][i] = 0; + assign pri[j][i] = 0; end end assign grant_unqual[i] = requests[i] && !(| pri[i]); end - - for (genvar i = 0; i < NUM_REQS; ++i) begin + + for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = i + 1; j < NUM_REQS; ++j) begin - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state[i][j] <= '0; end else begin state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i]; @@ -70,20 +69,15 @@ module VX_matrix_arbiter #( end end - if (LOCK_ENABLE == 0) begin - `UNUSED_VAR (grant_unlock) - assign grant_onehot = grant_unqual; - end else begin - reg [NUM_REQS-1:0] grant_unqual_prev; - always @(posedge clk) begin - if (reset) begin - grant_unqual_prev <= '0; - end else if (grant_unlock) begin - grant_unqual_prev <= grant_unqual; - end + reg 
[NUM_REQS-1:0] grant_unqual_prev; + always @(posedge clk) begin + if (reset) begin + grant_unqual_prev <= '0; + end else if (grant_ready) begin + grant_unqual_prev <= grant_unqual; end - assign grant_onehot = grant_unlock ? grant_unqual : grant_unqual_prev; end + assign grant_onehot = grant_ready ? grant_unqual : grant_unqual_prev; VX_onehot_encoder #( .N (NUM_REQS) @@ -96,6 +90,6 @@ module VX_matrix_arbiter #( assign grant_valid = (| requests); end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 79ee9fa5f..17eb01642 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,13 +24,13 @@ module VX_mem_coalescer #( parameter TAG_WIDTH = 8, parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter QUEUE_SIZE = 8, - + parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8, parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8, - parameter OUT_REQS = (NUM_REQS * DATA_IN_WIDTH) / DATA_OUT_WIDTH, - parameter BATCH_SIZE = DATA_OUT_SIZE / DATA_IN_SIZE, - parameter BATCH_SIZE_W = `LOG2UP(BATCH_SIZE), - parameter OUT_ADDR_WIDTH= ADDR_WIDTH - BATCH_SIZE_W, + parameter DATA_RATIO = DATA_OUT_SIZE / DATA_IN_SIZE, + parameter DATA_RATIO_W = `LOG2UP(DATA_RATIO), + parameter OUT_REQS = NUM_REQS / DATA_RATIO, + parameter OUT_ADDR_WIDTH= ADDR_WIDTH - DATA_RATIO_W, parameter QUEUE_ADDRW = `CLOG2(QUEUE_SIZE), parameter OUT_TAG_WIDTH = UUID_WIDTH + QUEUE_ADDRW ) ( @@ -45,7 +45,7 @@ module VX_mem_coalescer #( input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr, input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype, input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data, - input wire [TAG_WIDTH-1:0] in_req_tag, + input wire [TAG_WIDTH-1:0] in_req_tag, output wire in_req_ready, // Input response @@ -58,7 +58,7 @@ module VX_mem_coalescer #( // Output request output wire out_req_valid, output wire out_req_rw, - output wire [OUT_REQS-1:0] out_req_mask, + output wire [OUT_REQS-1:0] out_req_mask, output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen, output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr, output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype, @@ -78,27 +78,27 @@ module VX_mem_coalescer #( `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask")); `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask")); - - localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; - localparam NUM_REQS_W = `LOG2UP(NUM_REQS); + + localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; + localparam NUM_REQS_W = 
`LOG2UP(NUM_REQS); // tag + mask + offest - localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * BATCH_SIZE_W); + localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W); localparam STATE_SETUP = 0; localparam STATE_SEND = 1; - - logic state_r, state_n; - - logic out_req_valid_r, out_req_valid_n; - logic out_req_rw_r, out_req_rw_n; - logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; - logic [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; - logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; - logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n; - logic [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n; - logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; - logic in_req_ready_n; + reg state_r, state_n; + + reg out_req_valid_r, out_req_valid_n; + reg out_req_rw_r, out_req_rw_n; + reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; + reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; + reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n; + reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; + reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n; + reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; + + reg in_req_ready_n; wire ibuf_push; wire ibuf_pop; @@ -108,33 +108,45 @@ module VX_mem_coalescer #( wire ibuf_empty; wire [IBUF_DATA_WIDTH-1:0] ibuf_din; wire [IBUF_DATA_WIDTH-1:0] ibuf_dout; - + logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n; + logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n; wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx; wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base; - wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] in_addr_offset; + 
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; for (genvar i = 0; i < NUM_REQS; i++) begin - assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:BATCH_SIZE_W]; - assign in_addr_offset[i] = in_req_addr[i][BATCH_SIZE_W-1:0]; + assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W]; + assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; end for (genvar i = 0; i < OUT_REQS; ++i) begin - wire [BATCH_SIZE-1:0] batch_mask = in_req_mask[BATCH_SIZE * i +: BATCH_SIZE] & ~processed_mask_r[BATCH_SIZE * i +: BATCH_SIZE]; - wire [BATCH_SIZE_W-1:0] batch_idx; + wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO]; + wire [DATA_RATIO_W-1:0] batch_idx; VX_priority_encoder #( - .N (BATCH_SIZE) + .N (DATA_RATIO) ) priority_encoder ( .data_in (batch_mask), - .index (batch_idx), + .index (batch_idx), `UNUSED_PIN (onehot), .valid_out (batch_valid_n[i]) ); - assign seed_idx[i] = NUM_REQS_W'(BATCH_SIZE * i) + NUM_REQS_W'(batch_idx); + assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx); + end + + for (genvar i = 0; i < OUT_REQS; ++i) begin + assign seed_addr_n[i] = in_addr_base[seed_idx[i]]; + assign seed_atype_n[i] = in_req_atype[seed_idx[i]]; + end + + for (genvar i = 0; i < OUT_REQS; ++i) begin + for (genvar j = 0; j < DATA_RATIO; ++j) begin + assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]); + end end always @(posedge clk) begin @@ -144,12 +156,13 @@ module VX_mem_coalescer #( out_req_valid_r <= 0; end else begin state_r <= state_n; - out_req_valid_r <= out_req_valid_n; batch_valid_r <= batch_valid_n; seed_addr_r <= seed_addr_n; - seed_atype_r <= seed_atype_n; - out_req_rw_r <= out_req_rw_n; - out_req_mask_r <= out_req_mask_n; + seed_atype_r <= seed_atype_n; + addr_matches_r <= addr_matches_n; + out_req_valid_r <= out_req_valid_n; + out_req_mask_r <= out_req_mask_n; + out_req_rw_r <= out_req_rw_n; out_req_addr_r <= 
out_req_addr_n; out_req_atype_r <= out_req_atype_n; out_req_byteen_r <= out_req_byteen_n; @@ -159,84 +172,77 @@ module VX_mem_coalescer #( end end - logic [NUM_REQS-1:0] addr_matches; + wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r; + + reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; + reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged; always @(*) begin - addr_matches = '0; + req_byteen_merged = '0; + req_data_merged = 'x; for (integer i = 0; i < OUT_REQS; ++i) begin - for (integer j = 0; j < BATCH_SIZE; j++) begin - if (in_addr_base[BATCH_SIZE * i + j] == seed_addr_r[i]) begin - addr_matches[BATCH_SIZE * i + j] = 1; + for (integer j = 0; j < DATA_RATIO; ++j) begin + if (current_pmask[i * DATA_RATIO + j]) begin + for (integer k = 0; k < DATA_IN_SIZE; ++k) begin + if (in_req_byteen[DATA_RATIO * i + j][k]) begin + req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; + end + end end end end end - wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches; + wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask; + for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin + assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i]; + end + wire batch_completed = ~(| pending_mask); always @(*) begin state_n = state_r; + out_req_valid_n = out_req_valid_r; - seed_addr_n = seed_addr_r; - seed_atype_n = seed_atype_r; - out_req_rw_n = out_req_rw_r; - out_req_mask_n = out_req_mask_r; + out_req_mask_n = out_req_mask_r; + out_req_rw_n = out_req_rw_r; out_req_addr_n = out_req_addr_r; out_req_atype_n = out_req_atype_r; out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; + processed_mask_n = processed_mask_r; in_req_ready_n = 0; case (state_r) - STATE_SETUP: begin - // find the next seed address - for (integer i = 0; i 
< OUT_REQS; ++i) begin - seed_addr_n[i] = in_addr_base[seed_idx[i]]; - seed_atype_n[i] = in_req_atype[seed_idx[i]]; - end + STATE_SETUP: begin // wait for pending outgoing request to submit if (out_req_valid && out_req_ready) begin out_req_valid_n = 0; end - if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin + if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin state_n = STATE_SEND; end end default/*STATE_SEND*/: begin out_req_valid_n = 1; - out_req_rw_n = in_req_rw; - out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - in_req_ready_n = 1; - out_req_byteen_n = '0; - out_req_data_n = 'x; - for (integer i = 0; i < OUT_REQS; ++i) begin - for (integer j = 0; j < BATCH_SIZE; j++) begin - if (in_req_mask[BATCH_SIZE * i + j]) begin - if (addr_matches[BATCH_SIZE * i + j]) begin - for (integer k = 0; k < DATA_IN_SIZE; ++k) begin - if (in_req_byteen[BATCH_SIZE * i + j][k]) begin - out_req_byteen_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE + k +: 1] = 1'b1; - out_req_data_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH + k * 8 +: 8] = in_req_data[BATCH_SIZE * i + j][k * 8 +: 8]; - end - end - end else begin - if (!processed_mask_r[BATCH_SIZE * i + j]) begin - in_req_ready_n = 0; - end - end - end - end - out_req_mask_n[i] = batch_valid_r[i]; - out_req_addr_n[i] = seed_addr_r[i]; - out_req_atype_n[i]= seed_atype_r[i]; - end - if (in_req_ready_n) begin + out_req_mask_n = batch_valid_r; + out_req_rw_n = in_req_rw; + out_req_addr_n = seed_addr_r; + out_req_atype_n = seed_atype_r; + out_req_byteen_n= req_byteen_merged; + out_req_data_n = req_data_merged; + out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; + + in_req_ready_n = batch_completed; + + if (batch_completed) begin processed_mask_n = '0; end else begin processed_mask_n = processed_mask_r | current_pmask; end + state_n = STATE_SETUP; end endcase @@ -246,13 +252,15 @@ module VX_mem_coalescer #( wire out_rsp_eop; - assign ibuf_push = (state_r == STATE_SEND) 
&& ~in_req_rw; + wire req_sent = (state_r == STATE_SEND); + + assign ibuf_push = req_sent && ~in_req_rw; assign ibuf_pop = out_rsp_fire && out_rsp_eop; - assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0]; + assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0]; wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0]; - wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_din_offset = in_addr_offset; - wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask; + wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_din_offset = in_addr_offset; + wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask; assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset}; @@ -286,7 +294,7 @@ module VX_mem_coalescer #( // unmerge responses - reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask; + reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask; wire [OUT_REQS-1:0] rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~out_rsp_mask; assign out_rsp_eop = ~(| rsp_rem_mask_n); @@ -299,21 +307,19 @@ module VX_mem_coalescer #( end end - wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_dout_offset; - reg [NUM_REQS-1:0] ibuf_dout_pmask; + wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_dout_offset; + wire [NUM_REQS-1:0] ibuf_dout_pmask; wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag; assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout; - - logic [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n; - logic [NUM_REQS-1:0] in_rsp_mask_n; - always @(*) begin - for (integer i = 0; i < OUT_REQS; ++i) begin - for (integer j = 0; j < BATCH_SIZE; j++) begin - in_rsp_mask_n[BATCH_SIZE * i + j] = out_rsp_mask[i] && ibuf_dout_pmask[BATCH_SIZE * i + j]; - in_rsp_data_n[BATCH_SIZE * i + j] = out_rsp_data[i][ibuf_dout_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; - end + wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n; + wire [NUM_REQS-1:0] in_rsp_mask_n; + + for (genvar i = 0; i < OUT_REQS; ++i) begin + for (genvar j = 0; j < DATA_RATIO; ++j) begin + assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && 
ibuf_dout_pmask[i * DATA_RATIO + j]; + assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; end end @@ -335,11 +341,11 @@ module VX_mem_coalescer #( assign out_rsp_uuid = '0; end - reg [NUM_REQS-1:0][BATCH_SIZE_W-1:0] out_req_offset; + reg [NUM_REQS-1:0][DATA_RATIO_W-1:0] out_req_offset; reg [NUM_REQS-1:0] out_req_pmask; always @(posedge clk) begin - if (ibuf_push) begin + if (req_sent) begin out_req_offset <= ibuf_din_offset; out_req_pmask <= ibuf_din_pmask; end @@ -351,30 +357,30 @@ module VX_mem_coalescer #( if (out_req_fire) begin if (out_req_rw) begin `TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); + `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); + `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS); `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); + `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); end else begin `TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE(1, (", atype=")); `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); end - `TRACE(1, (", offset=")); + `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); - `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); + `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); if ($countones(out_req_pmask) > 1) begin - `TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid)); - end + `TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); + end end if (out_rsp_fire) begin `TRACE(1, 
("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS); - `TRACE(1, (", offset=")); + `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS); `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)); end diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index b05492231..aa3ef9b2f 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,7 +23,7 @@ module VX_mem_scheduler #( parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), parameter ATYPE_WIDTH = 1, parameter TAG_WIDTH = 8, - parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID + parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter CORE_QUEUE_SIZE= 8, parameter MEM_QUEUE_SIZE= CORE_QUEUE_SIZE, parameter RSP_PARTIAL = 0, @@ -54,7 +54,7 @@ module VX_mem_scheduler #( input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data, input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, - output wire core_req_empty, + output wire core_req_empty, output wire core_req_sent, // Core response @@ -81,7 +81,7 @@ module VX_mem_scheduler #( input wire mem_rsp_valid, input wire [MEM_CHANNELS-1:0] mem_rsp_mask, input wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data, - input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, + input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_rsp_ready ); localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS); @@ -110,7 +110,7 @@ module VX_mem_scheduler #( wire reqq_valid; wire [CORE_REQS-1:0] reqq_mask; - wire reqq_rw; + wire reqq_rw; wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen; wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr; wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype; @@ -118,7 +118,7 @@ module VX_mem_scheduler #( wire [REQQ_TAG_WIDTH-1:0] reqq_tag; wire reqq_ready; - wire reqq_valid_s; + wire reqq_valid_s; wire [MERGED_REQS-1:0] reqq_mask_s; wire reqq_rw_s; wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s; @@ -139,9 +139,9 @@ module VX_mem_scheduler #( wire mem_req_ready_s; wire mem_rsp_valid_s; - wire [CORE_REQS-1:0] mem_rsp_mask_s; - wire [CORE_REQS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s; - wire [REQQ_TAG_WIDTH-1:0] mem_rsp_tag_s; + wire [CORE_CHANNELS-1:0] mem_rsp_mask_s; + wire [CORE_CHANNELS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s; + wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; wire mem_rsp_ready_s; wire crsp_valid; @@ -159,7 +159,7 @@ module 
VX_mem_scheduler #( wire ibuf_ready = (core_req_rw || ~ibuf_full); wire reqq_valid_in = core_req_valid && ibuf_ready; wire reqq_ready_in; - + wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u; if (UUID_WIDTH != 0) begin assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; @@ -169,7 +169,7 @@ module VX_mem_scheduler #( VX_elastic_buffer #( .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), - .SIZE (CORE_QUEUE_SIZE), + .SIZE (CORE_QUEUE_SIZE), .OUT_REG (1) ) req_queue ( .clk (clk), @@ -188,7 +188,7 @@ module VX_mem_scheduler #( // no pending requests assign core_req_empty = !reqq_valid && ibuf_empty; - // notify request submisison + // notify request submisison assign core_req_sent = reqq_valid && reqq_ready; // Index buffer /////////////////////////////////////////////////////////// @@ -219,15 +219,15 @@ module VX_mem_scheduler #( `UNUSED_VAR (ibuf_empty) - // Handle memory coalescing /////////////////////////////////////////////// + // Handle memory coalescing /////////////////////////////////////////////// if (COALESCE_ENABLE) begin - + `RESET_RELAY (coalescer_reset, reset); VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), - .NUM_REQS (CORE_REQS), + .NUM_REQS (CORE_REQS), .DATA_IN_SIZE (WORD_SIZE), .DATA_OUT_SIZE (LINE_SIZE), .ADDR_WIDTH (ADDR_WIDTH), @@ -238,7 +238,7 @@ module VX_mem_scheduler #( ) coalescer ( .clk (clk), .reset (coalescer_reset), - + // Input request .in_req_valid (reqq_valid), .in_req_mask (reqq_mask), @@ -280,7 +280,7 @@ module VX_mem_scheduler #( assign reqq_valid_s = reqq_valid; assign reqq_mask_s = reqq_mask; - assign reqq_rw_s = reqq_rw; + assign reqq_rw_s = reqq_rw; assign reqq_byteen_s= reqq_byteen; assign reqq_addr_s = reqq_addr; assign reqq_atype_s = reqq_atype; @@ -292,18 +292,18 @@ module VX_mem_scheduler #( assign mem_rsp_mask_s = mem_rsp_mask; assign mem_rsp_data_s = mem_rsp_data; assign mem_rsp_tag_s = mem_rsp_tag; - assign mem_rsp_ready = 
mem_rsp_ready_s; + assign mem_rsp_ready = mem_rsp_ready_s; end // Handle memory requests ///////////////////////////////////////////////// wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b; - wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; + wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b; - + wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; for (genvar i = 0; i < MEM_BATCHES; ++i) begin @@ -331,14 +331,19 @@ module VX_mem_scheduler #( assign mem_req_addr_s = mem_req_addr_b[req_batch_idx]; assign mem_req_atype_s = mem_req_atype_b[req_batch_idx]; assign mem_req_data_s = mem_req_data_b[req_batch_idx]; - + if (MEM_BATCHES != 1) begin reg [MEM_BATCH_BITS-1:0] req_batch_idx_r; + + wire is_degenerate_batch = ~(| mem_req_mask_s); + wire mem_req_valid_b = reqq_valid_s && ~is_degenerate_batch; + wire mem_req_ready_b = mem_req_ready_s || is_degenerate_batch; + always @(posedge clk) begin if (reset) begin req_batch_idx_r <= '0; end else begin - if (reqq_valid_s && mem_req_ready_s) begin + if (reqq_valid_s && mem_req_ready_b) begin if (req_sent_all) begin req_batch_idx_r <= '0; end else begin @@ -352,10 +357,10 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs; wire [MEM_BATCH_BITS-1:0] req_batch_idx_last; - for (genvar i = 0; i < MEM_BATCHES; ++i) begin + for (genvar i = 0; i < MEM_BATCHES; ++i) begin assign req_batch_valids[i] = (| mem_req_mask_b[i]); assign req_batch_idxs[i] = MEM_BATCH_BITS'(i); - end + end VX_find_first #( .N (MEM_BATCHES), @@ -368,21 +373,22 @@ module VX_mem_scheduler #( `UNUSED_PIN (valid_out) ); - assign req_batch_idx = req_batch_idx_r; - assign req_sent_all = mem_req_ready_s && (req_batch_idx_r == req_batch_idx_last); + assign mem_req_valid_s = 
mem_req_valid_b; + assign req_batch_idx = req_batch_idx_r; + assign req_sent_all = mem_req_ready_b && (req_batch_idx_r == req_batch_idx_last); assign mem_req_tag_s = {reqq_tag_s, req_batch_idx}; end else begin + assign mem_req_valid_s = reqq_valid_s; assign req_batch_idx = '0; assign req_sent_all = mem_req_ready_s; assign mem_req_tag_s = reqq_tag_s; end - assign mem_req_valid_s = reqq_valid_s; assign reqq_ready_s = req_sent_all; - + VX_elastic_buffer #( .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)), @@ -415,7 +421,7 @@ module VX_mem_scheduler #( localparam j = r % CORE_CHANNELS; assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; end - + assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask; wire rsp_complete = ~(| rsp_rem_mask_n); @@ -457,19 +463,19 @@ module VX_mem_scheduler #( end else begin - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; - reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; + reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin - rsp_store_n = rsp_store[ibuf_raddr]; + rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i]; end end - end - + end + always @(posedge clk) begin if (ibuf_push) begin rsp_orig_mask[ibuf_waddr] <= core_req_mask; @@ -490,10 +496,11 @@ module VX_mem_scheduler #( end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; + end if (UUID_WIDTH != 0) begin - assign crsp_tag = {mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout}; + 
assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout}; end else begin assign crsp_tag = ibuf_dout; end @@ -509,11 +516,11 @@ module VX_mem_scheduler #( ) rsp_buf ( .clk (clk), .reset (reset), - .valid_in (crsp_valid), + .valid_in (crsp_valid), .ready_in (crsp_ready), .data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}), .data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}), - .valid_out (core_rsp_valid), + .valid_out (core_rsp_valid), .ready_out (core_rsp_ready) ); @@ -541,14 +548,14 @@ module VX_mem_scheduler #( end end - if (ibuf_push) begin + if (ibuf_push) begin pending_reqs_time[ibuf_waddr] <= {req_dbg_uuid, ibuf_din, $time}; end for (integer i = 0; i < CORE_QUEUE_SIZE; ++i) begin if (pending_reqs_valid[i]) begin `ASSERT(($time - pending_reqs_time[i][63:0]) < STALL_TIMEOUT, - ("%t: *** %s response timeout: tag=0x%0h (#%0d)", + ("%t: *** %s response timeout: tag=0x%0h (#%0d)", $time, INSTANCE_ID, pending_reqs_time[i][64 +: TAG_ID_WIDTH], pending_reqs_time[i][64+TAG_ID_WIDTH +: `UP(UUID_WIDTH)])); end end @@ -563,8 +570,8 @@ module VX_mem_scheduler #( wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid; if (UUID_WIDTH != 0) begin - assign mem_req_dbg_uuid = mem_req_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH]; - assign mem_rsp_dbg_uuid = mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH]; + assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; + assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin assign mem_req_dbg_uuid = '0; @@ -572,25 +579,27 @@ module VX_mem_scheduler #( assign rsp_dbg_uuid = '0; end + wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr_s = mem_req_tag_s[MEM_BATCH_BITS +: CORE_QUEUE_ADDRW]; + wire mem_req_fire_s = mem_req_valid_s && mem_req_ready_s; always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin `TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, 
core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS); `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); + `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); end else begin `TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); - end - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); + end + `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); + `TRACE(1, ("%d: %s-core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS); `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)); end @@ -601,20 +610,20 @@ module VX_mem_scheduler #( `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS); `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); + `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); end else begin `TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); + `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); end - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr, req_batch_idx, mem_req_dbg_uuid)); - end + `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)); + end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); + `TRACE(1, ("%d: %s-mem-rsp: 
valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS); `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)); end end `endif - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index 9c8fdb9cf..8d9b87c8e 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,131 +19,36 @@ module VX_onehot_mux #( parameter N = 1, parameter MODEL = 1 ) ( - input wire [N-1:0][DATAW-1:0] data_in, - input wire [N-1:0] sel_in, + input wire [N-1:0][DATAW-1:0] data_in, + input wire [N-1:0] sel_in, output wire [DATAW-1:0] data_out -); +); if (N == 1) begin `UNUSED_VAR (sel_in) assign data_out = data_in; - end else if (N == 2) begin - `UNUSED_VAR (sel_in) - assign data_out = sel_in[0] ? 
data_in[0] : data_in[1]; - end else if (N == 3) begin + end else if (MODEL == 1) begin + wire [N-1:0][DATAW-1:0] mask; + for (genvar i = 0; i < N; ++i) begin + assign mask[i] = {DATAW{sel_in[i]}} & data_in[i]; + end + for (genvar i = 0; i < DATAW; ++i) begin + wire [N-1:0] gather; + for (genvar j = 0; j < N; ++j) begin + assign gather[j] = mask[j][i]; + end + assign data_out[i] = (| gather); + end + end else if (MODEL == 2) begin reg [DATAW-1:0] data_out_r; always @(*) begin - case (sel_in) - 3'b001: data_out_r = data_in[0]; - 3'b010: data_out_r = data_in[1]; - 3'b100: data_out_r = data_in[2]; - default: data_out_r = 'x; - endcase - end - assign data_out = data_out_r; - end else if (N == 4) begin - reg [DATAW-1:0] data_out_r; - always @(*) begin - case (sel_in) - 4'b0001: data_out_r = data_in[0]; - 4'b0010: data_out_r = data_in[1]; - 4'b0100: data_out_r = data_in[2]; - 4'b1000: data_out_r = data_in[3]; - default: data_out_r = 'x; - endcase - end - assign data_out = data_out_r; - end else if (N == 5) begin - reg [DATAW-1:0] data_out_r; - always @(*) begin - case (sel_in) - 5'b00001: data_out_r = data_in[0]; - 5'b00010: data_out_r = data_in[1]; - 5'b00100: data_out_r = data_in[2]; - 5'b01000: data_out_r = data_in[3]; - 5'b10000: data_out_r = data_in[4]; - default: data_out_r = 'x; - endcase - end - assign data_out = data_out_r; - end else if (N == 6) begin - reg [DATAW-1:0] data_out_r; - always @(*) begin - case (sel_in) - 6'b000001: data_out_r = data_in[0]; - 6'b000010: data_out_r = data_in[1]; - 6'b000100: data_out_r = data_in[2]; - 6'b001000: data_out_r = data_in[3]; - 6'b010000: data_out_r = data_in[4]; - 6'b100000: data_out_r = data_in[5]; - default: data_out_r = 'x; - endcase - end - assign data_out = data_out_r; - end else if (N == 7) begin - reg [DATAW-1:0] data_out_r; - always @(*) begin - case (sel_in) - 7'b0000001: data_out_r = data_in[0]; - 7'b0000010: data_out_r = data_in[1]; - 7'b0000100: data_out_r = data_in[2]; - 7'b0001000: data_out_r = data_in[3]; - 
7'b0010000: data_out_r = data_in[4]; - 7'b0100000: data_out_r = data_in[5]; - 7'b1000000: data_out_r = data_in[6]; - default: data_out_r = 'x; - endcase - end - assign data_out = data_out_r; - end else if (N == 8) begin - reg [DATAW-1:0] data_out_r; - always @(*) begin - case (sel_in) - 8'b00000001: data_out_r = data_in[0]; - 8'b00000010: data_out_r = data_in[1]; - 8'b00000100: data_out_r = data_in[2]; - 8'b00001000: data_out_r = data_in[3]; - 8'b00010000: data_out_r = data_in[4]; - 8'b00100000: data_out_r = data_in[5]; - 8'b01000000: data_out_r = data_in[6]; - 8'b10000000: data_out_r = data_in[7]; - default: data_out_r = 'x; - endcase - end - assign data_out = data_out_r; - end else begin - if (MODEL == 1) begin - reg [DATAW-1:0] data_out_r; - always @(*) begin - data_out_r = 'x; - for (integer i = 0; i < N; ++i) begin - if (sel_in[i]) begin - data_out_r = data_in[i]; - end + data_out_r = 'x; + for (integer i = 0; i < N; ++i) begin + if (sel_in[i]) begin + data_out_r = data_in[i]; end end - assign data_out = data_out_r; - end else if (MODEL == 2) begin - reg [DATAW-1:0] data_out_r; - always @(*) begin - data_out_r = '0; - for (integer i = 0; i < N; ++i) begin - data_out_r |= {DATAW{sel_in[i]}} & data_in[i]; - end - end - assign data_out = data_out_r; - end else if (MODEL == 3) begin - wire [N-1:0][DATAW-1:0] mask; - for (genvar i = 0; i < N; ++i) begin - assign mask[i] = {DATAW{sel_in[i]}} & data_in[i]; - end - for (genvar i = 0; i < DATAW; ++i) begin - wire [N-1:0] gather; - for (genvar j = 0; j < N; ++j) begin - assign gather[j] = mask[j][i]; - end - assign data_out[i] = (| gather); - end end + assign data_out = data_out_r; end endmodule diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 54de665ed..7060c258c 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this 
file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,8 +15,8 @@ `TRACING_OFF module VX_pe_serializer #( - parameter NUM_LANES = 1, - parameter NUM_PES = 1, + parameter NUM_LANES = 1, + parameter NUM_PES = 1, parameter LATENCY = 1, parameter DATA_IN_WIDTH = 1, parameter DATA_OUT_WIDTH = 1, @@ -28,12 +28,12 @@ module VX_pe_serializer #( // input input wire valid_in, - input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in, + input wire [NUM_LANES-1:0][DATA_IN_WIDTH-1:0] data_in, input wire [TAG_WIDTH-1:0] tag_in, output wire ready_in, // PE - output wire pe_enable, + output wire pe_enable, output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in, input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out, @@ -43,6 +43,7 @@ module VX_pe_serializer #( output wire [TAG_WIDTH-1:0] tag_out, input wire ready_out ); + wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s; wire valid_out_s; wire [TAG_WIDTH-1:0] tag_out_s; wire enable; @@ -59,6 +60,17 @@ module VX_pe_serializer #( .data_out ({valid_out_s, tag_out_s}) ); + VX_pipe_register #( + .DATAW (NUM_PES * DATA_IN_WIDTH), + .DEPTH (PE_REG) + ) pe_reg ( + .clk (clk), + .reset (reset), + .enable (enable), + .data_in (pe_data_in_s), + .data_out (pe_data_in) + ); + if (NUM_LANES != NUM_PES) begin localparam BATCH_SIZE = NUM_LANES / NUM_PES; @@ -67,6 +79,10 @@ module VX_pe_serializer #( reg [BATCH_SIZEW-1:0] batch_in_idx; reg [BATCH_SIZEW-1:0] batch_out_idx; + for (genvar i = 0; i < NUM_PES; ++i) begin + assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i]; + end + always @(posedge clk) begin if (reset) begin batch_in_idx <= '0; @@ -81,45 +97,29 @@ module VX_pe_serializer #( end end - wire batch_in_done = (batch_in_idx 
== BATCH_SIZEW'(BATCH_SIZE-1)); + wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1)); wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1)); - wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s; - for (genvar i = 0; i < NUM_PES; ++i) begin - assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i]; - end - - VX_pipe_register #( - .DATAW (NUM_PES * DATA_IN_WIDTH), - .DEPTH (PE_REG) - ) pe_reg ( - .clk (clk), - .reset (reset), - .enable (enable), - .data_in (pe_data_in_s), - .data_out (pe_data_in) - ); - reg valid_out_r; reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r; reg [TAG_WIDTH-1:0] tag_out_r; - wire valid_out_b = valid_out_s && batch_out_done; - wire enable_r = ready_out || ~valid_out; + wire valid_out_b = valid_out_s && batch_out_done; + wire ready_out_b = ready_out || ~valid_out; always @(posedge clk) begin if (reset) begin valid_out_r <= 1'b0; - end else if (enable_r) begin + end else if (ready_out_b) begin valid_out_r <= valid_out_b; end - if (enable_r) begin + if (ready_out_b) begin data_out_r[batch_out_idx] <= pe_data_out; tag_out_r <= tag_out_s; end end - - assign enable = (enable_r || ~valid_out_b); + + assign enable = ready_out_b || ~valid_out_b; assign ready_in = enable && batch_in_done; assign pe_enable = enable; @@ -130,16 +130,17 @@ module VX_pe_serializer #( end else begin + assign pe_data_in_s = data_in; + assign enable = ready_out || ~valid_out; assign ready_in = enable; assign pe_enable = enable; - assign pe_data_in= data_in; - assign valid_out = valid_out_s; + assign valid_out = valid_out_s; assign data_out = pe_data_out; assign tag_out = tag_out_s; - + end endmodule diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 4f4006179..031e57695 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file 
except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,44 +13,53 @@ `include "VX_platform.vh" -`TRACING_OFF +//`TRACING_OFF module VX_pending_size #( - parameter SIZE = 1, - parameter INCRW = 1, - parameter DECRW = 1, - parameter SIZEW = `CLOG2(SIZE+1) + parameter SIZE = 1, + parameter INCRW = 1, + parameter DECRW = 1, + parameter ALM_FULL = (SIZE - 1), + parameter ALM_EMPTY = 1, + parameter SIZEW = `CLOG2(SIZE+1) ) ( input wire clk, input wire reset, input wire [INCRW-1:0] incr, input wire [DECRW-1:0] decr, output wire empty, + output wire alm_empty, output wire full, + output wire alm_full, output wire [SIZEW-1:0] size ); - `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter")) - `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter")) + `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) + `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) localparam ADDRW = `LOG2UP(SIZE); - reg empty_r; - reg full_r; + reg empty_r, alm_empty_r; + reg full_r, alm_full_r; if (INCRW != 1 || DECRW != 1) begin reg [SIZEW-1:0] size_r; - wire [SIZEW-1:0] size_n; - assign size_n = size_r + SIZEW'(incr) - SIZEW'(decr); + wire [SIZEW-1:0] size_n = size_r + SIZEW'(incr) - SIZEW'(decr); always @(posedge clk) begin - if (reset) begin - size_r <= '0; - empty_r <= 1; - full_r <= 0; + if (reset) begin + empty_r <= 1; + alm_empty_r <= 1; + alm_full_r <= 0; + full_r <= 0; + size_r <= '0; end else begin - size_r <= size_n; - empty_r <= (size_n == SIZEW'(0)); - full_r <= (size_n == SIZEW'(SIZE)); + `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); + `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= 
size_r), ("runtime error: counter underflow")); + size_r <= size_n; + empty_r <= (size_n == SIZEW'(0)); + alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY)); + full_r <= (size_n == SIZEW'(SIZE)); + alm_full_r <= (size_n == SIZEW'(ALM_FULL)); end end @@ -59,30 +68,47 @@ module VX_pending_size #( end else begin reg [ADDRW-1:0] used_r; + wire [ADDRW-1:0] used_n; always @(posedge clk) begin - if (reset) begin - used_r <= '0; - empty_r <= 1; - full_r <= 0; - end else begin - `ASSERT(~(incr && ~decr) || ~full, ("runtime error: incrementing full counter")); - `ASSERT(~(decr && ~incr) || ~empty, ("runtime error: decrementing empty counter")); + if (reset) begin + empty_r <= 1; + alm_empty_r <= 1; + full_r <= 0; + alm_full_r <= 0; + used_r <= '0; + end else begin + `ASSERT(~(incr && ~decr) || ~full, ("runtime error: counter overflow")); + `ASSERT(~(decr && ~incr) || ~empty, ("runtime error: counter underflow")); if (incr) begin if (~decr) begin empty_r <= 0; + if (used_r == ADDRW'(ALM_EMPTY)) + alm_empty_r <= 0; if (used_r == ADDRW'(SIZE-1)) full_r <= 1; + if (used_r == ADDRW'(ALM_FULL-1)) + alm_full_r <= 1; end end else if (decr) begin - full_r <= 0; if (used_r == ADDRW'(1)) - empty_r <= 1; + empty_r <= 1; + if (used_r == ADDRW'(ALM_EMPTY+1)) + alm_empty_r <= 1; + full_r <= 0; + if (used_r == ADDRW'(ALM_FULL)) + alm_full_r <= 0; end - used_r <= $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr))); + used_r <= used_n; end end + if (SIZE == 2) begin + assign used_n = used_r ^ (incr ^ decr); + end else begin + assign used_n = $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr))); + end + if (SIZE > 1) begin if (SIZEW > ADDRW) begin assign size = {full_r, used_r}; @@ -95,8 +121,10 @@ module VX_pending_size #( end - assign empty = empty_r; - assign full = full_r; - + assign empty = empty_r; + assign alm_empty = alm_empty_r; + assign alm_full = alm_full_r; + assign full = full_r; + endmodule -`TRACING_ON +//`TRACING_ON diff --git a/hw/rtl/libs/VX_priority_arbiter.sv 
b/hw/rtl/libs/VX_priority_arbiter.sv index e807d860b..cd4844d25 100644 --- a/hw/rtl/libs/VX_priority_arbiter.sv +++ b/hw/rtl/libs/VX_priority_arbiter.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,8 +23,8 @@ module VX_priority_arbiter #( output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid ); - if (NUM_REQS == 1) begin - + if (NUM_REQS == 1) begin + assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; @@ -41,6 +41,6 @@ module VX_priority_arbiter #( ); end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index c1ee4d770..5c5f7b3b4 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,24 +16,23 @@ `TRACING_OFF module VX_rr_arbiter #( parameter NUM_REQS = 1, - parameter LOCK_ENABLE = 0, parameter MODEL = 1, parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, - input wire reset, - input wire [NUM_REQS-1:0] requests, + input wire reset, + input wire [NUM_REQS-1:0] requests, output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, + output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid, - input wire grant_unlock + input wire grant_ready ); if (NUM_REQS == 1) begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - `UNUSED_VAR (grant_unlock) - + `UNUSED_VAR (grant_ready) + assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; @@ -41,7 +40,7 @@ module VX_rr_arbiter #( end else if (NUM_REQS == 2) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -52,279 +51,279 @@ module VX_rr_arbiter #( endcase end - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); end /*else if (NUM_REQS == 3) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, - 5'b01_0?1, + 5'b01_0?1, 5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end - 5'b00_?1?, - 5'b01_010, + 5'b00_?1?, + 5'b01_010, 5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end endcase end - always @(posedge clk) begin - if 
(reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); end */else if (NUM_REQS == 4) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 6'b00_0001, - 6'b01_00?1, + 6'b00_0001, + 6'b01_00?1, 6'b10_0??1, 6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end - 6'b00_??1?, - 6'b01_0010, - 6'b10_0?10, + 6'b00_??1?, + 6'b01_0010, + 6'b10_0?10, 6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end - 6'b00_?10?, - 6'b01_?1??, - 6'b10_0100, + 6'b00_?10?, + 6'b01_?1??, + 6'b10_0100, 6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end endcase end - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); end /*else if (NUM_REQS == 5) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 8'b000_00001, - 8'b001_000?1, - 8'b010_00??1, + 8'b000_00001, + 8'b001_000?1, + 8'b010_00??1, 8'b011_0???1, 8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end - 8'b000_???1?, - 8'b001_00010, - 8'b010_00?10, - 8'b011_0??10, + 
8'b000_???1?, + 8'b001_00010, + 8'b010_00?10, + 8'b011_0??10, 8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end - 8'b000_??10?, - 8'b001_??1??, - 8'b010_00100, + 8'b000_??10?, + 8'b001_??1??, + 8'b010_00100, 8'b011_0?100, 8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end - 8'b000_?100?, - 8'b001_?10??, + 8'b000_?100?, + 8'b001_?10??, 8'b010_?1???, - 8'b011_01000, + 8'b011_01000, 8'b100_?1000: begin grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end endcase end - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); end else if (NUM_REQS == 6) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 9'b000_000001, - 9'b001_0000?1, - 9'b010_000??1, - 9'b011_00???1, - 9'b100_0????1, + 9'b000_000001, + 9'b001_0000?1, + 9'b010_000??1, + 9'b011_00???1, + 9'b100_0????1, 9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end - 9'b000_????1?, - 9'b001_000010, - 9'b010_000?10, - 9'b011_00??10, - 9'b100_0???10, + 9'b000_????1?, + 9'b001_000010, + 9'b010_000?10, + 9'b011_00??10, + 9'b100_0???10, 9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end - 9'b000_???10?, - 9'b001_???1??, - 9'b010_000100, + 9'b000_???10?, + 9'b001_???1??, + 9'b010_000100, 9'b011_00?100, - 9'b100_0??100, + 9'b100_0??100, 9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end - 9'b000_??100?, - 
9'b001_??10??, + 9'b000_??100?, + 9'b001_??10??, 9'b010_??1???, - 9'b011_001000, - 9'b100_0?1000, + 9'b011_001000, + 9'b100_0?1000, 9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end - 9'b000_?1000?, - 9'b001_?100??, + 9'b000_?1000?, + 9'b001_?100??, 9'b010_?10???, - 9'b011_?1????, - 9'b100_010000, + 9'b011_?1????, + 9'b100_010000, 9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end endcase end - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); end else if (NUM_REQS == 7) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 10'b000_000001, - 10'b001_0000?1, - 10'b010_000??1, - 10'b011_00???1, - 10'b100_00???1, - 10'b101_0????1, + 10'b000_000001, + 10'b001_0000?1, + 10'b010_000??1, + 10'b011_00???1, + 10'b100_00???1, + 10'b101_0????1, 10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end - 10'b000_?????1?, - 10'b001_0000010, - 10'b010_0000?10, - 10'b011_000??10, - 10'b100_00???10, - 10'b101_0????10, + 10'b000_?????1?, + 10'b001_0000010, + 10'b010_0000?10, + 10'b011_000??10, + 10'b100_00???10, + 10'b101_0????10, 10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end - 10'b000_????10?, - 10'b001_????1??, - 10'b010_0000100, + 10'b000_????10?, + 10'b001_????1??, + 10'b010_0000100, 10'b011_000?100, - 10'b100_00??100, - 10'b101_0???100, + 10'b100_00??100, + 10'b101_0???100, 10'b110_????100: begin 
grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end - 10'b000_???100?, - 10'b001_???10??, + 10'b000_???100?, + 10'b001_???10??, 10'b010_???1???, - 10'b011_0001000, - 10'b100_00?1000, - 10'b101_0??1000, + 10'b011_0001000, + 10'b100_00?1000, + 10'b101_0??1000, 10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end - 10'b000_??1000?, - 10'b001_??100??, + 10'b000_??1000?, + 10'b001_??100??, 10'b010_??10???, - 10'b011_??1????, - 10'b100_0010000, - 10'b101_0?10000, + 10'b011_??1????, + 10'b100_0010000, + 10'b101_0?10000, 10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end - 10'b000_?10000?, - 10'b001_?1000??, + 10'b000_?10000?, + 10'b001_?1000??, 10'b010_?100???, - 10'b011_?10????, - 10'b100_?1?????, - 10'b101_0100000, + 10'b011_?10????, + 10'b100_?1?????, + 10'b101_0100000, 10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end endcase end - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); end */else if (NUM_REQS == 8) begin reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] grant_onehot_r; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 11'b000_00000001, - 11'b001_000000?1, - 11'b010_00000??1, + 11'b000_00000001, + 11'b001_000000?1, + 11'b010_00000??1, 11'b011_0000???1, - 11'b100_000????1, - 11'b101_00?????1, - 11'b110_0??????1, + 11'b100_000????1, + 11'b101_00?????1, + 11'b110_0??????1, 11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end 
- 11'b000_??????1?, - 11'b001_00000010, - 11'b010_00000?10, + 11'b000_??????1?, + 11'b001_00000010, + 11'b010_00000?10, 11'b011_0000??10, - 11'b100_000???10, - 11'b101_00????10, - 11'b110_0?????10, + 11'b100_000???10, + 11'b101_00????10, + 11'b110_0?????10, 11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end - 11'b000_?????10?, - 11'b001_?????1??, - 11'b010_00000100, + 11'b000_?????10?, + 11'b001_?????1??, + 11'b010_00000100, 11'b011_0000?100, - 11'b100_000??100, - 11'b101_00???100, - 11'b110_0????100, + 11'b100_000??100, + 11'b101_00???100, + 11'b110_0????100, 11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end 11'b000_????100?, 11'b001_????10??, @@ -362,20 +361,20 @@ module VX_rr_arbiter #( endcase end - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); - + assign grant_valid = (| requests); + end else if (MODEL == 1) begin - + `IGNORE_UNOPTFLAT_BEGIN wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs; `IGNORE_UNOPTFLAT_END @@ -385,12 +384,18 @@ module VX_rr_arbiter #( wire [NUM_REQS-1:0] req_masked = requests & pointer_reg; - assign mask_higher_pri_regs[NUM_REQS-1:1] = mask_higher_pri_regs[NUM_REQS-2:0] | req_masked[NUM_REQS-2:0]; assign mask_higher_pri_regs[0] = 1'b0; + for (genvar i = 1; i < NUM_REQS; ++i) begin + assign mask_higher_pri_regs[i] = mask_higher_pri_regs[i-1] | req_masked[i-1]; + end + assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0]; - assign unmask_higher_pri_regs[NUM_REQS-1:1] = unmask_higher_pri_regs[NUM_REQS-2:0] | requests[NUM_REQS-2:0]; assign unmask_higher_pri_regs[0] = 1'b0; + for (genvar i = 1; i < NUM_REQS; ++i) 
begin + assign unmask_higher_pri_regs[i] = unmask_higher_pri_regs[i-1] | requests[i-1]; + end + assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0]; wire no_req_masked = ~(|req_masked); @@ -399,7 +404,7 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin pointer_reg <= {NUM_REQS{1'b1}}; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin if (|req_masked) begin pointer_reg <= mask_higher_pri_regs; end else if (|requests) begin @@ -410,22 +415,22 @@ module VX_rr_arbiter #( end end - assign grant_valid = (| requests); + assign grant_valid = (| requests); VX_onehot_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), - .data_out (grant_index), + .data_out (grant_index), `UNUSED_PIN (valid_out) ); - + end else begin - + reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [NUM_REQS-1:0] state; - + reg [NUM_REQS-1:0] grant_onehot_r; + reg [NUM_REQS-1:0] state; + always @(*) begin grant_index_r = 'x; grant_onehot_r = 'x; @@ -440,18 +445,18 @@ module VX_rr_arbiter #( end end - always @(posedge clk) begin - if (reset) begin + always @(posedge clk) begin + if (reset) begin state <= '0; - end else if (!LOCK_ENABLE || grant_unlock) begin + end else if (grant_ready) begin state <= grant_index_r; end end assign grant_index = grant_index_r; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 1496d448e..297a23d20 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,17 +17,18 @@ module VX_sp_ram #( parameter DATAW = 1, parameter SIZE = 1, + parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, parameter NO_RWCHECK = 0, - parameter LUTRAM = 0, + parameter LUTRAM = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, parameter ADDRW = `LOG2UP(SIZE) -) ( - input wire clk, - input wire read, +) ( + input wire clk, + input wire read, input wire write, input wire [WRENW-1:0] wren, input wire [ADDRW-1:0] addr, @@ -37,6 +38,7 @@ module VX_sp_ram #( VX_dp_ram #( .DATAW (DATAW), .SIZE (SIZE), + .ADDR_MIN (ADDR_MIN), .WRENW (WRENW), .OUT_REG (OUT_REG), .NO_RWCHECK (NO_RWCHECK), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index a687c9337..f9bb24f3d 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -20,7 +20,8 @@ module VX_stream_arb #( parameter DATAW = 1, parameter `STRING ARBITER = "P", parameter MAX_FANOUT = `MAX_FANOUT, - parameter OUT_BUF = 0 , + parameter OUT_BUF = 0, + parameter LUTRAM = 0, parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS), parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), parameter NUM_REQS_W = `UP(LOG_NUM_REQS) @@ -42,7 +43,7 @@ module VX_stream_arb #( if (NUM_OUTPUTS > 1) begin // (#inputs > #outputs) and (#outputs > 1) - + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin localparam BATCH_BEGIN = i * NUM_REQS; @@ -57,7 +58,8 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (OUT_BUF), + .LUTRAM (LUTRAM) ) arb_slice ( .clk (clk), .reset (slice_reset), @@ -81,8 +83,8 @@ module VX_stream_arb #( wire [NUM_BATCHES-1:0] valid_tmp; wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp; - wire [NUM_BATCHES-1:0] ready_tmp; - + wire [NUM_BATCHES-1:0] ready_tmp; + for (genvar i = 0; i < NUM_BATCHES; ++i) begin localparam BATCH_BEGIN = i * MAX_FANOUT; @@ -97,18 +99,19 @@ module VX_stream_arb #( if (MAX_FANOUT != 1) begin VX_stream_arb #( .NUM_INPUTS (BATCH_SIZE), - .NUM_OUTPUTS (1), + .NUM_OUTPUTS (1), .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (3), // registered output + .LUTRAM (LUTRAM) ) fanout_slice_arb ( .clk (clk), .reset (slice_reset), .valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]), .data_in (data_in[BATCH_END-1: BATCH_BEGIN]), - .ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]), - .valid_out (valid_tmp[i]), + .ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]), + .valid_out (valid_tmp[i]), .data_out (data_tmp_u), .sel_out (sel_tmp_u), .ready_out (ready_tmp[i]) @@ -123,11 +126,12 @@ module VX_stream_arb #( VX_stream_arb #( .NUM_INPUTS (NUM_BATCHES), - .NUM_OUTPUTS (1), + .NUM_OUTPUTS (1), .DATAW (DATAW + LOG_NUM_REQS2), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (OUT_BUF), + .LUTRAM (LUTRAM) 
) fanout_join_arb ( .clk (clk), .reset (reset), @@ -150,16 +154,15 @@ module VX_stream_arb #( wire valid_in_r; wire [DATAW-1:0] data_in_r; wire ready_in_r; - + wire arb_valid; wire [NUM_REQS_W-1:0] arb_index; wire [NUM_REQS-1:0] arb_onehot; wire arb_ready; VX_generic_arbiter #( - .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (1), - .TYPE (ARBITER) + .NUM_REQS (NUM_REQS), + .TYPE (ARBITER) ) arbiter ( .clk (clk), .reset (reset), @@ -167,21 +170,30 @@ module VX_stream_arb #( .grant_valid (arb_valid), .grant_index (arb_index), .grant_onehot (arb_onehot), - .grant_unlock (arb_ready) + .grant_ready (arb_ready) ); assign valid_in_r = arb_valid; - assign data_in_r = data_in[arb_index]; assign arb_ready = ready_in_r; + VX_onehot_mux #( + .DATAW (DATAW), + .N (NUM_REQS) + ) onehot_mux ( + .data_in (data_in), + .sel_in (arb_onehot), + .data_out (data_in_r) + ); + for (genvar i = 0; i < NUM_REQS; ++i) begin - assign ready_in[i] = ready_in_r & arb_onehot[i]; + assign ready_in[i] = ready_in_r && arb_onehot[i]; end VX_elastic_buffer #( .DATAW (LOG_NUM_REQS + DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), + .LUTRAM (LUTRAM) ) out_buf ( .clk (clk), .reset (reset), @@ -214,7 +226,8 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (OUT_BUF), + .LUTRAM (LUTRAM) ) arb_slice ( .clk (clk), .reset (slice_reset), @@ -248,19 +261,20 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (3), // registered output + .LUTRAM (LUTRAM) ) fanout_fork_arb ( .clk (clk), .reset (reset), .valid_in (valid_in), .ready_in (ready_in), - .data_in (data_in), + .data_in (data_in), .data_out (data_tmp), .valid_out (valid_tmp), .ready_out (ready_tmp), `UNUSED_PIN (sel_out) ); - + for (genvar i = 0; i < NUM_BATCHES; ++i) begin localparam BATCH_BEGIN = i * MAX_FANOUT; @@ -271,11 +285,12 @@ module VX_stream_arb 
#( VX_stream_arb #( .NUM_INPUTS (1), - .NUM_OUTPUTS (BATCH_SIZE), + .NUM_OUTPUTS (BATCH_SIZE), .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (OUT_BUF), + .LUTRAM (LUTRAM) ) fanout_slice_arb ( .clk (clk), .reset (slice_reset), @@ -293,25 +308,24 @@ module VX_stream_arb #( // (#inputs == 1) and (#outputs <= max_fanout) - wire [NUM_OUTPUTS-1:0] ready_in_r; - + wire [NUM_OUTPUTS-1:0] ready_in_r; + wire [NUM_OUTPUTS-1:0] arb_requests; wire arb_valid; wire [NUM_OUTPUTS-1:0] arb_onehot; wire arb_ready; VX_generic_arbiter #( - .NUM_REQS (NUM_OUTPUTS), - .LOCK_ENABLE (1), - .TYPE (ARBITER) + .NUM_REQS (NUM_OUTPUTS), + .TYPE (ARBITER) ) arbiter ( .clk (clk), .reset (reset), .requests (arb_requests), .grant_valid (arb_valid), - `UNUSED_PIN (grant_index), + `UNUSED_PIN (grant_index), .grant_onehot (arb_onehot), - .grant_unlock (arb_ready) + .grant_ready (arb_ready) ); assign arb_requests = ready_in_r; @@ -320,9 +334,10 @@ module VX_stream_arb #( for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( - .DATAW (DATAW), - .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) + .DATAW (DATAW), + .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), + .LUTRAM (LUTRAM) ) out_buf ( .clk (clk), .reset (reset), @@ -337,7 +352,7 @@ module VX_stream_arb #( end assign sel_out = 0; - + end else begin // #Inputs == #Outputs @@ -349,7 +364,8 @@ module VX_stream_arb #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), + .LUTRAM (LUTRAM) ) out_buf ( .clk (clk), .reset (out_buf_reset), @@ -363,6 +379,6 @@ module VX_stream_arb #( assign sel_out[i] = NUM_REQS_W'(i); end end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_stream_pack.sv b/hw/rtl/libs/VX_stream_pack.sv index 3c1bbdbe6..df0000307 100644 --- a/hw/rtl/libs/VX_stream_pack.sv +++ b/hw/rtl/libs/VX_stream_pack.sv @@ -1,10 +1,10 @@ // 
Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,9 +15,9 @@ `TRACING_OFF module VX_stream_pack #( - parameter NUM_REQS = 1, - parameter DATA_WIDTH = 1, - parameter TAG_WIDTH = 1, + parameter NUM_REQS = 1, + parameter DATA_WIDTH = 1, + parameter TAG_WIDTH = 1, parameter TAG_SEL_BITS = 0, parameter `STRING ARBITER = "P", parameter OUT_BUF = 0 @@ -38,47 +38,48 @@ module VX_stream_pack #( output wire [TAG_WIDTH-1:0] tag_out, input wire ready_out ); - localparam LOG_NUM_REQS = `CLOG2(NUM_REQS); - if (NUM_REQS > 1) begin - wire [LOG_NUM_REQS-1:0] grant_index; + wire [NUM_REQS-1:0] grant_onehot; wire grant_valid; wire grant_ready; VX_generic_arbiter #( .NUM_REQS (NUM_REQS), - .LOCK_ENABLE (1), - .TYPE (ARBITER) + .TYPE (ARBITER) ) arbiter ( .clk (clk), .reset (reset), - .requests (valid_in), + .requests (valid_in), .grant_valid (grant_valid), - .grant_index (grant_index), - `UNUSED_PIN (grant_onehot), - .grant_unlock(grant_ready) + `UNUSED_PIN (grant_index), + .grant_onehot(grant_onehot), + .grant_ready (grant_ready) ); - reg [NUM_REQS-1:0] valid_sel; - reg [NUM_REQS-1:0] ready_sel; - wire ready_unqual; + wire [TAG_WIDTH-1:0] tag_sel; - wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index]; - - always @(*) begin - valid_sel = '0; - ready_sel = '0; - for (integer i = 0; i < NUM_REQS; ++i) begin - if (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]) begin - valid_sel[i] = valid_in[i]; - ready_sel[i] = ready_unqual; - end - end - end + VX_onehot_mux #( + .DATAW (TAG_WIDTH), + .N (NUM_REQS) + ) onehot_mux ( + .data_in (tag_in), + .sel_in 
(grant_onehot), + .data_out (tag_sel) + ); + + wire [NUM_REQS-1:0] tag_matches; + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]); + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + assign ready_in[i] = grant_ready & tag_matches[i]; + end + + wire [NUM_REQS-1:0] mask_sel = valid_in & tag_matches; - assign grant_ready = ready_unqual; - VX_elastic_buffer #( .DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -86,16 +87,14 @@ module VX_stream_pack #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (grant_valid), - .data_in ({valid_sel, tag_sel, data_in}), - .ready_in (ready_unqual), + .valid_in (grant_valid), + .data_in ({mask_sel, tag_sel, data_in}), + .ready_in (grant_ready), .valid_out (valid_out), .data_out ({mask_out, tag_out, data_out}), .ready_out (ready_out) - ); + ); - assign ready_in = ready_sel; - end else begin `UNUSED_VAR (clk) diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 735ec8474..cb0d9a179 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,7 @@ module VX_stream_xbar #( parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS), parameter ARBITER = "P", parameter OUT_BUF = 0, + parameter LUTRAM = 0, parameter MAX_FANOUT = `MAX_FANOUT, parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1) ) ( @@ -36,7 +37,7 @@ module VX_stream_xbar #( output wire [NUM_INPUTS-1:0] ready_in, output wire [NUM_OUTPUTS-1:0] valid_out, - output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out, + output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out, output wire [NUM_OUTPUTS-1:0][IN_WIDTH-1:0] sel_out, input wire [NUM_OUTPUTS-1:0] ready_out ); @@ -66,7 +67,8 @@ module VX_stream_xbar #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (OUT_BUF), + .LUTRAM (LUTRAM) ) xbar_arb ( .clk (clk), .reset (slice_reset), @@ -94,7 +96,8 @@ module VX_stream_xbar #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF) + .OUT_BUF (OUT_BUF), + .LUTRAM (LUTRAM) ) xbar_arb ( .clk (clk), .reset (reset), @@ -124,13 +127,14 @@ module VX_stream_xbar #( assign ready_in = ready_out_r[sel_in]; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - + `RESET_RELAY (out_buf_reset, reset); VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), + .LUTRAM (LUTRAM) ) out_buf ( .clk (clk), .reset (out_buf_reset), @@ -152,7 +156,8 @@ module VX_stream_xbar #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), + .LUTRAM (LUTRAM) ) out_buf ( .clk (clk), .reset (reset), @@ -172,7 +177,7 @@ module VX_stream_xbar #( // compute inputs collision // we have a collision when there exists a valid transfer with multiple input candicates // we count the unique duplicates each cycle. 
- + reg [NUM_INPUTS-1:0] per_cycle_collision, per_cycle_collision_r; wire [`CLOG2(NUM_INPUTS+1)-1:0] collision_count; reg [PERF_CTR_BITS-1:0] collisions_r; @@ -182,14 +187,14 @@ module VX_stream_xbar #( for (integer i = 0; i < NUM_INPUTS; ++i) begin for (integer j = 1; j < (NUM_INPUTS-i); ++j) begin per_cycle_collision[i] |= valid_in[i] - && valid_in[j+i] + && valid_in[j+i] && (sel_in[i] == sel_in[j+i]) && (ready_in[i] | ready_in[j+i]); end end end - - `BUFFER(per_cycle_collision_r, per_cycle_collision); + + `BUFFER(per_cycle_collision_r, per_cycle_collision); `POP_COUNT(collision_count, per_cycle_collision_r); always @(posedge clk) begin diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 4a6562dcb..f59ebae5b 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,10 +17,10 @@ module VX_local_mem import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", // Size of cache in bytes - parameter SIZE = (1024*16*8), - + parameter SIZE = (1024*16*8), + // Number of Word requests per cycle - parameter NUM_REQS = 4, + parameter NUM_REQS = 4, // Number of banks parameter NUM_BANKS = 4, @@ -33,8 +33,11 @@ module VX_local_mem import VX_gpu_pkg::*; #( parameter UUID_WIDTH = 0, // Request tag size - parameter TAG_WIDTH = 16 - ) ( + parameter TAG_WIDTH = 16, + + // Response buffer + parameter OUT_BUF = 0 + ) ( input wire clk, input wire reset, @@ -59,7 +62,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( localparam REQ_DATAW = 1 + BANK_ADDR_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH; localparam RSP_DATAW = WORD_WIDTH + TAG_WIDTH; - `STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter")) + `STATIC_ASSERT(ADDR_WIDTH == (BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS)), ("invalid parameter")) // bank selection @@ -70,7 +73,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( end end else begin assign req_bank_idx = 0; - end + end // bank addressing @@ -83,18 +86,18 @@ module VX_local_mem import VX_gpu_pkg::*; #( // bank requests dispatch wire [NUM_BANKS-1:0] per_bank_req_valid; - wire [NUM_BANKS-1:0] per_bank_req_rw; + wire [NUM_BANKS-1:0] per_bank_req_rw; wire [NUM_BANKS-1:0][BANK_ADDR_WIDTH-1:0] per_bank_req_addr; wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_req_byteen; wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_req_data; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_req_tag; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx; wire [NUM_BANKS-1:0] per_bank_req_ready; - + wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all; wire [NUM_REQS-1:0] req_valid_in; - wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; + wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; wire [NUM_REQS-1:0] req_ready_in; `ifdef PERF_ENABLE @@ -104,13 +107,13 @@ module VX_local_mem import VX_gpu_pkg::*; #( for (genvar i = 0; i < 
NUM_REQS; ++i) begin assign req_valid_in[i] = mem_bus_if[i].req_valid; assign req_data_in[i] = { - mem_bus_if[i].req_data.rw, + mem_bus_if[i].req_data.rw, req_bank_addr[i], mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag}; assign mem_bus_if[i].req_ready = req_ready_in[i]; - end + end VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), @@ -138,10 +141,10 @@ module VX_local_mem import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_BANKS; ++i) begin assign { - per_bank_req_rw[i], + per_bank_req_rw[i], per_bank_req_addr[i], - per_bank_req_byteen[i], - per_bank_req_data[i], + per_bank_req_byteen[i], + per_bank_req_data[i], per_bank_req_tag[i]} = per_bank_req_data_all[i]; end @@ -149,13 +152,13 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] per_bank_rsp_valid; wire [NUM_BANKS-1:0][WORD_WIDTH-1:0] per_bank_rsp_data; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx; - wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag; + wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_rsp_idx; + wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag; wire [NUM_BANKS-1:0] per_bank_rsp_ready; `RESET_RELAY (bank_reset, reset); - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin VX_sp_ram #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), @@ -165,7 +168,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .read (1'b1), .write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]), .wren (per_bank_req_byteen[i]), - .addr (per_bank_req_addr[i]), + .addr (per_bank_req_addr[i]), .wdata (per_bank_req_data[i]), .rdata (per_bank_rsp_data[i]) ); @@ -193,7 +196,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( // bank responses gather wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_all; - + for (genvar i = 0; i < NUM_BANKS; ++i) begin assign per_bank_rsp_data_all[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; end @@ -206,7 +209,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( 
.NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), .DATAW (RSP_DATAW), - .OUT_BUF (2) + .OUT_BUF (OUT_BUF) ) rsp_xbar ( .clk (clk), .reset (reset), @@ -302,38 +305,38 @@ module VX_local_mem import VX_gpu_pkg::*; #( assign per_bank_rsp_uuid[i] = 0; end end - + for (genvar i = 0; i < NUM_REQS; ++i) begin always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); end else begin - `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])); end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin - `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])); end end end - + for (genvar i = 0; i < NUM_BANKS; ++i) begin always @(posedge clk) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin - `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", + `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); end else begin - `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, 
tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])); end end if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin - `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", + `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])); end end diff --git a/hw/rtl/mem/VX_mem_switch.sv b/hw/rtl/mem/VX_mem_switch.sv index 7ae91ca8e..fd26c2aa8 100644 --- a/hw/rtl/mem/VX_mem_switch.sv +++ b/hw/rtl/mem/VX_mem_switch.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,20 +17,19 @@ module VX_mem_switch import VX_gpu_pkg::*; #( parameter NUM_REQS = 1, parameter DATA_SIZE = 1, parameter TAG_WIDTH = 1, - parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, + parameter ADDR_WIDTH = 1, parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0, - parameter `STRING ARBITER = "R", + parameter `STRING ARBITER = "R", parameter LOG_NUM_REQS = `CLOG2(NUM_REQS) ) ( input wire clk, input wire reset, - + input wire [`UP(LOG_NUM_REQS)-1:0] bus_sel, VX_mem_bus_if.slave bus_in_if, VX_mem_bus_if.master bus_out_if [NUM_REQS] -); - localparam ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE)); +); localparam DATA_WIDTH = (8 * DATA_SIZE); localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; @@ -40,7 +39,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0] req_valid_out; wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_out; wire [NUM_REQS-1:0] req_ready_out; - + VX_stream_switch #( .NUM_OUTPUTS (NUM_REQS), .DATAW (REQ_DATAW), @@ -49,7 +48,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .sel_in (bus_sel), - .valid_in (bus_in_if.req_valid), + .valid_in (bus_in_if.req_valid), .data_in (bus_in_if.req_data), .ready_in (bus_in_if.req_ready), .valid_out (req_valid_out), @@ -68,7 +67,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0] rsp_valid_in; wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in; wire [NUM_REQS-1:0] rsp_ready_in; - + for (genvar i = 0; i < NUM_REQS; ++i) begin assign rsp_valid_in[i] = bus_out_if[i].rsp_valid; assign rsp_data_in[i] = bus_out_if[i].rsp_data; @@ -77,15 +76,15 @@ module VX_mem_switch import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_REQS), - .DATAW (RSP_DATAW), + .DATAW (RSP_DATAW), .ARBITER (ARBITER), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), .reset (reset), - .valid_in (rsp_valid_in), - .data_in (rsp_data_in), - .ready_in (rsp_ready_in), + .valid_in (rsp_valid_in), + .data_in 
(rsp_data_in), + .ready_in (rsp_ready_in), .valid_out (bus_in_if.rsp_valid), .data_out (bus_in_if.rsp_data), .ready_out (bus_in_if.rsp_ready), diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 4a54b0cdf..235c79c8d 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -12,7 +12,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae -THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) @@ -76,19 +75,19 @@ endif # Debugigng ifdef DEBUG - ifeq ($(TARGET), fpga) - CFLAGS += -DNDEBUG -DSCOPE $(DBG_SCOPE_FLAGS) - SCOPE_JSON += $(BUILD_DIR)/scope.json + ifneq ($(TARGET), fpga) + CFLAGS += -DNDEBUG else CFLAGS += $(DBG_TRACE_FLAGS) endif -else +else CFLAGS += -DNDEBUG endif # Enable scope analyzer ifdef SCOPE - CFLAGS += -DSCOPE + CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) + SCOPE_JSON += $(BUILD_DIR)/scope.json endif # Enable perf counters @@ -128,7 +127,7 @@ ifeq ($(TARGET), asesim) afu_sim_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth else afu_synth_setup -s $(BUILD_DIR)/setup.cfg $(BUILD_DIR)/synth -endif +endif build: ip-gen setup $(SCOPE_JSON) ifeq ($(TARGET), asesim) @@ -145,5 +144,5 @@ scope-json: $(BUILD_DIR)/scope.json $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml $(SCRIPT_DIR)/scope.py $(BUILD_DIR)/vortex.xml -o $(BUILD_DIR)/scope.json -clean: +clean: rm -rf vortex_afu.h $(BUILD_DIR) diff --git a/hw/syn/altera/opae/run_ase.sh b/hw/syn/altera/opae/run_ase.sh index 16c92f459..04fd27540 100755 --- a/hw/syn/altera/opae/run_ase.sh +++ b/hw/syn/altera/opae/run_ase.sh @@ -1,12 +1,12 @@ #!/bin/bash # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,19 +20,19 @@ BUILD_DIR=$(realpath $1) PROGRAM=$(basename "$2") PROGRAM_DIR=`dirname $2` -POCL_RT_PATH=$TOOLDIR/pocl/runtime +POCL_PATH=$TOOLDIR/pocl VORTEX_RT_PATH=$SCRIPT_DIR/../../../../runtime # Export ASE_WORKDIR variable export ASE_WORKDIR=$BUILD_DIR/synth/work # cleanup incomplete runs -rm -f $ASE_WORKDIR/.app_lock.pid +rm -f $ASE_WORKDIR/.app_lock.pid rm -f $ASE_WORKDIR/.ase_ready.pid rm -f $BUILD_DIR/synth/nohup.out # Start Simulator in background (capture processs group pid) -pushd $BUILD_DIR/synth +pushd $BUILD_DIR/synth echo " [DBG] starting ASE simnulator (stdout saved to '$BUILD_DIR/synth/nohup.out')" setsid make sim &> /dev/null & SIM_PID=$! @@ -49,7 +49,7 @@ done pushd $PROGRAM_DIR shift 2 echo " [DBG] running ./$PROGRAM $*" -ASE_LOG=0 LD_LIBRARY_PATH=$POCL_RT_PATH/lib:$VORTEX_RT_PATH/opae:$LD_LIBRARY_PATH ./$PROGRAM $* +ASE_LOG=0 LD_LIBRARY_PATH=$POCL_PATH/lib:$VORTEX_RT_PATH/opae:$LD_LIBRARY_PATH ./$PROGRAM $* popd # stop the simulator (kill process group) diff --git a/hw/syn/altera/quartus/Makefile b/hw/syn/altera/quartus/Makefile index d79109a90..d0a2999bd 100644 --- a/hw/syn/altera/quartus/Makefile +++ b/hw/syn/altera/quartus/Makefile @@ -9,7 +9,7 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline lmem cache fpu core vortex top test +.PHONY: dogfood unittest pipeline lmem cache fpu core issue vortex top test ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: @@ -50,6 +50,11 @@ core: cp core/Makefile core/$(BUILD_DIR) $(MAKE) -C core/$(BUILD_DIR) clean && $(MAKE) -C core/$(BUILD_DIR) > core/$(BUILD_DIR)/build.log 2>&1 & +issue: + 
mkdir -p issue/$(BUILD_DIR) + cp issue/Makefile issue/$(BUILD_DIR) + $(MAKE) -C issue/$(BUILD_DIR) clean && $(MAKE) -C issue/$(BUILD_DIR) > issue/$(BUILD_DIR)/build.log 2>&1 & + vortex: ip-gen mkdir -p vortex/$(BUILD_DIR) cp vortex/Makefile vortex/$(BUILD_DIR) diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/quartus/common.mk index 16105b7e2..3890dcfe8 100644 --- a/hw/syn/altera/quartus/common.mk +++ b/hw/syn/altera/quartus/common.mk @@ -5,7 +5,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae -THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) @@ -81,7 +80,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): gen-sources quartus_sh -t $(SRC_DIR)/project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc $(SRC_DIR)/project.sdc -inc "src" - + syn.chg: $(STAMP) syn.chg @@ -99,6 +98,6 @@ pow.chg: program: $(PROJECT).sof quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" - + clean: rm -rf src bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox diff --git a/hw/syn/altera/quartus/issue/Makefile b/hw/syn/altera/quartus/issue/Makefile new file mode 100644 index 000000000..c1804a398 --- /dev/null +++ b/hw/syn/altera/quartus/issue/Makefile @@ -0,0 +1,14 @@ +PROJECT = VX_issue_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs 
-I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/quartus/project.sdc b/hw/syn/altera/quartus/project.sdc index 6ea508531..f6373a643 100644 --- a/hw/syn/altera/quartus/project.sdc +++ b/hw/syn/altera/quartus/project.sdc @@ -1 +1 @@ -create_clock -name {clk} -period "200 MHz" -waveform { 0.000 1.0 } [get_ports {clk}] \ No newline at end of file +create_clock -name {clk} -period "220 MHz" -waveform { 0.000 1.0 } [get_ports {clk}] \ No newline at end of file diff --git a/hw/syn/altera/quartus/top/Makefile b/hw/syn/altera/quartus/top/Makefile index a47389d70..341690206 100644 --- a/hw/syn/altera/quartus/top/Makefile +++ b/hw/syn/altera/quartus/top/Makefile @@ -4,7 +4,21 @@ SRC_FILE = $(PROJECT).sv include ../../common.mk +# AFU parameters CONFIGS += -DNOPAE +CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +endif +ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +endif #CONFIGS += -DNUM_CORES=2 #CONFIGS += -DNUM_WARPS=32 diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index f436cf869..b2218e65e 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -38,14 +38,10 @@ make chipscope TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary # running test -TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 ./ci/blackbox.sh --driver=xrt --app=demo -TARGET=hw_emu 
PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 ./ci/blackbox.sh --driver=xrt --app=demo -TARGET=hw PLATFORM=xilinx_vck5000_gen3x16_xdma_1_202120_1 ./ci/blackbox.sh --driver=xrt --app=demo -TARGET=hw_emu PLATFORM=xilinx_vck5000_gen3x16_xdma_1_202120_1 ./ci/blackbox.sh --driver=xrt --app=demo -FPGA_BIN_DIR= TARGET=hw_emu PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 ./ci/blackbox.sh --driver=xrt --app=demo -FPGA_BIN_DIR= TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 ./ci/blackbox.sh --driver=xrt --app=demo -FPGA_BIN_DIR= TARGET=hw_emu PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo -FPGA_BIN_DIR= XRT_DEVICE_INDEX=1 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo +FPGA_BIN_DIR= TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo +FPGA_BIN_DIR= TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo +FPGA_BIN_DIR= TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo +FPGA_BIN_DIR= XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo # build report logs /bin/vortex_afu.xclbin.info diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/test/Makefile index b2d49c161..e15789516 100644 --- a/hw/syn/xilinx/test/Makefile +++ b/hw/syn/xilinx/test/Makefile @@ -8,7 +8,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/test RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/opae -THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts # include paths diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index b5f00f1cc..f8f0f5cb0 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -24,7 +24,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/xrt RTL_DIR := $(VORTEX_HOME)/hw/rtl DPI_DIR := $(VORTEX_HOME)/hw/dpi AFU_DIR := $(RTL_DIR)/afu/xrt -THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts VIVADO := $(XILINX_VIVADO)/bin/vivado @@ -34,7 +33,7 
@@ CP = cp -rf RMDIR = rm -rf ECHO = @echo -NCPUS := $(shell grep -c ^processor /proc/cpuinfo) +NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? $(MAX_JOBS) : $(NCPUS) ))) PLATFORM_TO_XSA = $(strip $(patsubst %.xpfm, % , $(shell basename $(PLATFORM)))) @@ -115,11 +114,8 @@ endif # Debugigng ifdef DEBUG VPP_FLAGS += -g --debug.protocol all - ifeq ($(TARGET), hw) - CFLAGS += -DNDEBUG -DSCOPE $(DBG_SCOPE_FLAGS) - SCOPE_JSON += $(BUILD_DIR)/scope.json - #CFLAGS += -DNDEBUG -DCHIPSCOPE $(DBG_SCOPE_FLAGS) - #VPP_FLAGS += --debug.chipscope vortex_afu_1 + ifneq ($(TARGET), hw) + CFLAGS += -DNDEBUG else VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all CFLAGS += $(DBG_TRACE_FLAGS) @@ -129,6 +125,12 @@ else CFLAGS += -DNDEBUG endif +# Enable scope analyzer +ifdef SCOPE + CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) + SCOPE_JSON += $(BUILD_DIR)/scope.json +endif + # compilation flags CFLAGS += -DSYNTHESIS -DVIVADO CFLAGS += -DXLEN_$(XLEN) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 2b89d2421..a0c4fdcc9 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -9,7 +9,6 @@ NUM_CORES ?= 1 SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts RTL_DIR := $(VORTEX_HOME)/hw/rtl -THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party CP = cp -rf RMDIR = rm -rf @@ -52,15 +51,15 @@ RTL_INCLUDE += $(FPU_INCLUDE) # Debugigng ifdef DEBUG - CFLAGS += -DNDEBUG -DSCOPE $(DBG_SCOPE_FLAGS) - SCOPE_JSON += $(BUILD_DIR)/scope.json -else + CFLAGS += $(DBG_TRACE_FLAGS) +else CFLAGS += -DNDEBUG endif # Enable scope analyzer ifdef SCOPE - CFLAGS += -DSCOPE + CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) + SCOPE_JSON += $(BUILD_DIR)/scope.json endif # Enable perf counters @@ -90,5 +89,8 @@ build: $(BUILD_DIR)/project.v elaborate: $(BUILD_DIR)/project.v cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate" +synthesis: $(BUILD_DIR)/project.v + cd $(BUILD_DIR); $(SRC_DIR)/synth.sh 
-t$(TOP_LEVEL_ENTITY) -sproject.v -P="synthesis" + clean: $(RMDIR) $(BUILD_DIR) diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index 79f68aafc..79708b189 100755 --- a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -1,12 +1,12 @@ #!/bin/bash # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -87,18 +87,18 @@ while getopts "s:t:I:D:P:Wh" arg; do W) # allow warnings no_warnings=0 ;; - h | *) + h | *) usage exit 0 ;; esac done -{ +{ # read design sources - for dir in "${dir_list[@]}" + for dir in "${dir_list[@]}" do - for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f) + for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f) do echo "read_verilog -defer -nolatches $macro_args $inc_args -sv $file" done @@ -111,11 +111,16 @@ done if echo "$process" | grep -q "elaborate"; then echo "hierarchy -top $top_level" fi - + + # synthesize design + if echo "$process" | grep -q "synthesis"; then + echo "synth -top $top_level" + fi + # convert to netlist if echo "$process" | grep -q "netlist"; then echo "proc; opt" - fi + fi # convert to gate logic if echo "$process" | grep -q "techmap"; then @@ -126,8 +131,11 @@ done if echo "$process" | grep -q "verilog"; then echo "write_verilog synth.v" fi + + # Generate a summary report + echo "stat" } > synth.ys -yosys -l yosys.log synth.ys +yosys -l yosys.log -s synth.ys checkErrors yosys.log diff --git a/hw/unittest/Makefile b/hw/unittest/Makefile index 5a8ac941e..5722ec9bc 100644 --- a/hw/unittest/Makefile +++ b/hw/unittest/Makefile @@ -4,6 +4,7 @@ all: $(MAKE) -C 
mem_streamer $(MAKE) -C cache_top $(MAKE) -C core_top + $(MAKE) -C issue_top run: $(MAKE) -C cache run @@ -11,10 +12,12 @@ run: $(MAKE) -C mem_streamer run $(MAKE) -C cache_top run $(MAKE) -C core_top run + $(MAKE) -C issue_top run clean: $(MAKE) -C cache clean $(MAKE) -C generic_queue clean $(MAKE) -C mem_streamer clean $(MAKE) -C cache_top clean - $(MAKE) -C core_top clean \ No newline at end of file + $(MAKE) -C core_top clean + $(MAKE) -C issue_top clean \ No newline at end of file diff --git a/hw/unittest/cache/cachesim.cpp b/hw/unittest/cache/cachesim.cpp index 2c35f5e05..acd68419b 100644 --- a/hw/unittest/cache/cachesim.cpp +++ b/hw/unittest/cache/cachesim.cpp @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -31,12 +31,12 @@ static bool trace_enabled = false; static uint64_t trace_start_time = TRACE_START_TIME; static uint64_t trace_stop_time = TRACE_STOP_TIME; -double sc_time_stamp() { +double sc_time_stamp() { return timestamp; } bool sim_trace_enabled() { - if (timestamp >= trace_start_time + if (timestamp >= trace_start_time && timestamp < trace_stop_time) return true; return trace_enabled; @@ -47,26 +47,27 @@ void sim_trace_enable(bool enable) { } CacheSim::CacheSim() { - // force random values for uninitialized signals + // force random values for uninitialized signals Verilated::randReset(2); - ram_ = nullptr; + // create RTL module instance cache_ = new VVX_cache_top(); - mem_rsp_active_ = false; - snp_req_active_ = false; - #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC; - cache_->trace(trace_, 99); - trace_->open("trace.vcd"); + tfp_ = new VerilatedVcdC; + cache_->trace(tfp_, 99); + tfp_->open("trace.vcd"); #endif + + ram_ = nullptr; + mem_rsp_active_ = false; + snp_req_active_ = false; } CacheSim::~CacheSim() { #ifdef VCD_OUTPUT - trace_->close(); + tfp_->close(); #endif delete cache_; //need to delete the req and rsp vectors @@ -89,7 +90,7 @@ void CacheSim::reset() { mem_rsp_vec_.clear(); //clear req and rsp vecs - + } void CacheSim::step() { @@ -111,43 +112,43 @@ void CacheSim::step() { void CacheSim::eval() { cache_->eval(); #ifdef VCD_OUTPUT - trace_->dump(timestamp); + tfp_->dump(timestamp); #endif ++timestamp; } void CacheSim::run(){ //#ifndef NDEBUG - + //#endif this->step(); - int valid = 300; - int stalls = 20 + 10; - + int valid = 300; + int stalls = 20 + 10; + while (valid > -1) { this->step(); - display_miss(); + display_miss(); if(cache_->core_rsp_valid){ get_core_rsp(); } - + if(!cache_->core_req_valid && !cache_->core_rsp_valid){ - valid--; - + valid--; + } - stalls--; + stalls--; if (stalls == 20){ - //stall_mem(); - //send_snoop_req(); - stalls--; + //stall_mem(); + //send_snoop_req(); + stalls--; } } } void 
CacheSim::clear_req(){ - cache_->core_req_valid = 0; + cache_->core_req_valid = 0; } void CacheSim::send_req(core_req_t *req){ @@ -157,11 +158,11 @@ void CacheSim::send_req(core_req_t *req){ } bool CacheSim::get_core_req_ready(){ - return cache_->core_req_ready; + return cache_->core_req_ready; } bool CacheSim::get_core_rsp_ready(){ - return cache_->core_rsp_ready; + return cache_->core_rsp_ready; } void CacheSim::eval_reqs(){ @@ -170,7 +171,7 @@ void CacheSim::eval_reqs(){ core_req_t *req = core_req_vec_.front(); cache_->core_req_valid = req->valid; - cache_->core_req_rw = req->rw; + cache_->core_req_rw = req->rw; cache_->core_req_byteen = req->byteen; cache_->core_req_addr[0] = req->addr[0]; @@ -183,10 +184,10 @@ void CacheSim::eval_reqs(){ cache_->core_req_data[2] = req->data[2]; cache_->core_req_data[3] = req->data[3]; - cache_->core_req_tag = req->tag; + cache_->core_req_tag = req->tag; core_req_vec_.pop(); - + } else { clear_req(); } @@ -209,7 +210,7 @@ void CacheSim::stall_mem(){ void CacheSim::send_snoop_req(){ /*cache_->snp_req_valid = 1; cache_->snp_req_addr = 0x12222222; - cache_->snp_req_invalidate = 1; + cache_->snp_req_invalidate = 1; cache_->snp_req_tag = 0xff; */ } @@ -225,15 +226,15 @@ void CacheSim::eval_mem_bus() { if (mem_rsp_vec_[i].cycles_left > 0) { mem_rsp_vec_[i].cycles_left -= 1; } - if ((dequeue_index == -1) + if ((dequeue_index == -1) && (mem_rsp_vec_[i].cycles_left == 0)) { dequeue_index = i; } } - // send memory response + // send memory response if (mem_rsp_active_ - && cache_->mem_rsp_valid + && cache_->mem_rsp_valid && cache_->mem_rsp_ready) { mem_rsp_active_ = false; } @@ -244,7 +245,7 @@ void CacheSim::eval_mem_bus() { //copy data from the rsp queue to the cache module memcpy(cache_->mem_rsp_data.data(), mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE); - cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag; + cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag; free(mem_rsp_vec_[dequeue_index].data); //take data out of the 
queue mem_rsp_vec_.erase(mem_rsp_vec_.begin() + dequeue_index); mem_rsp_active_ = true; @@ -256,7 +257,7 @@ void CacheSim::eval_mem_bus() { // handle memory stalls bool mem_stalled = false; #ifdef ENABLE_MEM_STALLS - if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { + if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { mem_stalled = true; } else if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) { @@ -272,19 +273,19 @@ void CacheSim::eval_mem_bus() { uint64_t base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE); uint8_t* data = reinterpret_cast(cache_->mem_req_data.data()); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { + if ((byteen >> i) & 0x1) { (*ram_)[base_addr + i] = data[i]; } } } else { mem_req_t mem_req; - mem_req.cycles_left = MEM_LATENCY; + mem_req.cycles_left = MEM_LATENCY; mem_req.data = (uint8_t*)malloc(MEM_BLOCK_SIZE); mem_req.tag = cache_->mem_req_tag; ram_->read(cache_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data); mem_rsp_vec_.push_back(mem_req); - } - } + } + } } cache_->mem_req_ready = ~mem_stalled; @@ -301,15 +302,15 @@ bool CacheSim::assert_equal(unsigned int* data, unsigned int tag){ } } - return check; + return check; } //DEBUG void CacheSim::display_miss(){ - //int i = (unsigned int)cache_->miss_vec; - //std::bitset<8> x(i); + //int i = (unsigned int)cache_->miss_vec; + //std::bitset<8> x(i); //if (i) std::cout << "Miss Vec " << x << std::endl; //std::cout << "Miss Vec 0" << cache_->miss_vec[0] << std::endl; } @@ -322,18 +323,18 @@ void CacheSim::get_core_req(unsigned int (&rsp)[4]){ //std::cout << std::hex << "core_rsp_valid: " << cache_->core_rsp_valid << std::endl; //std::cout << std::hex << "core_rsp_data: " << cache_->core_rsp_data << std::endl; - //std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; + //std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; } void CacheSim::get_core_rsp(){ - //std::cout << cache_->genblk5_BRA_0_KET_->bank->is_fill_in_pipe<< 
std::endl; + //std::cout << cache_->genblk5_BRA_0_KET_->bank->is_fill_in_pipe<< std::endl; char check = cache_->core_rsp_valid; std::cout << std::hex << "core_rsp_valid: " << (unsigned int) check << std::endl; std::cout << std::hex << "core_rsp_data[0]: " << cache_->core_rsp_data[0] << std::endl; std::cout << std::hex << "core_rsp_data[1]: " << cache_->core_rsp_data[1] << std::endl; std::cout << std::hex << "core_rsp_data[2]: " << cache_->core_rsp_data[2] << std::endl; std::cout << std::hex << "core_rsp_data[3]: " << cache_->core_rsp_data[3] << std::endl; - std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; + std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; } void CacheSim::get_mem_req(){ @@ -341,13 +342,13 @@ void CacheSim::get_mem_req(){ std::cout << std::hex << "mem_req_rw: " << cache_->mem_req_rw << std::endl; std::cout << std::hex << "mem_req_byteen: " << cache_->mem_req_byteen << std::endl; std::cout << std::hex << "mem_req_addr: " << cache_->mem_req_addr << std::endl; - std::cout << std::hex << "mem_req_data: " << cache_->mem_req_data << std::endl; + std::cout << std::hex << "mem_req_data: " << cache_->mem_req_data << std::endl; std::cout << std::hex << "mem_req_tag: " << cache_->mem_req_tag << std::endl; } void CacheSim::get_mem_rsp(){ std::cout << std::hex << "mem_rsp_valid: " << cache_->mem_rsp_valid << std::endl; - std::cout << std::hex << "mem_rsp_data: " << cache_->mem_rsp_data << std::endl; + std::cout << std::hex << "mem_rsp_data: " << cache_->mem_rsp_data << std::endl; std::cout << std::hex << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl; std::cout << std::hex << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl; } diff --git a/hw/unittest/cache/cachesim.h b/hw/unittest/cache/cachesim.h index a38ed774b..5235735d6 100644 --- a/hw/unittest/cache/cachesim.h +++ b/hw/unittest/cache/cachesim.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, 
Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -31,7 +31,6 @@ #define MEM_LATENCY 100 #define MEM_RQ_SIZE 16 #define MEM_STALLS_MODULO 16 -#define MEM_BLOCK_SIZE 16 typedef struct { int cycles_left; @@ -41,7 +40,7 @@ typedef struct { typedef struct { char valid; - char rw; + char rw; unsigned byteen; unsigned *addr; unsigned *data; @@ -50,24 +49,24 @@ typedef struct { class CacheSim { public: - + CacheSim(); virtual ~CacheSim(); - - bool busy(); + + bool busy(); void reset(); void step(); - void wait(uint32_t cycles); + void wait(uint32_t cycles); void attach_ram(RAM* ram); void run(); //run until all reqs are empty - + //req/rsp void send_req(core_req_t *req); - void clear_req(); + void clear_req(); void stall_mem(); void send_snoop_req(); - void send_snp_fwd_in(); + void send_snp_fwd_in(); //assert funcs bool assert_equal(unsigned int* data, unsigned int tag); @@ -81,14 +80,14 @@ public: void get_mem_rsp(); void display_miss(); -private: +private: - void eval(); - void eval_reqs(); + void eval(); + void eval_reqs(); void eval_rsps(); void eval_mem_bus(); - - std::queue core_req_vec_; + + std::queue core_req_vec_; std::vector mem_rsp_vec_; std::map core_rsp_vec_; int mem_rsp_active_; @@ -97,9 +96,9 @@ private: uint32_t snp_req_size_; uint32_t pending_snp_reqs_; - VVX_cache_top *cache_; - RAM *ram_; + VVX_cache_top* cache_; + RAM* ram_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedVcdC* tfp_; #endif }; diff --git a/hw/unittest/common.mk b/hw/unittest/common.mk index a9ca50660..ac3e6b4ff 100644 --- a/hw/unittest/common.mk +++ b/hw/unittest/common.mk @@ -3,19 +3,19 @@ DESTDIR ?= . 
CONFIGS += PARAMS += -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += $(CONFIGS) LDFLAGS += RTL_PKGS += -RTL_INCLUDE += +RTL_INCLUDE += DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += $(CONFIGS) @@ -33,7 +33,7 @@ VL_FLAGS += -j $(THREADS) ifdef DEBUG VL_FLAGS += --trace --trace-structs $(DBG_FLAGS) CXXFLAGS += -g -O0 $(DBG_FLAGS) -else +else VL_FLAGS += -DNDEBUG CXXFLAGS += -O2 -DNDEBUG endif @@ -45,7 +45,7 @@ ifdef PERF endif all: $(DESTDIR)/$(PROJECT) - + $(DESTDIR)/$(PROJECT): $(SRCS) verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@ diff --git a/hw/unittest/core_top/Makefile b/hw/unittest/core_top/Makefile index b2a0cce13..d9fbf40f6 100644 --- a/hw/unittest/core_top/Makefile +++ b/hw/unittest/core_top/Makefile @@ -16,7 +16,7 @@ SRCS += $(SRC_DIR)/main.cpp DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core diff --git a/hw/unittest/issue_top/Makefile b/hw/unittest/issue_top/Makefile new file mode 100644 index 000000000..7e298849c --- /dev/null +++ b/hw/unittest/issue_top/Makefile @@ -0,0 +1,26 @@ +ROOT_DIR := $(realpath ../../..) 
+include $(ROOT_DIR)/config.mk + +PROJECT := issue_top + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +DPI_DIR := $(VORTEX_HOME)/hw/dpi + +SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) + +CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common +CXXFLAGS += -I$(ROOT_DIR)/hw + +SRCS := $(DPI_DIR)/util_dpi.cpp +SRCS += $(SRC_DIR)/main.cpp + +DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE + +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv + +RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core + +TOP := VX_issue_top + +include ../common.mk \ No newline at end of file diff --git a/hw/unittest/issue_top/main.cpp b/hw/unittest/issue_top/main.cpp new file mode 100644 index 000000000..5191b4433 --- /dev/null +++ b/hw/unittest/issue_top/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file diff --git a/hw/unittest/mem_streamer/memsim.cpp b/hw/unittest/mem_streamer/memsim.cpp index a54a512d3..329c01708 100644 --- a/hw/unittest/mem_streamer/memsim.cpp +++ b/hw/unittest/mem_streamer/memsim.cpp @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -28,12 +28,12 @@ static uint64_t trace_start_time = 0; static uint64_t trace_stop_time = -1ull; static uint64_t timestamp = 0; -double sc_time_stamp() { +double sc_time_stamp() { return timestamp; } bool sim_trace_enabled() { - if (timestamp >= trace_start_time + if (timestamp >= trace_start_time && timestamp < trace_stop_time) return true; return trace_enabled; @@ -61,22 +61,23 @@ int generate_rand_mask (int mask) { } MemSim::MemSim() { - msu_ = new VVX_mem_scheduler(); + // force random values for uninitialized signals + Verilated::randReset(2); - // Enable tracing - Verilated::traceEverOn(true); + // create RTL module instance + msu_ = new VVX_mem_scheduler(); #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC; - cache_->trace(trace_, 99); + tfp_ = new VerilatedVcdC; + cache_->trace(tfp_, 99); race_->open("trace.vcd"); #endif } MemSim::~MemSim() { #ifdef VCD_OUTPUT - trace_->close(); + tfp_->close(); #endif delete msu_; } @@ -84,7 +85,7 @@ MemSim::~MemSim() { void MemSim::eval() { msu_->eval(); #ifdef VCD_OUTPUT - trace_->dump(timestamp++); + tfp_->dump(timestamp++); #endif } @@ -158,7 +159,7 @@ int main (int argc, char** argv, char** env) { Verilated::commandArgs(argc, argv); MemSim memsim; - RAM ram; + RAM ram; memsim.run(&ram); diff --git a/hw/unittest/mem_streamer/memsim.h b/hw/unittest/mem_streamer/memsim.h index 482572bb2..5c08c97b7 100644 --- a/hw/unittest/mem_streamer/memsim.h +++ b/hw/unittest/mem_streamer/memsim.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,10 +16,8 @@ #include #include #include -#include #include #include "VVX_mem_scheduler.h" -#include "VVX_mem_scheduler__Syms.h" #include "ram.h" #define SIM_TIME 5000 @@ -37,7 +35,7 @@ public: private: VVX_mem_scheduler *msu_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedVcdC* tfp_; #endif void eval(); diff --git a/kernel/Makefile b/kernel/Makefile index 201ebc200..16d279fa0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -32,6 +32,10 @@ CFLAGS += -O3 -mcmodel=medany -fno-exceptions -fdata-sections -ffunction-section CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw CFLAGS += -DXLEN_$(XLEN) +ifeq ($(VM_ENABLE), 1) +CFLAGS += -DVM_ENABLE +endif + PROJECT := libvortex SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.ubuntu index 1e8485e75..c3e72a0f4 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.ubuntu @@ -17,13 +17,12 @@ FROM ubuntu:20.04 # Set non-interactive installation to avoid user input during build ARG DEBIAN_FRONTEND=noninteractive -# Update and install basic and necessary dependencies +# Update and install necessary dependencies RUN apt-get update && apt-get install -y \ + software-properties-common \ build-essential \ - binutils \ python \ python3 \ - uuid-dev \ git \ wget \ curl \ @@ -39,9 +38,11 @@ RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /v # Set the initial working directory WORKDIR /vortex +# install system dependencies +RUN ./ci/system_updates.sh + # Configure the build folder -RUN mkdir build && cd build && \ - ../configure --tooldir=$HOME/tools +RUN mkdir build && cd build && ../configure # Install prebuilt toolchain RUN cd build && ./ci/toolchain_install.sh --all diff --git a/runtime/Makefile b/runtime/Makefile index e5f8af74c..aecac00e1 100644 --- a/runtime/Makefile 
+++ b/runtime/Makefile @@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk all: stub rtlsim simx opae xrt +vm: stub simx + stub: $(MAKE) -C stub diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index 6f57c7de8..957e5d62a 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -36,23 +36,27 @@ typedef void* vx_buffer_h; #define VX_CAPS_ISA_FLAGS 0x7 // device isa flags -#define VX_ISA_STD_A (1ull << 0) -#define VX_ISA_STD_C (1ull << 2) -#define VX_ISA_STD_D (1ull << 3) -#define VX_ISA_STD_E (1ull << 4) -#define VX_ISA_STD_F (1ull << 5) -#define VX_ISA_STD_H (1ull << 7) -#define VX_ISA_STD_I (1ull << 8) -#define VX_ISA_STD_N (1ull << 13) -#define VX_ISA_STD_Q (1ull << 16) -#define VX_ISA_STD_S (1ull << 18) -#define VX_ISA_STD_U (1ull << 20) -#define VX_ISA_ARCH(flags) (1 << (((flags >> 30) & 0x3) + 4)) -#define VX_ISA_EXT_ICACHE (1ull << 32) -#define VX_ISA_EXT_DCACHE (1ull << 33) -#define VX_ISA_EXT_L2CACHE (1ull << 34) -#define VX_ISA_EXT_L3CACHE (1ull << 35) -#define VX_ISA_EXT_LMEM (1ull << 36) +#define VX_ISA_STD_A (1ull << ISA_STD_A) +#define VX_ISA_STD_C (1ull << ISA_STD_C) +#define VX_ISA_STD_D (1ull << ISA_STD_D) +#define VX_ISA_STD_E (1ull << ISA_STD_E) +#define VX_ISA_STD_F (1ull << ISA_STD_F) +#define VX_ISA_STD_H (1ull << ISA_STD_H) +#define VX_ISA_STD_I (1ull << ISA_STD_I) +#define VX_ISA_STD_N (1ull << ISA_STD_N) +#define VX_ISA_STD_Q (1ull << ISA_STD_Q) +#define VX_ISA_STD_S (1ull << ISA_STD_S) +#define VX_ISA_STD_U (1ull << ISA_STD_U) +#define VX_ISA_ARCH(flags) (1ull << (((flags >> 30) & 0x3) + 4)) +#define VX_ISA_EXT_ICACHE (1ull << (32+ISA_EXT_ICACHE)) +#define VX_ISA_EXT_DCACHE (1ull << (32+ISA_EXT_DCACHE)) +#define VX_ISA_EXT_L2CACHE (1ull << (32+ISA_EXT_L2CACHE)) +#define VX_ISA_EXT_L3CACHE (1ull << (32+ISA_EXT_L3CACHE)) +#define VX_ISA_EXT_LMEM (1ull << (32+ISA_EXT_LMEM)) +#define VX_ISA_EXT_ZICOND (1ull << (32+ISA_EXT_ZICOND)) +#define VX_ISA_EXT_TEX (1ull << (32+ISA_EXT_TEX)) +#define VX_ISA_EXT_RASTER (1ull << 
(32+ISA_EXT_RASTER)) +#define VX_ISA_EXT_OM (1ull << (32+ISA_EXT_OM)) // ready wait timeout #define VX_MAX_TIMEOUT (24*60*60*1000) // 24 Hr diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index 4eb410406..3954d3f19 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -25,15 +25,9 @@ SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp # set up target types ifeq ($(TARGET), opaesim) OPAESIM = $(DESTDIR)/libopae-c-sim.so - CXXFLAGS += -DOPAESIM -I$(SIM_DIR)/opaesim - LDFLAGS += -L$(DESTDIR) -lopae-c-sim + CXXFLAGS += -I$(SIM_DIR)/opaesim else CXXFLAGS += -I$(SYN_DIR) - ifeq ($(TARGET), asesim) - CXXFLAGS += -DASESIM - else - CXXFLAGS += -DFPGA - endif endif # Debugigng diff --git a/runtime/opae/driver.cpp b/runtime/opae/driver.cpp index a01f1e5e8..5048cf754 100644 --- a/runtime/opae/driver.cpp +++ b/runtime/opae/driver.cpp @@ -22,13 +22,7 @@ #include #include -#ifdef OPAESIM -#define DEFAULT_OPAE_DRV_PATHS "libopae-c-sim.so" -#elif ASESIM -#define DEFAULT_OPAE_DRV_PATHS "libopae-c-ase.so" -#else #define DEFAULT_OPAE_DRV_PATHS "libopae-c.so" -#endif #define SET_API(func) \ opae_drv_funcs->func = (pfn_##func)dlsym(dl_handle, #func); \ @@ -64,7 +58,7 @@ int drv_init(opae_drv_api_t* opae_drv_funcs) { if (dl_handle) break; } - + if (dl_handle == nullptr) { printf("dlopen failed: %s\n", dlerror()); return -1; diff --git a/runtime/opae/driver.h b/runtime/opae/driver.h index d2a933d75..0d1d4daa7 100644 --- a/runtime/opae/driver.h +++ b/runtime/opae/driver.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -13,12 +13,7 @@ #pragma once -#ifndef OPAESIM -#include -#include -#else #include -#endif typedef fpga_result (*pfn_fpgaGetProperties)(fpga_token token, fpga_properties *prop); typedef fpga_result (*pfn_fpgaPropertiesSetObjectType)(fpga_properties prop, fpga_objtype objtype); @@ -38,7 +33,7 @@ typedef fpga_result (*pfn_fpgaReadMMIO64)(fpga_handle handle, uint32_t mmio_num, typedef const char *(*pfn_fpgaErrStr)(fpga_result e); struct opae_drv_api_t { - pfn_fpgaGetProperties fpgaGetProperties; + pfn_fpgaGetProperties fpgaGetProperties; pfn_fpgaPropertiesSetObjectType fpgaPropertiesSetObjectType; pfn_fpgaPropertiesSetGUID fpgaPropertiesSetGUID; pfn_fpgaDestroyProperties fpgaDestroyProperties; @@ -46,14 +41,14 @@ struct opae_drv_api_t { pfn_fpgaDestroyToken fpgaDestroyToken; pfn_fpgaPropertiesGetLocalMemorySize fpgaPropertiesGetLocalMemorySize; - pfn_fpgaOpen fpgaOpen; - pfn_fpgaClose fpgaClose; - pfn_fpgaPrepareBuffer fpgaPrepareBuffer; - pfn_fpgaReleaseBuffer fpgaReleaseBuffer; + pfn_fpgaOpen fpgaOpen; + pfn_fpgaClose fpgaClose; + pfn_fpgaPrepareBuffer fpgaPrepareBuffer; + pfn_fpgaReleaseBuffer fpgaReleaseBuffer; pfn_fpgaGetIOAddress fpgaGetIOAddress; pfn_fpgaWriteMMIO64 fpgaWriteMMIO64; - pfn_fpgaReadMMIO64 fpgaReadMMIO64; - pfn_fpgaErrStr fpgaErrStr; + pfn_fpgaReadMMIO64 fpgaReadMMIO64; + pfn_fpgaErrStr fpgaErrStr; }; int drv_init(opae_drv_api_t* opae_drv_funcs); diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 0237ecd30..390d5acc4 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -12,9 +12,11 @@ // limitations under the License. 
#include -#include #include "driver.h" + +#include + #ifdef SCOPE #include "scope.h" #endif diff --git a/runtime/rtlsim/vortex.cpp b/runtime/rtlsim/vortex.cpp index a920cbeca..c75a6c12f 100644 --- a/runtime/rtlsim/vortex.cpp +++ b/runtime/rtlsim/vortex.cpp @@ -119,6 +119,10 @@ public: if (dev_addr + asize > GLOBAL_MEM_SIZE) return -1; + if (flags | VX_MEM_WRITE) { + flags |= VX_MEM_READ; // ensure caches can handle fill requests + } + ram_.set_acl(dev_addr, size, flags); return 0; diff --git a/runtime/simx/Makefile b/runtime/simx/Makefile index 7c73ca66d..7615f72b2 100644 --- a/runtime/simx/Makefile +++ b/runtime/simx/Makefile @@ -10,6 +10,10 @@ CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMM CXXFLAGS += $(CONFIGS) CXXFLAGS += -DXLEN_$(XLEN) +ifeq ($(VM_ENABLE), 1) +CXXFLAGS += -DVM_ENABLE +endif + LDFLAGS += -shared -pthread LDFLAGS += -L$(DESTDIR) -lsimx diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index f9143cf0f..e5ec36b60 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -120,7 +120,7 @@ public: uint64_t map_p2v(uint64_t ppn, uint32_t flags) { DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn); - if (addr_mapping.contains(ppn)) return addr_mapping[ppn]; + if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn]; // If ppn to vpn mapping doesnt exist, create mapping DBGPRINT(" [RT:MAP_P2V] Not found. 
Allocate new page table or update a PTE.\n"); diff --git a/runtime/stub/utils.cpp b/runtime/stub/utils.cpp index 26b119608..eea7691f5 100644 --- a/runtime/stub/utils.cpp +++ b/runtime/stub/utils.cpp @@ -182,12 +182,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { uint64_t sched_stalls = 0; uint64_t ibuffer_stalls = 0; uint64_t scrb_stalls = 0; + uint64_t opds_stalls = 0; uint64_t scrb_alu = 0; uint64_t scrb_fpu = 0; uint64_t scrb_lsu = 0; - uint64_t scrb_sfu = 0; - uint64_t scrb_wctl = 0; uint64_t scrb_csrs = 0; + uint64_t scrb_wctl = 0; uint64_t ifetches = 0; uint64_t loads = 0; uint64_t stores = 0; @@ -268,7 +268,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { } sched_stalls += sched_stalls_per_core; } - // ibuffer_stalls + // ibuffer stalls { uint64_t ibuffer_stalls_per_core; CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_IBUF_ST, core_id, &ibuffer_stalls_per_core), { @@ -280,7 +280,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { } ibuffer_stalls += ibuffer_stalls_per_core; } - // issue_stalls + // scoreboard stalls { uint64_t scrb_stalls_per_core; CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_ST, core_id, &scrb_stalls_per_core), { @@ -298,49 +298,46 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_LSU, core_id, &scrb_lsu_per_core), { return err; }); - uint64_t scrb_sfu_per_core; - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_SFU, core_id, &scrb_sfu_per_core), { - return err; - }); - scrb_alu += scrb_alu_per_core; - scrb_fpu += scrb_fpu_per_core; - scrb_lsu += scrb_lsu_per_core; - scrb_sfu += scrb_sfu_per_core; - if (num_cores > 1) { - uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_sfu_per_core; - fprintf(stream, "PERF: core%d: issue stalls=%ld (alu=%d%%, fpu=%d%%, lsu=%d%%, sfu=%d%%)\n", core_id, scrb_stalls_per_core, - calcAvgPercent(scrb_alu_per_core, scrb_total), - calcAvgPercent(scrb_fpu_per_core, 
scrb_total), - calcAvgPercent(scrb_lsu_per_core, scrb_total), - calcAvgPercent(scrb_sfu_per_core, scrb_total)); - } - scrb_stalls += scrb_stalls_per_core; - } - // sfu_stalls - { - uint64_t scrb_sfu_per_core; - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_SFU, core_id, &scrb_sfu_per_core), { + uint64_t scrb_csrs_per_core; + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_CSRS, core_id, &scrb_csrs_per_core), { return err; }); uint64_t scrb_wctl_per_core; CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_WCTL, core_id, &scrb_wctl_per_core), { return err; }); - uint64_t scrb_csrs_per_core; - CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_SCRB_CSRS, core_id, &scrb_csrs_per_core), { + scrb_alu += scrb_alu_per_core; + scrb_fpu += scrb_fpu_per_core; + scrb_lsu += scrb_lsu_per_core; + scrb_csrs += scrb_csrs_per_core; + scrb_wctl += scrb_wctl_per_core; + if (num_cores > 1) { + uint64_t scrb_total = scrb_alu_per_core + scrb_fpu_per_core + scrb_lsu_per_core + scrb_csrs_per_core + scrb_wctl_per_core; + int scrb_percent_per_core = calcAvgPercent(scrb_stalls_per_core, cycles_per_core); + fprintf(stream, "PERF: core%d: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n" + , core_id + , scrb_stalls_per_core + , scrb_percent_per_core + , calcAvgPercent(scrb_alu_per_core, scrb_total) + , calcAvgPercent(scrb_fpu_per_core, scrb_total) + , calcAvgPercent(scrb_lsu_per_core, scrb_total) + , calcAvgPercent(scrb_csrs_per_core, scrb_total) + , calcAvgPercent(scrb_wctl_per_core, scrb_total) + ); + } + scrb_stalls += scrb_stalls_per_core; + } + // operands stalls + { + uint64_t opds_stalls_per_core; + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_OPDS_ST, core_id, &opds_stalls_per_core), { return err; }); if (num_cores > 1) { - uint64_t sfu_total = scrb_wctl_per_core + scrb_csrs_per_core; - fprintf(stream, "PERF: core%d: sfu stalls=%ld (scrs=%d%%, wctl=%d%%)\n" - , core_id - , scrb_sfu_per_core - , calcAvgPercent(scrb_csrs_per_core, sfu_total) - , 
calcAvgPercent(scrb_wctl_per_core, sfu_total) - ); + int opds_percent_per_core = calcAvgPercent(opds_stalls_per_core, cycles_per_core); + fprintf(stream, "PERF: core%d: operands stalls=%ld (%d%%)\n", core_id, opds_stalls_per_core, opds_percent_per_core); } - scrb_wctl += scrb_wctl_per_core; - scrb_csrs += scrb_csrs_per_core; + opds_stalls += opds_stalls_per_core; } // PERF: memory // ifetches @@ -542,7 +539,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { break; } - float IPC = (float)(double(instrs_per_core) / double(cycles_per_core)); + float IPC = caclAverage(instrs_per_core, cycles_per_core); if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC); total_instrs += instrs_per_core; total_cycles += cycles_per_core; @@ -554,23 +551,24 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { int sched_idles_percent = calcAvgPercent(sched_idles, total_cycles); int sched_stalls_percent = calcAvgPercent(sched_stalls, total_cycles); int ibuffer_percent = calcAvgPercent(ibuffer_stalls, total_cycles); - int ifetch_avg_lat = (int)(double(ifetch_lat) / double(ifetches)); - int load_avg_lat = (int)(double(load_lat) / double(loads)); - uint64_t scrb_total = scrb_alu + scrb_fpu + scrb_lsu + scrb_sfu; - uint64_t sfu_total = scrb_wctl + scrb_csrs; + int scrb_percent = calcAvgPercent(scrb_stalls, total_cycles); + int opds_percent = calcAvgPercent(opds_stalls, total_cycles); + int ifetch_avg_lat = caclAverage(ifetch_lat, ifetches); + int load_avg_lat = caclAverage(load_lat, loads); + uint64_t scrb_total = scrb_alu + scrb_fpu + scrb_lsu + scrb_csrs + scrb_wctl; fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent); fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent); fprintf(stream, "PERF: ibuffer stalls=%ld (%d%%)\n", ibuffer_stalls, ibuffer_percent); - fprintf(stream, "PERF: issue stalls=%ld (alu=%d%%, 
fpu=%d%%, lsu=%d%%, sfu=%d%%)\n", scrb_stalls, - calcAvgPercent(scrb_alu, scrb_total), - calcAvgPercent(scrb_fpu, scrb_total), - calcAvgPercent(scrb_lsu, scrb_total), - calcAvgPercent(scrb_sfu, scrb_total)); - fprintf(stream, "PERF: sfu stalls=%ld (scrs=%d%%, wctl=%d%%)\n" - , scrb_sfu - , calcAvgPercent(scrb_csrs, sfu_total) - , calcAvgPercent(scrb_wctl, sfu_total) + fprintf(stream, "PERF: scoreboard stalls=%ld (%d%%) (alu=%d%%, fpu=%d%%, lsu=%d%%, scrs=%d%%, wctl=%d%%)\n" + , scrb_stalls + , scrb_percent + , calcAvgPercent(scrb_alu, scrb_total) + , calcAvgPercent(scrb_fpu, scrb_total) + , calcAvgPercent(scrb_lsu, scrb_total) + , calcAvgPercent(scrb_csrs, scrb_total) + , calcAvgPercent(scrb_wctl, scrb_total) ); + fprintf(stream, "PERF: operands stalls=%ld (%d%%)\n", opds_stalls, opds_percent); fprintf(stream, "PERF: ifetches=%ld\n", ifetches); fprintf(stream, "PERF: loads=%ld\n", loads); fprintf(stream, "PERF: stores=%ld\n", stores); @@ -618,7 +616,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { break; } - float IPC = (float)(double(total_instrs) / double(max_cycles)); + float IPC = caclAverage(total_instrs, max_cycles); fprintf(stream, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, max_cycles, IPC); fflush(stream); diff --git a/runtime/stub/vortex.cpp b/runtime/stub/vortex.cpp index 2e9a85f89..70b95dcbc 100644 --- a/runtime/stub/vortex.cpp +++ b/runtime/stub/vortex.cpp @@ -162,5 +162,22 @@ extern int vx_dcr_write(vx_device_h hdevice, uint32_t addr, uint32_t value) { } extern int vx_mpm_query(vx_device_h hdevice, uint32_t addr, uint32_t core_id, uint64_t* value) { - return (g_callbacks.mpm_query)(hdevice, addr, core_id, value); + if (core_id == 0xffffffff) { + uint64_t num_cores; + CHECK_ERR((g_callbacks.dev_caps)(hdevice, VX_CAPS_NUM_CORES, &num_cores), { + return err; + }); + uint64_t sum_value = 0; + uint64_t cur_value; + for (uint32_t i = 0; i < num_cores; ++i) { + CHECK_ERR((g_callbacks.mpm_query)(hdevice, addr, i, &cur_value), { + 
return err; + }); + sum_value += cur_value; + } + *value = sum_value; + return 0; + } else { + return (g_callbacks.mpm_query)(hdevice, addr, core_id, value); + } } \ No newline at end of file diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 29f4aeeea..408bf23ed 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -134,12 +134,14 @@ static void wait_for_enter(const std::string &msg) { class vx_device { public: vx_device() - : xrtDevice_(nullptr) - , xrtKernel_(nullptr) - , global_mem_(ALLOC_BASE_ADDR, + : global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, RAM_PAGE_SIZE, CACHE_BLOCK_SIZE) + #ifndef CPP_API + , xrtDevice_(nullptr) + , xrtKernel_(nullptr) + #endif {} ~vx_device() { @@ -715,10 +717,10 @@ public: private: + MemoryAllocator global_mem_; xrt_device_t xrtDevice_; xrt_kernel_t xrtKernel_; platform_info_t platform_; - MemoryAllocator global_mem_; uint64_t dev_caps_; uint64_t isa_caps_; uint64_t global_mem_size_; diff --git a/sim/Makefile b/sim/Makefile index e16486e8f..4d5ea89c1 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -1,6 +1,9 @@ ROOT_DIR := $(realpath ..) include $(ROOT_DIR)/config.mk +simx: + $(MAKE) -C simx + all: $(MAKE) -C simx $(MAKE) -C rtlsim diff --git a/sim/common.mk b/sim/common.mk index 93c902a31..ab017effe 100644 --- a/sim/common.mk +++ b/sim/common.mk @@ -5,7 +5,4 @@ HW_DIR := $(VORTEX_HOME)/hw RTL_DIR := $(HW_DIR)/rtl DPI_DIR := $(HW_DIR)/dpi SCRIPT_DIR := $(HW_DIR)/scripts - -COMMON_DIR := $(VORTEX_HOME)/sim/common - -THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party \ No newline at end of file +COMMON_DIR := $(VORTEX_HOME)/sim/common \ No newline at end of file diff --git a/sim/common/bitvector.h b/sim/common/bitvector.h new file mode 100644 index 000000000..9fcf22c62 --- /dev/null +++ b/sim/common/bitvector.h @@ -0,0 +1,314 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace vortex { + +template +class BitVector { +private: + static constexpr size_t BITS_PER_WORD = sizeof(T) * 8; + std::vector bits_; + size_t size_; + bool all_zero_; + + size_t wordIndex(size_t pos) const { + return pos / BITS_PER_WORD; + } + + T bitMask(size_t pos) const { + return T(1) << (pos % BITS_PER_WORD); + } + + void updateAllZero() { + all_zero_ = std::all_of(bits_.begin(), bits_.end(), [](T word) { return word == 0; }); + } + +public: + explicit BitVector(size_t size = 0) + : bits_((size + (BITS_PER_WORD - 1)) / BITS_PER_WORD) + , size_(size) + , all_zero_(true) + {} + + void set(size_t pos) { + if (pos >= size_) throw std::out_of_range("Index out of range"); + bits_[this->wordIndex(pos)] |= this->bitMask(pos); + all_zero_ = false; + } + + void set(size_t pos, bool value) { + if (value) { + this->set(pos); + } else { + this->reset(pos); + } + } + + void reset() { + std::fill(bits_.begin(), bits_.end(), 0); + all_zero_ = true; + } + + void reset(size_t pos) { + if (pos >= size_) throw std::out_of_range("Index out of range"); + bits_[this->wordIndex(pos)] &= ~this->bitMask(pos); + this->updateAllZero(); + } + + bool test(size_t pos) const { + if (pos >= size_) throw std::out_of_range("Index out of range"); + return bits_[this->wordIndex(pos)] & this->bitMask(pos); + } + + size_t size() const { + return size_; + } + + void resize(size_t new_size) { + size_ = new_size; + bits_.resize((new_size + (BITS_PER_WORD - 1)) / BITS_PER_WORD, 0); + this->updateAllZero(); + } + + bool operator==(const BitVector& 
other) const { + return (size_ == other.size_) && (bits_ == other.bits_); + } + + bool operator!=(const BitVector& other) const { + return !(*this == other); + } + + bool operator[](size_t pos) const { + return test(pos); + } + + BitVector& operator&=(const BitVector& other) { + if (size_ != other.size_) throw std::invalid_argument("Bit sizes must match"); + for (size_t i = 0; i < bits_.size(); ++i) { + bits_[i] &= other.bits_[i]; + } + this->updateAllZero(); + return *this; + } + + BitVector& operator|=(const BitVector& other) { + if (size_ != other.size_) throw std::invalid_argument("Bit sizes must match"); + for (size_t i = 0; i < bits_.size(); ++i) { + bits_[i] |= other.bits_[i]; + } + this->updateAllZero(); + return *this; + } + + BitVector& operator^=(const BitVector& other) { + if (size_ != other.size_) throw std::invalid_argument("Bit sizes must match"); + for (size_t i = 0; i < bits_.size(); ++i) { + bits_[i] ^= other.bits_[i]; + } + this->updateAllZero(); + return *this; + } + + BitVector operator~() const { + BitVector result(size_); + for (size_t i = 0; i < bits_.size(); ++i) { + result.bits_[i] = ~bits_[i]; + } + result.updateAllZero(); + return result; + } + + void flip() { + for (auto &word : bits_) { + word = ~word; + } + this->updateAllZero(); + } + + size_t count() const { + size_t count = 0; + for (const auto &word : bits_) { + count += std::bitset(word).count(); + } + return count; + } + + bool none() const { + return all_zero_; + } + + bool any() const { + return !all_zero_; + } + + bool all() const { + size_t full_bits = size_ / BITS_PER_WORD; + size_t remaining_bits = size_ % BITS_PER_WORD; + T full_mask = ~T(0); + for (size_t i = 0; i < full_bits; ++i) { + if (bits_[i] != full_mask) + return false; + } + if (remaining_bits > 0) { + T partial_mask = (T(1) << remaining_bits) - 1; + if ((bits_[full_bits] & partial_mask) != partial_mask) + return false; + } + return true; + } + + BitVector& operator<<=(size_t pos) { + if (pos >= size_) { + 
reset(); + return *this; + } + + size_t word_shift = pos / BITS_PER_WORD; + size_t bit_shift = pos % BITS_PER_WORD; + + if (word_shift > 0) { + for (size_t i = bits_.size() - 1; i >= word_shift; --i) { + bits_[i] = bits_[i - word_shift]; + } + std::fill(bits_.begin(), bits_.begin() + word_shift, 0); + } + + if (bit_shift > 0) { + for (size_t i = bits_.size() - 1; i > 0; --i) { + bits_[i] = (bits_[i] << bit_shift) | (bits_[i - 1] >> (BITS_PER_WORD - bit_shift)); + } + bits_[0] <<= bit_shift; + } + + this->updateAllZero(); + return *this; + } + + BitVector& operator>>=(size_t pos) { + if (pos >= size_) { + reset(); + return *this; + } + + size_t word_shift = pos / BITS_PER_WORD; + size_t bit_shift = pos % BITS_PER_WORD; + + if (word_shift > 0) { + for (size_t i = 0; i < bits_.size() - word_shift; ++i) { + bits_[i] = bits_[i + word_shift]; + } + std::fill(bits_.end() - word_shift, bits_.end(), 0); + } + + if (bit_shift > 0) { + for (size_t i = 0; i < bits_.size() - 1; ++i) { + bits_[i] = (bits_[i] >> bit_shift) | (bits_[i + 1] << (BITS_PER_WORD - bit_shift)); + } + bits_.back() >>= bit_shift; + } + + this->updateAllZero(); + return *this; + } + + std::string to_string() const { + std::string result; + for (size_t i = 0; i < size_; ++i) { + result.push_back(test(i) ? 
'1' : '0'); + } + return result; + } + + unsigned long to_ulong() const { + if (size_ > sizeof(unsigned long) * 8) { + throw std::overflow_error("BitVector size exceeds unsigned long capacity"); + } + + unsigned long result = 0; + for (size_t i = 0; i < size_; ++i) { + if (test(i)) { + result |= (1UL << i); + } + } + return result; + } + + unsigned long long to_ullong() const { + if (size_ > sizeof(unsigned long long) * 8) { + throw std::overflow_error("BitVector size exceeds unsigned long long capacity"); + } + + unsigned long long result = 0; + for (size_t i = 0; i < size_; ++i) { + if (test(i)) { + result |= (1ULL << i); + } + } + return result; + } + + friend std::ostream& operator<<(std::ostream& os, const BitVector& bv) { + for (size_t i = 0; i < bv.size_; ++i) { + os << bv.test(i); + } + return os; + } + + friend BitVector operator&(const BitVector& lhs, const BitVector& rhs) { + BitVector result(lhs); + result &= rhs; + return result; + } + + friend BitVector operator|(const BitVector& lhs, const BitVector& rhs) { + BitVector result(lhs); + result |= rhs; + return result; + } + + friend BitVector operator^(const BitVector& lhs, const BitVector& rhs) { + BitVector result(lhs); + result ^= rhs; + return result; + } + + friend BitVector operator<<(const BitVector& lhs, size_t pos) { + BitVector result(lhs); + result <<= pos; + return result; + } + + friend BitVector operator>>(const BitVector& lhs, size_t pos) { + BitVector result(lhs); + result >>= pos; + return result; + } +}; + +} + +// std::hash specialization for BitVector +namespace std { + +template +struct hash> { + size_t operator()(const vortex::BitVector& bv) const { + return hash()(bv.to_string()); + } +}; + +} \ No newline at end of file diff --git a/sim/common/dram_sim.cpp b/sim/common/dram_sim.cpp new file mode 100644 index 000000000..f7cfa8a32 --- /dev/null +++ b/sim/common/dram_sim.cpp @@ -0,0 +1,120 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the 
"License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dram_sim.h" +#include "util.h" +#include + +DISABLE_WARNING_PUSH +DISABLE_WARNING_UNUSED_PARAMETER +DISABLE_WARNING_MISSING_FIELD_INITIALIZERS +#include +#include +#include +#include +#include +DISABLE_WARNING_POP + +using namespace vortex; + +class DramSim::Impl { +private: + Ramulator::IFrontEnd* ramulator_frontend_; + Ramulator::IMemorySystem* ramulator_memorysystem_; + +public: + Impl(int clock_ratio) { + YAML::Node dram_config; + dram_config["Frontend"]["impl"] = "GEM5"; + dram_config["MemorySystem"]["impl"] = "GenericDRAM"; + dram_config["MemorySystem"]["clock_ratio"] = clock_ratio; + dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2"; + dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb"; + dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192; + dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps"; + dram_config["MemorySystem"]["Controller"]["impl"] = "Generic"; + dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS"; + dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; + dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; + dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy"; + { + YAML::Node draw_plugin; + draw_plugin["ControllerPlugin"]["impl"] = "TraceRecorder"; + draw_plugin["ControllerPlugin"]["path"] = "./trace/ramulator.log"; + 
dram_config["MemorySystem"]["Controller"]["plugins"].push_back(draw_plugin); + } + dram_config["MemorySystem"]["AddrMapper"]["impl"] = "RoBaRaCoCh"; + + ramulator_frontend_ = Ramulator::Factory::create_frontend(dram_config); + ramulator_memorysystem_ = Ramulator::Factory::create_memory_system(dram_config); + ramulator_frontend_->connect_memory_system(ramulator_memorysystem_); + ramulator_memorysystem_->connect_frontend(ramulator_frontend_); + } + + ~Impl() { + std::ofstream nullstream("ramulator.stats.log"); + auto original_buf = std::cout.rdbuf(); + std::cout.rdbuf(nullstream.rdbuf()); + ramulator_frontend_->finalize(); + ramulator_memorysystem_->finalize(); + std::cout.rdbuf(original_buf); + } + + void reset() { + //-- + } + + void tick() { + ramulator_memorysystem_->tick(); + } + + bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) { + if (!ramulator_frontend_->receive_external_requests( + is_write ? Ramulator::Request::Type::Write : Ramulator::Request::Type::Read, + addr, + source_id, + [callback_ = std::move(callback), arg_ = std::move(arg)](Ramulator::Request& /*dram_req*/) { + callback_(arg_); + } + )) { + return false; + } + if (is_write) { + // Ramulator does not handle write responses, so we call the callback ourselves + callback(arg); + } + return true; + } +}; + +/////////////////////////////////////////////////////////////////////////////// + +DramSim::DramSim(int clock_ratio) + : impl_(new Impl(clock_ratio)) +{} + +DramSim::~DramSim() { + delete impl_; +} + +void DramSim::reset() { + impl_->reset(); +} + +void DramSim::tick() { + impl_->tick(); +} + +bool DramSim::send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg) { + return impl_->send_request(is_write, addr, source_id, callback, arg); +} \ No newline at end of file diff --git a/sim/common/dram_sim.h b/sim/common/dram_sim.h new file mode 100644 index 000000000..5fea3f27c --- /dev/null +++ 
b/sim/common/dram_sim.h @@ -0,0 +1,36 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +namespace vortex { + +class DramSim { +public: + typedef void (*ResponseCallback)(void *arg); + + DramSim(int clock_ratio); + ~DramSim(); + + void reset(); + + void tick(); + + bool send_request(bool is_write, uint64_t addr, int source_id, ResponseCallback callback, void* arg); + +private: + class Impl; + Impl* impl_; +}; + +} \ No newline at end of file diff --git a/sim/common/util.h b/sim/common/util.h index c8c63854f..83fdee7df 100644 --- a/sim/common/util.h +++ b/sim/common/util.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -34,6 +34,7 @@ const char* fileExtension(const char* filepath); #define DISABLE_WARNING_UNREFERENCED_FUNCTION __pragma(warning(disable : 4505)) #define DISABLE_WARNING_ANONYMOUS_STRUCT __pragma(warning(disable : 4201)) #define DISABLE_WARNING_UNUSED_VARIABLE __pragma(warning(disable : 4189)) +#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS __pragma(warning(disable : 4351)) #elif defined(__GNUC__) #define DISABLE_WARNING_PUSH _Pragma("GCC diagnostic push") #define DISABLE_WARNING_POP _Pragma("GCC diagnostic pop") @@ -45,6 +46,8 @@ const char* fileExtension(const char* filepath); _Pragma("GCC diagnostic ignored \"-Wpedantic\"") #define DISABLE_WARNING_UNUSED_VARIABLE \ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS \ + _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") #elif defined(__clang__) #define DISABLE_WARNING_PUSH _Pragma("clang diagnostic push") #define DISABLE_WARNING_POP _Pragma("clang diagnostic pop") @@ -56,6 +59,8 @@ const char* fileExtension(const char* filepath); _Pragma("clang diagnostic ignored \"-Wgnu-anonymous-struct\"") #define DISABLE_WARNING_UNUSED_VARIABLE \ _Pragma("clang diagnostic ignored \"-Wunused-but-set-variable\"") +#define DISABLE_WARNING_MISSING_FIELD_INITIALIZERS \ + _Pragma("clang diagnostic ignored \"-Wmissing-field-initializers\"") #else #define DISABLE_WARNING_PUSH #define DISABLE_WARNING_POP diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 2dd99cf92..7b0d543d2 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -5,15 +5,17 @@ DESTDIR ?= $(CURDIR) SRC_DIR := $(VORTEX_HOME)/sim/opaesim AFU_DIR := $(RTL_DIR)/afu/opae -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR) CXXFLAGS += 
-I/$(THIRD_PARTY_DIR)/softfloat/source/include -CXXFLAGS += -I/$(THIRD_PARTY_DIR) +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src CXXFLAGS += -DXLEN_$(XLEN) LDFLAGS += -shared $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a -LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread +LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread # control RTL debug tracing states DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE @@ -47,12 +49,12 @@ endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv -RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) @@ -65,7 +67,7 @@ RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) @@ -73,7 +75,6 @@ VL_FLAGS += $(CONFIGS) VL_FLAGS += $(SRC_DIR)/verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) -VL_FLAGS += $(DBG_SCOPE_FLAGS) CXXFLAGS += $(CONFIGS) @@ -93,7 +94,7 @@ endif # Enable scope analyzer ifdef SCOPE - VL_FLAGS += -DSCOPE + VL_FLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) CXXFLAGS += 
-DSCOPE SCOPE_JSON = $(DESTDIR)/scope.json endif diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 368f8aa04..d6e06721d 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -13,9 +13,7 @@ #include "opae_sim.h" -#include #include "Vvortex_afu_shim.h" -#include "Vvortex_afu_shim__Syms.h" #ifdef VCD_OUTPUT #include @@ -26,10 +24,7 @@ #include #include -#define RAMULATOR -#include -#include -#include +#include #include #include @@ -48,8 +43,8 @@ #endif #endif -#ifndef MEM_CYCLE_RATIO -#define MEM_CYCLE_RATIO -1 +#ifndef MEM_CLOCK_RATIO +#define MEM_CLOCK_RATIO 1 #endif #undef MEM_BLOCK_SIZE @@ -108,11 +103,11 @@ public: Impl() : device_(nullptr) , ram_(nullptr) - , ramulator_(nullptr) + , dram_sim_(MEM_CLOCK_RATIO) , stop_(false) , host_buffer_ids_(0) #ifdef VCD_OUTPUT - , trace_(nullptr) + , tfp_(nullptr) #endif {} @@ -125,9 +120,9 @@ public: aligned_free(buffer.second.data); } #ifdef VCD_OUTPUT - if (trace_) { - trace_->close(); - delete trace_; + if (tfp_) { + tfp_->close(); + delete tfp_; } #endif if (device_) { @@ -136,11 +131,6 @@ public: if (ram_) { delete ram_; } - if (ramulator_) { - ramulator_->finish(); - Stats::statlist.printall(); - delete ramulator_; - } } int init() { @@ -156,25 +146,25 @@ public: #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC(); - device_->trace(trace_, 99); - trace_->open("trace.vcd"); + tfp_ = new VerilatedVcdC(); + device_->trace(tfp_, 99); + tfp_->open("trace.vcd"); #endif ram_ = new RAM(0, RAM_PAGE_SIZE); - // initialize dram simulator - ramulator::Config ram_config; - ram_config.add("standard", "DDR4"); - ram_config.add("channels", std::to_string(MEMORY_BANKS)); - ram_config.add("ranks", "1"); - ram_config.add("speed", "DDR4_2400R"); - ram_config.add("org", "DDR4_4Gb_x8"); - ram_config.add("mapping", "defaultmapping"); - ram_config.set_core_num(1); - ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE); - 
Stats::statlist.output("ramulator.ddr4.log"); - + #ifndef NDEBUG + // dump device configuration + std::cout << "CONFIGS:" + << " num_threads=" << NUM_THREADS + << ", num_warps=" << NUM_WARPS + << ", num_cores=" << NUM_CORES + << ", num_clusters=" << NUM_CLUSTERS + << ", socket_size=" << SOCKET_SIZE + << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec + << ", num_barriers=" << NUM_BARRIERS + << std::endl; + #endif // reset the device this->reset(); @@ -258,19 +248,16 @@ public: private: void reset() { - cci_reads_.clear(); - cci_writes_.clear(); - device_->vcp2af_sRxPort_c0_mmioRdValid = 0; - device_->vcp2af_sRxPort_c0_mmioWrValid = 0; - device_->vcp2af_sRxPort_c0_rspValid = 0; - device_->vcp2af_sRxPort_c1_rspValid = 0; - device_->vcp2af_sRxPort_c0_TxAlmFull = 0; - device_->vcp2af_sRxPort_c1_TxAlmFull = 0; + this->cci_bus_reset(); + this->avs_bus_reset(); - for (int b = 0; b < MEMORY_BANKS; ++b) { - pending_mem_reqs_[b].clear(); - device_->avs_readdatavalid[b] = 0; - device_->avs_waitrequest[b] = 0; + for (auto& reqs : pending_mem_reqs_) { + reqs.clear(); + } + + { + std::queue empty; + std::swap(dram_queue_, empty); } device_->reset = 1; @@ -296,13 +283,21 @@ private: } void tick() { - this->sRxPort_bus(); - this->sTxPort_bus(); - this->avs_bus(); + this->cci_bus_eval(); + this->avs_bus_eval(); if (!dram_queue_.empty()) { - if (ramulator_->send(dram_queue_.front())) + auto mem_req = dram_queue_.front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, mem_req->bank_id, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { dram_queue_.pop(); + } } device_->clk = 0; @@ -310,14 +305,7 @@ private: device_->clk = 1; this->eval(); - if (MEM_CYCLE_RATIO > 0) { - auto cycle = timestamp / 2; - if ((cycle % MEM_CYCLE_RATIO) == 0) - ramulator_->tick(); - } else { - for (int i = MEM_CYCLE_RATIO; i <= 0; ++i) - ramulator_->tick(); - } + 
dram_sim_.tick(); #ifndef NDEBUG fflush(stdout); @@ -328,13 +316,29 @@ private: device_->eval(); #ifdef VCD_OUTPUT if (sim_trace_enabled()) { - trace_->dump(timestamp); + tfp_->dump(timestamp); } #endif ++timestamp; } - void sRxPort_bus() { + void cci_bus_reset() { + cci_reads_.clear(); + cci_writes_.clear(); + device_->vcp2af_sRxPort_c0_mmioRdValid = 0; + device_->vcp2af_sRxPort_c0_mmioWrValid = 0; + device_->vcp2af_sRxPort_c0_rspValid = 0; + device_->vcp2af_sRxPort_c1_rspValid = 0; + device_->vcp2af_sRxPort_c0_TxAlmFull = 0; + device_->vcp2af_sRxPort_c1_TxAlmFull = 0; + } + + void cci_bus_eval() { + this->sRxPort_bus_eval(); + this->sTxPort_bus_eval(); + } + + void sRxPort_bus_eval() { // check mmio request bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid || device_->vcp2af_sRxPort_c0_mmioWrValid; @@ -384,7 +388,7 @@ private: } } - void sTxPort_bus() { + void sTxPort_bus_eval() { // process read requests if (device_->af2cp_sTxPort_c0_valid) { assert(!device_->vcp2af_sRxPort_c0_TxAlmFull); @@ -414,7 +418,15 @@ private: device_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1)); } - void avs_bus() { + void avs_bus_reset() { + for (int b = 0; b < MEMORY_BANKS; ++b) { + pending_mem_reqs_[b].clear(); + device_->avs_readdatavalid[b] = 0; + device_->avs_waitrequest[b] = 0; + } + } + + void avs_bus_eval() { for (int b = 0; b < MEMORY_BANKS; ++b) { // process memory responses device_->avs_readdatavalid[b] = 0; @@ -448,17 +460,20 @@ private: printf("\n");*/ // send dram request - ramulator::Request dram_req( - byte_addr, - ramulator::Request::Type::WRITE, - 0 - ); - dram_queue_.push(dram_req); + auto mem_req = new mem_req_t(); + mem_req->addr = device_->avs_address[b]; + mem_req->bank_id = b; + mem_req->write = true; + mem_req->ready = true; + + dram_queue_.push(mem_req); } else if (device_->avs_read[b]) { - auto mem_req = new mem_rd_req_t(); + auto mem_req = new mem_req_t(); mem_req->addr = device_->avs_address[b]; + mem_req->bank_id = b; 
ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); + mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[b].emplace_back(mem_req); @@ -472,15 +487,7 @@ private: printf("}\n");*/ // send dram request - ramulator::Request dram_req( - byte_addr, - ramulator::Request::Type::READ, - std::bind([](ramulator::Request& dram_req, mem_rd_req_t* mem_req) { - mem_req->ready = true; - }, placeholders::_1, mem_req), - 0 - ); - dram_queue_.push(dram_req); + dram_queue_.push(mem_req); } device_->avs_waitrequest[b] = false; @@ -488,10 +495,12 @@ private: } typedef struct { - bool ready; std::array data; uint32_t addr; - } mem_rd_req_t; + uint32_t bank_id; + bool write; + bool ready; + } mem_req_t; typedef struct { int cycles_left; @@ -513,7 +522,7 @@ private: Vvortex_afu_shim *device_; RAM* ram_; - ramulator::Gem5Wrapper* ramulator_; + DramSim dram_sim_; std::future future_; bool stop_; @@ -521,17 +530,17 @@ private: std::unordered_map host_buffers_; int64_t host_buffer_ids_; - std::list pending_mem_reqs_[MEMORY_BANKS]; + std::list pending_mem_reqs_[MEMORY_BANKS]; std::list cci_reads_; std::list cci_writes_; std::mutex mutex_; - std::queue dram_queue_; + std::queue dram_queue_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedVcdC *tfp_; #endif }; diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 42a28cc54..e9487a2f4 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -4,15 +4,17 @@ DESTDIR ?= $(CURDIR) SRC_DIR = $(VORTEX_HOME)/sim/rtlsim -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(ROOT_DIR)/hw -I$(COMMON_DIR) CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include -CXXFLAGS += -I$(THIRD_PARTY_DIR) +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src 
CXXFLAGS += -DXLEN_$(XLEN) LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a -LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator +LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator # control RTL debug tracing states DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE @@ -24,7 +26,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) -RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) @@ -33,7 +35,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/processor.cpp @@ -46,7 +48,7 @@ endif VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += $(SRC_DIR)/verilator.vlt VL_FLAGS += -DSIMULATION -DSV_DPI diff --git a/sim/rtlsim/main.cpp b/sim/rtlsim/main.cpp index 46fd7637d..ea0ba9b95 100644 --- a/sim/rtlsim/main.cpp +++ b/sim/rtlsim/main.cpp @@ -89,7 +89,9 @@ int main(int argc, char **argv) { return -1; } } - +#ifndef NDEBUG + std::cout << "[VXDRV] START: program=" << program << std::endl; +#endif // run simulation processor.run(); diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index acedde614..2c31f939b 100644 --- a/sim/rtlsim/processor.cpp +++ 
b/sim/rtlsim/processor.cpp @@ -13,14 +13,12 @@ #include "processor.h" -#include - #ifdef AXI_BUS #include "VVortex_axi.h" -#include "VVortex_axi__Syms.h" +typedef VVortex_axi Device; #else #include "VVortex.h" -#include "VVortex__Syms.h" +typedef VVortex Device; #endif #ifdef VCD_OUTPUT @@ -40,10 +38,7 @@ #include #include -#define RAMULATOR -#include -#include -#include +#include #ifndef MEMORY_BANKS #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS @@ -53,8 +48,8 @@ #endif #endif -#ifndef MEM_CYCLE_RATIO -#define MEM_CYCLE_RATIO -1 +#ifndef MEM_CLOCK_RATIO +#define MEM_CLOCK_RATIO 1 #endif #ifndef TRACE_START_TIME @@ -109,7 +104,7 @@ void sim_trace_enable(bool enable) { class Processor::Impl { public: - Impl() { + Impl() : dram_sim_(MEM_CLOCK_RATIO) { // force random values for unitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); @@ -118,33 +113,29 @@ public: Verilated::assertOn(false); // create RTL module instance - #ifdef AXI_BUS - device_ = new VVortex_axi(); - #else - device_ = new VVortex(); - #endif + device_ = new Device(); #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC(); - device_->trace(trace_, 99); - trace_->open("trace.vcd"); + tfp_ = new VerilatedVcdC(); + device_->trace(tfp_, 99); + tfp_->open("trace.vcd"); #endif ram_ = nullptr; - // initialize dram simulator - ramulator::Config ram_config; - ram_config.add("standard", "DDR4"); - ram_config.add("channels", std::to_string(MEMORY_BANKS)); - ram_config.add("ranks", "1"); - ram_config.add("speed", "DDR4_2400R"); - ram_config.add("org", "DDR4_4Gb_x8"); - ram_config.add("mapping", "defaultmapping"); - ram_config.set_core_num(1); - dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE); - Stats::statlist.output("ramulator.ddr4.log"); - + #ifndef NDEBUG + // dump device configuration + std::cout << "CONFIGS:" + << " num_threads=" << NUM_THREADS + << ", num_warps=" << NUM_WARPS + << ", num_cores=" << NUM_CORES + << ", num_clusters=" << 
NUM_CLUSTERS + << ", socket_size=" << SOCKET_SIZE + << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec + << ", num_barriers=" << NUM_BARRIERS + << std::endl; + #endif // reset the device this->reset(); @@ -156,17 +147,11 @@ public: this->cout_flush(); #ifdef VCD_OUTPUT - trace_->close(); - delete trace_; + tfp_->close(); + delete tfp_; #endif delete device_; - - if (dram_) { - dram_->finish(); - Stats::statlist.printall(); - delete dram_; - } } void cout_flush() { @@ -226,16 +211,17 @@ private: pending_mem_reqs_.clear(); + { + std::queue empty; + std::swap(dram_queue_, empty); + } + mem_rd_rsp_active_ = false; mem_wr_rsp_active_ = false; - #ifdef AXI_BUS - this->reset_axi_bus(); - #else - this->reset_avs_bus(); - #endif + this->mem_bus_reset(); - this->reset_dcr_bus(); + this->dcr_bus_reset(); device_->reset = 1; @@ -252,35 +238,29 @@ private: device_->clk = 0; this->eval(); - #ifdef AXI_BUS - this->eval_axi_bus(0); - #else - this->eval_avs_bus(0); - #endif - this->eval_dcr_bus(0); + this->mem_bus_eval(0); + this->dcr_bus_eval(0); device_->clk = 1; this->eval(); - #ifdef AXI_BUS - this->eval_axi_bus(1); - #else - this->eval_avs_bus(1); - #endif - this->eval_dcr_bus(1); + this->mem_bus_eval(1); + this->dcr_bus_eval(1); - if (MEM_CYCLE_RATIO > 0) { - auto cycle = timestamp / 2; - if ((cycle % MEM_CYCLE_RATIO) == 0) - dram_->tick(); - } else { - for (int i = MEM_CYCLE_RATIO; i <= 0; ++i) - dram_->tick(); - } + dram_sim_.tick(); if (!dram_queue_.empty()) { - if (dram_->send(dram_queue_.front())) + auto mem_req = dram_queue_.front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { dram_queue_.pop(); + } } #ifndef NDEBUG @@ -292,7 +272,7 @@ private: device_->eval(); #ifdef VCD_OUTPUT if (sim_trace_enabled()) { - trace_->dump(timestamp); + tfp_->dump(timestamp); } else { exit(-1); } @@ 
-302,7 +282,7 @@ private: #ifdef AXI_BUS - void reset_axi_bus() { + void mem_bus_reset() { device_->m_axi_wready[0] = 0; device_->m_axi_awready[0] = 0; device_->m_axi_arready[0] = 0; @@ -310,7 +290,7 @@ private: device_->m_axi_bvalid[0] = 0; } - void eval_axi_bus(bool clk) { + void mem_bus_eval(bool clk) { if (!clk) { mem_rd_rsp_ready_ = device_->m_axi_rready[0]; mem_wr_rsp_ready_ = device_->m_axi_bready[0]; @@ -324,9 +304,9 @@ private: return; } - // process memory responses + // process memory read responses if (mem_rd_rsp_active_ - && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { + && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { mem_rd_rsp_active_ = false; } if (!mem_rd_rsp_active_) { @@ -336,7 +316,7 @@ private: auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp = *mem_rsp_it; /* - printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->addr); + printf("%0ld: [sim] MEM Rd Rsp: addr=%0lx, data=", timestamp, mem_rsp->addr); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]); } @@ -355,9 +335,9 @@ private: } } - // send memory write response + // process memory write responses if (mem_wr_rsp_active_ - && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { + && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { mem_wr_rsp_active_ = false; } if (!mem_wr_rsp_active_) { @@ -367,7 +347,7 @@ private: auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp = *mem_rsp_it; /* - printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp->addr); + printf("%0ld: [sim] MEM Wr Rsp: addr=%0lx\n", timestamp, mem_rsp->addr); */ device_->m_axi_bvalid[0] = 1; device_->m_axi_bid[0] = mem_rsp->tag; @@ -386,13 +366,13 @@ private: // process memory requests if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { if (device_->m_axi_wvalid[0]) { - uint64_t byteen = device_->m_axi_wstrb[0]; - uint64_t base_addr = device_->m_axi_awaddr[0]; 
- uint8_t* data = (uint8_t*)device_->m_axi_wdata[0].data(); + auto byteen = device_->m_axi_wstrb[0]; + auto base_addr = device_->m_axi_awaddr[0]; + auto data = (uint8_t*)device_->m_axi_wdata[0].data(); - // check console output if (base_addr >= uint64_t(IO_COUT_ADDR) && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output for (int i = 0; i < MEM_BLOCK_SIZE; i++) { if ((byteen >> i) & 0x1) { auto& ss_buf = print_bufs_[i]; @@ -405,6 +385,7 @@ private: } } } else { + // process writes /* printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { @@ -422,16 +403,11 @@ private: mem_req->tag = device_->m_axi_awid[0]; mem_req->addr = device_->m_axi_awaddr[0]; mem_req->write = true; - mem_req->ready = true; + mem_req->ready = false; pending_mem_reqs_.emplace_back(mem_req); // send dram request - ramulator::Request dram_req( - device_->m_axi_awaddr[0], - ramulator::Request::Type::WRITE, - 0 - ); - dram_queue_.push(dram_req); + dram_queue_.push(mem_req); } } else { // process reads @@ -444,15 +420,7 @@ private: pending_mem_reqs_.emplace_back(mem_req); // send dram request - ramulator::Request dram_req( - device_->m_axi_araddr[0], - ramulator::Request::Type::READ, - std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) { - mem_req->ready = true; - }, placeholders::_1, mem_req), - 0 - ); - dram_queue_.push(dram_req); + dram_queue_.push(mem_req); } } @@ -463,12 +431,12 @@ private: #else - void reset_avs_bus() { + void mem_bus_reset() { device_->mem_req_ready = 0; device_->mem_rsp_valid = 0; } - void eval_avs_bus(bool clk) { + void mem_bus_eval(bool clk) { if (!clk) { mem_rd_rsp_ready_ = device_->mem_rsp_ready; return; @@ -479,7 +447,7 @@ private: return; } - // process memory responses + // process memory read responses if (mem_rd_rsp_active_ && device_->mem_rsp_valid && mem_rd_rsp_ready_) { mem_rd_rsp_active_ = false; @@ -491,7 +459,7 @@ private: auto mem_rsp_it = 
pending_mem_reqs_.begin(); auto mem_rsp = *mem_rsp_it; /* - printf("%0ld: [sim] MEM Rd: bank=%d, tag=%0lx, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->tag, mem_rsp->addr); + printf("%0ld: [sim] MEM Rd: tag=%0lx, addr=%0lx, data=", timestamp, mem_rsp->tag, mem_rsp->addr); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]); } @@ -511,13 +479,12 @@ private: if (device_->mem_req_valid && running_) { uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); if (device_->mem_req_rw) { - // process writes - uint64_t byteen = device_->mem_req_byteen; - uint8_t* data = (uint8_t*)(device_->mem_req_data.data()); + auto byteen = device_->mem_req_byteen; + auto data = (uint8_t*)(device_->mem_req_data.data()); - // check console output if (byte_addr >= uint64_t(IO_COUT_ADDR) && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output for (int i = 0; i < IO_COUT_SIZE; i++) { if ((byteen >> i) & 0x1) { auto& ss_buf = print_bufs_[i]; @@ -530,6 +497,7 @@ private: } } } else { + // process writes /* printf("%0ld: [sim] MEM Wr: tag=%0lx, addr=%0x, byteen=%0lx, data=", timestamp, device_->mem_req_tag, byte_addr, byteen); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { @@ -543,13 +511,14 @@ private: } } + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = true; + // send dram request - ramulator::Request dram_req( - byte_addr, - ramulator::Request::Type::WRITE, - 0 - ); - dram_queue_.push(dram_req); + dram_queue_.push(mem_req); } } else { // process reads @@ -564,15 +533,7 @@ private: //printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag); // send dram request - ramulator::Request dram_req( - byte_addr, - ramulator::Request::Type::READ, - std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) { - mem_req->ready = true; - }, placeholders::_1, mem_req), - 0 - 
); - dram_queue_.push(dram_req); + dram_queue_.push(mem_req); } } @@ -581,11 +542,11 @@ private: #endif - void reset_dcr_bus() { + void dcr_bus_reset() { device_->dcr_wr_valid = 0; } - void eval_dcr_bus(bool clk) { + void dcr_bus_eval(bool clk) { if (!clk) { return; } @@ -603,38 +564,36 @@ private: private: typedef struct { - bool ready; + Device* device; std::array block; uint64_t addr; uint64_t tag; bool write; + bool ready; } mem_req_t; -#ifdef AXI_BUS - VVortex_axi *device_; -#else - VVortex *device_; -#endif -#ifdef VCD_OUTPUT - VerilatedVcdC *trace_; -#endif - std::unordered_map print_bufs_; std::list pending_mem_reqs_; + std::queue dram_queue_; + + DramSim dram_sim_; + + Device* device_; + +#ifdef VCD_OUTPUT + VerilatedVcdC *tfp_; +#endif + + RAM* ram_; + bool mem_rd_rsp_active_; bool mem_rd_rsp_ready_; bool mem_wr_rsp_active_; bool mem_wr_rsp_ready_; - RAM *ram_; - - ramulator::Gem5Wrapper* dram_; - - std::queue dram_queue_; - bool running_; }; diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 4feb2c77a..8520e5191 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -4,18 +4,24 @@ DESTDIR ?= $(CURDIR) SRC_DIR = $(VORTEX_HOME)/sim/simx -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(SRC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include -CXXFLAGS += -I$(THIRD_PARTY_DIR) +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += $(CONFIGS) -LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a -LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator +ifeq ($(VM_ENABLE), 1) +CXXFLAGS += -DVM_ENABLE +endif -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp +LDFLAGS += 
$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a +LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator + +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp # Debugigng diff --git a/sim/simx/arch.h b/sim/simx/arch.h index 2507bf28f..d72b4ce11 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -29,11 +29,7 @@ private: uint16_t num_cores_; uint16_t num_clusters_; uint16_t socket_size_; - uint16_t vsize_; - uint16_t num_regs_; - uint16_t num_csrs_; uint16_t num_barriers_; - uint16_t ipdom_size_; uint64_t local_mem_base_; public: @@ -43,26 +39,10 @@ public: , num_cores_(num_cores) , num_clusters_(NUM_CLUSTERS) , socket_size_(SOCKET_SIZE) - , vsize_(16) - , num_regs_(32) - , num_csrs_(4096) , num_barriers_(NUM_BARRIERS) - , ipdom_size_((num_threads-1) * 2) , local_mem_base_(LMEM_BASE_ADDR) {} - uint16_t vsize() const { - return vsize_; - } - - uint16_t num_regs() const { - return num_regs_; - } - - uint16_t num_csrs() const { - return num_csrs_; - } - uint16_t num_barriers() const { return num_barriers_; } @@ -71,10 +51,6 @@ public: return local_mem_base_; } - uint16_t ipdom_size() const { - return ipdom_size_; - } - uint16_t num_threads() const { return num_threads_; } diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index d92c83a03..65a8da70b 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -400,7 +400,7 @@ public: continue; auto& mem_rsp = mem_rsp_port.front(); - DT(3, simobject_->name() << "-dram-" << mem_rsp); + DT(3, simobject_->name() << "-bank" << bank_id << " fill-rsp: " << mem_rsp); 
pipeline_req.type = bank_req_t::Fill; pipeline_req.tag = mem_rsp.tag; mem_rsp_port.pop(); @@ -436,7 +436,7 @@ public: auto port_id = req_id % config_.ports_per_bank; // check MSHR capacity - if ((!core_req.write || !config_.write_through) + if ((!core_req.write || config_.write_back) && bank.mshr.full()) { ++perf_stats_.mshr_stalls; continue; @@ -473,7 +473,6 @@ public: ++perf_stats_.reads; // remove request - DT(3, simobject_->name() << "-core-" << core_req); auto time = core_req_port.pop(); perf_stats_.pipeline_stalls += (SimPlatform::instance().cycles() - time); } @@ -493,23 +492,21 @@ private: uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs; MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid}; simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency); - DT(3, simobject_->name() << "-core-" << core_rsp); + DT(3, simobject_->name() << " core-rsp: " << core_rsp); } void processBypassRequest(const MemReq& core_req, uint32_t req_id) { - DT(3, simobject_->name() << "-core-" << core_req); - { MemReq mem_req(core_req); mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id; bypass_switch_->ReqIn.at(1).push(mem_req, 1); - DT(3, simobject_->name() << "-dram-" << mem_req); + DT(3, simobject_->name() << " dram-req: " << mem_req); } if (core_req.write && config_.write_reponse) { MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid}; simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1); - DT(3, simobject_->name() << "-core-" << core_rsp); + DT(3, simobject_->name() << " core-rsp: " << core_rsp); } } @@ -539,7 +536,7 @@ private: continue; MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid}; simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency); - DT(3, simobject_->name() << "-core-" << core_rsp); + DT(3, simobject_->name() << "-bank" << bank_id << " replay: " << core_rsp); } } } break; @@ -575,7 +572,7 @@ private: if (pipeline_req.write) { // handle write has_hit auto& hit_line = set.lines.at(hit_line_id); - if 
(config_.write_through) { + if (!config_.write_back) { // forward write request to memory MemReq mem_req; mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag); @@ -583,7 +580,7 @@ private: mem_req.cid = pipeline_req.cid; mem_req.uuid = pipeline_req.uuid; mem_req_ports_.at(bank_id).push(mem_req, 1); - DT(3, simobject_->name() << "-dram-" << mem_req); + DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req); } else { // mark line as dirty hit_line.dirty = true; @@ -596,7 +593,7 @@ private: continue; MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid}; simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency); - DT(3, simobject_->name() << "-core-" << core_rsp); + DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp); } } } else { @@ -606,7 +603,7 @@ private: else ++perf_stats_.read_misses; - if (free_line_id == -1 && !config_.write_through) { + if (free_line_id == -1 && config_.write_back) { // write back dirty line auto& repl_line = set.lines.at(repl_line_id); if (repl_line.dirty) { @@ -615,12 +612,12 @@ private: mem_req.write = true; mem_req.cid = pipeline_req.cid; mem_req_ports_.at(bank_id).push(mem_req, 1); - DT(3, simobject_->name() << "-dram-" << mem_req); + DT(3, simobject_->name() << "-bank" << bank_id << " writeback: " << mem_req); ++perf_stats_.evictions; } } - if (pipeline_req.write && config_.write_through) { + if (pipeline_req.write && !config_.write_back) { // forward write request to memory { MemReq mem_req; @@ -629,7 +626,7 @@ private: mem_req.cid = pipeline_req.cid; mem_req.uuid = pipeline_req.uuid; mem_req_ports_.at(bank_id).push(mem_req, 1); - DT(3, simobject_->name() << "-dram-" << mem_req); + DT(3, simobject_->name() << "-bank" << bank_id << " writethrough: " << mem_req); } // send core response if (config_.write_reponse) { @@ -638,7 +635,7 @@ private: continue; MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid}; 
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency); - DT(3, simobject_->name() << "-core-" << core_rsp); + DT(3, simobject_->name() << "-bank" << bank_id << " core-rsp: " << core_rsp); } } } else { @@ -657,7 +654,7 @@ private: mem_req.cid = pipeline_req.cid; mem_req.uuid = pipeline_req.uuid; mem_req_ports_.at(bank_id).push(mem_req, 1); - DT(3, simobject_->name() << "-dram-" << mem_req); + DT(3, simobject_->name() << "-bank" << bank_id << " fill: " << mem_req); ++pending_fill_reqs_; } } diff --git a/sim/simx/cache_sim.h b/sim/simx/cache_sim.h index eacad86af..df62bf854 100644 --- a/sim/simx/cache_sim.h +++ b/sim/simx/cache_sim.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -30,12 +30,12 @@ public: uint8_t addr_width; // word address bits uint8_t ports_per_bank; // number of ports per bank uint8_t num_inputs; // number of inputs - bool write_through; // is write-through + bool write_back; // is write-back bool write_reponse; // enable write response uint16_t mshr_size; // MSHR buffer size uint8_t latency; // pipeline latency }; - + struct PerfStats { uint64_t reads; uint64_t writes; @@ -47,7 +47,7 @@ public: uint64_t mshr_stalls; uint64_t mem_latency; - PerfStats() + PerfStats() : reads(0) , writes(0) , read_misses(0) @@ -82,11 +82,11 @@ public: ~CacheSim(); void reset(); - + void tick(); const PerfStats& perf_stats() const; - + private: class Impl; Impl* impl_; diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index cb6c3c9d6..25669e26b 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,11 +15,11 @@ using namespace vortex; -Cluster::Cluster(const SimContext& ctx, +Cluster::Cluster(const SimContext& ctx, uint32_t cluster_id, - ProcessorImpl* processor, - const Arch &arch, - const DCRS &dcrs) + ProcessorImpl* processor, + const Arch &arch, + const DCRS &dcrs) : SimObject(ctx, "cluster") , mem_req_port(this) , mem_rsp_port(this) @@ -43,9 +43,9 @@ Cluster::Cluster(const SimContext& ctx, for (uint32_t i = 0; i < sockets_per_cluster; ++i) { uint32_t socket_id = cluster_id * sockets_per_cluster + i; - auto socket = Socket::Create(socket_id, - this, - arch, + auto socket = Socket::Create(socket_id, + this, + arch, dcrs); socket->icache_mem_req_port.bind(&icache_switch->ReqIn.at(i)); @@ -58,7 +58,7 @@ Cluster::Cluster(const SimContext& ctx, } // Create l2cache - + snprintf(sname, 100, "cluster%d-l2cache", cluster_id); l2cache_ = CacheSim::Create(sname, CacheSim::Config{ !L2_ENABLED, @@ -67,10 +67,10 @@ Cluster::Cluster(const SimContext& ctx, log2ceil(L1_LINE_SIZE), // W log2ceil(L2_NUM_WAYS), // A log2ceil(L2_NUM_BANKS), // B - XLEN, // address bits + XLEN, // address bits 1, // number of ports - 2, // request size - true, // write-through + 2, // request size + L2_WRITEBACK, // write-back false, // write response L2_MSHR_SIZE, // mshr size 2, // pipeline latency @@ -90,7 +90,7 @@ Cluster::~Cluster() { //-- } -void Cluster::reset() { +void Cluster::reset() { for (auto& barrier : barriers_) { barrier.reset(); } diff --git a/sim/simx/constants.h b/sim/simx/constants.h index d72cafa17..09a509ce1 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,15 +14,15 @@ #pragma once #ifndef RAM_PAGE_SIZE -#define RAM_PAGE_SIZE 4096 +#define RAM_PAGE_SIZE 4096 #endif -#ifndef MEM_CYCLE_RATIO -#define MEM_CYCLE_RATIO -1 +#ifndef MEM_CLOCK_RATIO +#define MEM_CLOCK_RATIO 1 #endif #ifndef MEMORY_BANKS -#define MEMORY_BANKS 2 +#define MEMORY_BANKS 2 #endif #define LSU_WORD_SIZE (XLEN / 8) @@ -31,4 +31,8 @@ #define DCACHE_WORD_SIZE LSU_LINE_SIZE #define DCACHE_CHANNELS UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE) -#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS) \ No newline at end of file +#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS) + +#define NUM_SOCKETS UP(NUM_CORES / SOCKET_SIZE) + +#define PER_ISSUE_WARPS NUM_WARPS / ISSUE_WIDTH \ No newline at end of file diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index efaa19133..6f817a3ae 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -44,8 +44,10 @@ Core::Core(const SimContext& ctx, , operands_(ISSUE_WIDTH) , dispatchers_((uint32_t)FUType::Count) , func_units_((uint32_t)FUType::Count) - , lsu_demux_(LSU_NUM_REQS) + , lsu_demux_(NUM_LSU_BLOCKS) , mem_coalescers_(NUM_LSU_BLOCKS) + , lsu_dcache_adapter_(NUM_LSU_BLOCKS) + , lsu_lmem_adapter_(NUM_LSU_BLOCKS) , pending_icache_(arch_.num_warps()) , commit_arbs_(ISSUE_WIDTH) { @@ -72,31 +74,53 @@ Core::Core(const SimContext& ctx, }); // create lsu demux - for (uint32_t i = 0; i < LSU_NUM_REQS; ++i) { + for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) { snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i); lsu_demux_.at(i) = LocalMemDemux::Create(sname, 1); } - // connect dcache-coalescer - for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) { - for (uint32_t c = 0; c < 
DCACHE_CHANNELS; ++c) { - uint32_t i = b * DCACHE_CHANNELS + c; - mem_coalescers_.at(b)->ReqOut.at(c).bind(&dcache_req_ports.at(i)); - dcache_rsp_ports.at(i).bind(&mem_coalescers_.at(b)->RspOut.at(c)); - } + // create lsu dcache adapter + for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) { + snprintf(sname, 100, "core%d-lsu_dcache_adapter%d", core_id, i); + lsu_dcache_adapter_.at(i) = LsuMemAdapter::Create(sname, DCACHE_CHANNELS, 1); + } + + // create lsu lmem adapter + for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) { + snprintf(sname, 100, "core%d-lsu_lmem_adapter%d", core_id, i); + lsu_lmem_adapter_.at(i) = LsuMemAdapter::Create(sname, LSU_CHANNELS, 1); } // connect lsu demux + for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) { + lsu_demux_.at(b)->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn); + mem_coalescers_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspDC); + + lsu_demux_.at(b)->ReqLmem.bind(&lsu_lmem_adapter_.at(b)->ReqIn); + lsu_lmem_adapter_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspLmem); + } + + // connect coalescer-adapter + for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) { + mem_coalescers_.at(b)->ReqOut.bind(&lsu_dcache_adapter_.at(b)->ReqIn); + lsu_dcache_adapter_.at(b)->RspIn.bind(&mem_coalescers_.at(b)->RspOut); + } + + // connect adapter-dcache + for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) { + for (uint32_t c = 0; c < DCACHE_CHANNELS; ++c) { + uint32_t i = b * DCACHE_CHANNELS + c; + lsu_dcache_adapter_.at(b)->ReqOut.at(c).bind(&dcache_req_ports.at(i)); + dcache_rsp_ports.at(i).bind(&lsu_dcache_adapter_.at(b)->RspOut.at(c)); + } + } + + // connect adapter-lmem for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) { for (uint32_t c = 0; c < LSU_CHANNELS; ++c) { uint32_t i = b * LSU_CHANNELS + c; - auto lmem_demux = lsu_demux_.at(i); - - lmem_demux->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn.at(c)); - mem_coalescers_.at(b)->RspIn.at(c).bind(&lmem_demux->RspDC); - - lmem_demux->ReqSM.bind(&local_mem_->Inputs.at(i)); - local_mem_->Outputs.at(i).bind(&lmem_demux->RspSM); + 
lsu_lmem_adapter_.at(b)->ReqOut.at(c).bind(&local_mem_->Inputs.at(i)); + local_mem_->Outputs.at(i).bind(&lsu_lmem_adapter_.at(b)->RspOut.at(c)); } } @@ -264,69 +288,72 @@ void Core::issue() { // issue ibuffer instructions for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { - uint32_t ii = (ibuffer_idx_ + i) % ibuffers_.size(); - auto& ibuffer = ibuffers_.at(ii); - if (ibuffer.empty()) - continue; - - auto trace = ibuffer.top(); - - // check scoreboard - if (scoreboard_.in_use(trace)) { - auto uses = scoreboard_.get_uses(trace); - if (!trace->log_once(true)) { - DTH(4, "*** scoreboard-stall: dependents={"); + bool has_instrs = false; + bool found_match = false; + for (uint32_t w = 0; w < PER_ISSUE_WARPS; ++w) { + uint32_t kk = (ibuffer_idx_ + w) % PER_ISSUE_WARPS; + uint32_t ii = kk * ISSUE_WIDTH + i; + auto& ibuffer = ibuffers_.at(ii); + if (ibuffer.empty()) + continue; + // check scoreboard + has_instrs = true; + auto trace = ibuffer.top(); + if (scoreboard_.in_use(trace)) { + auto uses = scoreboard_.get_uses(trace); + if (!trace->log_once(true)) { + DTH(4, "*** scoreboard-stall: dependents={"); + for (uint32_t j = 0, n = uses.size(); j < n; ++j) { + auto& use = uses.at(j); + __unused (use); + if (j) DTN(4, ", "); + DTN(4, use.reg_type << use.reg_id << "(#" << use.uuid << ")"); + } + DTN(4, "}, " << *trace << std::endl); + } for (uint32_t j = 0, n = uses.size(); j < n; ++j) { auto& use = uses.at(j); - __unused (use); - if (j) DTN(4, ", "); - DTN(4, use.reg_type << use.reg_id << "(#" << use.uuid << ")"); - } - DTN(4, "}, " << *trace << std::endl); - } - for (uint32_t j = 0, n = uses.size(); j < n; ++j) { - auto& use = uses.at(j); - switch (use.fu_type) { - case FUType::ALU: ++perf_stats_.scrb_alu; break; - case FUType::FPU: ++perf_stats_.scrb_fpu; break; - case FUType::LSU: ++perf_stats_.scrb_lsu; break; - case FUType::SFU: { - ++perf_stats_.scrb_sfu; - switch (use.sfu_type) { - case SfuType::TMC: - case SfuType::WSPAWN: - case SfuType::SPLIT: - case SfuType::JOIN: - case 
SfuType::BAR: - case SfuType::PRED: ++perf_stats_.scrb_wctl; break; - case SfuType::CSRRW: - case SfuType::CSRRS: - case SfuType::CSRRC: ++perf_stats_.scrb_csrs; break; + switch (use.fu_type) { + case FUType::ALU: ++perf_stats_.scrb_alu; break; + case FUType::FPU: ++perf_stats_.scrb_fpu; break; + case FUType::LSU: ++perf_stats_.scrb_lsu; break; + case FUType::SFU: { + ++perf_stats_.scrb_sfu; + switch (use.sfu_type) { + case SfuType::TMC: + case SfuType::WSPAWN: + case SfuType::SPLIT: + case SfuType::JOIN: + case SfuType::BAR: + case SfuType::PRED: ++perf_stats_.scrb_wctl; break; + case SfuType::CSRRW: + case SfuType::CSRRS: + case SfuType::CSRRC: ++perf_stats_.scrb_csrs; break; + default: assert(false); + } + } break; default: assert(false); } - } break; - default: assert(false); } + } else { + trace->log_once(false); + // update scoreboard + DT(3, "pipeline-scoreboard: " << *trace); + if (trace->wb) { + scoreboard_.reserve(trace); + } + // to operand stage + operands_.at(i)->Input.push(trace, 2); + ibuffer.pop(); + found_match = true; + break; } + } + if (has_instrs && !found_match) { ++perf_stats_.scrb_stalls; - continue; - } else { - trace->log_once(false); } - - // update scoreboard - if (trace->wb) { - scoreboard_.reserve(trace); - } - - DT(3, "pipeline-scoreboard: " << *trace); - - // to operand stage - operands_.at(i)->Input.push(trace, 1); - - ibuffer.pop(); } - ibuffer_idx_ += ISSUE_WIDTH; + ++ibuffer_idx_; } void Core::execute() { @@ -337,7 +364,7 @@ void Core::execute() { if (dispatch->Outputs.at(j).empty()) continue; auto trace = dispatch->Outputs.at(j).front(); - func_unit->Inputs.at(j).push(trace, 1); + func_unit->Inputs.at(j).push(trace, 2); dispatch->Outputs.at(j).pop(); } } @@ -366,6 +393,11 @@ void Core::commit() { perf_stats_.instrs += trace->tmask.count(); } + perf_stats_.opds_stalls = 0; + for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { + perf_stats_.opds_stalls += operands_.at(i)->total_stalls(); + } + commit_arb->Outputs.at(0).pop(); // delete 
the trace diff --git a/sim/simx/core.h b/sim/simx/core.h index 42f72e552..339d76fb8 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -45,12 +45,13 @@ public: uint64_t sched_stalls; uint64_t ibuf_stalls; uint64_t scrb_stalls; + uint64_t opds_stalls; uint64_t scrb_alu; uint64_t scrb_fpu; uint64_t scrb_lsu; uint64_t scrb_sfu; - uint64_t scrb_wctl; uint64_t scrb_csrs; + uint64_t scrb_wctl; uint64_t ifetches; uint64_t loads; uint64_t stores; @@ -64,12 +65,13 @@ public: , sched_stalls(0) , ibuf_stalls(0) , scrb_stalls(0) + , opds_stalls(0) , scrb_alu(0) , scrb_fpu(0) , scrb_lsu(0) , scrb_sfu(0) - , scrb_wctl(0) , scrb_csrs(0) + , scrb_wctl(0) , ifetches(0) , loads(0) , stores(0) @@ -154,6 +156,8 @@ private: LocalMem::Ptr local_mem_; std::vector lsu_demux_; std::vector mem_coalescers_; + std::vector lsu_dcache_adapter_; + std::vector lsu_lmem_adapter_; PipelineLatch fetch_latch_; PipelineLatch decode_latch_; diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index f934524c3..6cefe378f 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -623,7 +623,7 @@ std::shared_ptr Emulator::decode(uint32_t code) const { instr->setDestReg(rd, RegType::Integer); auto imm = (code >> shift_func3) << shift_func3; instr->setImm(imm); - } break; + } break; case InstType::J: { instr->setDestReg(rd, RegType::Integer); diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 9e96bef2f..503e21cd9 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -42,8 +42,8 @@ Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask) {} Emulator::warp_t::warp_t(const Arch& arch) - : ireg_file(arch.num_threads(), std::vector(arch.num_regs())) - , freg_file(arch.num_threads(), std::vector(arch.num_regs())) + : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) + , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , uuid(0) {} @@ -74,6 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), 
arch) , barriers_(arch.num_barriers(), 0) + , ipdom_size_((arch.num_threads()-1) * 2) { this->clear(); } @@ -186,7 +187,7 @@ instr_trace_t* Emulator::step() { this->execute(*instr, scheduled_warp, trace); DP(5, "Register state:"); - for (uint32_t i = 0; i < arch_.num_regs(); ++i) { + for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) { DPN(5, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':'); // Integer register file for (uint32_t j = 0; j < arch_.num_threads(); ++j) { @@ -467,12 +468,13 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { CSR_READ_64(VX_CSR_MPM_SCHED_ST, core_perf.sched_stalls); CSR_READ_64(VX_CSR_MPM_IBUF_ST, core_perf.ibuf_stalls); CSR_READ_64(VX_CSR_MPM_SCRB_ST, core_perf.scrb_stalls); + CSR_READ_64(VX_CSR_MPM_OPDS_ST, core_perf.opds_stalls); CSR_READ_64(VX_CSR_MPM_SCRB_ALU, core_perf.scrb_alu); CSR_READ_64(VX_CSR_MPM_SCRB_FPU, core_perf.scrb_fpu); CSR_READ_64(VX_CSR_MPM_SCRB_LSU, core_perf.scrb_lsu); CSR_READ_64(VX_CSR_MPM_SCRB_SFU, core_perf.scrb_sfu); - CSR_READ_64(VX_CSR_MPM_SCRB_WCTL, core_perf.scrb_wctl); CSR_READ_64(VX_CSR_MPM_SCRB_CSRS, core_perf.scrb_csrs); + CSR_READ_64(VX_CSR_MPM_SCRB_WCTL, core_perf.scrb_wctl); CSR_READ_64(VX_CSR_MPM_IFETCHES, core_perf.ifetches); CSR_READ_64(VX_CSR_MPM_LOADS, core_perf.loads); CSR_READ_64(VX_CSR_MPM_STORES, core_perf.stores); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 47744c6d5..0b2d6ac03 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -122,6 +122,7 @@ private: std::vector barriers_; std::unordered_map print_bufs_; MemoryUnit mmu_; + uint32_t ipdom_size_; Word csr_mscratch_; wspawn_t wspawn_; }; diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index e0fc2b94a..a037d995c 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -63,8 +63,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->wid = wid; trace->PC = warp.PC; trace->tmask = warp.tmask; - trace->rdest = instr.getRDest(); - 
trace->rdest_type = instr.getRDType(); + trace->dst_reg = {instr.getRDType(), instr.getRDest()}; auto next_pc = warp.PC + 4; auto next_tmask = warp.tmask; @@ -128,7 +127,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } DPN(2, "}" << std::endl); break; - case RegType::None: + default: break; } } @@ -164,8 +163,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::R: { trace->fu_type = FUType::ALU; trace->alu_type = AluType::ARITH; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {RegType::Integer, rsrc1}; for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; @@ -341,7 +340,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::I: { trace->fu_type = FUType::ALU; trace->alu_type = AluType::ARITH; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; @@ -401,8 +400,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::R_W: { trace->fu_type = FUType::ALU; trace->alu_type = AluType::ARITH; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {RegType::Integer, rsrc1}; for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; @@ -528,7 +527,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::I_W: { trace->fu_type = FUType::ALU; trace->alu_type = AluType::ARITH; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; @@ -571,8 +570,8 @@ void Emulator::execute(const Instr &instr, uint32_t 
wid, instr_trace_t *trace) { case Opcode::B: { trace->fu_type = FUType::ALU; trace->alu_type = AluType::BRANCH; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {RegType::Integer, rsrc1}; bool all_taken = false; for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) @@ -660,7 +659,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { // RV32I: JALR trace->fu_type = FUType::ALU; trace->alu_type = AluType::BRANCH; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; @@ -675,7 +674,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::FL: { trace->fu_type = FUType::LSU; trace->lsu_type = LsuType::LOAD; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; uint32_t data_bytes = 1 << (func3 & 0x3); @@ -719,8 +718,9 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::FS: { trace->fu_type = FUType::LSU; trace->lsu_type = LsuType::STORE; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + auto data_type = (opcode == Opcode::FS) ? 
RegType::Float : RegType::Integer; + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {data_type, rsrc1}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; uint32_t data_bytes = 1 << (func3 & 0x3); @@ -746,8 +746,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::AMO: { trace->fu_type = FUType::LSU; trace->lsu_type = LsuType::LOAD; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {RegType::Integer, rsrc1}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; auto amo_type = func7 >> 2; @@ -839,14 +839,15 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } } else { trace->fu_type = FUType::SFU; - trace->fetch_stall = true; + // stall the fetch stage for FPU CSRs + trace->fetch_stall = (csr_addr <= VX_CSR_FCSR); csr_value = this->get_csr(csr_addr, t, wid); switch (func3) { case 1: { // RV32I: CSRRW rddata[t].i = csr_value; this->set_csr(csr_addr, rsdata[t][0].i, t, wid); - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; trace->sfu_type = SfuType::CSRRW; rd_write = true; break; @@ -857,7 +858,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { if (rsdata[t][0].i != 0) { this->set_csr(csr_addr, csr_value | rsdata[t][0].i, t, wid); } - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; trace->sfu_type = SfuType::CSRRS; rd_write = true; break; @@ -868,7 +869,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { if (rsdata[t][0].i != 0) { this->set_csr(csr_addr, csr_value & ~rsdata[t][0].i, t, wid); } - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; trace->sfu_type = SfuType::CSRRC; rd_write = true; break; @@ -925,57 +926,57 @@ void Emulator::execute(const Instr &instr, uint32_t 
wid, instr_trace_t *trace) { case 0x00: { // RV32F: FADD.S rddata[t].u64 = nan_box(rv_fadd_s(check_boxing(rsdata[t][0].u64), check_boxing(rsdata[t][1].u64), frm, &fflags)); trace->fpu_type = FpuType::FMA; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x01: { // RV32D: FADD.D rddata[t].u64 = rv_fadd_d(rsdata[t][0].u64, rsdata[t][1].u64, frm, &fflags); trace->fpu_type = FpuType::FMA; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x04: { // RV32F: FSUB.S rddata[t].u64 = nan_box(rv_fsub_s(check_boxing(rsdata[t][0].u64), check_boxing(rsdata[t][1].u64), frm, &fflags)); trace->fpu_type = FpuType::FMA; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x05: { // RV32D: FSUB.D rddata[t].u64 = rv_fsub_d(rsdata[t][0].u64, rsdata[t][1].u64, frm, &fflags); trace->fpu_type = FpuType::FMA; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x08: { // RV32F: FMUL.S rddata[t].u64 = nan_box(rv_fmul_s(check_boxing(rsdata[t][0].u64), check_boxing(rsdata[t][1].u64), frm, &fflags)); trace->fpu_type = FpuType::FMA; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x09: { // RV32D: FMUL.D rddata[t].u64 = rv_fmul_d(rsdata[t][0].u64, rsdata[t][1].u64, frm, &fflags); trace->fpu_type = FpuType::FMA; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x0c: 
{ // RV32F: FDIV.S rddata[t].u64 = nan_box(rv_fdiv_s(check_boxing(rsdata[t][0].u64), check_boxing(rsdata[t][1].u64), frm, &fflags)); trace->fpu_type = FpuType::FDIV; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x0d: { // RV32D: FDIV.D rddata[t].u64 = rv_fdiv_d(rsdata[t][0].u64, rsdata[t][1].u64, frm, &fflags); trace->fpu_type = FpuType::FDIV; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x10: { @@ -991,8 +992,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x11: { @@ -1008,8 +1009,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x14: { @@ -1021,8 +1022,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { rddata[t].u64 = nan_box(rv_fmin_s(check_boxing(rsdata[t][0].u64), check_boxing(rsdata[t][1].u64), &fflags)); } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x15: { @@ -1034,34 +1035,34 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { rddata[t].u64 = rv_fmin_d(rsdata[t][0].u64, rsdata[t][1].u64, &fflags); } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); - 
trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x20: { // RV32D: FCVT.S.D rddata[t].u64 = nan_box(rv_dtof(rsdata[t][0].u64)); trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x21: { // RV32D: FCVT.D.S rddata[t].u64 = rv_ftod(check_boxing(rsdata[t][0].u64)); trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x2c: { // RV32F: FSQRT.S rddata[t].u64 = nan_box(rv_fsqrt_s(check_boxing(rsdata[t][0].u64), frm, &fflags)); trace->fpu_type = FpuType::FSQRT; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x2d: { // RV32D: FSQRT.D rddata[t].u64 = rv_fsqrt_d(rsdata[t][0].u64, frm, &fflags); trace->fpu_type = FpuType::FSQRT; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x50: { @@ -1080,8 +1081,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x51: { @@ -1100,8 +1101,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; break; } case 0x60: { @@ -1124,7 +1125,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FCVT; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x61: { @@ -1147,7 +1148,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, 
instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FCVT; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x68: { @@ -1170,7 +1171,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FCVT; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; break; } case 0x69: { @@ -1193,7 +1194,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } trace->fpu_type = FpuType::FCVT; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; break; } case 0x70: { @@ -1206,7 +1207,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { rddata[t].i = sext((uint64_t)result, 32); } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x71: { @@ -1218,19 +1219,19 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { rddata[t].i = rsdata[t][0].u64; } trace->fpu_type = FpuType::FNCP; - trace->used_fregs.set(rsrc0); + trace->src_regs[0] = {RegType::Float, rsrc0}; break; } case 0x78: { // RV32F: FMV.S.X rddata[t].u64 = nan_box((uint32_t)rsdata[t][0].i); trace->fpu_type = FpuType::FNCP; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; break; } case 0x79: { // RV64D: FMV.D.X rddata[t].u64 = rsdata[t][0].i; trace->fpu_type = FpuType::FNCP; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; break; } } @@ -1244,9 +1245,9 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { case Opcode::FMNMADD: case Opcode::FMNMSUB: { trace->fpu_type = FpuType::FMA; - trace->used_fregs.set(rsrc0); - trace->used_fregs.set(rsrc1); - trace->used_fregs.set(rsrc2); + trace->src_regs[0] = {RegType::Float, rsrc0}; + trace->src_regs[1] = {RegType::Float, rsrc1}; + trace->src_regs[2] = 
{RegType::Float, rsrc2}; for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; @@ -1301,7 +1302,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { // TMC trace->fu_type = FUType::SFU; trace->sfu_type = SfuType::TMC; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; trace->fetch_stall = true; next_tmask.reset(); for (uint32_t t = 0; t < num_threads; ++t) { @@ -1312,8 +1313,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { // WSPAWN trace->fu_type = FUType::SFU; trace->sfu_type = SfuType::WSPAWN; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {RegType::Integer, rsrc1}; trace->fetch_stall = true; trace->data = std::make_shared(rsdata.at(thread_last)[0].i, rsdata.at(thread_last)[1].i); } break; @@ -1321,7 +1322,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { // SPLIT trace->fu_type = FUType::SFU; trace->sfu_type = SfuType::SPLIT; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; trace->fetch_stall = true; auto stack_size = warp.ipdom_stack.size(); @@ -1336,7 +1337,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { bool is_divergent = then_tmask.any() && else_tmask.any(); if (is_divergent) { - if (stack_size == arch_.ipdom_size()) { + if (stack_size == ipdom_size_) { std::cout << "IPDOM stack is full! 
size=" << std::dec << stack_size << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << trace->uuid << ")\n" << std::flush; std::abort(); } @@ -1362,7 +1363,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { // JOIN trace->fu_type = FUType::SFU; trace->sfu_type = SfuType::JOIN; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; trace->fetch_stall = true; auto stack_ptr = warp.ireg_file.at(thread_last).at(rsrc0); @@ -1382,8 +1383,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { // BAR trace->fu_type = FUType::SFU; trace->sfu_type = SfuType::BAR; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {RegType::Integer, rsrc1}; trace->fetch_stall = true; trace->data = std::make_shared(rsdata[thread_last][0].i, rsdata[thread_last][1].i); } break; @@ -1391,8 +1392,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { // PRED trace->fu_type = FUType::SFU; trace->sfu_type = SfuType::PRED; - trace->used_iregs.set(rsrc0); - trace->used_iregs.set(rsrc1); + trace->src_regs[0] = {RegType::Integer, rsrc0}; + trace->src_regs[1] = {RegType::Integer, rsrc1}; trace->fetch_stall = true; ThreadMask pred; auto not_pred = rdest & 0x1; @@ -1435,7 +1436,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { DPN(2, "0x" << std::hex << rddata[t].i); } DPN(2, "}" << std::endl); - trace->used_iregs[rdest] = 1; + trace->dst_reg = {type, rdest}; assert(rdest != 0); } else { // disable writes to x0 @@ -1454,7 +1455,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { DPN(2, "0x" << std::hex << rddata[t].f); } DPN(2, "}" << std::endl); - trace->used_fregs[rdest] = 1; + trace->dst_reg = {type, rdest}; break; default: std::abort(); diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index 
c9a3f0fc7..b03551e08 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -24,7 +24,7 @@ using namespace vortex; -AluUnit::AluUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "ALU") {} +AluUnit::AluUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "alu-unit") {} void AluUnit::tick() { for (uint32_t iw = 0; iw < ISSUE_WIDTH; ++iw) { @@ -33,20 +33,23 @@ void AluUnit::tick() { continue; auto& output = Outputs.at(iw); auto trace = input.front(); + int delay = 2; switch (trace->alu_type) { case AluType::ARITH: case AluType::BRANCH: case AluType::SYSCALL: + output.push(trace, 2+delay); + break; case AluType::IMUL: - output.push(trace, LATENCY_IMUL+1); + output.push(trace, LATENCY_IMUL+delay); break; case AluType::IDIV: - output.push(trace, XLEN+1); + output.push(trace, XLEN+delay); break; default: std::abort(); } - DT(3, "pipeline-execute: op=" << trace->alu_type << ", " << *trace); + DT(3, this->name() << ": op=" << trace->alu_type << ", " << *trace); if (trace->eop && trace->fetch_stall) { core_->resume(trace->wid); } @@ -56,7 +59,7 @@ void AluUnit::tick() { /////////////////////////////////////////////////////////////////////////////// -FpuUnit::FpuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "FPU") {} +FpuUnit::FpuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "fpu-unit") {} void FpuUnit::tick() { for (uint32_t iw = 0; iw < ISSUE_WIDTH; ++iw) { @@ -65,26 +68,27 @@ void FpuUnit::tick() { continue; auto& output = Outputs.at(iw); auto trace = input.front(); + int delay = 2; switch (trace->fpu_type) { case FpuType::FNCP: - output.push(trace, 2); + output.push(trace, 2+delay); break; case FpuType::FMA: - output.push(trace, LATENCY_FMA+1); + output.push(trace, LATENCY_FMA+delay); break; case FpuType::FDIV: - output.push(trace, LATENCY_FDIV+1); + output.push(trace, LATENCY_FDIV+delay); break; case FpuType::FSQRT: - output.push(trace, LATENCY_FSQRT+1); + output.push(trace, LATENCY_FSQRT+delay); break; 
case FpuType::FCVT: - output.push(trace, LATENCY_FCVT+1); + output.push(trace, LATENCY_FCVT+delay); break; default: std::abort(); } - DT(3, "pipeline-execute: op=" << trace->fpu_type << ", " << *trace); + DT(3,this->name() << ": op=" << trace->fpu_type << ", " << *trace); input.pop(); } } @@ -92,7 +96,7 @@ void FpuUnit::tick() { /////////////////////////////////////////////////////////////////////////////// LsuUnit::LsuUnit(const SimContext& ctx, Core* core) - : FuncUnit(ctx, core, "LSU") + : FuncUnit(ctx, core, "lsu-unit") , pending_loads_(0) {} @@ -110,25 +114,25 @@ void LsuUnit::tick() { core_->perf_stats_.load_latency += pending_loads_; // handle memory responses - for (uint32_t r = 0; r < LSU_NUM_REQS; ++r) { - auto& dcache_rsp_port = core_->lsu_demux_.at(r)->RspIn; - if (dcache_rsp_port.empty()) + for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) { + auto& lsu_rsp_port = core_->lsu_demux_.at(b)->RspIn; + if (lsu_rsp_port.empty()) continue; - uint32_t block_idx = r / LSU_CHANNELS; - auto& state = states_.at(block_idx); - auto& mem_rsp = dcache_rsp_port.front(); - auto& entry = state.pending_rd_reqs.at(mem_rsp.tag); + auto& state = states_.at(b); + auto& lsu_rsp = lsu_rsp_port.front(); + DT(3, this->name() << " mem-rsp: " << lsu_rsp); + auto& entry = state.pending_rd_reqs.at(lsu_rsp.tag); auto trace = entry.trace; - DT(3, "mem-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu_type << ", rid=" << r << ", " << *trace); - assert(entry.count); - --entry.count; // track remaining addresses - if (0 == entry.count) { + assert(!entry.mask.none()); + entry.mask &= ~lsu_rsp.mask; // track remaining + if (entry.mask.none()) { + // whole response received, release trace int iw = trace->wid % ISSUE_WIDTH; Outputs.at(iw).push(trace, 1); - state.pending_rd_reqs.release(mem_rsp.tag); + state.pending_rd_reqs.release(lsu_rsp.tag); } - dcache_rsp_port.pop(); - --pending_loads_; + pending_loads_ -= lsu_rsp.mask.count(); + lsu_rsp_port.pop(); } // handle LSU requests @@ -141,7 
+145,7 @@ void LsuUnit::tick() { continue; Outputs.at(iw).push(state.fence_trace, 1); state.fence_lock = false; - DT(3, "fence-unlock: " << state.fence_trace); + DT(3, this->name() << " fence-unlock: " << state.fence_trace); } // check input queue @@ -149,14 +153,13 @@ void LsuUnit::tick() { if (input.empty()) continue; - auto& output = Outputs.at(iw); auto trace = input.front(); if (trace->lsu_type == LsuType::FENCE) { // schedule fence lock state.fence_trace = trace; state.fence_lock = true; - DT(3, "fence-lock: " << *trace); + DT(3, this->name() << " fence-lock: " << *trace); // remove input input.pop(); continue; @@ -167,28 +170,50 @@ void LsuUnit::tick() { // check pending queue capacity if (!is_write && state.pending_rd_reqs.full()) { if (!trace->log_once(true)) { - DT(4, "*** " << this->name() << "-queue-full: " << *trace); + DT(4, "*** " << this->name() << " queue-full: " << *trace); } continue; } else { trace->log_once(false); } + // build memory request + LsuReq lsu_req(NUM_LSU_LANES); + lsu_req.write = is_write; + { + auto trace_data = std::dynamic_pointer_cast(trace->data); + auto t0 = trace->pid * NUM_LSU_LANES; + for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) { + if (trace->tmask.test(t0 + i)) { + lsu_req.mask.set(i); + lsu_req.addrs.at(i) = trace_data->mem_addrs.at(t0 + i).addr; + } + } + } uint32_t tag = 0; if (!is_write) { - tag = state.pending_rd_reqs.allocate({trace, 0}); + tag = state.pending_rd_reqs.allocate({trace, lsu_req.mask}); } + lsu_req.tag = tag; + lsu_req.cid = trace->cid; + lsu_req.uuid = trace->uuid; // send memory request - auto num_reqs = this->send_requests(trace, block_idx, tag); + core_->lsu_demux_.at(block_idx)->ReqIn.push(lsu_req); + DT(3, this->name() << " mem-req: " << lsu_req); - if (!is_write) { - state.pending_rd_reqs.at(tag).count = num_reqs; + // update stats + auto num_addrs = lsu_req.mask.count(); + if (is_write) { + core_->perf_stats_.stores += num_addrs; + } else { + core_->perf_stats_.loads += num_addrs; + 
pending_loads_ += num_addrs; } // do not wait on writes if (is_write) { - output.push(trace, 1); + Outputs.at(iw).push(trace, 1); } // remove input @@ -196,52 +221,10 @@ void LsuUnit::tick() { } } -int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { - int count = 0; - - auto trace_data = std::dynamic_pointer_cast(trace->data); - bool is_write = (trace->lsu_type == LsuType::STORE); - auto t0 = trace->pid * NUM_LSU_LANES; - - for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) { - uint32_t t = t0 + i; - if (!trace->tmask.test(t)) - continue; - - int req_idx = block_idx * LSU_CHANNELS + (i % LSU_CHANNELS); - auto& dcache_req_port = core_->lsu_demux_.at(req_idx)->ReqIn; - - auto mem_addr = trace_data->mem_addrs.at(t); - auto type = get_addr_type(mem_addr.addr); - - MemReq mem_req; - mem_req.addr = mem_addr.addr; - mem_req.write = is_write; - mem_req.type = type; - mem_req.tag = tag; - mem_req.cid = trace->cid; - mem_req.uuid = trace->uuid; - - dcache_req_port.push(mem_req, 1); - DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag - << ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace); - - if (is_write) { - ++core_->perf_stats_.stores; - } else { - ++core_->perf_stats_.loads; - ++pending_loads_; - } - - ++count; - } - return count; -} - /////////////////////////////////////////////////////////////////////////////// SfuUnit::SfuUnit(const SimContext& ctx, Core* core) - : FuncUnit(ctx, core, "SFU") + : FuncUnit(ctx, core, "sfu-unit") {} void SfuUnit::tick() { @@ -254,10 +237,10 @@ void SfuUnit::tick() { auto trace = input.front(); auto sfu_type = trace->sfu_type; bool release_warp = trace->fetch_stall; - + int delay = 2; switch (sfu_type) { case SfuType::WSPAWN: - output.push(trace, 1); + output.push(trace, 2+delay); if (trace->eop) { auto trace_data = std::dynamic_pointer_cast(trace->data); release_warp = core_->wspawn(trace_data->arg1, trace_data->arg2); @@ -270,10 +253,10 @@ 
void SfuUnit::tick() { case SfuType::CSRRW: case SfuType::CSRRS: case SfuType::CSRRC: - output.push(trace, 1); + output.push(trace, 2+delay); break; case SfuType::BAR: { - output.push(trace, 1); + output.push(trace, 2+delay); if (trace->eop) { auto trace_data = std::dynamic_pointer_cast(trace->data); release_warp = core_->barrier(trace_data->arg1, trace_data->arg2, trace->wid); @@ -283,7 +266,7 @@ void SfuUnit::tick() { std::abort(); } - DT(3, "pipeline-execute: op=" << trace->sfu_type << ", " << *trace); + DT(3, this->name() << ": op=" << trace->sfu_type << ", " << *trace); if (trace->eop && release_warp) { core_->resume(trace->wid); } diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index 45f0152ff..76dd16173 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -26,13 +26,13 @@ public: std::vector> Inputs; std::vector> Outputs; - FuncUnit(const SimContext& ctx, Core* core, const char* name) - : SimObject(ctx, name) + FuncUnit(const SimContext& ctx, Core* core, const char* name) + : SimObject(ctx, name) , Inputs(ISSUE_WIDTH, this) , Outputs(ISSUE_WIDTH, this) , core_(core) {} - + virtual ~FuncUnit() {} virtual void reset() {} @@ -73,28 +73,26 @@ public: private: - int send_requests(instr_trace_t* trace, int block_idx, int tag); - - struct pending_req_t { + struct pending_req_t { instr_trace_t* trace; - uint32_t count; + BitVector<> mask; }; - struct lsu_state_t { + struct lsu_state_t { HashTable pending_rd_reqs; - instr_trace_t* fence_trace; + instr_trace_t* fence_trace; bool fence_lock; lsu_state_t() : pending_rd_reqs(LSUQ_IN_SIZE) {} - + void clear() { this->pending_rd_reqs.clear(); this->fence_trace = nullptr; this->fence_lock = false; } }; - - std::array states_; + + std::array states_; uint64_t pending_loads_; }; @@ -103,7 +101,7 @@ private: class SfuUnit : public FuncUnit { public: SfuUnit(const SimContext& ctx, Core*); - + void tick(); }; diff --git a/sim/simx/instr_trace.h b/sim/simx/instr_trace.h index 532b736f5..7f6b37580 100644 --- a/sim/simx/instr_trace.h +++ b/sim/simx/instr_trace.h @@ -45,6 +45,11 @@ struct SFUTraceData : public ITraceData { struct instr_trace_t { public: + struct reg_t { + RegType type; + uint32_t idx; + }; + //-- const uint64_t uuid; const Arch& arch; @@ -54,16 +59,13 @@ public: uint32_t wid; ThreadMask tmask; Word PC; - - //-- - uint32_t rdest; - RegType rdest_type; bool wb; //-- - RegMask used_iregs; - RegMask used_fregs; - RegMask used_vregs; + reg_t dst_reg; + + //-- + std::vector src_regs; //- FUType fu_type; @@ -79,7 +81,7 @@ public: ITraceData::Ptr data; - int pid; + int pid; bool sop; bool eop; @@ -92,12 +94,9 @@ public: , wid(0) , tmask(0) , PC(0) - , rdest(0) - , rdest_type(RegType::None) , wb(false) - , used_iregs(0) - , used_fregs(0) - , used_vregs(0) + , 
dst_reg({RegType::None, 0}) + , src_regs(NUM_SRC_REGS, {RegType::None, 0}) , fu_type(FUType::ALU) , unit_type(0) , data(nullptr) @@ -115,12 +114,9 @@ public: , wid(rhs.wid) , tmask(rhs.tmask) , PC(rhs.PC) - , rdest(rhs.rdest) - , rdest_type(rhs.rdest_type) , wb(rhs.wb) - , used_iregs(rhs.used_iregs) - , used_fregs(rhs.used_fregs) - , used_vregs(rhs.used_vregs) + , dst_reg(rhs.dst_reg) + , src_regs(rhs.src_regs) , fu_type(rhs.fu_type) , unit_type(rhs.unit_type) , data(rhs.data) @@ -152,8 +148,13 @@ inline std::ostream &operator<<(std::ostream &os, const instr_trace_t& trace) { } os << ", PC=0x" << std::hex << trace.PC; os << ", wb=" << trace.wb; - if (trace.wb) { - os << ", rd=" << trace.rdest_type << std::dec << trace.rdest; + if (trace.dst_reg.type != RegType::None) { + os << ", rd=" << trace.dst_reg.type << std::dec << trace.dst_reg.idx; + } + for (uint32_t i = 0; i < trace.src_regs.size(); ++i) { + if (trace.src_regs[i].type != RegType::None) { + os << ", rs" << i << "=" << trace.src_regs[i].type << std::dec << trace.src_regs[i].idx; + } } os << ", ex=" << trace.fu_type; if (trace.pid != -1) { diff --git a/sim/simx/local_mem.cpp b/sim/simx/local_mem.cpp index ca2238473..195fe5300 100644 --- a/sim/simx/local_mem.cpp +++ b/sim/simx/local_mem.cpp @@ -82,11 +82,13 @@ public: continue; } + DT(4, simobject_->name() << " mem-req" << req_id << ": "<< core_req); + in_used_banks.at(bank_id) = true; if (!core_req.write || config_.write_reponse) { // send response - MemRsp core_rsp{core_req.tag, core_req.cid}; + MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid}; simobject_->Outputs.at(req_id).push(core_rsp, 1); } diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index be1505610..cd375b516 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -112,7 +112,9 @@ int main(int argc, char **argv) { return -1; } } - +#ifndef NDEBUG + std::cout << "[VXDRV] START: program=" << program << std::endl; +#endif // run simulation processor.run(); diff --git 
a/sim/simx/mem_coalescer.cpp b/sim/simx/mem_coalescer.cpp index 390fd5f01..8af567985 100644 --- a/sim/simx/mem_coalescer.cpp +++ b/sim/simx/mem_coalescer.cpp @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,100 +16,141 @@ using namespace vortex; MemCoalescer::MemCoalescer( - const SimContext& ctx, - const char* name, + const SimContext& ctx, + const char* name, uint32_t input_size, uint32_t output_size, uint32_t line_size, uint32_t queue_size, uint32_t delay -) : SimObject(ctx, name) - , ReqIn(input_size, this) - , RspIn(input_size, this) - , ReqOut(output_size, this) - , RspOut(output_size, this) +) : SimObject(ctx, name) + , ReqIn(this) + , RspIn(this) + , ReqOut(this) + , RspOut(this) + , input_size_(input_size) + , output_size_(output_size) + , output_ratio_(input_size / output_size) , pending_rd_reqs_(queue_size) + , sent_mask_(input_size) , line_size_(line_size) , delay_(delay) {} void MemCoalescer::reset() { - last_index_ = 0; sent_mask_.reset(); } -void MemCoalescer::tick() { - uint32_t I = ReqIn.size(); - uint32_t O = ReqOut.size(); - +void MemCoalescer::tick() { // process incoming responses - for (uint32_t o = 0; o < O; ++o) { - if (RspOut.at(o).empty()) - continue; - auto& mem_rsp = RspOut.at(o).front(); - DT(3, this->name() << "-" << mem_rsp); - auto& entry = pending_rd_reqs_.at(mem_rsp.tag); - for (uint32_t i = 0; i < I; ++i) { - if (entry.mask.test(i)) { - MemRsp rsp(mem_rsp); - rsp.tag = entry.tag; - RspIn.at(i).push(rsp, 1); + if (!RspOut.empty()) { + auto& out_rsp = RspOut.front(); + DT(4, 
this->name() << " mem-rsp: " << out_rsp); + auto& entry = pending_rd_reqs_.at(out_rsp.tag); + + BitVector<> rsp_mask(input_size_); + for (uint32_t o = 0; o < output_size_; ++o) { + if (!out_rsp.mask.test(o)) + continue; + for (uint32_t r = 0; r < output_ratio_; ++r) { + uint32_t i = o * output_ratio_ + r; + if (entry.mask.test(i)) + rsp_mask.set(i); } } - pending_rd_reqs_.release(mem_rsp.tag); - RspOut.at(o).pop(); + + // build memory response + LsuRsp in_rsp(input_size_); + in_rsp.mask = rsp_mask; + in_rsp.tag = entry.tag; + in_rsp.cid = out_rsp.cid; + in_rsp.uuid = out_rsp.uuid; + + // send memory response + RspIn.push(in_rsp, 1); + + // track remaining responses + assert(!entry.mask.none()); + entry.mask &= ~rsp_mask; + if (entry.mask.none()) { + // whole response received, release tag + pending_rd_reqs_.release(out_rsp.tag); + } + RspOut.pop(); } // process incoming requests - uint64_t addr_mask = ~uint64_t(line_size_-1); - bool completed = true; - for (uint32_t i = last_index_; i < I; ++i) { - if (sent_mask_.test(i) || ReqIn.at(i).empty()) - continue; + if (ReqIn.empty()) + return; - auto& seed = ReqIn.at(i).front(); + auto& in_req = ReqIn.front(); + assert(in_req.mask.size() == input_size_); + assert(!in_req.mask.none()); - // ensure we can allocate a response tag - if (!seed.write && pending_rd_reqs_.full()) { - DT(4, "*** " << this->name() << "-queue-full: " << seed); - last_index_ = i; - completed = false; - break; - } - - std::bitset<64> mask(0); - mask.set(i); - - // coalesce matching requests - uint64_t seed_addr = seed.addr & addr_mask; - for (uint32_t j = i + 1; j < I; ++j) { - if (sent_mask_.test(j) || ReqIn.at(j).empty()) - continue; - auto& match = ReqIn.at(j).front(); - uint64_t match_addr = match.addr & addr_mask; - if (match_addr == seed_addr) { - mask.set(j); - ReqIn.at(j).pop(); - } - } - - uint32_t tag = 0; - if (!seed.write) { - tag = pending_rd_reqs_.allocate(pending_req_t{seed.tag, mask}); - } - - MemReq mem_req{seed}; - mem_req.tag = tag; 
- DT(3, this->name() << "-" << mem_req << ", coalesced=" << mask.count()); - uint32_t c = i % O; - ReqOut.at(c).push(mem_req, delay_); - ReqIn.at(i).pop(); - - sent_mask_ |= mask; + // ensure we can allocate a response tag + if (pending_rd_reqs_.full()) { + DT(4, "*** " << this->name() << " queue-full: " << in_req); + return; } - if (completed) { - last_index_ = 0; + uint64_t addr_mask = ~uint64_t(line_size_-1); + + BitVector<> out_mask(output_size_); + std::vector out_addrs(output_size_); + + BitVector<> cur_mask(input_size_); + + for (uint32_t o = 0; o < output_size_; ++o) { + for (uint32_t r = 0; r < output_ratio_; ++r) { + uint32_t i = o * output_ratio_ + r; + if (sent_mask_.test(i) || !in_req.mask.test(i)) + continue; + + uint64_t seed_addr = in_req.addrs.at(i) & addr_mask; + cur_mask.set(i); + + // coalesce matching requests + for (uint32_t s = r + 1; s < output_ratio_; ++s) { + uint32_t j = o * output_ratio_ + s; + if (sent_mask_.test(j) || !in_req.mask.test(j)) + continue; + uint64_t match_addr = in_req.addrs.at(j) & addr_mask; + if (match_addr == seed_addr) { + cur_mask.set(j); + } + } + + out_mask.set(o); + out_addrs.at(o) = seed_addr; + break; + } + } + + assert(!out_mask.none()); + + uint32_t tag = 0; + if (!in_req.write) { + // allocate a response tag for read requests + tag = pending_rd_reqs_.allocate(pending_req_t{in_req.tag, cur_mask}); + } + + // build memory request + LsuReq out_req{output_size_}; + out_req.mask = out_mask; + out_req.tag = tag; + out_req.write = in_req.write; + out_req.addrs = out_addrs; + out_req.cid = in_req.cid; + out_req.uuid = in_req.uuid; + + // send memory request + ReqOut.push(out_req, delay_); + DT(4, this->name() << " mem-req: coalesced=" << cur_mask.count() << ", " << out_req); + + // update sent mask + sent_mask_ |= cur_mask; + if (sent_mask_ == in_req.mask) { + ReqIn.pop(); sent_mask_.reset(); } } \ No newline at end of file diff --git a/sim/simx/mem_coalescer.h b/sim/simx/mem_coalescer.h index 1a38dd3ac..f0e3935aa 
100644 --- a/sim/simx/mem_coalescer.h +++ b/sim/simx/mem_coalescer.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,15 +17,15 @@ namespace vortex { class MemCoalescer : public SimObject { public: - std::vector> ReqIn; - std::vector> RspIn; + SimPort ReqIn; + SimPort RspIn; - std::vector> ReqOut; - std::vector> RspOut; + SimPort ReqOut; + SimPort RspOut; MemCoalescer( - const SimContext& ctx, - const char* name, + const SimContext& ctx, + const char* name, uint32_t input_size, uint32_t output_size, uint32_t line_size, @@ -41,14 +41,17 @@ private: struct pending_req_t { uint32_t tag; - std::bitset<64> mask; + BitVector<> mask; }; + uint32_t input_size_; + uint32_t output_size_; + uint32_t output_ratio_; + HashTable pending_rd_reqs_; + BitVector<> sent_mask_; uint32_t line_size_; uint32_t delay_; - uint32_t last_index_; - std::bitset<64> sent_mask_; }; } \ No newline at end of file diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index df3860daa..a12713fea 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,14 +15,7 @@ #include #include #include - -DISABLE_WARNING_PUSH -DISABLE_WARNING_UNUSED_PARAMETER -#define RAMULATOR -#include -#include -#include -DISABLE_WARNING_POP +#include #include "constants.h" #include "types.h" @@ -32,83 +25,75 @@ using namespace vortex; class MemSim::Impl { private: - MemSim* simobject_; - Config config_; + MemSim* simobject_; + Config config_; + DramSim dram_sim_; PerfStats perf_stats_; - ramulator::Gem5Wrapper* dram_; + + struct DramCallbackArgs { + MemSim* simobject; + MemReq request; + }; public: - - Impl(MemSim* simobject, const Config& config) + Impl(MemSim* simobject, const Config& config) : simobject_(simobject) , config_(config) - { - ramulator::Config ram_config; - ram_config.add("standard", "DDR4"); - ram_config.add("channels", std::to_string(config.channels)); - ram_config.add("ranks", "1"); - ram_config.add("speed", "DDR4_2400R"); - ram_config.add("org", "DDR4_4Gb_x8"); - ram_config.add("mapping", "defaultmapping"); - ram_config.set_core_num(config.num_cores); - dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE); - Stats::statlist.output("ramulator.ddr4.log"); - } + , dram_sim_(MEM_CLOCK_RATIO) + {} ~Impl() { - dram_->finish(); - Stats::statlist.printall(); - delete dram_; + //-- } const PerfStats& perf_stats() const { return perf_stats_; } - void dram_callback(ramulator::Request& req, uint32_t tag, uint64_t uuid) { - if (req.type == ramulator::Request::Type::WRITE) - return; - MemRsp mem_rsp{tag, (uint32_t)req.coreid, uuid}; - simobject_->MemRspPort.push(mem_rsp, 1); - DT(3, simobject_->name() << "-" << mem_rsp); - } - void reset() { - perf_stats_ = PerfStats(); + dram_sim_.reset(); } void tick() { - if (MEM_CYCLE_RATIO > 0) { - auto cycle = SimPlatform::instance().cycles(); - if ((cycle % MEM_CYCLE_RATIO) == 0) - dram_->tick(); - } else { - for (int i = MEM_CYCLE_RATIO; i <= 0; ++i) - dram_->tick(); - } - + dram_sim_.tick(); + if (simobject_->MemReqPort.empty()) return; - + auto& mem_req = 
simobject_->MemReqPort.front(); - ramulator::Request dram_req( + // try to enqueue the request to the memory system + auto req_args = new DramCallbackArgs{simobject_, mem_req}; + auto enqueue_success = dram_sim_.send_request( + mem_req.write, mem_req.addr, - mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ, - std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid), - mem_req.cid + 0, + [](void* arg) { + auto rsp_args = reinterpret_cast(arg); + // only send a response for read requests + if (!rsp_args->request.write) { + MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; + rsp_args->simobject->MemRspPort.push(mem_rsp, 1); + DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp); + } + delete rsp_args; + }, + req_args ); - if (!dram_->send(dram_req)) + // check if the request was enqueued successfully + if (!enqueue_success) { + delete req_args; return; - + } + if (mem_req.write) { ++perf_stats_.writes; } else { ++perf_stats_.reads; } - - DT(3, simobject_->name() << "-" << mem_req); + + DT(3, simobject_->name() << " mem-req: " << mem_req); simobject_->MemReqPort.pop(); } @@ -116,9 +101,9 @@ public: /////////////////////////////////////////////////////////////////////////////// -MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) +MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) : SimObject(ctx, name) - , MemReqPort(this) + , MemReqPort(this) , MemRspPort(this) , impl_(new Impl(this, config)) {} diff --git a/sim/simx/operand.h b/sim/simx/operand.h index 53483792f..065f7d03d 100644 --- a/sim/simx/operand.h +++ b/sim/simx/operand.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,43 +18,61 @@ namespace vortex { class Operand : public SimObject { +private: + static constexpr uint32_t NUM_BANKS = 4; + uint32_t total_stalls_ = 0; + public: SimPort Input; SimPort Output; - Operand(const SimContext& ctx) - : SimObject(ctx, "Operand") + Operand(const SimContext& ctx) + : SimObject(ctx, "Operand") , Input(this) , Output(this) - {} + { + total_stalls_ = 0; + } virtual ~Operand() {} - virtual void reset() {} + virtual void reset() { + total_stalls_ = 0; + } virtual void tick() { if (Input.empty()) return; auto trace = Input.front(); - int delay = 1; - for (int i = 0; i < MAX_NUM_REGS; ++i) { - bool is_iregs = trace->used_iregs.test(i); - bool is_fregs = trace->used_fregs.test(i); - bool is_vregs = trace->used_vregs.test(i); - if (is_iregs || is_fregs || is_vregs) { - if (is_iregs && i == 0) - continue; - ++delay; + uint32_t stalls = 0; + + for (int i = 0; i < NUM_SRC_REGS; ++i) { + for (int j = i + 1; j < NUM_SRC_REGS; ++j) { + int bank_i = trace->src_regs[i].idx % NUM_BANKS; + int bank_j = trace->src_regs[j].idx % NUM_BANKS; + if ((trace->src_regs[i].type != RegType::None) + && (trace->src_regs[j].type != RegType::None) + && (trace->src_regs[i].idx != 0) + && (trace->src_regs[j].idx != 0) + && bank_i == bank_j) { + ++stalls; + } } } - Output.push(trace, delay); - + total_stalls_ += stalls; + + Output.push(trace, 2 + stalls); + DT(3, "pipeline-operands: " << *trace); Input.pop(); }; + + uint32_t total_stalls() const { + return total_stalls_; + } }; } \ No newline at end of file diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 7c78218ff..01023125b 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ 
-39,7 +39,7 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) XLEN, // address bits 1, // number of ports uint8_t(arch.num_clusters()), // request size - true, // write-through + L3_WRITEBACK, // write-back false, // write response L3_MSHR_SIZE, // mshr size 2, // pipeline latency @@ -70,6 +70,19 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) --perf_mem_pending_reads_; }); +#ifndef NDEBUG + // dump device configuration + std::cout << "CONFIGS:" + << " num_threads=" << arch.num_threads() + << ", num_warps=" << arch.num_warps() + << ", num_cores=" << arch.num_cores() + << ", num_clusters=" << arch.num_clusters() + << ", socket_size=" << arch.socket_size() + << ", local_mem_base=0x" << std::hex << arch.local_mem_base() << std::dec + << ", num_barriers=" << arch.num_barriers() + << std::endl; +#endif + // reset the device this->reset(); } diff --git a/sim/simx/scoreboard.h b/sim/simx/scoreboard.h index ee42c3427..0be79eb9e 100644 --- a/sim/simx/scoreboard.h +++ b/sim/simx/scoreboard.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,95 +24,87 @@ public: struct reg_use_t { RegType reg_type; - uint32_t reg_id; - FUType fu_type; + uint32_t reg_id; + FUType fu_type; SfuType sfu_type; uint64_t uuid; }; - - Scoreboard(const Arch &arch) - : in_use_iregs_(arch.num_warps()) - , in_use_fregs_(arch.num_warps()) - { + + Scoreboard(const Arch &arch) + : in_use_regs_(arch.num_warps()) { + for (auto& in_use_reg : in_use_regs_) { + in_use_reg.resize((int)RegType::Count); + } this->clear(); } void clear() { - for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) { - in_use_iregs_.at(i).reset(); - in_use_fregs_.at(i).reset(); + for (auto& in_use_reg : in_use_regs_) { + for (auto& mask : in_use_reg) { + mask.reset(); + } } owners_.clear(); } bool in_use(instr_trace_t* trace) const { - return (trace->used_iregs & in_use_iregs_.at(trace->wid)) != 0 - || (trace->used_fregs & in_use_fregs_.at(trace->wid)) != 0; + if (trace->wb) { + assert(trace->dst_reg.type != RegType::None); + if (in_use_regs_.at(trace->wid).at((int)trace->dst_reg.type).test(trace->dst_reg.idx)) { + return true; + } + } + for (uint32_t i = 0; i < trace->src_regs.size(); ++i) { + if (trace->src_regs[i].type != RegType::None) { + if (in_use_regs_.at(trace->wid).at((int)trace->src_regs[i].type).test(trace->src_regs[i].idx)) { + return true; + } + } + } + return false; } std::vector get_uses(instr_trace_t* trace) const { std::vector out; - - auto used_iregs = trace->used_iregs & in_use_iregs_.at(trace->wid); - auto used_fregs = trace->used_fregs & in_use_fregs_.at(trace->wid); - - for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) { - if (used_iregs.test(r)) { - uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Integer; + if (trace->wb) { + assert(trace->dst_reg.type != RegType::None); + if (in_use_regs_.at(trace->wid).at((int)trace->dst_reg.type).test(trace->dst_reg.idx)) { + uint32_t tag = (trace->dst_reg.idx << 16) | (trace->wid << 4) | (int)trace->dst_reg.type; auto owner = owners_.at(tag); - out.push_back({RegType::Integer, r, 
owner->fu_type, owner->sfu_type, owner->uuid}); + out.push_back({trace->dst_reg.type, trace->dst_reg.idx, owner->fu_type, owner->sfu_type, owner->uuid}); } } - - for (uint32_t r = 0; r < MAX_NUM_REGS; ++r) { - if (used_fregs.test(r)) { - uint32_t tag = (r << 16) | (trace->wid << 4) | (int)RegType::Float; - auto owner = owners_.at(tag); - out.push_back({RegType::Float, r, owner->fu_type, owner->sfu_type, owner->uuid}); + for (uint32_t i = 0; i < trace->src_regs.size(); ++i) { + if (trace->src_regs[i].type != RegType::None) { + if (in_use_regs_.at(trace->wid).at((int)trace->src_regs[i].type).test(trace->src_regs[i].idx)) { + uint32_t tag = (trace->src_regs[i].idx << 16) | (trace->wid << 4) | (int)trace->src_regs[i].type; + auto owner = owners_.at(tag); + out.push_back({trace->src_regs[i].type, trace->src_regs[i].idx, owner->fu_type, owner->sfu_type, owner->uuid}); + } } } - return out; } - + void reserve(instr_trace_t* trace) { assert(trace->wb); - switch (trace->rdest_type) { - case RegType::Integer: - in_use_iregs_.at(trace->wid).set(trace->rdest); - break; - case RegType::Float: - in_use_fregs_.at(trace->wid).set(trace->rdest); - break; - default: - assert(false); - } - uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type; + in_use_regs_.at(trace->wid).at((int)trace->dst_reg.type).set(trace->dst_reg.idx); + uint32_t tag = (trace->dst_reg.idx << 16) | (trace->wid << 4) | (int)trace->dst_reg.type; assert(owners_.count(tag) == 0); owners_[tag] = trace; - assert((int)trace->fu_type < 5); } void release(instr_trace_t* trace) { assert(trace->wb); - switch (trace->rdest_type) { - case RegType::Integer: - in_use_iregs_.at(trace->wid).reset(trace->rdest); - break; - case RegType::Float: - in_use_fregs_.at(trace->wid).reset(trace->rdest); - break; - default: - assert(false); - } - uint32_t tag = (trace->rdest << 16) | (trace->wid << 4) | (int)trace->rdest_type; + in_use_regs_.at(trace->wid).at((int)trace->dst_reg.type).reset(trace->dst_reg.idx); 
+ uint32_t tag = (trace->dst_reg.idx << 16) | (trace->wid << 4) | (int)trace->dst_reg.type; + assert(owners_.count(tag) != 0); owners_.erase(tag); } private: - std::vector in_use_iregs_; - std::vector in_use_fregs_; + std::vector> in_use_regs_; std::unordered_map owners_; }; diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp index afda924d8..cef8a3908 100644 --- a/sim/simx/socket.cpp +++ b/sim/simx/socket.cpp @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,11 +16,11 @@ using namespace vortex; -Socket::Socket(const SimContext& ctx, +Socket::Socket(const SimContext& ctx, uint32_t socket_id, - Cluster* cluster, - const Arch &arch, - const DCRS &dcrs) + Cluster* cluster, + const Arch &arch, + const DCRS &dcrs) : SimObject(ctx, "socket") , icache_mem_req_port(this) , icache_mem_rsp_port(this) @@ -44,7 +44,7 @@ Socket::Socket(const SimContext& ctx, XLEN, // address bits 1, // number of ports 1, // number of inputs - false, // write-through + false, // write-back false, // write response (uint8_t)arch.num_warps(), // mshr size 2, // pipeline latency @@ -64,7 +64,7 @@ Socket::Socket(const SimContext& ctx, XLEN, // address bits 1, // number of ports DCACHE_NUM_REQS, // number of inputs - true, // write-through + DCACHE_WRITEBACK, // write-back false, // write response DCACHE_MSHR_SIZE, // mshr size 2, // pipeline latency diff --git a/sim/simx/types.cpp b/sim/simx/types.cpp index b32a0cee6..3e6c5960f 100644 --- a/sim/simx/types.cpp +++ b/sim/simx/types.cpp @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed 
under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,14 +16,14 @@ using namespace vortex; LocalMemDemux::LocalMemDemux( - const SimContext& ctx, - const char* name, + const SimContext& ctx, + const char* name, uint32_t delay -) : SimObject(ctx, name) +) : SimObject(ctx, name) , ReqIn(this) , RspIn(this) - , ReqSM(this) - , RspSM(this) + , ReqLmem(this) + , RspLmem(this) , ReqDC(this) , RspDC(this) , delay_(delay) @@ -31,30 +31,133 @@ LocalMemDemux::LocalMemDemux( void LocalMemDemux::reset() {} -void LocalMemDemux::tick() { +void LocalMemDemux::tick() { // process incoming responses - if (!RspSM.empty()) { - auto& rsp = RspSM.front(); - DT(4, this->name() << "-" << rsp); - RspIn.push(rsp, 1); - RspSM.pop(); + if (!RspLmem.empty()) { + auto& out_rsp = RspLmem.front(); + DT(4, this->name() << " lmem-rsp: " << out_rsp); + RspIn.push(out_rsp, 1); + RspLmem.pop(); } if (!RspDC.empty()) { - auto& rsp = RspDC.front(); - DT(4, this->name() << "-" << rsp); - RspIn.push(rsp, 1); - RspDC - .pop(); + auto& out_rsp = RspDC.front(); + DT(4, this->name() << " dc-rsp: " << out_rsp); + RspIn.push(out_rsp, 1); + RspDC.pop(); } - // process incoming requests + + // process incoming requests if (!ReqIn.empty()) { - auto& req = ReqIn.front(); - DT(4, this->name() << "-" << req); - if (req.type == AddrType::Shared) { - ReqSM.push(req, delay_); - } else { - ReqDC.push(req, delay_); + auto& in_req = ReqIn.front(); + + LsuReq out_dc_req(in_req.mask.size()); + out_dc_req.write = in_req.write; + out_dc_req.tag = in_req.tag; + out_dc_req.cid = in_req.cid; + out_dc_req.uuid = in_req.uuid; + + LsuReq 
out_lmem_req(out_dc_req); + + for (uint32_t i = 0; i < in_req.mask.size(); ++i) { + if (in_req.mask.test(i)) { + auto type = get_addr_type(in_req.addrs.at(i)); + if (type == AddrType::Shared) { + out_lmem_req.mask.set(i); + out_lmem_req.addrs.at(i) = in_req.addrs.at(i); + } else { + out_dc_req.mask.set(i); + out_dc_req.addrs.at(i) = in_req.addrs.at(i); + } + } + } + + if (!out_dc_req.mask.none()) { + ReqDC.push(out_dc_req, delay_); + DT(4, this->name() << " dc-req: " << out_dc_req); + } + + if (!out_lmem_req.mask.none()) { + ReqLmem.push(out_lmem_req, delay_); + DT(4, this->name() << " lmem-req: " << out_lmem_req); } ReqIn.pop(); - } + } +} + +/////////////////////////////////////////////////////////////////////////////// + +LsuMemAdapter::LsuMemAdapter( + const SimContext& ctx, + const char* name, + uint32_t num_inputs, + uint32_t delay +) : SimObject(ctx, name) + , ReqIn(this) + , RspIn(this) + , ReqOut(num_inputs, this) + , RspOut(num_inputs, this) + , delay_(delay) +{} + +void LsuMemAdapter::reset() {} + +void LsuMemAdapter::tick() { + uint32_t input_size = ReqOut.size(); + + // process incoming responses + for (uint32_t i = 0; i < input_size; ++i) { + if (RspOut.at(i).empty()) + continue; + auto& out_rsp = RspOut.at(i).front(); + DT(4, this->name() << " rsp" << i << ": " << out_rsp); + + // build memory response + LsuRsp in_rsp(input_size); + in_rsp.mask.set(i); + in_rsp.tag = out_rsp.tag; + in_rsp.cid = out_rsp.cid; + in_rsp.uuid = out_rsp.uuid; + + // include other responses with the same tag + for (uint32_t j = i + 1; j < input_size; ++j) { + if (RspOut.at(j).empty()) + continue; + auto& other_rsp = RspOut.at(j).front(); + if (out_rsp.tag == other_rsp.tag) { + in_rsp.mask.set(j); + RspOut.at(j).pop(); + } + } + + // send memory response + RspIn.push(in_rsp, 1); + + // remove input + RspOut.at(i).pop(); + break; + } + + // process incoming requests + if (!ReqIn.empty()) { + auto& in_req = ReqIn.front(); + assert(in_req.mask.size() == input_size); + + for 
(uint32_t i = 0; i < input_size; ++i) { + if (in_req.mask.test(i)) { + // build memory request + MemReq out_req; + out_req.write = in_req.write; + out_req.addr = in_req.addrs.at(i); + out_req.type = get_addr_type(in_req.addrs.at(i)); + out_req.tag = in_req.tag; + out_req.cid = in_req.cid; + out_req.uuid = in_req.uuid; + + // send memory request + ReqOut.at(i).push(out_req, delay_); + DT(4, this->name() << " req" << i << ": " << out_req); + } + } + ReqIn.pop(); + } } \ No newline at end of file diff --git a/sim/simx/types.h b/sim/simx/types.h index a84216ae1..385015cc9 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include #include "debug.h" namespace vortex { @@ -47,6 +49,7 @@ typedef uint64_t WordF; #define MAX_NUM_THREADS 32 #define MAX_NUM_WARPS 32 #define MAX_NUM_REGS 32 +#define NUM_SRC_REGS 3 typedef std::bitset CoreMask; typedef std::bitset RegMask; @@ -58,7 +61,8 @@ typedef std::bitset WarpMask; enum class RegType { None, Integer, - Float + Float, + Count }; inline std::ostream &operator<<(std::ostream &os, const RegType& type) { @@ -235,6 +239,62 @@ inline std::ostream &operator<<(std::ostream &os, const ArbiterType& type) { default: assert(false); } return os; +}/////////////////////////////////////////////////////////////////////////////// + +struct LsuReq { + BitVector<> mask; + std::vector addrs; + bool write; + uint32_t tag; + uint32_t cid; + uint64_t uuid; + + LsuReq(uint32_t size) + : mask(size) + , addrs(size, 0) + , write(false) + , tag(0) + , cid(0) + , uuid(0) + {} +}; + +inline std::ostream &operator<<(std::ostream &os, const LsuReq& req) { + os << "rw=" << req.write << ", mask=" << req.mask << ", "; + for (size_t i = 0; i < req.mask.size(); ++i) { + os << "addr" << i << "="; + if (req.mask.test(i)) { + os << "0x" << std::hex << req.addrs.at(i); + } else { + os << "-"; + } + os << ", "; + } + os << std::dec << "tag=" << req.tag << ", cid=" << req.cid; + os << " (#" << 
std::dec << req.uuid << ")"; + return os; +} + +/////////////////////////////////////////////////////////////////////////////// + +struct LsuRsp { + BitVector<> mask; + uint64_t tag; + uint32_t cid; + uint64_t uuid; + + LsuRsp(uint32_t size) + : mask(size) + , tag (0) + , cid(0) + , uuid(0) + {} +}; + +inline std::ostream &operator<<(std::ostream &os, const LsuRsp& rsp) { + os << "mask=" << rsp.mask << ", tag=" << rsp.tag << ", cid=" << rsp.cid; + os << " (#" << std::dec << rsp.uuid << ")"; + return os; } /////////////////////////////////////////////////////////////////////////////// @@ -263,7 +323,7 @@ struct MemReq { }; inline std::ostream &operator<<(std::ostream &os, const MemReq& req) { - os << "mem-" << (req.write ? "wr" : "rd") << ": "; + os << "rw=" << req.write << ", "; os << "addr=0x" << std::hex << req.addr << ", type=" << req.type; os << std::dec << ", tag=" << req.tag << ", cid=" << req.cid; os << " (#" << std::dec << req.uuid << ")"; @@ -285,7 +345,7 @@ struct MemRsp { }; inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) { - os << "mem-rsp: tag=" << rsp.tag << ", cid=" << rsp.cid; + os << "tag=" << rsp.tag << ", cid=" << rsp.cid; os << " (#" << std::dec << rsp.uuid << ")"; return os; } @@ -424,7 +484,6 @@ public: auto& req_in = Inputs.at(j); if (!req_in.empty()) { auto& req = req_in.front(); - DT(4, this->name() << "-" << req); Outputs.at(o).push(req, delay_); req_in.pop(); this->update_cursor(o, i); @@ -515,7 +574,7 @@ public: i = rsp.tag & (R-1); rsp.tag >>= lg_num_reqs_; } - DT(4, this->name() << "-" << rsp); + DT(4, this->name() << " rsp" << o << ": " << rsp); uint32_t j = o * R + i; RspIn.at(j).push(rsp, 1); RspOut.at(o).pop(); @@ -534,7 +593,7 @@ public: if (lg_num_reqs_ != 0) { req.tag = (req.tag << lg_num_reqs_) | i; } - DT(4, this->name() << "-" << req); + DT(4, this->name() << " req" << j << ": " << req); ReqOut.at(o).push(req, delay_); req_in.pop(); this->update_cursor(o, i); @@ -563,14 +622,14 @@ using MemSwitch = 
Switch; class LocalMemDemux : public SimObject { public: - SimPort ReqIn; - SimPort RspIn; + SimPort ReqIn; + SimPort RspIn; - SimPort ReqSM; - SimPort RspSM; + SimPort ReqLmem; + SimPort RspLmem; - SimPort ReqDC; - SimPort RspDC; + SimPort ReqDC; + SimPort RspDC; LocalMemDemux( const SimContext& ctx, @@ -586,4 +645,29 @@ private: uint32_t delay_; }; +/////////////////////////////////////////////////////////////////////////////// + +class LsuMemAdapter : public SimObject { +public: + SimPort ReqIn; + SimPort RspIn; + + std::vector> ReqOut; + std::vector> RspOut; + + LsuMemAdapter( + const SimContext& ctx, + const char* name, + uint32_t num_inputs, + uint32_t delay + ); + + void reset(); + + void tick(); + +private: + uint32_t delay_; +}; + } diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 5c0191368..dd11c8d64 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -5,15 +5,17 @@ DESTDIR ?= $(CURDIR) SRC_DIR := $(VORTEX_HOME)/sim/xrtsim AFU_DIR := $(RTL_DIR)/afu/xrt -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors -Wno-array-bounds CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR) CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include -CXXFLAGS += -I/$(THIRD_PARTY_DIR) +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/spdlog/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/ext/yaml-cpp/include +CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src CXXFLAGS += -DXLEN_$(XLEN) LDFLAGS += -shared $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a -LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread +LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread # control RTL debug tracing states DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE @@ -47,11 +49,11 @@ endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) -SRCS = 
$(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp -RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) @@ -64,7 +66,7 @@ RTL_INCLUDE += -I$(AFU_DIR) TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) @@ -72,7 +74,6 @@ VL_FLAGS += $(CONFIGS) VL_FLAGS += $(SRC_DIR)/verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) -VL_FLAGS += $(DBG_SCOPE_FLAGS) CXXFLAGS += $(CONFIGS) @@ -92,7 +93,7 @@ endif # Enable scope analyzer ifdef SCOPE - VL_FLAGS += -DSCOPE + VL_FLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) CXXFLAGS += -DSCOPE SCOPE_JSON = $(DESTDIR)/scope.json endif diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 901acd582..648e25e7a 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -50,7 +50,6 @@ module vortex_afu_shim #( output wire interrupt `IGNORE_WARNINGS_END ); - vortex_afu #( .C_S_AXI_CTRL_ADDR_WIDTH(C_S_AXI_CTRL_ADDR_WIDTH), .C_S_AXI_CTRL_DATA_WIDTH(C_S_AXI_CTRL_DATA_WIDTH), diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 1f246051e..880983bf1 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -13,9 +13,7 @@ #include "xrt_sim.h" -#include #include "Vvortex_afu_shim.h" -#include "Vvortex_afu_shim__Syms.h" #ifdef VCD_OUTPUT #include @@ -26,10 +24,7 @@ #include #include -#define RAMULATOR -#include 
-#include -#include +#include #include #include @@ -46,8 +41,8 @@ #endif #endif -#ifndef MEM_CYCLE_RATIO -#define MEM_CYCLE_RATIO -1 +#ifndef MEM_CLOCK_RATIO +#define MEM_CLOCK_RATIO 1 #endif #undef MEM_BLOCK_SIZE @@ -101,10 +96,10 @@ public: Impl() : device_(nullptr) , ram_(nullptr) - , ramulator_(nullptr) + , dram_sim_(MEM_CLOCK_RATIO) , stop_(false) #ifdef VCD_OUTPUT - , trace_(nullptr) + , tfp_(nullptr) #endif {} @@ -114,9 +109,9 @@ public: future_.wait(); } #ifdef VCD_OUTPUT - if (trace_) { - trace_->close(); - delete trace_; + if (tfp_) { + tfp_->close(); + delete tfp_; } #endif if (device_) { @@ -125,11 +120,6 @@ public: if (ram_) { delete ram_; } - if (ramulator_) { - ramulator_->finish(); - Stats::statlist.printall(); - delete ramulator_; - } } int init() { @@ -145,25 +135,25 @@ public: #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC(); - device_->trace(trace_, 99); - trace_->open("trace.vcd"); + tfp_ = new VerilatedVcdC(); + device_->trace(tfp_, 99); + tfp_->open("trace.vcd"); #endif ram_ = new RAM(0, RAM_PAGE_SIZE); - // initialize dram simulator - ramulator::Config ram_config; - ram_config.add("standard", "DDR4"); - ram_config.add("channels", std::to_string(MEMORY_BANKS)); - ram_config.add("ranks", "1"); - ram_config.add("speed", "DDR4_2400R"); - ram_config.add("org", "DDR4_4Gb_x8"); - ram_config.add("mapping", "defaultmapping"); - ram_config.set_core_num(1); - ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE); - Stats::statlist.output("ramulator.ddr4.log"); - + #ifndef NDEBUG + // dump device configuration + std::cout << "CONFIGS:" + << " num_threads=" << NUM_THREADS + << ", num_warps=" << NUM_WARPS + << ", num_cores=" << NUM_CORES + << ", num_clusters=" << NUM_CLUSTERS + << ", socket_size=" << SOCKET_SIZE + << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec + << ", num_barriers=" << NUM_BARRIERS + << std::endl; + #endif // reset the device this->reset(); @@ -181,7 +171,17 @@ public: 
private: void reset() { - //-- + this->axi_ctrl_bus_reset(); + this->axi_mem_bus_reset(); + + for (auto& reqs : pending_mem_reqs_) { + reqs.clear(); + } + + { + std::queue empty; + std::swap(dram_queue_, empty); + } device_->ap_rst_n = 0; @@ -206,11 +206,21 @@ private: } void tick() { - //-- + this->axi_ctrl_bus_eval(); + this->axi_mem_bus_eval(); if (!dram_queue_.empty()) { - if (ramulator_->send(dram_queue_.front())) + auto mem_req = dram_queue_.front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { dram_queue_.pop(); + } } device_->ap_clk = 0; @@ -218,14 +228,7 @@ private: device_->ap_clk = 1; this->eval(); - if (MEM_CYCLE_RATIO > 0) { - auto cycle = timestamp / 2; - if ((cycle % MEM_CYCLE_RATIO) == 0) - ramulator_->tick(); - } else { - for (int i = MEM_CYCLE_RATIO; i <= 0; ++i) - ramulator_->tick(); - } + dram_sim_.tick(); #ifndef NDEBUG fflush(stdout); @@ -236,25 +239,86 @@ private: device_->eval(); #ifdef VCD_OUTPUT if (sim_trace_enabled()) { - trace_->dump(timestamp); + tfp_->dump(timestamp); } #endif ++timestamp; } + void axi_ctrl_bus_reset() { + // address write request + device_->s_axi_ctrl_awvalid = 0; + //device_->s_axi_ctrl_awaddr = 0; + + // data write request + device_->s_axi_ctrl_wvalid = 0; + //device_->s_axi_ctrl_wdata = 0; + //device_->s_axi_ctrl_wstrb = 0; + + // address read request + device_->s_axi_ctrl_arvalid = 0; + //device_->s_axi_ctrl_araddr = 0; + + // data read response + device_->s_axi_ctrl_rready = 0; + + // data write response + device_->s_axi_ctrl_bready = 0; + } + + void axi_ctrl_bus_eval() { + //-- + } + + void axi_mem_bus_reset() { + // address write request + device_->m_axi_mem_0_awready = 0; + + // data write request + device_->m_axi_mem_0_wready = 0; + + // address read request + device_->m_axi_mem_0_arready = 0; + + // data read response + 
device_->m_axi_mem_0_rvalid = 0; + //device_->m_axi_mem_0_rdata = 0; + //device_->m_axi_mem_0_rlast = 0; + //device_->m_axi_mem_0_rid = 0; + //device_->m_axi_mem_0_rresp = 0; + + // data write response + device_->m_axi_mem_0_bvalid = 0; + //device_->m_axi_mem_0_bresp = 0; + //device_->m_axi_mem_0_bid = 0; + } + + void axi_mem_bus_eval() { + //-- + } + + typedef struct { + std::array data; + uint32_t addr; + bool write; + bool ready; + } mem_req_t; + Vvortex_afu_shim *device_; RAM* ram_; - ramulator::Gem5Wrapper* ramulator_; + DramSim dram_sim_; std::future future_; bool stop_; std::mutex mutex_; - std::queue dram_queue_; + std::list pending_mem_reqs_[MEMORY_BANKS]; + + std::queue dram_queue_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedVcdC* tfp_; #endif }; diff --git a/tests/opencl/Makefile b/tests/opencl/Makefile index db8366795..e4be7e712 100644 --- a/tests/opencl/Makefile +++ b/tests/opencl/Makefile @@ -65,27 +65,6 @@ run-rtlsim: $(MAKE) -C sgemm3 run-rtlsim $(MAKE) -C psum run-rtlsim -run-opae: - $(MAKE) -C vecadd run-opae - $(MAKE) -C sgemm run-opae - $(MAKE) -C conv3 run-opae - $(MAKE) -C psort run-opae - $(MAKE) -C saxpy run-opae - $(MAKE) -C sfilter run-opae - $(MAKE) -C oclprintf run-opae - $(MAKE) -C dotproduct run-opae - $(MAKE) -C transpose run-opae - $(MAKE) -C spmv run-opae - $(MAKE) -C stencil run-opae - $(MAKE) -C nearn run-opae - $(MAKE) -C guassian run-opae - $(MAKE) -C kmeans run-opae - $(MAKE) -C blackscholes run-opae - $(MAKE) -C bfs run-opae - $(MAKE) -C sgemm2 run-opae - $(MAKE) -C sgemm3 run-opae - $(MAKE) -C psum run-opae - clean: $(MAKE) -C vecadd clean $(MAKE) -C sgemm clean diff --git a/tests/opencl/bfs/Makefile b/tests/opencl/bfs/Makefile index 1f8414189..c4b140f66 100644 --- a/tests/opencl/bfs/Makefile +++ b/tests/opencl/bfs/Makefile @@ -12,6 +12,6 @@ kernel.cl: $(SRC_DIR)/kernel.cl KERNEL_SRCS := kernel.cl -OPTS ?= $(SRC_DIR)/graph4096.txt +OPTS ?= $(SRC_DIR)/graph4k.txt include ../common.mk diff --git 
a/tests/opencl/bfs/graph32.txt b/tests/opencl/bfs/graph32.txt new file mode 100644 index 000000000..e22254c9c --- /dev/null +++ b/tests/opencl/bfs/graph32.txt @@ -0,0 +1,225 @@ +32 +0 6 +6 7 +13 5 +18 6 +24 4 +28 6 +34 6 +40 4 +44 7 +51 5 +56 6 +62 10 +72 4 +76 8 +84 3 +87 7 +94 6 +100 9 +109 7 +116 5 +121 5 +126 7 +133 5 +138 6 +144 4 +148 7 +155 4 +159 7 +166 6 +172 7 +179 5 +184 4 + +9 + +188 +0 6 +0 6 +6 4 +5 3 +2 2 +7 8 +1 2 +1 2 +25 8 +20 4 +13 6 +18 5 +31 4 +11 1 +18 7 +0 2 +12 5 +24 3 +10 3 +21 5 +7 10 +5 7 +8 5 +21 5 +17 1 +17 2 +20 1 +27 9 +0 3 +17 6 +3 7 +6 1 +10 7 +15 6 +0 4 +8 10 +5 1 +28 7 +12 6 +15 10 +3 10 +26 2 +0 8 +16 7 +6 10 +3 5 +31 10 +28 10 +21 8 +23 9 +27 2 +22 3 +13 7 +23 10 +17 2 +20 8 +3 3 +15 8 +5 7 +29 7 +15 3 +19 9 +2 1 +30 6 +11 2 +11 2 +21 8 +11 6 +11 6 +12 10 +13 7 +20 3 +2 5 +11 10 +25 9 +6 6 +9 7 +1 6 +30 6 +11 7 +16 2 +20 3 +25 7 +29 7 +29 5 +22 4 +19 9 +10 8 +6 10 +10 3 +5 6 +18 10 +22 9 +29 6 +7 7 +16 4 +16 4 +13 2 +25 2 +26 6 +4 1 +4 2 +5 6 +28 1 +9 2 +25 9 +18 6 +23 10 +26 10 +2 7 +15 10 +17 6 +1 5 +21 4 +26 9 +31 7 +14 9 +10 9 +29 4 +28 10 +30 1 +1 4 +13 3 +11 3 +4 1 +9 8 +3 5 +11 8 +8 8 +21 6 +21 6 +18 4 +3 5 +9 3 +14 4 +15 9 +23 3 +27 1 +9 10 +22 3 +8 9 +25 7 +17 10 +28 1 +24 1 +24 1 +30 4 +2 3 +1 8 +12 9 +17 9 +23 7 +13 7 +27 2 +16 2 +7 2 +18 9 +16 6 +17 10 +25 2 +4 9 +8 2 +29 1 +22 1 +30 4 +31 2 +6 7 +8 10 +17 1 +23 1 +19 10 +29 5 +10 7 +14 5 +19 4 +27 1 +15 6 +28 5 +13 7 +11 6 +13 6 +24 4 +27 4 +19 1 +8 10 +1 4 +18 7 +27 2 diff --git a/tests/opencl/bfs/graph4k.txt b/tests/opencl/bfs/graph4k.txt new file mode 100755 index 000000000..567432612 --- /dev/null +++ b/tests/opencl/bfs/graph4k.txt @@ -0,0 +1,28677 @@ +4096 +0 10 +10 6 +16 2 +18 5 +23 7 +30 7 +37 4 +41 4 +45 3 +48 5 +53 7 +60 4 +64 4 +68 6 +74 7 +81 5 +86 11 +97 5 +102 5 +107 8 +115 4 +119 4 +123 6 +129 4 +133 5 +138 7 +145 4 +149 2 +151 12 +163 3 +166 6 +172 6 +178 7 +185 5 +190 11 +201 4 +205 6 +211 9 +220 3 +223 4 +227 5 +232 4 +236 5 +241 6 +247 10 +257 4 
+261 5 +266 7 +273 5 +278 4 +282 8 +290 5 +295 8 +303 9 +312 4 +316 5 +321 5 +326 3 +329 8 +337 5 +342 10 +352 6 +358 4 +362 5 +367 5 +372 10 +382 6 +388 8 +396 8 +404 5 +409 5 +414 5 +419 8 +427 6 +433 8 +441 9 +450 5 +455 10 +465 5 +470 11 +481 5 +486 7 +493 8 +501 9 +510 4 +514 10 +524 9 +533 5 +538 5 +543 7 +550 3 +553 3 +556 2 +558 6 +564 8 +572 3 +575 4 +579 5 +584 11 +595 8 +603 7 +610 5 +615 7 +622 4 +626 7 +633 6 +639 5 +644 4 +648 5 +653 4 +657 8 +665 8 +673 10 +683 2 +685 5 +690 5 +695 6 +701 3 +704 5 +709 9 +718 10 +728 7 +735 7 +742 9 +751 3 +754 4 +758 9 +767 6 +773 10 +783 7 +790 4 +794 8 +802 4 +806 5 +811 5 +816 8 +824 7 +831 8 +839 10 +849 5 +854 5 +859 4 +863 4 +867 7 +874 9 +883 2 +885 10 +895 8 +903 5 +908 6 +914 5 +919 11 +930 2 +932 6 +938 2 +940 4 +944 6 +950 6 +956 5 +961 4 +965 3 +968 4 +972 1 +973 10 +983 7 +990 4 +994 6 +1000 9 +1009 6 +1015 10 +1025 7 +1032 7 +1039 5 +1044 5 +1049 9 +1058 4 +1062 5 +1067 4 +1071 6 +1077 6 +1083 7 +1090 9 +1099 2 +1101 4 +1105 3 +1108 9 +1117 7 +1124 4 +1128 9 +1137 9 +1146 4 +1150 11 +1161 6 +1167 8 +1175 6 +1181 7 +1188 8 +1196 4 +1200 7 +1207 8 +1215 10 +1225 3 +1228 6 +1234 3 +1237 4 +1241 5 +1246 3 +1249 1 +1250 4 +1254 6 +1260 4 +1264 11 +1275 7 +1282 9 +1291 8 +1299 5 +1304 6 +1310 8 +1318 9 +1327 6 +1333 7 +1340 10 +1350 7 +1357 8 +1365 10 +1375 6 +1381 2 +1383 10 +1393 5 +1398 8 +1406 9 +1415 4 +1419 5 +1424 3 +1427 4 +1431 4 +1435 9 +1444 6 +1450 9 +1459 6 +1465 4 +1469 6 +1475 9 +1484 8 +1492 7 +1499 9 +1508 2 +1510 3 +1513 8 +1521 6 +1527 5 +1532 9 +1541 6 +1547 5 +1552 9 +1561 12 +1573 2 +1575 3 +1578 7 +1585 6 +1591 5 +1596 10 +1606 9 +1615 9 +1624 1 +1625 4 +1629 6 +1635 2 +1637 14 +1651 2 +1653 4 +1657 4 +1661 5 +1666 7 +1673 6 +1679 3 +1682 13 +1695 5 +1700 7 +1707 8 +1715 3 +1718 5 +1723 6 +1729 7 +1736 6 +1742 7 +1749 2 +1751 7 +1758 5 +1763 4 +1767 4 +1771 6 +1777 2 +1779 3 +1782 9 +1791 7 +1798 4 +1802 8 +1810 7 +1817 6 +1823 6 +1829 4 +1833 7 +1840 8 +1848 2 +1850 14 +1864 9 +1873 6 
+1879 6 +1885 6 +1891 2 +1893 8 +1901 5 +1906 8 +1914 4 +1918 4 +1922 3 +1925 8 +1933 4 +1937 4 +1941 6 +1947 3 +1950 8 +1958 7 +1965 6 +1971 4 +1975 5 +1980 6 +1986 4 +1990 6 +1996 10 +2006 5 +2011 5 +2016 5 +2021 7 +2028 5 +2033 6 +2039 7 +2046 4 +2050 6 +2056 16 +2072 6 +2078 11 +2089 11 +2100 3 +2103 5 +2108 8 +2116 4 +2120 7 +2127 2 +2129 10 +2139 6 +2145 7 +2152 8 +2160 6 +2166 5 +2171 5 +2176 2 +2178 4 +2182 5 +2187 5 +2192 1 +2193 7 +2200 8 +2208 7 +2215 8 +2223 8 +2231 5 +2236 9 +2245 3 +2248 5 +2253 6 +2259 2 +2261 4 +2265 4 +2269 8 +2277 5 +2282 8 +2290 6 +2296 6 +2302 4 +2306 5 +2311 7 +2318 5 +2323 4 +2327 8 +2335 12 +2347 1 +2348 5 +2353 8 +2361 3 +2364 6 +2370 4 +2374 7 +2381 4 +2385 8 +2393 4 +2397 2 +2399 5 +2404 5 +2409 6 +2415 6 +2421 5 +2426 8 +2434 5 +2439 6 +2445 6 +2451 6 +2457 2 +2459 4 +2463 3 +2466 6 +2472 5 +2477 5 +2482 10 +2492 6 +2498 4 +2502 9 +2511 4 +2515 4 +2519 6 +2525 9 +2534 7 +2541 6 +2547 4 +2551 6 +2557 5 +2562 3 +2565 6 +2571 6 +2577 7 +2584 4 +2588 10 +2598 8 +2606 6 +2612 6 +2618 4 +2622 7 +2629 7 +2636 6 +2642 2 +2644 4 +2648 12 +2660 6 +2666 13 +2679 11 +2690 9 +2699 2 +2701 5 +2706 6 +2712 6 +2718 3 +2721 7 +2728 3 +2731 6 +2737 11 +2748 2 +2750 7 +2757 4 +2761 5 +2766 4 +2770 6 +2776 4 +2780 6 +2786 5 +2791 6 +2797 4 +2801 9 +2810 7 +2817 6 +2823 8 +2831 8 +2839 13 +2852 7 +2859 6 +2865 6 +2871 4 +2875 7 +2882 9 +2891 11 +2902 5 +2907 9 +2916 4 +2920 5 +2925 3 +2928 4 +2932 4 +2936 5 +2941 2 +2943 9 +2952 3 +2955 13 +2968 4 +2972 5 +2977 4 +2981 8 +2989 10 +2999 5 +3004 8 +3012 5 +3017 4 +3021 4 +3025 11 +3036 5 +3041 6 +3047 7 +3054 5 +3059 5 +3064 6 +3070 9 +3079 2 +3081 4 +3085 5 +3090 11 +3101 5 +3106 3 +3109 5 +3114 5 +3119 6 +3125 8 +3133 6 +3139 4 +3143 5 +3148 7 +3155 6 +3161 8 +3169 5 +3174 4 +3178 6 +3184 6 +3190 6 +3196 5 +3201 4 +3205 6 +3211 8 +3219 7 +3226 6 +3232 3 +3235 5 +3240 7 +3247 6 +3253 11 +3264 6 +3270 4 +3274 3 +3277 1 +3278 2 +3280 8 +3288 7 +3295 8 +3303 8 +3311 5 +3316 9 +3325 9 +3334 7 
+3341 3 +3344 7 +3351 8 +3359 5 +3364 1 +3365 4 +3369 2 +3371 5 +3376 7 +3383 8 +3391 2 +3393 5 +3398 5 +3403 12 +3415 4 +3419 6 +3425 5 +3430 4 +3434 8 +3442 6 +3448 4 +3452 3 +3455 7 +3462 4 +3466 6 +3472 5 +3477 7 +3484 8 +3492 6 +3498 6 +3504 11 +3515 6 +3521 2 +3523 10 +3533 3 +3536 7 +3543 8 +3551 4 +3555 11 +3566 5 +3571 8 +3579 6 +3585 8 +3593 6 +3599 5 +3604 7 +3611 2 +3613 5 +3618 2 +3620 4 +3624 10 +3634 4 +3638 8 +3646 5 +3651 4 +3655 6 +3661 5 +3666 11 +3677 10 +3687 4 +3691 6 +3697 2 +3699 6 +3705 6 +3711 4 +3715 7 +3722 4 +3726 6 +3732 9 +3741 4 +3745 6 +3751 7 +3758 10 +3768 3 +3771 12 +3783 2 +3785 7 +3792 4 +3796 8 +3804 8 +3812 4 +3816 5 +3821 10 +3831 6 +3837 6 +3843 5 +3848 6 +3854 7 +3861 6 +3867 3 +3870 5 +3875 5 +3880 7 +3887 12 +3899 9 +3908 4 +3912 6 +3918 3 +3921 6 +3927 4 +3931 8 +3939 6 +3945 9 +3954 11 +3965 10 +3975 7 +3982 7 +3989 8 +3997 8 +4005 6 +4011 3 +4014 6 +4020 10 +4030 5 +4035 9 +4044 6 +4050 3 +4053 7 +4060 2 +4062 4 +4066 3 +4069 4 +4073 7 +4080 11 +4091 3 +4094 6 +4100 7 +4107 4 +4111 7 +4118 7 +4125 7 +4132 9 +4141 6 +4147 5 +4152 3 +4155 9 +4164 8 +4172 5 +4177 5 +4182 7 +4189 7 +4196 7 +4203 7 +4210 4 +4214 6 +4220 3 +4223 4 +4227 3 +4230 4 +4234 4 +4238 8 +4246 5 +4251 5 +4256 3 +4259 7 +4266 5 +4271 3 +4274 4 +4278 6 +4284 5 +4289 4 +4293 9 +4302 9 +4311 5 +4316 5 +4321 6 +4327 5 +4332 9 +4341 5 +4346 5 +4351 4 +4355 5 +4360 8 +4368 6 +4374 15 +4389 7 +4396 2 +4398 7 +4405 7 +4412 9 +4421 4 +4425 8 +4433 6 +4439 6 +4445 5 +4450 6 +4456 7 +4463 5 +4468 4 +4472 9 +4481 4 +4485 7 +4492 9 +4501 3 +4504 15 +4519 8 +4527 10 +4537 5 +4542 2 +4544 6 +4550 3 +4553 5 +4558 5 +4563 8 +4571 9 +4580 8 +4588 7 +4595 10 +4605 9 +4614 10 +4624 7 +4631 3 +4634 5 +4639 6 +4645 5 +4650 5 +4655 10 +4665 10 +4675 9 +4684 3 +4687 6 +4693 7 +4700 5 +4705 9 +4714 6 +4720 7 +4727 6 +4733 8 +4741 12 +4753 7 +4760 5 +4765 9 +4774 4 +4778 5 +4783 5 +4788 9 +4797 7 +4804 6 +4810 5 +4815 9 +4824 3 +4827 5 +4832 5 +4837 3 +4840 7 +4847 3 +4850 4 
+4854 7 +4861 9 +4870 7 +4877 8 +4885 6 +4891 6 +4897 5 +4902 10 +4912 4 +4916 4 +4920 4 +4924 3 +4927 5 +4932 7 +4939 4 +4943 3 +4946 5 +4951 5 +4956 3 +4959 3 +4962 1 +4963 6 +4969 4 +4973 10 +4983 4 +4987 7 +4994 4 +4998 4 +5002 4 +5006 6 +5012 7 +5019 7 +5026 8 +5034 12 +5046 7 +5053 5 +5058 7 +5065 8 +5073 3 +5076 6 +5082 5 +5087 3 +5090 6 +5096 3 +5099 4 +5103 4 +5107 6 +5113 7 +5120 5 +5125 3 +5128 2 +5130 5 +5135 6 +5141 6 +5147 2 +5149 7 +5156 9 +5165 8 +5173 6 +5179 4 +5183 6 +5189 6 +5195 8 +5203 3 +5206 9 +5215 3 +5218 6 +5224 13 +5237 9 +5246 6 +5252 7 +5259 11 +5270 5 +5275 9 +5284 6 +5290 4 +5294 6 +5300 9 +5309 7 +5316 4 +5320 5 +5325 3 +5328 1 +5329 4 +5333 4 +5337 3 +5340 3 +5343 2 +5345 4 +5349 7 +5356 5 +5361 11 +5372 6 +5378 8 +5386 7 +5393 5 +5398 2 +5400 9 +5409 8 +5417 2 +5419 5 +5424 3 +5427 9 +5436 6 +5442 7 +5449 5 +5454 7 +5461 6 +5467 4 +5471 4 +5475 8 +5483 3 +5486 4 +5490 13 +5503 7 +5510 6 +5516 2 +5518 6 +5524 8 +5532 8 +5540 7 +5547 9 +5556 4 +5560 4 +5564 7 +5571 2 +5573 10 +5583 2 +5585 8 +5593 4 +5597 7 +5604 8 +5612 8 +5620 5 +5625 3 +5628 6 +5634 5 +5639 9 +5648 6 +5654 6 +5660 3 +5663 9 +5672 9 +5681 7 +5688 8 +5696 6 +5702 7 +5709 2 +5711 8 +5719 4 +5723 6 +5729 3 +5732 9 +5741 7 +5748 6 +5754 8 +5762 6 +5768 4 +5772 6 +5778 8 +5786 3 +5789 10 +5799 10 +5809 5 +5814 9 +5823 5 +5828 10 +5838 9 +5847 7 +5854 5 +5859 4 +5863 7 +5870 4 +5874 5 +5879 6 +5885 8 +5893 8 +5901 7 +5908 4 +5912 2 +5914 6 +5920 5 +5925 7 +5932 6 +5938 3 +5941 6 +5947 7 +5954 5 +5959 8 +5967 5 +5972 7 +5979 6 +5985 4 +5989 5 +5994 5 +5999 3 +6002 2 +6004 5 +6009 7 +6016 11 +6027 7 +6034 6 +6040 3 +6043 6 +6049 11 +6060 10 +6070 2 +6072 9 +6081 5 +6086 2 +6088 4 +6092 7 +6099 6 +6105 5 +6110 5 +6115 5 +6120 3 +6123 3 +6126 5 +6131 7 +6138 5 +6143 11 +6154 4 +6158 8 +6166 8 +6174 9 +6183 4 +6187 6 +6193 5 +6198 4 +6202 6 +6208 5 +6213 6 +6219 8 +6227 6 +6233 6 +6239 5 +6244 4 +6248 4 +6252 4 +6256 6 +6262 7 +6269 4 +6273 6 +6279 11 +6290 5 +6295 9 +6304 2 
+6306 8 +6314 4 +6318 3 +6321 2 +6323 9 +6332 9 +6341 2 +6343 8 +6351 9 +6360 5 +6365 4 +6369 5 +6374 3 +6377 6 +6383 12 +6395 7 +6402 3 +6405 9 +6414 7 +6421 7 +6428 5 +6433 6 +6439 5 +6444 6 +6450 2 +6452 6 +6458 3 +6461 9 +6470 6 +6476 7 +6483 11 +6494 9 +6503 5 +6508 8 +6516 4 +6520 7 +6527 5 +6532 2 +6534 4 +6538 4 +6542 7 +6549 5 +6554 6 +6560 3 +6563 4 +6567 7 +6574 5 +6579 6 +6585 5 +6590 7 +6597 11 +6608 8 +6616 5 +6621 16 +6637 5 +6642 12 +6654 7 +6661 6 +6667 10 +6677 5 +6682 7 +6689 1 +6690 6 +6696 8 +6704 5 +6709 10 +6719 5 +6724 3 +6727 6 +6733 5 +6738 2 +6740 4 +6744 5 +6749 12 +6761 5 +6766 10 +6776 8 +6784 7 +6791 6 +6797 6 +6803 3 +6806 5 +6811 6 +6817 2 +6819 11 +6830 7 +6837 7 +6844 8 +6852 6 +6858 8 +6866 6 +6872 4 +6876 3 +6879 7 +6886 8 +6894 6 +6900 6 +6906 3 +6909 8 +6917 5 +6922 7 +6929 4 +6933 6 +6939 7 +6946 5 +6951 5 +6956 5 +6961 9 +6970 8 +6978 5 +6983 8 +6991 5 +6996 6 +7002 7 +7009 3 +7012 8 +7020 10 +7030 3 +7033 6 +7039 6 +7045 8 +7053 5 +7058 7 +7065 5 +7070 4 +7074 9 +7083 10 +7093 6 +7099 5 +7104 4 +7108 12 +7120 8 +7128 2 +7130 3 +7133 2 +7135 11 +7146 12 +7158 6 +7164 9 +7173 12 +7185 8 +7193 5 +7198 4 +7202 7 +7209 3 +7212 4 +7216 8 +7224 3 +7227 4 +7231 5 +7236 7 +7243 5 +7248 7 +7255 3 +7258 10 +7268 8 +7276 3 +7279 8 +7287 11 +7298 2 +7300 8 +7308 6 +7314 6 +7320 9 +7329 4 +7333 11 +7344 6 +7350 4 +7354 5 +7359 4 +7363 9 +7372 1 +7373 10 +7383 4 +7387 8 +7395 7 +7402 8 +7410 9 +7419 4 +7423 3 +7426 6 +7432 5 +7437 7 +7444 9 +7453 8 +7461 6 +7467 10 +7477 8 +7485 13 +7498 4 +7502 6 +7508 7 +7515 10 +7525 7 +7532 4 +7536 3 +7539 3 +7542 10 +7552 5 +7557 6 +7563 6 +7569 3 +7572 7 +7579 9 +7588 5 +7593 8 +7601 7 +7608 7 +7615 7 +7622 5 +7627 5 +7632 6 +7638 7 +7645 6 +7651 6 +7657 10 +7667 6 +7673 4 +7677 5 +7682 8 +7690 6 +7696 8 +7704 9 +7713 2 +7715 3 +7718 9 +7727 4 +7731 4 +7735 6 +7741 6 +7747 9 +7756 6 +7762 3 +7765 4 +7769 12 +7781 4 +7785 4 +7789 6 +7795 7 +7802 3 +7805 1 +7806 7 +7813 2 +7815 4 +7819 3 +7822 5 +7827 
9 +7836 8 +7844 9 +7853 8 +7861 6 +7867 2 +7869 4 +7873 8 +7881 5 +7886 9 +7895 3 +7898 10 +7908 2 +7910 8 +7918 6 +7924 7 +7931 4 +7935 7 +7942 3 +7945 6 +7951 8 +7959 6 +7965 11 +7976 6 +7982 9 +7991 4 +7995 2 +7997 7 +8004 5 +8009 5 +8014 7 +8021 8 +8029 7 +8036 4 +8040 4 +8044 11 +8055 11 +8066 6 +8072 6 +8078 9 +8087 3 +8090 6 +8096 9 +8105 6 +8111 4 +8115 6 +8121 4 +8125 4 +8129 5 +8134 8 +8142 10 +8152 5 +8157 4 +8161 6 +8167 7 +8174 6 +8180 3 +8183 6 +8189 5 +8194 10 +8204 4 +8208 6 +8214 5 +8219 3 +8222 5 +8227 8 +8235 8 +8243 4 +8247 4 +8251 4 +8255 11 +8266 10 +8276 6 +8282 6 +8288 8 +8296 3 +8299 4 +8303 6 +8309 5 +8314 9 +8323 3 +8326 3 +8329 9 +8338 6 +8344 7 +8351 5 +8356 4 +8360 7 +8367 11 +8378 4 +8382 6 +8388 9 +8397 8 +8405 8 +8413 4 +8417 6 +8423 9 +8432 1 +8433 3 +8436 7 +8443 5 +8448 4 +8452 6 +8458 3 +8461 4 +8465 4 +8469 5 +8474 5 +8479 4 +8483 5 +8488 5 +8493 3 +8496 7 +8503 5 +8508 9 +8517 6 +8523 3 +8526 3 +8529 6 +8535 4 +8539 7 +8546 8 +8554 7 +8561 4 +8565 5 +8570 6 +8576 6 +8582 6 +8588 6 +8594 6 +8600 6 +8606 4 +8610 3 +8613 5 +8618 4 +8622 8 +8630 2 +8632 8 +8640 5 +8645 6 +8651 4 +8655 5 +8660 4 +8664 7 +8671 3 +8674 7 +8681 3 +8684 5 +8689 7 +8696 3 +8699 5 +8704 5 +8709 5 +8714 6 +8720 9 +8729 5 +8734 6 +8740 2 +8742 4 +8746 9 +8755 5 +8760 8 +8768 4 +8772 10 +8782 5 +8787 7 +8794 7 +8801 3 +8804 4 +8808 5 +8813 10 +8823 4 +8827 8 +8835 8 +8843 5 +8848 4 +8852 4 +8856 5 +8861 7 +8868 10 +8878 5 +8883 3 +8886 2 +8888 4 +8892 8 +8900 5 +8905 3 +8908 4 +8912 7 +8919 12 +8931 9 +8940 6 +8946 5 +8951 5 +8956 7 +8963 12 +8975 10 +8985 8 +8993 9 +9002 10 +9012 6 +9018 11 +9029 5 +9034 4 +9038 9 +9047 6 +9053 12 +9065 6 +9071 6 +9077 2 +9079 1 +9080 6 +9086 3 +9089 6 +9095 8 +9103 5 +9108 6 +9114 10 +9124 2 +9126 10 +9136 5 +9141 4 +9145 4 +9149 4 +9153 4 +9157 8 +9165 7 +9172 12 +9184 2 +9186 5 +9191 6 +9197 4 +9201 4 +9205 5 +9210 5 +9215 5 +9220 14 +9234 5 +9239 4 +9243 5 +9248 3 +9251 3 +9254 7 +9261 5 +9266 6 +9272 7 +9279 6 +9285 5 
+9290 6 +9296 4 +9300 7 +9307 8 +9315 5 +9320 2 +9322 4 +9326 7 +9333 9 +9342 7 +9349 4 +9353 7 +9360 3 +9363 2 +9365 3 +9368 7 +9375 5 +9380 4 +9384 4 +9388 4 +9392 3 +9395 3 +9398 5 +9403 9 +9412 7 +9419 4 +9423 5 +9428 3 +9431 6 +9437 6 +9443 2 +9445 7 +9452 4 +9456 9 +9465 4 +9469 5 +9474 6 +9480 4 +9484 12 +9496 6 +9502 7 +9509 8 +9517 6 +9523 1 +9524 5 +9529 5 +9534 5 +9539 5 +9544 4 +9548 3 +9551 11 +9562 4 +9566 6 +9572 4 +9576 6 +9582 5 +9587 4 +9591 3 +9594 3 +9597 3 +9600 9 +9609 6 +9615 4 +9619 7 +9626 5 +9631 4 +9635 4 +9639 8 +9647 6 +9653 9 +9662 5 +9667 7 +9674 6 +9680 8 +9688 2 +9690 6 +9696 4 +9700 5 +9705 8 +9713 6 +9719 4 +9723 9 +9732 9 +9741 9 +9750 2 +9752 3 +9755 6 +9761 8 +9769 4 +9773 7 +9780 3 +9783 5 +9788 4 +9792 1 +9793 8 +9801 6 +9807 11 +9818 4 +9822 8 +9830 5 +9835 8 +9843 6 +9849 6 +9855 8 +9863 9 +9872 7 +9879 2 +9881 5 +9886 6 +9892 5 +9897 4 +9901 14 +9915 5 +9920 5 +9925 8 +9933 10 +9943 5 +9948 5 +9953 5 +9958 5 +9963 5 +9968 7 +9975 3 +9978 4 +9982 6 +9988 5 +9993 6 +9999 11 +10010 7 +10017 5 +10022 4 +10026 6 +10032 7 +10039 5 +10044 6 +10050 4 +10054 7 +10061 9 +10070 7 +10077 4 +10081 6 +10087 3 +10090 5 +10095 6 +10101 4 +10105 13 +10118 5 +10123 4 +10127 10 +10137 8 +10145 6 +10151 9 +10160 3 +10163 2 +10165 12 +10177 10 +10187 9 +10196 3 +10199 11 +10210 13 +10223 5 +10228 7 +10235 6 +10241 5 +10246 2 +10248 3 +10251 6 +10257 9 +10266 6 +10272 6 +10278 8 +10286 7 +10293 2 +10295 3 +10298 9 +10307 5 +10312 5 +10317 6 +10323 5 +10328 9 +10337 6 +10343 7 +10350 9 +10359 7 +10366 5 +10371 7 +10378 9 +10387 4 +10391 7 +10398 6 +10404 2 +10406 4 +10410 10 +10420 9 +10429 10 +10439 4 +10443 4 +10447 4 +10451 3 +10454 6 +10460 5 +10465 8 +10473 6 +10479 6 +10485 6 +10491 7 +10498 7 +10505 11 +10516 6 +10522 9 +10531 4 +10535 5 +10540 7 +10547 6 +10553 3 +10556 5 +10561 4 +10565 11 +10576 6 +10582 7 +10589 3 +10592 4 +10596 5 +10601 8 +10609 3 +10612 7 +10619 9 +10628 5 +10633 3 +10636 11 +10647 5 +10652 5 +10657 8 +10665 5 
+10670 8 +10678 5 +10683 2 +10685 9 +10694 7 +10701 6 +10707 5 +10712 5 +10717 7 +10724 5 +10729 3 +10732 3 +10735 7 +10742 5 +10747 4 +10751 9 +10760 7 +10767 11 +10778 9 +10787 5 +10792 6 +10798 6 +10804 5 +10809 5 +10814 6 +10820 5 +10825 5 +10830 11 +10841 6 +10847 5 +10852 5 +10857 7 +10864 5 +10869 12 +10881 7 +10888 7 +10895 4 +10899 2 +10901 5 +10906 6 +10912 9 +10921 2 +10923 7 +10930 5 +10935 4 +10939 7 +10946 10 +10956 10 +10966 4 +10970 7 +10977 6 +10983 7 +10990 6 +10996 2 +10998 3 +11001 5 +11006 4 +11010 6 +11016 5 +11021 5 +11026 6 +11032 6 +11038 3 +11041 9 +11050 7 +11057 5 +11062 2 +11064 5 +11069 5 +11074 8 +11082 9 +11091 4 +11095 6 +11101 6 +11107 9 +11116 5 +11121 5 +11126 4 +11130 2 +11132 7 +11139 4 +11143 6 +11149 7 +11156 3 +11159 5 +11164 4 +11168 1 +11169 8 +11177 7 +11184 5 +11189 6 +11195 2 +11197 7 +11204 4 +11208 8 +11216 4 +11220 5 +11225 8 +11233 9 +11242 3 +11245 5 +11250 11 +11261 6 +11267 4 +11271 9 +11280 11 +11291 4 +11295 5 +11300 6 +11306 9 +11315 1 +11316 5 +11321 7 +11328 4 +11332 3 +11335 3 +11338 5 +11343 5 +11348 7 +11355 2 +11357 5 +11362 5 +11367 9 +11376 7 +11383 6 +11389 9 +11398 8 +11406 9 +11415 5 +11420 16 +11436 1 +11437 8 +11445 7 +11452 6 +11458 11 +11469 7 +11476 5 +11481 11 +11492 3 +11495 3 +11498 5 +11503 3 +11506 7 +11513 7 +11520 5 +11525 7 +11532 5 +11537 11 +11548 3 +11551 2 +11553 6 +11559 7 +11566 6 +11572 6 +11578 8 +11586 7 +11593 7 +11600 6 +11606 7 +11613 9 +11622 10 +11632 7 +11639 10 +11649 8 +11657 6 +11663 7 +11670 5 +11675 11 +11686 10 +11696 13 +11709 6 +11715 6 +11721 12 +11733 5 +11738 3 +11741 4 +11745 6 +11751 6 +11757 13 +11770 6 +11776 6 +11782 5 +11787 2 +11789 6 +11795 5 +11800 4 +11804 7 +11811 8 +11819 3 +11822 7 +11829 7 +11836 7 +11843 9 +11852 2 +11854 2 +11856 7 +11863 5 +11868 6 +11874 4 +11878 7 +11885 2 +11887 4 +11891 4 +11895 3 +11898 5 +11903 6 +11909 3 +11912 7 +11919 6 +11925 8 +11933 2 +11935 9 +11944 5 +11949 6 +11955 5 +11960 3 +11963 13 +11976 8 +11984 6 +11990 3 
+11993 4 +11997 3 +12000 7 +12007 6 +12013 9 +12022 4 +12026 11 +12037 4 +12041 6 +12047 6 +12053 9 +12062 4 +12066 3 +12069 6 +12075 7 +12082 3 +12085 5 +12090 8 +12098 6 +12104 4 +12108 8 +12116 4 +12120 11 +12131 6 +12137 7 +12144 3 +12147 8 +12155 8 +12163 3 +12166 6 +12172 5 +12177 3 +12180 5 +12185 6 +12191 3 +12194 7 +12201 8 +12209 3 +12212 2 +12214 5 +12219 4 +12223 2 +12225 8 +12233 4 +12237 5 +12242 3 +12245 5 +12250 6 +12256 5 +12261 8 +12269 4 +12273 6 +12279 4 +12283 7 +12290 5 +12295 8 +12303 5 +12308 3 +12311 6 +12317 6 +12323 4 +12327 7 +12334 7 +12341 1 +12342 8 +12350 4 +12354 4 +12358 6 +12364 5 +12369 13 +12382 3 +12385 7 +12392 4 +12396 7 +12403 10 +12413 8 +12421 9 +12430 9 +12439 6 +12445 6 +12451 10 +12461 6 +12467 8 +12475 3 +12478 9 +12487 11 +12498 4 +12502 6 +12508 4 +12512 4 +12516 4 +12520 5 +12525 8 +12533 4 +12537 5 +12542 5 +12547 6 +12553 4 +12557 8 +12565 8 +12573 6 +12579 5 +12584 5 +12589 6 +12595 4 +12599 4 +12603 2 +12605 8 +12613 4 +12617 5 +12622 4 +12626 6 +12632 5 +12637 12 +12649 5 +12654 6 +12660 9 +12669 5 +12674 5 +12679 7 +12686 6 +12692 5 +12697 4 +12701 6 +12707 5 +12712 5 +12717 6 +12723 5 +12728 8 +12736 10 +12746 6 +12752 7 +12759 6 +12765 4 +12769 6 +12775 6 +12781 13 +12794 6 +12800 11 +12811 4 +12815 8 +12823 7 +12830 7 +12837 6 +12843 6 +12849 7 +12856 5 +12861 10 +12871 10 +12881 8 +12889 9 +12898 4 +12902 6 +12908 9 +12917 8 +12925 9 +12934 4 +12938 4 +12942 7 +12949 2 +12951 7 +12958 1 +12959 9 +12968 8 +12976 8 +12984 1 +12985 4 +12989 4 +12993 4 +12997 8 +13005 4 +13009 4 +13013 8 +13021 9 +13030 6 +13036 8 +13044 3 +13047 8 +13055 5 +13060 7 +13067 8 +13075 7 +13082 2 +13084 5 +13089 9 +13098 5 +13103 7 +13110 6 +13116 6 +13122 6 +13128 6 +13134 7 +13141 5 +13146 8 +13154 12 +13166 4 +13170 6 +13176 4 +13180 6 +13186 5 +13191 3 +13194 6 +13200 9 +13209 4 +13213 5 +13218 8 +13226 6 +13232 3 +13235 8 +13243 7 +13250 8 +13258 5 +13263 5 +13268 4 +13272 7 +13279 3 +13282 11 +13293 7 +13300 6 +13306 5 
+13311 5 +13316 6 +13322 9 +13331 2 +13333 6 +13339 7 +13346 7 +13353 6 +13359 6 +13365 5 +13370 4 +13374 6 +13380 9 +13389 11 +13400 5 +13405 8 +13413 4 +13417 4 +13421 9 +13430 4 +13434 9 +13443 3 +13446 7 +13453 6 +13459 6 +13465 1 +13466 7 +13473 7 +13480 6 +13486 5 +13491 7 +13498 3 +13501 6 +13507 5 +13512 4 +13516 8 +13524 2 +13526 4 +13530 5 +13535 3 +13538 5 +13543 5 +13548 5 +13553 3 +13556 4 +13560 7 +13567 4 +13571 4 +13575 8 +13583 9 +13592 6 +13598 7 +13605 1 +13606 9 +13615 9 +13624 10 +13634 4 +13638 3 +13641 9 +13650 8 +13658 5 +13663 7 +13670 4 +13674 12 +13686 2 +13688 3 +13691 5 +13696 5 +13701 10 +13711 4 +13715 4 +13719 7 +13726 5 +13731 4 +13735 9 +13744 7 +13751 5 +13756 4 +13760 8 +13768 8 +13776 9 +13785 7 +13792 7 +13799 6 +13805 6 +13811 7 +13818 11 +13829 7 +13836 6 +13842 5 +13847 6 +13853 7 +13860 10 +13870 4 +13874 3 +13877 4 +13881 4 +13885 6 +13891 6 +13897 8 +13905 10 +13915 9 +13924 6 +13930 2 +13932 4 +13936 6 +13942 10 +13952 8 +13960 4 +13964 12 +13976 6 +13982 5 +13987 6 +13993 5 +13998 3 +14001 7 +14008 7 +14015 10 +14025 3 +14028 6 +14034 6 +14040 2 +14042 3 +14045 5 +14050 6 +14056 4 +14060 7 +14067 9 +14076 1 +14077 6 +14083 5 +14088 4 +14092 7 +14099 9 +14108 2 +14110 14 +14124 7 +14131 4 +14135 7 +14142 8 +14150 3 +14153 5 +14158 7 +14165 11 +14176 6 +14182 8 +14190 5 +14195 8 +14203 6 +14209 5 +14214 5 +14219 2 +14221 4 +14225 3 +14228 4 +14232 5 +14237 5 +14242 5 +14247 3 +14250 9 +14259 7 +14266 4 +14270 6 +14276 6 +14282 3 +14285 2 +14287 8 +14295 6 +14301 6 +14307 2 +14309 7 +14316 6 +14322 5 +14327 9 +14336 3 +14339 6 +14345 7 +14352 2 +14354 7 +14361 6 +14367 8 +14375 6 +14381 6 +14387 2 +14389 3 +14392 4 +14396 5 +14401 3 +14404 6 +14410 7 +14417 4 +14421 4 +14425 5 +14430 4 +14434 11 +14445 8 +14453 5 +14458 5 +14463 8 +14471 3 +14474 5 +14479 5 +14484 15 +14499 6 +14505 7 +14512 5 +14517 5 +14522 2 +14524 3 +14527 2 +14529 9 +14538 9 +14547 6 +14553 4 +14557 11 +14568 6 +14574 7 +14581 5 +14586 2 +14588 3 
+14591 8 +14599 3 +14602 5 +14607 5 +14612 6 +14618 2 +14620 4 +14624 3 +14627 9 +14636 5 +14641 2 +14643 12 +14655 7 +14662 4 +14666 8 +14674 4 +14678 9 +14687 5 +14692 6 +14698 9 +14707 3 +14710 9 +14719 4 +14723 5 +14728 5 +14733 2 +14735 5 +14740 4 +14744 11 +14755 7 +14762 7 +14769 12 +14781 4 +14785 8 +14793 8 +14801 5 +14806 4 +14810 4 +14814 6 +14820 8 +14828 9 +14837 4 +14841 4 +14845 6 +14851 3 +14854 6 +14860 9 +14869 7 +14876 8 +14884 7 +14891 6 +14897 6 +14903 5 +14908 6 +14914 8 +14922 5 +14927 3 +14930 7 +14937 4 +14941 8 +14949 8 +14957 7 +14964 7 +14971 7 +14978 7 +14985 8 +14993 6 +14999 6 +15005 10 +15015 5 +15020 4 +15024 8 +15032 7 +15039 3 +15042 6 +15048 7 +15055 5 +15060 9 +15069 13 +15082 6 +15088 5 +15093 4 +15097 5 +15102 6 +15108 6 +15114 6 +15120 10 +15130 7 +15137 9 +15146 5 +15151 8 +15159 9 +15168 10 +15178 10 +15188 4 +15192 4 +15196 4 +15200 7 +15207 6 +15213 12 +15225 3 +15228 7 +15235 3 +15238 6 +15244 8 +15252 5 +15257 10 +15267 7 +15274 6 +15280 2 +15282 5 +15287 2 +15289 4 +15293 3 +15296 5 +15301 8 +15309 9 +15318 4 +15322 3 +15325 4 +15329 5 +15334 3 +15337 5 +15342 2 +15344 4 +15348 11 +15359 3 +15362 8 +15370 7 +15377 4 +15381 7 +15388 7 +15395 5 +15400 6 +15406 9 +15415 4 +15419 10 +15429 9 +15438 4 +15442 2 +15444 6 +15450 6 +15456 12 +15468 7 +15475 5 +15480 6 +15486 3 +15489 5 +15494 6 +15500 5 +15505 3 +15508 3 +15511 1 +15512 10 +15522 8 +15530 6 +15536 4 +15540 3 +15543 8 +15551 5 +15556 4 +15560 9 +15569 10 +15579 6 +15585 11 +15596 10 +15606 9 +15615 12 +15627 9 +15636 4 +15640 4 +15644 4 +15648 11 +15659 4 +15663 5 +15668 4 +15672 5 +15677 5 +15682 1 +15683 3 +15686 4 +15690 7 +15697 7 +15704 6 +15710 6 +15716 4 +15720 4 +15724 10 +15734 7 +15741 5 +15746 8 +15754 5 +15759 5 +15764 4 +15768 6 +15774 8 +15782 2 +15784 6 +15790 5 +15795 4 +15799 5 +15804 5 +15809 9 +15818 6 +15824 6 +15830 3 +15833 10 +15843 7 +15850 4 +15854 5 +15859 6 +15865 9 +15874 4 +15878 4 +15882 5 +15887 7 +15894 5 +15899 6 +15905 8 +15913 
8 +15921 4 +15925 6 +15931 10 +15941 7 +15948 4 +15952 5 +15957 7 +15964 10 +15974 1 +15975 1 +15976 6 +15982 2 +15984 11 +15995 6 +16001 3 +16004 4 +16008 5 +16013 6 +16019 6 +16025 11 +16036 4 +16040 3 +16043 4 +16047 4 +16051 4 +16055 6 +16061 3 +16064 4 +16068 3 +16071 7 +16078 10 +16088 11 +16099 5 +16104 7 +16111 8 +16119 8 +16127 6 +16133 3 +16136 3 +16139 8 +16147 4 +16151 4 +16155 11 +16166 6 +16172 4 +16176 12 +16188 5 +16193 2 +16195 2 +16197 2 +16199 7 +16206 1 +16207 8 +16215 7 +16222 4 +16226 6 +16232 6 +16238 7 +16245 8 +16253 6 +16259 6 +16265 4 +16269 5 +16274 5 +16279 7 +16286 3 +16289 9 +16298 4 +16302 5 +16307 7 +16314 6 +16320 5 +16325 4 +16329 8 +16337 6 +16343 5 +16348 7 +16355 3 +16358 3 +16361 4 +16365 6 +16371 4 +16375 9 +16384 4 +16388 5 +16393 3 +16396 7 +16403 5 +16408 9 +16417 9 +16426 7 +16433 7 +16440 9 +16449 3 +16452 5 +16457 1 +16458 10 +16468 10 +16478 9 +16487 4 +16491 6 +16497 9 +16506 11 +16517 7 +16524 9 +16533 6 +16539 9 +16548 8 +16556 8 +16564 5 +16569 8 +16577 4 +16581 4 +16585 5 +16590 5 +16595 9 +16604 7 +16611 8 +16619 3 +16622 12 +16634 13 +16647 3 +16650 2 +16652 4 +16656 7 +16663 7 +16670 4 +16674 3 +16677 6 +16683 6 +16689 6 +16695 5 +16700 10 +16710 3 +16713 7 +16720 6 +16726 6 +16732 6 +16738 12 +16750 5 +16755 3 +16758 4 +16762 4 +16766 10 +16776 6 +16782 10 +16792 5 +16797 5 +16802 4 +16806 4 +16810 7 +16817 2 +16819 6 +16825 7 +16832 8 +16840 9 +16849 5 +16854 3 +16857 10 +16867 6 +16873 8 +16881 7 +16888 8 +16896 10 +16906 9 +16915 6 +16921 3 +16924 7 +16931 5 +16936 6 +16942 7 +16949 11 +16960 3 +16963 8 +16971 6 +16977 8 +16985 4 +16989 8 +16997 8 +17005 5 +17010 3 +17013 6 +17019 5 +17024 4 +17028 7 +17035 2 +17037 7 +17044 3 +17047 5 +17052 5 +17057 3 +17060 8 +17068 6 +17074 8 +17082 4 +17086 6 +17092 9 +17101 5 +17106 4 +17110 6 +17116 6 +17122 7 +17129 8 +17137 7 +17144 7 +17151 9 +17160 6 +17166 3 +17169 3 +17172 4 +17176 5 +17181 4 +17185 4 +17189 4 +17193 7 +17200 14 +17214 6 +17220 4 +17224 5 
+17229 5 +17234 8 +17242 10 +17252 8 +17260 4 +17264 6 +17270 12 +17282 7 +17289 9 +17298 10 +17308 6 +17314 5 +17319 5 +17324 4 +17328 4 +17332 7 +17339 4 +17343 8 +17351 4 +17355 7 +17362 8 +17370 5 +17375 4 +17379 5 +17384 4 +17388 8 +17396 5 +17401 5 +17406 6 +17412 4 +17416 6 +17422 6 +17428 9 +17437 7 +17444 8 +17452 4 +17456 3 +17459 4 +17463 9 +17472 5 +17477 8 +17485 5 +17490 6 +17496 6 +17502 8 +17510 6 +17516 8 +17524 5 +17529 8 +17537 5 +17542 6 +17548 7 +17555 5 +17560 4 +17564 5 +17569 6 +17575 4 +17579 4 +17583 3 +17586 4 +17590 13 +17603 4 +17607 8 +17615 2 +17617 19 +17636 7 +17643 3 +17646 6 +17652 7 +17659 6 +17665 3 +17668 5 +17673 7 +17680 5 +17685 7 +17692 8 +17700 4 +17704 5 +17709 2 +17711 4 +17715 5 +17720 11 +17731 5 +17736 11 +17747 6 +17753 3 +17756 5 +17761 5 +17766 13 +17779 4 +17783 5 +17788 8 +17796 5 +17801 10 +17811 6 +17817 9 +17826 3 +17829 7 +17836 7 +17843 4 +17847 3 +17850 6 +17856 6 +17862 8 +17870 3 +17873 5 +17878 4 +17882 7 +17889 5 +17894 11 +17905 6 +17911 4 +17915 5 +17920 7 +17927 3 +17930 9 +17939 7 +17946 9 +17955 4 +17959 7 +17966 5 +17971 5 +17976 5 +17981 18 +17999 8 +18007 3 +18010 6 +18016 7 +18023 3 +18026 8 +18034 7 +18041 8 +18049 5 +18054 4 +18058 10 +18068 2 +18070 9 +18079 5 +18084 5 +18089 6 +18095 4 +18099 3 +18102 11 +18113 5 +18118 6 +18124 4 +18128 2 +18130 9 +18139 5 +18144 11 +18155 7 +18162 3 +18165 5 +18170 4 +18174 4 +18178 5 +18183 6 +18189 6 +18195 6 +18201 6 +18207 9 +18216 4 +18220 9 +18229 3 +18232 5 +18237 6 +18243 7 +18250 8 +18258 4 +18262 11 +18273 5 +18278 7 +18285 3 +18288 3 +18291 7 +18298 6 +18304 4 +18308 2 +18310 2 +18312 10 +18322 5 +18327 6 +18333 3 +18336 9 +18345 3 +18348 5 +18353 9 +18362 6 +18368 5 +18373 6 +18379 5 +18384 10 +18394 6 +18400 3 +18403 4 +18407 7 +18414 5 +18419 6 +18425 6 +18431 5 +18436 4 +18440 5 +18445 8 +18453 11 +18464 10 +18474 10 +18484 5 +18489 3 +18492 9 +18501 9 +18510 3 +18513 8 +18521 7 +18528 4 +18532 3 +18535 5 +18540 7 +18547 6 +18553 3 +18556 7 
+18563 7 +18570 8 +18578 4 +18582 3 +18585 12 +18597 6 +18603 8 +18611 7 +18618 5 +18623 4 +18627 5 +18632 10 +18642 5 +18647 5 +18652 4 +18656 7 +18663 9 +18672 8 +18680 5 +18685 5 +18690 3 +18693 7 +18700 8 +18708 6 +18714 10 +18724 6 +18730 7 +18737 2 +18739 4 +18743 9 +18752 6 +18758 5 +18763 8 +18771 3 +18774 13 +18787 8 +18795 9 +18804 5 +18809 4 +18813 5 +18818 5 +18823 5 +18828 4 +18832 8 +18840 2 +18842 7 +18849 6 +18855 8 +18863 8 +18871 6 +18877 5 +18882 9 +18891 5 +18896 11 +18907 8 +18915 3 +18918 5 +18923 8 +18931 7 +18938 6 +18944 5 +18949 2 +18951 4 +18955 7 +18962 6 +18968 5 +18973 5 +18978 5 +18983 10 +18993 6 +18999 9 +19008 9 +19017 5 +19022 6 +19028 4 +19032 7 +19039 5 +19044 4 +19048 7 +19055 7 +19062 2 +19064 12 +19076 6 +19082 5 +19087 3 +19090 8 +19098 3 +19101 5 +19106 7 +19113 3 +19116 5 +19121 4 +19125 6 +19131 4 +19135 5 +19140 8 +19148 5 +19153 7 +19160 9 +19169 4 +19173 8 +19181 2 +19183 5 +19188 6 +19194 5 +19199 7 +19206 1 +19207 3 +19210 2 +19212 6 +19218 7 +19225 3 +19228 4 +19232 8 +19240 6 +19246 10 +19256 5 +19261 8 +19269 9 +19278 9 +19287 3 +19290 6 +19296 3 +19299 2 +19301 9 +19310 3 +19313 8 +19321 4 +19325 6 +19331 7 +19338 8 +19346 5 +19351 10 +19361 2 +19363 6 +19369 4 +19373 5 +19378 8 +19386 10 +19396 4 +19400 8 +19408 7 +19415 5 +19420 7 +19427 6 +19433 5 +19438 6 +19444 5 +19449 6 +19455 7 +19462 5 +19467 4 +19471 8 +19479 2 +19481 6 +19487 4 +19491 3 +19494 4 +19498 3 +19501 2 +19503 7 +19510 6 +19516 2 +19518 8 +19526 9 +19535 8 +19543 2 +19545 3 +19548 10 +19558 4 +19562 7 +19569 5 +19574 5 +19579 5 +19584 6 +19590 3 +19593 6 +19599 6 +19605 4 +19609 6 +19615 8 +19623 5 +19628 7 +19635 6 +19641 6 +19647 4 +19651 8 +19659 6 +19665 4 +19669 3 +19672 9 +19681 6 +19687 6 +19693 4 +19697 6 +19703 8 +19711 6 +19717 6 +19723 7 +19730 3 +19733 5 +19738 4 +19742 5 +19747 5 +19752 11 +19763 8 +19771 6 +19777 9 +19786 6 +19792 11 +19803 4 +19807 8 +19815 6 +19821 8 +19829 7 +19836 9 +19845 3 +19848 5 +19853 5 +19858 10 
+19868 3 +19871 4 +19875 10 +19885 5 +19890 8 +19898 5 +19903 5 +19908 5 +19913 4 +19917 6 +19923 4 +19927 5 +19932 6 +19938 3 +19941 8 +19949 7 +19956 6 +19962 7 +19969 4 +19973 6 +19979 4 +19983 10 +19993 8 +20001 16 +20017 5 +20022 6 +20028 11 +20039 6 +20045 6 +20051 8 +20059 5 +20064 4 +20068 9 +20077 9 +20086 10 +20096 9 +20105 6 +20111 4 +20115 5 +20120 3 +20123 7 +20130 9 +20139 4 +20143 2 +20145 7 +20152 7 +20159 6 +20165 7 +20172 10 +20182 6 +20188 2 +20190 5 +20195 7 +20202 6 +20208 6 +20214 5 +20219 3 +20222 2 +20224 4 +20228 7 +20235 4 +20239 5 +20244 8 +20252 3 +20255 6 +20261 9 +20270 9 +20279 2 +20281 8 +20289 2 +20291 3 +20294 3 +20297 3 +20300 4 +20304 7 +20311 7 +20318 6 +20324 4 +20328 4 +20332 4 +20336 10 +20346 7 +20353 3 +20356 5 +20361 3 +20364 6 +20370 4 +20374 4 +20378 3 +20381 5 +20386 5 +20391 4 +20395 5 +20400 4 +20404 5 +20409 1 +20410 5 +20415 2 +20417 5 +20422 7 +20429 4 +20433 2 +20435 6 +20441 5 +20446 4 +20450 7 +20457 8 +20465 2 +20467 10 +20477 5 +20482 5 +20487 8 +20495 11 +20506 5 +20511 4 +20515 3 +20518 6 +20524 12 +20536 3 +20539 12 +20551 8 +20559 4 +20563 10 +20573 8 +20581 3 +20584 7 +20591 6 +20597 4 +20601 10 +20611 7 +20618 5 +20623 4 +20627 5 +20632 8 +20640 6 +20646 3 +20649 9 +20658 2 +20660 7 +20667 4 +20671 5 +20676 7 +20683 8 +20691 4 +20695 6 +20701 4 +20705 8 +20713 8 +20721 4 +20725 5 +20730 7 +20737 4 +20741 5 +20746 4 +20750 2 +20752 8 +20760 8 +20768 10 +20778 7 +20785 7 +20792 10 +20802 3 +20805 3 +20808 10 +20818 4 +20822 6 +20828 7 +20835 4 +20839 8 +20847 5 +20852 12 +20864 2 +20866 7 +20873 3 +20876 5 +20881 3 +20884 8 +20892 4 +20896 7 +20903 5 +20908 4 +20912 10 +20922 5 +20927 10 +20937 10 +20947 6 +20953 3 +20956 6 +20962 4 +20966 5 +20971 8 +20979 4 +20983 7 +20990 5 +20995 10 +21005 7 +21012 10 +21022 3 +21025 7 +21032 5 +21037 12 +21049 8 +21057 4 +21061 5 +21066 2 +21068 10 +21078 3 +21081 4 +21085 5 +21090 4 +21094 9 +21103 4 +21107 7 +21114 7 +21121 6 +21127 1 +21128 9 +21137 7 +21144 10 
+21154 4 +21158 5 +21163 6 +21169 5 +21174 4 +21178 4 +21182 5 +21187 6 +21193 11 +21204 7 +21211 7 +21218 10 +21228 5 +21233 13 +21246 10 +21256 3 +21259 5 +21264 9 +21273 6 +21279 11 +21290 8 +21298 3 +21301 3 +21304 12 +21316 7 +21323 5 +21328 6 +21334 4 +21338 7 +21345 7 +21352 6 +21358 3 +21361 8 +21369 6 +21375 5 +21380 5 +21385 6 +21391 6 +21397 6 +21403 6 +21409 9 +21418 6 +21424 12 +21436 5 +21441 3 +21444 7 +21451 9 +21460 9 +21469 10 +21479 5 +21484 5 +21489 8 +21497 6 +21503 4 +21507 2 +21509 5 +21514 4 +21518 4 +21522 6 +21528 11 +21539 6 +21545 6 +21551 6 +21557 11 +21568 3 +21571 4 +21575 3 +21578 6 +21584 7 +21591 3 +21594 8 +21602 7 +21609 9 +21618 4 +21622 7 +21629 8 +21637 8 +21645 4 +21649 4 +21653 2 +21655 6 +21661 6 +21667 5 +21672 5 +21677 8 +21685 6 +21691 6 +21697 4 +21701 4 +21705 3 +21708 12 +21720 6 +21726 9 +21735 5 +21740 6 +21746 5 +21751 6 +21757 12 +21769 5 +21774 6 +21780 4 +21784 8 +21792 3 +21795 3 +21798 6 +21804 7 +21811 5 +21816 11 +21827 7 +21834 3 +21837 6 +21843 9 +21852 8 +21860 6 +21866 4 +21870 5 +21875 6 +21881 9 +21890 3 +21893 5 +21898 4 +21902 5 +21907 8 +21915 6 +21921 3 +21924 5 +21929 6 +21935 5 +21940 5 +21945 7 +21952 7 +21959 4 +21963 8 +21971 4 +21975 6 +21981 3 +21984 9 +21993 6 +21999 5 +22004 6 +22010 9 +22019 8 +22027 2 +22029 7 +22036 3 +22039 5 +22044 3 +22047 7 +22054 10 +22064 3 +22067 2 +22069 5 +22074 6 +22080 4 +22084 5 +22089 7 +22096 4 +22100 8 +22108 4 +22112 10 +22122 4 +22126 6 +22132 3 +22135 4 +22139 9 +22148 5 +22153 4 +22157 5 +22162 5 +22167 8 +22175 5 +22180 4 +22184 5 +22189 4 +22193 4 +22197 5 +22202 10 +22212 8 +22220 5 +22225 4 +22229 8 +22237 8 +22245 8 +22253 2 +22255 4 +22259 9 +22268 10 +22278 3 +22281 9 +22290 4 +22294 4 +22298 7 +22305 6 +22311 4 +22315 7 +22322 3 +22325 6 +22331 3 +22334 5 +22339 9 +22348 4 +22352 5 +22357 6 +22363 5 +22368 4 +22372 5 +22377 7 +22384 2 +22386 7 +22393 8 +22401 7 +22408 9 +22417 8 +22425 3 +22428 4 +22432 7 +22439 6 +22445 4 +22449 9 +22458 6 
+22464 5 +22469 7 +22476 10 +22486 5 +22491 5 +22496 10 +22506 8 +22514 8 +22522 8 +22530 8 +22538 5 +22543 9 +22552 2 +22554 2 +22556 6 +22562 7 +22569 9 +22578 3 +22581 7 +22588 8 +22596 3 +22599 6 +22605 3 +22608 4 +22612 6 +22618 13 +22631 7 +22638 5 +22643 8 +22651 8 +22659 9 +22668 7 +22675 2 +22677 12 +22689 14 +22703 10 +22713 4 +22717 7 +22724 3 +22727 5 +22732 9 +22741 9 +22750 11 +22761 4 +22765 5 +22770 12 +22782 4 +22786 8 +22794 4 +22798 7 +22805 6 +22811 4 +22815 8 +22823 4 +22827 7 +22834 2 +22836 7 +22843 9 +22852 2 +22854 10 +22864 6 +22870 7 +22877 8 +22885 7 +22892 2 +22894 4 +22898 9 +22907 7 +22914 8 +22922 7 +22929 5 +22934 5 +22939 4 +22943 7 +22950 8 +22958 3 +22961 7 +22968 5 +22973 4 +22977 5 +22982 7 +22989 4 +22993 4 +22997 7 +23004 4 +23008 5 +23013 7 +23020 6 +23026 3 +23029 7 +23036 9 +23045 3 +23048 3 +23051 5 +23056 5 +23061 5 +23066 6 +23072 7 +23079 4 +23083 4 +23087 9 +23096 4 +23100 6 +23106 5 +23111 6 +23117 7 +23124 5 +23129 5 +23134 3 +23137 2 +23139 8 +23147 13 +23160 6 +23166 4 +23170 5 +23175 4 +23179 3 +23182 6 +23188 6 +23194 5 +23199 4 +23203 6 +23209 4 +23213 12 +23225 4 +23229 7 +23236 10 +23246 9 +23255 6 +23261 9 +23270 9 +23279 8 +23287 3 +23290 7 +23297 5 +23302 3 +23305 2 +23307 7 +23314 7 +23321 6 +23327 10 +23337 4 +23341 9 +23350 6 +23356 6 +23362 7 +23369 12 +23381 4 +23385 5 +23390 3 +23393 10 +23403 10 +23413 4 +23417 6 +23423 3 +23426 9 +23435 6 +23441 7 +23448 5 +23453 6 +23459 5 +23464 3 +23467 8 +23475 2 +23477 10 +23487 3 +23490 4 +23494 5 +23499 8 +23507 4 +23511 7 +23518 8 +23526 4 +23530 6 +23536 8 +23544 6 +23550 7 +23557 3 +23560 7 +23567 2 +23569 6 +23575 9 +23584 12 +23596 5 +23601 8 +23609 7 +23616 6 +23622 6 +23628 4 +23632 10 +23642 8 +23650 7 +23657 1 +23658 8 +23666 8 +23674 4 +23678 8 +23686 6 +23692 9 +23701 8 +23709 7 +23716 9 +23725 8 +23733 9 +23742 5 +23747 4 +23751 5 +23756 10 +23766 6 +23772 4 +23776 6 +23782 3 +23785 6 +23791 7 +23798 9 +23807 9 +23816 13 +23829 10 +23839 6 +23845 
3 +23848 6 +23854 7 +23861 6 +23867 4 +23871 11 +23882 12 +23894 3 +23897 2 +23899 8 +23907 3 +23910 8 +23918 7 +23925 6 +23931 5 +23936 6 +23942 11 +23953 6 +23959 4 +23963 4 +23967 5 +23972 7 +23979 8 +23987 5 +23992 5 +23997 6 +24003 4 +24007 8 +24015 9 +24024 5 +24029 4 +24033 3 +24036 4 +24040 2 +24042 7 +24049 2 +24051 6 +24057 1 +24058 9 +24067 8 +24075 6 +24081 3 +24084 9 +24093 6 +24099 8 +24107 9 +24116 6 +24122 5 +24127 4 +24131 8 +24139 6 +24145 7 +24152 3 +24155 8 +24163 5 +24168 6 +24174 6 +24180 4 +24184 8 +24192 5 +24197 5 +24202 6 +24208 7 +24215 5 +24220 3 +24223 11 +24234 12 +24246 12 +24258 3 +24261 9 +24270 6 +24276 5 +24281 5 +24286 8 +24294 4 +24298 8 +24306 5 +24311 7 +24318 3 +24321 8 +24329 5 +24334 3 +24337 7 +24344 7 +24351 5 +24356 7 +24363 4 +24367 6 +24373 3 +24376 8 +24384 3 +24387 7 +24394 10 +24404 3 +24407 5 +24412 6 +24418 4 +24422 4 +24426 2 +24428 3 +24431 9 +24440 8 +24448 7 +24455 5 +24460 11 +24471 7 +24478 7 +24485 5 +24490 10 +24500 4 +24504 7 +24511 6 +24517 13 +24530 10 +24540 7 +24547 8 +24555 4 +24559 2 +24561 9 +24570 2 +24572 4 + +0 + +24576 +2539 2 +1187 5 +3911 2 +585 8 +1498 10 +1681 2 +2115 7 +2424 1 +3708 7 +196 1 +1852 10 +3555 8 +2134 1 +1064 9 +1293 8 +944 9 +2413 3 +1678 2 +839 9 +297 1 +174 7 +2217 9 +51 8 +3195 6 +3215 5 +332 3 +2077 7 +1214 2 +2367 10 +1947 10 +2350 6 +3441 1 +3246 7 +1999 1 +2037 5 +2227 8 +101 7 +3340 9 +3713 7 +3013 4 +1001 3 +444 6 +3306 2 +4043 1 +1361 1 +3916 6 +365 4 +1485 8 +251 8 +234 2 +4042 2 +870 7 +3803 9 +3874 4 +1058 5 +831 3 +2331 6 +1328 1 +2525 4 +255 3 +381 1 +2521 1 +3946 5 +2449 4 +285 2 +3848 4 +2669 9 +3949 3 +1050 4 +2855 9 +1974 3 +349 7 +2874 6 +192 6 +3442 4 +265 1 +2281 4 +403 6 +2359 5 +319 8 +39 1 +3893 3 +1176 1 +3154 10 +866 9 +2670 9 +3934 6 +3799 5 +393 8 +2722 10 +2107 4 +185 3 +69 1 +1958 4 +1613 2 +1908 10 +3867 5 +2950 2 +3397 10 +3737 1 +1074 9 +234 2 +2795 8 +1452 8 +1437 2 +768 7 +3400 1 +1212 6 +2675 7 +989 4 +1338 6 +764 5 +216 3 +2186 3 +2210 9 
+2194 1 +1703 3 +2668 5 +3684 3 +3636 6 +3939 5 +3718 2 +3954 10 +4009 10 +703 8 +2990 8 +2162 4 +3980 1 +1245 8 +2488 1 +2391 3 +3774 9 +3238 5 +1534 4 +3440 3 +2611 6 +2878 7 +1931 8 +3668 9 +3139 10 +3822 10 +2184 3 +82 6 +3317 1 +1702 3 +4087 10 +519 3 +1944 1 +3830 9 +3563 10 +2150 5 +3735 9 +1158 2 +3265 9 +2571 6 +2587 4 +2073 3 +405 6 +3865 3 +42 4 +2358 9 +2632 1 +1629 5 +2968 10 +3160 8 +1934 7 +1108 3 +2324 9 +1923 4 +2536 10 +3112 3 +3817 1 +4008 3 +2118 10 +1034 6 +3094 8 +3868 9 +2484 6 +3791 7 +1456 5 +2643 5 +462 9 +1481 8 +1788 10 +811 5 +1441 10 +2258 6 +3559 5 +2816 2 +3886 1 +428 9 +2442 8 +873 2 +3460 2 +989 7 +2897 9 +1464 7 +1525 4 +685 7 +3906 4 +678 7 +1824 2 +2256 8 +1016 9 +3705 1 +3368 10 +136 1 +1154 8 +2478 10 +3323 2 +104 10 +932 7 +3100 8 +2465 5 +491 9 +1735 3 +1031 3 +2790 1 +1423 5 +2939 6 +1829 9 +1241 3 +386 4 +1934 8 +2883 9 +14 1 +686 2 +992 5 +3564 8 +551 10 +2074 3 +2344 1 +3593 9 +1103 6 +2668 6 +696 5 +4019 4 +1708 1 +2519 3 +3455 8 +28 4 +3639 8 +1977 7 +2429 5 +3549 7 +468 10 +2801 10 +848 7 +959 9 +2410 6 +3898 9 +2059 3 +1938 9 +3544 1 +3513 9 +1136 1 +302 4 +1589 7 +305 1 +3199 2 +847 4 +3900 6 +2632 6 +2193 6 +442 7 +3972 1 +3426 4 +1500 3 +1723 5 +2849 1 +2498 4 +3104 4 +3131 5 +1198 2 +1492 10 +2112 6 +1202 2 +2284 10 +1672 10 +3115 3 +2934 4 +990 4 +434 8 +3372 6 +1974 6 +2729 9 +3517 3 +2286 6 +1761 1 +3637 3 +3058 4 +1178 2 +985 4 +3 8 +939 6 +445 3 +1807 9 +2728 7 +1861 5 +2716 5 +3316 3 +2836 5 +174 3 +1190 4 +1061 9 +2375 6 +3599 9 +1048 3 +3021 8 +1421 5 +2090 10 +1289 6 +971 1 +3560 4 +1817 2 +3691 1 +2572 6 +1938 9 +576 6 +3178 3 +3265 6 +3747 6 +1332 1 +2812 9 +3574 5 +2033 7 +1103 4 +2806 4 +2506 5 +686 3 +3917 3 +350 5 +2609 6 +1906 7 +3969 10 +3419 10 +3338 10 +1448 9 +1050 3 +1080 5 +3620 4 +1286 10 +2202 5 +4079 7 +3722 3 +1210 7 +3678 2 +1323 7 +2341 7 +320 2 +3506 7 +649 4 +2993 5 +1165 6 +1384 9 +335 7 +2002 2 +302 7 +1502 1 +4049 9 +2628 3 +259 2 +2500 1 +2022 9 +541 9 +2910 2 +4089 6 +3356 1 
+2474 9 +1941 2 +1025 2 +3026 10 +2314 6 +2102 6 +1122 7 +1833 10 +1692 1 +1372 3 +1302 4 +3883 10 +2310 9 +3151 9 +2447 2 +1205 5 +276 2 +2431 8 +611 3 +512 8 +1134 10 +758 2 +2418 6 +276 10 +2592 1 +1655 8 +2181 1 +3243 10 +2191 3 +455 4 +1130 5 +2880 8 +740 1 +635 6 +932 9 +3178 8 +1032 9 +89 6 +414 1 +730 9 +16 1 +3631 9 +1411 6 +2356 5 +2474 5 +3025 4 +3876 8 +2897 7 +957 5 +2621 6 +1568 8 +2610 8 +3253 7 +1169 1 +3292 4 +1035 2 +1417 5 +3613 10 +1063 5 +1779 7 +360 2 +208 3 +1014 7 +894 8 +2599 7 +4076 3 +3329 6 +2497 10 +1110 5 +803 8 +3322 10 +3100 7 +1921 8 +3077 2 +1052 7 +2808 5 +3802 9 +2708 9 +3412 1 +690 9 +2266 3 +112 3 +765 4 +3276 3 +3823 5 +181 9 +457 1 +299 6 +934 5 +3422 7 +3718 4 +1793 6 +3672 8 +2858 2 +3801 3 +1693 8 +3711 4 +2917 1 +291 6 +3209 1 +334 10 +3287 6 +626 5 +915 3 +2886 6 +236 3 +1390 10 +2523 8 +1386 10 +3340 2 +4047 7 +303 8 +230 2 +2390 8 +1983 5 +2897 2 +3922 3 +954 3 +3004 4 +3912 10 +393 1 +1768 3 +2783 2 +1522 6 +4055 8 +3429 6 +3884 2 +25 6 +3606 3 +3813 7 +2176 9 +2774 10 +2829 1 +2858 7 +3722 8 +1468 6 +1208 5 +3466 7 +446 2 +1824 4 +4056 8 +1036 5 +985 4 +2979 3 +3919 6 +479 3 +3896 5 +128 3 +2928 9 +1208 1 +1356 10 +928 10 +787 5 +3418 6 +421 8 +1985 3 +2218 3 +3452 1 +2255 3 +405 7 +3265 4 +2763 4 +641 10 +3202 1 +3754 8 +1949 3 +3120 10 +2017 9 +1932 9 +2302 9 +2060 9 +773 5 +3294 1 +2044 2 +2277 10 +3755 10 +3620 6 +69 6 +2237 4 +3696 3 +2141 7 +1698 7 +2629 7 +2951 1 +1211 8 +3830 3 +1858 3 +2153 10 +2512 9 +3088 10 +3996 3 +423 8 +584 7 +383 10 +2355 1 +2140 5 +954 4 +99 4 +1575 4 +2552 2 +405 4 +1175 10 +1124 10 +3839 8 +1711 6 +3475 8 +1104 5 +2724 4 +1185 4 +1081 9 +2892 8 +1177 10 +2260 8 +1362 1 +1979 3 +2161 4 +3940 7 +694 3 +254 1 +966 6 +3083 5 +920 6 +3555 6 +1233 6 +947 6 +3804 6 +1611 2 +951 1 +3524 10 +94 4 +3332 5 +3542 10 +152 7 +289 1 +539 9 +566 10 +3745 8 +2949 10 +2114 8 +2206 1 +364 5 +3081 4 +2286 9 +3450 1 +2703 10 +5 7 +1851 3 +2618 6 +1958 1 +550 3 +2220 3 +375 7 +3322 10 +3901 10 +2296 4 
+732 1 +3721 8 +3064 1 +3315 5 +2066 10 +2566 7 +593 10 +36 10 +1177 2 +2225 9 +1485 8 +392 6 +3144 7 +2170 5 +2052 5 +1235 7 +801 2 +3439 1 +2565 9 +3646 7 +893 3 +1991 3 +2220 1 +1540 5 +1493 4 +3384 5 +1115 4 +488 6 +568 8 +1240 3 +4030 2 +3376 2 +3660 1 +2790 5 +3528 8 +1131 10 +1932 7 +2690 10 +3852 7 +2833 3 +785 1 +705 10 +2183 9 +3411 6 +2966 6 +2765 1 +3756 1 +199 4 +817 3 +3221 1 +1154 9 +1610 9 +1224 6 +3511 6 +3245 5 +75 3 +1353 8 +2848 7 +2353 4 +268 10 +374 8 +2591 9 +2501 8 +953 5 +2335 3 +1304 1 +407 1 +1556 9 +2965 3 +1263 7 +2258 4 +138 3 +1237 5 +1719 6 +1272 6 +1867 6 +3052 9 +2829 10 +515 10 +1874 9 +1699 8 +3351 2 +1303 10 +2853 9 +866 6 +3533 1 +895 2 +2287 9 +1954 9 +3352 9 +3760 2 +1026 9 +2074 6 +1529 4 +868 2 +3551 9 +3603 8 +1589 3 +2230 3 +1141 7 +3914 8 +3396 3 +1997 6 +898 10 +3176 8 +3063 7 +2957 5 +194 10 +2959 2 +1616 9 +686 1 +921 9 +2578 10 +3986 4 +2293 3 +2529 6 +722 7 +1783 3 +594 1 +2188 7 +1317 6 +992 1 +2754 3 +3113 7 +205 4 +3815 5 +3076 8 +1205 9 +1703 4 +3901 4 +1627 8 +2490 6 +524 4 +4031 10 +3070 1 +4004 9 +652 8 +891 8 +765 2 +248 9 +836 4 +2567 7 +1083 8 +1743 7 +3716 7 +2978 9 +2097 6 +3205 10 +310 4 +907 4 +2378 6 +85 3 +1268 1 +1250 4 +1745 4 +3608 4 +948 6 +3799 2 +552 4 +2391 9 +758 7 +2703 6 +2951 6 +2674 5 +3839 2 +1778 4 +3064 8 +2392 7 +1312 9 +798 6 +391 5 +3602 3 +1346 7 +2819 7 +3549 2 +476 8 +1661 5 +2335 8 +963 5 +3882 4 +2778 6 +521 9 +353 4 +1534 2 +3229 1 +2011 3 +3422 8 +757 9 +2851 1 +180 10 +584 10 +3797 4 +2092 8 +237 10 +2797 7 +3207 10 +3546 9 +1225 9 +282 3 +1545 2 +2111 7 +3439 1 +2231 5 +1814 3 +36 1 +1513 4 +1803 10 +2642 3 +2749 4 +3608 7 +2702 4 +1331 8 +3867 6 +883 3 +2695 6 +3879 1 +2200 10 +1720 4 +2801 5 +1463 1 +250 2 +3074 8 +1938 8 +115 3 +1161 5 +835 10 +962 7 +2543 10 +1828 7 +1488 7 +3860 1 +1497 2 +413 1 +3003 7 +3593 9 +3711 7 +1680 2 +2586 7 +3164 4 +1227 1 +2124 9 +2302 10 +541 7 +1123 7 +1261 10 +2938 9 +3420 3 +1604 3 +3772 10 +3921 10 +3518 4 +194 3 +456 2 +3212 4 +3898 5 
+1158 7 +186 3 +449 3 +620 7 +330 8 +3579 1 +1214 2 +1598 2 +160 2 +3430 4 +2579 5 +2321 6 +3585 7 +1710 5 +4037 2 +3234 6 +3245 5 +3139 2 +2571 4 +536 9 +358 3 +378 8 +383 8 +1575 5 +432 5 +2731 1 +2298 2 +2600 1 +1525 5 +2324 9 +2883 4 +473 4 +934 3 +641 7 +3351 7 +1225 4 +1535 3 +2448 7 +3853 3 +1055 1 +2545 5 +3337 2 +1247 1 +2846 4 +681 2 +1495 2 +3803 4 +1023 7 +2533 8 +338 10 +3061 5 +2127 9 +1459 6 +99 7 +3569 7 +1724 8 +2816 4 +351 7 +2074 7 +193 5 +3012 7 +2078 6 +3269 10 +2182 1 +3485 10 +685 8 +2592 5 +2970 10 +170 1 +1314 7 +1342 7 +3914 8 +761 1 +3823 3 +2388 1 +3280 10 +2773 6 +3930 3 +1338 3 +895 8 +1576 3 +1445 8 +221 8 +415 6 +2915 1 +3712 2 +2374 6 +146 2 +333 10 +1369 1 +2909 10 +1699 4 +2560 8 +982 4 +716 3 +3109 4 +2823 5 +1810 2 +2582 8 +3314 3 +1875 4 +3040 1 +3229 7 +2454 6 +2690 4 +2880 4 +203 2 +3240 9 +639 6 +3636 10 +4025 5 +3986 3 +3159 8 +2873 1 +1798 1 +3724 2 +1942 6 +3947 2 +1767 8 +2916 3 +1358 8 +3242 4 +1710 2 +3440 9 +2958 4 +427 3 +1003 1 +2351 7 +2339 10 +3991 2 +3758 1 +3229 3 +2572 5 +297 4 +1987 4 +1033 4 +2941 10 +1582 1 +1775 7 +1510 1 +1216 8 +2154 6 +2178 5 +2009 10 +1887 8 +1090 10 +1213 9 +867 5 +1604 4 +3968 3 +2542 1 +156 1 +2056 6 +2008 4 +1882 1 +3508 5 +3603 10 +195 7 +226 7 +1070 8 +1523 3 +3067 10 +2665 6 +639 3 +3369 6 +3750 7 +1326 10 +3019 2 +261 2 +3191 7 +1692 9 +1403 1 +3822 8 +3 7 +1215 5 +2335 4 +52 3 +2325 8 +1872 2 +3000 4 +399 9 +962 4 +3591 5 +3366 9 +1774 8 +2512 10 +2805 7 +1001 4 +3962 8 +318 9 +2789 7 +3299 4 +1140 10 +1234 9 +1301 10 +2402 2 +2978 4 +494 1 +2857 9 +2856 8 +1970 1 +3511 8 +2335 6 +907 5 +730 5 +2194 2 +1785 10 +134 10 +4045 9 +872 5 +2925 5 +353 10 +3690 9 +3147 9 +2525 9 +1087 6 +2143 9 +1301 8 +76 9 +412 7 +2266 6 +2772 1 +1253 5 +2786 2 +186 10 +354 2 +3073 6 +1807 6 +2720 10 +496 5 +1936 9 +4044 9 +333 4 +3080 3 +2030 5 +831 8 +1824 10 +16 3 +3371 3 +3971 6 +1671 8 +183 10 +716 9 +144 3 +824 3 +1499 2 +288 7 +379 9 +2076 3 +1418 10 +3787 1 +549 2 +1904 1 +939 4 +1841 3 
+2637 1 +1448 7 +420 1 +382 6 +2592 1 +2591 2 +1298 2 +2238 9 +3599 1 +2705 9 +3938 7 +2700 10 +2881 10 +3331 7 +1130 2 +3909 10 +2516 6 +1695 2 +196 7 +3700 1 +2510 7 +1838 8 +3886 8 +4041 7 +3904 4 +3272 8 +426 3 +3851 9 +1539 1 +2457 2 +2890 10 +968 9 +13 6 +613 3 +282 7 +1110 2 +2559 7 +1913 5 +153 5 +515 2 +2026 6 +2985 8 +144 3 +3929 4 +121 10 +478 6 +713 4 +1204 3 +2721 9 +171 7 +659 5 +3872 5 +719 4 +1651 4 +2765 3 +1370 10 +191 7 +3359 4 +3869 4 +900 6 +2104 2 +0 1 +3350 1 +2890 9 +305 7 +1997 7 +2885 8 +2138 9 +3940 2 +3704 8 +1379 9 +908 3 +1249 9 +1286 1 +1632 1 +2438 4 +2000 5 +1237 3 +1797 6 +3789 8 +111 4 +668 1 +3502 2 +1338 5 +1617 7 +4067 4 +2203 1 +2924 7 +2009 2 +1903 4 +3918 6 +1857 3 +1062 8 +2212 10 +1847 4 +671 8 +3686 1 +1788 1 +1578 10 +3501 1 +2554 1 +164 2 +2886 10 +1318 10 +570 5 +2649 10 +2581 4 +1103 4 +3748 7 +2504 5 +123 4 +3156 5 +1737 10 +1594 3 +667 3 +3426 7 +2117 10 +1626 9 +73 3 +933 7 +3979 9 +2199 3 +1885 2 +2168 2 +1276 8 +2458 7 +591 3 +1502 3 +2454 1 +471 2 +2918 1 +3221 4 +1135 8 +3922 4 +239 6 +723 6 +3865 8 +1145 2 +2640 7 +812 1 +3977 6 +2318 3 +558 8 +1479 7 +1248 9 +377 1 +2986 9 +3356 2 +2964 9 +2967 6 +3212 9 +2455 6 +3254 1 +933 1 +3326 3 +464 10 +4003 4 +3246 5 +2883 6 +3568 7 +3578 7 +3045 4 +1516 4 +3131 3 +2759 2 +2942 6 +1211 3 +3201 1 +333 1 +2319 6 +2033 10 +3489 6 +704 5 +1242 7 +1327 1 +1596 6 +19 3 +552 6 +4031 7 +4060 9 +3793 7 +978 3 +3817 1 +2194 4 +3677 6 +1684 10 +227 7 +2985 8 +2105 6 +3677 2 +1486 2 +3993 5 +1698 5 +3903 9 +2048 4 +568 10 +2101 2 +1272 4 +1358 10 +3457 10 +1460 7 +3763 3 +1066 8 +2459 2 +1117 5 +712 6 +4018 1 +425 8 +3698 5 +3277 2 +2648 8 +226 7 +1201 7 +158 8 +503 2 +1517 10 +803 9 +1582 4 +637 9 +3550 9 +2803 7 +2130 1 +2199 6 +1682 10 +3393 8 +352 8 +1107 2 +1994 8 +1894 7 +3787 2 +2311 9 +2262 3 +3517 1 +3867 9 +1868 2 +856 10 +4029 8 +2769 5 +253 1 +832 4 +3702 2 +468 9 +1984 8 +2524 1 +767 4 +2701 3 +4086 4 +660 4 +171 7 +221 7 +218 7 +2965 2 +3286 8 +1200 10 +3434 4 +1969 
8 +1625 4 +501 10 +1701 9 +2440 7 +3201 1 +1870 8 +3934 4 +3252 9 +2169 2 +3959 5 +2629 4 +2557 10 +557 9 +817 3 +745 9 +3575 10 +651 10 +2591 5 +2432 1 +1689 9 +1173 7 +1743 5 +1163 3 +1320 3 +79 2 +1370 2 +2077 8 +3964 9 +1021 1 +1484 9 +1551 6 +3956 3 +2222 7 +3843 5 +347 10 +841 5 +3810 1 +3443 9 +9 2 +2063 9 +17 2 +3938 3 +1839 4 +1233 2 +2308 1 +2941 10 +1598 2 +1287 1 +79 3 +1377 4 +3143 7 +3366 2 +2660 2 +2225 10 +2731 3 +1070 5 +3581 7 +135 10 +1704 7 +1314 1 +1309 5 +3273 8 +3207 5 +4037 7 +1014 2 +3825 6 +2835 7 +3363 4 +3895 6 +2554 8 +1121 9 +3166 5 +211 6 +3249 2 +1744 10 +3165 10 +1191 2 +1054 10 +3828 10 +3761 3 +2625 4 +765 8 +3157 8 +3575 2 +3196 8 +577 10 +1460 7 +1447 5 +2756 6 +1208 7 +1100 5 +857 4 +1808 7 +4018 9 +2670 6 +3445 10 +3564 10 +3854 8 +3476 10 +2565 6 +1536 3 +3436 9 +2842 7 +423 3 +2912 7 +271 7 +1992 6 +883 5 +1287 6 +3277 1 +3560 9 +2265 3 +900 8 +2009 7 +2644 3 +1288 5 +2997 2 +329 2 +2344 4 +1900 5 +532 9 +2821 2 +3858 7 +1644 7 +2114 8 +848 10 +2820 6 +126 9 +2006 6 +2471 10 +3260 2 +2660 5 +3745 6 +1105 9 +3301 3 +2592 6 +3313 1 +1388 3 +2828 10 +138 2 +3765 6 +1882 3 +1552 10 +2130 8 +2421 1 +9 8 +1084 1 +1615 8 +2619 7 +1831 6 +3940 9 +1596 9 +1127 9 +1718 1 +814 5 +2365 9 +1654 4 +279 10 +3360 9 +4025 6 +224 1 +2529 2 +2277 10 +286 3 +1781 3 +2429 3 +98 1 +1214 4 +3920 2 +2300 6 +1818 10 +2490 1 +2674 10 +2767 4 +3042 9 +3007 1 +3082 6 +1264 6 +738 3 +2078 6 +3111 6 +10 3 +2939 3 +2420 9 +3298 6 +472 10 +3383 6 +3041 1 +557 2 +2520 5 +3695 4 +1487 10 +3723 3 +317 10 +3442 10 +574 8 +2151 7 +3178 1 +1727 9 +62 2 +3282 10 +564 6 +1492 6 +948 3 +3745 3 +3866 6 +749 6 +1150 2 +691 4 +1023 4 +2960 4 +173 2 +1170 5 +3284 7 +808 6 +4088 1 +2320 6 +1876 1 +1171 9 +3202 5 +1899 9 +3775 8 +1090 6 +881 1 +2486 9 +1102 10 +3164 2 +3261 7 +1200 3 +3738 10 +1250 3 +3947 10 +1409 3 +2226 5 +2599 8 +1847 3 +14 1 +587 8 +2833 8 +3511 8 +2348 5 +3814 3 +734 6 +1469 3 +3521 6 +1980 7 +2075 3 +675 3 +2818 3 +3436 7 +1169 10 +3998 8 +3268 4 
+373 6 +2090 1 +1641 5 +1220 9 +502 3 +1103 1 +1907 6 +956 4 +112 10 +1229 6 +3587 4 +3008 10 +3458 5 +1991 3 +3781 9 +1335 1 +244 7 +1054 3 +1566 1 +1325 5 +3153 3 +4069 10 +318 1 +3883 6 +2088 9 +446 1 +2397 2 +2999 5 +3668 9 +2764 8 +1962 4 +2531 3 +348 4 +2445 2 +1730 2 +4070 9 +3439 9 +1430 7 +2819 7 +2225 4 +553 2 +2429 9 +1763 2 +503 4 +2692 7 +2899 1 +67 10 +3106 5 +2896 9 +66 2 +2111 3 +4061 7 +1103 8 +3469 3 +612 4 +1922 7 +300 8 +3712 10 +1386 9 +3626 4 +2924 3 +3897 4 +1149 5 +2435 6 +480 7 +2695 6 +1772 5 +1140 8 +2930 8 +2593 3 +252 10 +574 7 +648 2 +321 10 +3347 1 +391 7 +2755 6 +2834 8 +3028 8 +1339 10 +943 2 +3139 3 +193 7 +3133 6 +932 8 +135 3 +1621 4 +1837 6 +409 1 +3328 9 +3938 4 +2062 9 +3383 5 +3584 9 +1646 9 +3074 3 +1945 1 +734 10 +2558 5 +2644 4 +2850 1 +12 2 +1033 7 +1612 8 +3555 2 +3520 4 +1176 4 +254 3 +3906 6 +2661 10 +489 8 +3828 10 +2171 6 +2611 10 +1502 7 +622 5 +1913 4 +2033 6 +187 7 +1307 8 +3612 7 +2677 3 +1813 6 +3700 2 +99 1 +3300 6 +3535 7 +931 1 +3119 5 +3010 4 +78 6 +2386 7 +3633 5 +1706 7 +647 9 +669 9 +3026 4 +2259 3 +3716 3 +1337 5 +1813 8 +580 4 +3163 10 +2789 5 +3971 2 +1466 8 +1120 2 +2384 7 +4079 8 +1253 4 +428 5 +2713 10 +3247 3 +1031 1 +1398 6 +2114 6 +3392 3 +308 4 +1008 3 +655 7 +3483 8 +3 1 +418 8 +2071 4 +1238 2 +168 4 +642 4 +1031 6 +915 5 +742 4 +2101 2 +1627 9 +1593 4 +419 5 +1763 9 +77 6 +3463 6 +2740 4 +4000 2 +1192 5 +1959 2 +3150 8 +528 8 +1751 7 +1639 2 +2363 7 +277 8 +3790 7 +688 8 +1667 8 +1870 1 +3122 5 +795 7 +2150 9 +61 7 +44 4 +1213 7 +2420 3 +1355 4 +430 7 +1115 3 +3473 3 +2992 2 +2787 9 +789 6 +79 8 +786 6 +305 4 +1054 6 +2507 3 +197 7 +1296 1 +3126 2 +3274 1 +2143 3 +45 1 +4077 8 +1909 7 +304 4 +2444 5 +2457 3 +1388 5 +4003 3 +3304 7 +1671 9 +2554 6 +3080 2 +1004 8 +1610 9 +475 9 +597 9 +1984 3 +2096 9 +3046 4 +2725 6 +1141 10 +3287 10 +2049 10 +296 4 +439 2 +1424 10 +1080 9 +2884 5 +2767 1 +1619 3 +645 3 +2259 6 +866 5 +3559 10 +2414 9 +127 4 +1380 1 +1180 5 +2342 9 +467 2 +1500 7 +616 7 +2511 7 
+1714 9 +828 3 +3509 4 +1002 2 +1340 7 +1886 10 +1203 3 +2010 2 +3289 10 +3963 4 +992 2 +1669 6 +1405 1 +3342 2 +1421 7 +2761 7 +459 4 +2007 5 +2400 5 +1601 4 +3057 2 +1086 9 +3956 2 +786 7 +1401 4 +1036 2 +1095 1 +258 10 +864 10 +402 6 +3880 7 +3268 2 +3968 6 +177 9 +450 1 +3520 9 +272 1 +474 4 +2195 8 +3049 4 +14 8 +1920 10 +964 7 +2409 10 +454 5 +716 1 +60 2 +280 10 +4013 10 +1825 2 +1639 10 +2149 10 +3730 4 +1631 4 +1600 1 +707 2 +765 3 +924 6 +2011 3 +3304 3 +3267 10 +1280 10 +2467 3 +3621 7 +2970 10 +3119 8 +834 1 +504 7 +2209 5 +1593 8 +2914 1 +2123 6 +2951 8 +3075 4 +3518 6 +2102 6 +1899 9 +2574 9 +4077 3 +1850 7 +3734 4 +2330 1 +2680 3 +1216 2 +3915 4 +3361 5 +358 3 +1317 8 +794 9 +1513 2 +2065 4 +3161 2 +893 1 +4062 7 +2286 7 +245 2 +4088 9 +3214 4 +4020 1 +1723 5 +1462 2 +2652 6 +2549 3 +144 8 +2646 2 +685 4 +3242 3 +2633 5 +2625 5 +2366 5 +2019 3 +3369 5 +1350 7 +4 3 +2019 10 +663 9 +2373 1 +160 10 +185 4 +215 1 +1706 3 +2565 1 +1158 1 +78 10 +2433 5 +1543 4 +1704 8 +3098 8 +832 7 +61 7 +433 7 +705 2 +837 3 +1622 3 +1025 1 +4074 2 +1897 6 +3598 2 +2113 2 +3735 5 +1622 10 +3517 5 +3540 1 +3656 6 +1388 8 +1985 7 +2284 2 +1937 1 +2800 5 +151 10 +3823 7 +2937 10 +3100 1 +2566 4 +1157 4 +1848 6 +3122 3 +2065 10 +2890 1 +869 5 +2450 1 +634 7 +661 8 +726 3 +3599 1 +1099 3 +2725 1 +1513 1 +1176 1 +3474 9 +3643 5 +627 1 +2773 4 +2173 4 +544 10 +2950 5 +1047 1 +2535 3 +2821 10 +3929 10 +3770 6 +477 1 +765 5 +3666 9 +1929 7 +715 10 +1941 4 +1299 9 +1912 7 +375 6 +1481 9 +774 1 +1516 5 +577 3 +1373 2 +2822 6 +3694 10 +3338 2 +1915 2 +2461 2 +673 7 +3165 6 +2635 5 +1900 5 +1264 7 +1580 5 +1310 8 +2815 1 +2053 2 +2750 7 +1522 5 +1601 5 +953 10 +3764 3 +4033 4 +3763 1 +3167 6 +630 10 +232 10 +3228 7 +3190 7 +1512 8 +274 4 +1299 5 +377 5 +1327 8 +860 5 +1489 3 +13 7 +1350 10 +3046 4 +3254 3 +1946 8 +2996 1 +395 7 +3068 6 +58 5 +2429 9 +1987 9 +2124 4 +2714 3 +3312 2 +153 7 +2558 3 +3051 3 +223 8 +2167 1 +2974 7 +3793 10 +918 6 +479 6 +3151 3 +2875 1 +3343 8 +132 4 
+2995 1 +3006 9 +180 10 +3996 4 +3742 3 +3899 10 +3751 6 +2976 3 +1914 9 +183 2 +3004 5 +579 3 +766 7 +3381 7 +2072 9 +1223 8 +1063 1 +3020 5 +3778 4 +4055 2 +1371 4 +3756 4 +588 3 +328 3 +147 3 +2082 10 +1860 10 +3077 8 +2936 10 +3445 9 +2795 7 +3513 5 +2763 7 +73 2 +1480 7 +1475 5 +966 7 +2178 7 +4075 8 +3541 5 +3507 3 +2097 4 +1313 2 +2648 10 +3037 3 +668 3 +3828 3 +1366 9 +899 5 +1948 10 +1540 3 +2020 1 +1136 4 +3771 3 +3581 3 +1604 9 +3648 9 +3838 9 +3980 1 +100 5 +3022 9 +2117 3 +1617 2 +1856 4 +8 4 +4057 6 +2708 6 +3392 1 +764 3 +3595 5 +2560 3 +3670 2 +456 6 +542 3 +2333 3 +1134 7 +3643 3 +2835 6 +1091 2 +1616 2 +1525 2 +2960 5 +1424 1 +762 10 +2380 1 +1932 3 +377 3 +703 2 +2384 3 +1916 7 +429 7 +1986 10 +1064 4 +3871 3 +947 10 +1510 7 +1722 5 +3972 6 +442 7 +2630 4 +3923 9 +701 4 +878 2 +2700 1 +609 10 +2911 4 +2702 4 +925 9 +2769 9 +268 6 +113 8 +923 8 +1044 2 +1163 6 +3896 8 +1770 6 +343 6 +785 8 +102 7 +3757 6 +2902 3 +2140 2 +2897 1 +1369 7 +853 4 +3715 3 +3842 10 +2289 2 +3955 8 +1795 5 +2428 6 +212 1 +348 5 +368 3 +2240 3 +956 4 +3489 8 +1081 3 +1098 7 +2015 5 +147 8 +4028 2 +1067 8 +187 9 +1350 6 +1201 1 +2986 1 +2236 10 +722 3 +1902 6 +2518 1 +11 1 +407 7 +718 3 +3125 6 +1605 9 +1577 2 +1349 5 +899 7 +3277 4 +188 6 +2315 9 +2535 8 +2148 8 +2422 9 +93 10 +3583 2 +147 8 +507 7 +1484 5 +2812 6 +1520 6 +1901 10 +475 8 +1402 4 +1454 3 +2988 10 +2328 10 +2863 5 +1956 5 +1655 8 +988 3 +421 3 +3287 9 +3223 7 +2255 3 +1825 5 +2010 6 +2240 7 +2655 1 +38 4 +968 10 +3451 10 +759 1 +1362 7 +421 5 +1943 8 +1099 3 +1756 10 +513 7 +3683 10 +2108 9 +1000 7 +1072 3 +2710 6 +3839 1 +3884 9 +2408 8 +3533 10 +2453 7 +1253 3 +130 5 +280 7 +3464 3 +1994 6 +105 6 +3473 2 +1407 1 +3019 9 +1820 7 +3278 9 +16 8 +81 1 +1135 10 +2509 7 +2685 4 +1252 3 +585 1 +526 8 +1689 4 +3582 9 +350 7 +2432 4 +683 2 +437 6 +2594 4 +1520 5 +4041 9 +612 9 +2342 1 +2657 7 +1893 3 +528 1 +657 3 +1296 9 +4046 9 +1828 4 +2444 3 +1655 7 +175 9 +648 6 +1541 5 +2987 10 +944 1 +3777 2 +691 1 +1904 6 
+1786 8 +1663 6 +1423 7 +597 7 +480 3 +2398 2 +417 10 +2610 9 +3464 7 +593 6 +2428 6 +2220 10 +317 6 +1135 7 +2762 3 +1943 4 +1736 3 +975 1 +14 6 +3681 6 +633 6 +2505 4 +3971 5 +2618 10 +3902 10 +618 2 +3249 9 +495 4 +4030 3 +86 7 +3327 2 +28 6 +94 4 +1717 5 +783 8 +2521 10 +4018 8 +2156 5 +1331 10 +958 2 +3362 3 +3351 2 +381 7 +114 1 +1805 7 +1903 2 +2663 9 +2542 3 +283 1 +3931 7 +1115 3 +563 3 +2584 8 +1400 6 +3584 5 +2605 10 +3338 8 +4029 5 +1157 1 +1828 3 +1982 2 +2276 6 +1531 4 +626 6 +181 7 +3734 5 +140 1 +2835 1 +3805 7 +3094 8 +2553 10 +1948 1 +69 1 +732 5 +786 2 +2152 4 +3992 10 +2884 9 +611 8 +2053 1 +3132 1 +159 6 +3376 4 +3846 2 +2703 1 +2660 4 +583 8 +3563 3 +3421 5 +2081 8 +1372 8 +3802 7 +3927 2 +1332 1 +401 10 +3164 10 +640 6 +665 6 +3261 4 +1292 4 +2037 10 +297 8 +607 7 +2218 8 +3101 1 +298 5 +709 3 +472 3 +1995 1 +1475 7 +3289 2 +1152 10 +188 1 +2554 3 +2655 8 +388 5 +386 3 +3997 5 +933 9 +2941 1 +4047 3 +85 8 +3850 5 +1757 3 +3920 3 +1611 4 +1817 6 +2138 1 +2944 4 +244 3 +3902 4 +93 8 +1614 9 +1851 3 +621 9 +3211 9 +503 4 +3034 3 +2328 6 +4021 9 +1839 6 +221 8 +908 9 +2417 7 +819 7 +590 8 +1940 1 +1652 1 +3750 3 +191 3 +2247 8 +167 3 +1034 5 +34 9 +295 5 +1149 7 +1762 6 +1853 1 +2450 5 +1682 3 +369 7 +3726 1 +613 4 +3931 5 +2214 3 +303 7 +1091 2 +642 5 +1675 8 +743 4 +1176 5 +2579 1 +2473 5 +3862 3 +3672 1 +1129 8 +1191 6 +3790 3 +2537 10 +1950 3 +2653 6 +3653 3 +1212 4 +1082 10 +147 5 +1468 5 +730 6 +2640 1 +335 7 +2568 8 +2719 1 +689 3 +686 3 +2145 6 +50 8 +2911 8 +3260 5 +3244 5 +3703 1 +577 8 +2192 6 +1459 7 +759 10 +2185 10 +895 6 +3981 4 +1420 3 +2161 5 +2529 7 +2943 10 +778 3 +828 10 +4087 7 +2416 8 +692 2 +3985 6 +395 6 +3628 10 +3951 7 +3089 8 +2571 2 +2867 2 +982 5 +1022 1 +442 4 +2390 2 +3345 1 +308 2 +3818 7 +3433 6 +3896 1 +694 6 +3157 2 +2557 7 +2151 9 +2786 1 +751 8 +371 7 +4051 2 +1717 5 +439 4 +2833 3 +3278 8 +1070 4 +459 2 +2349 3 +46 7 +588 4 +539 3 +3371 6 +1310 8 +2531 5 +2075 1 +2766 2 +3242 8 +3066 4 +2900 10 +3021 3 +7 6 
+3311 6 +2171 8 +3750 1 +1550 1 +756 1 +1849 1 +2649 6 +1134 10 +2693 2 +52 3 +2004 3 +1782 10 +3076 2 +1586 7 +3650 9 +1705 5 +3287 9 +2025 8 +1077 9 +2233 3 +1816 2 +1850 7 +273 1 +3458 9 +2606 6 +83 2 +2657 10 +2486 4 +4052 7 +2874 10 +520 4 +2485 6 +2587 7 +3806 7 +4024 4 +3391 10 +2760 6 +3009 2 +144 3 +1414 5 +3565 8 +3128 9 +3192 7 +3333 8 +318 1 +3937 7 +2027 2 +951 10 +2610 9 +1260 10 +3343 9 +3218 6 +3079 9 +1587 3 +1032 5 +658 8 +868 10 +1085 10 +749 5 +4028 6 +1029 3 +3979 10 +4001 9 +1181 8 +1281 6 +320 5 +68 4 +4085 2 +1857 3 +3240 1 +1193 2 +525 5 +3535 7 +2438 6 +1771 9 +2812 1 +3815 8 +144 2 +366 6 +1847 2 +1434 3 +3170 6 +76 1 +3522 7 +3703 1 +2016 10 +1516 1 +1804 2 +1727 8 +2682 6 +2672 5 +687 1 +442 2 +314 4 +2553 3 +2489 5 +1319 9 +2001 2 +2297 1 +935 8 +3378 6 +2472 4 +1358 4 +1640 4 +1958 10 +1719 7 +32 9 +3620 10 +2455 9 +1186 8 +3283 8 +1937 2 +2787 3 +2208 6 +1680 8 +3348 5 +886 5 +213 10 +3651 6 +2328 5 +3140 1 +783 1 +3679 2 +486 3 +3997 6 +2420 2 +3116 7 +2596 6 +651 1 +594 8 +1197 5 +1954 5 +2844 1 +2550 5 +311 2 +3818 4 +3099 7 +4072 10 +4085 9 +1618 9 +2572 6 +3031 8 +43 10 +224 9 +1515 2 +1248 2 +3187 6 +2950 8 +3835 5 +2238 4 +4030 2 +2980 10 +2152 8 +1105 5 +1238 5 +3564 10 +1892 3 +1019 10 +1351 8 +2964 9 +2191 9 +4058 7 +1366 7 +1843 10 +2136 7 +210 2 +1870 3 +2307 8 +1405 4 +2098 2 +420 3 +3166 9 +3400 1 +2946 9 +1210 6 +850 6 +906 3 +256 10 +2817 7 +2319 10 +1367 8 +717 5 +149 4 +1035 4 +964 3 +1747 2 +3494 1 +3187 1 +1071 8 +2716 1 +319 4 +3392 7 +851 2 +1355 4 +1907 2 +385 8 +958 1 +933 2 +4004 7 +306 9 +637 4 +2620 10 +131 8 +3138 7 +3146 5 +3665 10 +342 1 +2361 7 +3332 7 +1153 4 +1208 3 +3453 5 +2285 2 +1692 9 +1565 9 +3932 5 +1129 2 +1282 4 +1323 7 +194 6 +1363 1 +1288 3 +4087 2 +3244 2 +1408 4 +3278 9 +3000 8 +84 3 +3788 6 +3777 9 +352 6 +3082 2 +3815 8 +401 3 +278 7 +1410 6 +736 4 +1051 2 +2683 8 +2580 3 +2862 5 +769 5 +3626 5 +4006 7 +618 5 +1977 1 +771 7 +4026 8 +3212 4 +1323 5 +2699 8 +3683 7 +4081 10 +4042 10 +2115 
5 +2221 1 +1006 6 +3965 7 +1466 5 +2782 7 +3883 10 +465 3 +3280 1 +1152 10 +557 9 +1061 10 +1181 1 +1449 3 +2154 9 +3221 10 +108 6 +1115 10 +2308 6 +287 8 +1775 5 +1918 5 +978 6 +1054 9 +848 9 +1469 8 +729 4 +2086 4 +2710 6 +3468 2 +2673 3 +676 9 +3198 4 +985 1 +3647 8 +1025 7 +1461 5 +3741 7 +3780 10 +2885 8 +37 9 +3114 1 +1704 4 +2775 3 +3515 7 +2277 9 +2176 1 +3196 7 +3231 10 +2693 4 +2855 8 +1774 1 +3426 6 +1097 9 +2088 8 +178 1 +405 4 +3199 9 +2633 8 +1241 1 +3782 8 +3637 2 +2732 9 +1295 10 +2952 8 +585 2 +1605 6 +1753 6 +3015 5 +184 5 +872 3 +4030 4 +1418 5 +2318 7 +3813 7 +2012 6 +574 7 +2574 10 +2898 2 +1492 3 +3735 7 +2882 9 +1446 10 +1671 10 +1146 3 +2947 5 +3150 8 +2796 4 +872 8 +3915 10 +2135 5 +1806 4 +3748 9 +3824 10 +3866 4 +2236 9 +3597 7 +3421 7 +608 9 +227 10 +1735 10 +674 10 +2621 8 +2742 8 +2056 7 +3717 3 +2211 10 +2546 2 +2210 6 +2756 5 +268 3 +2698 2 +276 4 +424 4 +3217 3 +221 2 +660 7 +1626 4 +2158 10 +324 7 +609 3 +3056 1 +2207 8 +1253 7 +1224 3 +2636 9 +3560 6 +3137 7 +3178 5 +3879 3 +2797 5 +2394 9 +550 8 +3610 2 +384 7 +1668 7 +3456 4 +1876 5 +1874 1 +1244 8 +3161 6 +1389 6 +1097 7 +743 3 +3599 9 +2129 2 +3620 7 +858 3 +3993 4 +1686 6 +2561 3 +3456 7 +656 6 +3245 1 +734 5 +849 1 +897 2 +2861 1 +1711 8 +1549 3 +1081 3 +1208 6 +4063 3 +66 8 +2047 7 +2031 6 +704 3 +1293 5 +2441 8 +2816 9 +3667 10 +709 10 +1702 8 +3080 9 +389 7 +1949 1 +3887 7 +1712 2 +3273 10 +3876 2 +1662 6 +117 10 +923 6 +193 2 +3301 10 +3128 6 +2148 5 +2265 6 +3990 7 +2615 3 +3310 7 +2775 5 +3110 10 +2657 9 +1001 7 +1065 1 +1793 7 +533 9 +2986 6 +2089 9 +26 3 +2384 5 +3568 8 +969 2 +3062 3 +2330 6 +661 8 +956 7 +1684 3 +448 4 +2293 1 +2192 8 +3401 1 +1961 2 +869 3 +132 9 +902 9 +3533 6 +2493 5 +2162 2 +1644 7 +2240 1 +2152 6 +1617 9 +1023 5 +1934 10 +3861 3 +2532 1 +2440 2 +3584 6 +1317 1 +1939 8 +931 7 +125 4 +2073 4 +1806 3 +2377 2 +2070 3 +532 2 +741 10 +1092 6 +2350 1 +455 5 +2687 1 +2664 4 +3497 4 +1812 6 +1456 1 +394 8 +3347 3 +937 10 +3880 2 +1317 9 +3140 4 +300 8 
+397 1 +2059 5 +2476 9 +1608 1 +3288 5 +2640 2 +1757 3 +2641 6 +2603 5 +2545 2 +3159 9 +3387 7 +3987 8 +1645 5 +2049 1 +2995 1 +1532 1 +2478 3 +2599 7 +3035 2 +768 6 +525 2 +3308 10 +246 9 +1723 5 +2727 9 +518 9 +1222 5 +677 2 +1196 2 +1824 3 +3310 4 +1129 5 +2665 2 +2004 6 +862 2 +1190 2 +2075 5 +2657 9 +2618 7 +3337 7 +3113 9 +1970 4 +1988 1 +863 8 +2625 10 +147 9 +1395 6 +2187 2 +1039 5 +1843 6 +1805 10 +1913 1 +2793 9 +2420 3 +1987 7 +1233 8 +3491 4 +3761 6 +2967 2 +443 3 +1502 6 +1586 10 +99 9 +2373 7 +3045 2 +945 7 +1145 2 +658 8 +682 2 +2717 2 +3663 1 +3178 1 +1558 10 +3148 3 +1159 2 +968 8 +3862 8 +2476 6 +63 9 +142 7 +2412 3 +2505 3 +4079 7 +3113 9 +1160 8 +1234 5 +2604 6 +3123 4 +366 3 +2954 1 +2298 9 +3526 7 +3071 2 +1579 2 +3108 10 +341 10 +3385 8 +3201 2 +4024 3 +3989 1 +2840 3 +803 10 +1698 8 +1100 10 +2982 9 +1657 7 +3584 8 +3626 3 +1983 1 +1765 10 +3843 4 +3101 10 +2972 8 +1692 9 +1874 4 +188 2 +1425 10 +2366 2 +3314 1 +2063 9 +2354 1 +2565 4 +1190 10 +3072 7 +945 6 +2670 10 +102 3 +3070 7 +1750 5 +506 8 +3060 2 +3108 9 +40 10 +1995 4 +2963 2 +217 6 +1585 5 +661 5 +769 4 +3476 9 +1583 7 +128 4 +1154 1 +3485 3 +276 2 +2850 4 +4026 9 +1551 8 +3113 3 +1887 5 +1895 7 +653 4 +960 4 +4060 9 +2873 8 +1374 3 +2762 2 +2336 5 +2954 4 +3048 7 +3791 9 +2818 3 +2544 9 +2364 6 +1081 8 +1369 3 +2397 9 +3635 8 +3219 2 +1811 4 +1532 2 +2492 4 +229 9 +1725 8 +1608 8 +257 2 +486 9 +2756 5 +212 8 +3191 9 +1855 4 +3752 8 +2958 4 +1134 3 +3533 3 +1951 10 +2131 4 +2610 9 +2919 4 +3949 2 +3292 2 +1456 1 +2276 4 +3196 5 +3501 8 +1020 10 +1175 5 +3252 3 +3757 5 +2920 4 +1755 9 +410 3 +605 7 +1207 8 +2536 5 +3803 4 +972 9 +259 6 +2712 3 +1886 6 +1610 10 +3107 5 +100 10 +1551 1 +2627 8 +876 6 +979 1 +3767 7 +1682 4 +3011 10 +1346 3 +4060 2 +749 2 +985 4 +1607 7 +2158 9 +219 10 +1320 10 +989 8 +2288 9 +4002 9 +3639 3 +2251 6 +108 8 +3571 5 +1871 4 +1798 8 +3303 9 +830 10 +204 5 +2710 4 +690 1 +871 2 +3513 7 +1718 6 +1493 8 +2766 2 +2847 7 +3304 4 +1122 5 +4016 9 +3035 3 +3626 7 
+1202 3 +2422 2 +2267 9 +2837 2 +1253 10 +2135 4 +2592 7 +895 1 +497 7 +258 8 +2515 9 +3309 7 +1945 10 +279 7 +807 7 +750 4 +2745 7 +3154 2 +1091 1 +55 6 +3749 2 +2469 8 +1771 1 +434 8 +935 8 +3013 3 +241 10 +343 3 +3839 7 +2967 4 +2877 9 +729 1 +2844 2 +1627 1 +1805 6 +355 3 +3715 3 +3513 3 +294 4 +3911 8 +1748 6 +3890 10 +1027 1 +3646 7 +1210 8 +3549 3 +882 4 +2439 8 +3578 7 +606 3 +3881 6 +2532 6 +1396 8 +3425 10 +778 8 +3003 10 +1838 10 +1596 5 +416 8 +2314 4 +2755 9 +2133 6 +3384 1 +3039 10 +2575 8 +93 7 +134 10 +2137 3 +1431 2 +1299 6 +1745 8 +943 5 +496 2 +394 1 +0 8 +693 5 +3931 3 +3976 10 +3829 10 +3181 7 +1338 5 +3057 10 +2894 9 +2043 1 +3121 10 +2248 10 +1188 8 +265 8 +3422 3 +3565 4 +649 9 +2980 1 +2923 4 +3570 8 +357 3 +442 4 +1470 4 +2726 10 +4003 8 +1331 5 +3786 7 +2368 3 +3113 8 +902 3 +426 8 +1570 10 +1944 2 +4049 7 +3548 1 +728 5 +1047 7 +3482 10 +2645 2 +928 9 +1986 1 +209 3 +2623 2 +3860 4 +1380 8 +4026 7 +3918 5 +1051 10 +3944 3 +3250 2 +694 10 +402 6 +1707 7 +4037 10 +1283 5 +1261 1 +104 10 +2859 1 +1262 7 +3877 8 +466 8 +122 1 +3346 9 +3570 8 +1921 8 +3987 3 +1670 10 +2598 2 +3718 10 +2091 5 +3745 9 +1009 5 +2823 3 +2506 3 +2945 4 +1941 1 +2372 4 +833 7 +2509 4 +3358 1 +401 7 +3688 8 +3441 4 +306 9 +3991 7 +1636 5 +789 9 +3662 6 +728 2 +3376 3 +2619 7 +3994 8 +3485 1 +1844 4 +2819 3 +1027 9 +1267 7 +2068 4 +1659 6 +1878 4 +3620 8 +778 3 +3801 8 +1354 1 +1967 5 +3829 7 +1123 5 +3990 9 +3199 3 +2923 3 +1366 1 +3516 10 +1228 4 +1367 4 +3435 7 +1213 4 +564 7 +3668 7 +1730 5 +2317 6 +1688 4 +1647 1 +3429 6 +1080 4 +721 1 +1795 8 +3204 9 +3529 8 +581 3 +1833 6 +2435 2 +3641 4 +3085 9 +1569 6 +2799 6 +1389 7 +418 7 +3103 6 +2438 6 +3126 5 +501 9 +2675 9 +750 1 +504 3 +372 10 +1741 4 +3746 6 +4075 10 +2654 8 +622 10 +633 5 +2107 10 +869 3 +66 3 +1724 8 +2734 7 +3801 2 +414 8 +2164 1 +2812 2 +396 9 +2526 7 +3088 1 +277 4 +3455 4 +2535 8 +3039 6 +2670 3 +762 3 +2842 4 +3746 1 +1691 4 +429 4 +3319 2 +192 3 +3180 4 +3633 10 +1232 10 +2420 2 +622 8 +1721 
1 +3665 8 +2476 1 +2432 5 +2419 7 +1778 6 +2852 8 +3101 5 +948 2 +1896 6 +311 7 +3321 8 +1686 3 +3126 2 +2589 5 +3920 6 +3499 3 +404 2 +1581 6 +3045 4 +3363 4 +481 5 +3439 8 +2868 8 +2306 2 +1331 1 +1352 4 +797 3 +2136 7 +2222 5 +1796 10 +1541 3 +144 7 +3522 1 +3608 5 +3480 2 +423 9 +1621 1 +3896 3 +614 8 +610 10 +975 6 +287 5 +1651 6 +4087 7 +1979 10 +1406 3 +2786 2 +2682 8 +901 6 +2356 1 +1623 9 +1311 10 +3431 6 +873 7 +2216 4 +3918 1 +3801 4 +2766 8 +4005 10 +767 1 +1933 2 +1532 3 +79 5 +2101 3 +2366 8 +412 6 +2925 9 +3375 3 +1773 4 +4009 8 +3572 8 +1512 5 +1188 2 +3942 7 +3366 6 +1544 6 +3548 10 +340 1 +678 6 +3557 6 +922 6 +3996 5 +1672 7 +1910 5 +1099 2 +3570 9 +4029 2 +3950 6 +1599 10 +1841 9 +3785 5 +3981 6 +3063 9 +986 9 +347 10 +1832 5 +2273 1 +2509 8 +2470 5 +2067 10 +719 6 +1269 8 +2941 1 +4031 7 +3032 3 +2822 5 +916 7 +1781 4 +2107 1 +3950 2 +2227 7 +3153 4 +610 5 +913 3 +403 6 +340 7 +3573 10 +1325 9 +881 7 +3903 4 +799 9 +1249 8 +2114 3 +3648 1 +4076 4 +3782 10 +68 6 +3936 9 +2202 9 +3932 4 +1467 4 +2978 5 +476 4 +222 9 +2747 1 +1227 9 +1823 8 +2387 10 +1440 4 +2887 10 +943 4 +875 5 +1401 1 +1615 10 +3520 2 +2384 10 +2884 8 +3669 5 +2387 9 +164 6 +172 3 +2510 2 +2926 9 +3235 6 +1881 5 +1950 7 +3728 6 +1128 1 +417 6 +836 6 +149 7 +1300 6 +3946 8 +86 10 +3291 8 +1233 3 +3856 1 +3118 1 +1761 1 +430 5 +938 6 +297 4 +1548 1 +2995 4 +1048 3 +3783 5 +3499 7 +3868 2 +2272 10 +4007 10 +3906 10 +309 3 +1660 10 +2925 3 +2792 7 +773 4 +3786 1 +3468 5 +2748 1 +1680 6 +978 7 +815 5 +1632 6 +291 9 +3937 1 +1277 1 +4071 2 +3781 5 +1858 3 +399 6 +1108 8 +3145 6 +2173 9 +3652 6 +1588 4 +1241 7 +2724 5 +2344 6 +279 2 +2602 8 +588 9 +3281 5 +742 2 +3824 3 +2506 9 +60 4 +2815 4 +3679 1 +2121 7 +755 9 +3033 1 +1025 3 +1265 10 +1513 2 +1802 3 +2800 9 +1695 1 +229 10 +466 1 +126 8 +4027 5 +943 7 +4066 8 +2329 5 +3925 2 +3970 6 +553 4 +3589 3 +1504 10 +939 2 +829 8 +3608 4 +3197 9 +1613 4 +2219 3 +2744 10 +296 7 +3970 6 +3902 5 +1915 2 +3423 4 +3305 9 +3303 9 +1819 5 +3765 3 
+509 6 +1146 9 +2902 6 +4035 4 +950 9 +1946 7 +3092 3 +397 3 +2952 4 +870 7 +3611 6 +2213 10 +2894 3 +540 8 +1944 3 +1879 8 +2040 4 +1552 10 +2498 2 +823 4 +452 8 +3351 1 +3025 7 +3241 5 +2244 7 +3168 4 +2072 6 +195 5 +880 6 +1257 7 +3455 2 +504 7 +1848 2 +2660 4 +2317 8 +1884 3 +225 4 +1809 10 +552 5 +1112 5 +340 8 +3021 2 +3084 3 +2140 6 +519 8 +1879 2 +2878 5 +1785 10 +1589 2 +1259 2 +3609 5 +2048 10 +2345 10 +670 8 +3944 6 +1773 9 +1612 7 +4076 4 +2856 9 +332 9 +2127 8 +1091 2 +3606 6 +751 3 +4036 9 +3866 9 +1326 3 +1120 9 +3361 8 +417 6 +1075 2 +1459 6 +1269 5 +3602 5 +2276 1 +678 3 +3846 9 +206 3 +1592 5 +1677 4 +2752 4 +2158 1 +2350 6 +2931 8 +2294 1 +1215 1 +363 3 +1423 3 +1526 10 +199 1 +3893 7 +3443 1 +2004 1 +1796 3 +292 9 +3030 5 +1002 7 +1657 4 +717 4 +1567 3 +663 8 +4037 10 +1253 9 +2510 4 +1699 4 +2198 6 +202 8 +777 10 +3846 3 +2196 5 +2910 2 +2246 9 +3640 9 +1491 5 +1503 10 +1670 4 +344 7 +3988 9 +1347 2 +502 10 +2808 3 +3885 5 +2786 2 +267 3 +3512 3 +3211 10 +491 9 +2175 7 +2833 3 +3513 6 +3403 9 +973 10 +1560 3 +734 4 +533 2 +1839 1 +1926 4 +2975 1 +2156 3 +3377 2 +2299 4 +666 3 +3981 2 +2857 6 +627 6 +34 7 +3789 7 +2067 3 +751 6 +2819 9 +1311 9 +1113 5 +2389 1 +2600 9 +1820 6 +1090 9 +3392 3 +2987 2 +2031 2 +2522 2 +4004 1 +151 2 +3816 3 +2188 6 +2184 7 +540 2 +2076 6 +3861 10 +3289 2 +1024 6 +2344 1 +1880 4 +1704 3 +395 2 +3616 3 +3136 4 +2388 5 +3016 3 +3086 8 +2745 3 +2143 7 +1009 9 +3566 9 +155 8 +330 4 +3616 3 +2777 5 +34 7 +3824 2 +58 3 +1069 9 +1959 6 +1326 10 +121 1 +39 2 +708 8 +433 3 +2002 3 +1537 9 +459 1 +2062 1 +2212 1 +1689 2 +301 8 +785 9 +1777 4 +2689 4 +2614 5 +3668 7 +3096 8 +433 3 +3618 1 +902 10 +760 3 +1181 10 +570 1 +3705 6 +2119 1 +2040 7 +75 9 +945 8 +1652 2 +261 4 +1925 5 +400 1 +1630 4 +3873 6 +3964 3 +3633 10 +2434 6 +3058 9 +437 2 +1939 4 +1577 1 +585 5 +3775 1 +3825 3 +3629 7 +98 3 +593 10 +2123 9 +2668 9 +1845 8 +440 6 +3140 4 +1397 8 +2796 6 +1974 10 +2409 7 +1383 6 +3167 9 +3146 2 +3175 1 +2007 2 +4083 2 +782 9 
+2423 7 +41 5 +2687 9 +1083 5 +2213 6 +1865 10 +1077 2 +770 4 +3067 1 +2747 3 +3136 5 +2861 7 +2093 4 +3547 4 +3509 10 +3388 2 +3252 6 +2245 10 +2690 1 +915 7 +2760 2 +2304 10 +1416 3 +1226 10 +2056 7 +371 4 +1700 4 +1080 3 +722 8 +1133 4 +1915 7 +22 8 +368 2 +1223 5 +513 3 +216 5 +923 10 +4081 6 +1186 7 +2072 10 +335 2 +3573 6 +1543 8 +1825 3 +110 10 +2327 1 +1010 6 +1954 4 +3420 2 +1862 4 +3075 10 +2937 9 +3747 3 +322 2 +2944 9 +3751 6 +2462 10 +3596 9 +686 8 +2853 1 +2072 1 +2941 9 +513 10 +3508 4 +419 3 +1327 2 +1594 10 +3150 7 +3013 1 +3214 3 +2671 5 +3782 10 +3802 1 +3958 10 +3795 6 +1522 6 +1401 4 +220 6 +2269 10 +3654 3 +755 1 +1803 6 +3780 2 +194 4 +4057 1 +2433 1 +856 7 +3131 5 +3963 6 +1949 6 +1643 7 +3594 6 +342 10 +3132 8 +1849 3 +2588 5 +3774 9 +186 9 +2446 4 +162 3 +1681 8 +320 1 +473 5 +1648 8 +809 5 +1421 6 +1656 7 +2678 4 +3269 2 +2563 7 +669 4 +921 2 +3819 10 +1546 6 +2286 9 +381 3 +3492 5 +1230 3 +195 4 +3236 9 +631 6 +1848 8 +2904 2 +3668 2 +1794 8 +3286 1 +3144 4 +1830 7 +1039 8 +3926 6 +3408 5 +605 1 +3806 10 +2356 3 +2266 1 +1520 5 +702 8 +380 3 +122 7 +1726 4 +1139 4 +3062 3 +2496 7 +3760 10 +211 6 +2970 4 +1211 3 +2315 9 +2739 6 +1137 9 +1725 6 +3946 4 +3446 10 +1218 10 +3736 5 +3246 1 +3816 7 +3051 6 +340 3 +3934 8 +2177 8 +963 4 +1978 9 +1076 5 +3329 7 +2824 6 +900 7 +1077 7 +591 5 +809 5 +1175 2 +598 2 +3882 2 +1753 1 +3796 1 +2958 7 +2551 5 +2574 7 +2240 1 +578 1 +2462 10 +2082 9 +4043 2 +489 4 +2008 7 +3176 4 +2675 9 +3178 2 +1655 7 +3293 3 +433 6 +3353 2 +2230 7 +179 5 +2290 5 +69 9 +2822 4 +908 1 +1488 3 +103 1 +1803 10 +3633 5 +1447 5 +1165 2 +414 5 +3311 5 +1882 8 +3396 10 +2937 9 +1823 2 +1895 2 +3746 9 +1409 3 +677 4 +266 6 +2961 3 +3229 2 +284 10 +510 5 +1385 4 +1105 9 +1481 10 +2218 7 +3113 9 +1185 10 +481 4 +3427 4 +859 5 +3885 8 +2238 5 +1933 10 +3188 7 +3824 2 +3712 2 +3336 7 +1127 8 +3648 5 +2894 9 +1370 4 +2276 2 +2952 6 +3528 10 +3977 6 +3714 3 +255 3 +1946 4 +2867 10 +978 8 +3391 1 +3137 6 +3584 6 +3170 1 +1441 6 +3988 
2 +68 1 +2842 1 +2574 5 +525 10 +2742 6 +873 7 +3436 9 +836 2 +1320 4 +298 4 +3559 1 +3008 2 +2519 5 +649 2 +3098 6 +1217 9 +430 4 +508 3 +3641 8 +2941 8 +1172 7 +3938 8 +987 10 +2640 7 +2175 7 +1589 1 +3858 6 +1799 7 +2386 5 +2921 5 +229 9 +1875 8 +3662 5 +3382 5 +1457 8 +2667 1 +1020 4 +1529 8 +2273 3 +3537 9 +2486 3 +3058 8 +3500 4 +3907 2 +4023 8 +2301 5 +875 10 +853 4 +1284 10 +1577 7 +568 2 +3351 9 +3747 8 +1624 8 +3734 1 +1924 2 +453 5 +2140 10 +2486 6 +886 4 +1088 4 +1911 8 +1722 3 +260 6 +1655 1 +1627 10 +575 4 +2477 5 +3718 5 +1236 2 +1886 10 +608 1 +2025 10 +442 8 +664 3 +3810 7 +802 6 +1433 1 +1700 8 +1823 7 +3167 3 +679 6 +2025 9 +3808 7 +1765 9 +2703 1 +2508 6 +1762 5 +1219 4 +2483 10 +3182 7 +3739 2 +1473 6 +1270 1 +3942 2 +3869 10 +650 9 +713 1 +2696 6 +2817 7 +2214 9 +3339 8 +3379 2 +444 1 +837 9 +3325 6 +3605 7 +133 9 +3903 5 +129 7 +919 9 +67 2 +1519 6 +2093 2 +863 9 +2481 10 +2267 4 +388 1 +4034 5 +2236 2 +2963 8 +2563 10 +2641 10 +1925 7 +435 10 +946 2 +1408 3 +1672 9 +1064 10 +690 3 +1566 6 +3434 1 +2659 9 +3511 9 +157 1 +1768 6 +3980 2 +3126 4 +1763 7 +1494 1 +956 9 +1267 4 +1485 1 +368 10 +3108 5 +1683 9 +2098 5 +2746 6 +612 3 +1994 5 +3867 5 +2411 9 +3485 4 +3200 6 +807 3 +2942 5 +3652 6 +3093 5 +1102 9 +3343 5 +1669 7 +366 3 +2797 6 +1969 3 +3297 4 +2688 10 +3444 6 +1576 8 +2409 4 +19 5 +76 4 +241 8 +126 2 +342 5 +2267 8 +322 3 +1458 3 +771 5 +355 7 +1012 6 +1410 2 +225 4 +625 1 +1537 5 +3643 4 +4017 7 +1681 10 +18 7 +988 9 +531 6 +3340 1 +3715 5 +552 4 +481 5 +2289 9 +2799 2 +1854 9 +3959 4 +3941 7 +697 4 +3044 5 +3879 10 +823 2 +482 7 +766 5 +1611 2 +1186 1 +1063 5 +3696 4 +3997 4 +1121 2 +1532 4 +3565 2 +3844 8 +3642 2 +2298 8 +3612 4 +3319 6 +2730 3 +1361 9 +2790 3 +2653 10 +3237 4 +2719 2 +88 5 +894 1 +4048 3 +645 4 +2641 7 +970 9 +3808 3 +3216 3 +343 1 +2582 9 +3595 5 +2230 10 +2953 10 +2343 8 +2333 5 +2659 3 +3320 10 +2310 9 +3659 1 +2166 6 +1147 7 +3420 6 +3912 1 +2932 6 +4095 5 +815 3 +671 10 +1709 10 +437 3 +2612 7 +948 10 +582 8 
+600 3 +2057 10 +1943 1 +3193 6 +1005 5 +2603 2 +1975 6 +1551 7 +861 9 +805 4 +2556 8 +2980 8 +1150 9 +2859 8 +3236 10 +2504 7 +3151 2 +2432 10 +1337 8 +3581 5 +2099 6 +2249 1 +2755 9 +3959 9 +2478 4 +1950 2 +696 9 +783 8 +3474 10 +1250 4 +1640 4 +406 8 +1045 2 +2403 10 +465 1 +2555 10 +867 6 +932 5 +782 8 +991 1 +3450 4 +2163 7 +4014 5 +2548 10 +2088 9 +2206 8 +2695 2 +2360 8 +3681 2 +1849 7 +2659 4 +688 9 +375 8 +1702 10 +110 1 +2464 1 +3988 5 +1309 4 +316 7 +3777 2 +304 6 +3448 1 +3484 3 +414 2 +2171 3 +2190 5 +1234 6 +85 5 +4036 8 +2928 8 +832 9 +800 10 +799 9 +598 9 +3154 3 +3829 10 +2183 9 +303 6 +2100 3 +3751 2 +1404 2 +2872 3 +3529 10 +3178 3 +3184 5 +2229 4 +2452 1 +4064 3 +2624 4 +1858 5 +4038 9 +2116 3 +3140 5 +1762 2 +1278 7 +3472 5 +3779 9 +3487 8 +1745 1 +904 3 +1487 9 +1532 8 +1159 2 +2898 1 +1408 10 +2516 10 +2320 1 +3764 3 +2506 7 +1887 2 +1457 6 +2111 3 +1434 8 +328 9 +302 7 +3819 6 +1137 3 +2846 9 +1432 1 +3129 8 +2929 5 +1912 5 +1461 10 +3630 5 +620 3 +3217 5 +3176 10 +2691 5 +923 9 +130 6 +3075 8 +3104 2 +634 9 +1953 5 +840 10 +788 9 +2142 7 +788 10 +3641 10 +2398 10 +106 2 +2817 9 +2196 2 +1266 10 +4091 1 +2069 1 +751 6 +3077 5 +2497 6 +1919 8 +2524 6 +547 10 +3896 2 +3216 6 +2263 1 +74 8 +3736 4 +2958 7 +221 9 +2353 1 +3987 7 +3894 2 +3556 3 +1661 3 +1270 4 +3749 6 +3599 1 +2712 6 +1776 8 +1370 1 +1757 9 +3157 4 +2404 10 +779 4 +3029 2 +3154 8 +1503 2 +1166 8 +1657 9 +1727 9 +2278 2 +575 7 +3046 5 +2276 1 +763 3 +3781 5 +1355 6 +2091 4 +3323 9 +904 9 +2388 8 +261 6 +1099 2 +827 10 +1204 4 +728 5 +717 5 +1425 1 +1017 5 +3516 9 +1395 3 +1883 1 +3193 8 +1838 1 +1226 10 +1646 6 +2328 1 +2603 6 +32 5 +2660 6 +3992 7 +977 5 +3369 2 +211 1 +1526 5 +3302 10 +3332 3 +1422 3 +3467 5 +252 5 +4001 10 +3832 3 +647 5 +1311 8 +2676 7 +777 3 +1459 8 +3346 1 +3498 3 +4042 10 +2097 1 +928 8 +1523 6 +1179 8 +229 3 +111 3 +3898 5 +1932 9 +1413 1 +2283 7 +3192 3 +3533 7 +3581 1 +3549 4 +425 7 +2740 2 +2600 2 +4006 3 +1513 8 +4017 5 +3449 6 +3751 5 +3518 3 +771 2 
+2254 5 +3596 4 +1826 9 +1265 4 +658 4 +1015 2 +3840 2 +186 3 +1904 4 +988 6 +2508 4 +3309 9 +1553 6 +1894 7 +4064 10 +2256 1 +1399 6 +3120 9 +3891 3 +2364 1 +3351 2 +3364 9 +3724 8 +3279 7 +809 10 +3446 1 +1057 3 +3114 9 +3952 5 +2817 3 +312 3 +437 10 +1690 10 +2620 2 +2785 7 +3914 2 +654 8 +2473 5 +570 10 +1857 8 +2927 3 +3633 8 +2586 10 +1979 9 +3221 10 +185 8 +3094 3 +10 3 +335 7 +3610 7 +3820 9 +3210 9 +788 9 +224 4 +2623 5 +2714 6 +1288 6 +597 7 +1995 9 +1699 7 +2072 6 +3344 6 +2649 5 +1779 6 +324 1 +1018 1 +2155 7 +869 7 +1636 2 +3612 5 +2360 5 +1043 10 +2716 10 +1962 7 +1923 8 +2994 10 +1160 3 +138 10 +1379 4 +942 1 +2718 5 +3565 5 +1245 5 +641 6 +1953 2 +1186 4 +126 4 +3651 1 +741 2 +4026 7 +1044 1 +3329 6 +335 3 +757 9 +1959 1 +970 10 +1374 6 +1372 3 +2080 6 +3134 5 +2353 9 +3 9 +2327 3 +3715 9 +2304 7 +3320 2 +3035 2 +954 6 +3934 10 +2073 2 +2233 2 +799 10 +1736 1 +3663 9 +985 4 +233 5 +3515 2 +993 6 +2173 6 +3041 6 +2718 7 +3604 5 +1238 3 +2604 4 +3032 8 +3675 8 +905 7 +3644 2 +1388 7 +3322 1 +3798 1 +3338 4 +1194 7 +2614 3 +1600 8 +2937 8 +1452 10 +893 4 +4077 9 +2633 10 +3024 2 +45 4 +2351 1 +44 7 +248 10 +2566 1 +2282 8 +2721 6 +489 9 +2994 10 +3121 7 +3316 1 +2512 2 +2221 7 +510 1 +3000 3 +2551 5 +3512 4 +3770 5 +472 6 +1555 1 +1540 8 +474 2 +3574 1 +3385 9 +1272 3 +3225 3 +1225 8 +748 4 +1122 1 +376 4 +1160 3 +1260 2 +2478 6 +3236 4 +1873 9 +2811 2 +2034 1 +2712 3 +3957 7 +1364 4 +1303 1 +3264 5 +224 10 +714 7 +2487 8 +2272 6 +1067 9 +1252 3 +242 4 +3523 5 +3954 5 +2360 7 +1939 8 +3576 5 +1035 2 +509 3 +1477 6 +3307 8 +3731 6 +2372 7 +736 5 +1469 1 +3459 1 +3949 7 +2502 5 +2273 7 +1007 8 +2756 7 +1486 1 +2849 4 +976 1 +2354 3 +348 5 +3211 4 +3659 9 +3949 2 +1305 10 +779 9 +1559 4 +1827 8 +3133 6 +534 2 +2501 5 +1378 6 +2656 10 +4060 10 +758 9 +2607 8 +535 8 +3398 6 +1572 6 +4092 2 +2856 2 +317 10 +1333 3 +2024 4 +2912 7 +1334 10 +2471 3 +1186 7 +1027 2 +1139 1 +3857 1 +118 6 +15 9 +309 5 +2691 10 +1855 7 +3243 9 +3972 5 +170 5 +783 6 +1648 10 +2250 
1 +3008 10 +452 10 +119 2 +2175 8 +2432 2 +3369 9 +340 5 +1207 2 +521 3 +3438 10 +2126 8 +3333 3 +3293 7 +610 3 +1504 9 +3487 5 +3962 8 +834 7 +3231 1 +3834 5 +2801 7 +2515 3 +3627 4 +2839 4 +1796 10 +657 7 +9 7 +1298 1 +2113 1 +2261 4 +1215 4 +570 2 +2509 3 +499 8 +1675 2 +1159 8 +1633 6 +2181 5 +180 5 +496 3 +3674 9 +1866 10 +2576 4 +2640 10 +2986 7 +741 7 +34 2 +4050 10 +624 7 +2524 2 +3795 9 +1544 8 +3232 1 +2544 7 +3116 8 +1180 9 +3784 9 +2335 1 +2941 7 +2329 8 +637 5 +1408 5 +974 6 +747 10 +2873 9 +2754 10 +1682 2 +1962 3 +3132 7 +3578 5 +566 6 +1152 6 +2729 4 +3160 9 +1700 4 +1789 5 +2309 4 +1773 4 +371 2 +3821 6 +3587 7 +2523 3 +993 9 +2604 5 +3284 7 +3117 2 +3249 6 +1839 5 +1228 6 +1835 8 +3598 2 +1284 7 +3343 1 +659 6 +2633 4 +1227 8 +2996 9 +1224 3 +634 7 +3985 4 +262 1 +2655 9 +581 4 +3039 10 +2723 1 +1957 1 +2528 2 +244 5 +137 3 +4075 3 +3436 2 +4087 7 +3641 10 +2620 2 +3511 3 +464 5 +1857 4 +749 4 +2694 7 +3515 2 +3285 4 +2205 5 +1417 8 +1834 10 +3335 9 +2735 2 +2596 5 +4057 6 +3832 2 +3595 10 +126 8 +2982 1 +2578 1 +1442 6 +3415 5 +2849 8 +2145 9 +3870 3 +3082 1 +3210 10 +3737 1 +1449 7 +1304 3 +2853 7 +329 1 +1904 3 +3690 2 +3711 2 +2568 9 +846 4 +1446 2 +106 3 +3568 9 +1030 5 +2394 10 +773 1 +3241 8 +73 8 +2778 6 +119 2 +2873 4 +158 8 +2655 4 +2269 10 +573 1 +3776 8 +435 6 +1733 8 +2862 7 +3845 2 +3189 8 +3969 1 +3108 1 +1215 2 +511 2 +2863 5 +3713 2 +3127 4 +3081 6 +1419 10 +120 10 +2843 6 +3079 7 +382 7 +2755 8 +2196 9 +363 5 +3175 7 +3447 9 +4021 1 +2999 3 +2210 4 +245 8 +3486 6 +196 6 +4055 7 +4083 5 +727 7 +623 6 +1936 1 +590 3 +690 10 +1327 1 +2046 8 +521 9 +3709 8 +1357 8 +3306 4 +2909 9 +808 9 +2466 10 +2968 7 +792 3 +1565 10 +1199 3 +977 7 +2759 7 +2836 5 +1631 9 +844 7 +1675 9 +1770 6 +1131 5 +3687 1 +2869 7 +3020 9 +2215 10 +3912 10 +3568 5 +472 3 +3801 2 +2739 1 +179 5 +127 4 +1912 5 +198 3 +2111 5 +1162 7 +425 9 +731 1 +1410 10 +3101 9 +1103 5 +1485 7 +1555 8 +2825 9 +3312 8 +1881 1 +3349 1 +3721 2 +1049 5 +2363 8 +3727 2 +2794 10 
+2658 5 +3487 1 +2979 10 +2119 10 +1466 3 +3963 2 +3181 1 +1866 8 +1646 3 +2777 2 +2483 8 +3825 9 +633 3 +3489 8 +2970 7 +1956 4 +3246 3 +298 5 +79 3 +2958 10 +1600 6 +3610 7 +1230 3 +2683 6 +2687 5 +4054 2 +699 7 +3400 10 +1956 4 +1907 4 +3961 3 +3709 4 +3893 8 +2588 8 +632 7 +3859 6 +1001 3 +1108 10 +3667 8 +1009 3 +3586 9 +3187 10 +1790 8 +2542 3 +352 6 +2829 2 +758 9 +3788 8 +1950 8 +1995 2 +3562 4 +1812 3 +3072 4 +973 7 +98 6 +2162 10 +2251 7 +1984 8 +1871 8 +2085 10 +3638 3 +2192 3 +718 2 +3932 2 +2416 7 +121 9 +1394 3 +1053 4 +3505 5 +1671 9 +3121 8 +1205 3 +2068 1 +628 6 +704 10 +515 6 +798 9 +3251 1 +374 8 +2594 8 +3858 4 +2619 5 +2191 7 +1986 10 +322 6 +2839 10 +2546 6 +1236 9 +1752 8 +3056 5 +373 9 +2983 8 +2264 6 +2325 8 +2959 3 +3631 7 +1979 3 +3088 5 +3082 2 +2863 2 +2681 8 +3473 7 +816 8 +85 10 +955 7 +591 9 +3790 6 +1168 3 +2321 9 +1923 2 +2731 3 +2146 8 +2847 1 +2206 9 +1113 2 +3631 7 +2177 7 +2281 3 +2262 5 +3129 2 +2149 4 +524 7 +2552 7 +290 1 +37 7 +1938 10 +1799 9 +4080 5 +783 5 +282 8 +68 9 +2637 2 +2539 9 +213 1 +475 2 +208 7 +421 9 +1530 6 +2418 5 +3953 6 +3985 1 +3000 10 +77 5 +149 3 +2218 4 +1826 1 +2212 2 +461 8 +4087 7 +2039 5 +1590 3 +577 8 +1191 5 +2466 4 +3361 5 +527 10 +3358 1 +2079 4 +2798 5 +3990 2 +2835 6 +2139 10 +2979 7 +2117 8 +3185 3 +642 6 +188 4 +654 2 +2128 1 +3288 4 +3134 10 +51 6 +3496 9 +2883 10 +1077 7 +3069 6 +1204 10 +1396 2 +2541 1 +2317 7 +4090 7 +3539 5 +3235 4 +1110 3 +1790 6 +1968 10 +1076 7 +2311 2 +3495 9 +835 1 +585 5 +2294 5 +2840 3 +1028 4 +652 7 +1619 6 +3608 10 +281 2 +637 4 +1123 8 +1155 6 +2604 6 +2203 3 +2420 7 +3215 7 +399 1 +1 9 +1436 6 +691 8 +550 6 +2629 1 +539 7 +1001 7 +3453 6 +2965 4 +2098 5 +1789 5 +3621 6 +3958 4 +1681 3 +759 2 +1172 3 +1126 7 +3477 3 +370 10 +1318 9 +98 6 +2313 2 +1291 6 +993 4 +3593 3 +128 6 +778 10 +1473 7 +615 2 +260 3 +3651 1 +3334 8 +4042 5 +657 9 +3542 7 +2233 7 +1956 4 +2133 8 +2782 4 +3889 5 +99 1 +2578 3 +451 10 +2484 9 +1947 4 +2212 2 +2284 6 +1371 2 +3921 5 +2002 3 
+114 5 +4084 1 +346 10 +4070 2 +2330 10 +2200 10 +94 4 +3099 4 +1497 7 +1740 9 +80 3 +839 6 +2305 7 +928 7 +1369 3 +2532 8 +995 9 +1568 1 +1773 3 +378 4 +2271 5 +761 9 +519 7 +3151 2 +268 4 +3857 4 +71 5 +2831 10 +2903 3 +3173 4 +3630 6 +2258 5 +1272 7 +475 1 +407 2 +1433 2 +2624 8 +1492 10 +4013 6 +2006 5 +44 9 +3647 9 +3104 1 +3251 9 +4090 1 +4053 9 +2748 6 +553 4 +2964 8 +3234 4 +2097 4 +2762 10 +3947 7 +2941 3 +3343 9 +1872 1 +3647 2 +139 7 +175 4 +1573 1 +2708 3 +2525 4 +727 4 +1281 9 +2165 6 +3119 6 +131 5 +2162 7 +2469 3 +1384 6 +1382 6 +3262 10 +2898 2 +1168 10 +320 7 +1772 4 +473 3 +3529 8 +2740 2 +3866 10 +1730 9 +1447 8 +2700 1 +1340 1 +1161 4 +1811 4 +3582 5 +98 6 +3185 1 +1405 9 +3288 4 +1797 9 +360 7 +3764 6 +1722 4 +3924 4 +2621 9 +1187 6 +1487 10 +2761 9 +541 8 +2024 6 +192 9 +3758 6 +3311 9 +2768 8 +3336 7 +386 10 +1103 5 +2229 1 +519 2 +1819 4 +2215 8 +2053 1 +1345 4 +3518 1 +1189 7 +3789 8 +1794 9 +1995 3 +2693 9 +838 10 +1363 3 +773 9 +2361 8 +1417 3 +54 1 +2915 1 +3216 8 +3374 7 +1153 7 +564 9 +3772 6 +3009 4 +920 7 +677 10 +979 3 +2910 3 +1048 2 +3011 1 +2728 9 +2689 5 +1947 9 +3480 3 +875 6 +2501 6 +403 1 +622 6 +1937 7 +1144 1 +1928 2 +3868 5 +860 1 +2372 10 +2503 8 +1345 10 +3113 10 +3953 7 +1961 4 +812 5 +3080 1 +2311 7 +3193 7 +904 7 +3556 6 +2952 4 +739 8 +217 3 +2240 4 +489 6 +646 7 +2897 2 +4053 4 +973 3 +1981 7 +1990 4 +566 1 +3001 2 +3480 7 +2082 1 +2792 4 +3419 5 +3024 8 +1277 3 +1510 9 +2498 1 +3858 4 +1157 1 +1254 2 +161 4 +438 5 +3650 4 +3831 5 +4020 3 +1006 6 +2614 3 +1326 6 +1373 1 +3721 3 +1020 1 +3233 9 +1749 10 +3807 5 +84 4 +568 4 +491 1 +841 4 +1034 6 +51 4 +3602 10 +629 9 +3973 8 +1868 9 +1446 6 +2989 9 +744 10 +1532 2 +2925 10 +825 6 +386 3 +2393 4 +4035 6 +768 9 +2040 6 +2832 10 +2975 7 +568 8 +19 4 +3984 4 +34 7 +3284 8 +3156 3 +1019 9 +2933 10 +49 4 +4077 2 +1355 3 +2545 2 +1996 10 +2248 3 +1017 5 +4089 5 +783 1 +1172 3 +40 5 +123 1 +2792 1 +2268 9 +2753 3 +313 2 +948 4 +2304 8 +879 9 +1166 6 +841 6 +3261 10 +2327 2 
+3126 7 +2692 10 +3446 6 +1215 4 +3609 8 +3941 5 +1542 1 +955 9 +2203 10 +3357 6 +1738 5 +1091 1 +3621 6 +3578 2 +4064 1 +3219 5 +1585 7 +1567 6 +1242 10 +1678 3 +2076 9 +3229 6 +2482 1 +2001 5 +1968 2 +4086 8 +1474 8 +1595 7 +3949 8 +389 7 +518 7 +3353 5 +1771 5 +176 4 +3143 7 +1062 1 +3723 6 +2526 9 +6 3 +916 3 +945 7 +2457 6 +1225 7 +1501 5 +312 2 +2929 8 +669 7 +1425 6 +2928 5 +3538 6 +1444 9 +3465 8 +3437 2 +167 1 +3190 5 +2577 8 +306 8 +4033 2 +2328 5 +779 5 +1500 7 +2871 2 +1743 4 +2576 9 +1528 9 +1617 3 +2812 3 +2018 9 +3726 10 +1503 8 +3606 9 +3525 6 +484 6 +983 6 +1851 5 +2362 9 +2500 8 +2253 10 +1238 2 +859 8 +3411 2 +2654 10 +2875 10 +3981 6 +296 3 +3343 10 +2490 7 +596 5 +1242 4 +917 3 +685 9 +3037 8 +4062 8 +3358 4 +2020 4 +3051 1 +706 6 +3352 6 +3930 2 +2514 4 +2324 2 +1957 4 +1550 9 +3652 3 +766 6 +3272 9 +2208 8 +2373 7 +1449 1 +4076 3 +3757 6 +2161 2 +1279 7 +2691 5 +3233 8 +238 2 +73 7 +3186 7 +2862 5 +2711 3 +824 2 +4048 8 +3774 6 +3607 8 +1511 8 +4085 7 +1144 6 +2260 4 +35 9 +3432 7 +991 5 +1808 9 +2489 2 +809 5 +3806 8 +1757 7 +834 1 +990 9 +1455 9 +470 10 +563 10 +2445 5 +984 1 +2935 6 +746 4 +1113 5 +3351 5 +1597 7 +231 1 +3145 9 +2295 4 +2004 6 +2916 10 +3419 2 +438 1 +3711 6 +1064 5 +2075 4 +523 5 +261 4 +2574 1 +2443 7 +2812 2 +151 7 +3046 3 +3699 5 +1677 8 +1185 7 +683 6 +3300 10 +2144 7 +2628 8 +491 7 +4084 4 +2199 1 +1684 7 +336 1 +650 3 +4048 10 +64 2 +1623 7 +2228 7 +3790 5 +1977 2 +119 9 +2063 9 +1127 5 +2145 8 +1158 4 +1100 5 +3564 7 +865 2 +580 1 +3794 10 +1621 6 +599 9 +3026 8 +3182 1 +943 4 +3462 4 +3390 1 +1672 1 +454 3 +1599 1 +3866 1 +1925 10 +3973 3 +894 5 +2404 5 +3911 2 +1974 4 +2769 2 +295 1 +2131 4 +297 6 +37 3 +1655 10 +1706 8 +1380 9 +69 9 +1261 1 +452 5 +285 7 +2702 4 +2808 6 +2288 4 +168 4 +2535 4 +1179 8 +31 6 +985 6 +427 5 +1793 1 +3950 2 +4050 6 +473 4 +3749 10 +858 2 +2783 6 +2865 5 +72 2 +317 2 +83 5 +2835 10 +2970 8 +2445 4 +1907 7 +3755 7 +3220 5 +1212 9 +1866 8 +2923 7 +3425 2 +2185 9 +2268 10 +1101 1 +2508 4 
+2412 6 +2231 1 +1086 10 +721 8 +536 5 +3132 9 +1583 9 +2922 4 +1733 2 +2003 8 +2151 6 +3964 4 +2653 1 +3929 6 +3772 2 +3549 8 +1585 7 +2414 9 +1398 9 +835 10 +2111 1 +1921 6 +1625 10 +4035 9 +2153 5 +2544 8 +1419 7 +837 1 +3674 10 +374 2 +783 2 +3037 1 +2860 7 +3361 9 +2160 6 +3610 5 +3669 1 +1462 9 +2179 2 +3097 8 +2400 8 +1703 2 +2742 9 +1445 10 +3308 2 +2933 3 +3671 6 +2688 1 +591 7 +2597 7 +2615 1 +341 1 +3323 10 +3673 9 +643 3 +1500 10 +2765 9 +53 3 +973 2 +2733 7 +4044 8 +3912 4 +910 5 +2219 4 +13 4 +59 3 +3989 8 +1989 6 +2264 1 +1981 4 +3312 7 +593 10 +481 2 +3357 1 +3309 4 +75 7 +3573 3 +3416 1 +922 4 +1912 9 +305 6 +1347 2 +240 10 +1340 8 +271 3 +1489 9 +4017 9 +2196 10 +489 9 +2553 9 +3552 3 +2211 4 +1707 2 +2026 6 +150 1 +2019 1 +3302 6 +1103 3 +2928 8 +1932 8 +2849 9 +3964 3 +3316 2 +827 3 +2539 6 +3906 8 +3010 2 +2978 7 +2238 10 +3688 1 +2970 4 +10 5 +3763 1 +3845 8 +1236 8 +3027 1 +1103 3 +2121 3 +1697 1 +3316 1 +3389 6 +3338 5 +3791 5 +3895 6 +1110 1 +3670 1 +53 9 +3283 1 +487 10 +1793 10 +1809 8 +1611 2 +201 8 +1001 1 +356 1 +2754 3 +771 5 +3793 10 +72 5 +2873 9 +4020 5 +2492 5 +2004 8 +760 10 +3015 9 +3595 2 +1 9 +3636 8 +369 4 +1022 5 +2738 1 +1189 3 +1904 7 +2150 3 +518 1 +2067 6 +1944 6 +1358 4 +2897 4 +3545 2 +220 8 +1115 1 +1379 3 +1382 5 +3269 6 +3510 8 +379 8 +857 9 +3631 2 +1696 3 +2309 9 +1116 4 +3279 5 +2990 8 +3186 6 +2864 5 +4065 9 +2127 1 +1925 3 +1841 3 +686 9 +1404 1 +2371 1 +3340 4 +2080 10 +237 5 +442 4 +171 8 +1959 3 +2504 1 +474 8 +1761 7 +3057 3 +2051 3 +1657 7 +2597 3 +3463 5 +2334 2 +2562 4 +2527 4 +389 3 +1929 4 +2744 7 +2109 9 +1918 9 +3515 4 +2994 6 +17 9 +2022 7 +2678 8 +666 2 +2000 1 +4083 10 +1281 5 +2689 7 +1294 7 +941 7 +727 5 +697 2 +1586 5 +445 9 +3879 4 +727 7 +939 7 +3630 10 +3746 4 +2241 10 +2441 4 +1151 2 +3696 9 +2023 2 +3502 7 +2415 4 +3238 8 +2079 10 +2813 7 +2555 8 +2569 6 +3950 3 +3784 3 +3371 10 +3265 3 +702 3 +605 4 +1510 3 +59 5 +2396 2 +3647 6 +3203 4 +2946 9 +308 9 +2141 6 +512 3 +2231 3 +556 8 +378 3 
+96 9 +3837 10 +3878 3 +1685 8 +3786 2 +2974 2 +1466 5 +1173 5 +432 10 +697 5 +1109 6 +3939 2 +2166 5 +1616 2 +1415 5 +1878 10 +126 8 +251 1 +2404 6 +3118 7 +4083 9 +453 10 +2851 4 +3353 1 +3906 9 +3452 2 +1691 2 +2531 9 +1595 5 +1039 10 +3183 9 +315 9 +3580 10 +181 6 +3034 1 +3822 10 +2217 7 +1096 1 +749 4 +2775 3 +3722 3 +4013 8 +1745 6 +3560 10 +3450 6 +2212 10 +3302 5 +262 6 +680 9 +169 10 +664 2 +367 2 +576 1 +430 2 +996 1 +525 6 +2879 9 +3893 10 +2596 2 +3926 9 +3063 10 +2092 1 +3535 2 +1753 1 +1747 4 +1647 3 +1658 9 +1391 9 +317 1 +1265 3 +2018 1 +1849 9 +2974 7 +3643 8 +1490 10 +2818 8 +1796 3 +1410 8 +3495 3 +1088 1 +1461 1 +3197 3 +3555 2 +1569 2 +508 7 +494 9 +1578 10 +1367 7 +2708 4 +378 7 +3221 2 +809 2 +2226 5 +629 2 +1460 3 +2908 6 +388 3 +340 3 +3437 3 +2596 2 +3018 1 +2073 8 +1027 5 +242 5 +1226 2 +547 10 +1672 10 +3843 7 +2941 2 +2178 4 +3964 2 +1038 1 +2925 2 +2741 9 +3659 9 +1679 9 +3098 9 +3096 4 +2846 4 +262 10 +2609 10 +763 9 +909 5 +41 6 +3771 6 +3756 1 +57 4 +2278 6 +204 4 +3135 4 +1058 3 +2430 7 +968 5 +276 8 +1055 3 +1567 8 +2034 7 +268 1 +3712 1 +3462 4 +2625 9 +95 5 +2386 1 +249 9 +3086 3 +470 5 +2357 7 +4042 9 +3577 10 +1269 3 +2582 2 +2707 3 +3259 5 +734 9 +2531 2 +2497 5 +3346 7 +1471 6 +3556 8 +3881 7 +1671 5 +3761 1 +2011 3 +2994 9 +223 2 +2469 9 +2715 2 +3925 3 +917 10 +3700 5 +30 3 +648 8 +2711 8 +3955 10 +3434 9 +1332 7 +2426 1 +3265 7 +1384 10 +2200 2 +1769 6 +1083 6 +3487 4 +193 2 +74 5 +3120 10 +941 3 +1060 1 +1519 5 +3053 6 +1646 1 +3081 8 +661 5 +2178 9 +3945 8 +3594 4 +1176 9 +1021 5 +3169 2 +1224 6 +930 2 +680 5 +2877 6 +1515 2 +2755 1 +2377 4 +1256 6 +2793 3 +2184 7 +2921 8 +108 4 +303 3 +1066 1 +409 3 +2237 7 +1752 2 +488 10 +2851 8 +3101 6 +2210 9 +1068 4 +3060 6 +2083 5 +1977 6 +3496 2 +3604 2 +3007 6 +220 5 +2910 3 +1332 9 +3795 5 +3497 1 +1609 2 +3805 7 +2249 3 +2971 2 +1280 1 +1194 8 +2004 8 +294 2 +665 9 +239 9 +1689 8 +771 2 +3960 2 +572 5 +65 7 +2085 7 +853 1 +3924 9 +3364 9 +3237 6 +944 8 +3086 5 +1720 1 +3034 
6 +2514 6 +602 5 +4044 4 +3773 3 +142 7 +1902 1 +3840 6 +1561 3 +1389 5 +3355 2 +94 10 +2979 9 +3224 10 +2206 6 +1175 9 +1217 10 +1768 9 +3629 10 +1207 2 +1773 2 +2941 1 +1801 6 +2920 9 +3735 6 +2572 5 +946 7 +1615 2 +3680 9 +3007 9 +1459 1 +252 9 +737 8 +2263 3 +2456 6 +4026 5 +1026 5 +2208 8 +1939 8 +2444 5 +3747 9 +1262 10 +640 1 +534 5 +3660 3 +478 2 +1703 3 +431 8 +1659 10 +68 5 +190 2 +1733 7 +110 10 +3610 1 +2266 5 +905 5 +1865 6 +2530 5 +2071 5 +3889 2 +2860 5 +1433 3 +3908 6 +702 4 +1659 4 +67 10 +3952 5 +559 3 +3869 8 +1320 2 +3978 10 +366 7 +444 10 +1468 3 +3896 3 +2353 3 +211 8 +1387 5 +2750 6 +393 10 +2379 6 +402 7 +3495 3 +2281 7 +1455 4 +1900 1 +4067 3 +1552 1 +363 4 +44 1 +2135 5 +3643 4 +2082 4 +3434 10 +724 9 +3372 2 +795 3 +1808 3 +1346 4 +3392 9 +2935 4 +1442 7 +3227 1 +2113 3 +3294 10 +866 1 +3571 10 +2258 4 +4040 6 +2070 4 +722 4 +2599 7 +3078 4 +3663 10 +279 8 +2693 10 +177 10 +1750 3 +1413 1 +307 10 +120 7 +3970 8 +3789 2 +3036 8 +2813 10 +1443 9 +1426 1 +3281 10 +3566 1 +3280 5 +3835 1 +2545 4 +1627 4 +1230 10 +2529 1 +2831 2 +4071 7 +975 1 +2329 2 +1016 6 +1995 4 +1584 2 +3436 8 +540 2 +3267 9 +211 2 +657 9 +3683 4 +3075 5 +4041 5 +498 3 +3189 1 +1738 7 +1929 5 +776 7 +1280 1 +3997 4 +2958 2 +1564 10 +2375 4 +3536 6 +2832 5 +3732 9 +1368 5 +428 7 +2208 8 +3588 2 +278 5 +1875 5 +261 2 +3375 9 +3267 9 +1845 7 +780 9 +3185 5 +2191 2 +1078 2 +2833 9 +3954 7 +3592 4 +877 6 +486 10 +420 10 +1564 3 +3518 4 +3898 7 +3228 1 +972 7 +2566 4 +3063 3 +3849 2 +477 4 +112 9 +36 8 +3299 9 +1266 3 +552 1 +1731 10 +944 6 +1160 4 +2160 5 +1836 1 +3098 5 +1702 6 +2884 3 +1573 10 +1829 4 +2323 5 +1910 4 +982 1 +3032 2 +2733 2 +339 4 +411 1 +2426 10 +1185 8 +28 2 +334 1 +1027 4 +3008 4 +2466 6 +144 7 +3098 8 +3518 4 +541 2 +872 8 +2515 2 +2123 9 +793 2 +2938 1 +1735 10 +854 3 +542 8 +1155 4 +3691 4 +3799 10 +835 3 +1495 8 +2996 1 +965 4 +2538 7 +138 5 +2403 4 +3501 6 +2046 5 +908 7 +1509 6 +3389 6 +3451 6 +230 3 +3665 9 +374 6 +3430 3 +1955 7 +1965 1 +4067 9 
+3337 10 +1903 4 +61 6 +3001 8 +3400 9 +1552 4 +2890 6 +2014 3 +3231 9 +732 2 +1638 5 +3526 5 +3355 6 +806 8 +3530 9 +2698 9 +993 6 +2242 4 +3945 8 +2827 7 +1787 3 +2816 2 +3444 10 +1199 9 +964 10 +3934 8 +2028 6 +2205 10 +928 3 +72 1 +1366 7 +2770 10 +3320 3 +3434 9 +268 10 +1259 6 +3804 4 +2391 5 +2655 9 +261 5 +2951 1 +3333 2 +2649 2 +1383 10 +3011 6 +3529 10 +262 9 +2760 3 +2393 3 +992 3 +744 7 +2178 3 +3969 8 +3762 1 +946 3 +3910 1 +1213 8 +230 7 +3888 5 +1082 5 +2835 3 +3770 7 +2887 6 +1892 1 +2151 3 +2481 9 +2803 10 +563 5 +1125 9 +728 2 +3036 5 +2200 3 +94 10 +2274 8 +15 1 +430 5 +1112 9 +285 4 +1846 6 +2473 5 +1890 4 +1992 2 +340 1 +97 10 +2422 6 +1589 6 +1530 4 +1777 5 +104 2 +3022 9 +51 2 +2948 1 +2136 9 +1652 8 +1034 8 +817 8 +3157 8 +2614 3 +3735 10 +2900 10 +4014 6 +311 5 +4075 1 +3524 9 +2788 3 +2604 5 +2365 7 +3145 9 +874 9 +3140 6 +3587 6 +454 8 +1569 10 +690 10 +487 1 +1516 2 +3034 1 +3883 5 +2120 3 +3346 4 +3525 5 +2542 7 +3544 10 +2820 6 +3519 8 +96 4 +3883 7 +3115 2 +2645 8 +735 10 +1023 7 +3211 1 +3155 9 +1157 8 +2861 6 +1951 6 +836 4 +705 7 +4090 1 +1653 9 +3096 9 +463 8 +2961 10 +771 1 +1297 6 +3135 4 +865 7 +3926 8 +2438 7 +0 5 +1622 2 +1711 7 +3380 9 +967 6 +1702 5 +3013 5 +3885 4 +3042 5 +3200 8 +627 2 +2182 6 +586 8 +2083 10 +3043 2 +1938 8 +2783 10 +1891 1 +2245 8 +4068 7 +1064 3 +1700 4 +1970 4 +1818 3 +3096 6 +969 7 +550 10 +53 4 +1766 1 +2308 9 +534 2 +3906 8 +1279 5 +3918 4 +432 6 +936 5 +240 2 +2454 5 +2711 4 +1968 5 +3954 1 +2262 9 +299 5 +3757 8 +455 2 +3607 5 +3765 8 +3919 10 +2766 2 +2870 8 +845 7 +3687 4 +1119 8 +3413 8 +3969 5 +3192 9 +2188 5 +1756 2 +2089 7 +2293 8 +2774 5 +2074 2 +533 2 +3081 2 +2759 9 +467 5 +1546 6 +3195 8 +48 2 +2498 6 +1850 4 +1870 2 +3295 6 +1997 5 +3000 9 +1168 9 +904 3 +263 3 +1497 8 +227 10 +3893 3 +2863 7 +1361 9 +2345 3 +1367 3 +3590 2 +1776 1 +379 1 +221 7 +3299 1 +573 3 +48 2 +2177 6 +1485 7 +1889 10 +1443 5 +313 3 +2093 9 +1254 4 +3912 1 +809 4 +940 10 +1804 8 +2271 3 +1416 4 +1500 5 +1392 2 
+194 3 +2746 9 +2724 6 +2185 9 +66 5 +1306 4 +2646 5 +922 3 +123 9 +3413 5 +2813 9 +1778 5 +1907 8 +564 8 +2539 9 +869 2 +1251 4 +3291 8 +2513 7 +3115 9 +1125 2 +2143 5 +242 7 +83 5 +3453 8 +85 1 +2734 2 +512 6 +2486 9 +3014 1 +477 3 +2338 2 +3110 5 +1220 1 +581 8 +1492 9 +3588 10 +3617 8 +472 6 +60 7 +2922 4 +2516 5 +724 3 +215 3 +90 8 +2686 5 +3807 5 +3392 1 +2021 9 +19 6 +1037 9 +2861 4 +3148 2 +432 4 +1173 8 +3121 5 +2566 6 +1968 10 +2094 2 +604 4 +1384 1 +169 9 +302 7 +254 4 +2904 5 +145 2 +4 2 +995 4 +3739 1 +3665 5 +1737 9 +1416 6 +174 5 +871 4 +667 1 +4011 5 +896 2 +2247 8 +2252 3 +2656 2 +2104 1 +169 8 +327 2 +1791 10 +1561 8 +3759 4 +743 9 +1125 10 +3357 9 +3114 9 +2583 7 +725 10 +1940 7 +3874 8 +1521 2 +1673 9 +753 4 +3418 5 +2984 2 +1502 7 +3818 3 +2957 5 +1209 1 +2764 9 +3916 4 +268 9 +1777 2 +1827 4 +3680 9 +3945 7 +533 5 +2775 8 +2880 9 +2636 8 +2401 3 +2517 2 +3195 9 +2959 8 +355 8 +703 5 +2513 8 +1113 6 +881 3 +505 3 +3941 8 +1304 10 +2610 2 +3170 7 +112 6 +1002 7 +1582 7 +853 8 +135 9 +2418 9 +149 4 +3195 1 +3857 2 +701 10 +3513 4 +3004 6 +3643 4 +3163 5 +1100 2 +810 10 +3498 10 +1793 8 +3248 4 +3043 2 +637 9 +1930 8 +1924 9 +141 1 +880 8 +1345 8 +604 4 +2442 8 +879 6 +2970 8 +3477 4 +269 6 +719 3 +2915 2 +1144 10 +3399 7 +3813 9 +915 3 +2708 9 +1565 1 +3066 7 +2478 4 +3048 7 +1340 4 +2150 2 +1241 6 +1247 2 +3721 2 +2853 8 +613 10 +642 3 +2411 9 +1623 6 +1522 9 +3000 5 +235 2 +98 6 +538 8 +1609 10 +2392 8 +1724 10 +178 9 +1825 10 +787 6 +542 5 +3492 6 +1480 6 +1532 8 +1512 1 +2820 6 +3357 6 +105 7 +2710 5 +3553 4 +925 9 +2745 3 +3180 5 +750 2 +3860 5 +3783 2 +1058 8 +3367 2 +1284 1 +1993 7 +4040 9 +3683 6 +116 5 +1362 1 +2484 6 +199 3 +1447 10 +1710 5 +2240 1 +470 5 +2704 1 +3296 1 +297 2 +1007 2 +1796 3 +842 3 +1976 10 +3880 9 +2491 8 +1334 10 +2149 1 +1534 6 +2323 9 +1435 2 +1619 1 +3436 5 +2073 5 +2741 7 +108 3 +1309 7 +38 3 +3474 10 +495 1 +1232 6 +2524 2 +648 7 +3154 8 +1669 10 +1009 4 +999 10 +2451 10 +2534 2 +216 7 +2487 8 +3495 6 +2558 6 
+3902 7 +2454 3 +2625 2 +2715 3 +3779 9 +2179 8 +3318 4 +1567 9 +508 8 +1481 9 +3080 6 +2339 7 +836 5 +22 8 +3879 7 +3326 9 +2984 2 +2428 1 +151 1 +1614 2 +1930 9 +2412 3 +1842 4 +3349 1 +1232 2 +3240 8 +4036 10 +2171 8 +3873 10 +212 9 +2231 10 +468 2 +2121 10 +2691 1 +3477 6 +3542 8 +634 8 +3735 9 +198 9 +2641 1 +128 4 +2774 1 +263 3 +3531 5 +782 4 +2886 3 +1207 4 +2718 3 +394 3 +2200 7 +857 3 +2340 9 +3493 1 +1822 1 +2077 7 +295 4 +2825 6 +505 7 +3461 7 +670 9 +1836 2 +573 10 +182 5 +391 3 +982 2 +2516 2 +2574 5 +1203 4 +3513 6 +3486 3 +2267 4 +3695 9 +2363 1 +2244 8 +3503 7 +3423 3 +3999 3 +2658 7 +3913 6 +2541 3 +3290 5 +1114 6 +3576 4 +3647 4 +1646 3 +2216 2 +2457 9 +3703 5 +2746 5 +3376 3 +659 7 +2114 10 +1343 9 +2086 4 +3319 3 +2971 6 +4005 4 +1375 6 +1170 6 +3319 2 +3937 4 +2050 4 +662 2 +854 2 +3402 4 +451 10 +3349 3 +2126 3 +143 10 +2287 2 +2887 3 +593 1 +1032 1 +1656 2 +594 7 +1989 1 +1128 10 +3319 7 +1998 5 +3071 7 +2069 10 +1554 3 +1792 4 +115 7 +2918 9 +2782 4 +3855 8 +345 7 +2797 2 +2905 4 +3841 5 +3733 2 +255 6 +3498 4 +1095 3 +3065 5 +3957 2 +2924 3 +823 4 +650 10 +2729 2 +3253 3 +1513 9 +2839 6 +1538 6 +3243 7 +1154 3 +801 10 +2688 10 +762 4 +600 7 +2105 7 +2626 6 +128 1 +1377 1 +2296 9 +2118 10 +3178 7 +3396 7 +3852 4 +666 5 +1785 7 +1105 3 +3982 7 +1368 10 +631 8 +1472 5 +1935 9 +754 1 +2291 6 +2324 9 +804 4 +3661 7 +3148 9 +1855 10 +1930 6 +3434 2 +3554 6 +3591 10 +2791 6 +2845 2 +2105 3 +2015 5 +3662 1 +219 4 +116 6 +852 3 +957 7 +2338 7 +3987 5 +2602 6 +3737 4 +3056 4 +2303 5 +3697 10 +2528 6 +2937 3 +3162 9 +1836 3 +3827 8 +1876 8 +3800 7 +1712 5 +1305 5 +3222 1 +209 8 +1320 6 +981 3 +3637 5 +1975 9 +647 1 +792 7 +1507 2 +3234 2 +1938 10 +1483 4 +3101 1 +3970 4 +1582 7 +3444 1 +2949 3 +1013 7 +1190 5 +1148 1 +1817 1 +3502 3 +323 10 +3436 8 +1119 1 +3362 2 +2291 7 +1896 8 +2170 10 +1342 6 +454 6 +2343 9 +963 9 +1075 5 +1703 2 +478 4 +4009 10 +593 5 +2653 6 +2372 7 +3176 7 +1526 3 +4082 3 +2465 6 +748 10 +880 7 +3472 7 +1581 7 +2809 10 +1236 1 
+3494 3 +4079 4 +3407 2 +3818 2 +2293 5 +3369 3 +2813 2 +1801 6 +59 10 +198 1 +3992 9 +2334 6 +236 1 +244 6 +3316 5 +2990 6 +3544 9 +479 3 +833 6 +2926 6 +245 5 +2019 4 +2979 7 +2851 5 +1305 10 +53 6 +2415 9 +1931 5 +3764 2 +2032 1 +2663 2 +1748 8 +947 6 +2500 2 +2854 8 +418 4 +3297 3 +513 5 +2257 10 +4082 1 +1 8 +1076 7 +2937 7 +1751 8 +3295 1 +3346 4 +1350 4 +495 10 +2518 9 +398 9 +3429 3 +3256 4 +3573 7 +305 1 +3082 7 +1754 9 +1465 5 +2276 2 +3530 5 +2678 8 +1407 5 +2504 9 +1186 6 +3854 2 +2879 3 +1378 10 +871 1 +2331 8 +3056 3 +2363 9 +1795 5 +189 2 +3143 5 +2159 4 +2537 8 +2757 6 +348 5 +2527 9 +1724 1 +3451 8 +2327 7 +584 6 +342 9 +2580 6 +2925 4 +641 6 +4050 10 +1828 1 +3155 8 +181 8 +178 10 +1668 1 +3853 1 +2350 3 +65 4 +1608 3 +2429 8 +118 10 +1882 3 +3825 1 +855 1 +2590 2 +1762 6 +893 3 +2457 3 +1224 10 +3890 10 +114 1 +1276 5 +2060 8 +3195 4 +3085 1 +2277 7 +861 10 +1500 7 +2524 7 +1289 10 +3868 6 +1205 4 +1915 2 +2565 9 +2526 6 +288 8 +2945 3 +1622 5 +1458 5 +2813 3 +3097 4 +2671 9 +2965 6 +2969 5 +3544 4 +786 4 +2405 5 +1560 1 +1605 2 +1240 7 +3215 10 +237 5 +3595 10 +3233 5 +443 8 +2368 6 +345 8 +2747 6 +1713 3 +680 9 +815 8 +2224 9 +2214 2 +623 10 +2742 6 +1951 5 +2759 7 +130 9 +2820 4 +2879 5 +1896 8 +1725 3 +4080 8 +362 2 +2954 1 +1689 3 +2461 4 +3068 8 +156 7 +237 1 +2958 7 +2694 2 +1914 6 +2391 3 +3577 6 +2343 8 +1681 4 +2299 3 +2720 2 +2622 6 +3710 2 +2106 1 +3216 10 +3716 3 +524 1 +123 6 +328 8 +3297 10 +527 9 +2020 7 +2610 10 +3599 8 +2014 3 +1796 3 +947 9 +1644 8 +3298 1 +203 10 +1728 1 +1879 9 +2273 10 +2632 6 +2498 1 +3808 2 +3092 2 +2795 2 +460 9 +3496 8 +568 10 +2417 7 +230 3 +741 4 +2318 5 +2703 7 +1276 6 +1134 2 +2665 4 +3746 2 +1546 9 +1687 4 +2365 2 +2681 1 +3724 7 +483 5 +3656 3 +3289 8 +1582 4 +478 7 +60 7 +3636 9 +2142 3 +2531 8 +3973 5 +2122 5 +272 5 +1378 5 +634 9 +1973 5 +2293 8 +173 10 +3989 1 +3287 6 +984 6 +686 10 +664 3 +3921 9 +2940 9 +216 1 +902 1 +348 8 +1656 4 +709 2 +1518 8 +1756 1 +3267 6 +1794 7 +3961 7 +1596 5 +1725 
9 +2792 5 +10 1 +2822 7 +3586 2 +3986 7 +3343 5 +2071 8 +2378 9 +2608 7 +2873 7 +589 5 +2954 2 +2562 5 +137 8 +619 1 +2262 10 +406 10 +2433 4 +3242 7 +3350 5 +1676 2 +2181 3 +2854 5 +1424 8 +1790 6 +1862 4 +56 1 +1118 9 +417 1 +2873 3 +3482 7 +1108 7 +3103 8 +2080 4 +3055 1 +864 3 +3334 6 +2351 5 +1335 9 +2175 5 +1751 1 +864 10 +1238 10 +3039 6 +3767 5 +1334 9 +3747 5 +271 1 +3364 3 +3302 8 +2454 5 +3737 1 +3664 2 +1568 2 +3853 6 +3464 1 +3464 2 +781 8 +1655 9 +293 5 +2728 6 +2496 3 +3812 2 +158 3 +200 5 +1915 7 +3365 8 +1803 10 +2644 9 +585 5 +19 6 +1802 5 +3980 8 +3278 2 +2766 1 +3032 7 +281 10 +1232 4 +965 1 +1054 8 +312 7 +2148 10 +2197 2 +3863 1 +4036 4 +1551 3 +2651 4 +1281 6 +4052 8 +2956 2 +156 7 +3504 4 +2777 3 +1258 9 +2271 8 +2162 7 +3594 2 +1735 8 +4085 7 +2516 7 +1228 8 +3534 6 +1860 9 +2620 9 +3304 2 +2466 8 +976 10 +969 4 +131 7 +1138 4 +2071 3 +3482 4 +567 3 +1497 5 +1373 10 +3594 7 +2551 9 +1982 7 +674 2 +1054 2 +1821 2 +1390 8 +2456 4 +3786 5 +2738 6 +3436 10 +2349 5 +382 5 +3676 6 +3791 4 +3447 5 +4019 7 +3866 10 +350 10 +3081 7 +3204 10 +3545 9 +332 7 +379 6 +1295 4 +1439 8 +1693 3 +3008 2 +1867 5 +2420 1 +470 8 +1832 3 +619 4 +1928 4 +3900 7 +1544 10 +1451 3 +3721 7 +1719 3 +112 8 +601 1 +3971 7 +2247 8 +1774 9 +2851 7 +3945 6 +3478 2 +2522 7 +3630 9 +303 4 +2171 1 +2024 2 +807 6 +474 4 +990 3 +85 10 +3668 4 +3823 9 +2731 9 +2748 7 +2283 10 +903 8 +1807 10 +1521 10 +3026 6 +2902 10 +219 10 +461 4 +166 8 +1065 4 +2325 3 +2922 3 +2572 7 +3034 4 +3694 10 +3552 7 +3554 4 +3189 10 +1805 2 +3953 9 +4033 7 +2154 6 +772 9 +7 1 +2616 10 +1200 9 +1237 1 +388 7 +2052 2 +2777 9 +3131 9 +97 1 +1592 2 +1940 1 +479 1 +2770 3 +970 3 +1553 7 +1531 3 +855 4 +2157 2 +3786 2 +3221 7 +2133 8 +1558 4 +2759 6 +2627 10 +603 1 +3477 5 +1714 2 +1945 5 +1936 10 +471 7 +363 9 +1169 7 +1871 5 +2078 3 +1201 3 +1098 7 +2291 4 +604 4 +3558 8 +472 8 +3770 3 +3595 5 +2432 6 +2848 2 +2941 2 +1473 2 +1149 5 +3522 8 +3365 9 +1269 10 +556 3 +2778 9 +955 3 +376 7 +160 1 +2626 2 
+4069 7 +196 10 +805 1 +2185 5 +3577 8 +737 4 +230 2 +3555 4 +3601 3 +356 4 +952 2 +417 8 +838 3 +65 3 +3658 8 +3607 4 +3113 6 +984 1 +1346 10 +4080 7 +343 2 +838 6 +554 3 +2613 7 +2947 2 +3981 8 +2537 10 +2894 1 +3578 9 +3568 1 +2281 8 +3941 8 +1258 6 +1634 5 +3416 3 +2580 2 +4076 3 +3048 8 +1268 1 +236 4 +3117 9 +1713 1 +1325 5 +3635 1 +1436 8 +2985 10 +862 6 +2911 6 +1297 10 +2873 1 +2195 6 +1067 3 +2452 8 +2752 3 +198 9 +835 4 +311 1 +592 8 +3676 3 +1032 9 +1838 10 +1533 7 +2586 8 +2980 1 +2646 2 +4033 3 +4062 9 +2260 1 +964 6 +1067 5 +1824 5 +1485 9 +1171 10 +4033 3 +695 6 +2703 10 +4010 9 +3927 5 +2241 9 +1109 10 +3056 10 +3626 10 +61 9 +1710 10 +2030 7 +3077 3 +3519 7 +963 6 +2565 1 +1213 1 +1956 7 +3302 2 +2640 1 +734 4 +278 9 +1605 3 +3712 9 +79 10 +2378 4 +3653 1 +3507 6 +2289 1 +3629 6 +3080 8 +1135 5 +2556 9 +3448 3 +3102 2 +2958 1 +2878 9 +1598 4 +844 7 +3508 4 +2452 7 +305 5 +249 3 +337 8 +1641 6 +1915 9 +2099 8 +1124 5 +508 6 +3461 4 +2096 10 +607 7 +79 10 +1347 8 +2840 5 +2491 6 +2309 9 +3572 3 +3204 7 +1094 9 +2553 1 +2535 6 +2120 6 +2207 1 +1486 10 +1682 2 +2187 5 +3376 5 +1829 5 +1204 2 +4088 10 +3167 7 +2291 4 +921 3 +1800 10 +2773 4 +3553 8 +536 6 +1550 7 +1631 10 +3619 1 +809 3 +2196 6 +2749 3 +940 2 +582 8 +3589 1 +695 8 +3115 3 +2531 8 +1852 9 +2842 7 +295 6 +3658 5 +1991 1 +1042 9 +2772 1 +2378 2 +2002 9 +825 6 +2908 2 +3467 3 +410 6 +3261 7 +638 1 +4001 5 +316 4 +712 4 +3943 5 +1604 1 +2972 1 +385 4 +1485 1 +174 1 +3712 8 +2121 1 +2263 2 +3527 6 +790 2 +3648 5 +1447 4 +1069 1 +472 4 +966 9 +3321 4 +2305 8 +313 1 +3054 8 +2207 10 +623 3 +2843 1 +2223 3 +1297 5 +392 1 +2024 4 +760 3 +479 4 +2098 8 +3766 1 +3740 1 +793 10 +875 5 +734 3 +2361 9 +1495 1 +2583 9 +263 3 +3311 5 +3924 4 +767 2 +1096 8 +3657 5 +1454 5 +1506 1 +480 6 +908 10 +1903 4 +70 6 +1783 3 +3006 2 +2745 1 +2778 2 +2075 1 +2682 5 +3534 5 +1141 1 +3527 8 +818 1 +3067 9 +3208 2 +3677 4 +2850 8 +3719 7 +449 5 +3184 7 +1759 3 +3547 9 +1083 5 +3088 10 +2089 10 +1204 4 +1215 6 +700 
3 +2188 3 +3500 4 +3283 2 +888 8 +971 3 +2164 10 +1459 4 +2657 10 +1880 5 +72 5 +3540 6 +2516 7 +3183 2 +3925 4 +187 10 +1757 4 +496 5 +1044 7 +1674 8 +1910 5 +898 10 +436 3 +2711 10 +3553 7 +2242 7 +4093 5 +314 7 +1779 1 +717 6 +2834 4 +53 5 +3642 9 +814 3 +2008 5 +3764 8 +1903 8 +3104 8 +2883 3 +1923 2 +668 3 +3264 7 +2084 9 +400 7 +37 5 +1332 8 +2382 1 +368 1 +2821 6 +308 10 +2080 4 +549 10 +3131 9 +3545 7 +809 1 +1002 6 +3954 2 +1143 1 +2762 10 +1695 1 +2516 6 +3886 2 +2544 4 +3984 3 +3258 10 +1750 6 +3175 7 +1876 9 +1631 9 +2125 6 +1821 10 +1693 3 +2199 8 +1857 8 +3561 9 +4041 6 +275 7 +3431 9 +1890 4 +3510 9 +1703 2 +2084 6 +1740 3 +584 2 +2044 6 +3370 8 +2047 4 +796 1 +3790 2 +2454 1 +751 1 +2693 9 +2581 9 +1504 9 +1132 3 +3271 8 +958 2 +1435 6 +3812 1 +2015 2 +457 3 +794 8 +3842 10 +3216 3 +2042 9 +1434 6 +1239 2 +2127 6 +1875 3 +944 6 +3891 4 +1378 8 +3079 4 +18 2 +3976 3 +1541 5 +3214 5 +3051 7 +3073 2 +1602 2 +3425 4 +1351 8 +1690 2 +1897 3 +1664 9 +3108 10 +3148 2 +1947 9 +1882 8 +2122 10 +637 4 +2600 9 +33 10 +740 6 +4052 2 +3853 2 +2945 10 +3184 10 +1138 7 +891 6 +4046 10 +1143 9 +2222 1 +3773 4 +1202 5 +2821 10 +2819 5 +3248 10 +1468 1 +1003 9 +2874 4 +2326 3 +3856 4 +2754 9 +1046 10 +158 8 +987 6 +498 10 +1450 9 +2469 6 +893 2 +242 5 +965 8 +1404 4 +1237 10 +732 5 +1851 10 +2109 7 +59 9 +188 7 +2796 6 +1013 1 +487 3 +2324 5 +3743 8 +892 7 +4064 7 +4045 3 +3782 10 +1446 9 +2252 3 +3909 1 +2342 5 +3848 4 +2927 4 +1566 9 +2926 10 +1353 3 +2182 6 +3307 1 +3550 9 +2691 10 +2161 5 +18 8 +846 10 +3044 2 +3781 10 +3874 5 +1806 3 +3004 7 +3706 4 +1410 5 +385 3 +2192 3 +2394 5 +1136 4 +3317 4 +2178 10 +4041 5 +2993 8 +4040 9 +1019 9 +2970 6 +562 1 +32 5 +2279 10 +526 1 +2837 1 +2567 2 +3052 6 +1494 9 +4057 7 +746 8 +794 6 +2297 8 +1915 3 +2059 2 +765 3 +1307 5 +1127 1 +152 6 +2790 6 +3288 4 +666 6 +1417 4 +4066 10 +435 7 +815 8 +3398 5 +242 7 +220 7 +1099 3 +3662 1 +4005 5 +797 10 +1097 1 +2316 1 +491 5 +3261 6 +2273 7 +2782 9 +1929 3 +1046 9 +330 2 +4046 1 
+3587 4 +3946 4 +3234 6 +138 1 +3011 3 +1700 6 +2820 6 +2043 1 +3290 6 +34 7 +1907 10 +1689 5 +2015 3 +3168 9 +1296 5 +485 5 +2642 9 +912 3 +3574 5 +2187 6 +294 8 +1082 5 +2047 2 +2364 8 +3798 2 +2315 10 +636 4 +3260 7 +3611 1 +83 6 +2147 6 +1444 1 +3128 4 +2620 8 +1805 8 +432 5 +1134 3 +3839 6 +3958 6 +859 1 +3553 10 +1860 10 +266 3 +3831 9 +489 8 +3482 5 +1726 5 +2778 10 +3276 1 +588 4 +1106 6 +3010 10 +1904 10 +2911 1 +1270 5 +1933 7 +1668 3 +2371 2 +1368 2 +1935 2 +754 6 +948 7 +4086 9 +1736 6 +2621 6 +3620 5 +3147 9 +2652 7 +3169 6 +1000 8 +3131 10 +3956 10 +3640 2 +1964 8 +2045 5 +3052 7 +360 5 +420 7 +3965 4 +2531 9 +1693 4 +1793 6 +3131 4 +3668 8 +3973 2 +1992 1 +858 6 +2315 3 +2248 4 +3981 5 +212 7 +2446 6 +1943 2 +2335 3 +1932 6 +2896 9 +360 7 +4019 6 +3330 10 +1234 6 +1792 7 +3785 5 +735 10 +343 9 +1244 9 +32 8 +2145 9 +2048 3 +2638 6 +2376 3 +1678 6 +1517 1 +3968 5 +1278 4 +2850 8 +384 5 +3305 4 +1696 8 +231 9 +3208 9 +2345 9 +3380 8 +105 8 +2586 4 +909 7 +762 1 +2857 6 +3035 6 +1202 7 +8 8 +1402 1 +1382 9 +3977 4 +860 1 +1392 10 +2480 10 +1663 2 +218 2 +2324 8 +3023 2 +3539 5 +1883 10 +3576 9 +258 10 +793 9 +2534 4 +967 10 +2851 4 +732 3 +3340 10 +139 7 +1777 4 +4067 4 +1892 4 +1651 6 +1054 9 +1563 5 +349 3 +1987 6 +4087 7 +1945 5 +1990 4 +1095 10 +2507 2 +2146 6 +2975 4 +2503 9 +4011 2 +2523 1 +3597 6 +2361 6 +2883 7 +4058 5 +2580 5 +672 5 +2903 3 +3705 7 +3364 7 +498 3 +3776 6 +1210 9 +260 6 +48 10 +1825 2 +3355 6 +2966 10 +958 10 +2739 3 +571 8 +2246 5 +1647 8 +107 4 +2268 7 +3306 7 +2320 3 +3845 7 +4052 7 +3121 5 +3152 8 +1457 9 +1899 4 +2679 3 +2272 4 +761 1 +1511 9 +3331 9 +2836 3 +1161 8 +1409 1 +151 2 +4039 5 +2306 10 +3518 1 +2878 1 +3216 1 +2136 3 +3066 1 +2002 8 +1853 7 +2803 8 +3575 3 +2766 10 +140 2 +2380 7 +1638 7 +954 7 +1200 8 +2932 2 +1346 5 +1628 9 +1527 2 +2214 6 +0 10 +3101 6 +3820 1 +2960 8 +3712 2 +3644 2 +186 2 +4003 2 +1005 7 +1048 10 +47 3 +1204 5 +1305 7 +311 7 +3553 5 +2177 9 +2134 4 +2156 6 +3213 6 +1712 3 +4077 4 +1002 5 
+3338 9 +3790 3 +210 3 +1744 8 +2771 6 +3089 9 +2018 6 +3079 2 +539 6 +62 1 +287 7 +1220 7 +2632 9 +806 2 +2889 10 +2385 10 +1006 8 +1598 7 +672 10 +654 10 +2968 5 +2954 3 +2647 4 +1433 9 +869 9 +2516 9 +2641 4 +1410 1 +2263 4 +1278 2 +3487 1 +4044 8 +3472 8 +3228 4 +2269 6 +4083 10 +3930 9 +1976 1 +1729 3 +2474 1 +1162 6 +3393 2 +3206 10 +3661 6 +370 7 +1080 3 +169 1 +981 9 +2977 7 +1833 2 +3547 4 +1495 9 +1016 8 +2064 7 +2971 6 +3397 8 +348 8 +627 5 +3026 5 +3692 6 +3596 3 +1235 1 +651 2 +2084 7 +2432 5 +136 4 +4040 8 +820 8 +1265 9 +3425 3 +328 2 +340 1 +3161 7 +3849 5 +3448 2 +3869 8 +2734 1 +1776 7 +1113 2 +3366 9 +2128 1 +2368 5 +1645 5 +468 2 +458 1 +214 4 +1181 2 +3903 10 +343 5 +1483 1 +2450 10 +3092 5 +221 10 +3226 7 +4064 10 +3592 8 +1327 8 +758 6 +2094 7 +1110 5 +2272 8 +722 5 +3483 9 +384 6 +395 5 +1219 2 +2729 6 +2917 7 +2913 6 +2956 10 +1940 1 +4057 2 +1357 10 +712 6 +2062 4 +1233 9 +3567 1 +81 6 +346 5 +3885 8 +3340 7 +4041 2 +2606 5 +3324 2 +171 3 +3975 1 +816 6 +1556 9 +1761 3 +1811 7 +4042 8 +3559 5 +3349 5 +2184 10 +1882 1 +2481 6 +148 5 +367 2 +34 4 +813 5 +1284 3 +668 10 +3340 2 +2051 7 +1805 2 +2500 8 +3417 4 +1497 2 +2223 8 +1964 1 +3321 3 +1006 9 +1753 4 +2029 9 +3651 1 +746 8 +2755 6 +119 4 +2076 5 +1177 4 +2112 5 +2475 9 +933 6 +2400 8 +1364 3 +1998 1 +412 4 +2651 8 +2481 7 +772 4 +557 2 +3258 9 +531 1 +3685 9 +793 8 +1235 8 +3974 10 +987 2 +3499 7 +625 3 +2313 6 +3913 2 +2427 5 +3794 2 +1380 7 +2446 5 +3385 9 +133 2 +24 4 +1239 6 +1955 2 +1911 1 +150 3 +4015 2 +3292 6 +1926 3 +243 3 +3738 6 +3500 4 +687 9 +1642 9 +767 5 +1266 6 +3112 6 +3385 10 +3271 1 +3338 1 +2876 5 +4054 10 +2204 3 +1925 8 +3738 8 +192 1 +1907 5 +851 8 +3311 6 +107 5 +3225 10 +3890 5 +363 3 +2629 9 +2460 3 +399 5 +3622 5 +3672 7 +620 3 +1437 5 +3439 8 +2697 3 +3867 4 +995 1 +2512 9 +1818 1 +2488 10 +705 8 +2226 3 +334 4 +2080 3 +3440 3 +874 8 +1353 10 +2539 9 +3699 8 +627 6 +2928 5 +2244 1 +3730 9 +135 2 +3463 4 +2835 5 +1197 6 +3428 8 +1321 9 +718 6 +3813 10 +3435 4 
+2379 7 +3080 2 +3083 6 +3480 1 +1848 10 +1903 7 +2182 7 +2115 7 +643 1 +2700 6 +3730 9 +2113 3 +511 3 +2279 1 +3577 6 +1012 9 +444 1 +1395 7 +232 6 +553 8 +3936 6 +3674 10 +779 7 +566 1 +1341 3 +1673 8 +1165 4 +2998 10 +658 10 +2941 6 +3713 10 +250 10 +3088 8 +1136 1 +1677 9 +2568 4 +825 6 +1363 7 +3803 10 +2531 4 +3493 6 +1263 3 +2768 1 +3134 6 +3503 5 +2271 4 +909 8 +2723 7 +3863 10 +850 1 +3385 2 +3789 3 +115 9 +3542 4 +1523 9 +2715 5 +1936 4 +541 10 +1673 1 +1365 4 +3649 5 +862 4 +1903 1 +3088 2 +2062 8 +2391 5 +2111 5 +2398 3 +677 3 +2665 1 +2741 9 +1309 1 +1217 8 +1124 3 +2501 2 +3134 3 +2086 4 +2115 3 +2170 5 +3180 6 +1963 8 +2031 3 +1489 5 +2129 2 +3046 7 +1148 10 +1152 3 +1231 1 +478 9 +904 10 +760 6 +1973 10 +271 1 +1450 9 +1904 2 +4028 3 +3952 4 +4031 2 +998 6 +3397 6 +1798 2 +1243 9 +669 3 +1103 8 +2561 9 +1336 2 +1898 10 +3757 6 +71 8 +2191 3 +955 1 +1181 10 +1097 2 +607 6 +3789 8 +2397 3 +3731 7 +590 10 +3673 1 +3001 2 +3464 6 +2933 5 +1798 7 +864 6 +3376 7 +2628 9 +2012 8 +1778 9 +4004 10 +2607 1 +2224 8 +3822 6 +1640 6 +962 1 +1156 10 +2197 2 +2335 6 +3502 3 +3850 1 +94 4 +2836 1 +3545 2 +3568 2 +147 5 +3812 9 +2883 2 +158 3 +764 8 +382 2 +3227 10 +1902 9 +693 1 +2808 6 +2778 3 +3224 7 +748 7 +3291 10 +1098 10 +202 10 +3440 3 +1715 5 +1676 5 +544 2 +2446 2 +2419 4 +2003 10 +345 5 +2569 8 +3645 9 +3442 3 +3336 5 +2466 8 +3894 9 +618 6 +2501 5 +1284 7 +2334 9 +3551 4 +222 4 +1225 7 +3703 3 +169 1 +1279 7 +1323 4 +3785 2 +1942 3 +2301 10 +1616 8 +2266 8 +3885 2 +1626 1 +552 7 +1040 9 +3796 1 +1145 2 +3568 3 +2973 1 +2361 4 +1690 5 +3478 9 +2362 1 +2586 7 +2335 6 +552 5 +1042 7 +998 7 +2295 4 +3080 3 +3340 7 +539 10 +445 7 +2453 3 +3289 10 +2697 10 +1077 5 +452 3 +3538 3 +2971 7 +2351 8 +648 4 +2591 9 +1177 6 +45 7 +120 3 +662 2 +744 1 +2748 7 +2016 5 +3566 4 +3063 2 +935 3 +2375 8 +3382 9 +3709 3 +3150 1 +2717 7 +667 5 +1362 2 +3286 6 +2738 5 +298 4 +324 8 +1649 10 +2800 8 +1823 6 +206 3 +2642 1 +710 10 +2488 5 +2058 8 +2183 5 +3690 1 +2807 3 +3797 4 
+3972 10 +1086 5 +2752 2 +1000 7 +2083 8 +2655 2 +1328 5 +251 9 +582 5 +216 6 +2669 6 +1021 7 +1870 5 +2365 7 +1388 4 +236 2 +146 2 +3013 10 +1503 7 +3728 8 +1029 1 +3445 3 +3721 3 +629 10 +2488 5 +2878 10 +322 1 +845 8 +915 6 +3599 10 +315 4 +346 5 +3467 2 +1438 2 +3752 6 +2755 4 +2422 1 +3026 4 +170 4 +1402 1 +2791 8 +143 3 +364 9 +2751 1 +3433 8 +1617 10 +2479 1 +1790 4 +1386 3 +496 6 +2842 9 +381 9 +1309 2 +2860 6 +3872 4 +3481 3 +4042 1 +2633 2 +568 7 +3264 1 +1935 5 +1879 5 +3712 8 +3549 7 +1303 3 +3758 7 +557 8 +528 1 +2361 4 +3533 7 +1118 2 +1233 10 +1692 10 +565 10 +112 9 +2924 4 +306 9 +1062 2 +771 2 +422 4 +3627 6 +3759 7 +98 2 +3618 1 +2167 1 +3920 2 +3831 10 +3358 2 +285 8 +663 7 +2211 6 +1940 10 +2724 10 +2462 5 +3231 7 +4059 1 +655 4 +3209 4 +1967 2 +16 2 +2907 6 +1247 2 +423 9 +2550 8 +2504 8 +3717 6 +638 10 +3612 9 +251 8 +1957 2 +2920 8 +1126 2 +4066 6 +3226 9 +367 2 +121 9 +1582 8 +1083 2 +523 9 +2216 7 +365 2 +1006 3 +200 7 +2057 6 +2091 1 +1604 10 +468 9 +1648 10 +1240 1 +2192 8 +2788 9 +309 3 +2429 1 +943 6 +2749 7 +2008 7 +3065 3 +3963 9 +3473 2 +1899 4 +282 4 +621 1 +1027 6 +4082 2 +336 3 +3997 10 +337 10 +1187 2 +2267 1 +3160 9 +1307 5 +1026 7 +1905 10 +1233 6 +3477 7 +623 9 +1811 4 +2416 9 +749 8 +2941 7 +4067 2 +2988 9 +2802 9 +3350 10 +2006 9 +1948 8 +2569 9 +1043 10 +227 4 +2570 4 +208 9 +504 4 +2605 6 +1583 1 +2863 7 +2535 2 +1898 5 +2526 4 +1958 3 +750 10 +1144 4 +3770 10 +2773 1 +579 1 +298 9 +2876 5 +124 8 +3938 6 +2761 6 +1497 9 +2385 3 +28 5 +1902 1 +2215 1 +2232 4 +691 4 +3335 3 +1653 9 +2574 5 +905 9 +2089 1 +4054 2 +322 4 +1428 9 +3986 7 +3064 1 +1395 10 +199 1 +1969 8 +647 6 +2922 2 +3846 6 +3710 1 +2717 7 +872 6 +3434 9 +2872 5 +3901 5 +3798 1 +3308 2 +1375 5 +2324 5 +3747 1 +1766 10 +4054 1 +3359 8 +3596 1 +598 5 +1763 5 +834 2 +2993 6 +2178 8 +1166 5 +1497 7 +3001 2 +3940 1 +3314 7 +2921 9 +3621 2 +322 10 +3712 10 +1826 9 +2031 3 +300 2 +1676 9 +2713 10 +3797 4 +3538 5 +1714 8 +1573 6 +461 4 +2638 8 +3952 8 +2699 3 +782 4 
+2420 9 +1389 6 +3213 9 +2469 8 +268 5 +1800 3 +3283 9 +2168 6 +2790 1 +2303 5 +1537 9 +2811 9 +2176 1 +4047 4 +4057 8 +2859 4 +715 7 +3273 8 +522 7 +2281 3 +3620 4 +1318 8 +2615 8 +247 7 +3388 4 +2357 9 +1736 4 +2903 10 +3366 2 +530 5 +4067 7 +1515 5 +1257 3 +284 9 +2575 8 +810 6 +1111 1 +912 3 +2310 5 +1689 6 +605 1 +1094 3 +1493 8 +1956 1 +2774 2 +1818 4 +717 8 +3409 7 +3451 6 +2795 10 +2000 9 +867 10 +1618 10 +3671 8 +2327 7 +3069 3 +3664 7 +3641 8 +1703 4 +1593 10 +2346 4 +2062 2 +2366 7 +2835 9 +3325 5 +1489 6 +3933 7 +622 6 +195 4 +2799 2 +691 2 +426 1 +1178 8 +2160 1 +3000 9 +3391 6 +1186 9 +3507 10 +2895 10 +1630 9 +3024 3 +2015 9 +2312 5 +252 4 +1032 10 +386 8 +1337 9 +4041 8 +67 8 +4058 2 +2072 8 +1684 8 +1896 6 +1753 4 +398 7 +749 1 +729 7 +2602 10 +2766 8 +2777 5 +717 7 +1261 2 +1327 4 +806 9 +2775 6 +1071 7 +669 4 +547 7 +2400 6 +3094 3 +3333 5 +1094 9 +2456 5 +2750 2 +3026 8 +2710 9 +3808 8 +1996 10 +3515 3 +3116 4 +600 6 +1129 10 +2806 7 +1133 4 +3239 4 +3498 8 +3927 3 +3119 6 +645 10 +3976 7 +3000 7 +1941 8 +2398 2 +804 3 +2801 9 +131 5 +3908 1 +3488 6 +2652 9 +514 6 +3429 8 +1486 2 +2305 2 +3119 6 +3841 8 +400 6 +3821 10 +1439 9 +3818 5 +3814 6 +3004 2 +2864 7 +2671 5 +2987 4 +3497 1 +2841 10 +3223 10 +2353 7 +2602 2 +2515 10 +2764 6 +3647 6 +301 8 +3496 1 +2796 1 +507 7 +3450 4 +1967 3 +1302 1 +1883 7 +1472 3 +764 7 +1242 10 +3043 2 +2329 3 +313 6 +2454 1 +595 10 +2469 7 +2829 1 +672 4 +2318 10 +3829 3 +306 9 +2391 6 +186 8 +922 9 +498 10 +2596 4 +4041 7 +3766 3 +2092 1 +1106 5 +1029 1 +760 9 +629 7 +2972 7 +49 10 +1723 1 +1100 10 +1552 8 +2948 7 +3257 6 +1219 9 +1558 1 +2476 5 +1419 8 +3284 8 +3402 6 +872 2 +905 9 +1830 6 +3549 6 +430 8 +2495 5 +1579 5 +2147 6 +3292 4 +1639 9 +1331 2 +2285 3 +1700 6 +3407 4 +1553 9 +667 4 +3829 7 +1023 8 +999 3 +2571 8 +1483 6 +4059 9 +2 2 +3736 4 +3863 6 +1784 10 +3006 6 +1101 9 +1805 7 +141 2 +4044 6 +646 6 +1909 1 +463 8 +4083 8 +3321 2 +1316 4 +2416 4 +768 10 +2575 9 +0 2 +946 3 +2547 9 +716 8 +876 2 +567 4 
+429 3 +3650 3 +1392 2 +222 10 +3304 3 +1999 8 +3132 5 +2022 5 +762 9 +520 3 +218 10 +3536 7 +1025 7 +3440 10 +1655 8 +2431 4 +1081 8 +2069 3 +617 3 +2451 6 +3468 4 +2915 8 +509 6 +3601 1 +3734 1 +1848 10 +3266 5 +1321 4 +3339 1 +3907 3 +605 4 +2670 5 +3700 5 +1465 5 +230 9 +1647 6 +1121 8 +1702 10 +1313 3 +3437 7 +687 2 +394 4 +3413 7 +3785 1 +3701 7 +2420 1 +1439 2 +3617 1 +2377 7 +828 10 +1584 5 +2105 10 +613 4 +1703 9 +1085 2 +3265 7 +2187 10 +65 1 +478 9 +1802 2 +548 9 +173 9 +1609 10 +2362 1 +2078 8 +3227 8 +1351 3 +1476 4 +4030 3 +77 8 +1429 3 +2230 1 +2267 4 +3761 7 +2482 3 +3695 2 +2715 10 +1950 8 +3214 3 +191 2 +1426 1 +4025 9 +2288 1 +651 1 +3778 8 +3558 2 +3037 4 +2204 6 +1067 3 +3070 9 +1484 8 +3005 3 +1059 1 +3446 3 +4014 4 +3870 8 +547 8 +2775 6 +3845 8 +1804 4 +2908 1 +218 5 +3093 3 +89 7 +3684 6 +3658 9 +833 7 +1967 6 +161 4 +670 4 +2866 3 +117 8 +3446 3 +2549 1 +1795 3 +2873 8 +1846 1 +751 8 +701 4 +1463 6 +3840 2 +877 4 +1676 6 +1189 4 +2423 10 +2994 3 +227 9 +1188 5 +3373 3 +513 8 +1689 10 +1156 6 +2272 9 +785 10 +2816 1 +25 3 +3238 8 +2060 2 +2353 2 +1282 2 +2330 5 +2565 4 +124 4 +1431 2 +1046 2 +20 3 +1129 3 +3634 7 +1691 9 +2914 1 +1649 4 +2172 2 +237 7 +683 3 +491 4 +334 8 +2083 10 +3861 6 +2302 2 +3605 8 +4050 2 +2811 1 +445 5 +1032 8 +2550 3 +3586 7 +291 7 +333 3 +2188 10 +593 7 +3659 7 +1753 4 +1055 2 +2025 4 +42 1 +3533 3 +778 10 +3235 8 +3881 5 +167 2 +2373 7 +4031 6 +1238 5 +1384 10 +146 5 +2762 5 +95 6 +2201 3 +2946 7 +1187 7 +3056 5 +2049 6 +1761 4 +511 8 +1501 3 +2194 4 +514 2 +1275 5 +2585 9 +1824 4 +2886 6 +1378 1 +1310 3 +3751 8 +1893 6 +2449 5 +1366 2 +1640 8 +1890 2 +3838 9 +3109 8 +311 9 +2731 4 +3516 4 +4013 3 +2313 10 +2471 7 +3221 3 +3547 7 +1578 5 +2093 8 +3201 7 +3212 2 +406 5 +442 5 +2052 2 +3781 7 +3699 5 +571 6 +2319 7 +252 1 +2511 8 +2334 8 +3676 10 +3033 5 +462 7 +3261 4 +116 6 +3862 4 +1353 3 +138 4 +2869 9 +3701 5 +1123 1 +2054 4 +1928 6 +2355 5 +614 1 +2389 7 +2568 7 +3382 10 +967 4 +1844 6 +2337 4 +370 5 +749 3 
+3739 3 +2660 9 +330 5 +3931 9 +2422 6 +47 5 +1672 1 +532 5 +2381 1 +153 8 +1234 10 +611 8 +1299 1 +3473 1 +3457 3 +1313 3 +557 8 +1826 8 +1328 9 +2872 10 +724 6 +3361 4 +1470 5 +2960 5 +2399 2 +3695 3 +2674 6 +2528 1 +1879 5 +3290 5 +722 4 +458 8 +3622 6 +2228 6 +2952 9 +259 9 +4081 10 +806 9 +3096 5 +1874 1 +2058 3 +2194 2 +1318 1 +3759 10 +3080 10 +1509 3 +1823 2 +2253 1 +4087 4 +3684 5 +1961 7 +965 9 +605 5 +3052 10 +274 2 +3743 9 +3707 5 +2463 6 +2156 10 +3623 5 +1155 10 +3838 2 +4078 1 +2192 2 +3102 3 +1773 1 +3948 9 +2377 8 +2888 5 +2136 3 +2060 5 +896 8 +3079 9 +1040 2 +1130 7 +3937 6 +3076 6 +3555 3 +1160 10 +502 10 +1344 8 +37 3 +1474 6 +2152 6 +3943 6 +2839 6 +3575 5 +841 1 +1645 4 +403 3 +3421 2 +2622 3 +2038 7 +1854 8 +1215 9 +2510 3 +3126 5 +2836 2 +205 10 +2493 5 +2828 7 +2832 1 +1147 7 +2746 2 +3423 1 +996 5 +3615 8 +2340 4 +3044 2 +2626 6 +1859 3 +2203 9 +2429 1 +3878 1 +1973 3 +3902 1 +1947 6 +1431 3 +954 9 +2126 9 +1750 5 +3783 7 +609 4 +3544 9 +3000 2 +3231 9 +230 5 +1005 4 +2676 3 +1779 8 +126 7 +3815 9 +1502 8 +3379 7 +239 10 +1746 8 +3556 1 +585 8 +128 4 +2657 8 +3755 6 +792 1 +3560 10 +1089 6 +1759 1 +2366 10 +3763 9 +3904 4 +3946 4 +2756 6 +1744 8 +1094 4 +2773 9 +2866 10 +473 2 +3495 1 +2644 6 +2988 4 +580 6 +3062 9 +1291 8 +3403 1 +2381 8 +3605 4 +2384 4 +2624 6 +2276 5 +3504 6 +1794 3 +984 10 +2298 4 +1741 5 +3294 1 +1427 6 +550 5 +1140 3 +3464 5 +3081 8 +2807 3 +2306 1 +1334 1 +2968 4 +300 7 +3997 5 +3240 9 +1294 8 +3015 7 +3973 6 +3172 7 +2599 10 +4076 10 +925 8 +4002 8 +1115 2 +2096 6 +2261 3 +1707 4 +496 6 +2034 5 +728 1 +1528 4 +1093 1 +1655 4 +2484 7 +2747 7 +1296 9 +3705 8 +2130 5 +2688 6 +3843 1 +3428 6 +563 9 +1196 2 +2313 8 +389 10 +2293 2 +2089 9 +1327 1 +2247 1 +1018 7 +422 3 +2384 2 +529 3 +805 9 +1418 4 +2608 5 +2303 1 +3074 1 +2861 6 +2880 3 +1415 3 +1745 1 +3101 2 +3574 1 +2530 7 +3120 1 +2466 2 +3287 6 +1071 7 +642 1 +50 1 +2096 3 +1810 4 +3897 6 +1711 4 +2236 10 +3087 1 +1523 3 +428 6 +3090 2 +752 5 +1303 6 +791 2 +3772 
5 +3060 3 +276 2 +3836 6 +1636 5 +3260 9 +298 9 +761 7 +3539 10 +3033 2 +2710 5 +548 10 +2236 10 +752 9 +3956 3 +3436 4 +1190 1 +2438 8 +1635 10 +2186 5 +2279 7 +2011 2 +3246 9 +166 8 +3613 5 +2767 3 +3310 10 +3182 5 +761 6 +81 3 +1125 9 +2079 9 +2713 6 +2949 8 +1109 6 +1802 6 +3473 5 +3316 7 +1995 1 +2101 1 +3781 8 +375 6 +3845 4 +905 6 +2920 1 +2864 10 +2161 3 +2636 5 +3050 5 +1001 5 +577 1 +455 9 +279 5 +964 4 +3290 1 +3165 6 +3941 7 +663 9 +878 4 +3683 2 +1732 1 +2821 3 +626 4 +955 3 +3228 9 +1125 2 +176 8 +3467 1 +2231 1 +493 1 +1354 9 +3457 7 +489 5 +2915 6 +169 7 +2606 2 +3155 7 +1887 7 +805 8 +1201 1 +2784 7 +1515 7 +3404 2 +3131 5 +688 4 +2514 1 +1177 5 +1221 2 +1488 4 +3282 9 +3540 9 +615 6 +1572 9 +2183 8 +1206 5 +2648 5 +129 4 +73 7 +834 6 +1421 1 +3000 3 +1743 8 +3202 7 +3561 10 +254 3 +2436 2 +633 4 +2914 4 +3341 3 +2957 9 +2326 8 +3617 2 +3928 7 +2087 6 +1948 4 +3483 7 +3571 2 +445 10 +3758 6 +2060 8 +1411 3 +3633 10 +2902 3 +2883 1 +2072 9 +122 3 +3060 1 +3294 1 +1679 10 +2728 1 +2040 6 +662 10 +180 10 +1269 7 +1840 8 +2469 10 +3559 5 +2778 6 +2144 10 +2363 3 +2205 4 +2284 7 +400 8 +1167 3 +2692 8 +3226 8 +1845 4 +2370 7 +202 1 +2413 6 +32 10 +878 5 +946 5 +3493 6 +1605 4 +1332 6 +941 6 +3075 6 +2886 2 +917 8 +3930 4 +3052 9 +2986 7 +3234 3 +1216 10 +2660 2 +1263 4 +4093 10 +4015 9 +1480 7 +1227 8 +518 7 +1476 6 +2073 9 +77 6 +1061 10 +3768 1 +1034 1 +3905 3 +1328 7 +2601 8 +970 9 +2644 1 +2034 10 +720 8 +1749 3 +1298 5 +2304 10 +377 5 +3482 1 +2233 7 +3569 10 +605 8 +2151 10 +3546 4 +1699 3 +3277 1 +2573 2 +1318 3 +1096 3 +669 3 +1930 10 +620 10 +3123 4 +870 10 +1238 3 +2084 3 +2368 3 +966 9 +199 6 +3942 6 +2792 1 +569 8 +165 1 +1571 7 +2859 6 +1567 2 +3782 4 +932 9 +2540 3 +3627 1 +745 7 +2420 4 +3761 7 +3870 9 +1642 3 +1394 10 +3151 5 +1286 6 +3902 9 +1126 6 +2171 3 +2645 2 +651 3 +1339 5 +3791 7 +3945 9 +1769 6 +1692 2 +1338 10 +732 10 +2410 7 +713 6 +136 10 +2966 3 +458 2 +1204 8 +1698 4 +2628 9 +1680 7 +1361 2 +579 6 +1948 4 +3507 10 +4019 10 
+3171 7 +536 10 +407 7 +1526 2 +1468 8 +3874 8 +3144 8 +499 4 +1453 3 +524 3 +2746 1 +184 6 +1811 1 +52 9 +3121 1 +1357 10 +1017 9 +2192 2 +2987 6 +1137 3 +242 7 +2761 9 +2075 10 +3275 5 +1061 8 +2137 8 +660 10 +1996 5 +163 2 +1761 4 +2318 2 +3570 5 +2478 3 +966 4 +3212 10 +2345 9 +3321 5 +1807 1 +3326 4 +2135 2 +3927 8 +2992 4 +556 4 +1623 4 +1523 7 +920 3 +526 6 +3249 1 +3437 1 +3043 8 +2877 7 +3945 4 +294 8 +289 6 +2722 7 +3440 3 +3979 1 +3144 1 +1985 3 +3975 9 +3826 8 +136 3 +3342 2 +3679 6 +3088 5 +446 2 +2292 4 +3041 10 +2656 3 +3513 6 +1280 1 +2610 9 +2661 4 +422 6 +54 2 +2021 5 +3864 2 +254 10 +1542 1 +1647 4 +3368 1 +3790 5 +3016 7 +3277 4 +1189 3 +969 4 +656 5 +3823 6 +4081 4 +393 7 +3358 1 +2825 9 +3544 1 +680 6 +1429 10 +1347 2 +3018 6 +2662 2 +3516 3 +4074 3 +3215 7 +3970 7 +1252 1 +1594 6 +1729 2 +3765 7 +637 8 +751 7 +2482 4 +733 2 +3850 10 +2449 4 +1382 5 +185 10 +83 4 +3644 8 +2661 1 +1712 4 +533 3 +35 2 +3955 4 +3133 4 +3064 10 +3728 10 +1492 2 +1234 10 +2203 2 +705 3 +321 2 +386 5 +1639 9 +1725 8 +823 9 +934 1 +1222 4 +862 8 +2665 4 +2998 4 +2214 5 +2306 3 +3735 7 +3509 5 +139 7 +398 4 +411 3 +3341 4 +1300 1 +38 9 +1877 5 +1392 5 +1156 4 +3161 1 +3027 3 +2939 10 +721 7 +3238 9 +2148 9 +1675 6 +1853 5 +1912 7 +251 6 +3098 4 +1352 3 +630 5 +3370 1 +65 10 +2325 8 +3688 8 +606 6 +1510 2 +3982 1 +3867 10 +888 10 +2874 7 +2560 7 +2199 2 +1996 5 +2965 4 +879 8 +3151 2 +1253 2 +1275 3 +1155 1 +2036 10 +3880 3 +3907 6 +283 6 +3319 7 +3543 7 +3446 9 +810 1 +2069 9 +2928 4 +191 8 +1380 10 +582 10 +425 6 +235 4 +1995 6 +677 1 +3967 9 +879 5 +3179 3 +3038 7 +1785 8 +1906 10 +4095 3 +3679 9 +2749 7 +1069 3 +188 3 +3307 2 +629 9 +2304 5 +2244 5 +1247 4 +2603 1 +3044 9 +2567 8 +3285 2 +3387 10 +2907 1 +471 10 +2077 9 +3257 10 +536 6 +1722 6 +599 4 +3487 10 +1150 7 +694 8 +1787 4 +3202 6 +3354 5 +2059 4 +1700 1 +2012 7 +1176 6 +2306 5 +2052 5 +2118 1 +1998 3 +457 2 +201 4 +264 3 +1911 6 +3168 4 +720 8 +3410 4 +2493 5 +1687 10 +660 2 +3167 3 +339 6 +1547 10 +716 3 
+1095 9 +784 7 +444 1 +446 7 +2945 4 +1198 4 +2037 8 +326 7 +3370 3 +1448 10 +1007 5 +3943 6 +423 3 +101 3 +2099 3 +1 10 +2841 2 +2516 1 +4060 6 +2563 5 +1963 8 +3989 3 +1397 9 +2786 8 +3013 4 +428 1 +1830 5 +2502 3 +1496 7 +770 9 +1737 8 +2612 9 +2542 3 +3154 9 +3661 5 +1271 10 +558 4 +866 7 +365 4 +3517 3 +830 8 +455 3 +3380 8 +886 4 +1429 8 +200 3 +3908 8 +648 3 +91 3 +791 5 +3998 8 +3420 7 +3604 9 +1988 4 +1927 7 +1738 3 +3145 4 +4017 1 +3732 1 +1345 9 +1469 10 +2896 4 +358 10 +1905 5 +2025 6 +52 5 +2466 1 +1332 4 +706 4 +3153 2 +2509 10 +3789 4 +2525 6 +2994 9 +3386 9 +2353 2 +1970 8 +3150 10 +697 10 +3628 4 +3735 1 +2902 1 +2916 8 +1131 6 +2449 7 +2256 2 +1037 8 +873 10 +2524 9 +3729 7 +3510 9 +912 8 +1351 5 +3213 7 +116 6 +2781 9 +3781 7 +987 9 +224 2 +2170 7 +2957 10 +3753 8 +2546 1 +2295 6 +2162 5 +228 8 +2825 8 +471 3 +1198 2 +3532 1 +301 1 +1597 5 +569 4 +1366 5 +920 8 +1937 3 +3212 1 +2528 8 +2803 3 +961 1 +2705 3 +3672 7 +2234 9 +174 2 +854 9 +2816 5 +2280 2 +3101 2 +549 4 +2064 1 +117 9 +507 1 +1728 1 +1150 5 +3312 8 +746 8 +163 4 +1436 3 +2183 5 +3464 2 +3702 2 +1428 9 +262 1 +3486 6 +3200 10 +2428 7 +507 5 +1275 8 +3160 8 +3044 6 +3909 1 +1829 5 +2082 6 +600 4 +2011 8 +3697 8 +1983 4 +1083 10 +2200 10 +662 2 +3974 4 +2660 5 +3007 2 +2732 10 +658 8 +1607 5 +1726 5 +2072 7 +1318 9 +2327 6 +683 4 +1417 5 +4019 2 +2298 10 +2468 5 +2484 1 +640 5 +909 1 +3383 1 +733 8 +171 1 +1525 1 +3995 3 +3358 1 +1303 3 +1440 8 +2982 5 +250 3 +3681 3 +3585 7 +1668 7 +4028 10 +3734 9 +1486 10 +809 1 +2895 8 +3498 1 +1937 9 +3426 5 +4067 8 +3358 1 +2379 1 +660 3 +2233 5 +209 2 +2433 7 +2579 10 +3888 5 +3581 10 +2047 10 +3382 4 +312 10 +564 6 +750 10 +2459 7 +3991 10 +3691 6 +1776 7 +553 5 +794 2 +1928 2 +4032 5 +169 8 +2668 2 +3603 6 +3673 7 +3554 1 +3810 4 +1202 10 +1714 2 +3415 9 +4059 7 +3495 1 +3524 7 +1430 4 +1176 4 +4055 1 +1189 1 +3876 8 +3357 1 +1489 4 +1174 1 +470 3 +396 3 +3206 2 +1713 6 +3938 2 +223 7 +825 7 +3377 4 +4002 5 +2301 7 +3428 4 +3796 3 +553 7 +733 
2 +1313 8 +3271 2 +616 6 +2533 7 +3916 6 +1280 8 +1655 6 +1439 3 +336 6 +4030 4 +3584 6 +1626 5 +1568 10 +2000 1 +1621 4 +326 9 +262 9 +1494 4 +3936 9 +345 5 +2071 8 +2090 4 +246 5 +2059 2 +2962 7 +2860 10 +3029 7 +1136 1 +2354 7 +2352 7 +2727 1 +385 10 +3312 9 +4075 5 +3319 7 +2917 8 +1577 9 +3490 4 +1629 1 +1123 1 +380 6 +1411 4 +1559 1 +3765 7 +408 2 +1422 8 +200 4 +1164 4 +3994 7 +1547 7 +3982 1 +188 1 +1065 7 +893 3 +400 6 +824 4 +1566 2 +1471 10 +3063 10 +1623 10 +3839 10 +2209 4 +1860 5 +3279 10 +4000 4 +3763 7 +1994 10 +1841 10 +3347 5 +58 7 +3053 10 +2020 3 +1465 10 +475 2 +3230 2 +1539 5 +1206 8 +3910 7 +3428 3 +915 4 +2602 2 +1036 7 +2873 3 +3426 2 +3789 9 +3867 10 +2420 7 +268 6 +16 10 +4072 2 +2510 4 +1975 9 +4075 6 +1680 1 +2231 6 +3514 6 +305 7 +629 5 +1157 4 +4079 8 +3085 6 +3667 1 +2830 3 +1419 5 +1535 1 +3703 7 +3475 9 +2563 5 +1847 6 +749 8 +2222 2 +3356 1 +1830 7 +1053 9 +3040 3 +907 5 +342 7 +2002 7 +2554 8 +796 5 +2960 8 +288 4 +4091 5 +537 1 +3772 4 +2944 8 +2436 5 +193 5 +4017 7 +3813 9 +1315 6 +354 9 +2268 2 +1458 3 +1338 7 +703 7 +1389 9 +3459 5 +2492 2 +1306 2 +3739 7 +3081 7 +655 2 +343 2 +2127 6 +368 7 +1965 3 +2220 4 +2810 1 +1996 10 +2980 4 +1073 9 +489 5 +2625 10 +3867 4 +3131 5 +2048 5 +802 8 +320 10 +2852 1 +3911 7 +3585 4 +1991 8 +4002 8 +2146 1 +2301 2 +595 8 +3298 9 +1043 6 +74 8 +3826 9 +3145 5 +2067 8 +2972 1 +3083 3 +2167 8 +277 7 +1423 2 +30 4 +835 8 +2595 6 +928 2 +3105 7 +2777 7 +3550 7 +749 2 +2206 6 +3923 5 +1227 9 +2410 6 +1069 3 +1539 8 +691 5 +1029 10 +759 7 +3185 1 +2948 7 +2047 1 +3145 3 +2602 10 +678 4 +1535 3 +3244 2 +2659 4 +1859 7 +1721 6 +976 2 +3808 3 +2188 10 +1352 4 +1887 2 +1073 4 +1462 3 +3347 9 +342 7 +1147 5 +3310 2 +2879 10 +1247 9 +1796 10 +1271 6 +1227 8 +2907 9 +3342 7 +3470 5 +3974 10 +3227 7 +24 8 +1290 5 +3966 3 +1480 6 +818 9 +110 7 +368 3 +2331 3 +2793 7 +1056 8 +87 9 +2185 8 +2437 5 +3128 10 +2431 1 +1472 7 +736 10 +625 2 +2524 8 +2896 6 +523 10 +3900 2 +39 8 +29 7 +3419 10 +1473 2 +3676 3 
+1270 9 +1607 5 +2863 10 +2489 1 +185 9 +1366 10 +2688 8 +2721 2 +1557 4 +901 1 +3999 8 +463 2 +338 1 +975 7 +2213 9 +3579 4 +1871 3 +2407 6 +2121 5 +1883 9 +2673 2 +932 10 +1189 8 +55 9 +3505 2 +1278 10 +3984 1 +138 8 +3847 4 +44 9 +1128 8 +524 8 +3695 8 +858 8 +3998 9 +692 4 +3851 5 +1613 10 +3202 4 +2119 4 +1521 1 +2611 7 +3324 6 +426 1 +1362 1 +1218 7 +1994 6 +3575 6 +1661 8 +64 2 +3758 10 +2322 9 +3765 3 +596 1 +342 4 +2811 9 +166 6 +3821 8 +2317 7 +1582 3 +3898 2 +388 8 +403 4 +2876 4 +3466 9 +1479 2 +2638 10 +778 1 +2175 5 +26 1 +658 3 +590 2 +1065 6 +4014 1 +3093 8 +3340 1 +3835 6 +1366 5 +2207 2 +3634 10 +284 1 +1490 5 +2578 5 +574 10 +3098 5 +2438 6 +739 4 +350 8 +3544 9 +657 7 +2999 1 +2611 10 +2105 8 +3416 7 +952 4 +3886 3 +3437 2 +1740 6 +3627 4 +2275 7 +2992 8 +974 9 +3900 8 +4 10 +3258 5 +1439 9 +3007 6 +1782 4 +1625 8 +414 1 +1805 4 +2885 9 +363 10 +2635 8 +715 6 +87 3 +2050 1 +513 1 +2020 10 +2294 4 +3713 3 +3611 8 +640 7 +2474 6 +782 2 +432 3 +2424 9 +2661 6 +919 8 +2453 7 +1694 8 +560 10 +1311 5 +3812 8 +1185 6 +3277 8 +2681 3 +3695 8 +2804 10 +836 2 +2331 7 +799 5 +2602 3 +119 9 +467 5 +3483 4 +706 4 +2544 8 +2491 10 +2124 6 +3472 5 +2085 10 +3649 1 +1534 2 +1163 7 +2186 9 +1385 7 +914 4 +2603 8 +950 4 +3991 4 +1647 1 +2278 8 +385 5 +2320 3 +3261 8 +2689 7 +915 4 +1615 2 +2722 4 +1011 4 +882 1 +2544 7 +3906 3 +102 1 +3270 2 +2172 8 +461 10 +1626 3 +16 4 +686 6 +838 1 +2327 4 +299 2 +1070 3 +3076 7 +2740 3 +1730 7 +3560 8 +3786 2 +977 4 +520 2 +2333 8 +835 7 +3915 3 +876 3 +273 4 +2967 7 +1563 8 +1852 8 +3721 3 +3859 8 +1528 1 +1475 8 +3293 9 +3165 8 +3501 9 +2396 3 +3608 9 +2272 6 +2165 8 +3257 9 +2610 4 +1163 1 +3509 3 +1916 3 +3182 3 +2371 4 +2451 1 +3350 1 +2898 3 +2300 5 +1668 3 +2103 10 +1699 6 +601 5 +1613 2 +1192 5 +2242 5 +1992 4 +1000 2 +941 10 +1213 10 +3913 1 +3555 2 +1632 8 +2423 2 +227 8 +764 3 +2619 7 +3879 5 +179 1 +3913 9 +2466 4 +535 4 +2936 7 +1864 8 +2765 7 +3059 4 +1189 4 +2223 3 +2341 5 +2939 2 +3941 6 +3223 9 +1994 9 +3308 
1 +3122 9 +1325 5 +1739 3 +1566 10 +50 6 +695 10 +2593 9 +13 3 +1030 4 +2702 10 +1909 9 +779 6 +3447 3 +3263 1 +1277 9 +1509 1 +3466 7 +2193 7 +1238 10 +482 1 +1026 2 +3504 5 +43 7 +1116 6 +3103 10 +3342 9 +3338 2 +727 9 +623 10 +831 9 +97 3 +926 3 +3812 1 +3470 8 +266 7 +3445 8 +2394 3 +979 7 +1050 4 +2067 2 +3617 3 +412 2 +1346 7 +3277 10 +548 1 +80 5 +3596 9 +3072 1 +2583 5 +1878 4 +307 3 +225 8 +920 8 +3260 5 +3237 4 +1813 3 +337 7 +85 3 +2357 8 +2327 4 +369 10 +924 10 +4089 1 +2310 9 +3379 2 +591 1 +2988 5 +1490 6 +4028 5 +538 7 +168 4 +2168 4 +350 9 +3798 2 +535 1 +1859 4 +2186 8 +4011 5 +3635 6 +1262 1 +2529 4 +1050 6 +2014 9 +2269 6 +3534 10 +2635 8 +1490 4 +979 4 +2981 4 +3493 9 +3085 2 +107 3 +3336 8 +270 3 +1920 8 +1398 1 +1968 4 +1477 1 +244 6 +3898 1 +1176 2 +1237 7 +3657 4 +3846 1 +3963 1 +1973 9 +223 8 +2640 8 +2148 8 +3957 8 +1940 6 +391 6 +2694 5 +2599 10 +2327 6 +1905 10 +762 5 +1770 1 +1145 4 +833 9 +420 1 +970 3 +551 4 +919 2 +1839 6 +2596 4 +991 10 +1659 10 +1917 10 +1809 5 +1835 5 +197 7 +1199 5 +120 6 +1531 1 +1847 3 +3539 7 +1262 5 +1683 8 +5 1 +3279 6 +1075 1 +199 5 +3986 7 +1648 9 +3929 9 +1898 1 +3873 5 +3550 5 +2803 7 +2429 8 +1000 5 +2265 9 +460 2 +2657 1 +687 3 +61 2 +1399 9 +1496 8 +952 3 +3675 7 +3212 1 +1912 7 +3953 2 +1041 8 +1579 10 +2090 5 +2472 10 +2296 6 +1064 8 +534 6 +669 1 +445 3 +2713 5 +1119 8 +1021 6 +3815 2 +2857 2 +2602 3 +3713 9 +2803 2 +3275 8 +959 5 +1625 9 +2189 4 +248 6 +2983 7 +3182 4 +696 2 +3458 6 +2456 10 +314 5 +3712 7 +2531 3 +3989 3 +1422 5 +1620 7 +170 4 +3562 5 +2963 7 +2518 7 +3555 2 +729 7 +3397 7 +245 7 +200 2 +169 10 +2027 8 +313 2 +386 6 +1107 3 +133 3 +323 3 +1767 2 +1878 8 +2341 2 +2469 2 +3722 4 +497 6 +1572 8 +3332 5 +3172 6 +1846 7 +3105 5 +2239 10 +3140 5 +2168 9 +1318 3 +3639 10 +1989 9 +1165 3 +2288 3 +1654 9 +1272 5 +1434 2 +1465 3 +378 5 +2543 8 +3443 7 +2578 6 +1590 5 +3397 6 +457 10 +2220 8 +3763 7 +3461 5 +87 9 +2351 3 +2952 6 +4072 1 +1095 1 +1502 6 +1006 9 +2466 1 +3924 10 +3303 5 +3884 
1 +332 10 +1288 4 +331 3 +1055 1 +3754 5 +2886 8 +2959 8 +4087 6 +2734 2 +1949 10 +1009 4 +4041 4 +1906 3 +1317 7 +363 1 +1212 9 +3142 3 +1817 5 +2246 10 +3563 5 +2756 5 +63 9 +3101 4 +3782 7 +2576 7 +3221 10 +1074 7 +1683 5 +3955 2 +3645 8 +1078 2 +4021 4 +968 6 +4093 4 +1355 2 +2889 8 +1407 4 +2986 7 +864 4 +1861 6 +2654 2 +3886 1 +1707 4 +2580 10 +751 9 +750 10 +445 8 +1055 6 +2636 1 +193 6 +2010 8 +2950 3 +3717 1 +2744 6 +450 2 +3456 10 +3531 9 +3257 10 +2757 10 +1168 6 +4041 5 +1529 9 +3601 5 +2412 7 +2878 10 +3562 3 +185 5 +2563 8 +1384 7 +513 6 +1563 3 +681 2 +1639 3 +2177 1 +2432 6 +1291 1 +3617 6 +2337 7 +2274 7 +288 6 +3436 5 +3898 2 +56 7 +215 10 +2701 7 +3097 9 +855 1 +1753 5 +1794 10 +2737 4 +3033 7 +2635 3 +1103 7 +4051 5 +2734 3 +2594 8 +3391 4 +1836 10 +3074 1 +418 10 +3174 6 +5 5 +1850 8 +1737 7 +2913 2 +3168 10 +3044 9 +935 5 +3529 1 +3447 10 +658 4 +2834 5 +3690 9 +988 6 +1784 6 +2519 3 +690 7 +2426 4 +3790 9 +2893 10 +3717 3 +3165 6 +1435 9 +3512 10 +3094 6 +2585 6 +586 1 +1464 1 +2347 8 +2402 3 +4045 6 +88 2 +3054 2 +1431 6 +3923 1 +4063 4 +1475 5 +4034 9 +2639 9 +3836 8 +2603 1 +3079 9 +1162 5 +902 8 +3504 9 +3122 10 +1886 10 +1466 2 +512 7 +2840 8 +1431 4 +2923 9 +1925 1 +219 4 +1482 3 +1919 5 +662 10 +308 10 +2537 2 +3087 1 +1711 6 +2778 6 +530 1 +2722 4 +1949 1 +1259 4 +3334 7 +3745 3 +2895 8 +3042 7 +2625 10 +1071 3 +3360 1 +1526 7 +1847 5 +1362 2 +4024 1 +1717 2 +105 5 +3761 3 +3243 8 +346 2 +2754 8 +3591 1 +3572 9 +414 1 +969 1 +2714 9 +3558 3 +2297 5 +1720 4 +3720 8 +3150 4 +4073 8 +3303 2 +2692 1 +3429 5 +701 7 +170 6 +2121 2 +502 7 +2172 1 +3261 4 +1617 6 +2151 6 +778 10 +2683 1 +2626 8 +2822 3 +1594 8 +1728 3 +3762 10 +1846 4 +1900 2 +3599 10 +528 5 +1458 2 +44 3 +1305 8 +1733 5 +88 9 +1782 8 +3755 1 +1702 2 +4083 10 +3911 9 +3894 7 +3036 2 +1522 4 +3683 10 +1559 8 +687 1 +1649 2 +283 9 +3725 1 +1026 9 +234 9 +549 9 +1874 1 +3716 6 +2385 8 +1511 7 +340 10 +329 4 +3227 4 +3500 4 +3021 8 +3928 9 +3675 10 +2745 10 +4024 9 +104 10 +4067 
7 +2514 4 +1982 2 +1922 8 +2539 9 +3064 2 +1065 6 +2145 4 +2365 8 +679 3 +631 10 +3391 3 +2604 3 +3610 4 +3968 5 +600 4 +922 1 +802 1 +1838 9 +3124 4 +2142 8 +1262 10 +1685 3 +2353 10 +2134 5 +525 3 +1139 4 +2110 8 +1900 8 +1330 8 +1132 5 +1346 3 +2477 3 +297 4 +2994 9 +709 1 +705 10 +3144 2 +659 6 +3842 1 +355 9 +1783 9 +1655 8 +833 6 +1879 7 +1793 9 +840 2 +2880 7 +1100 8 +1240 5 +28 3 +524 4 +3320 2 +3918 10 +3232 10 +3721 4 +2752 1 +3469 9 +119 6 +40 3 +1196 2 +153 7 +1412 1 +1023 4 +2199 6 +4020 1 +3339 1 +267 3 +534 5 +1809 10 +443 1 +3047 4 +1530 5 +999 9 +187 3 +682 6 +2101 1 +231 8 +1843 9 +4 7 +1252 7 +2628 6 +2873 7 +3224 9 +3350 2 +2356 3 +3838 10 +2271 8 +154 6 +4091 1 +1366 3 +1692 8 +255 6 +3856 3 +1769 9 +937 4 +2600 4 +1079 10 +2209 4 +1333 4 +838 6 +1543 3 +1424 4 +3972 3 +1069 10 +3741 6 +2895 4 +3091 6 +416 8 +2310 1 +3449 5 +980 1 +1137 4 +3295 1 +2537 10 +358 10 +1877 6 +3183 9 +729 9 +1705 10 +1596 8 +3885 9 +3740 2 +3226 9 +1116 5 +3267 8 +1188 10 +2489 8 +3964 10 +2518 5 +1513 7 +1431 6 +1797 3 +1423 9 +921 10 +3562 2 +1955 10 +1122 7 +3990 3 +3960 5 +1562 4 +1258 4 +490 4 +2236 2 +3664 4 +1782 6 +2973 3 +2473 6 +273 9 +784 9 +2434 2 +494 8 +1196 7 +1416 10 +1631 1 +518 9 +1756 9 +3957 7 +1900 4 +2754 9 +3777 1 +53 10 +2003 5 +4001 8 +268 1 +3237 7 +808 4 +595 5 +1617 1 +1093 1 +2162 10 +2289 8 +134 8 +1671 1 +758 2 +698 4 +1203 9 +1715 8 +2787 5 +3170 4 +3987 8 +4067 10 +1519 7 +2314 4 +1213 2 +3345 2 +3304 1 +3792 9 +3340 5 +2579 6 +307 9 +1753 6 +3547 10 +1761 3 +2886 3 +3110 10 +1389 10 +961 4 +2207 3 +2827 4 +362 4 +816 1 +127 6 +2450 10 +3879 5 +3620 9 +472 2 +946 5 +1408 8 +2322 1 +762 5 +3162 3 +1389 8 +781 6 +1851 3 +3896 10 +790 3 +3365 2 +2820 8 +3210 5 +2584 9 +626 3 +298 2 +1770 1 +219 2 +2076 1 +3885 3 +65 6 +326 6 +4068 3 +2359 7 +1967 10 +3458 8 +2498 5 +3206 6 +1216 1 +196 2 +218 6 +1272 3 +1691 10 +2849 10 +3830 7 +1267 7 +3000 1 +1946 8 +3059 8 +3379 3 +2818 10 +1316 1 +3641 7 +16 4 +633 1 +3907 6 +610 10 +2836 2 +2250 7 
+3507 6 +389 9 +3438 2 +1448 7 +1073 9 +3074 8 +3004 1 +3705 6 +3537 2 +2689 8 +2070 8 +2138 7 +2334 3 +3404 2 +1043 1 +3487 2 +908 5 +276 3 +2628 4 +794 3 +2567 2 +135 7 +1559 5 +3642 1 +3973 4 +2905 4 +48 6 +1530 5 +3659 6 +3210 1 +2520 9 +871 1 +1138 3 +1548 3 +336 2 +3684 1 +248 8 +1258 10 +3858 1 +100 8 +3501 10 +3897 10 +295 6 +634 3 +4079 10 +484 5 +1548 7 +3748 9 +1562 3 +0 7 +2139 7 +4024 2 +3352 6 +2749 3 +791 3 +365 3 +3835 10 +2872 6 +2305 4 +938 8 +207 10 +2934 2 +1847 1 +3662 5 +31 10 +3231 7 +2673 2 +1268 10 +2885 5 +912 10 +1940 4 +3632 6 +690 1 +1182 3 +1392 6 +2486 4 +2463 4 +1059 3 +1403 1 +2056 2 +1248 10 +649 7 +1937 5 +3522 6 +3588 9 +3004 4 +1324 5 +1440 10 +694 9 +325 6 +2231 3 +1159 9 +2821 7 +351 4 +1955 6 +3836 4 +142 9 +3406 8 +3108 7 +2828 4 +2230 1 +3395 10 +1428 6 +3546 2 +1741 9 +2505 3 +869 8 +2601 4 +2991 10 +2413 5 +1260 3 +3700 9 +1916 6 +3677 5 +2240 8 +663 8 +1068 1 +151 9 +2250 8 +1435 6 +3274 10 +3595 1 +939 1 +3649 4 +3862 1 +3945 1 +1515 1 +4066 3 +3597 2 +509 2 +3024 1 +2732 1 +2575 3 +1563 2 +3899 1 +251 8 +3423 8 +1755 5 +222 1 +2286 1 +3037 3 +3884 2 +3108 4 +560 4 +1031 4 +2828 10 +3025 1 +3672 8 +2637 5 +2769 10 +2879 10 +2525 10 +950 8 +3348 9 +3913 3 +1365 8 +583 6 +2070 5 +2147 6 +3622 7 +2350 10 +1 1 +2998 1 +3268 6 +2171 5 +2428 5 +1500 4 +4086 2 +3881 9 +2854 1 +2452 7 +1137 5 +1811 2 +3475 7 +573 4 +499 5 +3365 2 +1496 3 +620 7 +1178 9 +471 7 +3491 9 +3427 4 +3926 2 +1732 3 +3207 3 +3701 8 +3904 6 +584 3 +2269 3 +1809 8 +198 9 +2839 1 +2380 3 +3147 4 +3633 4 +3938 5 +422 1 +2110 7 +938 10 +2953 3 +2375 9 +2152 10 +2116 7 +3214 1 +3381 9 +3935 9 +749 10 +93 5 +375 2 +3235 7 +2273 5 +661 6 +1081 6 +2591 10 +2980 4 +3576 9 +2685 6 +89 7 +3791 5 +1324 3 +799 7 +3817 2 +3597 8 +2069 8 +1208 5 +181 9 +2470 4 +305 3 +3769 7 +684 7 +3530 1 +3045 6 +1786 10 +2674 10 +3354 3 +1024 7 +3725 10 +2067 4 +3786 6 +2834 3 +1481 9 +1026 8 +433 6 +891 9 +2960 9 +2241 2 +3283 10 +3755 5 +3801 2 +2694 8 +2519 8 +3572 8 +929 8 +1920 
1 +1490 6 +2965 10 +2134 6 +4094 9 +1676 6 +3291 2 +1468 6 +2697 4 +2374 2 +2226 10 +3168 1 +1341 10 +2267 5 +383 8 +1830 9 +516 5 +3775 6 +2244 5 +1994 8 +322 10 +931 4 +1239 1 +3771 3 +1065 3 +2158 1 +302 9 +1232 2 +27 5 +2198 5 +1175 3 +259 7 +1041 6 +441 9 +2057 6 +4025 7 +2997 6 +2612 3 +1795 10 +1736 6 +470 8 +2139 10 +2292 5 +3877 5 +2182 9 +522 6 +414 4 +3480 9 +2813 6 +3846 9 +2364 8 +3167 1 +3545 5 +91 10 +3297 3 +1043 5 +1361 6 +3509 10 +169 6 +487 9 +4011 2 +2829 3 +2796 7 +834 7 +1501 6 +2302 2 +678 3 +406 5 +2282 9 +1730 10 +3180 7 +2823 9 +1364 2 +2150 1 +568 9 +504 10 +3665 1 +667 1 +2582 8 +3717 9 +1298 4 +3866 6 +2818 8 +2768 5 +1045 6 +3522 3 +1155 5 +2573 7 +4050 4 +1652 1 +1452 5 +436 5 +3847 3 +3607 4 +3792 10 +97 4 +1770 3 +1013 2 +1344 7 +522 2 +2092 10 +920 10 +22 4 +1869 5 +2956 3 +963 7 +783 7 +612 1 +1417 10 +2938 2 +2513 1 +3969 8 +1965 8 +3341 3 +963 6 +2776 2 +776 6 +3961 10 +1083 5 +352 1 +2796 2 +2380 3 +3073 1 +1922 8 +3254 4 +1611 1 +2675 9 +2014 9 +1642 6 +209 2 +1987 4 +3961 2 +2989 4 +228 2 +3609 6 +3115 2 +1281 10 +1868 7 +3282 9 +105 5 +2738 3 +2689 10 +1562 5 +787 3 +287 6 +1355 1 +2487 4 +2232 8 +444 8 +2134 5 +1247 8 +1801 3 +2057 1 +1704 2 +3722 8 +3354 6 +3517 7 +1958 8 +2941 5 +341 4 +842 6 +648 9 +3942 8 +3992 1 +3825 3 +2476 9 +3122 9 +2830 9 +3434 10 +3584 6 +1944 5 +1334 5 +2300 9 +676 7 +744 7 +3021 8 +868 8 +2813 4 +82 9 +492 1 +1642 1 +3024 1 +2654 5 +727 8 +3168 1 +930 7 +2031 1 +1202 6 +1500 9 +1101 4 +1638 8 +2348 8 +1172 3 +1112 9 +1455 10 +361 7 +169 5 +2766 8 +1046 2 +3022 5 +3446 4 +2985 2 +2579 6 +1243 8 +2563 9 +3524 9 +1332 3 +872 5 +3511 7 +3603 7 +67 1 +3095 2 +1451 6 +2152 9 +1188 6 +155 1 +2701 10 +2184 9 +1547 7 +3630 1 +111 9 +1875 5 +1778 8 +789 9 +1594 5 +2222 2 +682 7 +25 3 +1114 7 +3784 10 +1524 10 +2182 9 +1933 8 +2809 3 +1038 9 +1370 5 +1205 9 +435 10 +3227 7 +1956 9 +1989 8 +3017 10 +1766 5 +19 3 +3860 5 +1692 10 +1392 5 +1466 6 +536 2 +3076 2 +682 6 +123 7 +1928 10 +1195 5 +1706 10 +1416 3 
+2377 3 +2701 2 +2497 4 +2006 4 +4042 7 +3047 10 +2885 4 +787 5 +3125 10 +3153 1 +2396 8 +4022 2 +68 3 +471 9 +1151 2 +2424 10 +2315 10 +2647 6 +923 7 +1568 3 +1455 3 +1732 2 +1619 8 +2236 10 +3652 2 +921 3 +435 6 +520 8 +3827 10 +3811 9 +1808 2 +3463 1 +2904 1 +46 6 +3775 9 +1976 7 +1712 4 +180 2 +3792 4 +20 1 +217 4 +1728 2 +1379 6 +2227 7 +319 8 +4018 6 +672 5 +1396 6 +3473 8 +899 9 +801 2 +1054 10 +2683 10 +2972 4 +1341 2 +1574 2 +2958 9 +670 6 +2150 5 +3907 8 +2075 6 +209 3 +222 6 +1025 1 +1429 8 +1835 2 +138 10 +1879 10 +3717 2 +954 10 +1109 2 +1252 7 +2263 9 +1175 3 +2932 7 +1711 3 +2417 8 +2768 3 +3771 3 +60 5 +636 9 +4044 10 +3915 7 +3548 3 +1739 9 +3539 1 +996 10 +3690 2 +200 1 +944 3 +1825 2 +2821 1 +1539 3 +3258 4 +2918 2 +3429 5 +1695 6 +3019 7 +888 5 +1786 4 +1168 10 +2416 4 +930 9 +3907 1 +784 8 +1125 6 +3627 3 +1924 6 +100 1 +505 8 +1406 10 +1392 1 +2097 3 +1945 2 +3977 9 +3696 1 +3151 6 +1128 8 +1013 8 +3398 10 +3087 1 +3777 3 +1149 8 +463 6 +2299 6 +324 5 +1905 4 +2079 4 +3758 1 +900 4 +2406 7 +1115 9 +19 9 +502 6 +1055 4 +1612 6 +3175 10 +502 10 +952 2 +1090 10 +3677 8 +2921 3 +201 10 +934 2 +687 1 +697 6 +658 10 +1937 9 +1498 6 +3684 5 +2529 7 +2345 6 +2650 5 +756 9 +3051 7 +1827 5 +2805 3 +429 3 +1311 2 +1630 1 +906 10 +3972 2 +2267 9 +2787 3 +2854 8 +969 8 +3208 5 +1617 7 +1257 2 +2686 1 +3185 1 +624 4 +2806 1 +3 9 +2281 10 +1088 7 +3706 8 +86 3 +2751 6 +419 8 +934 4 +735 7 +1050 4 +2650 8 +2974 7 +3507 2 +3378 3 +655 3 +3938 10 +3890 5 +2810 6 +107 1 +402 10 +102 3 +2569 10 +1917 4 +2016 8 +484 1 +849 7 +2184 2 +2664 2 +1443 1 +620 5 +232 7 +1912 2 +3987 3 +2452 10 +1971 3 +3443 3 +1406 3 +1527 8 +3127 9 +3006 2 +1573 8 +2734 6 +2642 6 +3673 4 +3856 7 +1311 9 +3227 3 +2793 10 +104 9 +275 4 +3607 6 +236 10 +1099 5 +2699 9 +1543 3 +3014 4 +2147 10 +263 5 +2195 7 +2457 1 +3089 9 +633 7 +5 8 +1026 7 +1727 6 +3000 7 +2407 7 +2481 4 +969 1 +790 4 +2650 8 +2250 6 +3364 4 +2342 6 +2125 1 +3487 5 +3962 8 +775 10 +120 3 +2409 7 +1693 1 +730 7 +2123 3 
+1081 3 +3430 7 +1039 1 +136 5 +1774 1 +1909 6 +3608 2 +2798 9 +2919 10 +1248 10 +3346 3 +1630 4 +2171 8 +3063 10 +2248 7 +446 3 +1885 5 +2906 9 +840 2 +3376 5 +950 7 +1795 7 +3019 7 +3991 5 +3399 2 +2402 8 +1872 9 +2271 9 +2391 4 +3594 3 +3902 1 +2192 10 +759 2 +2296 7 +1765 10 +380 10 +3552 2 +2086 2 +500 9 +1761 10 +3501 4 +3029 1 +89 4 +1115 7 +1058 10 +189 9 +3543 9 +2984 10 +4076 3 +3110 5 +469 4 +736 5 +3463 1 +2013 10 +3046 4 +3498 2 +1238 1 +522 1 +2127 8 +978 4 +729 1 +377 3 +386 7 +1383 9 +2361 4 +2909 3 +2145 2 +1077 10 +2420 9 +1968 5 +2732 6 +3160 9 +1420 7 +1166 4 +3797 4 +3500 1 +1842 5 +3906 4 +1545 1 +659 7 +1255 8 +2148 5 +2412 5 +4032 9 +3519 7 +2829 3 +3433 4 +1189 8 +2520 9 +699 10 +2471 1 +1493 5 +3088 5 +672 9 +2447 10 +2021 10 +3618 1 +427 8 +1215 8 +1756 1 +1354 8 +1478 4 +991 3 +586 10 +3611 2 +2232 7 +3246 3 +3589 5 +2253 1 +1119 3 +781 1 +2485 6 +2108 7 +3947 10 +2229 6 +868 1 +2127 8 +2896 3 +920 7 +4081 4 +3772 5 +568 6 +1216 3 +3173 4 +1450 3 +4033 1 +2249 1 +3957 10 +3035 1 +1729 1 +3325 5 +1007 10 +2506 3 +3994 2 +823 5 +3192 6 +86 3 +386 3 +4008 1 +2620 4 +1866 2 +3206 3 +3073 10 +825 1 +35 8 +2494 7 +1293 10 +3960 2 +1139 4 +2794 1 +33 6 +115 4 +957 5 +293 3 +2879 8 +309 6 +2931 1 +2406 4 +97 8 +2860 8 +1381 1 +3990 5 +1016 4 +1753 3 +871 4 +3896 7 +930 5 +1331 10 +223 3 +1192 9 +1507 4 +3316 9 +2379 4 +803 1 +1127 3 +2200 9 +1403 2 +3959 9 +926 6 +1050 1 +3988 7 +245 3 +3801 7 +2001 9 +516 6 +1583 8 +3727 7 +1131 5 +722 1 +181 6 +3062 2 +2831 9 +75 3 +1255 4 +2148 5 +573 9 +1622 1 +3778 8 +765 8 +1693 4 +758 4 +2215 9 +2774 2 +2932 1 +1038 10 +992 9 +1914 2 +1493 7 +713 10 +1508 6 +3977 8 +3845 8 +895 10 +2137 3 +1989 6 +3691 7 +1555 4 +2778 4 +1204 3 +2078 8 +2235 9 +956 5 +3698 10 +1343 8 +3365 5 +644 10 +4054 8 +2758 8 +1965 6 +857 6 +1702 9 +2673 2 +1519 8 +1494 4 +747 3 +631 1 +2729 9 +859 7 +1461 7 +3917 1 +2298 10 +3868 4 +1318 10 +2140 5 +2033 7 +1176 8 +2504 1 +2810 4 +2413 8 +1947 7 +3616 10 +2610 4 +738 2 +3708 8 
+1749 5 +807 1 +412 6 +562 4 +1296 2 +666 1 +3297 10 +3071 1 +3072 10 +1305 7 +492 9 +88 10 +253 10 +2293 7 +3578 9 +3010 3 +1103 6 +806 2 +1956 8 +1767 7 +3676 9 +3808 10 +1456 10 +1549 1 +3459 6 +2544 6 +3331 5 +3148 4 +3730 1 +1873 2 +3367 2 +14 4 +1136 7 +3946 9 +1644 3 +2633 6 +4002 3 +930 3 +4034 7 +3655 4 +2217 10 +1375 8 +848 8 +2156 9 +1357 10 +2487 10 +818 7 +2854 5 +338 2 +1786 7 +48 10 +952 6 +478 2 +4026 5 +2973 1 +2365 10 +1676 3 +101 9 +3287 10 +3129 1 +50 6 +2367 5 +329 7 +2130 1 +3216 8 +2411 9 +718 9 +119 9 +1261 2 +2742 4 +2537 7 +2015 3 +568 9 +1695 1 +1033 4 +1387 1 +377 2 +769 9 +3557 7 +3682 4 +2298 10 +2092 8 +2861 9 +4058 9 +3866 8 +2392 5 +730 5 +1367 4 +1270 6 +1394 4 +1280 7 +3716 9 +3628 2 +3724 3 +2152 5 +3150 7 +1816 4 +3361 7 +3881 2 +2687 3 +1196 8 +520 1 +1285 5 +122 3 +1325 8 +2277 7 +1756 2 +1950 4 +943 5 +3063 9 +3271 5 +667 1 +1585 4 +1869 6 +3748 1 +1021 4 +2974 2 +3761 4 +2004 6 +2236 7 +103 4 +3308 3 +3345 7 +1268 9 +3402 9 +2054 5 +3611 7 +1457 8 +2644 5 +3963 1 +460 1 +4003 5 +1750 4 +772 8 +148 2 +543 9 +3123 5 +1880 10 +2289 10 +3171 10 +2273 10 +3375 3 +2209 6 +2851 2 +1316 3 +3785 6 +3668 8 +678 4 +3604 7 +3133 10 +1967 5 +254 6 +2175 9 +2619 10 +3425 8 +1921 2 +1895 7 +2781 7 +747 5 +3027 8 +1582 10 +2156 2 +1705 2 +142 10 +2922 9 +87 9 +2535 6 +2624 3 +2596 3 +3152 3 +1758 1 +1642 5 +1274 5 +2318 3 +2609 9 +1795 10 +993 8 +839 7 +700 10 +2971 2 +3278 1 +3266 1 +2900 4 +1841 5 +2338 4 +2353 7 +2718 8 +2117 4 +955 7 +1663 2 +2930 8 +1405 8 +1751 1 +1847 5 +2888 5 +619 2 +1495 10 +1827 3 +2583 4 +4059 2 +2441 10 +471 8 +3001 5 +489 6 +2922 8 +3143 1 +1190 9 +235 1 +3849 8 +1391 9 +2917 8 +3836 9 +3760 3 +878 4 +1067 9 +3887 7 +2617 9 +2885 6 +1647 5 +776 9 +1986 9 +2081 1 +3772 4 +2516 3 +2760 10 +65 9 +942 2 +223 9 +3817 9 +977 7 +1654 5 +2963 7 +2599 7 +1756 8 +1715 10 +947 2 +1532 6 +65 6 +3133 10 +583 4 +2094 4 +724 9 +2191 10 +1467 10 +3013 9 +1477 3 +382 9 +1461 1 +3658 7 +2626 9 +3200 1 +3371 6 +4079 5 +3058 3 
+605 6 +2811 7 +3553 1 +1942 7 +3466 4 +940 7 +660 8 +2888 5 +3090 6 +1810 2 +2963 1 +3239 7 +2303 3 +1670 10 +496 7 +211 3 +1320 5 +3672 10 +2720 2 +3976 4 +1718 7 +3166 5 +3829 1 +215 6 +2918 9 +472 10 +2736 2 +794 1 +2494 1 +1493 3 +261 6 +1956 3 +146 6 +928 9 +3493 10 +2533 8 +1941 9 +2098 1 +4090 2 +1157 5 +3283 5 +2744 1 +1239 9 +3837 2 +1011 2 +1635 5 +30 9 +1449 5 +3137 2 +3188 8 +3621 6 +1270 9 +148 9 +1486 8 +3255 1 +1833 8 +3170 5 +1359 3 +3614 6 +926 8 +3692 6 +174 8 +3870 8 +3559 9 +1444 3 +1781 8 +994 2 +839 3 +1880 6 +3972 4 +1959 4 +1299 7 +3647 2 +2337 1 +1985 4 +1648 7 +705 1 +1994 6 +1005 5 +811 1 +3310 7 +464 5 +424 6 +385 10 +653 5 +1669 3 +3109 4 +875 8 +1144 2 +954 10 +1703 5 +327 1 +3600 8 +3006 1 +519 6 +1298 8 +3093 5 +1932 3 +1953 7 +10 6 +3606 1 +2383 3 +2947 9 +3537 3 +2803 7 +2514 4 +775 5 +3214 4 +1961 8 +366 3 +1582 9 +1287 6 +2457 3 +1072 2 +2354 4 +2110 3 +1718 8 +1585 6 +3362 7 +875 1 +114 3 +179 6 +174 4 +1479 3 +2347 7 +1574 6 +131 8 +2819 1 +4066 6 +554 5 +3660 1 +3713 8 +1722 4 +2032 7 +4040 4 +2327 1 +3218 9 +2304 4 +1208 2 +1272 7 +3973 2 +2546 8 +1244 7 +167 10 +1252 9 +4012 8 +1738 4 +3182 2 +3331 7 +1971 5 +2011 2 +60 7 +2230 6 +311 9 +3097 3 +3544 1 +396 1 +1450 5 +1281 9 +3761 1 +1315 8 +775 8 +3120 7 +683 1 +2369 7 +245 4 +40 1 +3887 5 +648 6 +3911 8 +1811 9 +2978 10 +2214 6 +1200 3 +662 10 +3517 5 +1484 9 +2694 1 +1649 4 +3097 1 +3759 7 +3353 7 +2757 5 +2043 8 +2335 7 +2178 8 +266 5 +2378 3 +3650 3 +3902 10 +3780 2 +442 3 +1348 5 +3576 4 +3674 1 +5 6 +2134 10 +525 1 +2398 8 +667 6 +1302 3 +2670 4 +3730 7 +3069 1 +1588 8 +2017 3 +3600 5 +847 1 +1333 5 +167 7 +1901 7 +3950 6 +1703 2 +2472 10 +2305 7 +3644 10 +838 9 +3468 2 +1665 7 +1863 2 +2069 10 +803 1 +2941 10 +3930 6 +1134 3 +112 4 +1901 7 +2829 9 +4032 2 +3564 7 +2334 4 +860 3 +549 1 +1721 5 +2537 1 +2876 7 +93 1 +2836 5 +2078 3 +70 5 +722 3 +623 1 +3732 8 +2760 8 +3092 8 +3557 5 +1105 7 +2407 1 +2697 7 +3798 6 +1644 9 +1985 8 +3751 6 +3006 3 +28 9 +2503 3 +3489 10 
+14 5 +2102 7 +2773 7 +835 5 +858 7 +3046 6 +2470 7 +2434 4 +784 8 +2623 8 +1409 9 +1491 6 +1584 4 +477 7 +3550 2 +3638 7 +3988 7 +970 8 +1608 4 +2364 3 +2241 4 +3477 3 +3306 1 +1007 9 +3152 7 +1584 1 +1692 1 +3136 7 +1298 9 +1255 1 +1786 3 +300 7 +3535 9 +910 8 +3595 3 +826 1 +2153 8 +556 6 +1466 8 +2361 3 +3294 7 +1322 2 +2067 8 +252 9 +1180 7 +2591 9 +1597 7 +2285 10 +1746 10 +1650 7 +549 2 +626 8 +3492 6 +331 5 +2286 5 +3405 7 +2605 10 +3475 7 +4 10 +2768 8 +1310 6 +1797 3 +589 3 +1515 5 +3233 9 +2344 7 +2541 2 +1787 7 +4045 7 +2420 1 +1966 4 +1472 2 +1069 1 +1283 7 +858 7 +596 4 +976 10 +1710 7 +333 1 +1013 7 +4034 1 +539 7 +4080 5 +3437 8 +2147 2 +159 6 +2971 3 +2139 9 +1591 8 +53 6 +2390 5 +1148 4 +2909 2 +1482 3 +3832 4 +525 2 +2189 3 +2575 4 +1690 7 +3861 10 +3784 7 +1114 4 +2781 2 +1732 8 +128 6 +1399 2 +3284 2 +2348 3 +3542 9 +1330 9 +1386 4 +1547 7 +2263 4 +1135 6 +1884 1 +3998 5 +1497 7 +2167 3 +368 1 +2138 3 +4037 5 +2597 9 +2724 3 +2630 4 +1723 1 +1748 8 +2450 2 +3249 4 +1424 1 +3584 8 +4089 8 +2332 3 +2750 2 +1749 4 +3349 2 +1757 2 +519 5 +638 10 +294 7 +368 3 +3166 8 +1629 3 +1503 10 +3487 6 +2064 8 +3065 8 +745 5 +291 7 +3601 6 +1104 1 +3720 10 +2689 8 +639 9 +637 10 +3459 6 +684 5 +157 1 +2870 2 +3527 10 +2917 4 +808 8 +3481 3 +3827 7 +2632 10 +1721 7 +3048 8 +680 1 +80 8 +439 2 +2997 9 +2375 5 +3000 7 +23 3 +1671 6 +1170 5 +2412 4 +1315 3 +1559 5 +3466 3 +128 9 +2235 4 +1234 8 +130 7 +2290 5 +1172 3 +988 4 +3293 6 +3955 5 +3742 2 +3341 5 +1981 3 +3863 1 +1455 5 +3057 2 +2747 2 +894 10 +506 9 +3800 3 +3837 1 +3078 5 +1080 2 +2605 8 +2867 3 +2190 8 +3406 10 +1964 3 +1570 3 +3135 6 +273 2 +3114 8 +556 9 +3506 8 +3403 4 +1560 3 +1661 2 +2350 8 +401 2 +800 10 +3005 1 +3493 1 +1726 2 +3423 10 +2471 7 +2887 5 +3444 8 +3666 6 +315 5 +1658 6 +1531 8 +1046 8 +3627 6 +3978 7 +3622 4 +1222 3 +2234 8 +2044 3 +178 2 +783 10 +1162 4 +3791 1 +2718 2 +3112 9 +2532 1 +1030 5 +1084 6 +805 10 +4067 2 +2768 2 +1309 5 +3937 1 +3020 3 +3393 5 +2259 4 +2650 2 +2210 7 
+3125 1 +2915 6 +2796 9 +2357 1 +2228 7 +3486 3 +1937 6 +2562 7 +2534 5 +3545 9 +390 8 +695 7 +320 10 +2230 7 +764 4 +1925 6 +2854 7 +1803 7 +2432 5 +44 6 +763 9 +1233 9 +3689 4 +2286 9 +1247 3 +2391 4 +3349 6 +541 3 +3030 5 +2707 9 +2244 5 +2029 7 +3454 3 +1038 6 +2677 7 +3681 6 +2450 6 +2275 8 +1788 6 +3029 6 +2 3 +3667 1 +2126 5 +310 9 +1042 9 +4090 8 +3951 6 +3556 6 +3841 8 +3691 7 +1078 4 +1289 9 +2909 2 +2206 4 +3091 1 +1624 9 +1681 4 +437 8 +3112 9 +2679 9 +921 7 +1320 7 +2201 8 +425 7 +2930 2 +67 6 +1225 9 +933 5 +3952 5 +3123 1 +615 7 +3958 7 +1579 4 +3453 6 +944 7 +1351 1 +537 3 +1799 4 +2370 1 +2540 7 +1640 9 +3705 3 +1689 1 +302 3 +255 9 +613 2 +2241 9 +465 2 +1907 7 +251 1 +3398 6 +3306 7 +2646 9 +3697 7 +2996 10 +1177 6 +2513 5 +573 2 +383 9 +1723 6 +2759 2 +1603 1 +1701 10 +1969 2 +3900 2 +2828 4 +696 7 +2191 10 +3280 7 +3241 6 +1950 9 +0 1 +3352 5 +3994 8 +2041 4 +1157 10 +1108 1 +1533 5 +3628 6 +402 6 +377 6 +3321 4 +1876 7 +2851 8 +2439 8 +2134 5 +1246 1 +2580 1 +254 3 +276 9 +1739 1 +2001 8 +1303 8 +3666 3 +43 5 +350 9 +1619 1 +2449 3 +3991 4 +3133 4 +2754 2 +2808 2 +1103 7 +1933 1 +66 8 +3431 3 +1685 4 +781 10 +615 5 +1513 5 +230 1 +395 4 +2410 5 +3608 6 +2031 6 +3742 3 +868 2 +1367 6 +3929 6 +714 1 +1885 7 +3334 5 +334 5 +1331 4 +3245 5 +2617 1 +2360 4 +692 6 +2537 1 +2088 2 +2656 9 +607 2 +2924 1 +2619 6 +3043 4 +278 6 +1781 2 +1913 5 +1933 5 +2976 8 +3063 6 +1946 6 +608 6 +1187 7 +4070 8 +199 4 +1766 8 +455 6 +2961 1 +581 8 +2428 8 +3609 7 +3068 5 +3723 10 +3046 9 +227 7 +523 2 +1078 4 +2307 10 +513 8 +3658 1 +2901 4 +34 8 +2467 1 +2915 8 +3072 7 +3147 10 +1228 8 +1023 7 +2446 4 +1128 5 +398 3 +4016 5 +305 5 +274 2 +1020 5 +1036 4 +3663 10 +3575 10 +1579 2 +1479 6 +3604 2 +2575 3 +716 4 +2443 4 +1533 5 +3364 8 +66 2 +2500 3 +3487 9 +2246 10 +150 7 +4006 9 +4040 4 +2430 3 +4087 9 +1824 4 +11 4 +3395 6 +1865 7 +2906 6 +1713 5 +3445 1 +3127 5 +2756 6 +2413 6 +340 1 +3958 4 +2097 10 +428 5 +2381 2 +1517 10 +1242 10 +1686 6 +1966 1 +3688 3 +2135 7 
+2223 10 +1379 8 +3244 3 +3215 7 +3005 4 +790 1 +1388 7 +391 7 +2936 9 +1950 7 +1586 3 +210 1 +1433 1 +3135 8 +1670 1 +1243 3 +1335 5 +163 6 +1191 5 +3350 7 +213 6 +4045 9 +3476 10 +462 9 +3248 4 +3436 3 +1127 6 +1658 5 +1347 4 +2932 5 +2007 10 +1002 6 +1304 3 +2334 3 +192 2 +1257 9 +2227 1 +3308 1 +2814 3 +305 3 +4038 7 +2605 8 +209 7 +1887 7 +3522 1 +2492 4 +3894 7 +3459 6 +3142 10 +3991 1 +3256 3 +220 2 +1541 3 +2844 3 +3940 1 +3425 6 +1313 4 +2499 5 +3559 9 +343 2 +3789 5 +3440 10 +708 10 +1613 5 +4054 10 +729 10 +2120 4 +1730 6 +2600 10 +786 1 +3192 9 +3450 4 +2610 6 +1284 6 +37 5 +2563 4 +2821 6 +2018 1 +1970 4 +3072 10 +1158 6 +904 10 +936 4 +1861 1 +1580 8 +2758 6 +1760 2 +1345 8 +2884 1 +2442 1 +3824 6 +323 3 +3813 10 +3198 2 +3754 10 +3437 6 +3739 5 +3834 8 +2605 10 +2936 2 +1880 5 +3439 3 +2012 2 +2602 9 +2743 6 +1670 7 +1107 9 +577 8 +1446 6 +1641 8 +4044 8 +1785 10 +4063 3 +963 3 +2360 7 +2143 4 +631 5 +2770 8 +2246 1 +2591 7 +1715 7 +2399 7 +865 3 +248 10 +2736 4 +3382 2 +2004 10 +2353 10 +3988 7 +461 4 +3776 6 +3037 8 +3479 2 +2953 9 +431 5 +3361 9 +2087 6 +829 5 +1176 5 +1509 1 +64 9 +1950 6 +70 5 +2499 10 +1530 9 +3704 8 +2965 1 +1674 5 +541 6 +2724 1 +614 1 +2173 9 +528 9 +750 5 +2849 5 +4054 6 +2821 7 +2071 3 +3121 9 +3567 1 +2906 5 +2923 9 +854 6 +3856 3 +782 4 +531 3 +36 10 +1231 4 +1810 3 +3397 8 +3603 2 +3463 4 +1604 1 +3527 9 +3197 3 +1486 10 +2829 5 +4009 1 +1532 7 +1175 9 +2229 4 +758 10 +1525 6 +3036 3 +1694 3 +999 1 +1823 4 +913 8 +3362 6 +2952 9 +3089 7 +753 10 +2687 7 +1754 7 +1881 1 +1237 6 +3456 10 +3011 4 +3430 6 +31 6 +951 9 +3084 8 +2250 6 +448 6 +3423 4 +2852 5 +2908 9 +4023 3 +3381 8 +4050 7 +747 3 +749 6 +1208 9 +2120 4 +2983 2 +446 4 +262 9 +2805 5 +857 8 +2171 4 +1242 8 +3981 7 +2653 6 +2283 10 +1543 10 +23 1 +1594 5 +4005 5 +1599 5 +2883 3 +3549 2 +460 5 +1017 2 +2773 8 +1935 1 +2083 8 +125 6 +1009 7 +2563 1 +254 1 +2960 10 +2676 1 +1954 10 +3727 5 +1390 6 +2767 6 +1238 8 +1064 5 +3526 5 +3394 4 +2459 4 +3292 8 +557 4 +1915 
2 +2885 4 +522 5 +1848 5 +2737 3 +3946 7 +1737 5 +2257 7 +3592 4 +2320 1 +3302 10 +3434 4 +3461 7 +3007 8 +2558 10 +1675 5 +2523 1 +723 7 +3009 5 +1337 3 +3338 7 +1106 5 +2530 5 +2830 4 +2189 4 +74 10 +3974 10 +802 6 +3327 9 +982 1 +3260 3 +1319 1 +1198 6 +658 2 +2103 5 +4028 8 +47 4 +3675 2 +3015 10 +2475 10 +2789 1 +3871 8 +4089 6 +2461 5 +63 1 +1527 8 +1007 8 +3740 6 +2447 3 +3136 4 +1291 2 +975 6 +114 8 +3956 1 +1561 2 +1581 5 +3008 1 +862 5 +3916 9 +2829 2 +3533 9 +859 5 +3800 5 +2568 3 +1853 3 +1491 9 +2359 3 +2750 2 +2781 10 +2605 9 +2696 4 +2885 10 +976 8 +205 5 +1297 9 +2274 1 +1614 8 +1070 1 +780 7 +2903 3 +2126 3 +2811 8 +2572 3 +403 4 +541 3 +3383 2 +596 3 +3481 3 +794 7 +2605 7 +2808 9 +2253 3 +57 5 +3523 9 +649 9 +305 3 +3719 2 +2525 9 +3789 4 +1490 2 +3408 1 +825 4 +1038 4 +752 6 +597 4 +631 8 +3349 5 +3790 6 +3775 6 +393 7 +871 3 +1862 10 +2850 7 +1909 4 +3082 7 +670 4 +191 7 +1737 3 +639 2 +4018 8 +1718 8 +311 7 +4081 7 +176 10 +92 9 +849 2 +3130 5 +1542 9 +2422 5 +3978 9 +2606 3 +2164 1 +2940 10 +1223 8 +1207 7 +2067 4 +1123 6 +1777 1 +1010 4 +2333 4 +3535 1 +1159 2 +3640 10 +3455 10 +870 3 +1666 10 +4002 4 +3374 7 +574 9 +794 10 +1852 1 +3033 9 +3344 7 +1505 9 +1418 7 +1254 2 +1426 6 +1210 5 +1344 7 +3439 2 +190 6 +2310 3 +3417 1 +3218 1 +3767 3 +2740 3 +3469 5 +1222 2 +2083 5 +1295 9 +380 1 +4024 2 +2008 7 +2146 8 +42 3 +742 5 +2040 3 +258 5 +3952 7 +2113 9 +2801 4 +2245 9 +2645 4 +406 10 +11 1 +3805 8 +4021 1 +3852 1 +4009 9 +1355 7 +681 2 +3999 1 +3860 7 +3918 2 +1491 1 +879 3 +79 8 +2761 1 +2495 1 +3212 9 +1934 8 +2688 6 +225 1 +3301 1 +3774 5 +1241 2 +1866 9 +1305 7 +802 6 +873 2 +1863 6 +181 9 +2133 10 +963 4 +2507 9 +3048 10 +10 4 +3178 8 +1307 6 +3644 6 +3295 4 +3342 1 +612 7 +1626 4 +3110 4 +1001 9 +3538 8 +3001 3 +1299 9 +3974 4 +1072 4 +3947 10 +1275 6 +883 2 +1872 8 +2996 8 +1726 1 +2986 9 +3383 10 +3697 10 +2214 7 +1144 1 +3011 10 +122 6 +1989 4 +253 2 +3604 2 +436 7 +3439 9 +3014 9 +1132 5 +2497 5 +1760 7 +3698 5 +3682 8 +2715 8 
+2697 6 +2802 3 +274 3 +1324 8 +1397 8 +443 5 +1475 9 +3836 5 +1105 2 +2007 3 +1085 9 +1553 4 +2404 1 +582 6 +955 8 +523 1 +3553 9 +2322 8 +1896 7 +151 8 +2408 5 +1242 2 +3562 4 +1487 4 +1034 4 +1626 2 +1391 6 +341 3 +382 8 +2302 6 +612 8 +2868 8 +3886 9 +564 5 +30 10 +3082 1 +3902 10 +2355 1 +2595 5 +1375 10 +432 10 +2434 1 +2049 2 +3927 6 +2082 10 +3262 6 +2287 7 +1298 8 +2777 8 +2651 9 +2951 8 +1161 7 +0 2 +2067 9 +1207 9 +933 9 +3419 6 +1057 6 +1544 9 +3706 1 +1799 3 +2420 7 +1256 3 +2686 6 +940 1 +3258 2 +3531 9 +2370 2 +2615 3 +409 3 +3640 1 +170 1 +918 3 +1854 3 +3581 5 +1183 7 +139 10 +2701 5 +3094 8 +2015 8 +2730 10 +3635 8 +3753 1 +1954 8 +2684 3 +874 7 +2279 6 +1426 4 +1043 8 +555 9 +1957 7 +529 2 +150 5 +3874 6 +1143 4 +3684 9 +990 2 +2689 5 +3365 7 +1868 1 +3312 1 +924 6 +2338 8 +502 2 +1681 9 +3819 8 +784 10 +3578 6 +3793 8 +3022 2 +3336 1 +330 3 +1699 1 +1706 3 +467 5 +3085 5 +1614 8 +850 5 +729 5 +1346 9 +2587 9 +3329 8 +931 7 +3438 9 +94 2 +414 10 +1055 9 +2744 9 +2746 3 +3793 3 +3996 3 +459 3 +1391 1 +421 3 +2880 5 +3881 4 +306 6 +3279 6 +238 8 +2838 8 +202 1 +1912 8 +783 10 +1079 8 +3410 3 +3103 3 +780 8 +1387 9 +3247 5 +441 7 +3453 1 +229 10 +4071 5 +351 3 +1242 6 +4071 5 +284 5 +2495 10 +3582 6 +193 7 +3878 7 +1835 7 +3920 10 +366 3 +161 8 +3202 7 +1568 9 +509 3 +2408 7 +1331 5 +1072 4 +3296 8 +2598 2 +759 10 +2490 1 +2180 9 +1852 5 +2030 8 +2465 4 +1911 5 +3244 3 +2681 3 +717 7 +2784 4 +3661 9 +3235 8 +2862 1 +1307 9 +334 1 +1703 4 +106 9 +243 6 +549 4 +1384 1 +339 4 +3729 10 +848 1 +104 7 +1213 6 +2601 5 +1153 4 +1457 2 +126 7 +1842 8 +2111 2 +1553 4 +433 8 +1721 7 +893 9 +2502 3 +4031 7 +3887 2 +3853 6 +3518 8 +1580 8 +1625 9 +3938 1 +2220 10 +1079 6 +3787 4 +3303 4 +3085 2 +1625 4 +4088 9 +147 4 +1678 8 +438 2 +28 6 +2776 6 +3305 10 +55 6 +3237 8 +468 6 +2505 3 +168 5 +2744 7 +3060 5 +1359 7 +1126 5 +1796 2 +3179 2 +2160 7 +2788 6 +741 5 +2774 3 +2626 5 +1023 1 +326 9 +1254 5 +729 7 +497 10 +1630 5 +2799 7 +2377 4 +584 8 +2909 3 +2738 8 
+3993 9 +1646 8 +2446 3 +1681 9 +2129 3 +1006 9 +873 4 +2022 7 +3591 10 +3020 6 +1004 8 +122 10 +2016 6 +951 3 +3229 3 +891 1 +1945 5 +2096 6 +3140 8 +146 5 +1885 10 +430 1 +2179 6 +1376 2 +3049 8 +3672 7 +4058 5 +1300 6 +2697 4 +481 3 +1491 5 +3664 2 +2914 6 +2428 1 +2025 10 +3740 5 +3495 5 +3522 6 +204 4 +1433 9 +3559 5 +3491 8 +775 9 +163 8 +4026 3 +1105 2 +2158 8 +2307 4 +3052 8 +1218 7 +1409 9 +2749 3 +1983 5 +3082 1 +2100 9 +410 8 +3202 2 +2886 2 +2837 5 +2042 6 +1712 9 +1585 7 +831 10 +141 7 +1485 4 +1380 8 +3328 4 +2552 9 +3442 10 +28 4 +3295 5 +448 7 +716 5 +3798 7 +916 8 +4084 7 +617 5 +4088 2 +1303 2 +230 5 +189 2 +2141 10 +2471 7 +3445 7 +3267 9 +3805 2 +1588 9 +113 9 +2365 9 +189 1 +156 5 +3652 10 +3773 8 +67 1 +249 6 +573 7 +3179 8 +4062 5 +2733 6 +1974 9 +3021 9 +3017 5 +279 3 +3550 4 +923 8 +2035 8 +395 4 +4089 8 +2537 5 +1923 6 +890 5 +1996 4 +3414 7 +2303 3 +1100 2 +1671 4 +1092 2 +466 6 +2381 9 +3742 1 +1047 7 +1071 3 +4085 9 +3150 4 +2563 2 +595 2 +3896 8 +3174 8 +3984 2 +1752 10 +531 7 +73 7 +1139 7 +2312 7 +263 8 +1994 10 +1441 9 +2464 10 +2079 4 +3827 8 +820 2 +3448 10 +148 1 +3872 9 +3197 6 +680 9 +3229 3 +1794 8 +3952 6 +3950 6 +2566 5 +2126 4 +1666 2 +3131 2 +2469 9 +2005 3 +1953 3 +3515 2 +1273 6 +648 8 +1925 10 +1655 10 +1907 2 +3675 6 +811 6 +779 2 +1842 1 +2046 1 +3744 3 +1956 8 +529 5 +3925 6 +2731 10 +3582 7 +843 4 +3598 7 +944 6 +879 5 +1180 5 +542 6 +3156 4 +2067 3 +411 10 +1626 6 +3324 5 +4093 7 +2506 7 +2458 8 +2468 10 +2396 8 +2503 9 +2367 10 +3787 6 +2803 2 +4077 2 +1523 5 +2728 1 +446 6 +2513 3 +3613 10 +1775 2 +3457 3 +3930 4 +1573 1 +2969 2 +863 8 +3207 2 +1758 5 +3306 4 +3130 2 +1330 7 +3733 4 +2304 9 +58 6 +1102 10 +2276 4 +1318 10 +72 8 +1817 9 +1224 2 +2639 1 +451 9 +401 9 +2464 6 +560 9 +1965 4 +287 10 +1940 7 +24 6 +1946 10 +3108 9 +778 7 +1854 9 +3398 1 +2151 3 +2923 5 +2725 9 +3378 8 +1374 7 +845 3 +688 5 +983 3 +1179 3 +3101 9 +517 3 +2542 3 +2735 10 +1047 1 +1644 8 +1361 10 +2310 9 +2434 1 +3206 3 +535 7 +102 6 
+404 10 +3868 5 +3149 5 +2435 6 +251 7 +2300 10 +1969 7 +598 7 +923 5 +1468 8 +476 10 +2255 4 +828 2 +3250 8 +885 2 +1345 9 +1474 6 +3764 1 +502 8 +71 6 +967 9 +3653 10 +3014 4 +3569 7 +2820 4 +1316 6 +1736 3 +2992 3 +2360 8 +591 2 +832 5 +3902 10 +2303 3 +791 4 +1749 6 +958 8 +2051 10 +2864 3 +2891 4 +241 4 +1918 10 +331 5 +1104 9 +1243 2 +535 10 +2948 8 +2058 8 +2574 5 +2316 9 +2937 5 +1369 2 +1267 6 +1738 6 +1366 10 +2937 5 +2859 6 +566 8 +3383 4 +3538 2 +1572 9 +62 3 +3980 8 +2111 4 +1024 8 +1804 9 +2077 6 +1541 9 +229 4 +3343 5 +90 7 +945 1 +2381 4 +371 4 +2661 2 +3672 6 +3246 6 +2902 8 +3771 5 +3020 6 +3744 3 +1319 6 +3197 6 +2389 10 +46 6 +1502 9 +28 1 +2857 7 +331 5 +1607 2 +2794 10 +495 8 +2281 6 +880 4 +847 10 +3205 8 +4019 5 +1949 8 +3477 6 +1990 8 +344 5 +2752 8 +2034 3 +3588 7 +1771 5 +505 9 +2026 1 +1222 8 +933 2 +188 1 +2132 5 +3767 9 +3484 4 +2768 5 +1482 6 +1943 10 +1640 8 +2812 5 +3279 6 +3959 7 +2610 1 +2045 9 +433 1 +529 2 +873 10 +1385 1 +1994 8 +744 7 +2665 9 +3311 6 +211 7 +1250 1 +529 6 +759 10 +3624 8 +1505 4 +773 7 +1594 1 +3429 9 +1466 9 +2224 6 +136 3 +3932 4 +4086 8 +32 5 +3534 7 +245 3 +3196 7 +1338 9 +1794 1 +3218 10 +284 4 +1747 6 +3710 7 +3343 8 +2297 5 +2521 4 +3802 10 +3643 10 +591 2 +4093 3 +1801 2 +1185 8 +2421 9 +1381 2 +1205 5 +330 2 +3644 5 +1504 4 +3281 9 +3169 9 +2191 6 +3037 3 +3072 6 +1778 5 +221 8 +362 10 +3549 8 +834 5 +2804 7 +204 10 +3044 6 +3720 1 +3166 8 +1170 2 +3210 2 +444 6 +2219 8 +2214 5 +2229 8 +2406 2 +2538 9 +1531 8 +1341 4 +4000 5 +1662 9 +330 6 +3485 6 +1474 7 +2921 1 +773 10 +3340 8 +432 6 +1283 6 +2487 6 +1041 1 +3626 7 +2177 5 +610 8 +2025 2 +2665 2 +1007 10 +882 9 +421 8 +895 4 +1596 2 +1170 9 +386 1 +863 10 +1216 2 +3614 4 +2822 3 +1816 3 +2434 9 +3923 8 +2717 7 +2002 1 +1745 8 +1417 10 +446 10 +396 7 +517 9 +534 9 +2942 6 +1256 7 +4068 10 +911 5 +2907 2 +1927 4 +776 3 +3477 1 +785 4 +2842 2 +760 9 +3268 6 +3425 1 +1723 9 +1879 5 +660 4 +415 4 +1791 2 +811 6 +248 5 +236 2 +287 10 +1817 4 +2630 2 +2992 
2 +1950 6 +3474 5 +1824 1 +3571 2 +2758 5 +3343 7 +1821 2 +2972 6 +1291 2 +2746 7 +408 9 +4042 10 +526 4 +3311 1 +2222 2 +3155 1 +3408 5 +3727 9 +3716 7 +1321 4 +172 6 +534 2 +1827 4 +1560 1 +2654 2 +2937 3 +3102 1 +2640 9 +3527 8 +2810 8 +746 1 +3423 9 +694 9 +41 6 +20 5 +1888 2 +2831 3 +1597 6 +12 9 +2351 4 +550 10 +1688 5 +4070 3 +3345 4 +15 9 +242 6 +2823 4 +2870 6 +3587 3 +612 3 +3067 4 +1665 5 +3909 7 +3483 9 +710 5 +1307 9 +459 5 +3370 10 +3711 6 +491 3 +1938 2 +2272 2 +2118 2 +255 10 +129 5 +1726 6 +2144 10 +3655 1 +3228 1 +19 7 +608 9 +2167 9 +3599 10 +729 9 +3547 8 +2491 1 +3318 4 +815 7 +3745 8 +1743 3 +3102 5 +3946 7 +289 3 +3352 8 +4042 4 +3943 7 +3786 1 +2910 8 +2412 7 +3851 8 +3896 10 +1297 8 +1075 8 +3520 5 +717 4 +2416 9 +3535 2 +1494 3 +3614 4 +327 3 +3272 7 +3078 7 +1952 3 +928 8 +1322 1 +2563 3 +1412 5 +623 8 +458 6 +3754 8 +2197 10 +481 8 +3081 2 +2712 6 +2057 1 +915 6 +3583 9 +2544 3 +2841 5 +3389 1 +2732 8 +393 4 +2141 6 +2216 1 +2541 6 +1211 5 +3478 10 +525 1 +2292 3 +2483 7 +696 9 +2828 1 +915 5 +1047 1 +1755 6 +2524 6 +2721 10 +1936 8 +764 10 +2789 7 +3012 3 +1266 10 +4085 8 +3797 2 +2110 8 +2170 10 +688 4 +974 5 +2386 8 +1075 7 +3606 7 +3612 2 +2545 5 +1956 7 +3552 5 +3585 1 +110 10 +163 4 +699 1 +798 5 +1452 10 +3588 10 +1014 5 +1249 1 +3817 9 +866 10 +3177 10 +276 7 +2056 1 +1787 8 +4024 4 +3284 10 +2852 9 +994 10 +3106 7 +445 2 +970 9 +1140 10 +493 4 +1433 9 +3762 2 +3608 3 +887 7 +1315 2 +2146 8 +3944 1 +2345 1 +1994 5 +279 6 +784 2 +137 6 +3041 3 +755 6 +2503 4 +2778 3 +3646 9 +2580 4 +2147 4 +1542 3 +2530 6 +2357 7 +1586 10 +503 2 +3471 4 +1166 9 +3133 8 +2226 9 +483 8 +3475 6 +1640 3 +3188 10 +1548 6 +3520 5 +965 1 +3348 1 +189 10 +3796 9 +3653 1 +3804 6 +371 1 +3046 8 +2189 2 +2543 5 +3253 2 +225 3 +2033 7 +2182 10 +1975 10 +373 4 +137 4 +1033 4 +3898 8 +129 6 +101 10 +3114 9 +3741 10 +415 1 +752 1 +1383 10 +3232 3 +3534 6 +2786 6 +1320 7 +3762 9 +3929 9 +1238 1 +3353 7 +3911 7 +189 9 +1872 3 +3941 3 +3292 1 +2412 9 +1105 3 +1231 
9 +963 3 +1098 4 +3351 6 +3409 4 +75 9 +365 6 +4088 2 +570 4 +3450 7 +490 6 +3582 3 +1764 5 +1658 9 +1235 5 +389 6 +1015 3 +1108 8 +4009 7 +1420 10 +4007 3 +1191 4 +3350 10 +805 6 +855 3 +2683 6 +564 3 +1640 10 +3632 7 +1769 6 +295 10 +2004 5 +3962 4 +3720 7 +833 6 +2054 9 +351 3 +3162 6 +3564 8 +1557 5 +2737 2 +2530 8 +1694 10 +3637 9 +1107 2 +1243 3 +474 1 +835 10 +3981 4 +3722 8 +52 5 +2942 3 +3461 9 +3959 10 +4080 1 +3554 6 +1633 7 +1591 7 +2656 7 +540 2 +2305 8 +842 7 +3146 10 +1251 3 +2403 2 +835 5 +773 2 +3458 7 +3165 4 +433 1 +2319 2 +184 10 +3171 4 +1316 2 +3103 5 +195 9 +3694 4 +2688 10 +1936 2 +848 6 +3991 7 +3714 7 +16 10 +2050 4 +1957 4 +1813 7 +3883 3 +3129 10 +1555 7 +882 1 +3957 1 +1613 10 +2381 3 +1205 6 +96 4 +3400 2 +2476 1 +3132 6 +648 5 +2613 9 +307 6 +3069 2 +340 1 +4033 7 +3613 3 +3821 6 +3658 7 +588 10 +3796 5 +1901 1 +2932 8 +533 9 +2864 1 +2976 6 +4058 5 +4000 6 +52 7 +2606 1 +1784 1 +973 9 +1337 6 +1521 6 +2273 9 +50 9 +877 4 +1265 2 +3981 9 +772 3 +2543 10 +2910 10 +148 1 +929 3 +3817 10 +1356 9 +2603 10 +3064 10 +236 3 +1714 4 +2242 6 +2907 4 +1879 10 +2685 8 +2129 1 +495 9 +3688 3 +2593 6 +1157 2 +1048 7 +3763 5 +2224 6 +3561 4 +2035 3 +1208 2 +1515 1 +611 7 +2020 2 +2615 10 +889 2 +3331 2 +2320 2 +2471 4 +3194 7 +2715 2 +3911 3 +2493 3 +2034 4 +2575 8 +2170 3 +1348 6 +1592 5 +3146 3 +1064 1 +1493 3 +724 6 +907 1 +3502 3 +3672 7 +299 4 +2517 3 +3487 6 +3732 2 +964 2 +819 2 +1960 3 +2892 7 +2993 6 +1101 9 +1240 7 +1560 9 +741 6 +1046 9 +2287 4 +502 8 +1311 6 +3071 8 +2469 6 +2760 1 +2553 9 +1073 7 +3543 2 +2323 1 +2572 7 +2027 6 +655 10 +575 7 +2066 10 +1236 3 +1411 1 +684 3 +1738 2 +1257 5 +2553 3 +2663 7 +3251 4 +1204 9 +1806 1 +3003 8 +762 6 +3163 7 +1754 7 +4040 9 +2394 2 +2892 3 +637 1 +1310 6 +697 3 +3016 2 +3237 7 +1357 7 +1590 7 +646 1 +4003 10 +3500 8 +960 6 +1841 7 +1620 7 +1396 3 +137 4 +2583 3 +3340 8 +2116 3 +4047 9 +2384 2 +2503 2 +2827 5 +1135 6 +346 7 +3504 3 +3738 8 +1658 2 +2218 6 +3144 2 +1604 1 +2074 1 +1379 3 +667 4 
+1595 2 +2635 8 +992 3 +876 10 +1063 3 +3065 10 +1445 9 +2430 2 +2090 9 +123 3 +3695 1 +3168 5 +2053 8 +281 6 +899 8 +1603 4 +3085 4 +583 9 +3737 8 +1113 1 +3894 10 +781 9 +1529 6 +242 6 +1746 6 +859 7 +557 5 +4039 2 +2021 5 +3493 9 +2449 6 +502 5 +2792 10 +2028 10 +1299 6 +2347 5 +2662 5 +4015 8 +2272 8 +3546 3 +3687 2 +2466 6 +1312 7 +2764 9 +3068 4 +2422 2 +1196 9 +3139 6 +904 7 +1365 6 +214 2 +700 2 +449 6 +3611 3 +3476 8 +4069 10 +2743 1 +1171 3 +4075 10 +2356 8 +3758 8 +2310 10 +1809 9 +1628 6 +3410 3 +968 9 +3434 6 +314 7 +2523 1 +3429 9 +1426 10 +961 10 +1711 5 +403 3 +3823 7 +554 2 +3537 9 +3062 3 +360 7 +3181 7 +86 4 +3597 10 +3837 3 +3963 4 +3378 10 +2796 2 +2759 9 +273 8 +1666 6 +3315 1 +3729 6 +3574 7 +1220 9 +2887 9 +2860 5 +3324 6 +1048 9 +111 1 +3535 5 +195 3 +1970 7 +1497 10 +1656 8 +2179 8 +625 8 +1339 1 +571 2 +443 2 +1193 2 +309 1 +255 4 +2777 10 +1767 3 +2491 6 +1554 1 +3238 7 +2368 8 +2160 5 +2638 5 +2201 3 +2405 2 +968 8 +224 5 +2132 10 +1030 2 +373 9 +1363 3 +1169 10 +2470 8 +3607 7 +3155 7 +1502 6 +3687 9 +2833 5 +3829 1 +3777 10 +2998 5 +182 1 +1398 1 +3701 6 +1395 4 +341 4 +1627 1 +1747 9 +3265 6 +2489 8 +3944 6 +2359 7 +157 6 +2268 2 +1250 1 +2574 3 +4020 10 +1196 5 +82 10 +1647 2 +4038 10 +1089 3 +492 3 +3633 8 +1657 6 +517 5 +1698 6 +1222 8 +3172 4 +2166 2 +2571 6 +1656 5 +1343 3 +1362 9 +3554 9 +2941 2 +2767 10 +3191 7 +3471 6 +2537 8 +912 2 +1923 7 +685 5 +2697 3 +4048 4 +2929 6 +2271 4 +1786 6 +1470 10 +132 6 +4013 10 +1369 9 +1577 3 +894 6 +1411 2 +2049 6 +3885 7 +3098 8 +3958 8 +2841 3 +3300 4 +2503 10 +2301 7 +2377 2 +1867 9 +3131 9 +485 7 +3578 7 +1263 4 +2950 9 +1461 9 +950 4 +3771 8 +1189 10 +3455 7 +81 2 +1035 6 +3512 10 +3572 6 +2891 5 +2564 4 +1776 7 +3028 4 +829 7 +2937 8 +4088 9 +183 2 +623 2 +675 2 +441 1 +1852 8 +2703 6 +2825 6 +463 3 +303 9 +2953 8 +2093 5 +2215 3 +1619 9 +2906 8 +1180 3 +3956 1 +2573 6 +3032 3 +294 5 +2959 2 +177 7 +2688 7 +2499 1 +4038 1 +3699 3 +3859 7 +1459 6 +1642 1 +3293 2 +109 5 +772 3 +3819 6 
+37 1 +1604 8 +1271 6 +3470 1 +2858 10 +2757 10 +1798 1 +992 1 +980 4 +645 7 +1328 5 +4002 10 +2225 10 +1932 7 +537 9 +1114 3 +3522 4 +911 10 +2633 10 +3001 8 +2258 1 +3882 1 +3206 9 +18 8 +3612 2 +1648 10 +1319 2 +3573 4 +359 7 +499 4 +3158 10 +695 6 +3165 10 +2167 2 +3646 4 +2764 2 +2407 9 +2155 7 +1448 6 +1667 1 +3127 1 +135 7 +1264 2 +764 6 +506 5 +3105 8 +937 5 +4010 2 +2231 9 +1652 2 +769 2 +2574 7 +607 6 +1594 8 +651 9 +338 5 +3642 7 +3371 1 +3527 3 +138 5 +3833 3 +870 7 +2520 4 +3068 3 +1661 9 +43 10 +3234 4 +3111 6 +1625 9 +2898 8 +3525 1 +2530 3 +2917 7 +2001 7 +1175 10 +4027 9 +222 7 +2333 7 +1872 3 +2005 2 +1496 8 +2605 2 +3973 1 +2975 9 +2649 7 +1952 10 +3835 9 +3390 10 +2487 5 +3693 8 +3397 7 +176 7 +2214 3 +3599 2 +2217 1 +57 4 +1659 7 +1751 3 +3714 3 +2875 10 +1594 3 +3245 7 +1577 6 +75 5 +2430 2 +2506 9 +674 3 +1033 6 +2185 3 +1284 10 +2220 6 +3269 7 +1917 1 +2666 8 +2274 4 +3643 8 +1942 9 +3126 3 +2317 7 +2505 8 +1705 1 +854 2 +1642 9 +2639 5 +612 2 +1006 3 +56 9 +1023 2 +384 6 +3366 8 +455 1 +2153 6 +1079 7 +2176 4 +1206 9 +4081 6 +1285 2 +4094 2 +1142 10 +1307 3 +3587 4 +2844 7 +3226 7 +2457 3 +2921 6 +3132 2 +345 1 +649 4 +4065 10 +3693 3 +3563 5 +513 9 +1167 2 +33 2 +153 4 +3185 8 +1873 5 +1702 1 +3799 10 +756 7 +801 9 +3801 2 +827 3 +472 7 +1096 8 +268 3 +2160 8 +2931 4 +3145 5 +555 3 +3863 6 +2106 10 +2336 1 +1444 5 +3832 2 +131 7 +275 7 +679 9 +599 3 +1184 6 +1464 6 +2622 4 +248 6 +1312 4 +2100 8 +3531 7 +1235 6 +342 10 +2477 7 +247 2 +1424 6 +2989 6 +2123 7 +2465 6 +2203 1 +1443 10 +1773 3 +2058 3 +3027 10 +1329 7 +3578 7 +731 4 +632 5 +2656 3 +2901 5 +343 6 +2157 9 +596 3 +163 5 +3700 8 +2955 8 +2670 4 +3695 1 +3428 5 +727 6 +3111 7 +1253 6 +1870 8 +2787 6 +909 9 +1820 9 +3830 3 +3126 6 +3118 5 +3670 7 +3757 8 +3454 7 +2750 5 +2097 4 +3445 4 +1166 7 +3947 4 +3770 5 +2125 4 +2132 10 +3089 7 +250 10 +2423 4 +1737 7 +2687 1 +2502 2 +919 2 +2354 9 +3074 7 +2245 3 +2155 3 +3640 4 +1670 1 +82 1 +116 10 +2480 5 +2174 9 +2497 4 +1910 3 +3481 8 
+957 10 +3011 3 +3902 9 +1144 2 +3894 10 +2668 3 +2266 9 +1738 1 +3002 6 +3280 6 +988 10 +3073 8 +1148 5 +3624 8 +3011 3 +442 3 +2771 5 +265 8 +1151 9 +676 3 +110 3 +1421 4 +2040 5 +281 8 +2145 3 +1174 3 +1546 5 +367 6 +413 1 +238 7 +1650 9 +937 6 +1036 10 +905 5 +2108 2 +2969 9 +2356 5 +1495 3 +1575 1 +52 5 +1737 2 +1457 1 +573 2 +3489 1 +3301 5 +2585 5 +3978 4 +3945 4 +2554 8 +1266 6 +1736 6 +2138 1 +870 4 +4036 10 +924 10 +547 3 +943 3 +3859 4 +1390 5 +2047 8 +1852 2 +2780 3 +2684 5 +1665 10 +613 4 +1398 7 +3509 7 +1605 9 +740 1 +243 7 +2659 2 +899 6 +1406 1 +579 2 +3301 8 +2814 7 +467 1 +2460 3 +3172 7 +3746 5 +3238 2 +1272 2 +3292 9 +796 9 +151 4 +3114 9 +1102 4 +4072 7 +3927 5 +930 1 +3501 3 +3166 2 +571 7 +4062 2 +1367 2 +112 7 +2477 5 +860 4 +1057 9 +2105 10 +3283 5 +47 1 +3477 5 +891 8 +553 4 +2510 7 +285 1 +1484 8 +4022 2 +1414 8 +134 1 +1085 4 +2299 2 +2428 8 +1288 5 +1487 4 +1354 7 +1115 8 +1920 1 +615 8 +2485 5 +2692 9 +709 1 +893 7 +2945 3 +118 9 +1232 8 +3262 7 +1332 5 +2284 5 +2410 7 +3191 5 +3808 6 +3573 2 +2134 1 +1291 8 +2215 8 +4017 2 +13 9 +3263 8 +3875 10 +493 8 +864 2 +179 8 +2933 7 +663 9 +2633 7 +1485 6 +2004 2 +178 9 +3816 3 +678 6 +3019 7 +2792 10 +83 7 +3328 3 +77 2 +2991 6 +1643 4 +780 8 +2627 6 +3422 10 +4085 8 +593 1 +1798 6 +1606 6 +1045 7 +2765 5 +3186 2 +2260 8 +3972 7 +1132 5 +1900 10 +1759 6 +2290 9 +1212 4 +698 7 +511 1 +3331 7 +1185 6 +2565 1 +481 5 +896 7 +3301 7 +3907 7 +1014 5 +3916 1 +3628 3 +897 5 +1626 7 +1935 10 +1200 7 +3970 8 +3287 6 +927 2 +385 5 +1665 7 +2625 3 +1068 5 +3819 1 +2727 1 +1770 10 +3401 4 +1035 5 +3934 7 +1747 10 +3304 5 +1699 3 +739 10 +2396 3 +438 2 +3852 10 +2536 8 +619 8 +3535 3 +3758 3 +3889 1 +2887 6 +1720 9 +906 7 +3930 2 +3424 8 +2388 2 +1193 8 +2670 6 +3415 6 +3748 5 +1005 2 +3621 1 +2117 6 +3173 1 +3138 4 +3527 6 +790 3 +1633 5 +1725 10 +1700 8 +895 4 +3164 10 +3433 1 +165 1 +554 8 +1332 3 +1330 7 +1063 9 +2077 7 +875 9 +1378 1 +3839 9 +1907 3 +3274 8 +1444 4 +3809 1 +1834 7 +447 10 +13 6 +353 
1 +2807 10 +3759 2 +1007 10 +3404 7 +1943 4 +1538 5 +1627 5 +2355 7 +1113 6 +578 9 +3056 3 +4034 8 +1812 7 +1388 9 +662 5 +2030 10 +24 7 +1600 10 +3051 7 +1495 1 +3155 4 +2911 7 +3017 3 +3764 7 +3561 7 +2259 8 +1092 9 +1312 5 +2132 10 +1929 10 +1297 3 +164 4 +1759 3 +2554 5 +3570 9 +2073 7 +68 8 +3225 1 +1222 9 +3001 8 +189 10 +3512 8 +3954 1 +4007 10 +498 9 +3559 7 +4052 3 +4066 5 +3914 10 +214 6 +149 4 +3949 7 +1491 7 +1783 1 +39 9 +1576 2 +3915 6 +1422 3 +2488 3 +3578 5 +939 10 +2467 1 +3742 10 +3990 3 +1156 3 +638 8 +308 5 +414 9 +2119 5 +2310 6 +491 8 +1948 9 +3551 1 +197 8 +2189 4 +2492 4 +2503 10 +3930 9 +3180 3 +1251 3 +1713 6 +203 10 +79 6 +2020 8 +2585 2 +2096 3 +1790 2 +2869 6 +1174 6 +2765 9 +1261 3 +2399 5 +637 10 +2318 5 +2306 5 +3370 7 +3379 1 +1732 5 +1503 10 +3555 8 +2024 8 +3905 6 +3491 5 +197 9 +340 1 +192 10 +1165 6 +3663 2 +2625 4 +2784 5 +3138 10 +3624 2 +3707 4 +2747 3 +96 8 +3822 6 +2740 7 +4083 7 +3339 8 +2041 10 +3050 7 +3165 7 +3096 9 +1375 1 +658 3 +3089 7 +586 9 +737 9 +2962 8 +3511 4 +2051 8 +1653 10 +2080 4 +1883 8 +2251 3 +1934 6 +1480 9 +3874 6 +276 9 +3255 8 +1860 4 +376 1 +71 7 +3753 2 +80 2 +3707 6 +1065 4 +978 2 +34 9 +1967 3 +964 2 +2802 8 +497 2 +793 1 +3976 9 +276 1 +3541 7 +2997 6 +444 10 +1180 10 +3008 1 +4091 10 +2304 4 +2965 6 +3270 5 +2441 4 +2822 5 +657 6 +2631 8 +1358 10 +1783 3 +3165 3 +1865 1 +3323 6 +375 3 +3779 5 +2505 3 +1645 10 +957 3 +1491 3 +1214 5 +3670 3 +2193 1 +720 2 +3241 10 +3819 8 +2112 4 +3301 10 +1264 4 +3937 3 +3991 9 +2233 9 +2788 8 +2477 5 +2449 6 +3996 10 +1614 6 +1843 1 +2732 4 +2658 2 +1930 9 +1400 2 +3464 10 +3043 7 +1099 6 +1698 1 +2485 9 +904 9 +3305 1 +161 10 +3368 3 +2575 3 +2376 2 +3414 10 +2415 2 +2241 3 +1118 3 +672 2 +973 3 +63 2 +3909 10 +2730 10 +2677 8 +2879 7 +434 8 +3328 1 +372 4 +3892 9 +3724 3 +1471 1 +1378 6 +3369 9 +244 7 +3068 4 +864 7 +1521 6 +2038 2 +3124 2 +1781 4 +2580 6 +324 1 +1703 1 +1230 2 +2407 6 +3972 9 +1775 6 +3082 4 +2442 8 +159 1 +971 1 +1686 8 +1022 10 +166 3 
+3153 3 +3406 10 +1865 8 +1902 8 +2309 8 +78 1 +1521 7 +3207 10 +3637 2 +2802 7 +2388 4 +2204 2 +1263 9 +3758 7 +210 1 +2319 9 +561 4 +3534 9 +3902 2 +3460 8 +3392 4 +2231 10 +3718 9 +3019 5 +1126 9 +563 4 +1770 1 +1615 8 +2212 3 +3923 4 +745 5 +1638 9 +2814 6 +2652 1 +1114 8 +3194 5 +2302 9 +2308 8 +1040 4 +1210 4 +1632 2 +1359 3 +2478 9 +2613 5 +1037 7 +588 4 +602 3 +4014 7 +2961 4 +2047 9 +2435 1 +200 7 +1265 3 +278 3 +1610 4 +3825 10 +3239 6 +1101 2 +1300 4 +645 3 +180 5 +987 10 +626 9 +1288 6 +4017 3 +1451 10 +3465 6 +639 9 +830 3 +3332 1 +2983 10 +3702 5 +3877 10 +1450 4 +1003 5 +1545 5 +85 9 +1838 4 +788 8 +3927 10 +1056 8 +2778 6 +3679 3 +1002 8 +3338 5 +796 5 +2418 2 +3877 6 +279 8 +2305 8 +3895 4 +3515 1 +2818 4 +667 8 +2259 1 +2268 1 +2727 8 +1497 2 +777 6 +2200 7 +2456 5 +2856 7 +1571 5 +990 10 +1046 3 +3554 2 +3317 2 +2117 2 +49 4 +3251 5 +1138 4 +1020 6 +359 10 +2453 9 +2468 2 +1970 7 +3781 8 +339 10 +707 9 +1294 7 +3950 1 +846 8 +3362 9 +1275 3 +2627 5 +2665 3 +2785 8 +2626 5 +733 9 +1160 1 +3159 6 +143 9 +2164 2 +3928 2 +1972 2 +3856 7 +3888 7 +3983 8 +1829 10 +37 6 +255 3 +1327 9 +2513 10 +1368 2 +744 8 +709 9 +3809 9 +2173 5 +2777 2 +961 3 +421 1 +875 7 +1552 6 +1624 7 +3938 4 +1100 2 +631 1 +235 10 +1125 1 +168 10 +3547 7 +2353 10 +3006 10 +763 5 +2716 3 +2657 6 +3549 9 +214 6 +3547 7 +3270 6 +436 10 +3474 8 +3223 6 +4019 3 +4083 4 +1913 8 +422 4 +707 9 +2853 3 +1850 4 +596 4 +3455 10 +1307 3 +3706 8 +1441 10 +3879 8 +3858 3 +472 9 +1711 7 +3057 7 +1080 9 +498 5 +2332 9 +1374 2 +1178 1 +1673 7 +3260 5 +2625 8 +1925 7 +1769 8 +100 10 +3527 10 +3042 7 +3425 8 +3027 6 +1279 3 +2027 3 +469 8 +17 2 +2782 9 +341 5 +129 6 +2538 8 +325 8 +3066 3 +4047 6 +90 1 +1170 1 +496 8 +3767 3 +738 6 +978 4 +1727 9 +2483 9 +2017 6 +657 4 +2139 3 +775 10 +2472 9 +2787 8 +1504 3 +543 1 +1331 2 +1313 1 +554 4 +3997 6 +2823 8 +1521 10 +1342 2 +3175 5 +2162 3 +2970 2 +1781 9 +121 5 +1868 10 +1220 5 +1315 7 +3619 1 +729 7 +1148 2 +167 4 +915 10 +2197 9 +1387 1 +558 4 
+3475 5 +803 7 +1223 8 +2789 2 +2020 8 +121 2 +926 3 +368 5 +1726 5 +261 4 +3162 3 +2490 10 +3168 3 +3301 10 +3438 5 +1498 8 +1912 8 +2145 9 +3118 4 +3638 1 +1186 10 +734 3 +2438 1 +2923 4 +1900 7 +2894 8 +3372 2 +759 8 +2318 1 +2312 7 +551 2 +2008 7 +3030 8 +960 8 +212 9 +470 9 +4042 1 +115 3 +3981 1 +2901 6 +227 2 +3460 6 +3819 8 +2974 2 +945 4 +3000 9 +2475 1 +2146 10 +1307 6 +1835 4 +3016 9 +111 6 +1804 3 +1492 10 +213 6 +578 4 +1962 7 +538 2 +3498 7 +1504 5 +3276 1 +29 10 +1751 4 +3691 8 +3940 7 +3590 5 +904 7 +1308 5 +2836 9 +2607 2 +3977 4 +3483 5 +914 7 +3591 8 +2957 2 +1456 6 +1058 4 +156 10 +1229 8 +723 4 +323 10 +1036 8 +1588 7 +1119 2 +2304 2 +1258 6 +2374 3 +1511 6 +3309 8 +2197 4 +1922 1 +2663 6 +1672 7 +3887 5 +3053 6 +1402 1 +548 8 +1584 1 +2087 3 +2285 1 +2296 2 +2219 7 +352 7 +1082 2 +1095 7 +3190 3 +2965 2 +1491 4 +3628 2 +678 1 +989 7 +3992 8 +2804 9 +3427 10 +2437 8 +354 3 +3931 2 +2727 6 +3545 6 +3365 5 +1510 7 +2345 10 +127 9 +3498 10 +636 5 +1057 7 +178 4 +912 10 +1125 9 +3365 5 +84 3 +938 7 +1288 7 +1381 1 +1918 4 +2141 4 +780 8 +3992 8 +588 1 +469 10 +3797 1 +3704 4 +3692 6 +1990 4 +891 1 +4079 7 +547 9 +1882 5 +3816 10 +926 8 +2927 10 +2006 7 +2486 2 +3632 3 +1220 2 +2238 10 +3433 9 +1246 2 +3886 4 +3922 3 +218 8 +2179 2 +3334 1 +193 8 +1378 10 +3579 7 +1791 7 +3787 4 +873 7 +2528 9 +518 6 +212 9 +3299 9 +3114 10 +379 1 +2024 7 +681 2 +3421 8 +399 10 +3187 5 +1665 4 +1808 6 +1987 5 +1748 4 +1625 9 +385 10 +987 9 +3359 7 +2821 6 +2169 4 +3375 9 +3512 9 +3189 7 +1068 8 +3790 4 +3807 2 +22 8 +1287 6 +3718 9 +2858 6 +2126 10 +4011 5 +3800 10 +2661 2 +1947 8 +3834 2 +303 2 +2622 3 +3913 1 +1811 4 +61 5 +3661 5 +2741 6 +3856 9 +1455 8 +1637 6 +3822 1 +849 10 +1107 9 +4017 7 +1863 9 +835 10 +1701 3 +2071 9 +1073 6 +3155 9 +3832 10 +643 4 +530 1 +353 1 +1161 1 +350 1 +2528 8 +3713 9 +880 9 +2421 10 +3781 1 +2390 9 +2151 6 +245 2 +2899 6 +3547 9 +2772 5 +2134 1 +1827 4 +1552 10 +3487 4 +900 3 +273 5 +1946 1 +3128 2 +3301 9 +3175 5 +934 10 +1779 3 
+1199 9 +1233 5 +2228 7 +2105 1 +479 8 +3535 1 +1742 2 +2390 7 +3399 2 +1660 7 +849 3 +1652 9 +3332 8 +174 4 +2965 9 +1165 8 +2794 8 +1638 2 +2881 8 +2527 3 +1570 2 +2307 5 +979 2 +2832 6 +3507 8 +3430 1 +3962 7 +140 7 +3207 2 +3306 10 +582 10 +2746 8 +81 4 +2122 4 +1226 6 +1454 7 +354 5 +1664 2 +2109 1 +1697 3 +2452 4 +2398 1 +2224 2 +1679 6 +2330 1 +2358 3 +2942 10 +3842 2 +1411 2 +353 9 +1879 2 +1117 6 +255 1 +2495 8 +1126 9 +1947 6 +3705 6 +270 10 +1351 2 +2900 1 +3427 7 +742 2 +1158 4 +2501 1 +868 10 +3810 5 +449 2 +2496 5 +972 4 +3187 9 +291 4 +2278 3 +1057 2 +1471 10 +3238 2 +1171 6 +1463 3 +2833 3 +2529 10 +2831 3 +567 10 +2484 4 +973 1 +3606 5 +154 7 +2688 3 +1188 5 +1853 4 +3407 6 +710 1 +1598 10 +6 4 +2315 9 +3218 10 +577 3 +2530 9 +2622 4 +4048 1 +1208 1 +2226 4 +1064 9 +2499 10 +3998 7 +496 5 +1751 7 +4021 7 +2966 9 +684 3 +3805 7 +2747 2 +1818 7 +2879 3 +3599 6 +2593 5 +2186 10 +3511 10 +1100 1 +1821 6 +3472 4 +2858 7 +2920 5 +173 2 +3517 4 +3322 9 +3410 4 +2233 7 +392 9 +2204 7 +3584 3 +356 5 +2406 3 +906 9 +2577 6 +2631 6 +444 3 +2593 9 +2065 8 +53 8 +661 2 +2175 8 +365 9 +1178 9 +2179 5 +2548 2 +4022 7 +1486 2 +3648 5 +1654 3 +2129 1 +3787 1 +3637 2 +980 8 +3142 1 +2176 1 +847 2 +659 7 +2132 1 +3193 6 +70 4 +3333 7 +3145 4 +1512 5 +292 4 +1357 6 +1603 4 +64 10 +4048 3 +1027 8 +3850 2 +3056 4 +1658 8 +3884 7 +2822 10 +2949 6 +1058 1 +2301 8 +3666 1 +1829 3 +3148 8 +2784 4 +281 8 +3434 1 +2237 1 +2413 6 +805 2 +1900 7 +669 5 +2412 5 +2964 8 +3704 3 +468 8 +3184 5 +3394 3 +3059 1 +632 3 +843 8 +1157 2 +2788 3 +1339 7 +2516 9 +650 1 +1764 2 +3082 10 +1718 5 +2034 7 +1360 4 +4023 7 +1123 6 +424 3 +1087 1 +1181 1 +2253 1 +531 2 +1485 6 +572 3 +3615 8 +839 2 +2062 2 +1142 8 +1175 5 +3997 2 +2481 3 +3086 5 +3060 4 +3474 1 +1045 1 +1009 8 +2648 3 +2472 8 +2130 3 +362 3 +1695 4 +3669 8 +3233 8 +1840 7 +3803 3 +3042 3 +882 10 +3123 1 +3752 8 +3475 2 +3648 4 +583 10 +1334 6 +612 6 +163 1 +3764 5 +1912 3 +1816 10 +2696 3 +842 6 +257 1 +4033 6 +3039 3 +2051 7 
+1188 5 +2949 7 +255 9 +3385 1 +1189 2 +3189 9 +1669 2 +1227 2 +2908 7 +1812 8 +2435 4 +1842 9 +1452 2 +2649 6 +1876 6 +770 5 +2038 9 +3784 10 +1738 2 +2144 6 +214 4 +618 4 +539 2 +2360 6 +350 4 +307 4 +807 5 +1564 7 +3877 2 +3824 10 +1023 3 +2440 9 +2700 8 +2239 4 +2076 4 +3086 9 +3480 5 +2189 10 +3143 5 +3434 4 +2389 8 +3170 4 +1231 7 +1376 8 +554 7 +2525 10 +2580 8 +4069 5 +319 4 +1771 5 +2893 7 +3742 6 +1438 7 +1010 1 +726 6 +3146 9 +2214 7 +351 3 +2878 7 +1791 9 +1475 7 +1457 6 +2583 8 +1730 10 +116 9 +2972 6 +3886 9 +1110 6 +1906 10 +1406 8 +2044 2 +1333 1 +3736 5 +1384 10 +1298 3 +2877 3 +1274 4 +1711 5 +3467 9 +925 5 +504 1 +3689 6 +3026 4 +1071 3 +586 10 +2394 2 +315 2 +2946 7 +747 8 +51 4 +2317 3 +692 9 +3653 10 +3718 10 +2106 8 +3031 1 +1970 4 +1763 3 +3037 4 +1116 6 +1784 1 +3486 1 +551 2 +3451 8 +3809 2 +2572 5 +3576 1 +3229 1 +151 5 +723 3 +1748 9 +519 3 +2762 3 +2266 2 +121 7 +1905 10 +2294 9 +629 9 +2232 10 +1590 2 +2437 6 +1092 10 +1153 3 +2067 2 +1825 10 +1631 1 +103 1 +129 8 +2731 10 +1265 5 +2754 10 +3176 2 +2385 8 +1620 3 +444 4 +1231 7 +1496 1 +3681 10 +2951 3 +3148 10 +172 10 +1414 9 +3775 9 +2671 4 +697 1 +3632 5 +2440 5 +3099 2 +350 6 +3080 10 +1314 8 +2759 4 +2801 3 +3304 4 +2912 4 +2351 1 +940 6 +2725 2 +3543 9 +3971 3 +1649 3 +550 7 +125 1 +1696 9 +2743 8 +2277 1 +543 2 +1262 7 +550 7 +920 4 +2277 10 +2466 10 +2648 6 +2442 7 +1983 1 +1438 2 +2167 1 +2256 10 +183 6 +2832 8 +2037 1 +2829 7 +284 3 +138 8 +1758 1 +2109 8 +1146 5 +3817 10 +799 8 +325 4 +706 10 +1790 6 +445 2 +1734 6 +123 8 +2187 2 +1960 7 +75 2 +359 8 +802 5 +1384 3 +1140 4 +2396 5 +4087 7 +2680 7 +3182 8 +3436 6 +899 7 +1437 4 +1502 2 +2046 9 +452 9 +3709 5 +1733 9 +1547 2 +1729 10 +3826 7 +1387 8 +185 3 +513 9 +3068 10 +306 2 +1585 3 +1244 6 +977 1 +1751 8 +1350 7 +1112 8 +2683 2 +3677 6 +1196 2 +100 4 +4058 3 +897 6 +1915 7 +927 2 +480 2 +892 1 +3033 10 +2510 7 +2915 4 +1296 7 +2536 1 +255 6 +2584 1 +98 5 +1922 3 +1547 6 +3939 6 +3795 10 +3628 6 +2484 8 +661 3 +3160 1 
+1991 2 +607 9 +1305 1 +1910 6 +3274 4 +2755 4 +2570 2 +2550 5 +3805 3 +3987 3 +1123 5 +1105 3 +3047 9 +3404 1 +684 8 +3036 5 +3368 8 +2208 1 +2049 1 +1761 1 +1416 10 +1559 2 +2246 5 +612 1 +92 10 +1815 5 +926 5 +1552 8 +438 8 +2828 7 +1502 9 +2894 7 +3200 4 +2227 9 +2483 7 +3918 5 +3274 3 +2318 6 +1762 2 +2416 1 +2081 6 +3583 6 +2357 8 +1319 2 +657 3 +4073 7 +1517 5 +3633 10 +1945 8 +2331 5 +3289 6 +763 5 +3895 6 +1698 3 +1658 3 +31 8 +2042 6 +2543 8 +413 8 +831 3 +2182 2 +3657 2 +3790 4 +2894 9 +1186 9 +3197 2 +1102 4 +1728 5 +689 8 +1189 6 +2347 1 +2034 9 +1046 8 +2342 3 +3731 8 +3407 5 +1307 4 +1156 5 +1946 5 +2779 8 +743 6 +334 8 +1101 9 +1831 4 +1158 8 +3068 2 +954 4 +3810 2 +467 7 +37 8 +339 1 +74 7 +2022 4 +419 1 +615 5 +1498 6 +548 10 +1759 2 +1873 2 +3670 4 +2614 9 +1278 1 +908 9 +1115 6 +2677 5 +1732 3 +3546 4 +3924 1 +2665 1 +1387 2 +3622 6 +1333 8 +1977 10 +4051 5 +2720 5 +2555 3 +607 6 +3498 4 +799 2 +3439 1 +1422 8 +3862 6 +959 1 +4029 2 +47 4 +2013 5 +3339 10 +2797 8 +3463 10 +1923 7 +2693 7 +276 5 +3223 2 +3887 6 +4060 1 +3765 3 +3480 6 +565 5 +3616 10 +3576 5 +2612 9 +4049 9 +762 5 +551 9 +1439 10 +2131 4 +544 10 +2124 7 +896 1 +163 4 +4021 5 +3887 4 +2329 4 +1714 8 +1209 5 +2238 5 +2096 10 +517 10 +2526 4 +2825 7 +2802 6 +3625 2 +255 6 +3419 7 +2404 9 +1538 6 +3235 2 +2416 9 +30 3 +3790 6 +977 10 +590 8 +535 9 +542 9 +553 3 +3670 5 +1373 6 +123 7 +735 9 +1218 9 +2397 8 +2703 9 +2846 9 +827 9 +491 1 +2986 10 +3797 3 +2170 2 +1397 3 +1185 2 +49 3 +1207 9 +3167 1 +466 7 +1659 4 +3479 9 +874 8 +3136 2 +1377 9 +879 2 +2961 4 +4020 10 +642 1 +2826 5 +3641 8 +3631 5 +1084 7 +324 8 +1660 6 +3774 10 +1663 6 +3907 1 +4027 1 +290 5 +963 6 +2344 7 +3325 9 +87 10 +1110 10 +1760 1 +825 9 +3647 9 +1213 5 +849 7 +1494 5 +3980 6 +922 8 +586 10 +1807 1 +3755 6 +2477 9 +302 5 +2174 9 +340 3 +2047 10 +1973 9 +3168 5 +2419 1 +3039 1 +4020 9 +2298 5 +1796 4 +3313 6 +542 4 +2913 2 +2069 4 +2407 1 +3566 7 +2190 10 +381 6 +2826 6 +2811 3 +305 2 +608 5 +3637 10 +617 2 
+994 7 +1737 5 +761 4 +3223 2 +4070 3 +897 4 +2223 9 +2796 1 +2449 5 +1933 10 +450 9 +516 6 +1468 4 +2999 2 +3656 1 +3197 5 +2286 1 +3695 7 +3210 6 +2723 10 +930 2 +796 8 +2608 2 +3529 10 +2512 5 +3975 10 +1475 10 +1425 9 +2602 2 +2782 9 +1919 5 +1362 9 +214 3 +1476 4 +3714 4 +47 5 +1776 5 +714 5 +2815 2 +716 8 +1040 9 +415 1 +1683 5 +3396 1 +876 7 +2724 6 +1825 4 +2314 10 +3581 2 +2430 4 +282 6 +862 6 +2300 10 +2698 8 +3704 9 +1554 6 +939 10 +3315 9 +1561 3 +838 5 +2454 8 +2397 6 +1186 4 +1103 4 +2363 7 +698 5 +684 4 +3117 2 +2500 4 +3798 4 +4080 2 +2324 2 +739 6 +505 7 +2872 4 +476 7 +2891 10 +3213 10 +3634 4 +147 2 +282 3 +25 10 +2759 6 +465 1 +528 4 +2579 8 +2013 5 +3811 3 +694 4 +1180 6 +791 5 +3556 4 +3981 2 +3378 5 +3526 7 +2021 3 +2459 10 +3528 10 +3855 10 +3024 1 +3266 8 +1298 5 +2308 1 +236 7 +3047 5 +1001 7 +3633 3 +105 7 +2072 2 +2751 2 +1806 8 +4014 9 +720 4 +1813 1 +3026 4 +648 6 +2818 5 +1021 9 +1180 9 +1859 4 +1921 5 +1925 3 +477 5 +3051 9 +3474 4 +2718 10 +695 2 +2738 3 +181 9 +2138 4 +1474 9 +3440 10 +2442 10 +3753 7 +541 3 +1271 9 +2280 4 +1212 2 +3028 8 +3066 10 +3241 5 +1439 2 +3323 9 +3958 10 +2619 5 +4056 6 +3306 2 +2598 4 +1865 10 +300 8 +3693 7 +2055 4 +710 7 +2292 7 +3443 1 +498 8 +3295 3 +1591 1 +2208 6 +4032 7 +1800 5 +352 3 +780 2 +1835 2 +65 9 +956 2 +2303 3 +1494 8 +2362 7 +272 3 +2916 3 +2190 1 +633 4 +1862 2 +806 8 +3214 7 +15 10 +789 3 +1854 9 +575 2 +1241 8 +3633 7 +2771 7 +1776 7 +2664 1 +2994 9 +1300 8 +2878 4 +1185 9 +3652 2 +990 3 +205 5 +3316 5 +3237 9 +2604 4 +441 2 +241 8 +805 4 +3357 5 +1179 8 +2796 10 +3949 2 +530 9 +2938 6 +165 8 +3716 1 +3697 6 +3085 1 +29 8 +2242 9 +1622 9 +877 9 +1876 8 +329 2 +508 6 +3600 10 +1514 7 +3301 10 +1829 1 +2099 3 +2960 3 +3851 6 +1275 9 +2714 6 +2747 7 +294 10 +1226 5 +3453 2 +3326 8 +263 2 +2873 10 +3305 2 +417 10 +141 4 +1773 6 +3875 7 +2042 6 +2796 10 +1964 8 +2719 4 +2902 3 +2893 7 +239 10 +344 6 +2385 8 +472 9 +239 5 +2319 5 +2847 2 +2649 8 +3116 1 +347 6 +1848 3 +3705 4 +3340 8 +751 
3 +695 9 +1393 7 +2153 1 +2148 1 +1848 4 +659 4 +2177 1 +2038 10 +2754 5 +1465 9 +3122 5 +2960 3 +1113 2 +3649 3 +3225 6 +2647 9 +1474 6 +2094 4 +740 1 +2325 5 +1224 7 +3048 4 +457 6 +2720 4 +3779 6 +2298 10 +1805 7 +1752 7 +3417 3 +3801 4 +2776 4 +2012 6 +3307 3 +2844 7 +2872 1 +957 4 +2252 4 +3174 9 +3675 1 +2599 8 +2037 6 +3173 9 +3304 3 +3000 5 +696 1 +3583 10 +2956 5 +899 7 +1427 7 +2211 10 +3065 2 +3351 3 +797 10 +1283 7 +120 8 +3194 3 +729 4 +2692 10 +3422 7 +2526 8 +3354 9 +790 3 +259 1 +55 3 +505 5 +68 8 +540 1 +3416 8 +3584 3 +1268 7 +729 2 +1840 3 +2573 2 +3843 3 +3823 9 +2592 8 +3453 4 +2886 3 +1236 5 +1562 6 +2156 7 +613 4 +2763 7 +912 1 +585 7 +2341 2 +754 7 +1028 1 +2006 4 +1767 5 +1965 3 +3078 8 +3587 2 +1418 2 +1086 9 +2082 9 +1415 7 +790 5 +3031 5 +1441 10 +3496 7 +966 1 +3562 5 +2816 8 +938 3 +2216 1 +1150 5 +1925 1 +1068 6 +2860 2 +1014 7 +469 6 +2987 5 +473 1 +3009 9 +917 10 +2700 10 +3394 3 +2324 8 +736 7 +2990 7 +3043 9 +896 8 +1146 1 +1360 10 +3906 6 +348 7 +3786 8 +1004 5 +2974 3 +558 9 +2854 5 +2777 7 +173 7 +3332 4 +450 7 +2464 9 +1195 9 +3235 7 +3336 2 +2254 6 +818 3 +3798 1 +810 8 +3606 2 +3025 6 +778 6 +977 7 +3549 2 +4015 3 +2736 7 +3550 6 +3889 4 +2921 5 +3176 3 +4 6 +1305 4 +4040 1 +1225 1 +3314 6 +1222 9 +1197 8 +3932 5 +3991 6 +493 7 +2644 7 +241 8 +562 5 +1097 3 +2632 6 +2480 3 +3129 5 +3096 2 +3585 8 +655 9 +2600 6 +491 4 +2467 2 +495 9 +3969 6 +3467 10 +45 2 +602 3 +763 6 +1876 10 +1188 8 +3089 4 +2316 1 +3761 10 +228 1 +3596 9 +215 1 +546 2 +1716 7 +1940 4 +2585 2 +1780 7 +262 5 +2560 7 +1845 6 +86 1 +1080 4 +1350 10 +606 9 +1391 7 +2634 8 +127 10 +2256 3 +2794 9 +2617 3 +1509 10 +2103 6 +1893 2 +238 5 +135 10 +3003 2 +2917 10 +3425 2 +2607 2 +2136 3 +2216 5 +1414 2 +1484 9 +3474 6 +3871 3 +78 1 +1613 4 +892 10 +3655 9 +3129 6 +832 9 +2100 5 +4092 3 +2112 1 +2649 2 +676 10 +3347 10 +424 9 +860 4 +3666 4 +1185 1 +1872 1 +1811 10 +213 9 +144 4 +3984 8 +3748 4 +1716 2 +2523 9 +482 4 +2002 1 +1501 6 +3333 5 +1641 9 +1867 7 +3138 
10 +330 4 +3154 7 +710 3 +2139 1 +3269 3 +1694 3 +1437 5 +2333 4 +3433 4 +4 5 +2452 7 +3848 10 +944 7 +1822 7 +4025 6 +3936 9 +1309 10 +1496 1 +3341 6 +1435 3 +803 6 +3276 4 +971 8 +774 3 +2286 8 +1316 10 +3276 1 +797 5 +503 3 +4020 8 +2517 1 +452 6 +2644 10 +2338 9 +3013 10 +997 5 +3485 3 +556 2 +1037 5 +3610 9 +211 4 +4015 10 +831 10 +1715 3 +1365 7 +1098 2 +487 10 +111 1 +2022 10 +3957 7 +1276 1 +3879 9 +3127 2 +1973 9 +3891 10 +2944 6 +3106 2 +3939 3 +386 7 +1665 10 +2078 9 +1125 10 +1577 7 +3543 7 +853 3 +3798 8 +3801 7 +3169 6 +2880 1 +1540 10 +1518 7 +2083 9 +1616 9 +2814 7 +1787 8 +3727 4 +3708 6 +2186 7 +1693 8 +1577 10 +2225 3 +2065 4 +1931 7 +1138 1 +3381 3 +2675 1 +1153 1 +1507 4 +347 7 +1773 9 +2601 3 +133 1 +3813 10 +3061 1 +163 7 +168 3 +2578 3 +4076 2 +734 2 +999 6 +1907 2 +3972 4 +493 10 +870 1 +1613 7 +2118 7 +1742 9 +1165 9 +2074 10 +3320 3 +874 1 +2703 3 +1014 8 +1310 5 +3038 8 +2369 9 +984 9 +3684 4 +961 4 +1278 2 +1791 3 +3968 9 +1462 6 +2801 4 +146 6 +3717 8 +3445 10 +941 4 +1709 8 +3112 2 +3192 7 +3353 4 +2564 8 +639 6 +2140 7 +4056 4 +854 4 +719 9 +780 10 +2091 7 +2748 7 +1123 6 +773 4 +2572 8 +3240 6 +3156 9 +1985 4 +2845 2 +3011 2 +1830 9 +2768 7 +1079 8 +23 5 +1702 8 +3920 7 +2925 6 +2318 7 +3833 2 +1659 4 +164 9 +455 1 +3237 6 +3397 7 +1751 9 +1247 8 +3951 8 +659 5 +2424 6 +894 8 +4082 7 +2904 10 +3148 5 +444 8 +331 3 +3653 4 +166 4 +1331 7 +2053 8 +3411 6 +1266 7 +3971 1 +67 10 +866 9 +479 2 +2452 3 +434 5 +1926 2 +2563 3 +2434 5 +2808 7 +3612 4 +509 1 +146 5 +112 5 +726 1 +5 7 +1767 9 +213 5 +2630 6 +914 3 +2248 3 +295 3 +3251 10 +3771 3 +2556 5 +3851 3 +1227 4 +1444 10 +2455 4 +3500 9 +2382 4 +3745 1 +4040 10 +239 2 +3552 2 +1812 1 +404 9 +879 6 +593 2 +2620 8 +960 9 +2935 5 +3247 10 +923 1 +3362 4 +2746 4 +563 3 +228 9 +3501 6 +699 6 +72 7 +2701 2 +1265 3 +350 3 +213 1 +3267 7 +2167 4 +2325 1 +2896 8 +3789 9 +1296 4 +2459 3 +3485 3 +3459 7 +2028 10 +3655 4 +1965 9 +1673 6 +1843 10 +3491 5 +1532 9 +2204 4 +1427 10 +2541 2 +1947 5 
+3718 6 +1105 5 +2498 3 +3322 6 +1985 5 +434 5 +2948 5 +1763 9 +248 2 +1467 7 +1719 4 +263 7 +3514 8 +2057 4 +1461 6 +993 10 +417 4 +1400 7 +1956 8 +3824 6 +964 10 +3822 8 +3459 8 +3676 1 +2537 6 +2853 7 +3629 1 +2855 8 +1975 1 +1607 1 +855 5 +1423 7 +1692 7 +1080 3 +28 9 +86 4 +3955 8 +2773 6 +1108 7 +55 6 +3905 7 +3796 7 +3143 8 +3808 8 +1687 5 +2304 1 +1328 6 +1150 9 +323 10 +2591 9 +2083 8 +1145 9 +3254 7 +2660 6 +2134 6 +317 2 +3971 6 +268 4 +155 10 +1067 6 +2810 7 +3214 3 +717 2 +3692 4 +3479 2 +2901 5 +2943 6 +1958 2 +3965 10 +1896 2 +1538 1 +2294 5 +3815 7 +1433 8 +2680 7 +1012 9 +191 8 +238 8 +3300 5 +514 10 +1643 8 +3348 8 +3547 7 +2874 8 +3090 3 +305 1 +3842 6 +3085 4 +2127 10 +3843 10 +3473 7 +2005 8 +1809 5 +3217 1 +2968 1 +3422 7 +76 3 +3216 4 +1470 1 +3350 2 +221 2 +382 4 +1982 10 +244 1 +1795 1 +1951 8 +1818 4 +393 9 +1339 2 +442 8 +479 9 +2304 1 +1068 5 +827 7 +2639 6 +2554 6 +1999 6 +4078 6 +1905 10 +3957 3 +2424 7 +1143 5 +486 1 +2832 6 +157 10 +4082 9 +1143 10 +649 5 +2647 9 +3693 2 +3595 4 +1778 9 +2170 9 +3830 2 +259 10 +1417 2 +1061 1 +2146 10 +1642 9 +463 8 +2849 5 +2323 5 +3355 5 +2378 2 +990 8 +2692 10 +879 7 +1674 8 +261 7 +3914 10 +1842 2 +887 4 +4036 7 +227 8 +1592 6 +720 1 +1761 6 +1326 6 +2286 10 +386 9 +2863 6 +78 6 +3986 9 +307 10 +445 9 +3940 7 +529 5 +939 4 +1459 4 +966 4 +3798 9 +683 2 +1323 8 +313 10 +3093 6 +420 2 +1586 10 +1256 5 +1726 5 +1772 1 +1464 6 +3980 10 +2147 2 +3727 8 +641 8 +1577 10 +1207 8 +4035 4 +562 2 +2492 8 +72 4 +1535 6 +2706 1 +2845 9 +1676 4 +730 3 +1964 9 +3894 9 +2393 6 +2790 2 +869 7 +1139 10 +1784 1 +2365 7 +1750 1 +88 1 +3565 5 +1199 6 +2526 4 +3472 9 +1295 1 +2082 1 +2587 5 +3150 3 +1238 1 +2562 8 +3926 5 +2277 10 +1317 10 +764 4 +1292 3 +2153 3 +3582 2 +1921 9 +256 6 +1318 1 +1202 1 +177 4 +1154 9 +2986 9 +3936 6 +3273 5 +290 6 +1024 10 +2780 4 +3986 5 +516 10 +249 3 +2905 10 +2844 8 +2862 7 +2524 1 +2837 5 +3402 8 +3161 10 +2999 9 +2960 10 +3824 3 +2495 10 +1385 2 +1335 8 +813 10 +1090 5 +3901 7 
+1055 6 +656 9 +2570 4 +3329 5 +569 9 +2055 2 +2018 5 +306 7 +323 3 +2866 5 +2095 1 +3068 4 +3174 3 +571 4 +1682 3 +1345 2 +2909 1 +656 9 +1484 4 +3164 2 +2571 10 +3966 10 +3340 10 +3728 8 +7 2 +2608 4 +2421 7 +2362 1 +3003 10 +3149 2 +903 4 +3827 6 +1493 7 +1841 2 +858 8 +1451 1 +3172 3 +1973 1 +3439 3 +2296 3 +1634 2 +2457 1 +532 10 +1046 2 +3357 2 +2972 8 +825 9 +3344 4 +3911 4 +1051 4 +574 7 +3352 3 +534 4 +3882 1 +2328 7 +517 7 +3393 4 +1929 2 +1767 10 +733 5 +2664 1 +1410 5 +444 6 +1540 6 +968 9 +2640 6 +1875 8 +1901 9 +3463 6 +3969 6 +351 2 +3927 9 +909 8 +1050 7 +2546 1 +3510 4 +249 1 +3123 6 +163 3 +549 1 +3607 10 +1638 7 +3195 6 +3973 1 +104 5 +3502 9 +3134 9 +2764 1 +2263 9 +3943 7 +52 3 +849 1 +1057 2 +1287 5 +3156 5 +1769 7 +3908 3 +1059 1 +1455 4 +2934 2 +25 1 +2676 4 +3981 6 +3527 7 +1243 4 +1259 2 +3833 8 +1258 3 +772 6 +1262 7 +1837 7 +3722 5 +1901 7 +3677 10 +613 2 +3232 3 +776 10 +1169 3 +2073 2 +839 2 +617 8 +1811 5 +3395 1 +1528 3 +1681 2 +2428 4 +1405 4 +3810 3 +3260 6 +3019 9 +844 1 +74 10 +102 10 +3149 9 +1048 10 +808 9 +36 2 +2902 6 +2605 5 +1523 2 +2765 6 +1940 6 +3654 5 +3120 9 +2253 5 +1651 5 +757 6 +1246 9 +3442 2 +1811 4 +213 3 +3163 8 +3938 9 +405 2 +3465 10 +2497 9 +3963 4 +2858 3 +2911 1 +2586 4 +4093 9 +283 9 +3429 5 +74 6 +2552 8 +837 6 +3303 5 +727 7 +3844 5 +3646 2 +1480 10 +190 7 +1495 9 +2341 7 +2280 5 +3956 4 +3860 5 +2735 2 +2861 7 +2927 1 +2012 5 +477 7 +99 5 +3191 4 +813 3 +3000 8 +3213 5 +1658 5 +450 8 +869 3 +3025 7 +1170 2 +3437 6 +3514 5 +2433 5 +1333 6 +2050 7 +949 8 +2985 1 +3727 5 +889 9 +1630 3 +3443 4 +737 7 +1991 8 +1580 5 +3192 2 +2548 1 +968 7 +151 2 +535 7 +3856 5 +1164 10 +411 8 +1538 1 +2929 5 +1978 2 +58 10 +844 4 +1501 9 +1059 5 +2496 7 +343 2 +2893 8 +3966 7 +2075 1 +3105 10 +756 8 +1687 1 +3754 4 +3947 3 +3306 10 +1523 7 +3955 9 +6 9 +1945 1 +1488 10 +2653 8 +3688 8 +2749 8 +3167 8 +79 2 +1526 2 +1585 7 +2095 5 +768 1 +1069 4 +3216 6 +1781 3 +2165 3 +2393 5 +1828 4 +2526 1 +1814 2 +1977 9 +313 2 +1930 7 
+3803 3 +2629 5 +452 9 +353 8 +961 9 +880 1 +2662 7 +3725 4 +1329 5 +1008 10 +763 5 +2644 8 +4013 5 +2516 7 +3550 5 +3797 7 +833 6 +3309 4 +2095 2 +439 1 +3984 2 +2296 7 +2670 4 +3352 8 +1106 7 +2232 3 +3932 4 +3922 10 +1295 4 +1182 4 +594 9 +815 1 +527 3 +3211 10 +1929 9 +1906 5 +280 1 +464 5 +2700 1 +2133 9 +3273 8 +4053 2 +2384 2 +2509 5 +1247 1 +3745 2 +910 1 +1524 5 +2412 6 +1260 3 +3277 2 +2078 2 +1625 10 +3767 10 +1966 1 +2711 10 +2454 7 +196 1 +1331 5 +659 1 +118 2 +3428 1 +749 9 +826 2 +2708 6 +1021 5 +407 2 +149 7 +3176 3 +3310 3 +3951 1 +2425 5 +1010 6 +119 9 +2677 8 +3760 8 +3345 8 +2116 6 +1001 5 +730 2 +1085 1 +2347 7 +2704 7 +3235 4 +3178 9 +2172 6 +1846 5 +2144 3 +1166 6 +1492 6 +3283 5 +3655 8 +1124 2 +64 1 +212 2 +1912 1 +1218 9 +1051 1 +996 6 +3157 5 +3308 2 +1891 1 +1235 6 +937 1 +1820 1 +597 1 +3382 1 +1882 1 +4090 3 +1612 2 +1884 1 +1009 4 +2989 7 +196 4 +1635 8 +3632 4 +253 9 +2051 1 +1045 9 +2473 9 +2292 7 +936 5 +1725 4 +327 5 +665 8 +2335 7 +2937 9 +2483 6 +3251 4 +407 3 +1280 2 +3407 10 +3574 10 +3480 7 +238 4 +3999 6 +618 4 +3899 5 +1123 9 +1492 7 +2447 8 +1335 3 +826 9 +2229 4 +3643 10 +2979 5 +4025 1 +2136 2 +2100 2 +1338 8 +2546 7 +3854 10 +1368 9 +2271 5 +2977 5 +1645 2 +1515 9 +236 2 +2812 8 +175 9 +627 6 +2281 2 +1236 2 +36 10 +2909 3 +3086 8 +3846 1 +1818 1 +332 5 +2912 9 +869 9 +3898 4 +1285 3 +172 6 +812 2 +2672 10 +2888 7 +1850 3 +3499 8 +1832 1 +1431 8 +2801 1 +1080 10 +443 6 +3893 5 +185 3 +2316 6 +50 6 +3849 9 +1137 2 +2962 2 +4079 10 +4014 3 +1702 3 +4055 9 +971 7 +2515 7 +2299 3 +1150 9 +2989 9 +626 3 +1572 7 +2233 5 +1392 5 +1257 3 +415 4 +598 3 +109 2 +3403 6 +3411 9 +1894 4 +678 2 +461 6 +2764 10 +3142 5 +2613 8 +2219 3 +3810 8 +3510 9 +1744 7 +3700 8 +1986 2 +2106 3 +756 2 +2888 1 +1485 8 +1857 8 +1187 9 +355 7 +3227 3 +4019 3 +2485 8 +2139 9 +3517 3 +3665 10 +3618 2 +3358 1 +1591 9 +1886 4 +746 5 +1721 10 +2471 2 +3938 9 +2506 2 +257 6 +3861 3 +3588 4 +3619 4 +2627 4 +2528 10 +1881 1 +283 5 +108 5 +583 1 +852 9 +3783 
10 +1538 10 +3931 5 +545 8 +3042 1 +1533 9 +1555 2 +3840 5 +1863 9 +530 7 +1842 10 +3966 10 +699 2 +3960 2 +1644 4 +2685 1 +1162 6 +1059 6 +3406 3 +2804 10 +1028 1 +3754 2 +3937 2 +4020 10 +2036 4 +1653 6 +449 10 +739 1 +2067 3 +681 3 +296 3 +1138 9 +474 7 +365 1 +1211 1 +2919 4 +223 8 +3724 7 +3490 6 +3310 4 +1509 2 +2406 5 +3450 9 +3188 3 +2492 4 +3031 3 +3321 1 +2125 10 +4070 4 +2449 6 +1269 7 +3132 1 +733 10 +120 3 +2009 7 +4013 5 +17 10 +2805 7 +2016 6 +3240 7 +2478 8 +3622 8 +1511 8 +1567 6 +2208 10 +2421 6 +3722 8 +2612 1 +1459 5 +863 6 +3812 10 +2234 2 +1230 7 +3000 2 +1165 9 +472 1 +2724 2 +18 1 +915 10 +3744 8 +2865 4 +520 1 +2297 9 +3469 8 +3748 6 +3301 8 +1674 6 +1260 4 +677 9 +2398 4 +3377 6 +1748 1 +2111 2 +2876 7 +3086 1 +1776 2 +3505 7 +2367 7 +3830 8 +3390 3 +2124 8 +3984 7 +2916 10 +2396 10 +1677 4 +3013 6 +3362 10 +1284 2 +3097 5 +2508 1 +2664 5 +721 5 +3878 2 +3689 9 +1648 7 +2708 4 +3937 7 +3415 10 +2761 3 +1848 4 +3019 4 +2555 3 +111 6 +3243 6 +3377 9 +1007 2 +3769 3 +75 1 +1195 8 +3968 1 +1205 5 +3756 2 +1689 7 +2596 7 +2909 10 +3807 2 +2871 6 +891 5 +3409 10 +1890 9 +3724 9 +3611 6 +1052 1 +1946 7 +1375 3 +3432 3 +3178 8 +2517 1 +3172 3 +3873 8 +1527 4 +1220 5 +85 6 +3112 7 +2539 6 +980 5 +1022 2 +1934 10 +58 10 +1859 7 +143 3 +706 2 +776 6 +4088 7 +2987 8 +1736 2 +501 7 +416 5 +3276 7 +77 7 +133 8 +3566 8 +3177 7 +587 3 +2859 10 +656 4 +2130 8 +2668 9 +1738 1 +2399 10 +2485 4 +3758 8 +1255 3 +2870 8 +3970 9 +2660 1 +2949 8 +582 10 +3207 2 +2460 6 +1037 2 +2300 8 +1438 4 +4064 2 +1513 3 +47 4 +494 6 +206 7 +1883 5 +1907 2 +736 4 +4037 3 +3008 7 +2975 10 +2136 4 +3351 1 +1895 4 +2824 5 +1546 8 +1755 6 +3513 5 +3462 5 +1907 3 +3329 5 +1296 3 +2762 9 +2642 9 +82 6 +2056 5 +3469 9 +605 6 +3834 3 +1662 8 +2204 5 +2231 7 +146 4 +2484 6 +3002 1 +1163 3 +624 6 +3993 2 +2431 3 +1430 9 +1017 7 +3450 2 +3416 3 +3215 4 +2245 4 +2873 1 +2984 9 +439 6 +1604 8 +2761 6 +3029 1 +3048 4 +1137 10 +1633 9 +227 4 +1271 2 +2495 4 +1169 9 +1108 9 +2174 10 +760 1 
+1547 4 +3924 9 +604 7 +3079 6 +885 2 +2456 3 +1240 5 +1766 4 +1145 8 +2033 5 +243 9 +741 9 +1280 8 +268 7 +1348 10 +2468 6 +1947 2 +3334 6 +2374 8 +1100 3 +1003 2 +1812 1 +1689 7 +2109 2 +869 10 +2552 9 +2960 5 +2530 9 +1542 8 +136 1 +106 1 +3308 3 +3104 1 +618 8 +2468 3 +274 9 +2516 2 +2462 10 +167 9 +1544 3 +24 3 +3147 10 +1578 3 +1684 10 +1813 3 +5 1 +3684 4 +597 4 +14 4 +3326 2 +3728 1 +3867 3 +1580 3 +2587 10 +258 10 +669 1 +3150 1 +2015 7 +3335 4 +233 9 +2223 3 +1279 1 +2399 8 +1167 10 +764 6 +243 10 +3235 10 +2591 7 +1599 3 +359 9 +2827 4 +3682 1 +1980 8 +3899 7 +2449 1 +1698 3 +2179 4 +827 1 +725 10 +1837 9 +994 6 +1699 3 +2040 10 +1349 5 +3794 6 +1975 3 +899 9 +1515 2 +2600 10 +786 1 +1387 3 +2082 5 +3476 8 +821 6 +3768 2 +3541 4 +3394 9 +101 1 +1668 4 +3432 2 +1090 6 +2710 7 +2464 4 +783 4 +1648 6 +1163 6 +3060 8 +1299 8 +387 10 +3744 6 +86 1 +3529 8 +1085 2 +478 5 +2557 1 +1208 8 +3767 4 +3163 2 +3179 4 +2419 6 +4022 9 +945 6 +2826 7 +2412 3 +2783 7 +2515 10 +3818 5 +42 8 +2945 10 +659 2 +612 4 +2484 10 +507 4 +2027 10 +509 7 +1775 7 +219 10 +3733 5 +1724 3 +2606 3 +270 5 +3653 1 +446 9 +2719 7 +4095 9 +2103 8 +2007 6 +3257 7 +859 1 +3995 6 +2388 6 +1915 5 +3262 8 +2459 6 +2279 6 +3530 2 +2919 8 +2965 6 +34 2 +2017 5 +1253 7 +3971 4 +2495 7 +2716 9 +1389 4 +4077 1 +1104 4 +1028 4 +3428 5 +1546 4 +299 6 +3312 6 +1072 5 +2479 4 +2192 1 +2238 1 +3105 10 +1571 6 +1337 2 +2908 10 +1875 2 +1750 5 +401 7 +1336 1 +391 3 +2926 6 +1003 8 +4024 3 +3327 10 +1976 7 +83 7 +2317 4 +1943 9 +2391 3 +1602 2 +3199 10 +814 5 +1774 1 +3056 9 +3815 3 +1400 3 +646 5 +1686 4 +490 2 +2353 2 +277 3 +2074 9 +3402 8 +3429 9 +2517 5 +1931 5 +1980 8 +2791 1 +3549 2 +2698 4 +2777 6 +3019 4 +4079 9 +792 5 +1955 5 +3295 9 +1284 7 +3477 1 +1507 8 +1621 2 +392 2 +3275 7 +928 7 +2196 8 +303 3 +1769 5 +1724 1 +3960 1 +3209 6 +3037 1 +1241 10 +3146 4 +782 10 +2661 5 +2943 8 +3586 6 +340 9 +2135 7 +1911 9 +2699 6 +95 8 +3039 2 +2367 7 +2958 5 +3882 9 +3449 8 +243 10 +552 9 +2760 8 +2455 10 
+3781 10 +947 3 +2362 3 +1366 5 +2659 1 +1249 6 +2635 6 +1623 7 +3472 1 +2849 5 +1229 4 +2687 10 +1355 2 +1584 9 +3270 2 +3116 9 +2472 2 +2153 9 +3907 7 +621 2 +3047 5 +3107 6 +3363 7 +3685 2 +1547 1 +979 7 +974 3 +2389 3 +2831 8 +2506 3 +1606 3 +1470 5 +1346 4 +591 10 +1795 1 +1332 7 +4040 6 +297 8 +1954 4 +3511 6 +1782 7 +1520 9 +2969 5 +2671 9 +3977 7 +2638 4 +3619 7 +786 3 +3257 3 +763 4 +2652 6 +155 10 +599 1 +3494 7 +552 3 +3219 3 +1255 3 +1876 6 +3060 1 +900 6 +2407 3 +1507 1 +792 8 +1845 10 +2111 2 +2230 5 +2385 6 +2740 6 +2447 9 +911 1 +2998 4 +1109 4 +869 5 +1662 6 +4048 9 +914 8 +2359 10 +216 6 +378 8 +2837 1 +3926 10 +3501 3 +3393 6 +4007 1 +1902 4 +3258 5 +538 4 +2889 5 +2581 8 +2136 9 +719 5 +2366 6 +1234 6 +2322 10 +3818 3 +1252 1 +1789 6 +1990 9 +2398 1 +1553 6 +2756 9 +473 1 +3485 7 +3505 6 +1284 3 +2581 5 +1242 6 +1747 1 +942 9 +1096 3 +1135 3 +1890 1 +1320 8 +1667 1 +1116 2 +3184 7 +939 9 +3598 3 +526 4 +1118 1 +1665 1 +1227 10 +1265 4 +3687 10 +2978 10 +2239 2 +815 3 +2967 7 +1659 8 +3103 4 +1883 1 +3833 8 +1532 7 +643 7 +617 3 +3370 8 +3932 8 +747 4 +2065 4 +3693 10 +2748 8 +2243 1 +1536 4 +3248 9 +1416 4 +3548 5 +2847 3 +2237 4 +1162 6 +3825 5 +2114 10 +3252 6 +1964 9 +3489 3 +562 8 +202 1 +1575 3 +200 2 +3315 9 +1280 3 +2739 3 +1078 7 +3897 2 +1554 5 +1255 7 +1343 4 +1977 5 +1749 6 +2750 3 +2046 9 +3983 10 +3405 7 +922 5 +1938 2 +3494 6 +3635 2 +3980 3 +2397 8 +3953 1 +60 7 +1387 6 +362 3 +2219 2 +1653 10 +1805 10 +3002 8 +2108 6 +3855 7 +171 5 +3775 4 +1388 4 +709 4 +699 10 +1828 5 +3516 8 +312 4 +2154 10 +2842 7 +1965 3 +1431 9 +1067 8 +3379 9 +3313 4 +1866 9 +886 3 +3556 9 +3018 10 +179 8 +3483 6 +2181 7 +265 8 +2894 4 +760 9 +112 6 +2990 9 +850 4 +2042 10 +3815 5 +2783 10 +675 3 +2881 8 +677 6 +1226 4 +3428 5 +359 5 +579 3 +1254 6 +1816 6 +570 7 +3744 4 +44 9 +3334 5 +3261 8 +1909 6 +2931 1 +1659 3 +492 7 +1073 4 +887 2 +841 2 +2602 2 +3509 8 +603 10 +1714 4 +1821 3 +809 9 +224 1 +3666 3 +3812 6 +3970 3 +3649 6 +50 3 +3019 4 +337 5 +2172 
7 +1856 3 +3381 3 +2345 5 +2569 8 +1495 1 +143 4 +822 3 +1152 4 +325 6 +1158 4 +969 1 +2245 7 +4003 2 +1184 8 +1384 7 +2700 5 +638 2 +2678 5 +318 9 +285 4 +266 6 +4054 4 +2122 6 +2459 1 +3677 8 +2581 6 +1368 8 +2160 3 +3780 4 +620 1 +2793 4 +457 7 +3707 3 +857 5 +2506 9 +99 10 +1180 9 +2180 9 +1890 7 +4050 3 +1183 5 +2802 1 +3624 2 +1006 6 +2492 5 +1166 5 +3142 7 +543 7 +2801 3 +2949 10 +1413 8 +2872 6 +2388 10 +1403 6 +2665 8 +2479 9 +3318 7 +110 8 +3980 6 +738 10 +3142 10 +1171 10 +790 10 +3130 10 +964 8 +606 8 +2039 1 +3452 8 +1297 5 +3460 2 +3782 6 +1166 9 +4016 10 +2143 1 +4041 10 +2028 9 +3978 6 +3559 7 +1250 5 +2541 9 +2820 7 +1870 1 +560 3 +819 7 +1609 7 +2502 9 +390 10 +1708 3 +118 1 +521 6 +3816 6 +3859 4 +1345 6 +2919 9 +2643 7 +1412 5 +1989 10 +2703 6 +2515 1 +2868 3 +3693 7 +455 7 +1093 2 +2679 2 +2363 9 +3000 1 +2765 5 +290 7 +1684 7 +3626 6 +3971 5 +1148 6 +2333 3 +747 9 +2110 2 +3879 6 +2762 9 +2628 2 +1588 3 +1640 5 +2527 8 +1003 6 +3761 8 +2203 1 +941 5 +1764 10 +1998 7 +1486 5 +1778 9 +1418 6 +337 1 +3546 9 +362 5 +2899 7 +3449 4 +3803 8 +950 7 +1249 8 +2378 9 +99 10 +1556 4 +2744 2 +3619 2 +2238 9 +3069 9 +3224 7 +1837 7 +2342 1 +1946 9 +4086 6 +1742 9 +1820 1 +1183 10 +1308 4 +3928 6 +1287 9 +3580 8 +44 1 +2977 6 +1350 9 +1425 7 +1066 2 +2408 9 +1575 2 +2153 5 +3102 4 +135 9 +2758 3 +3540 9 +2125 2 +3796 5 +1795 4 +2676 8 +2096 10 +1415 9 +1715 7 +698 4 +3273 7 +1510 4 +2942 7 +2997 9 +2941 7 +2202 3 +4062 10 +590 1 +3500 5 +627 10 +2489 2 +581 3 +1042 8 +1675 6 +43 7 +131 2 +3194 2 +819 4 +1607 7 +3809 1 +2648 8 +3470 2 +2942 9 +2001 5 +1924 7 +3722 5 +222 9 +3344 5 +3909 2 +2361 2 +2594 4 +1451 9 +3194 6 +1582 4 +120 9 +2885 1 +2690 5 +1055 3 +2236 2 +3249 2 +1360 7 +2533 9 +1395 8 +3741 7 +1236 4 +2317 1 +1469 10 +3676 5 +1420 7 +1500 5 +2717 6 +2934 2 +2777 9 +1271 6 +1889 1 +1360 4 +1969 2 +1 8 +1097 2 +285 2 +3900 9 +98 6 +2889 8 +1734 3 +1370 4 +3999 2 +2008 2 +3511 9 +978 6 +3747 7 +1106 8 +804 3 +2414 6 +1744 1 +3141 4 +2357 5 +2289 7 
+628 6 +2054 3 +1367 8 +1695 2 +4061 4 +1786 5 +3531 7 +33 5 +742 1 +2882 7 +2326 9 +3730 8 +2581 5 +309 10 +1523 5 +2461 9 +1090 10 +245 9 +1961 8 +3826 4 +54 4 +1745 10 +505 6 +2734 4 +2879 7 +1429 9 +1780 10 +3763 8 +2085 2 +3185 5 +2030 3 +2534 4 +919 4 +2008 5 +2816 5 +27 10 +416 3 +2021 5 +243 10 +40 8 +2354 7 +1027 7 +4095 2 +2714 8 +470 10 +588 4 +772 2 +3791 7 +3294 5 +835 5 +449 8 +3746 3 +3762 4 +1143 1 +3125 7 +3422 8 +1590 4 +685 9 +4014 7 +1522 1 +2477 1 +214 7 +1584 3 +519 8 +906 5 +1375 1 +1575 2 +893 9 +3991 2 +4075 3 +2622 7 +153 7 +3756 6 +3697 7 +1795 10 +595 8 +629 9 +2880 9 +1810 5 +588 8 +2662 2 +1139 10 +569 5 +1782 2 +3787 7 +3767 1 +1391 3 +627 8 +2146 8 +2783 6 +2053 9 +1052 3 +1296 7 +634 10 +705 6 +2795 4 +2854 2 +1760 1 +3363 10 +1466 5 +56 5 +851 1 +2764 7 +1497 3 +1736 5 +1941 6 +2446 10 +241 2 +229 10 +3804 6 +3108 5 +1487 9 +3061 1 +858 5 +2141 9 +2349 4 +3767 9 +1256 4 +1550 6 +3940 3 +1370 8 +1105 10 +3710 8 +1315 6 +2278 9 +997 2 +214 7 +2548 6 +2822 7 +1375 9 +2782 7 +3766 9 +581 7 +876 5 +3832 4 +2883 5 +2986 7 +4065 7 +3648 8 +145 1 +1937 4 +4011 3 +1086 10 +3544 8 +1886 10 +237 7 +3133 2 +364 3 +819 1 +781 5 +2542 5 +2604 7 +2559 6 +3899 10 +3298 2 +966 5 +395 9 +3784 1 +4078 8 +2710 3 +4042 7 +3175 10 +2684 9 +3774 7 +383 2 +3091 6 +4046 1 +3959 8 +3781 1 +2175 6 +740 6 +411 5 +1898 6 +2382 8 +547 8 +3019 3 +523 6 +283 9 +3178 3 +1883 7 +2690 1 +3197 8 +1920 4 +146 7 +3725 7 +1329 2 +917 9 +1706 7 +3474 6 +1181 6 +2814 4 +3708 7 +1462 4 +878 7 +269 4 +3182 2 +2670 3 +2691 10 +2122 9 +2636 7 +1210 10 +3383 4 +1149 2 +653 3 +1396 1 +2248 5 +3643 1 +1201 2 +2968 5 +2970 8 +175 5 +1271 10 +2576 10 +2053 1 +1152 4 +2494 4 +1518 8 +3679 3 +41 9 +948 3 +3693 10 +140 9 +1344 2 +4017 4 +1112 4 +1346 7 +715 6 +2235 3 +775 5 +3889 4 +366 5 +1064 2 +890 10 +2363 3 +3281 4 +1309 10 +3842 9 +2127 1 +1367 5 +1636 1 +3201 9 +823 4 +708 9 +1983 9 +1512 3 +2129 2 +501 7 +1491 6 +3694 4 +2763 10 +2142 8 +4078 10 +3497 3 +880 2 +2604 7 +3884 5 
+336 2 +2806 2 +1601 10 +1318 8 +189 1 +3017 6 +2059 10 +53 9 +340 1 +804 1 +508 9 +2675 10 +2330 8 +3161 10 +2351 5 +1687 1 +1371 3 +2029 5 +2386 6 +131 3 +986 10 +666 5 +2479 2 +3762 3 +1889 6 +120 8 +171 10 +2181 7 +2300 7 +1117 2 +3836 3 +1859 9 +2446 2 +842 5 +2529 2 +1749 4 +1705 8 +757 7 +664 6 +3193 2 +82 3 +1006 9 +2332 1 +3011 5 +4090 7 +2689 7 +1373 4 +2161 4 +3314 10 +1193 5 +1015 8 +2770 7 +2225 6 +621 5 +128 4 +137 7 +2432 6 +2231 2 +2693 3 +1964 9 +654 4 +943 10 +995 8 +2439 7 +2169 6 +662 5 +832 7 +1131 1 +1045 5 +3220 9 +506 2 +2067 4 +915 7 +658 6 +3416 6 +1950 8 +2760 3 +2297 7 +4051 10 +1467 1 +2248 2 +2795 2 +1615 9 +772 4 +3245 4 +288 7 +834 5 +3795 4 +2689 2 +2726 3 +2606 10 +1767 5 +72 10 +2680 4 +2656 4 +2325 6 +3643 8 +3035 8 +1738 8 +684 3 +3832 8 +3728 1 +2275 10 +3107 10 +685 3 +3846 4 +1982 3 +1690 1 +2032 6 +1210 8 +1781 2 +3382 2 +2247 1 +690 1 +3735 1 +1611 1 +2958 1 +3543 2 +3484 7 +3383 4 +1395 1 +2098 9 +1474 5 +89 6 +509 7 +1644 4 +600 8 +462 10 +59 4 +946 6 +2324 6 +2871 1 +996 6 +1638 2 +323 7 +3103 6 +2134 7 +1541 5 +2401 4 +1727 6 +3397 8 +1731 5 +3671 1 +3651 8 +3635 6 +2892 2 +2833 8 +2641 8 +3525 2 +3738 2 +3686 10 +3111 2 +278 4 +1384 10 +548 3 +3772 7 +3536 6 +481 5 +3748 10 +4052 7 +572 7 +2653 7 +3797 4 +3867 10 +1799 1 +2206 3 +1947 4 +870 4 +1611 6 +2400 6 +438 10 +2292 2 +2975 2 +2863 3 +3747 10 +3738 2 +1865 4 +2427 6 +3084 6 +4044 4 +1387 6 +3262 1 +693 7 +1125 10 +797 5 +1355 9 +957 6 +3781 10 +2182 1 +1077 10 +70 9 +930 7 +3118 5 +1067 2 +926 7 +3068 5 +2984 3 +2713 7 +3882 1 +3359 4 +2119 6 +692 10 +3093 10 +3144 3 +1783 10 +2775 8 +732 5 +2138 4 +291 5 +830 8 +3752 5 +3154 7 +613 10 +1945 10 +1703 7 +3138 4 +3954 8 +3963 5 +1989 6 +3506 2 +2544 8 +556 8 +3623 6 +1378 1 +1324 9 +21 6 +164 10 +1064 8 +1277 5 +3024 2 +3754 8 +2917 2 +3126 10 +2715 9 +50 3 +495 2 +2961 1 +921 3 +2361 7 +43 8 +2014 10 +568 3 +2542 1 +1475 2 +2515 10 +2829 4 +672 9 +3836 1 +607 4 +744 8 +2107 7 +3118 8 +885 10 +800 10 +1649 8 +772 
2 +3713 10 +2800 7 +1421 9 +2111 1 +367 3 +1137 4 +2645 10 +1226 4 +1095 8 +3364 10 +2810 8 +3614 8 +767 4 +3589 1 +340 5 +2647 5 +3762 7 +2526 6 +844 2 +2353 10 +1499 2 +1824 8 +4043 8 +1580 9 +2023 8 +581 7 +2697 9 +3806 7 +3330 2 +2796 4 +106 7 +1667 6 +3121 9 +491 8 +1080 6 +959 9 +961 2 +3875 9 +1256 4 +2327 2 +3024 5 +3579 8 +635 1 +4051 8 +364 9 +737 5 +1404 5 +3039 4 +1559 5 +3169 3 +3517 6 +2128 4 +3883 4 +1955 1 +983 4 +1682 3 +2348 3 +445 9 +949 1 +1529 1 +3623 8 +836 1 +464 6 +2192 2 +3156 2 +2592 10 +648 6 +763 6 +1012 3 +3458 1 +3242 4 +2700 1 +3724 10 +3058 10 +432 3 +2621 10 +1386 1 +3954 8 +713 3 +3324 5 +3680 9 +3210 9 +3257 4 +2281 4 +2674 1 +3355 8 +3129 1 +1323 3 +3924 10 +337 6 +1993 4 +1410 5 +3095 2 +3873 10 +3867 4 +3841 9 +1699 9 +2316 7 +2441 1 +1398 5 +1372 8 +3995 4 +2726 7 +861 9 +1707 7 +3939 10 +776 1 +1101 9 +2112 6 +2337 1 +1129 3 +109 1 +2993 5 +1271 7 +1855 5 +1510 6 +2564 9 +1272 1 +2118 5 +746 5 +598 6 +1460 1 +3752 4 +2891 2 +841 9 +2446 10 +1140 10 +540 1 +3855 5 +2087 4 +2580 2 +1335 2 +1649 7 +4039 5 +3382 10 +477 10 +1215 5 +2158 1 +1163 9 +614 8 +3517 3 +2429 3 +3744 5 +342 9 +3027 1 +2399 6 +3211 4 +917 8 +513 10 +1910 1 +2413 1 +25 9 +605 7 +689 7 +273 9 +2299 8 +720 2 +1356 4 +1476 8 +3038 8 +1046 1 +638 5 +3954 7 +3113 5 +2904 3 +2826 7 +366 2 +1060 1 +3101 4 +3623 1 +1046 6 +1648 8 +2319 10 +2580 7 +2740 7 +2132 8 +77 8 +1541 7 +431 1 +2630 6 +1872 7 +1048 9 +3717 2 +1889 7 +2224 4 +1570 1 +3920 3 +2350 1 +1044 10 +873 9 +1551 10 +3882 1 +2499 2 +2603 6 +2066 10 +843 8 +3173 1 +2002 7 +1935 3 +1349 6 +2279 9 +3933 8 +4052 6 +1380 3 +3553 5 +3262 1 +1718 10 +2127 5 +3522 8 +218 2 +3081 6 +2212 8 +1414 4 +217 6 +3319 10 +4093 9 +60 2 +1841 9 +2929 3 +465 2 +3793 6 +1815 6 +3592 3 +649 1 +1222 9 +3654 9 +1126 9 +1883 3 +2413 6 +3066 10 +784 2 +403 6 +2530 8 +2289 4 +3445 1 +3904 1 +1237 6 +1146 4 +2062 10 +1773 2 +483 7 +389 10 +3913 9 +3234 4 +2214 5 +3441 4 +3730 3 +1729 5 +20 3 +1698 6 +2113 1 +2545 9 +1532 9 +3480 2 
+3967 10 +202 1 +3625 10 +906 1 +2771 9 +1194 4 +3498 10 +2758 2 +1058 1 +2733 3 +1833 8 +3340 8 +2451 3 +597 8 +3409 9 +3920 5 +2411 4 +3056 6 +181 9 +2203 2 +893 2 +2040 9 +1594 1 +1887 6 +2415 7 +2968 8 +1160 4 +54 1 +2270 7 +3801 8 +3270 4 +2981 6 +1512 6 +2325 6 +2816 3 +3150 7 +4048 2 +2805 8 +3535 7 +3593 10 +3500 10 +3281 2 +1360 10 +3597 4 +2721 4 +343 10 +258 4 +1255 9 +1939 8 +2824 1 +1726 3 +3129 7 +2754 1 +1694 2 +1952 8 +4061 2 +89 3 +1078 9 +2207 1 +771 4 +3160 6 +2529 10 +1275 10 +3569 7 +2421 7 +1878 8 +221 5 +2530 5 +2271 10 +1718 5 +2790 3 +1023 5 +1544 8 +1108 5 +191 1 +2823 8 +3379 8 +289 2 +1688 5 +4060 3 +2126 9 +3701 2 +1720 5 +2772 6 +3700 2 +2136 8 +1689 7 +3815 4 +224 2 +1875 2 +2927 5 +1911 7 +1582 3 +1257 5 +434 1 +457 1 +2981 4 +198 8 +3030 3 +3133 9 +2475 8 +3167 4 +690 6 +1754 8 +2109 6 +35 1 +3007 6 +1491 7 +2420 3 +2540 1 +3714 3 +1454 4 +2217 8 +2945 8 +3523 3 +2892 4 +2897 6 +1730 5 +4003 4 +2276 8 +3587 7 +3226 6 +0 7 +916 4 +903 8 +3079 5 +1591 3 +1633 1 +1316 2 +3577 8 +2644 7 +893 2 +77 4 +140 7 +2672 6 +1022 6 +1499 2 +1639 10 +1104 1 +737 2 +1403 8 +159 2 +1386 9 +1607 8 +277 10 +2007 7 +1950 3 +6 7 +3642 10 +897 2 +2337 8 +2005 9 +1552 10 +2996 9 +2807 3 +3706 3 +2722 7 +738 3 +3131 4 +769 5 +839 9 +579 3 +376 3 +127 7 +2292 9 +2064 6 +293 3 +2664 7 +3159 1 +1316 3 +3741 10 +2200 2 +3235 8 +1615 6 +3673 2 +2027 1 +2041 3 +2158 9 +502 3 +3259 6 +2920 9 +2991 9 +750 5 +595 10 +77 4 +3058 10 +21 2 +2507 2 +1414 7 +2714 7 +2649 1 +2054 8 +2386 10 +2074 4 +3972 9 +1599 3 +984 3 +910 2 +1353 7 +103 8 +1232 2 +1963 3 +3550 5 +1089 3 +83 8 +2172 8 +2716 8 +2012 4 +3828 4 +3398 8 +60 3 +3319 5 +258 3 +2440 10 +1001 6 +1323 7 +3974 5 +3416 9 +2292 3 +3393 7 +3653 10 +826 8 +165 2 +2911 3 +2145 10 +3586 7 +2063 1 +3343 4 +429 1 +1006 10 +3920 10 +3762 8 +3335 5 +911 2 +2266 7 +3226 4 +3291 8 +2664 9 +2491 5 +3306 8 +3442 1 +1825 10 +640 6 +1598 8 +3616 1 +3793 3 +2566 10 +1866 7 +2764 6 +2351 7 +1548 9 +322 4 +2280 1 +3559 8 +1545 9 
+3684 3 +1570 7 +3097 8 +858 6 +3959 6 +1860 1 +2740 2 +1148 9 +3830 1 +2356 8 +2609 4 +1264 2 +3457 5 +413 5 +327 4 +1687 1 +749 1 +1883 9 +1180 10 +337 5 +498 7 +28 9 +1865 1 +3618 1 +1249 9 +1827 7 +1126 6 +725 5 +3055 5 +1678 4 +803 4 +1274 4 +892 1 +1335 1 +17 1 +2755 8 +1539 8 +263 10 +3628 2 +1536 6 +3625 2 +2750 8 +1723 3 +754 2 +1215 1 +2468 5 +1915 7 +2581 5 +2083 2 +2500 6 +1408 1 +3553 7 +491 7 +2703 10 +3716 10 +2080 6 +3910 6 +2597 1 +2884 10 +2393 2 +3050 6 +353 3 +2432 3 +1449 8 +1730 9 +3401 8 +2603 3 +3666 5 +3757 7 +3451 6 +2631 3 +3513 4 +2051 3 +249 6 +100 8 +3249 1 +2676 8 +3349 2 +595 9 +260 3 +1321 2 +613 1 +609 6 +733 9 +3565 3 +2844 5 +1077 4 +1335 5 +56 6 +1635 1 +749 8 +3556 7 +3628 10 +707 3 +1128 9 +4037 8 +2115 9 +500 9 +205 7 +3402 6 +3212 4 +2871 5 +3626 10 +2295 1 +1035 10 +576 2 +804 6 +3995 8 +444 1 +172 7 +426 3 +2358 6 +790 2 +354 6 +707 6 +821 5 +3885 2 +1713 8 +3784 6 +3039 8 +558 8 +3662 4 +1602 6 +3633 5 +3148 7 +2544 1 +2897 2 +1868 8 +2020 5 +4075 9 +3637 8 +3963 6 +2467 10 +2682 8 +86 8 +3390 2 +3339 4 +1037 7 +89 10 +2146 5 +1745 6 +3121 6 +2060 1 +3569 6 +357 4 +1103 1 +111 1 +3413 2 +1193 8 +563 5 +2826 8 +3744 7 +375 6 +1013 6 +1568 6 +2868 3 +1608 7 +1941 10 +968 6 +3423 8 +2918 7 +1782 6 +2209 1 +167 1 +2760 8 +1729 10 +1217 4 +2875 2 +2347 7 +1611 7 +2309 3 +119 2 +723 10 +3352 8 +4079 1 +1694 7 +1800 7 +2296 4 +2053 3 +2343 1 +3538 8 +1106 1 +240 3 +3200 10 +538 6 +2704 9 +3566 4 +3644 7 +3603 3 +2059 10 +1172 1 +3726 8 +2693 2 +1746 9 +220 3 +1058 1 +2733 5 +346 1 +3561 8 +2016 7 +1905 7 +1291 2 +794 3 +2621 1 +2879 7 +1422 8 +3040 5 +966 6 +346 3 +4074 8 +3107 3 +250 6 +1903 7 +1823 7 +1941 3 +1193 8 +656 3 +3856 10 +3578 9 +1671 3 +1408 1 +3973 4 +1335 5 +2952 3 +3572 1 +567 7 +2517 3 +3453 4 +3350 10 +2637 9 +3576 9 +3449 2 +1793 1 +3411 3 +2143 7 +1627 10 +1174 7 +342 6 +850 5 +2827 5 +1367 3 +2783 8 +364 3 +1103 6 +3247 3 +2149 3 +2201 3 +2631 5 +4090 3 +3626 7 +1042 2 +972 6 +1913 4 +143 10 +2251 5 +1762 5 
+2310 4 +2592 8 +1443 4 +1123 3 +716 9 +3583 7 +2524 5 +3119 10 +23 9 +1015 6 +3945 4 +4069 3 +3508 4 +3067 9 +693 1 +262 8 +2509 6 +2148 6 +2193 9 +1492 6 +2472 6 +895 8 +2772 10 +400 2 +786 2 +2090 1 +2208 3 +479 9 +356 4 +2267 8 +1695 8 +792 9 +2903 5 +3171 6 +1243 9 +2349 2 +3895 10 +491 10 +3972 7 +713 2 +3522 4 +2937 8 +1718 7 +1770 8 +807 5 +3955 1 +270 9 +2996 1 +647 5 +1867 7 +3583 1 +3476 10 +1452 10 +3630 10 +1799 4 +711 10 +1450 10 +3530 6 +635 10 +2022 7 +495 8 +3385 10 +1741 7 +1236 2 +643 5 +4066 5 +3752 6 +875 9 +3582 1 +2377 7 +2184 10 +3864 5 +1079 3 +3044 10 +3813 2 +3966 9 +629 5 +2299 6 +1582 2 +1480 5 +3984 1 +1689 1 +1082 2 +3190 8 +646 1 +1348 5 +589 7 +1961 2 +2145 6 +1364 2 +2677 1 +188 1 +2570 4 +2986 4 +3024 1 +223 2 +3571 7 +2605 6 +919 8 +479 6 +1142 2 +199 8 +2507 4 +3255 9 +1555 3 +1862 4 +1569 8 +4003 2 +969 8 +2461 5 +678 7 +1907 9 +1502 3 +2990 4 +1026 5 +3877 6 +2041 9 +3876 1 +301 7 +432 3 +2509 6 +928 6 +1432 2 +3112 6 +3095 4 +1818 5 +2142 5 +3986 1 +32 7 +1349 4 +2403 1 +1060 5 +1802 7 +555 9 +4018 4 +3565 7 +2161 10 +2194 4 +3926 7 +2095 9 +3729 3 +217 7 +352 10 +2548 8 +3679 6 +2553 3 +1063 10 +1533 2 +3447 6 +1027 10 +712 6 +1118 5 +3083 10 +873 9 +3612 4 +2727 5 +729 1 +1895 3 +2700 9 +4078 6 +4089 7 +3265 7 +1583 1 +3546 5 +2689 2 +1640 4 +3344 7 +134 4 +3114 3 +2242 4 +2980 1 +1594 4 +3626 4 +3225 8 +3137 4 +1634 1 +2588 7 +3933 1 +844 1 +1466 2 +3288 9 +3192 1 +1987 2 +2357 6 +16 5 +2817 10 +128 2 +1160 10 +2992 10 +2502 5 +3972 9 +2395 3 +1275 7 +625 4 +907 2 +2265 7 +3172 4 +3225 7 +77 3 +2146 2 +2817 2 +3845 9 +3691 8 +600 8 +611 2 +417 7 +2645 10 +75 9 +711 1 +564 4 +151 4 +3541 8 +3038 3 +3912 9 +3342 3 +9 9 +1553 10 +3576 6 +1170 4 +98 6 +2700 6 +3086 3 +2591 2 +413 7 +2521 8 +3016 7 +1118 7 +4000 10 +1018 8 +722 10 +3952 1 +3646 7 +3920 4 +448 7 +3415 2 +2990 2 +984 5 +1211 5 +1659 8 +1928 3 +1319 2 +774 3 +3266 8 +752 7 +2279 10 +2854 6 +2941 9 +3060 2 +2874 1 +3549 1 +3379 8 +751 7 +3009 5 +3099 2 +233 1 +3321 
3 +1889 4 +2192 9 +3140 3 +1338 2 +1980 1 +3517 6 +1434 1 +1576 9 +3398 10 +1951 8 +3785 2 +3229 10 +497 7 +1914 9 +82 7 +2467 10 +1546 10 +3857 9 +1230 9 +266 3 +1664 6 +3702 4 +480 8 +3512 5 +123 5 +3271 7 +3467 3 +3861 9 +455 8 +1743 9 +2004 2 +726 7 +4059 6 +2982 10 +681 3 +3533 6 +2857 3 +2731 10 +3075 10 +217 1 +2691 9 +2311 9 +30 1 +2142 2 +3943 6 +1285 2 +1664 5 +3455 5 +3493 3 +467 4 +1220 3 +439 7 +2905 8 +718 10 +795 6 +2965 8 +2864 1 +2547 8 +2790 6 +832 9 +1498 1 +1664 10 +1942 8 +878 6 +2726 6 +1088 10 +174 8 +25 10 +3262 8 +1573 6 +3861 3 +2993 1 +3965 1 +2892 6 +1820 6 +339 7 +157 3 +2762 7 +76 5 +3179 9 +1356 9 +686 2 +3302 3 +3262 6 +2467 6 +500 10 +3046 10 +736 2 +649 3 +2925 10 +3501 5 +238 6 +1303 1 +913 9 +693 3 +2173 3 +1814 8 +3080 7 +3560 4 +3904 2 +1921 9 +2389 7 +2600 8 +2192 10 +1275 8 +3306 6 +287 10 +3722 4 +363 3 +240 10 +602 7 +1671 3 +1677 7 +789 10 +2319 1 +2771 1 +585 10 +91 3 +2105 7 +3282 2 +3942 9 +2825 3 +26 9 +3405 8 +3732 1 +1612 10 +983 5 +1469 9 +2819 2 +2995 10 +890 2 +3616 8 +814 3 +2376 4 +3578 4 +3499 8 +3319 8 +2801 3 +3953 6 +3239 2 +870 5 +2468 8 +2992 2 +3429 3 +2117 10 +1945 6 +1143 1 +469 5 +2804 9 +2309 9 +2124 4 +1763 6 +3604 3 +3640 1 +2045 8 +2531 5 +2763 3 +2395 1 +2323 2 +1081 10 +2078 10 +1731 2 +364 9 +1714 9 +578 7 +1469 6 +1905 10 +129 2 +389 1 +94 8 +2873 9 +1124 6 +824 2 +3386 5 +1700 2 +3658 9 +2415 8 +1264 5 +4028 8 +1663 8 +1435 10 +4002 5 +3274 6 +2072 1 +3006 2 +376 10 +3595 9 +3275 10 +1755 1 +548 4 +232 5 +3179 3 +1100 7 +772 8 +3330 5 +3967 3 +1494 7 +1770 4 +2269 8 +896 2 +1058 8 +1698 8 +3801 9 +1633 6 +667 9 +2153 9 +3867 1 +3617 4 +415 2 +671 3 +1993 1 +3368 1 +2161 3 +3957 3 +1938 4 +3215 10 +12 4 +1450 4 +3852 3 +3372 9 +1514 5 +2308 8 +1153 2 +422 5 +1824 10 +1575 1 +3979 9 +3026 2 +3162 6 +3247 3 +1939 5 +191 9 +2677 8 +3849 3 +3871 9 +1269 4 +2867 10 +2521 1 +110 7 +2569 6 +3901 9 +1302 1 +1441 2 +150 3 +4029 10 +1336 6 +243 8 +3365 10 +3901 9 +1297 2 +3664 5 +3507 7 +1263 8 +3142 10 
+2079 3 +642 1 +2478 3 +3766 10 +2993 9 +3337 5 +2224 7 +1444 4 +2939 7 +1226 2 +866 1 +957 4 +3813 9 +982 4 +2114 1 +247 7 +2946 3 +744 6 +923 4 +3534 4 +2790 7 +2840 4 +1963 8 +916 6 +592 4 +2187 5 +1236 5 +2522 7 +139 1 +3331 5 +1705 6 +683 10 +3383 3 +2377 10 +523 3 +3815 9 +3822 3 +541 8 +2128 1 +431 3 +1719 4 +3104 6 +2394 1 +1679 6 +1341 1 +1555 10 +2818 6 +1818 2 +3978 6 +3784 5 +211 8 +28 3 +2160 6 +2290 8 +1029 1 +500 4 +664 9 +964 10 +1349 10 +260 6 +3889 1 +224 9 +3846 1 +3442 3 +1542 4 +1834 10 +137 6 +1918 4 +3657 4 +16 5 +3626 10 +762 5 +1907 10 +1306 6 +976 5 +31 9 +2618 5 +643 2 +2273 4 +1515 8 +196 4 +754 10 +1134 8 +892 3 +1800 9 +1698 8 +2326 8 +4061 4 +370 3 +3209 3 +3852 9 +2499 8 +195 5 +1606 4 +2600 9 +1248 10 +3417 8 +691 6 +3882 9 +2000 5 +3657 10 +1219 8 +1453 5 +1806 8 +2896 6 +10 4 +2545 6 +3165 7 +3882 2 +2855 10 +3935 1 +3647 9 +71 8 +3790 1 +1891 8 +514 2 +3790 6 +2152 5 +3046 2 +594 8 +2930 6 +2927 10 +1739 1 +2559 7 +3408 2 +1081 3 +771 10 +506 3 +3537 6 +2946 8 +3995 7 +1245 7 +2097 5 +3222 9 +137 1 +3918 9 +1969 5 +1077 4 +527 2 +317 7 +1836 3 +1238 9 +582 6 +2554 4 +2292 2 +1106 7 +1709 5 +3980 2 +2134 9 +3310 1 +3476 9 +132 4 +3924 6 +3875 2 +2794 1 +3632 1 +728 2 +3674 1 +3873 9 +3649 4 +1185 7 +2722 3 +485 10 +272 6 +1181 5 +3942 6 +4013 5 +65 10 +3027 7 +2131 2 +390 9 +82 2 +2018 1 +3598 5 +2083 9 +1188 4 +1583 2 +736 8 +2779 7 +2101 3 +1522 8 +674 5 +3751 2 +1947 3 +4044 9 +2536 9 +191 8 +3053 9 +2025 1 +2984 4 +33 1 +1426 2 +514 7 +2972 5 +3109 4 +3106 6 +2568 2 +2309 7 +3966 7 +2344 5 +1173 5 +1885 5 +2939 7 +3867 1 +3595 4 +4083 5 +1132 2 +2868 1 +950 5 +3194 4 +2220 5 +3917 4 +580 10 +1304 10 +1540 5 +3223 10 +825 3 +1436 4 +3907 1 +2911 9 +15 3 +3371 5 +1200 3 +1092 10 +916 8 +668 7 +4035 6 +2062 7 +1581 9 +804 2 +3293 9 +2459 7 +2831 10 +2755 10 +2931 4 +3780 10 +4013 1 +1060 6 +3093 6 +238 6 +440 1 +85 5 +375 8 +622 3 +803 2 +2099 10 +2261 7 +2677 10 +2598 8 +1134 3 +3503 2 +1761 6 +2114 10 +278 4 +2703 8 +1153 7 
+818 5 +3369 4 +144 5 +1992 1 +2033 2 +1942 2 +44 9 +3582 10 +354 10 +4004 1 +3363 5 +2129 1 +3445 7 +1353 7 +46 6 +1934 2 +3555 9 +2423 2 +1947 8 +3976 7 +3853 9 +124 4 +3854 9 +1633 5 +102 10 +3302 7 +1801 9 +423 4 +1740 1 +2919 2 +404 10 +2831 9 +2348 10 +2235 1 +1242 7 +2536 10 +2623 10 +655 5 +634 4 +218 9 +757 5 +1516 10 +3683 1 +1746 4 +3826 2 +2137 6 +191 4 +2889 6 +1793 3 +3265 7 +2244 4 +1057 8 +1190 8 +1085 9 +286 6 +644 10 +3189 6 +3934 1 +1957 3 +34 4 +1837 6 +3480 7 +2206 1 +747 2 +1688 3 +2107 6 +3892 1 +3119 1 +2198 8 +2862 7 +1662 1 +1857 8 +1132 6 +3316 3 +1877 1 +3550 2 +2671 7 +190 10 +1450 1 +2910 10 +1173 1 +3742 6 +1907 7 +2345 8 +580 8 +0 2 +1920 7 +2737 3 +1030 2 +2061 9 +2704 7 +3309 4 +1204 1 +777 1 +81 10 +906 10 +1049 4 +3803 9 +3684 9 +1256 6 +1970 9 +2133 3 +1968 1 +1532 2 +2992 1 +3285 10 +829 2 +156 8 +2882 10 +120 8 +499 10 +1962 3 +2202 7 +4015 8 +2883 6 +327 4 +2502 9 +8 6 +1896 6 +2862 1 +1220 4 +3890 4 +58 3 +2273 1 +2074 10 +3090 5 +200 6 +2522 2 +624 1 +592 5 +1190 4 +3879 9 +84 6 +1193 10 +1612 2 +3239 7 +254 2 +3689 5 +2560 10 +422 3 +3726 10 +617 6 +3673 3 +3806 4 +143 10 +1326 9 +952 5 +211 4 +3346 10 +2984 3 +80 3 +2045 1 +371 9 +2921 4 +1924 5 +2656 8 +3435 9 +3882 6 +1410 4 +967 4 +3102 1 +2018 10 +1122 9 +3656 10 +653 2 +1418 4 +1107 3 +2603 6 +3792 7 +721 6 +3489 10 +1092 9 +1186 8 +3296 5 +2136 2 +2847 5 +1660 3 +417 2 +3312 9 +1811 8 +2537 6 +2928 10 +1383 5 +2939 2 +2065 9 +1781 7 +3544 6 +1042 6 +342 10 +2704 9 +2433 6 +194 4 +2000 9 +2886 9 +1010 2 +2869 2 +1508 9 +157 3 +2606 4 +1790 4 +2353 6 +1723 9 +429 5 +3385 5 +2976 2 +409 7 +585 3 +3346 4 +3500 8 +636 4 +478 5 +921 2 +2642 4 +3195 5 +3676 8 +3798 1 +1651 7 +16 6 +228 4 +1168 8 +2865 7 +726 8 +839 10 +3906 1 +2140 9 +3875 1 +636 9 +4087 1 +1551 6 +3299 6 +1899 9 +3215 9 +2406 1 +3391 2 +4087 1 +1259 4 +3409 7 +450 7 +2905 3 +1733 6 +647 1 +2220 10 +1894 2 +744 8 +189 7 +2138 5 +2569 1 +2941 4 +1627 6 +234 3 +3382 9 +3326 9 +283 4 +3659 10 +3223 3 +1083 2 
+21 5 +3083 6 +98 7 +3288 7 +198 2 +3577 3 +1638 1 +2968 7 +251 9 +2460 1 +2706 3 +1224 8 +1773 7 +995 5 +770 7 +1972 6 +1375 8 +3830 9 +754 2 +2173 8 +627 7 +1797 6 +3883 6 +1402 5 +1736 6 +3818 6 +1851 6 +3316 7 +2677 7 +663 6 +593 3 +2773 6 +2694 1 +1355 6 +2838 4 +1222 7 +4049 4 +2128 1 +1802 9 +1112 8 +1812 4 +3774 4 +1166 8 +725 4 +2677 7 +2281 9 +1746 4 +2493 7 +641 8 +11 5 +1462 4 +2250 10 +166 2 +2528 10 +961 7 +263 10 +3339 3 +2827 4 +1732 9 +2883 7 +859 7 +861 2 +3158 2 +561 2 +12 3 +4009 3 +1000 8 +1035 2 +2937 1 +629 6 +4084 6 +633 2 +2601 6 +2352 6 +1079 3 +438 7 +3352 1 +3240 8 +2414 6 +2520 7 +3806 1 +1134 5 +1567 4 +2601 6 +827 5 +2418 5 +1640 8 +934 6 +2003 2 +1361 9 +977 7 +3833 6 +3506 1 +1192 1 +857 5 +1151 7 +21 10 +3669 7 +3653 8 +2881 1 +1425 2 +3634 8 +2023 2 +1825 4 +3340 9 +377 8 +3265 8 +1108 10 +2393 5 +3781 1 +316 2 +1475 10 +2501 1 +232 3 +3331 4 +1765 3 +2788 1 +3280 3 +2253 10 +2090 7 +3222 7 +2724 1 +1265 2 +3847 3 +855 7 +1994 8 +3149 10 +1469 6 +2450 4 +2419 7 +946 4 +2779 8 +711 10 +3970 3 +229 5 +782 9 +2264 9 +3732 6 +3980 8 +770 4 +2639 7 +2716 10 +3583 8 +3474 1 +2085 5 +1121 2 +2257 2 +3388 2 +1328 7 +916 3 +2169 2 +2166 10 +3003 7 +2230 8 +2713 4 +176 8 +869 8 +1994 1 +912 2 +313 4 +3754 6 +2763 4 +714 6 +3634 5 +3327 4 +1620 9 +2297 1 +231 9 +1057 3 +1101 2 +1041 4 +691 3 +2083 10 +485 7 +3271 10 +1475 4 +3822 1 +3339 7 +3785 9 +3305 10 +1931 3 +3387 10 +3887 7 +3685 10 +3844 3 +1839 9 +2068 5 +170 3 +3234 9 +3413 1 +317 6 +1483 5 +2165 8 +3199 6 +3312 6 +1195 5 +1172 8 +896 1 +58 10 +3517 3 +1279 4 +653 6 +1822 7 +2863 8 +1141 8 +3424 9 +3958 3 +655 6 +404 5 +294 2 +186 6 +3461 4 +1354 7 +3243 1 +3536 5 +3069 3 +3268 6 +1595 10 +2915 9 +3721 9 +2327 4 +3800 9 +2860 7 +2215 2 +3780 7 +3230 4 +46 1 +370 6 +866 5 +2080 3 +3315 1 +1324 5 +1029 3 +3766 4 +1751 6 +1476 2 +986 8 +2803 1 +2111 4 +2338 2 +3724 5 +1879 4 +1931 10 +2497 10 +1532 10 +2527 4 +3131 10 +1814 9 +1523 1 +3901 7 +2319 4 +2899 9 +1437 3 +1660 7 +585 10 
+3484 7 +2269 8 +738 6 +2207 9 +2969 4 +1485 4 +211 6 +2838 4 +3531 6 +2513 9 +3864 6 +2400 7 +1134 10 +454 10 +3850 9 +208 9 +1813 1 +3291 10 +3528 6 +3959 8 +1339 8 +3506 3 +3121 6 +761 2 +22 1 +364 1 +2628 8 +3881 2 +2965 1 +2716 4 +2487 7 +2730 9 +3318 6 +629 6 +1007 6 +435 4 +1478 5 +3141 2 +1374 8 +678 2 +1904 1 +1834 1 +1269 7 +3504 10 +2939 8 +2599 2 +1427 3 +3212 8 +3345 2 +3406 7 +1938 1 +989 4 +3785 1 +934 1 +437 6 +881 4 +122 4 +3791 1 +2000 7 +1631 7 +1329 7 +164 3 +3300 5 +3287 9 +3086 3 +2094 8 +804 7 +2223 3 +595 3 +530 8 +1273 5 +786 5 +2472 7 +740 2 +2264 7 +673 9 +2361 7 +1326 1 +547 1 +2008 3 +1050 8 +1852 3 +2884 3 +602 9 +937 2 +2085 3 +516 7 +2260 5 +2234 5 +1887 10 +2430 4 +2722 7 +1956 4 +2459 1 +2905 9 +3195 6 +3568 2 +597 7 +167 2 +2975 8 +812 7 +2980 8 +2173 1 +1286 9 +414 10 +2575 9 +3431 2 +218 5 +509 4 +599 8 +2253 2 +2425 8 +1903 7 +1882 3 +3459 6 +3750 8 +3879 7 +3658 4 +93 3 +2907 10 +4093 5 +4046 10 +2553 3 +628 5 +353 4 +2955 6 +1148 4 +1622 10 +421 5 +1751 5 +3036 2 +465 6 +771 4 +2380 5 +1939 9 +3015 7 +1858 8 +268 8 +2522 1 +3363 6 +1936 8 +1255 3 +3555 2 +2728 6 +4022 1 +299 2 +3805 10 +2651 5 +1905 4 +1401 5 +454 9 +814 10 +2090 8 +2793 10 +568 9 +3842 5 +2281 3 +2515 4 +1920 8 +1894 5 +1752 8 +306 3 +3519 2 +3708 4 +213 4 +2748 10 +588 8 +1499 2 +2297 5 +3789 2 +126 9 +681 1 +3899 1 +1572 10 +475 7 +625 10 +1258 4 +1460 5 +2488 5 +481 7 +2448 9 +820 3 +2882 10 +3490 1 +2711 3 +644 10 +31 3 +2255 1 +2522 9 +627 8 +22 10 +2711 7 +1282 10 +2480 1 +3949 3 +2798 2 +1383 9 +2992 5 +1491 2 +1989 5 +2155 2 +3580 3 +1215 5 +2340 8 +1715 3 +3344 5 +3397 5 +1089 8 +2778 10 +3895 1 +321 10 +958 6 +3883 5 +1945 1 +3373 3 +1180 6 +1698 4 +3567 7 +3144 9 +783 5 +2923 7 +3221 10 +2758 8 +3915 8 +1535 2 +3194 3 +1792 9 +572 9 +3530 10 +2444 5 +2855 2 +768 7 +1914 7 +821 5 +1860 1 +2994 7 +2926 3 +3594 4 +1054 9 +406 8 +2511 8 +3791 4 +220 1 +2195 6 +242 9 +42 4 +1349 7 +2944 3 +1880 2 +1480 6 +1805 10 +2634 5 +3381 3 +1064 5 +3218 8 +3391 
10 +3118 10 +330 1 +2075 1 +2774 10 +3123 9 +983 3 +2024 4 +3016 7 +425 9 +3109 5 +899 1 +2521 1 +4000 1 +2850 2 +3023 7 +2190 2 +3453 9 +4093 7 +3034 7 +747 8 +2485 3 +2066 9 +2052 1 +3465 3 +2692 4 +2116 2 +546 3 +448 4 +2518 2 +3365 1 +1695 9 +253 6 +164 5 +2151 7 +3215 6 +837 7 +553 9 +2582 3 +2285 5 +592 7 +1127 5 +482 8 +2803 9 +652 5 +3119 1 +1567 3 +1987 5 +379 2 +1883 10 +3841 8 +4038 6 +453 6 +2498 8 +224 8 +629 2 +411 5 +3853 10 +3104 2 +405 3 +1898 4 +1693 3 +109 2 +469 2 +496 4 +217 7 +632 7 +1710 6 +125 10 +1567 2 +2568 7 +2245 9 +3151 7 +2354 2 +1887 5 +1005 2 +2726 7 +1361 7 +1381 3 +1383 3 +3041 6 +2252 1 +346 4 +759 5 +2045 9 +2877 8 +2281 7 +2373 1 +3292 4 +657 4 +988 6 +3893 6 +1043 9 +788 8 +1341 4 +664 9 +1247 10 +3285 7 +2839 10 +670 10 +593 10 +3427 3 +238 7 +3747 8 +2380 5 +146 2 +2775 10 +2790 1 +2458 7 +791 9 +4028 6 +3665 5 +1495 5 +2756 2 +1237 9 +2449 4 +1139 6 +3249 10 +2747 9 +1513 8 +4050 1 +3195 1 +1455 9 +3482 6 +2337 4 +1523 2 +1430 6 +1146 5 +1655 8 +4057 6 +1455 5 +191 7 +1671 7 +2028 5 +3530 10 +395 9 +2020 4 +3583 7 +950 5 +1105 9 +816 10 +2189 7 +2677 4 +9 2 +483 10 +1606 1 +2663 10 +2964 1 +1523 8 +3645 8 +7 1 +729 2 +185 9 +1680 6 +3629 4 +3886 9 +1507 8 +2202 10 +1123 4 +1048 8 +2469 8 +2455 9 +1450 3 +4064 10 +2044 6 +180 9 +2370 7 +3996 10 +398 9 +1462 1 +1442 10 +3583 1 +2750 9 +1643 4 +2951 6 +79 7 +421 3 +2778 4 +3693 2 +1015 8 +773 3 +3014 1 +1025 10 +3488 9 +3026 3 +3108 9 +3945 4 +62 9 +590 7 +2486 7 +1035 6 +3525 3 +1705 2 +2160 4 +873 10 +4040 1 +1300 10 +442 2 +3648 8 +2035 5 +3611 10 +3103 5 +447 7 +1494 7 +1342 8 +3676 6 +1441 2 +2882 3 +3626 7 +3349 2 +979 4 +960 9 +2272 8 +2477 6 +1631 2 +2462 10 +1635 1 +3521 4 +1538 10 +915 2 +1891 1 +356 2 +3373 9 +81 8 +900 7 +3236 4 +3149 6 +83 8 +890 6 +1643 8 +714 1 +4041 6 +365 6 +1457 7 +1521 2 +2580 5 +2290 9 +471 7 +1491 5 +1655 2 +2727 5 +3081 3 +2307 2 +3816 6 +1678 9 +1613 1 +1890 7 +3107 1 +217 9 +863 10 +1852 6 +554 9 +567 2 +3700 3 +3559 4 +3870 4 +3695 2 
+276 7 +2593 5 +1009 8 +329 7 +1381 9 +2848 2 +3548 10 +2045 4 +512 3 +2469 3 +791 3 +1518 10 +4088 10 +997 1 +4045 10 +825 10 +1449 7 +3425 2 +2816 10 +3579 7 +1068 9 +653 8 +1616 6 +2336 6 +1459 10 +3783 5 +2128 3 +2882 5 +2405 2 +200 4 +1164 9 +2094 10 +1884 8 +1645 7 +1624 2 +2066 7 +1488 4 +1136 3 +2658 10 +2102 3 +1189 7 +3775 3 +1370 7 +3049 5 +272 10 +2760 10 +954 2 +3127 3 +2438 8 +2670 3 +3395 4 +274 9 +2558 5 +1144 7 +2557 5 +647 2 +2018 1 +1909 2 +2846 7 +467 10 +2055 8 +3092 7 +1822 3 +3765 8 +336 2 +610 10 +362 8 +3569 3 +1180 1 +3754 9 +1901 5 +1909 6 +884 3 +2760 10 +74 3 +635 4 +1752 10 +2238 3 +663 4 +3229 2 +1013 3 +1376 3 +1501 4 +2606 2 +3462 1 +326 3 +305 8 +846 9 +990 2 +3598 10 +3582 8 +3796 6 +1731 1 +3279 6 +3472 9 +60 7 +1910 8 +2982 7 +3372 10 +2114 10 +541 7 +294 8 +2316 5 +3760 1 +1284 4 +2374 5 +2717 1 +1313 8 +932 5 +3137 2 +1373 7 +4088 5 +1820 4 +2512 7 +2813 6 +2251 4 +1727 10 +704 6 +483 10 +3281 9 +1622 2 +1284 3 +1293 1 +3241 7 +1508 10 +696 2 +2944 4 +3889 5 +1075 10 +1680 8 +1084 9 +2060 10 +2892 7 +900 5 +2589 7 +1025 4 +3950 6 +953 1 +455 2 +1016 7 +1344 7 +2688 8 +467 9 +2597 9 +2859 8 +2643 8 +3544 6 +1000 8 +225 4 +1473 9 +2134 2 +26 10 +623 7 +2449 9 +479 2 +3936 1 +935 7 +1490 7 +885 7 +437 7 +3937 1 +1729 4 +3078 7 +2020 6 +330 9 +4064 10 +1392 10 +2589 2 +4080 5 +2785 9 +2570 9 +3420 7 +2709 2 +261 1 +2595 8 +2383 8 +1986 1 +991 5 +3796 7 +63 6 +2499 6 +2323 2 +3772 3 +960 1 +1186 1 +3358 3 +2414 8 +940 7 +3606 7 +802 1 +1913 5 +2900 10 +2078 1 +864 2 +3210 3 +4023 7 +3678 9 +1792 10 +3996 5 +2024 4 +2605 7 +2645 3 +1420 5 +3328 9 +2147 9 +2813 2 +1841 3 +3458 9 +777 5 +3564 2 diff --git a/tests/opencl/lbm/main.cc b/tests/opencl/lbm/main.cc index 4f376c8b1..2da743510 100644 --- a/tests/opencl/lbm/main.cc +++ b/tests/opencl/lbm/main.cc @@ -46,7 +46,7 @@ static float* read_output_file(const char* filename, int size) { return NULL; } // Read the float data - if (fread(floats, sizeof(float), size, file) != size) { + if 
(fread(floats, sizeof(float), size, file) != (size_t)size) { fclose(file); free(floats); perror("Error reading floats from file"); @@ -128,6 +128,7 @@ int main(int nArgs, char *arg[]) { MAIN_initialize(¶m, &prm); for (t = 1; t <= param.nTimeSteps; t++) { + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); OpenCL_LBM_performStreamCollide(&prm, OpenCL_srcGrid, OpenCL_dstGrid); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); @@ -198,9 +199,9 @@ void MAIN_printInfo(const MAIN_Param *param) { "\tsimulation type: %s\n" "\tobstacle file : %s\n\n", SIZE_X, SIZE_Y, SIZE_Z, 1e-6 * SIZE_X * SIZE_Y * SIZE_Z, - param->nTimeSteps, param->resultFilename, "store", "lid-driven cavity", - (param->obstacleFilename == NULL) ? "" - : param->obstacleFilename); + param->nTimeSteps, ((param->resultFilename == NULL) ? "" : param->resultFilename), "store", "lid-driven cavity", + ((param->obstacleFilename == NULL) ? "" : param->obstacleFilename) + ); } /*############################################################################*/ @@ -316,7 +317,7 @@ void OpenCL_initialize(struct pb_Parameters *p, OpenCL_Param *prm) { // read kernel binary from file uint8_t *kernel_bin = NULL; size_t kernel_size; - cl_int binary_status = 0; + //cl_int binary_status = 0; clStatus = read_kernel_file("kernel.cl", &kernel_bin, &kernel_size); CHECK_ERROR("read_kernel_file") diff --git a/tests/regression/Makefile b/tests/regression/Makefile index 56b63d1e1..f170c7335 100644 --- a/tests/regression/Makefile +++ b/tests/regression/Makefile @@ -15,6 +15,7 @@ all: $(MAKE) -C sgemmx $(MAKE) -C conv3x $(MAKE) -C sgemm2x + $(MAKE) -C stencil3d run-simx: $(MAKE) -C basic run-simx @@ -30,6 +31,7 @@ run-simx: $(MAKE) -C sgemmx run-simx $(MAKE) -C conv3x run-simx $(MAKE) -C sgemm2x run-simx + $(MAKE) -C stencil3d run-simx run-rtlsim: $(MAKE) -C basic run-rtlsim @@ -45,21 +47,7 @@ run-rtlsim: $(MAKE) -C sgemmx run-rtlsim $(MAKE) -C conv3x run-rtlsim $(MAKE) -C sgemm2x run-rtlsim - -run-opae: - $(MAKE) -C basic run-opae - $(MAKE) 
-C demo run-opae - $(MAKE) -C dogfood run-opae - $(MAKE) -C mstress run-opae - $(MAKE) -C io_addr run-opae - $(MAKE) -C printf run-opae - $(MAKE) -C diverge run-opae - $(MAKE) -C sort run-opae - $(MAKE) -C fence run-opae - $(MAKE) -C vecaddx run-opae - $(MAKE) -C sgemmx run-opae - $(MAKE) -C conv3x run-opae - $(MAKE) -C sgemm2x run-opae + $(MAKE) -C stencil3d run-rtlsim clean: $(MAKE) -C basic clean @@ -74,4 +62,5 @@ clean: $(MAKE) -C vecaddx clean $(MAKE) -C sgemmx clean $(MAKE) -C conv3x clean - $(MAKE) -C sgemm2x clean \ No newline at end of file + $(MAKE) -C sgemm2x clean + $(MAKE) -C stencil3d clean \ No newline at end of file diff --git a/tests/regression/stencil3d/Makefile b/tests/regression/stencil3d/Makefile new file mode 100644 index 000000000..c4aacdb94 --- /dev/null +++ b/tests/regression/stencil3d/Makefile @@ -0,0 +1,14 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := stencil3d + +SRC_DIR := $(VORTEX_HOME)/tests/regression/$(PROJECT) + +SRCS := $(SRC_DIR)/main.cpp + +VX_SRCS := $(SRC_DIR)/kernel.cpp + +OPTS ?= -n32-b2 # 32x32x32 matrix and block size of 2x2x2 + +include ../common.mk \ No newline at end of file diff --git a/tests/regression/stencil3d/common.h b/tests/regression/stencil3d/common.h new file mode 100644 index 000000000..2c4a8ea00 --- /dev/null +++ b/tests/regression/stencil3d/common.h @@ -0,0 +1,18 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#ifndef TYPE +#define TYPE float +#endif + +typedef struct +{ + uint32_t grid_dim[3]; + uint32_t block_dim[3]; + uint32_t size; + uint32_t block_size; + uint64_t A_addr; + uint64_t B_addr; +} kernel_arg_t; + +#endif \ No newline at end of file diff --git a/tests/regression/stencil3d/kernel.cpp b/tests/regression/stencil3d/kernel.cpp new file mode 100644 index 000000000..48e2468ab --- /dev/null +++ b/tests/regression/stencil3d/kernel.cpp @@ -0,0 +1,58 @@ +#include +#include "common.h" + +void kernel_body(kernel_arg_t *arg) +{ + auto A = reinterpret_cast(arg->A_addr); + 
auto B = reinterpret_cast(arg->B_addr); + auto size = arg->size; // Assuming 'size' now represents one dimension of a cubic space. + + // Calculate global column, row, and depth indices using both block and thread indices + int col = blockIdx.x * blockDim.x + threadIdx.x; + int row = blockIdx.y * blockDim.y + threadIdx.y; + int dep = blockIdx.z * blockDim.z + threadIdx.z; + + TYPE sum = 0; + int count = 0; + + // Stencil kernel size is assumed to be 3x3x3 + for (int dz = -1; dz <= 1; ++dz) + { + for (int dy = -1; dy <= 1; ++dy) + { + for (int dx = -1; dx <= 1; ++dx) + { + // Compute the neighbor's index, handling boundary conditions manually + int nz = dep + dz; + int ny = row + dy; + int nx = col + dx; + + // Clamp the indices to be within the boundary of the array + if (nz < 0) {nz = 0;} + else if (nz >= size){ + nz = size - 1;} + if (ny < 0) { + ny = 0; } + else if (ny >= size){ + ny = size - 1;} + if (nx < 0) { + nx = 0;} + else if (nx >= size){ + nx = size - 1;} + + // Add the neighbor's value to sum + sum += A[nz * size * size + ny * size + nx]; + count++; + } + } + } + + // Compute the average of the sum of neighbors and write to the output array + B[dep * size * size + row * size + col] = sum / count; +} + +int main() +{ + auto arg = (kernel_arg_t *)csr_read(VX_CSR_MSCRATCH); + return vx_spawn_threads(3, arg->grid_dim, arg->block_dim, (vx_kernel_func_cb)kernel_body, arg); +} \ No newline at end of file diff --git a/tests/regression/stencil3d/main.cpp b/tests/regression/stencil3d/main.cpp new file mode 100644 index 000000000..a47f94710 --- /dev/null +++ b/tests/regression/stencil3d/main.cpp @@ -0,0 +1,328 @@ + +#include +#include +#include +#include +#include +#include "common.h" + +#define FLOAT_ULP 6 + +#define RT_CHECK(_expr) \ + do \ + { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + 
+/////////////////////////////////////////////////////////////////////////////// + +template +class Comparator +{ +}; + +template <> +class Comparator +{ +public: + static const char *type_str() + { + return "integer"; + } + static int generate() + { + return rand(); + } + static bool compare(int a, int b, int index, int errors) + { + if (a != b) + { + if (errors < 100) + { + printf("*** error: [%d] expected=%d, actual=%d\n", index, a, b); + } + return false; + } + return true; + } +}; + +template <> +class Comparator +{ +private: + union Float_t + { + float f; + int i; + }; + +public: + static const char *type_str() + { + return "float"; + } + static float generate() + { + return static_cast(rand()) / RAND_MAX; + } + static bool compare(float a, float b, int index, int errors) + { + union fi_t + { + float f; + int32_t i; + }; + fi_t fa, fb; + fa.f = a; + fb.f = b; + auto d = std::abs(fa.i - fb.i); + if (d > FLOAT_ULP) + { + if (errors < 100) + { + printf("*** error: [%d] expected=%f, actual=%f\n", index, a, b); + } + return false; + } + return true; + } +}; + +static void stencil_cpu(TYPE *out, const TYPE *in, uint32_t width, uint32_t height, uint32_t depth) +{ + // We'll need to handle boundary conditions. Let's assume we use boundary replication. 
+ for (uint32_t z = 0; z < depth; z++) + { + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + TYPE sum = 0; + int count = 0; + + // Iterate over the neighborhood + for (int dz = -1; dz <= 1; dz++) + { + for (int dy = -1; dy <= 1; dy++) + { + for (int dx = -1; dx <= 1; dx++) + { + // Compute the neighbor's index + int nx = (int)x + dx; + int ny = (int)y + dy; + int nz = (int)z + dz; + + // Check bounds and replicate the boundary values + if (nx < 0) + { + nx = 0; + } + else if (nx >= (int)width) + { + nx = width - 1; + } + if (ny < 0) + { + ny = 0; + } + else if (ny >= (int)height) + { + ny = height - 1; + } + if (nz < 0) + { + nz = 0; + } + else if (nz >= (int)depth) + { + nz = depth - 1; + } + + // Sum up the values + sum += in[nz * width * height + ny * width + nx]; + count++; + } + } + } + + // Write the averaged value to the output array + out[z * width * height + y * width + x] = sum / count; + } + } + } +} + +const char *kernel_file = "kernel.vxbin"; +uint32_t size = 64; +uint32_t block_size = 2; + +vx_device_h device = nullptr; +vx_buffer_h A_buffer = nullptr; +vx_buffer_h B_buffer = nullptr; +vx_buffer_h krnl_buffer = nullptr; +vx_buffer_h args_buffer = nullptr; +kernel_arg_t kernel_arg = {}; + +static void show_usage() +{ + std::cout << "Vortex Test." 
<< std::endl; + std::cout << "Usage: [-k: kernel] [-n matrix_size] [-b:block_size] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "n:b:k:h?")) != -1) // optstring must list every option handled in the switch below + { + switch (c) + { + case 'n': + size = atoi(optarg); + break; + case 'b': + block_size = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'h': + case '?': + { + show_usage(); + exit(0); + } + break; + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() +{ + if (device) + { + vx_mem_free(A_buffer); + vx_mem_free(B_buffer); + vx_mem_free(krnl_buffer); + vx_mem_free(args_buffer); + vx_dev_close(device); + } +} + +int main(int argc, char *argv[]) +{ + // parse command arguments + parse_args(argc, argv); + + if (block_size == 0 || (size / block_size) * block_size != size) // reject 0 first: atoi() yields 0 on bad input and the division would trap + { + printf("Error: matrix size %d must be a multiple of block size %d\n", size, block_size); + return -1; + } + + std::srand(50); + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + uint32_t size_cubed = size * size * size; + uint32_t buf_size = size_cubed * sizeof(TYPE); + + std::cout << "data type: " << Comparator::type_str() << std::endl; + std::cout << "matrix size: " << size << "x" << size << std::endl; + std::cout << "block size: " << block_size << "x" << block_size << std::endl; + + kernel_arg.grid_dim[0] = size / block_size; + kernel_arg.grid_dim[1] = size / block_size; + kernel_arg.grid_dim[2] = size / block_size; + kernel_arg.block_dim[0] = block_size; + kernel_arg.block_dim[1] = block_size; + kernel_arg.block_dim[2] = block_size; + kernel_arg.size = size; + kernel_arg.block_size = block_size; + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_READ, &A_buffer)); + RT_CHECK(vx_mem_address(A_buffer, &kernel_arg.A_addr)); + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_WRITE, &B_buffer)); + 
RT_CHECK(vx_mem_address(B_buffer, &kernel_arg.B_addr)); + + std::cout << "A_addr=0x" << std::hex << kernel_arg.A_addr << std::endl; + std::cout << "B_addr=0x" << std::hex << kernel_arg.B_addr << std::endl; + + // allocate host buffers + std::cout << "allocate host buffers" << std::endl; + std::vector h_A(size_cubed); + std::vector h_B(size_cubed); + + // generate source data + for (uint32_t i = 0; i < size_cubed; ++i) + { + h_A[i] = Comparator::generate(); + } + + // upload source buffer0 + std::cout << "upload source buffer0" << std::endl; + RT_CHECK(vx_copy_to_dev(A_buffer, h_A.data(), 0, buf_size)); + + // upload program + std::cout << "upload program" << std::endl; + RT_CHECK(vx_upload_kernel_file(device, kernel_file, &krnl_buffer)); + + // upload kernel argument + std::cout << "upload kernel argument" << std::endl; + RT_CHECK(vx_upload_bytes(device, &kernel_arg, sizeof(kernel_arg_t), &args_buffer)); + + // start device + std::cout << "start device" << std::endl; + RT_CHECK(vx_start(device, krnl_buffer, args_buffer)); + + // wait for completion + std::cout << "wait for completion" << std::endl; + RT_CHECK(vx_ready_wait(device, VX_MAX_TIMEOUT)); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev(h_B.data(), B_buffer, 0, buf_size)); + + // verify result + std::cout << "verify result" << std::endl; + int errors = 0; + { + std::vector h_ref(size_cubed); + stencil_cpu(h_ref.data(), h_A.data(), size, size, size); + + for (uint32_t i = 0; i < h_ref.size(); ++i) + { + if (!Comparator::compare(h_ref[i], h_B[i], i, errors)) // compare() prints its 1st argument as "expected": pass the CPU reference first, device output second + { + ++errors; + } + } + } + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + if (errors != 0) + { + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return errors; + } + + std::cout << "PASSED!" 
<< std::endl; + + return 0; +} diff --git a/third_party/Makefile b/third_party/Makefile index 711b3ed6b..a2f74264e 100644 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -5,12 +5,12 @@ fpnew: softfloat: SPECIALIZE_TYPE=RISCV SOFTFLOAT_OPTS="-fPIC -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 -DSOFTFLOAT_FAST_DIV64TO32" $(MAKE) -C softfloat/build/Linux-x86_64-GCC -ramulator: - cd ramulator && git apply ../../miscs/patches/ramulator.patch 2> /dev/null; true - $(MAKE) -C ramulator libramulator.a +ramulator/libramulator.so: + cd ramulator && mkdir -p build && cd build && cmake .. && make -j4 +ramulator: ramulator/libramulator.so clean: $(MAKE) -C softfloat/build/Linux-x86_64-GCC clean - $(MAKE) -C ramulator clean + rm -rf ramulator/build ramulator/libramulator.so .PHONY: all fpnew softfloat ramulator \ No newline at end of file diff --git a/third_party/ramulator b/third_party/ramulator index 214f63584..e62c84a6f 160000 --- a/third_party/ramulator +++ b/third_party/ramulator @@ -1 +1 @@ -Subproject commit 214f635845214adf030367939655d172ef0fed5f +Subproject commit e62c84a6f0e06566ba6e182d308434b4532068a5 From 4f9b15d96d8a72743074200d8bda1be487d67d85 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:54:17 -0700 Subject: [PATCH 158/488] minor update --- hw/syn/xilinx/dut/project.tcl | 16 ++++++++++++++-- hw/syn/xilinx/sandbox/project.tcl.in | 17 ++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index 05b76d21f..c3e7e431c 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -11,6 +11,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# Start time +set start_time [clock seconds] + if { $::argc != 5 } { puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" puts "Usage: $::argv0 \n" @@ -77,6 +80,15 @@ write_checkpoint -force post_route.dcp report_route_status -file route.rpt # Generate the synthesis report -report_timing -file timing.rpt +report_timing_summary -file timing.rpt report_power -file power.rpt -report_drc -file drc.rpt \ No newline at end of file +report_drc -file drc.rpt + +# End time and calculation +set elapsed_time [expr {[clock seconds] - $start_time}] + +# Display elapsed time +set hours [format "%02d" [expr {$elapsed_time / 3600}]] +set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]] +set seconds [format "%02d" [expr {$elapsed_time % 60}]] +puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" \ No newline at end of file diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index e92e31a44..7a25f6278 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -11,6 +11,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# Start time +set start_time [clock seconds] + if { $::argc != 3 } { puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n" puts "Usage: $::argv0 \n" @@ -140,7 +143,6 @@ set_property -name "top_lib" -value "xil_defaultlib" -objects $obj set_property -name "verilog_define" -value "" -objects $obj set_property -name "verilog_uppercase" -value "0" -objects $obj - # Set 'utils_1' fileset object set obj [get_filesets utils_1] # Empty (no sources present) @@ -405,6 +407,15 @@ open_run impl_1 report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages report_place_status -file place.rpt report_route_status -file route.rpt -report_timing -file timing.rpt +report_timing_summary -file timing.rpt report_power -file power.rpt -report_drc -file drc.rpt \ No newline at end of file +report_drc -file drc.rpt + +# End time and calculation +set elapsed_time [expr {[clock seconds] - $start_time}] + +# Display elapsed time +set hours [format "%02d" [expr {$elapsed_time / 3600}]] +set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]] +set seconds [format "%02d" [expr {$elapsed_time % 60}]] +puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" \ No newline at end of file From ade6b2c9856e0334fadf35f699f74fe7023d977c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:55:25 -0700 Subject: [PATCH 159/488] timing optimization --- hw/rtl/VX_socket.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 33c29e515..9ed76814b 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -105,7 +105,7 @@ module VX_socket import VX_gpu_pkg::*; #( .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (0), .NC_ENABLE (0), - .CORE_OUT_BUF (2), + .CORE_OUT_BUF (3), .MEM_OUT_BUF (0) ) icache ( `ifdef PERF_ENABLE @@ -152,7 +152,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITEBACK (`DCACHE_WRITEBACK), .DIRTY_BYTES (`DCACHE_WRITEBACK), .NC_ENABLE (1), - .CORE_OUT_BUF (2), + 
.CORE_OUT_BUF (3), .MEM_OUT_BUF (0) ) dcache ( `ifdef PERF_ENABLE @@ -185,7 +185,7 @@ module VX_socket import VX_gpu_pkg::*; #( .TAG_WIDTH (L1_MEM_TAG_WIDTH), .TAG_SEL_IDX (0), .ARBITER ("R"), - .REQ_OUT_BUF (0), + .REQ_OUT_BUF (3), .RSP_OUT_BUF (3) ) mem_arb ( .clk (clk), From bcf7d9f9606944b90a8012a15be5a3e677b20650 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:56:14 -0700 Subject: [PATCH 160/488] timing optimization --- hw/rtl/fpu/VX_fcvt_unit.sv | 73 +++++++++++++++++++------------------- hw/rtl/fpu/VX_fncp_unit.sv | 67 +++++++++++++++++----------------- hw/rtl/fpu/VX_fpu_cvt.sv | 5 +-- hw/rtl/fpu/VX_fpu_div.sv | 2 +- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- hw/rtl/fpu/VX_fpu_ncp.sv | 5 +-- hw/rtl/fpu/VX_fpu_sqrt.sv | 2 +- 7 files changed, 80 insertions(+), 76 deletions(-) diff --git a/hw/rtl/fpu/VX_fcvt_unit.sv b/hw/rtl/fpu/VX_fcvt_unit.sv index b5b7b1690..5756a25ed 100644 --- a/hw/rtl/fpu/VX_fcvt_unit.sv +++ b/hw/rtl/fpu/VX_fcvt_unit.sv @@ -1,17 +1,17 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// Modified port of cast module from fpnew Libray +// Modified port of cast module from fpnew Libray // reference: https://github.com/pulp-platform/fpnew `include "VX_fpu_define.vh" @@ -22,7 +22,8 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( parameter LATENCY = 1, parameter INT_WIDTH = 32, parameter MAN_BITS = 23, - parameter EXP_BITS = 8 + parameter EXP_BITS = 8, + parameter OUT_REG = 0 ) ( input wire clk, input wire reset, @@ -35,10 +36,10 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( input wire is_signed, input wire [31:0] dataa, - output wire [31:0] result, + output wire [31:0] result, output wire [`FP_FLAGS_BITS-1:0] fflags -); +); // Constants localparam EXP_BIAS = 2**(EXP_BITS-1)-1; @@ -55,11 +56,11 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( localparam FMT_SHIFT_COMPENSATION = S_MAN_WIDTH - 1 - MAN_BITS; localparam NUM_FP_STICKY = 2 * S_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R localparam NUM_INT_STICKY = 2 * S_MAN_WIDTH - INT_WIDTH; // removed int and R - + // Input processing - - fclass_t fclass; - VX_fp_classifier #( + + fclass_t fclass; + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_classifier ( @@ -69,9 +70,9 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( ); wire [S_MAN_WIDTH-1:0] input_mant; - wire [S_EXP_WIDTH-1:0] input_exp; + wire [S_EXP_WIDTH-1:0] input_exp; wire input_sign; - + wire i2f_sign = dataa[INT_WIDTH-1]; wire f2i_sign = dataa[INT_WIDTH-1] && is_signed; wire [S_MAN_WIDTH-1:0] f2i_mantissa = f2i_sign ? (-dataa) : dataa; @@ -81,7 +82,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( assign input_sign = is_itof ? 
f2i_sign : i2f_sign; // Pipeline stage0 - + wire is_itof_s0; wire is_signed_s0; wire [2:0] rnd_mode_s0; @@ -92,7 +93,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + S_EXP_WIDTH + S_MAN_WIDTH), - .DEPTH (LATENCY > 2) + .DEPTH (LATENCY > 1) ) pipe_reg0 ( .clk (clk), .reset (reset), @@ -100,7 +101,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_in ({is_itof, is_signed, frm, fclass, input_sign, input_exp, input_mant}), .data_out ({is_itof_s0, is_signed_s0, rnd_mode_s0, fclass_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0}) ); - + // Normalization wire [LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount @@ -113,12 +114,12 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_out (renorm_shamt_s0), .valid_out (mant_is_nonzero_s0) ); - + wire mant_is_zero_s0 = ~mant_is_nonzero_s0; - wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa + wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa wire [S_EXP_WIDTH-1:0] input_exp_n_s0; // unbiased true exponent - + // Realign input mantissa, append zeroes if destination is wider assign input_mant_n_s0 = encoded_mant_s0 << renorm_shamt_s0; @@ -140,7 +141,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + 1 + S_MAN_WIDTH + S_EXP_WIDTH), - .DEPTH (LATENCY > 1) + .DEPTH (LATENCY > 2) ) pipe_reg1 ( .clk (clk), .reset (reset), @@ -169,30 +170,30 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( wire of_before_round_s1 = overflow; // Pipeline stage2 - + wire is_itof_s2; wire is_signed_s2; wire [2:0] rnd_mode_s2; - fclass_t fclass_s2; + fclass_t fclass_s2; wire mant_is_zero_s2; wire input_sign_s2; wire [2*S_MAN_WIDTH:0] destination_mant_s2; wire [EXP_BITS-1:0] final_exp_s2; wire of_before_round_s2; - + VX_pipe_register #( .DATAW (1 + 1 + `INST_FRM_BITS + $bits(fclass_t) + 1 + 1 + (2*S_MAN_WIDTH+1) + EXP_BITS + 1), - .DEPTH 
(LATENCY > 3) + .DEPTH (LATENCY > 0) ) pipe_reg2 ( .clk (clk), .reset (reset), .enable (enable), .data_in ({is_itof_s1, is_signed_s1, rnd_mode_s1, fclass_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}), .data_out ({is_itof_s2, is_signed_s2, rnd_mode_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2}) - ); - + ); + // Rouding and classification - + wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments wire [INT_WIDTH-1:0] final_int_s2; // integer shifted in position wire [1:0] f2i_round_sticky_bits_s2, i2f_round_sticky_bits_s2; @@ -237,20 +238,20 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( wire is_itof_s3; wire is_signed_s3; - fclass_t fclass_s3; + fclass_t fclass_s3; wire mant_is_zero_s3; wire input_sign_s3; wire rounded_sign_s3; wire [INT_WIDTH-1:0] rounded_abs_s3; - wire of_before_round_s3; + wire of_before_round_s3; wire f2i_round_has_sticky_s3; wire i2f_round_has_sticky_s3; - `UNUSED_VAR (fclass_s3) + `UNUSED_VAR (fclass_s3) VX_pipe_register #( .DATAW (1 + 1 + $bits(fclass_t) + 1 + 1 + 32 + 1 + 1 + 1 + 1), - .DEPTH (LATENCY > 4) + .DEPTH (LATENCY > 3) ) pipe_reg3 ( .clk (clk), .reset (reset), @@ -258,7 +259,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_in ({is_itof_s2, is_signed_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, f2i_round_has_sticky_s2, i2f_round_has_sticky_s2}), .data_out ({is_itof_s3, is_signed_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, f2i_round_has_sticky_s3, i2f_round_has_sticky_s3}) ); - + // Assemble regular result, nan box short ones. Int zeroes need to be detected wire [INT_WIDTH-1:0] fmt_result_s3 = mant_is_zero_s3 ? 
0 : {rounded_sign_s3, rounded_abs_s3[EXP_BITS+MAN_BITS-1:0]}; @@ -278,18 +279,18 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( f2i_special_result_s3[INT_WIDTH-2:0] = 2**(INT_WIDTH-1) - 1; // alone yields 2**(31)-1 f2i_special_result_s3[INT_WIDTH-1] = ~is_signed_s3; // for unsigned casts yields 2**31 end - end + end // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) - wire f2i_result_is_special_s3 = fclass_s3.is_nan + wire f2i_result_is_special_s3 = fclass_s3.is_nan | fclass_s3.is_inf | of_before_round_s3 | (input_sign_s3 & ~is_signed_s3 & ~rounded_int_res_zero_s3); - + fflags_t f2i_special_status_s3; fflags_t i2f_status_s3, f2i_status_s3; fflags_t tmp_fflags_s3; - + // All integer special cases are invalid assign f2i_special_status_s3 = {1'b1, 4'h0}; @@ -306,7 +307,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (32 + `FP_FLAGS_BITS), - .DEPTH (LATENCY > 0) + .DEPTH (OUT_REG) ) pipe_reg4 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fncp_unit.sv b/hw/rtl/fpu/VX_fncp_unit.sv index a0876dcd7..27836fcbc 100644 --- a/hw/rtl/fpu/VX_fncp_unit.sv +++ b/hw/rtl/fpu/VX_fncp_unit.sv @@ -1,17 +1,17 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// Modified port of noncomp module from fpnew Libray +// Modified port of noncomp module from fpnew Libray // reference: https://github.com/pulp-platform/fpnew `include "VX_fpu_define.vh" @@ -19,9 +19,10 @@ `ifdef FPU_DSP module VX_fncp_unit import VX_fpu_pkg::*; #( - parameter LATENCY = 2, + parameter LATENCY = 1, parameter EXP_BITS = 8, - parameter MAN_BITS = 23 + parameter MAN_BITS = 23, + parameter OUT_REG = 0 ) ( input wire clk, input wire reset, @@ -33,10 +34,10 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( input wire [31:0] dataa, input wire [31:0] datab, - output wire [31:0] result, + output wire [31:0] result, output wire [`FP_FLAGS_BITS-1:0] fflags -); +); localparam NEG_INF = 32'h00000001, NEG_NORM = 32'h00000002, NEG_SUBNORM = 32'h00000004, @@ -55,15 +56,15 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( wire a_smaller, ab_equal; // Setup - assign a_sign = dataa[31]; + assign a_sign = dataa[31]; assign a_exponent = dataa[30:23]; assign a_mantissa = dataa[22:0]; - assign b_sign = datab[31]; + assign b_sign = datab[31]; assign b_exponent = datab[30:23]; assign b_mantissa = datab[22:0]; - VX_fp_classifier #( + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_class_a ( @@ -72,7 +73,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( .clss_o (a_fclass) ); - VX_fp_classifier #( + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_class_b ( @@ -82,7 +83,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( ); assign a_smaller = (dataa < datab) ^ (a_sign || b_sign); - assign ab_equal = (dataa == datab) + assign ab_equal = (dataa == datab) || (a_fclass.is_zero && b_fclass.is_zero); // +0 == -0 // Pipeline stage0 @@ -101,54 +102,54 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (4 + 2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1), - .DEPTH (LATENCY > 1) + .DEPTH (LATENCY > 0) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (enable), .data_in ({op_mod, dataa, datab, a_sign, b_sign, a_exponent, 
a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}), .data_out ({op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0}) - ); + ); // FCLASS reg [31:0] fclass_mask_s0; // generate a 10-bit mask for integer reg - always @(*) begin + always @(*) begin if (a_fclass_s0.is_normal) begin fclass_mask_s0 = a_sign_s0 ? NEG_NORM : POS_NORM; - end + end else if (a_fclass_s0.is_inf) begin fclass_mask_s0 = a_sign_s0 ? NEG_INF : POS_INF; - end + end else if (a_fclass_s0.is_zero) begin fclass_mask_s0 = a_sign_s0 ? NEG_ZERO : POS_ZERO; - end + end else if (a_fclass_s0.is_subnormal) begin fclass_mask_s0 = a_sign_s0 ? NEG_SUBNORM : POS_SUBNORM; - end + end else if (a_fclass_s0.is_nan) begin fclass_mask_s0 = {22'h0, a_fclass_s0.is_quiet, a_fclass_s0.is_signaling, 8'h0}; - end - else begin + end + else begin fclass_mask_s0 = QUT_NAN; end end - // Min/Max + // Min/Max reg [31:0] fminmax_res_s0; always @(*) begin if (a_fclass_s0.is_nan && b_fclass_s0.is_nan) fminmax_res_s0 = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN - else if (a_fclass_s0.is_nan) + else if (a_fclass_s0.is_nan) fminmax_res_s0 = datab_s0; - else if (b_fclass_s0.is_nan) + else if (b_fclass_s0.is_nan) fminmax_res_s0 = dataa_s0; - else begin + else begin // FMIN, FMAX fminmax_res_s0 = (op_mod_s0[0] ^ a_smaller_s0) ? 
dataa_s0 : datab_s0; end end - // Sign injection + // Sign injection reg [31:0] fsgnj_res_s0; // result of sign injection always @(*) begin case (op_mod_s0[1:0]) @@ -158,12 +159,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( endcase end - // Comparison + // Comparison reg fcmp_res_s0; // result of comparison reg fcmp_fflags_NV_s0; // comparison fflags always @(*) begin case (op_mod_s0[1:0]) - 0: begin // LE + 0: begin // LE if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin fcmp_res_s0 = 0; fcmp_fflags_NV_s0 = 1; @@ -179,12 +180,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( end else begin fcmp_res_s0 = (a_smaller_s0 & ~ab_equal_s0); fcmp_fflags_NV_s0 = 0; - end + end end 2: begin // EQ if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin fcmp_res_s0 = 0; - fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling; + fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling; end else begin fcmp_res_s0 = ab_equal_s0; fcmp_fflags_NV_s0 = 0; @@ -192,7 +193,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( end default: begin fcmp_res_s0 = 'x; - fcmp_fflags_NV_s0 = 'x; + fcmp_fflags_NV_s0 = 'x; end endcase end @@ -216,7 +217,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( // FMV result_s0 = dataa_s0; fflags_NV_s0 = 0; - end + end 6,7: begin // MIN/MAX result_s0 = fminmax_res_s0; @@ -229,7 +230,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (32 + 1), - .DEPTH (LATENCY > 0) + .DEPTH (OUT_REG) ) pipe_reg1 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 37a2ab419..fe99f1ea1 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 
1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), @@ -88,7 +88,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fcvt_unit #( - .LATENCY (`LATENCY_FCVT) + .LATENCY (`LATENCY_FCVT), + .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) ) fcvt_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 81fc8f022..44b5bedfb 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -68,7 +68,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 3522d8a1e..a5cb89a1a 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -99,7 +99,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 34b822d89..a7057455b 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), @@ -93,7 +93,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fncp_unit #( - .LATENCY (`LATENCY_FNCP) + .LATENCY (`LATENCY_FNCP), + .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 
1 : 0) ) fncp_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index a6e6dda9a..5aacf2d29 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -62,7 +62,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), From 370daf1025d27ac0436aaf70d918205507070dbf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:56:56 -0700 Subject: [PATCH 161/488] fifo refactoring --- hw/rtl/libs/VX_fifo_queue.sv | 295 ++++++++++++--------------------- hw/rtl/libs/VX_pending_size.sv | 184 ++++++++++++-------- 2 files changed, 216 insertions(+), 263 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index ea00d67c7..201a45aa9 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -36,225 +36,134 @@ module VX_fifo_queue #( output wire [SIZEW-1:0] size ); - localparam ADDRW = `CLOG2(DEPTH); - `STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!")) `STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!")) `STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!")) `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!")) + VX_pending_size #( + .SIZE (DEPTH), + .ALM_EMPTY (ALM_EMPTY), + .ALM_FULL (ALM_FULL) + ) pending_size ( + .clk (clk), + .reset (reset), + .incr (push), + .decr (pop), + .empty (empty), + .full (full), + .alm_empty(alm_empty), + .alm_full(alm_full), + .size (size) + ); + if (DEPTH == 1) begin reg [DATAW-1:0] head_r; - reg size_r; always @(posedge clk) begin - if (reset) begin - head_r <= '0; - size_r <= '0; - end else begin - `ASSERT(~push || ~full, ("runtime error: writing to a full 
queue")); - `ASSERT(~pop || ~empty, ("runtime error: reading an empty queue")); - if (push) begin - if (~pop) begin - size_r <= 1; - end - end else if (pop) begin - size_r <= '0; - end - if (push) begin - head_r <= data_in; - end + if (push) begin + head_r <= data_in; end end - assign data_out = head_r; - assign empty = (size_r == 0); - assign alm_empty = 1'b1; - assign full = (size_r != 0); - assign alm_full = 1'b1; - assign size = size_r; + assign data_out = head_r; end else begin - reg empty_r, alm_empty_r; - reg full_r, alm_full_r; - reg [ADDRW-1:0] used_r; - wire [ADDRW-1:0] used_n; + localparam ADDRW = `CLOG2(DEPTH); - always @(posedge clk) begin - if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - full_r <= 0; - alm_full_r <= 0; - used_r <= '0; - end else begin - `ASSERT(~(push && ~pop) || ~full, ("runtime error: incrementing full queue")); - `ASSERT(~(pop && ~push) || ~empty, ("runtime error: decrementing empty queue")); - if (push) begin - if (~pop) begin - empty_r <= 0; - if (used_r == ADDRW'(ALM_EMPTY)) - alm_empty_r <= 0; - if (used_r == ADDRW'(DEPTH-1)) - full_r <= 1; - if (used_r == ADDRW'(ALM_FULL-1)) - alm_full_r <= 1; - end - end else if (pop) begin - full_r <= 0; - if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; - if (used_r == ADDRW'(1)) - empty_r <= 1; - if (used_r == ADDRW'(ALM_EMPTY+1)) - alm_empty_r <= 1; - end - used_r <= used_n; - end - end + if (OUT_REG != 0) begin - if (DEPTH == 2 && LUTRAM == 0) begin + wire [DATAW-1:0] dout; + reg [DATAW-1:0] dout_r; + reg [ADDRW-1:0] wr_ptr_r; + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] rd_ptr_n_r; - assign used_n = used_r ^ (push ^ pop); - - if (0 == OUT_REG) begin - - reg [1:0][DATAW-1:0] shift_reg; - - always @(posedge clk) begin - if (push) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; - end - end - - assign data_out = shift_reg[!used_r[0]]; - - end else begin - - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - - always @(posedge clk) begin - if (push) begin - 
buffer <= data_in; - end - if (push && (empty_r || (used_r && pop))) begin - data_out_r <= data_in; - end else if (pop) begin - data_out_r <= buffer; - end - end - - assign data_out = data_out_r; - - end - - end else begin - - assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop))); - - if (0 == OUT_REG) begin - - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] wr_ptr_r; - - always @(posedge clk) begin - if (reset) begin - rd_ptr_r <= '0; - wr_ptr_r <= '0; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_r + ADDRW'(pop); - end - end - - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_r), - .rdata (data_out) - ); - - end else begin - - wire [DATAW-1:0] dout; - reg [DATAW-1:0] dout_r; - reg [ADDRW-1:0] wr_ptr_r; - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r; - - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - rd_ptr_n_r <= 1; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - if (DEPTH > 2) begin - rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); - end else begin // (DEPTH == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; - end + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= '0; + rd_ptr_r <= '0; + rd_ptr_n_r <= 1; + end else begin + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + if (pop) begin + rd_ptr_r <= rd_ptr_n_r; + if (DEPTH > 2) begin + rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); + end else begin // (DEPTH == 2); + rd_ptr_n_r <= ~rd_ptr_n_r; end end end - - wire going_empty; - if (ALM_EMPTY == 1) begin - assign going_empty = alm_empty_r; - end else begin - assign going_empty = (used_r == ADDRW'(1)); - end - - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr 
(wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_n_r), - .rdata (dout) - ); - - always @(posedge clk) begin - if (push && (empty_r || (going_empty && pop))) begin - dout_r <= data_in; - end else if (pop) begin - dout_r <= dout; - end - end - - assign data_out = dout_r; end - end - assign empty = empty_r; - assign alm_empty = alm_empty_r; - assign full = full_r; - assign alm_full = alm_full_r; - assign size = {full_r, used_r}; + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (1'b1), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_n_r), + .rdata (dout) + ); + + wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW] == ADDRW'(1)); + + always @(posedge clk) begin + if (push && (empty || (going_empty && pop))) begin + dout_r <= data_in; + end else if (pop) begin + dout_r <= dout; + end + end + + assign data_out = dout_r; + + end else begin + + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] wr_ptr_r; + + always @(posedge clk) begin + if (reset) begin + rd_ptr_r <= '0; + wr_ptr_r <= '0; + end else begin + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + rd_ptr_r <= rd_ptr_r + ADDRW'(pop); + end + end + + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (1'b1), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_r), + .rdata (data_out) + ); + + end end + `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("runtime error: incrementing full queue")); + `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("runtime error: decrementing empty queue")); + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 031e57695..3d593156f 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -13,7 +13,7 @@ `include "VX_platform.vh" -//`TRACING_OFF +`TRACING_OFF module VX_pending_size #( parameter SIZE = 1, 
parameter INCRW = 1, @@ -34,97 +34,141 @@ module VX_pending_size #( ); `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) - localparam ADDRW = `LOG2UP(SIZE); - reg empty_r, alm_empty_r; - reg full_r, alm_full_r; + if (SIZE == 1) begin - if (INCRW != 1 || DECRW != 1) begin - - reg [SIZEW-1:0] size_r; - - wire [SIZEW-1:0] size_n = size_r + SIZEW'(incr) - SIZEW'(decr); + reg size_r; always @(posedge clk) begin if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - alm_full_r <= 0; - full_r <= 0; - size_r <= '0; + size_r <= '0; end else begin - `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); - `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); - size_r <= size_n; - empty_r <= (size_n == SIZEW'(0)); - alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY)); - full_r <= (size_n == SIZEW'(SIZE)); - alm_full_r <= (size_n == SIZEW'(ALM_FULL)); + if (incr) begin + if (~decr) begin + size_r <= 1; + end + end else if (decr) begin + size_r <= '0; + end end end - assign size = size_r; + assign empty = (size_r == 0); + assign full = (size_r != 0); + assign alm_empty = 1'b1; + assign alm_full = 1'b1; + assign size = size_r; end else begin - reg [ADDRW-1:0] used_r; - wire [ADDRW-1:0] used_n; + logic empty_r, alm_empty_r; + logic full_r, alm_full_r; - always @(posedge clk) begin - if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - full_r <= 0; - alm_full_r <= 0; - used_r <= '0; - end else begin - `ASSERT(~(incr && ~decr) || ~full, ("runtime error: counter overflow")); - `ASSERT(~(decr && ~incr) || ~empty, ("runtime error: counter underflow")); - if (incr) begin - if (~decr) begin - empty_r <= 0; - if (used_r == ADDRW'(ALM_EMPTY)) - alm_empty_r <= 0; - if (used_r == ADDRW'(SIZE-1)) - full_r <= 1; - if (used_r == ADDRW'(ALM_FULL-1)) - alm_full_r <= 1; - end - end else if (decr) begin - if 
(used_r == ADDRW'(1)) - empty_r <= 1; - if (used_r == ADDRW'(ALM_EMPTY+1)) - alm_empty_r <= 1; - full_r <= 0; - if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; + if (INCRW != 1 || DECRW != 1) begin + + localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); + + logic [SIZEW-1:0] size_n, size_r; + + assign size_n = $signed(size_r) + SIZEW'($signed(SUBW'(incr) - SUBW'(decr))); + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + alm_empty_r <= 1; + alm_full_r <= 0; + size_r <= '0; + end else begin + `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); + `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); + empty_r <= (size_n == SIZEW'(0)); + full_r <= (size_n == SIZEW'(SIZE)); + alm_empty_r <= (size_n <= SIZEW'(ALM_EMPTY)); + alm_full_r <= (size_n >= SIZEW'(ALM_FULL)); + size_r <= size_n; end - used_r <= used_n; end - end - if (SIZE == 2) begin - assign used_n = used_r ^ (incr ^ decr); + assign size = size_r; + end else begin - assign used_n = $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr))); - end - if (SIZE > 1) begin - if (SIZEW > ADDRW) begin - assign size = {full_r, used_r}; + localparam ADDRW = `LOG2UP(SIZE); + + reg [ADDRW-1:0] used_r; + + wire is_empty_n = (used_r == ADDRW'(1)); + wire is_full_n = (used_r == ADDRW'(SIZE-1)); + + if (SIZE > 2) begin + + wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); + wire is_alm_empty_n= (used_r == ADDRW'(ALM_EMPTY+1)); + wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); + wire is_alm_full_n = (used_r == ADDRW'(ALM_FULL-1)); + + wire [1:0] push_minus_pop = {~incr & decr, incr ^ decr}; + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + alm_empty_r <= 0; + alm_full_r <= 0; + used_r <= '0; + end else begin + if (incr) begin + if (~decr) begin + empty_r <= 0; + if (is_alm_empty) + alm_empty_r <= 0; + if (is_full_n) + full_r <= 1; + if (is_alm_full_n) + alm_full_r 
<= 1; + end + end else if (decr) begin + full_r <= 0; + if (is_alm_full) + alm_full_r <= 0; + if (is_empty_n) + empty_r <= 1; + if (is_alm_empty_n) + alm_empty_r <= 1; + end + used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop)); + end + end + end else begin - assign size = used_r; + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + used_r <= '0; + end else begin + empty_r <= (empty_r & ~incr) | (~full_r & decr & ~incr); + full_r <= (~empty_r & incr & ~decr) | (full_r & ~(decr ^ incr)); + used_r <= used_r ^ (incr ^ decr); + end + end + + assign alm_empty_r = used_r; + assign alm_full_r = used_r; end - end else begin - assign size = full_r; + + assign size = {full_r, used_r}; + end + assign empty = empty_r; + assign full = full_r; + assign alm_empty = alm_empty_r; + assign alm_full = alm_full_r; + end - assign empty = empty_r; - assign alm_empty = alm_empty_r; - assign alm_full = alm_full_r; - assign full = full_r; - endmodule -//`TRACING_ON +`TRACING_ON From 31a5ab714ef0ef7bb2cd5dc9ad73fcac8db9bb1d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 01:57:45 -0700 Subject: [PATCH 162/488] xbar timing optimitzaion --- hw/rtl/libs/VX_stream_xbar.sv | 58 ++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index b37c9b676..3dd30bc86 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -49,15 +49,35 @@ module VX_stream_xbar #( // (#inputs > 1) and (#outputs > 1) + wire [NUM_INPUTS-1:0][NUM_OUTPUTS-1:0] per_output_valid_in; + wire [NUM_OUTPUTS-1:0][NUM_INPUTS-1:0] per_output_valid_in_w; + wire [NUM_OUTPUTS-1:0][NUM_INPUTS-1:0] per_output_ready_in; + wire [NUM_INPUTS-1:0][NUM_OUTPUTS-1:0] per_output_ready_in_w; + + VX_transpose #( + .N (NUM_OUTPUTS), + .M (NUM_INPUTS) + ) rdy_in_transpose ( + .data_in (per_output_ready_in), + .data_out (per_output_ready_in_w) + ); + + VX_transpose #( 
+ .N (NUM_INPUTS), + .M (NUM_OUTPUTS) + ) val_in_transpose ( + .data_in (per_output_valid_in), + .data_out (per_output_valid_in_w) + ); + + for (genvar i = 0; i < NUM_INPUTS; ++i) begin + assign per_output_valid_in[i] = NUM_OUTPUTS'(valid_in[i]) << sel_in[i]; + assign ready_in[i] = | per_output_ready_in_w[i]; + end for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - - wire [NUM_INPUTS-1:0] valid_in_q; - for (genvar j = 0; j < NUM_INPUTS; ++j) begin - assign valid_in_q[j] = valid_in[j] && (sel_in[j] == i); - end - + VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -68,7 +88,7 @@ module VX_stream_xbar #( ) xbar_arb ( .clk (clk), .reset (reset), - .valid_in (valid_in_q), + .valid_in (per_output_valid_in_w[i]), .data_in (data_in), .ready_in (per_output_ready_in[i]), .valid_out (valid_out[i]), @@ -78,10 +98,6 @@ module VX_stream_xbar #( ); end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - assign ready_in[i] = per_output_ready_in[sel_in[i]][i]; - end - end else begin // (#inputs >= 1) and (#outputs == 1) @@ -112,14 +128,12 @@ module VX_stream_xbar #( // (#inputs == 1) and (#outputs > 1) - logic [NUM_OUTPUTS-1:0] valid_out_r, ready_out_r; - logic [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r; - always @(*) begin - valid_out_r = '0; - valid_out_r[sel_in] = valid_in; - end - assign data_out_r = {NUM_OUTPUTS{data_in}}; - assign ready_in = ready_out_r[sel_in]; + wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; + wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + + assign ready_in[0] = ready_out_w[sel_in[0]]; + assign valid_out_w = NUM_OUTPUTS'(valid_in[0]) << sel_in[0]; + assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_elastic_buffer #( @@ -130,9 +144,9 @@ module VX_stream_xbar #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_out_r[i]), - .ready_in (ready_out_r[i]), - .data_in (data_out_r[i]), + .valid_in (valid_out_w[i]), + .ready_in (ready_out_w[i]), + .data_in (data_out_w[i]), .data_out (data_out[i]), 
.valid_out (valid_out[i]), .ready_out (ready_out[i]) From cd97945d0d62a51707dab82f702adbf70fab0d96 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 04:51:27 -0700 Subject: [PATCH 163/488] minor update --- hw/rtl/libs/VX_pending_size.sv | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 3d593156f..b456239da 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -98,11 +98,10 @@ module VX_pending_size #( reg [ADDRW-1:0] used_r; - wire is_empty_n = (used_r == ADDRW'(1)); - wire is_full_n = (used_r == ADDRW'(SIZE-1)); - if (SIZE > 2) begin + wire is_empty_n = (used_r == ADDRW'(1)); + wire is_full_n = (used_r == ADDRW'(SIZE-1)); wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); wire is_alm_empty_n= (used_r == ADDRW'(ALM_EMPTY+1)); wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); From 0ed589a3bfb5e49965c4dab3c74ee5ce898701cf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 07:49:08 -0700 Subject: [PATCH 164/488] minor update --- hw/rtl/libs/VX_fifo_queue.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 201a45aa9..e6f94b3b2 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -116,7 +116,7 @@ module VX_fifo_queue #( .rdata (dout) ); - wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW] == ADDRW'(1)); + wire going_empty = (ALM_EMPTY == 1) ? 
alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); always @(posedge clk) begin if (push && (empty || (going_empty && pop))) begin From 1f5cc5343415aef138e9d1a2f5acb91544584144 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 09:16:23 -0700 Subject: [PATCH 165/488] minor update --- hw/rtl/fpu/VX_fpu_cvt.sv | 4 ++-- hw/rtl/fpu/VX_fpu_ncp.sv | 4 ++-- hw/rtl/libs/VX_stream_arb.sv | 26 +++++++++++++------------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index fe99f1ea1..1b6617c60 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (2) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), @@ -89,7 +89,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fcvt_unit #( .LATENCY (`LATENCY_FCVT), - .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_REG (1) ) fcvt_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index a7057455b..16c0df758 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (2) + .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), @@ -94,7 +94,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_PES; ++i) begin VX_fncp_unit #( .LATENCY (`LATENCY_FNCP), - .OUT_REG (((NUM_LANES / NUM_PES) > 2) ? 
1 : 0) + .OUT_REG (1) ) fncp_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index ffb56eb26..413da98f0 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -143,9 +143,9 @@ module VX_stream_arb #( // (#inputs <= max_fanout) and (#outputs == 1) - wire valid_in_r; - wire [DATAW-1:0] data_in_r; - wire ready_in_r; + wire valid_in_w; + wire [DATAW-1:0] data_in_w; + wire ready_in_w; wire arb_valid; wire [NUM_REQS_W-1:0] arb_index; @@ -165,12 +165,12 @@ module VX_stream_arb #( .grant_ready (arb_ready) ); - assign valid_in_r = arb_valid; - assign data_in_r = data_in[arb_index]; - assign arb_ready = ready_in_r; + assign valid_in_w = arb_valid; + assign data_in_w = data_in[arb_index]; + assign arb_ready = ready_in_w; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign ready_in[i] = ready_in_r && arb_onehot[i]; + assign ready_in[i] = ready_in_w && arb_onehot[i]; end VX_elastic_buffer #( @@ -181,9 +181,9 @@ module VX_stream_arb #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in_r), - .ready_in (ready_in_r), - .data_in ({arb_index, data_in_r}), + .valid_in (valid_in_w), + .ready_in (ready_in_w), + .data_in ({arb_index, data_in_w}), .data_out ({sel_out, data_out}), .valid_out (valid_out), .ready_out (ready_out) @@ -285,7 +285,7 @@ module VX_stream_arb #( // (#inputs == 1) and (#outputs <= max_fanout) - wire [NUM_OUTPUTS-1:0] ready_in_r; + wire [NUM_OUTPUTS-1:0] ready_in_w; wire [NUM_OUTPUTS-1:0] arb_requests; wire arb_valid; @@ -305,7 +305,7 @@ module VX_stream_arb #( .grant_ready (arb_ready) ); - assign arb_requests = ready_in_r; + assign arb_requests = ready_in_w; assign arb_ready = valid_in[0]; assign ready_in = arb_valid; @@ -319,7 +319,7 @@ module VX_stream_arb #( .clk (clk), .reset (reset), .valid_in (valid_in && arb_onehot[i]), - .ready_in (ready_in_r[i]), + .ready_in (ready_in_w[i]), .data_in (data_in), .data_out (data_out[i]), .valid_out (valid_out[i]), From 
10a870516151992ada988f7278c26a63576a514b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 10:42:48 -0700 Subject: [PATCH 166/488] minor update --- hw/rtl/libs/VX_pending_size.sv | 57 ++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index b456239da..610c2bc04 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -61,8 +61,8 @@ module VX_pending_size #( end else begin - logic empty_r, alm_empty_r; - logic full_r, alm_full_r; + reg empty_r, alm_empty_r; + reg full_r, alm_full_r; if (INCRW != 1 || DECRW != 1) begin @@ -98,43 +98,55 @@ module VX_pending_size #( reg [ADDRW-1:0] used_r; + wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); + wire is_alm_empty_n = (used_r == ADDRW'(ALM_EMPTY+1)); + wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); + wire is_alm_full_n = (used_r == ADDRW'(ALM_FULL-1)); + + always @(posedge clk) begin + if (reset) begin + alm_empty_r <= 1; + alm_full_r <= 0; + end else begin + if (incr) begin + if (~decr) begin + if (is_alm_empty) + alm_empty_r <= 0; + if (is_alm_full_n) + alm_full_r <= 1; + end + end else if (decr) begin + if (is_alm_full) + alm_full_r <= 0; + if (is_alm_empty_n) + alm_empty_r <= 1; + end + end + end + if (SIZE > 2) begin - wire is_empty_n = (used_r == ADDRW'(1)); - wire is_full_n = (used_r == ADDRW'(SIZE-1)); - wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); - wire is_alm_empty_n= (used_r == ADDRW'(ALM_EMPTY+1)); - wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); - wire is_alm_full_n = (used_r == ADDRW'(ALM_FULL-1)); + wire is_empty_n = (used_r == ADDRW'(1)); + wire is_full_n = (used_r == ADDRW'(SIZE-1)); wire [1:0] push_minus_pop = {~incr & decr, incr ^ decr}; always @(posedge clk) begin if (reset) begin - empty_r <= 1; - full_r <= 0; - alm_empty_r <= 0; - alm_full_r <= 0; - used_r <= '0; + empty_r <= 1; + full_r <= 0; + used_r <= '0; end else begin if (incr) 
begin if (~decr) begin empty_r <= 0; - if (is_alm_empty) - alm_empty_r <= 0; if (is_full_n) full_r <= 1; - if (is_alm_full_n) - alm_full_r <= 1; end end else if (decr) begin full_r <= 0; - if (is_alm_full) - alm_full_r <= 0; if (is_empty_n) empty_r <= 1; - if (is_alm_empty_n) - alm_empty_r <= 1; end used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop)); end @@ -153,9 +165,6 @@ module VX_pending_size #( used_r <= used_r ^ (incr ^ decr); end end - - assign alm_empty_r = used_r; - assign alm_full_r = used_r; end assign size = {full_r, used_r}; From 4570a20eee56931c6b7b320fdf8b9dbaa86d86a4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 12:15:12 -0700 Subject: [PATCH 167/488] minor update --- hw/rtl/libs/VX_stream_buffer.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index ea4561933..81978b735 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -85,8 +85,8 @@ module VX_stream_buffer #( end else begin - reg [DATAW-1:0] shift_reg [1:0]; - reg [1:0] fifo_state; + reg [1:0][DATAW-1:0] shift_reg; + reg [1:0] fifo_state; wire fire_in = valid_in && ready_in; wire fire_out = valid_out && ready_out; From 3b336d7fb3ef638b141934bf02a8ad15b25d8671 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 16:59:18 -0700 Subject: [PATCH 168/488] register vs combinational signals naming consistency --- hw/rtl/cache/VX_cache_bank.sv | 8 +- hw/rtl/cache/VX_cache_bypass.sv | 20 ++-- hw/rtl/core/VX_csr_data.sv | 152 ++++++++++++++-------------- hw/rtl/core/VX_decode.sv | 20 ++-- hw/rtl/core/VX_gather_unit.sv | 20 ++-- hw/rtl/core/VX_lsu_slice.sv | 22 ++--- hw/rtl/fpu/VX_fpu_dsp.sv | 10 +- hw/rtl/libs/VX_onehot_encoder.sv | 12 +-- hw/rtl/libs/VX_onehot_mux.sv | 110 ++++++++++----------- hw/rtl/libs/VX_popcount.sv | 60 +++++------ hw/rtl/libs/VX_priority_encoder.sv | 16 +-- hw/rtl/libs/VX_rr_arbiter.sv | 154 
++++++++++++++--------------- 12 files changed, 302 insertions(+), 302 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 3dede22d5..22d956dba 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -414,12 +414,12 @@ module VX_cache_bank #( wire [LINE_SIZE-1:0] dirty_byteen_st1; if (`CS_WORDS_PER_LINE > 1) begin - reg [LINE_SIZE-1:0] write_byteen_r; + reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w; always @(*) begin - write_byteen_r = '0; - write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1; + write_byteen_w = '0; + write_byteen_w[wsel_st1] = byteen_st1; end - assign write_byteen_st1 = write_byteen_r; + assign write_byteen_st1 = write_byteen_w; end else begin assign write_byteen_st1 = byteen_st1; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index b2aeb8791..dc88c6c1f 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -158,21 +158,21 @@ module VX_cache_bypass #( wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; if (WORDS_PER_LINE > 1) begin - reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; - reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; + reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_w; + reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_w; wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; always @(*) begin - mem_req_byteen_in_r = '0; - mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen; + mem_req_byteen_in_w = '0; + mem_req_byteen_in_w[req_wsel] = core_req_nc_sel_byteen; - mem_req_data_in_r = 'x; - mem_req_data_in_r[req_wsel] = core_req_nc_sel_data; + mem_req_data_in_w = 'x; + mem_req_data_in_w[req_wsel] = core_req_nc_sel_data; end - assign mem_req_out_byteen = mem_bus_in_if.req_valid ? 
mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r; - assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; + assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_w; + assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_w; if (NUM_REQS > 1) begin assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); end else begin @@ -268,10 +268,10 @@ module VX_cache_bypass #( assign rsp_idx = 1'b0; end - wire [NUM_REQS-1:0] rsp_nc_valid_r = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; + wire [NUM_REQS-1:0] rsp_nc_valid = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; + assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid[i]; assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/core/VX_csr_data.sv b/hw/rtl/core/VX_csr_data.sv index a2b0741ad..aa9b30e05 100644 --- a/hw/rtl/core/VX_csr_data.sv +++ b/hw/rtl/core/VX_csr_data.sv @@ -155,41 +155,41 @@ import VX_fpu_pkg::*; // CSRs read ////////////////////////////////////////////////////////////// - reg [`XLEN-1:0] read_data_ro_r; - reg [`XLEN-1:0] read_data_rw_r; - reg read_addr_valid_r; + reg [`XLEN-1:0] read_data_ro_w; + reg [`XLEN-1:0] read_data_rw_w; + reg read_addr_valid_w; always @(*) begin - read_data_ro_r = '0; - read_data_rw_r = '0; - read_addr_valid_r = 1; + read_data_ro_w = '0; + read_data_rw_w = '0; + read_addr_valid_w = 1; case (read_addr) - `VX_CSR_MVENDORID : read_data_ro_r = `XLEN'(`VENDOR_ID); - `VX_CSR_MARCHID : read_data_ro_r = `XLEN'(`ARCHITECTURE_ID); - `VX_CSR_MIMPID : read_data_ro_r = `XLEN'(`IMPLEMENTATION_ID); - `VX_CSR_MISA : read_data_ro_r = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)}); + `VX_CSR_MVENDORID : read_data_ro_w = `XLEN'(`VENDOR_ID); + `VX_CSR_MARCHID : 
read_data_ro_w = `XLEN'(`ARCHITECTURE_ID); + `VX_CSR_MIMPID : read_data_ro_w = `XLEN'(`IMPLEMENTATION_ID); + `VX_CSR_MISA : read_data_ro_w = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)}); `ifdef EXT_F_ENABLE - `VX_CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]); - `VX_CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]); - `VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]); + `VX_CSR_FFLAGS : read_data_rw_w = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]); + `VX_CSR_FRM : read_data_rw_w = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]); + `VX_CSR_FCSR : read_data_rw_w = `XLEN'(fcsr[read_wid]); `endif - `VX_CSR_MSCRATCH : read_data_rw_r = mscratch; + `VX_CSR_MSCRATCH : read_data_rw_w = mscratch; - `VX_CSR_WARP_ID : read_data_ro_r = `XLEN'(read_wid); - `VX_CSR_CORE_ID : read_data_ro_r = `XLEN'(CORE_ID); - `VX_CSR_ACTIVE_THREADS: read_data_ro_r = `XLEN'(thread_masks[read_wid]); - `VX_CSR_ACTIVE_WARPS: read_data_ro_r = `XLEN'(active_warps); - `VX_CSR_NUM_THREADS: read_data_ro_r = `XLEN'(`NUM_THREADS); - `VX_CSR_NUM_WARPS : read_data_ro_r = `XLEN'(`NUM_WARPS); - `VX_CSR_NUM_CORES : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS); - `VX_CSR_LOCAL_MEM_BASE: read_data_ro_r = `XLEN'(`LMEM_BASE_ADDR); + `VX_CSR_WARP_ID : read_data_ro_w = `XLEN'(read_wid); + `VX_CSR_CORE_ID : read_data_ro_w = `XLEN'(CORE_ID); + `VX_CSR_ACTIVE_THREADS: read_data_ro_w = `XLEN'(thread_masks[read_wid]); + `VX_CSR_ACTIVE_WARPS: read_data_ro_w = `XLEN'(active_warps); + `VX_CSR_NUM_THREADS: read_data_ro_w = `XLEN'(`NUM_THREADS); + `VX_CSR_NUM_WARPS : read_data_ro_w = `XLEN'(`NUM_WARPS); + `VX_CSR_NUM_CORES : read_data_ro_w = `XLEN'(`NUM_CORES * `NUM_CLUSTERS); + `VX_CSR_LOCAL_MEM_BASE: read_data_ro_w = `XLEN'(`LMEM_BASE_ADDR); - `CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_r, cycles); + `CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_w, cycles); - `VX_CSR_MPM_RESERVED : read_data_ro_r = 'x; - 
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x; + `VX_CSR_MPM_RESERVED : read_data_ro_w = 'x; + `VX_CSR_MPM_RESERVED_H : read_data_ro_w = 'x; - `CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret); + `CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_w, commit_csr_if.instret); `VX_CSR_SATP, `VX_CSR_MSTATUS, @@ -200,77 +200,77 @@ import VX_fpu_pkg::*; `VX_CSR_MTVEC, `VX_CSR_MEPC, `VX_CSR_PMPCFG0, - `VX_CSR_PMPADDR0 : read_data_ro_r = `XLEN'(0); + `VX_CSR_PMPADDR0 : read_data_ro_w = `XLEN'(0); default: begin - read_addr_valid_r = 0; + read_addr_valid_w = 0; if ((read_addr >= `VX_CSR_MPM_USER && read_addr < (`VX_CSR_MPM_USER + 32)) || (read_addr >= `VX_CSR_MPM_USER_H && read_addr < (`VX_CSR_MPM_USER_H + 32))) begin - read_addr_valid_r = 1; + read_addr_valid_w = 1; `ifdef PERF_ENABLE case (base_dcrs.mpm_class) `VX_DCR_MPM_CLASS_CORE: begin case (read_addr) // PERF: pipeline - `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles); - `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls); - `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls); - `CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_w, pipeline_perf_if.sched.idles); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_w, pipeline_perf_if.sched.stalls); + `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_w, pipeline_perf_if.issue.ibf_stalls); + `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_w, pipeline_perf_if.issue.scb_stalls); + `CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_w, pipeline_perf_if.issue.opd_stalls); + `CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_ALU]); `ifdef EXT_F_ENABLE - 
`CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_FPU]); `else - `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0)); + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, `PERF_CTR_BITS'(0)); `endif - `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_LSU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_SFU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]); // PERF: memory - `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches); - `CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads); - `CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_r, pipeline_perf_if.stores); - `CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_r, pipeline_perf_if.ifetch_latency); - `CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency); + `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_w, pipeline_perf_if.ifetches); + `CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_w, pipeline_perf_if.loads); + `CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_w, pipeline_perf_if.stores); + `CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_w, pipeline_perf_if.ifetch_latency); + `CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_w, pipeline_perf_if.load_latency); default:; endcase end 
`VX_DCR_MPM_CLASS_MEM: begin case (read_addr) // PERF: icache - `CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_r, mem_perf_if.icache.reads); - `CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_r, mem_perf_if.icache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_r, mem_perf_if.icache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_w, mem_perf_if.icache.reads); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_w, mem_perf_if.icache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_w, mem_perf_if.icache.mshr_stalls); // PERF: dcache - `CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_r, mem_perf_if.dcache.reads); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_r, mem_perf_if.dcache.writes); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_r, mem_perf_if.dcache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_r, mem_perf_if.dcache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_r, mem_perf_if.dcache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_r, mem_perf_if.dcache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_w, mem_perf_if.dcache.reads); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_w, mem_perf_if.dcache.writes); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_w, mem_perf_if.dcache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_w, mem_perf_if.dcache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_w, mem_perf_if.dcache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_w, mem_perf_if.dcache.mshr_stalls); // PERF: lmem - `CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_r, mem_perf_if.lmem.reads); - `CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_r, mem_perf_if.lmem.writes); - `CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_r, mem_perf_if.lmem.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_LMEM_READS, 
read_data_ro_w, mem_perf_if.lmem.reads); + `CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_w, mem_perf_if.lmem.writes); + `CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_w, mem_perf_if.lmem.bank_stalls); // PERF: l2cache - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_r, mem_perf_if.l2cache.reads); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_r, mem_perf_if.l2cache.writes); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_r, mem_perf_if.l2cache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_r, mem_perf_if.l2cache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l2cache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_w, mem_perf_if.l2cache.reads); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_w, mem_perf_if.l2cache.writes); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_w, mem_perf_if.l2cache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_w, mem_perf_if.l2cache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l2cache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l2cache.mshr_stalls); // PERF: l3cache - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_r, mem_perf_if.l3cache.reads); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_r, mem_perf_if.l3cache.writes); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_r, mem_perf_if.l3cache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_r, mem_perf_if.l3cache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l3cache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l3cache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_w, mem_perf_if.l3cache.reads); + 
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_w, mem_perf_if.l3cache.writes); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_w, mem_perf_if.l3cache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_w, mem_perf_if.l3cache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l3cache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l3cache.mshr_stalls); // PERF: memory - `CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_r, mem_perf_if.mem.reads); - `CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_r, mem_perf_if.mem.writes); - `CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_r, mem_perf_if.mem.latency); + `CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_w, mem_perf_if.mem.reads); + `CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, mem_perf_if.mem.writes); + `CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, mem_perf_if.mem.latency); default:; endcase end @@ -282,12 +282,12 @@ import VX_fpu_pkg::*; endcase end - assign read_data_ro = read_data_ro_r; - assign read_data_rw = read_data_rw_r; + assign read_data_ro = read_data_ro_w; + assign read_data_rw = read_data_rw_w; `UNUSED_VAR (base_dcrs) - `RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid)) + `RUNTIME_ASSERT(~read_enable || read_addr_valid_w, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid)) `ifdef PERF_ENABLE `UNUSED_VAR (mem_perf_if.icache); diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 4f6ffe100..de317d497 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -15,15 +15,15 @@ `ifdef EXT_F_ENABLE `define USED_IREG(x) \ - x``_r = {1'b0, ``x}; \ + x``_v = {1'b0, ``x}; \ use_``x = 1 `define USED_FREG(x) \ - x``_r = {1'b1, ``x}; \ + x``_v = {1'b1, ``x}; \ use_``x = 1 `else `define USED_IREG(x) \ - x``_r = ``x; \ + x``_v = ``x; \ use_``x = 1 `endif @@ -50,7 +50,7 
@@ module VX_decode import VX_gpu_pkg::*; #( reg [`EX_BITS-1:0] ex_type; reg [`INST_OP_BITS-1:0] op_type; op_args_t op_args; - reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r; + reg [`NR_BITS-1:0] rd_v, rs1_v, rs2_v, rs3_v; reg use_rd, use_rs1, use_rs2, use_rs3; reg is_wstall; @@ -155,10 +155,10 @@ module VX_decode import VX_gpu_pkg::*; #( ex_type = '0; op_type = 'x; op_args = 'x; - rd_r = '0; - rs1_r = '0; - rs2_r = '0; - rs3_r = '0; + rd_v = '0; + rs1_v = '0; + rs2_v = '0; + rs3_v = '0; use_rd = 0; use_rs1 = 0; use_rs2 = 0; @@ -527,7 +527,7 @@ module VX_decode import VX_gpu_pkg::*; #( end // disable write to integer register r0 - wire wb = use_rd && (rd_r != 0); + wire wb = use_rd && (rd_v != 0); VX_elastic_buffer #( .DATAW (DATAW), @@ -537,7 +537,7 @@ module VX_decode import VX_gpu_pkg::*; #( .reset (reset), .valid_in (fetch_if.valid), .ready_in (fetch_if.ready), - .data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_r, rs1_r, rs2_r, rs3_r}), + .data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_v, rs1_v, rs2_v, rs3_v}), .data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}), .valid_out (decode_if.valid), .ready_out (decode_if.ready) diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 293495eba..402824dac 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -94,31 +94,31 @@ module VX_gather_unit import VX_gpu_pkg::*; #( .ready_out (commit_tmp_if.ready) ); - logic [`NUM_THREADS-1:0] commit_tmask_r; - logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_r; + logic [`NUM_THREADS-1:0] commit_tmask_w; + logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_w; if (PID_BITS != 0) 
begin always @(*) begin - commit_tmask_r = '0; - commit_data_r = 'x; + commit_tmask_w = '0; + commit_data_w = 'x; for (integer j = 0; j < NUM_LANES; ++j) begin - commit_tmask_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j]; - commit_data_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; + commit_tmask_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j]; + commit_data_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; end end end else begin - assign commit_tmask_r = commit_tmp_if.data.tmask; - assign commit_data_r = commit_tmp_if.data.data; + assign commit_tmask_w = commit_tmp_if.data.tmask; + assign commit_data_w = commit_tmp_if.data.data; end assign commit_out_if[i].valid = commit_tmp_if.valid; assign commit_out_if[i].data = { commit_tmp_if.data.uuid, commit_tmp_if.data.wid, - commit_tmask_r, + commit_tmask_w, commit_tmp_if.data.PC, commit_tmp_if.data.wb, commit_tmp_if.data.rd, - commit_data_r, + commit_data_w, 1'b0, // PID commit_tmp_if.data.sop, commit_tmp_if.data.eop diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index f83b23fb3..8c277f3e9 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -158,30 +158,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( // byte enable formatting for (genvar i = 0; i < NUM_LANES; ++i) begin - reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r; + reg [LSU_WORD_SIZE-1:0] mem_req_byteen_w; always @(*) begin - mem_req_byteen_r = '0; + mem_req_byteen_w = '0; case (`INST_LSU_WSIZE(execute_if.data.op_type)) 0: begin // 8-bit - mem_req_byteen_r[req_align[i]] = 1'b1; + mem_req_byteen_w[req_align[i]] = 1'b1; end 1: begin // 16 bit - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; end `ifdef XLEN_64 2: begin 
// 32 bit - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; end `endif // 3: 64 bit - default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}}; + default : mem_req_byteen_w = {LSU_WORD_SIZE{1'b1}}; endcase end - assign mem_req_byteen[i] = mem_req_byteen_r; + assign mem_req_byteen[i] = mem_req_byteen_w; end // memory misalignment not supported! diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 2e479976a..c75e3e3fd 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -315,15 +315,15 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin `ifdef FPU_RV64F - reg [`XLEN-1:0] result_r; + reg [`XLEN-1:0] result_w; always @(*) begin case (op_ret_int_out) - 2'b11: result_r = `XLEN'($signed(result_s[i])); - 2'b01: result_r = {32'h00000000, result_s[i]}; - default: result_r = {32'hffffffff, result_s[i]}; + 2'b11: result_w = `XLEN'($signed(result_s[i])); + 2'b01: result_w = {32'h00000000, result_s[i]}; + default: result_w = {32'hffffffff, result_s[i]}; endcase end - assign result[i] = result_r; + assign result[i] = result_w; `else assign result[i] = result_s[i]; `endif diff --git a/hw/rtl/libs/VX_onehot_encoder.sv b/hw/rtl/libs/VX_onehot_encoder.sv index 8f7ada257..6246a673c 100644 --- a/hw/rtl/libs/VX_onehot_encoder.sv +++ b/hw/rtl/libs/VX_onehot_encoder.sv @@ -87,29 +87,29 @@ module VX_onehot_encoder #( end else begin - reg [LN-1:0] index_r; + reg [LN-1:0] index_w; if (REVERSE != 0) begin always @(*) begin - index_r = 'x; + index_w = 'x; 
for (integer i = N-1; i >= 0; --i) begin if (data_in[i]) begin - index_r = LN'(N-1-i); + index_w = LN'(N-1-i); end end end end else begin always @(*) begin - index_r = 'x; + index_w = 'x; for (integer i = 0; i < N; ++i) begin if (data_in[i]) begin - index_r = LN'(i); + index_w = LN'(i); end end end end - assign data_out = index_r; + assign data_out = index_w; assign valid_out = (| data_in); end diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index 74e19a41b..e13186015 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -31,86 +31,86 @@ module VX_onehot_mux #( `UNUSED_VAR (sel_in) assign data_out = sel_in[0] ? data_in[0] : data_in[1]; end else if (LUT_OPT && N == 3) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 3'b001: data_out_r = data_in[0]; - 3'b010: data_out_r = data_in[1]; - 3'b100: data_out_r = data_in[2]; - default: data_out_r = 'x; + 3'b001: data_out_w = data_in[0]; + 3'b010: data_out_w = data_in[1]; + 3'b100: data_out_w = data_in[2]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 4) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 4'b0001: data_out_r = data_in[0]; - 4'b0010: data_out_r = data_in[1]; - 4'b0100: data_out_r = data_in[2]; - 4'b1000: data_out_r = data_in[3]; - default: data_out_r = 'x; + 4'b0001: data_out_w = data_in[0]; + 4'b0010: data_out_w = data_in[1]; + 4'b0100: data_out_w = data_in[2]; + 4'b1000: data_out_w = data_in[3]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 5) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 5'b00001: data_out_r = data_in[0]; - 5'b00010: data_out_r = data_in[1]; - 5'b00100: data_out_r = data_in[2]; - 5'b01000: data_out_r = data_in[3]; - 
5'b10000: data_out_r = data_in[4]; - default: data_out_r = 'x; + 5'b00001: data_out_w = data_in[0]; + 5'b00010: data_out_w = data_in[1]; + 5'b00100: data_out_w = data_in[2]; + 5'b01000: data_out_w = data_in[3]; + 5'b10000: data_out_w = data_in[4]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 6) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 6'b000001: data_out_r = data_in[0]; - 6'b000010: data_out_r = data_in[1]; - 6'b000100: data_out_r = data_in[2]; - 6'b001000: data_out_r = data_in[3]; - 6'b010000: data_out_r = data_in[4]; - 6'b100000: data_out_r = data_in[5]; - default: data_out_r = 'x; + 6'b000001: data_out_w = data_in[0]; + 6'b000010: data_out_w = data_in[1]; + 6'b000100: data_out_w = data_in[2]; + 6'b001000: data_out_w = data_in[3]; + 6'b010000: data_out_w = data_in[4]; + 6'b100000: data_out_w = data_in[5]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 7) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 7'b0000001: data_out_r = data_in[0]; - 7'b0000010: data_out_r = data_in[1]; - 7'b0000100: data_out_r = data_in[2]; - 7'b0001000: data_out_r = data_in[3]; - 7'b0010000: data_out_r = data_in[4]; - 7'b0100000: data_out_r = data_in[5]; - 7'b1000000: data_out_r = data_in[6]; - default: data_out_r = 'x; + 7'b0000001: data_out_w = data_in[0]; + 7'b0000010: data_out_w = data_in[1]; + 7'b0000100: data_out_w = data_in[2]; + 7'b0001000: data_out_w = data_in[3]; + 7'b0010000: data_out_w = data_in[4]; + 7'b0100000: data_out_w = data_in[5]; + 7'b1000000: data_out_w = data_in[6]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (LUT_OPT && N == 8) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin case 
(sel_in) - 8'b00000001: data_out_r = data_in[0]; - 8'b00000010: data_out_r = data_in[1]; - 8'b00000100: data_out_r = data_in[2]; - 8'b00001000: data_out_r = data_in[3]; - 8'b00010000: data_out_r = data_in[4]; - 8'b00100000: data_out_r = data_in[5]; - 8'b01000000: data_out_r = data_in[6]; - 8'b10000000: data_out_r = data_in[7]; - default: data_out_r = 'x; + 8'b00000001: data_out_w = data_in[0]; + 8'b00000010: data_out_w = data_in[1]; + 8'b00000100: data_out_w = data_in[2]; + 8'b00001000: data_out_w = data_in[3]; + 8'b00010000: data_out_w = data_in[4]; + 8'b00100000: data_out_w = data_in[5]; + 8'b01000000: data_out_w = data_in[6]; + 8'b10000000: data_out_w = data_in[7]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; + assign data_out = data_out_w; end else if (MODEL == 1) begin wire [N-1:0][DATAW-1:0] mask; for (genvar i = 0; i < N; ++i) begin @@ -134,16 +134,16 @@ module VX_onehot_mux #( `UNUSED_PIN (valid_out) ); end else if (MODEL == 3) begin - reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] data_out_w; always @(*) begin - data_out_r = 'x; + data_out_w = 'x; for (integer i = 0; i < N; ++i) begin if (sel_in[i]) begin - data_out_r = data_in[i]; + data_out_w = data_in[i]; end end end - assign data_out = data_out_r; + assign data_out = data_out_w; end endmodule diff --git a/hw/rtl/libs/VX_popcount.sv b/hw/rtl/libs/VX_popcount.sv index eaec78789..3d94dd00f 100644 --- a/hw/rtl/libs/VX_popcount.sv +++ b/hw/rtl/libs/VX_popcount.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -21,21 +21,21 @@ module VX_popcount63( reg [2:0] sum; always @(*) begin case (data_in) - 6'd0: sum=3'd0; 6'd1: sum=3'd1; 6'd2: sum=3'd1; 6'd3: sum=3'd2; + 6'd0: sum=3'd0; 6'd1: sum=3'd1; 6'd2: sum=3'd1; 6'd3: sum=3'd2; 6'd4: sum=3'd1; 6'd5: sum=3'd2; 6'd6: sum=3'd2; 6'd7: sum=3'd3; - 6'd8: sum=3'd1; 6'd9: sum=3'd2; 6'd10: sum=3'd2; 6'd11: sum=3'd3; + 6'd8: sum=3'd1; 6'd9: sum=3'd2; 6'd10: sum=3'd2; 6'd11: sum=3'd3; 6'd12: sum=3'd2; 6'd13: sum=3'd3; 6'd14: sum=3'd3; 6'd15: sum=3'd4; - 6'd16: sum=3'd1; 6'd17: sum=3'd2; 6'd18: sum=3'd2; 6'd19: sum=3'd3; + 6'd16: sum=3'd1; 6'd17: sum=3'd2; 6'd18: sum=3'd2; 6'd19: sum=3'd3; 6'd20: sum=3'd2; 6'd21: sum=3'd3; 6'd22: sum=3'd3; 6'd23: sum=3'd4; - 6'd24: sum=3'd2; 6'd25: sum=3'd3; 6'd26: sum=3'd3; 6'd27: sum=3'd4; + 6'd24: sum=3'd2; 6'd25: sum=3'd3; 6'd26: sum=3'd3; 6'd27: sum=3'd4; 6'd28: sum=3'd3; 6'd29: sum=3'd4; 6'd30: sum=3'd4; 6'd31: sum=3'd5; - 6'd32: sum=3'd1; 6'd33: sum=3'd2; 6'd34: sum=3'd2; 6'd35: sum=3'd3; + 6'd32: sum=3'd1; 6'd33: sum=3'd2; 6'd34: sum=3'd2; 6'd35: sum=3'd3; 6'd36: sum=3'd2; 6'd37: sum=3'd3; 6'd38: sum=3'd3; 6'd39: sum=3'd4; - 6'd40: sum=3'd2; 6'd41: sum=3'd3; 6'd42: sum=3'd3; 6'd43: sum=3'd4; + 6'd40: sum=3'd2; 6'd41: sum=3'd3; 6'd42: sum=3'd3; 6'd43: sum=3'd4; 6'd44: sum=3'd3; 6'd45: sum=3'd4; 6'd46: sum=3'd4; 6'd47: sum=3'd5; - 6'd48: sum=3'd2; 6'd49: sum=3'd3; 6'd50: sum=3'd3; 6'd51: sum=3'd4; + 6'd48: sum=3'd2; 6'd49: sum=3'd3; 6'd50: sum=3'd3; 6'd51: sum=3'd4; 6'd52: sum=3'd3; 6'd53: sum=3'd4; 6'd54: sum=3'd4; 6'd55: sum=3'd5; - 6'd56: sum=3'd3; 6'd57: sum=3'd4; 6'd58: sum=3'd4; 6'd59: sum=3'd5; + 6'd56: sum=3'd3; 6'd57: sum=3'd4; 6'd58: sum=3'd4; 6'd59: sum=3'd5; 6'd60: sum=3'd4; 6'd61: sum=3'd5; 6'd62: sum=3'd5; 6'd63: sum=3'd6; endcase end @@ -49,7 +49,7 @@ module VX_popcount32( reg [1:0] sum; always @(*) begin case (data_in) - 3'd0: sum=2'd0; 3'd1: sum=2'd1; 3'd2: sum=2'd1; 3'd3: sum=2'd2; + 3'd0: sum=2'd0; 3'd1: sum=2'd1; 3'd2: sum=2'd1; 3'd3: sum=2'd2; 3'd4: sum=2'd1; 3'd5: 
sum=2'd2; 3'd6: sum=2'd2; 3'd7: sum=2'd3; endcase end @@ -88,12 +88,12 @@ endmodule module VX_popcount #( parameter MODEL = 1, parameter N = 1, - parameter M = `CLOG2(N+1) + parameter M = `CLOG2(N+1) ) ( input wire [N-1:0] data_in, output wire [M-1:0] data_out ); - `UNUSED_PARAM (MODEL) + `UNUSED_PARAM (MODEL) `ifndef SYNTHESIS assign data_out = $countones(data_in); @@ -113,10 +113,10 @@ module VX_popcount #( t_in[N-1:0] = data_in; end VX_popcount32 pc32(t_in, t_out); - assign data_out = t_out[M-1:0]; - + assign data_out = t_out[M-1:0]; + end else if (N <= 6) begin - + reg [5:0] t_in; wire [2:0] t_out; always @(*) begin @@ -125,9 +125,9 @@ module VX_popcount #( end VX_popcount63 pc63(t_in, t_out); assign data_out = t_out[M-1:0]; - + end else if (N <= 9) begin - + reg [8:0] t_in; wire [4:0] t1_out; wire [3:0] t2_out; @@ -141,7 +141,7 @@ module VX_popcount #( assign data_out = t2_out[M-1:0]; end else if (N <= 12) begin - + reg [11:0] t_in; wire [5:0] t1_out; wire [3:0] t2_out; @@ -155,7 +155,7 @@ module VX_popcount #( assign data_out = t2_out[M-1:0]; end else if (N <= 18) begin - + reg [17:0] t_in; wire [8:0] t1_out; wire [5:0] t2_out; @@ -177,17 +177,17 @@ module VX_popcount #( localparam LOGPN = `CLOG2(PN); `IGNORE_UNOPTFLAT_BEGIN - wire [M-1:0] tmp [LOGPN-1:0][PN-1:0]; + wire [M-1:0] tmp [LOGPN-1:0][PN-1:0]; `IGNORE_UNOPTFLAT_END for (genvar j = 0; j < LOGPN; ++j) begin localparam D = j + 1; localparam Q = (D < LOGPN) ? 
(D + 1) : M; - for (genvar i = 0; i < (1 << (LOGPN-j-1)); ++i) begin + for (genvar i = 0; i < (1 << (LOGPN-j-1)); ++i) begin localparam l = i * 2; localparam r = i * 2 + 1; - wire [Q-1:0] res; - if (j == 0) begin + wire [Q-1:0] res; + if (j == 0) begin if (r < N) begin assign res = data_in[l] + data_in[r]; end else if (l < N) begin @@ -203,20 +203,20 @@ module VX_popcount #( end assign data_out = tmp[LOGPN-1][0]; - + end else begin - reg [M-1:0] cnt_r; + reg [M-1:0] cnt_w; always @(*) begin - cnt_r = '0; + cnt_w = '0; for (integer i = 0; i < N; ++i) begin - cnt_r = cnt_r + M'(data_in[i]); + cnt_w = cnt_w + M'(data_in[i]); end end - assign data_out = cnt_r; - + assign data_out = cnt_w; + end `endif diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 2138ea457..3dc5291ee 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -106,22 +106,22 @@ module VX_priority_encoder #( end else begin - reg [LN-1:0] index_r; - reg [N-1:0] onehot_r; + reg [LN-1:0] index_w; + reg [N-1:0] onehot_w; always @(*) begin - index_r = 'x; - onehot_r = 'x; + index_w = 'x; + onehot_w = 'x; for (integer i = N-1; i >= 0; --i) begin if (reversed[i]) begin - index_r = LN'(i); - onehot_r = N'(1) << i; + index_w = LN'(i); + onehot_w = N'(1) << i; end end end - assign index_out = index_r; - assign onehot_out = onehot_r; + assign index_out = index_w; + assign onehot_out = onehot_w; assign valid_out = (| reversed); end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index bbfd8269d..6199d5794 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -40,16 +40,16 @@ module VX_rr_arbiter #( end else if (LUT_OPT && NUM_REQS == 2) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 3'b0_01, - 3'b1_?1: begin grant_index_r = LOG_NUM_REQS'(0); end 
+ 3'b1_?1: begin grant_index_w = LOG_NUM_REQS'(0); end 3'b0_1?, - 3'b1_10: begin grant_index_r = LOG_NUM_REQS'(1); end - default: begin grant_index_r = 'x; end + 3'b1_10: begin grant_index_w = LOG_NUM_REQS'(1); end + default: begin grant_index_w = 'x; end endcase end @@ -57,31 +57,31 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, 5'b01_0?1, - 5'b10_??1: begin grant_index_r = LOG_NUM_REQS'(0); end + 5'b10_??1: begin grant_index_w = LOG_NUM_REQS'(0); end 5'b00_?1?, 5'b01_010, - 5'b10_?10: begin grant_index_r = LOG_NUM_REQS'(1); end + 5'b10_?10: begin grant_index_w = LOG_NUM_REQS'(1); end 5'b00_10?, 5'b01_1??, - 5'b10_100: begin grant_index_r = LOG_NUM_REQS'(2); end - default: begin grant_index_r = 'x; end + 5'b10_100: begin grant_index_w = LOG_NUM_REQS'(2); end + default: begin grant_index_w = 'x; end endcase end @@ -89,38 +89,38 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, 
requests}) 6'b00_0001, 6'b01_00?1, 6'b10_0??1, - 6'b11_???1: begin grant_index_r = LOG_NUM_REQS'(0); end + 6'b11_???1: begin grant_index_w = LOG_NUM_REQS'(0); end 6'b00_??1?, 6'b01_0010, 6'b10_0?10, - 6'b11_??10: begin grant_index_r = LOG_NUM_REQS'(1); end + 6'b11_??10: begin grant_index_w = LOG_NUM_REQS'(1); end 6'b00_?10?, 6'b01_?1??, 6'b10_0100, - 6'b11_?100: begin grant_index_r = LOG_NUM_REQS'(2); end + 6'b11_?100: begin grant_index_w = LOG_NUM_REQS'(2); end 6'b00_100?, 6'b01_10??, 6'b10_1???, - 6'b11_1000: begin grant_index_r = LOG_NUM_REQS'(3); end - default: begin grant_index_r = 'x; end + 6'b11_1000: begin grant_index_w = LOG_NUM_REQS'(3); end + default: begin grant_index_w = 'x; end endcase end @@ -128,18 +128,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -147,28 +147,28 @@ module VX_rr_arbiter #( 8'b001_000?1, 8'b010_00??1, 8'b011_0???1, - 8'b100_????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 8'b100_????1: begin grant_index_w = LOG_NUM_REQS'(0); end 8'b000_???1?, 8'b001_00010, 8'b010_00?10, 8'b011_0??10, - 8'b100_???10: begin grant_index_r = LOG_NUM_REQS'(1); end + 8'b100_???10: begin grant_index_w = LOG_NUM_REQS'(1); end 8'b000_??10?, 8'b001_??1??, 8'b010_00100, 8'b011_0?100, - 8'b100_??100: begin grant_index_r = LOG_NUM_REQS'(2); end + 8'b100_??100: begin grant_index_w = LOG_NUM_REQS'(2); end 8'b000_?100?, 8'b001_?10??, 8'b010_?1???, 8'b011_01000, - 8'b100_?1000: begin grant_index_r = 
LOG_NUM_REQS'(3); end + 8'b100_?1000: begin grant_index_w = LOG_NUM_REQS'(3); end 8'b000_1000?, 8'b001_100??, 8'b010_10???, 8'b011_1????, - 8'b100_10000: begin grant_index_r = LOG_NUM_REQS'(4); end - default: begin grant_index_r = 'x; end + 8'b100_10000: begin grant_index_w = LOG_NUM_REQS'(4); end + default: begin grant_index_w = 'x; end endcase end @@ -176,18 +176,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -196,38 +196,38 @@ module VX_rr_arbiter #( 9'b010_000??1, 9'b011_00???1, 9'b100_0????1, - 9'b101_?????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 9'b101_?????1: begin grant_index_w = LOG_NUM_REQS'(0); end 9'b000_????1?, 9'b001_000010, 9'b010_000?10, 9'b011_00??10, 9'b100_0???10, - 9'b101_????10: begin grant_index_r = LOG_NUM_REQS'(1); end + 9'b101_????10: begin grant_index_w = LOG_NUM_REQS'(1); end 9'b000_???10?, 9'b001_???1??, 9'b010_000100, 9'b011_00?100, 9'b100_0??100, - 9'b101_???100: begin grant_index_r = LOG_NUM_REQS'(2); end + 9'b101_???100: begin grant_index_w = LOG_NUM_REQS'(2); end 9'b000_??100?, 9'b001_??10??, 9'b010_??1???, 9'b011_001000, 9'b100_0?1000, - 9'b101_??1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 9'b101_??1000: begin grant_index_w = LOG_NUM_REQS'(3); end 9'b000_?1000?, 9'b001_?100??, 9'b010_?10???, 9'b011_?1????, 9'b100_010000, - 9'b101_?10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 9'b101_?10000: begin grant_index_w = LOG_NUM_REQS'(4); end 9'b000_10000?, 
9'b001_1000??, 9'b010_100???, 9'b011_10????, 9'b100_1?????, - 9'b101_100000: begin grant_index_r = LOG_NUM_REQS'(5); end - default: begin grant_index_r = 'x; end + 9'b101_100000: begin grant_index_w = LOG_NUM_REQS'(5); end + default: begin grant_index_w = 'x; end endcase end @@ -235,18 +235,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -256,50 +256,50 @@ module VX_rr_arbiter #( 10'b011_000???1, 10'b100_000???1, 10'b101_00????1, - 10'b110_??????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 10'b110_??????1: begin grant_index_w = LOG_NUM_REQS'(0); end 10'b000_?????1?, 10'b001_0000010, 10'b010_0000?10, 10'b011_000??10, 10'b100_00???10, 10'b101_0????10, - 10'b110_?????10: begin grant_index_r = LOG_NUM_REQS'(1); end + 10'b110_?????10: begin grant_index_w = LOG_NUM_REQS'(1); end 10'b000_????10?, 10'b001_????1??, 10'b010_0000100, 10'b011_000?100, 10'b100_00??100, 10'b101_0???100, - 10'b110_????100: begin grant_index_r = LOG_NUM_REQS'(2); end + 10'b110_????100: begin grant_index_w = LOG_NUM_REQS'(2); end 10'b000_???100?, 10'b001_???10??, 10'b010_???1???, 10'b011_0001000, 10'b100_00?1000, 10'b101_0??1000, - 10'b110_???1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 10'b110_???1000: begin grant_index_w = LOG_NUM_REQS'(3); end 10'b000_??1000?, 10'b001_??100??, 10'b010_??10???, 10'b011_??1????, 10'b100_0010000, 10'b101_0?10000, - 10'b110_??10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 10'b110_??10000: begin 
grant_index_w = LOG_NUM_REQS'(4); end 10'b000_?10000?, 10'b001_?1000??, 10'b010_?100???, 10'b011_?10????, 10'b100_?1?????, 10'b101_0100000, - 10'b110_?100000: begin grant_index_r = LOG_NUM_REQS'(5); end + 10'b110_?100000: begin grant_index_w = LOG_NUM_REQS'(5); end 10'b000_100000?, 10'b001_10000??, 10'b010_1000???, 10'b011_100????, 10'b100_10?????, 10'b101_1??????, - 10'b110_1000000: begin grant_index_r = LOG_NUM_REQS'(6); end - default: begin grant_index_r = 'x; end + 10'b110_1000000: begin grant_index_w = LOG_NUM_REQS'(6); end + default: begin grant_index_w = 'x; end endcase end @@ -307,18 +307,18 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -329,7 +329,7 @@ module VX_rr_arbiter #( 11'b100_000????1, 11'b101_00?????1, 11'b110_0??????1, - 11'b111_???????1: begin grant_index_r = LOG_NUM_REQS'(0); end + 11'b111_???????1: begin grant_index_w = LOG_NUM_REQS'(0); end 11'b000_??????1?, 11'b001_00000010, 11'b010_00000?10, @@ -337,7 +337,7 @@ module VX_rr_arbiter #( 11'b100_000???10, 11'b101_00????10, 11'b110_0?????10, - 11'b111_??????10: begin grant_index_r = LOG_NUM_REQS'(1); end + 11'b111_??????10: begin grant_index_w = LOG_NUM_REQS'(1); end 11'b000_?????10?, 11'b001_?????1??, 11'b010_00000100, @@ -345,7 +345,7 @@ module VX_rr_arbiter #( 11'b100_000??100, 11'b101_00???100, 11'b110_0????100, - 11'b111_?????100: begin grant_index_r = LOG_NUM_REQS'(2); end + 11'b111_?????100: begin grant_index_w = LOG_NUM_REQS'(2); end 
11'b000_????100?, 11'b001_????10??, 11'b010_????1???, @@ -353,7 +353,7 @@ module VX_rr_arbiter #( 11'b100_000?1000, 11'b101_00??1000, 11'b110_0???1000, - 11'b111_????1000: begin grant_index_r = LOG_NUM_REQS'(3); end + 11'b111_????1000: begin grant_index_w = LOG_NUM_REQS'(3); end 11'b000_???1000?, 11'b001_???100??, 11'b010_???10???, @@ -361,7 +361,7 @@ module VX_rr_arbiter #( 11'b100_00010000, 11'b101_00?10000, 11'b110_0??10000, - 11'b111_???10000: begin grant_index_r = LOG_NUM_REQS'(4); end + 11'b111_???10000: begin grant_index_w = LOG_NUM_REQS'(4); end 11'b000_??10000?, 11'b001_??1000??, 11'b010_??100???, @@ -369,7 +369,7 @@ module VX_rr_arbiter #( 11'b100_??1?????, 11'b101_00100000, 11'b110_0?100000, - 11'b111_??100000: begin grant_index_r = LOG_NUM_REQS'(5); end + 11'b111_??100000: begin grant_index_w = LOG_NUM_REQS'(5); end 11'b000_?100000?, 11'b001_?10000??, 11'b010_?1000???, @@ -377,7 +377,7 @@ module VX_rr_arbiter #( 11'b100_?10?????, 11'b101_?1??????, 11'b110_01000000, - 11'b111_?1000000: begin grant_index_r = LOG_NUM_REQS'(6); end + 11'b111_?1000000: begin grant_index_w = LOG_NUM_REQS'(6); end 11'b000_1000000?, 11'b001_100000??, 11'b010_10000???, @@ -385,8 +385,8 @@ module VX_rr_arbiter #( 11'b100_100?????, 11'b101_10??????, 11'b110_1???????, - 11'b111_10000000: begin grant_index_r = LOG_NUM_REQS'(7); end - default: begin grant_index_r = 'x; end + 11'b111_10000000: begin grant_index_w = LOG_NUM_REQS'(7); end + default: begin grant_index_w = 'x; end endcase end @@ -394,12 +394,12 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = NUM_REQS'(1) << grant_index_r; + assign grant_index = grant_index_w; + assign grant_onehot = NUM_REQS'(1) << grant_index_w; assign grant_valid = (| requests); end else if (MODEL == 1) begin From 383dc1f6b8d0b87e665b50865acb6f2cf3525d12 Mon Sep 17 00:00:00 2001 From: Blaise 
Tine Date: Sat, 24 Aug 2024 17:38:01 -0700 Subject: [PATCH 169/488] timing optimization --- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_fpu_unit.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_sfu_unit.sv | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index adbc7898b..7ab808c70 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -43,7 +43,7 @@ module VX_alu_unit #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (PARTIAL_BW ? 1 : 0) + .OUT_BUF (PARTIAL_BW ? 3 : 0) ) dispatch_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 127ba9755..c13055ab7 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -41,7 +41,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (PARTIAL_BW ? 1 : 0) + .OUT_BUF (PARTIAL_BW ? 
3 : 0) ) dispatch_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index febaec5aa..425f1aeee 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -42,7 +42,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (1) + .OUT_BUF (3) ) dispatch_unit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 5ef4211d0..a77520866 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -58,7 +58,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (1) + .OUT_BUF (3) ) dispatch_unit ( .clk (clk), .reset (reset), From e05fe0d75bcb90e80de6c6b0b4955e6f54e5f6b4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 18:11:06 -0700 Subject: [PATCH 170/488] dispatch_unit speed up --- hw/rtl/core/VX_dispatch_unit.sv | 40 +++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 3c84649bd..3281dd9f9 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -55,7 +55,6 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign dispatch_if[i].ready = dispatch_ready[i]; end - wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; wire [BLOCK_SIZE-1:0] block_ready; wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask; wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_regs; @@ -66,25 +65,42 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire batch_done = (& block_done); + // batch select logic + logic [BATCH_COUNT_W-1:0] batch_idx; if (BATCH_COUNT != 1) begin - always @(posedge clk) begin - if (reset) begin - batch_idx <= '0; - end else begin - batch_idx <= batch_idx + BATCH_COUNT_W'(batch_done); - end + + wire [BATCH_COUNT-1:0] valid_batches; 
+ for (genvar i = 0; i < BATCH_COUNT; ++i) begin + assign valid_batches[i] = | dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE]; end + + VX_generic_arbiter #( + .NUM_REQS (BATCH_COUNT), + .TYPE ("P") + ) batch_sel ( + .clk (clk), + .reset (reset), + .requests (valid_batches), + .grant_index (batch_idx), + `UNUSED_PIN (grant_onehot), + `UNUSED_PIN (grant_valid), + .grant_ready (batch_done) + ); + end else begin assign batch_idx = 0; `UNUSED_VAR (batch_done) end + wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); + end + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin - wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); - assign issue_indices[block_idx] = issue_idx; - + wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx]; wire valid_p, ready_p; if (`NUM_THREADS != NUM_LANES) begin @@ -246,8 +262,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( reg [`ISSUE_WIDTH-1:0] ready_in; always @(*) begin ready_in = 0; - for (integer i = 0; i < BLOCK_SIZE; ++i) begin - ready_in[issue_indices[i]] = block_ready[i] && block_eop[i]; + for (integer block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + ready_in[issue_indices[block_idx]] = block_ready[block_idx] && block_eop[block_idx]; end end assign dispatch_ready = ready_in; From e538dfa3164523c59a018afed04aef2e2dd21e4e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 19:11:06 -0700 Subject: [PATCH 171/488] minor update --- hw/rtl/VX_define.vh | 8 -------- hw/rtl/core/VX_mem_unit.sv | 4 ++-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 8050ad6fc..9a8d81c67 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -390,14 +390,6 @@ assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \ assign dst.rsp_ready = 
src.rsp_ready -`define ASSIGN_VX_LSU_MEM_IF(dst, src) \ - assign dst.req_valid = src.req_valid; \ - assign dst.req_data = src.req_data; \ - assign src.req_ready = dst.req_ready; \ - assign src.rsp_valid = dst.rsp_valid; \ - assign src.rsp_data = dst.rsp_data; \ - assign dst.rsp_ready = src.rsp_ready - `define BUFFER_DCR_BUS_IF(dst, src, enable) \ if (enable) begin \ reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 6569c1d47..7a7e9e2db 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -118,7 +118,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( assign lmem_perf = '0; `endif for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); + `ASSIGN_VX_MEM_BUS_IF (lsu_dcache_if[i], lsu_mem_if[i]); end `endif @@ -190,7 +190,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end else begin for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); + `ASSIGN_VX_MEM_BUS_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); end end From 592297582e7786da4981e32ee2ba579991c5c9f6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 19:44:03 -0700 Subject: [PATCH 172/488] fpu_unit timing optimization --- hw/rtl/core/VX_dispatch_unit.sv | 14 ++++++++++- hw/rtl/core/VX_fpu_unit.sv | 42 ++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 3281dd9f9..5e6893e97 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -233,6 +233,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw); + logic [OUT_DATAW-1:0] execute_data, execute_data_w; + VX_elastic_buffer #( .DATAW (OUT_DATAW), .SIZE 
(`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -253,10 +255,20 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( block_pid[block_idx], block_sop[block_idx], block_eop[block_idx]}), - .data_out (execute_if[block_idx].data), + .data_out (execute_data), .valid_out (execute_if[block_idx].valid), .ready_out (execute_if[block_idx].ready) ); + + if (`NUM_THREADS != NUM_LANES) begin + assign execute_data_w = execute_data; + end else begin + always @(*) begin + execute_data_w = execute_data; + execute_data_w[2:0] = {1'b0, 1'b1, 1'b1}; // default pid, sop, and eop + end + end + assign execute_if[block_idx].data = execute_data_w; end reg [`ISSUE_WIDTH-1:0] ready_in; diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index c13055ab7..ae36e4b22 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -71,9 +71,9 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0] fpu_rsp_tmask; wire [`PC_BITS-1:0] fpu_rsp_PC; wire [`NR_BITS-1:0] fpu_rsp_rd; - wire [PID_WIDTH-1:0] fpu_rsp_pid; - wire fpu_rsp_sop; - wire fpu_rsp_eop; + wire [PID_WIDTH-1:0] fpu_rsp_pid, fpu_rsp_pid_u; + wire fpu_rsp_sop, fpu_rsp_sop_u; + wire fpu_rsp_eop, fpu_rsp_eop_u; wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag; wire mdata_full; @@ -93,13 +93,26 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .acquire_en (execute_fire), .write_addr (fpu_req_tag), .write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), - .read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), + .read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid_u, fpu_rsp_sop_u, fpu_rsp_eop_u}), .read_addr (fpu_rsp_tag), 
.release_en (fpu_rsp_fire), .full (mdata_full), `UNUSED_PIN (empty) ); + if (PID_BITS != 0) begin + assign fpu_rsp_pid = fpu_rsp_pid_u; + assign fpu_rsp_sop = fpu_rsp_sop_u; + assign fpu_rsp_eop = fpu_rsp_eop_u; + end else begin + `UNUSED_VAR (fpu_rsp_pid_u) + `UNUSED_VAR (fpu_rsp_sop_u) + `UNUSED_VAR (fpu_rsp_eop_u) + assign fpu_rsp_pid = 0; + assign fpu_rsp_sop = 1; + assign fpu_rsp_eop = 1; + end + // resolve dynamic FRM from CSR wire [`INST_FRM_BITS-1:0] fpu_req_frm; `ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].read_wid, per_block_execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS) @@ -200,8 +213,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( `endif - // handle FPU response - + // handle CSR update fflags_t fpu_rsp_fflags_q; if (PID_BITS != 0) begin @@ -218,9 +230,21 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( assign fpu_rsp_fflags_q = fpu_rsp_fflags; end - assign fpu_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags; - `ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS) - assign fpu_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q; + VX_fpu_csr_if fpu_csr_tmp_if(); + assign fpu_csr_tmp_if.write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags; + `ASSIGN_BLOCKED_WID (fpu_csr_tmp_if.write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS) + assign fpu_csr_tmp_if.write_fflags = fpu_rsp_fflags_q; + + VX_pipe_register #( + .DATAW (1 + `NW_WIDTH + $bits(fflags_t)), + .RESETW (1) + ) fpu_csr_reg ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in ({fpu_csr_tmp_if.write_enable, fpu_csr_tmp_if.write_wid, fpu_csr_tmp_if.write_fflags}), + .data_out ({fpu_csr_if[block_idx].write_enable, fpu_csr_if[block_idx].write_wid, fpu_csr_if[block_idx].write_fflags}) + ); // send response From b6879b25e33a45c747b9888d8ebbb1d927634046 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 24 Aug 2024 20:46:25 -0700 Subject: [PATCH 173/488] switching to python3 dependency --- 
ci/travis_run.py | 2 +- hw/unittest/common.mk | 2 +- kernel/scripts/vxbin.py | 2 +- miscs/docker/Dockerfile.ubuntu | 1 - sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/xrtsim/Makefile | 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ci/travis_run.py b/ci/travis_run.py index 907cf5ce4..70459cbee 100755 --- a/ci/travis_run.py +++ b/ci/travis_run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2019-2023 # diff --git a/hw/unittest/common.mk b/hw/unittest/common.mk index 48aefd415..71f6914bf 100644 --- a/hw/unittest/common.mk +++ b/hw/unittest/common.mk @@ -25,7 +25,7 @@ VL_FLAGS += $(RTL_PKGS) VL_FLAGS += --cc $(TOP) --top-module $(TOP) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/kernel/scripts/vxbin.py b/kernel/scripts/vxbin.py index 501d8949a..1dcd6a099 100755 --- a/kernel/scripts/vxbin.py +++ b/kernel/scripts/vxbin.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2019-2023 # diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.ubuntu index f3a864ce5..64bb5813d 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.ubuntu @@ -21,7 +21,6 @@ ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ software-properties-common \ build-essential \ - python \ python3 \ git \ wget \ diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 9c6314ecf..32182d5a8 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -79,7 +79,7 @@ VL_FLAGS += $(RTL_PKGS) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; 
print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 638d7403f..2f38ae1f2 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -61,7 +61,7 @@ VL_FLAGS += --cc $(TOP) --top-module $(TOP) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 1e0d11b66..c63fe3d56 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -78,7 +78,7 @@ VL_FLAGS += $(RTL_PKGS) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) From bdcc5f59913e6b8bbdd682223a24bb5f584012fe Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 05:11:48 -0700 Subject: [PATCH 174/488] FPU decode optimization --- hw/rtl/VX_define.vh | 29 +++++++------- hw/rtl/VX_gpu_pkg.sv | 79 ++++++++++++++++++++------------------ hw/rtl/core/VX_decode.sv | 26 +++++++++---- hw/rtl/fpu/VX_fpu_cvt.sv | 4 +- hw/rtl/fpu/VX_fpu_div.sv | 4 +- hw/rtl/fpu/VX_fpu_dpi.sv | 74 +++++++++++++++-------------------- hw/rtl/fpu/VX_fpu_dsp.sv | 40 +++++++++---------- hw/rtl/fpu/VX_fpu_fma.sv | 4 +- hw/rtl/fpu/VX_fpu_fpnew.sv | 13 ++----- hw/rtl/fpu/VX_fpu_ncp.sv | 4 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 4 +- 11 files changed, 134 insertions(+), 147 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 9a8d81c67..861d9f28c 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -227,22 +227,19 @@ `define INST_FENCE_D 1'h0 `define INST_FENCE_I 1'h1 -`define 
INST_FPU_ADD 4'b0000 -`define INST_FPU_SUB 4'b0001 -`define INST_FPU_MUL 4'b0010 -`define INST_FPU_DIV 4'b0011 -`define INST_FPU_SQRT 4'b0100 -`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2 -`define INST_FPU_F2F 4'b0110 -`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7 -`define INST_FPU_F2I 4'b1000 -`define INST_FPU_F2U 4'b1001 -`define INST_FPU_I2F 4'b1010 -`define INST_FPU_U2F 4'b1011 -`define INST_FPU_MADD 4'b1100 -`define INST_FPU_MSUB 4'b1101 -`define INST_FPU_NMSUB 4'b1110 -`define INST_FPU_NMADD 4'b1111 +`define INST_FPU_ADD 4'b0000 // SUB=fmt[1] +`define INST_FPU_MUL 4'b0001 +`define INST_FPU_MADD 4'b0010 // SUB=fmt[1] +`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1] +`define INST_FPU_DIV 4'b0100 +`define INST_FPU_SQRT 4'b0101 +`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2 +`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1 +`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7 `define INST_FPU_BITS 4 `define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3) `define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4) diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index f29067855..f94714d06 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -464,61 +464,64 @@ package VX_gpu_pkg; `EX_FPU: begin case (`INST_FPU_BITS'(op_type)) `INST_FPU_ADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); - else - `TRACE(level, ("FADD.S")); + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FSUB.D")); + else + `TRACE(level, ("FSUB.S")); + end else begin + if (op_args.fpu.fmt[0]) + 
`TRACE(level, ("FADD.D")); + else + `TRACE(level, ("FADD.S")); + end end - `INST_FPU_SUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); - else - `TRACE(level, ("FSUB.S")); + `INST_FPU_MADD: begin + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMSUB.D")); + else + `TRACE(level, ("FMSUB.S")); + end else begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FMADD.D")); + else + `TRACE(level, ("FMADD.S")); + end + end + `INST_FPU_NMADD: begin + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMSUB.D")); + else + `TRACE(level, ("FNMSUB.S")); + end else begin + if (op_args.fpu.fmt[0]) + `TRACE(level, ("FNMADD.D")); + else + `TRACE(level, ("FNMADD.S")); + end end `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) `TRACE(level, ("FMUL.D")); else `TRACE(level, ("FMUL.S")); end `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) `TRACE(level, ("FDIV.D")); else `TRACE(level, ("FDIV.S")); end `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) `TRACE(level, ("FSQRT.D")); else `TRACE(level, ("FSQRT.S")); end - `INST_FPU_MADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); - else - `TRACE(level, ("FMADD.S")); - end - `INST_FPU_MSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); - else - `TRACE(level, ("FMSUB.S")); - end - `INST_FPU_NMADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); - else - `TRACE(level, ("FNMADD.S")); - end - `INST_FPU_NMSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); - else - `TRACE(level, ("FNMSUB.S")); - end `INST_FPU_CMP: begin - if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm[1:0]) 0: `TRACE(level, ("FLE.D")); 1: `TRACE(level, ("FLT.D")); @@ -602,7 +605,7 @@ package VX_gpu_pkg; end end `INST_FPU_MISC: begin - if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm) 0: `TRACE(level, ("FSGNJ.D")); 
1: `TRACE(level, ("FSGNJN.D")); diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index de317d497..d3ca4d6e4 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -376,14 +376,16 @@ module VX_decode import VX_gpu_pkg::*; #( `USED_IREG (rs2); end `ifdef EXT_F_ENABLE - `INST_FMADD, - `INST_FMSUB, - `INST_FNMSUB, - `INST_FNMADD: begin + `INST_FMADD, // 7'b1000011 + `INST_FMSUB, // 7'b1000111 + `INST_FNMSUB, // 7'b1001011 + `INST_FNMADD: // 7'b1001111 + begin ex_type = `EX_FPU; - op_type = `INST_OP_BITS'({2'b11, opcode[3:2]}); + op_type = `INST_OP_BITS'({2'b00, 1'b1, opcode[3]}); op_args.fpu.frm = func3; op_args.fpu.fmt[0] = func2[0]; // float / double + op_args.fpu.fmt[1] = opcode[3] ^ opcode[2]; // SUB use_rd = 1; `USED_FREG (rd); `USED_FREG (rs1); @@ -399,9 +401,10 @@ module VX_decode import VX_gpu_pkg::*; #( case (func5) 5'b00000, // FADD 5'b00001, // FSUB - 5'b00010, // FMUL - 5'b00011: begin // FDIV - op_type = `INST_OP_BITS'(func5[1:0]); + 5'b00010: // FMUL + begin + op_type = `INST_OP_BITS'({2'b00, 1'b0, func5[1]}); + op_args.fpu.fmt[1] = func5[0]; // SUB `USED_FREG (rd); `USED_FREG (rs1); `USED_FREG (rs2); @@ -430,6 +433,13 @@ module VX_decode import VX_gpu_pkg::*; #( `USED_FREG (rs1); end `endif + 5'b00011: begin + // FDIV + op_type = `INST_OP_BITS'(`INST_FPU_DIV); + `USED_FREG (rd); + `USED_FREG (rs1); + `USED_FREG (rs2); + end 5'b01011: begin // FSQRT op_type = `INST_OP_BITS'(`INST_FPU_SQRT); diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 1b6617c60..7587f8342 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -73,8 +73,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_div.sv 
b/hw/rtl/fpu/VX_fpu_div.sv index 44b5bedfb..68138bb7c 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -77,8 +77,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 67022e8fd..0ba7d54f3 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -76,7 +76,6 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub; reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f; - reg dst_fmt, int_fmt; reg [NUM_LANES-1:0][63:0] operands [3]; @@ -88,7 +87,8 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end end - `UNUSED_VAR (fmt) + wire f_fmt = fmt[0]; + wire i_fmt = fmt[1]; always @(*) begin is_fadd = 0; @@ -106,25 +106,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( is_ftou = 0; is_f2f = 0; - dst_fmt = 0; - int_fmt = 0; - - `ifdef FLEN_64 - dst_fmt = fmt[0]; - `endif - - `ifdef XLEN_64 - int_fmt = fmt[1]; - `endif - case (op_type) - `INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end - `INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end + `INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = ~i_fmt; is_fsub = i_fmt; end + `INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = ~i_fmt; is_fmsub = i_fmt; end + `INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = ~i_fmt; is_fnmsub = i_fmt; end `INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end - `INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end - `INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end - `INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end - `INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end 
`INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end `INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end `INST_FPU_CMP: begin core_select = FPU_NCP; is_fcmp = 1; end @@ -164,13 +150,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); - dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); - dpi_fmul (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]); - dpi_fmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]); - dpi_fmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]); - dpi_fnmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]); - dpi_fnmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]); + dpi_fadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); + dpi_fsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); + dpi_fmul (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]); + dpi_fmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]); + dpi_fmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]); + dpi_fnmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]); + dpi_fnmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]); result_fma[i] = is_fadd ? 
result_fadd[i][`XLEN-1:0] : is_fsub ? result_fsub[i][`XLEN-1:0] : @@ -226,7 +212,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); + dpi_fdiv (fdiv_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0]; end end @@ -265,7 +251,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); + dpi_fsqrt (fsqrt_fire, int'(f_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0]; end end @@ -313,11 +299,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); - dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); - dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); - dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); - dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]); + dpi_itof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); + dpi_utof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); + dpi_ftoi (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); + dpi_ftou (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); + dpi_f2f (fcvt_fire, int'(f_fmt), operands[0][i], result_f2f[i]); result_fcvt[i] = is_itof ? 
result_itof[i][`XLEN-1:0] : is_utof ? result_utof[i][`XLEN-1:0] : @@ -384,17 +370,17 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]); - dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); - dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); - dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); - dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); - dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); - dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); - dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); - dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); - result_fmvx[i] = dst_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension - result_fmvf[i] = dst_fmt ? 
operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing + dpi_fclss (fncp_fire, int'(f_fmt), operands[0][i], result_fclss[i]); + dpi_fle (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); + dpi_flt (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); + dpi_feq (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); + dpi_fmin (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); + dpi_fmax (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); + dpi_fsgnj (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); + dpi_fsgnjn (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); + dpi_fsgnjx (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); + result_fmvx[i] = f_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension + result_fmvf[i] = f_fmt ? 
operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing end end diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index c75e3e3fd..9e8edef09 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -74,31 +74,29 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire div_has_fflags, sqrt_has_fflags; fflags_t div_fflags, sqrt_fflags; - reg [FPCORES_BITS-1:0] core_select; reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed; + wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; + always @(*) begin - is_madd = 0; - is_sub = 0; - is_neg = 0; - is_div = 0; - is_itof = 0; - is_signed = 0; + is_madd = 'x; + is_sub = 'x; + is_neg = 'x; + is_div = 'x; + is_itof = 'x; + is_signed = 'x; case (op_type) - `INST_FPU_ADD: begin core_select = FPU_FMA; end - `INST_FPU_SUB: begin core_select = FPU_FMA; is_sub = 1; end - `INST_FPU_MUL: begin core_select = FPU_FMA; is_neg = 1; end - `INST_FPU_MADD: begin core_select = FPU_FMA; is_madd = 1; end - `INST_FPU_MSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; end - `INST_FPU_NMADD: begin core_select = FPU_FMA; is_madd = 1; is_neg = 1; end - `INST_FPU_NMSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; is_neg = 1; end - `INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end - `INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end - `INST_FPU_F2I: begin core_select = FPU_CVT; is_signed = 1; end - `INST_FPU_F2U: begin core_select = FPU_CVT; end - `INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end - `INST_FPU_U2F: begin core_select = FPU_CVT; is_itof = 1; end - default: begin core_select = FPU_NCP; end + `INST_FPU_ADD: begin is_madd = 0; is_neg = 0; is_sub = fmt[1]; end + `INST_FPU_MUL: begin is_madd = 0; is_neg = 1; is_sub = 0; end + `INST_FPU_MADD: begin is_madd = 1; is_neg = 0; is_sub = fmt[1]; end + `INST_FPU_NMADD: begin is_madd = 1; is_neg = 1; is_sub = fmt[1]; end + `INST_FPU_DIV: begin is_div = 1; end + `INST_FPU_SQRT: begin is_div = 0; end + 
`INST_FPU_F2I: begin is_itof = 0; is_signed = 1; end + `INST_FPU_F2U: begin is_itof = 0; is_signed = 0; end + `INST_FPU_I2F: begin is_itof = 1; is_signed = 1; end + `INST_FPU_U2F: begin is_itof = 1; is_signed = 0; end + default: begin end endcase end diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index a5cb89a1a..ce99138cb 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -108,8 +108,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 9ee7f1a2c..ad95f0347 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -134,20 +134,13 @@ module VX_fpu_fpnew fpu_op = fpnew_pkg::ADD; fpu_operands[1] = dataa; fpu_operands[2] = datab; - end - `INST_FPU_SUB: begin - fpu_op = fpnew_pkg::ADD; - fpu_operands[1] = dataa; - fpu_operands[2] = datab; - fpu_op_mod = 1; + fpu_op_mod = fmt[1]; // FADD or FSUB end `INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end + `INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = fmt[1]; end + `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = ~fmt[1]; end `INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end `INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end - `INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end - `INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end - `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end - `INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end `ifdef FLEN_64 `INST_FPU_F2F: begin fpu_op = fpnew_pkg::F2F; fpu_src_fmt = fmt[0] ? 
fpnew_pkg::FP32 : fpnew_pkg::FP64; end `endif diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 16c0df758..bfc69316b 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -78,8 +78,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 5aacf2d29..425f43d6e 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -71,8 +71,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), From b40441b68f5ecbe6f4eb33b26f1d64568581dc21 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 05:12:44 -0700 Subject: [PATCH 175/488] minor update --- hw/rtl/libs/VX_pe_serializer.sv | 83 ++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 38 deletions(-) diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 4e3a29132..8ae7900b5 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -35,8 +35,8 @@ module VX_pe_serializer #( // PE output wire pe_enable, - output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in, - input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out, + output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_out, + input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_in, // output output wire valid_out, @@ -49,32 +49,44 @@ module VX_pe_serializer #( wire [TAG_WIDTH-1:0] tag_out_u; wire ready_out_u; - wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] 
pe_data_in_s; - wire valid_out_s; - wire [TAG_WIDTH-1:0] tag_out_s; + wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_out_w; + wire pe_valid_in; + wire [TAG_WIDTH-1:0] pe_tag_in; wire enable; VX_shift_register #( .DATAW (1 + TAG_WIDTH), - .DEPTH (LATENCY + PE_REG), + .DEPTH (PE_REG + LATENCY), .RESETW (1) ) shift_reg ( .clk (clk), .reset (reset), .enable (enable), - .data_in ({valid_in, tag_in}), - .data_out ({valid_out_s, tag_out_s}) + .data_in ({valid_in, tag_in}), + .data_out ({pe_valid_in, pe_tag_in}) ); VX_pipe_register #( - .DATAW (NUM_PES * DATA_IN_WIDTH), - .DEPTH (PE_REG) - ) pe_reg ( + .DATAW (NUM_PES * DATA_IN_WIDTH), + .DEPTH (PE_REG) + ) pe_data_reg ( .clk (clk), .reset (reset), .enable (enable), - .data_in (pe_data_in_s), - .data_out (pe_data_in) + .data_in (pe_data_out_w), + .data_out (pe_data_out) + ); + + VX_pipe_register #( + .DATAW (1), + .RESETW (1), + .DEPTH (PE_REG) + ) pe_en_reg ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in (enable), + .data_out (pe_enable) ); if (NUM_LANES != NUM_PES) begin @@ -82,35 +94,32 @@ module VX_pe_serializer #( localparam BATCH_SIZE = NUM_LANES / NUM_PES; localparam BATCH_SIZEW = `LOG2UP(BATCH_SIZE); - reg [BATCH_SIZEW-1:0] batch_in_idx; - reg [BATCH_SIZEW-1:0] batch_out_idx; + reg [BATCH_SIZEW-1:0] batch_in_idx, batch_out_idx; + reg batch_in_done, batch_out_done; for (genvar i = 0; i < NUM_PES; ++i) begin - assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i]; + assign pe_data_out_w[i] = data_in[batch_in_idx * NUM_PES + i]; end always @(posedge clk) begin if (reset) begin - batch_in_idx <= '0; - batch_out_idx <= '0; + batch_in_idx <= '0; + batch_out_idx <= '0; + batch_in_done <= 0; + batch_out_done <= 0; end else if (enable) begin - if (valid_in) begin - batch_in_idx <= batch_in_idx + BATCH_SIZEW'(1); - end - if (valid_out_s) begin - batch_out_idx <= batch_out_idx + BATCH_SIZEW'(1); - end + batch_in_idx <= batch_in_idx + BATCH_SIZEW'(valid_in); + batch_out_idx <= batch_out_idx + 
BATCH_SIZEW'(pe_valid_in); + batch_in_done <= valid_in && (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-2)); + batch_out_done <= pe_valid_in && (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-2)); end end - wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1)); - wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1)); - - reg valid_out_r; reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r; reg [TAG_WIDTH-1:0] tag_out_r; + reg valid_out_r; - wire valid_out_b = valid_out_s && batch_out_done; + wire valid_out_b = pe_valid_in && batch_out_done; wire ready_out_b = ready_out_u || ~valid_out_u; always @(posedge clk) begin @@ -120,14 +129,13 @@ module VX_pe_serializer #( valid_out_r <= valid_out_b; end if (ready_out_b) begin - data_out_r[batch_out_idx] <= pe_data_out; - tag_out_r <= tag_out_s; + data_out_r[batch_out_idx] <= pe_data_in; + tag_out_r <= pe_tag_in; end end assign enable = ready_out_b || ~valid_out_b; assign ready_in = enable && batch_in_done; - assign pe_enable = enable; assign valid_out_u = valid_out_r; assign data_out_u = data_out_r; @@ -135,15 +143,14 @@ module VX_pe_serializer #( end else begin - assign pe_data_in_s = data_in; + assign pe_data_out_w = data_in; - assign enable = ready_out_u || ~valid_out_s; + assign enable = ready_out_u || ~pe_valid_in; assign ready_in = enable; - assign pe_enable = enable; - assign valid_out_u = valid_out_s; - assign data_out_u = pe_data_out; - assign tag_out_u = tag_out_s; + assign valid_out_u = pe_valid_in; + assign data_out_u = pe_data_in; + assign tag_out_u = pe_tag_in; end From df3fc150f4af27f5cbe50c3a7fe06a0428b48070 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 06:06:52 -0700 Subject: [PATCH 176/488] minor update --- hw/rtl/libs/VX_cyclic_arbiter.sv | 12 +++++------- hw/rtl/libs/VX_rr_arbiter.sv | 16 ++++++++-------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 
0b8fcedfe..e134bea91 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -40,17 +40,17 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; - wire [LOG_NUM_REQS-1:0] grant_index_um, grant_index_ql; + wire [LOG_NUM_REQS-1:0] grant_index_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; end else if (grant_valid && grant_ready) begin - if (!IS_POW2 && grant_index_ql == LOG_NUM_REQS'(NUM_REQS-1)) begin + if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) begin grant_index_r <= '0; end else begin - grant_index_r <= grant_index_ql + LOG_NUM_REQS'(1); + grant_index_r <= grant_index + LOG_NUM_REQS'(1); end end end @@ -64,10 +64,8 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); - assign grant_index_ql = requests[grant_index_r] ? grant_index_r : grant_index_um; - - assign grant_index = grant_index_ql; - assign grant_onehot = NUM_REQS'(1) << grant_index_ql; + assign grant_index = requests[grant_index_r] ? 
grant_index_r : grant_index_um; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 6199d5794..e0af433f5 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -62,7 +62,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin @@ -94,7 +94,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin @@ -133,7 +133,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin @@ -181,7 +181,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin @@ -240,7 +240,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin @@ -312,7 +312,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin @@ -399,7 +399,7 @@ 
module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(1) << grant_index_w; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; assign grant_valid = (| requests); end else if (MODEL == 1) begin @@ -474,7 +474,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_table[state]; - assign grant_onehot = NUM_REQS'(1) << grant_index; + assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; assign grant_valid = (| requests); end From 088aed022ff239a5f4b901a2915d9f928536d4a4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 15:52:17 -0700 Subject: [PATCH 177/488] minor update --- hw/rtl/core/VX_dispatch.sv | 6 +- hw/rtl/libs/VX_rr_arbiter.sv | 105 +++++++++++++++++++---------------- 2 files changed, 59 insertions(+), 52 deletions(-) diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 0766fd83f..3fe98ba93 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -50,8 +50,8 @@ module VX_dispatch import VX_gpu_pkg::*; #( `UNUSED_PIN (valid_out) ); - wire [`NUM_EX_UNITS-1:0] operands_reset; - assign operands_if.ready = operands_reset[operands_if.data.ex_type]; + wire [`NUM_EX_UNITS-1:0] operands_ready_in; + assign operands_if.ready = operands_ready_in[operands_if.data.ex_type]; for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin VX_elastic_buffer #( @@ -62,7 +62,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))), - .ready_in (operands_reset[i]), + .ready_in (operands_ready_in[i]), .data_in ({ operands_if.data.uuid, operands_if.data.wis, diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index e0af433f5..5c279989b 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -41,15 +41,16 @@ module VX_rr_arbiter #( end else if (LUT_OPT && NUM_REQS == 2) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] 
grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 3'b0_01, - 3'b1_?1: begin grant_index_w = LOG_NUM_REQS'(0); end + 3'b1_?1: begin grant_onehot_w = 2'b01; grant_index_w = LOG_NUM_REQS'(0); end 3'b0_1?, - 3'b1_10: begin grant_index_w = LOG_NUM_REQS'(1); end - default: begin grant_index_w = 'x; end + 3'b1_10: begin grant_onehot_w = 2'b10; grant_index_w = LOG_NUM_REQS'(1); end + default: begin grant_onehot_w = 2'b00; grant_index_w = 'x; end endcase end @@ -62,26 +63,27 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 3) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, 5'b01_0?1, - 5'b10_??1: begin grant_index_w = LOG_NUM_REQS'(0); end + 5'b10_??1: begin grant_onehot_w = 3'b001; grant_index_w = LOG_NUM_REQS'(0); end 5'b00_?1?, 5'b01_010, - 5'b10_?10: begin grant_index_w = LOG_NUM_REQS'(1); end + 5'b10_?10: begin grant_onehot_w = 3'b010; grant_index_w = LOG_NUM_REQS'(1); end 5'b00_10?, 5'b01_1??, - 5'b10_100: begin grant_index_w = LOG_NUM_REQS'(2); end - default: begin grant_index_w = 'x; end + 5'b10_100: begin grant_onehot_w = 3'b100; grant_index_w = LOG_NUM_REQS'(2); end + default: begin grant_onehot_w = 3'b000; grant_index_w = 'x; end endcase end @@ -94,12 +96,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 4) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -107,20 +110,20 @@ module VX_rr_arbiter #( 6'b00_0001, 6'b01_00?1, 6'b10_0??1, - 
6'b11_???1: begin grant_index_w = LOG_NUM_REQS'(0); end + 6'b11_???1: begin grant_onehot_w = 4'b0001; grant_index_w = LOG_NUM_REQS'(0); end 6'b00_??1?, 6'b01_0010, 6'b10_0?10, - 6'b11_??10: begin grant_index_w = LOG_NUM_REQS'(1); end + 6'b11_??10: begin grant_onehot_w = 4'b0010; grant_index_w = LOG_NUM_REQS'(1); end 6'b00_?10?, 6'b01_?1??, 6'b10_0100, - 6'b11_?100: begin grant_index_w = LOG_NUM_REQS'(2); end + 6'b11_?100: begin grant_onehot_w = 4'b0100; grant_index_w = LOG_NUM_REQS'(2); end 6'b00_100?, 6'b01_10??, 6'b10_1???, - 6'b11_1000: begin grant_index_w = LOG_NUM_REQS'(3); end - default: begin grant_index_w = 'x; end + 6'b11_1000: begin grant_onehot_w = 4'b1000; grant_index_w = LOG_NUM_REQS'(3); end + default: begin grant_onehot_w = 4'b0000; grant_index_w = 'x; end endcase end @@ -133,12 +136,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 5) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -147,28 +151,28 @@ module VX_rr_arbiter #( 8'b001_000?1, 8'b010_00??1, 8'b011_0???1, - 8'b100_????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 8'b100_????1: begin grant_onehot_w = 5'b00001; grant_index_w = LOG_NUM_REQS'(0); end 8'b000_???1?, 8'b001_00010, 8'b010_00?10, 8'b011_0??10, - 8'b100_???10: begin grant_index_w = LOG_NUM_REQS'(1); end + 8'b100_???10: begin grant_onehot_w = 5'b00010; grant_index_w = LOG_NUM_REQS'(1); end 8'b000_??10?, 8'b001_??1??, 8'b010_00100, 8'b011_0?100, - 8'b100_??100: begin grant_index_w = LOG_NUM_REQS'(2); end + 8'b100_??100: begin grant_onehot_w = 5'b00100; grant_index_w = LOG_NUM_REQS'(2); end 8'b000_?100?, 8'b001_?10??, 8'b010_?1???, 8'b011_01000, - 8'b100_?1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 8'b100_?1000: begin grant_onehot_w = 5'b01000; 
grant_index_w = LOG_NUM_REQS'(3); end 8'b000_1000?, 8'b001_100??, 8'b010_10???, 8'b011_1????, - 8'b100_10000: begin grant_index_w = LOG_NUM_REQS'(4); end - default: begin grant_index_w = 'x; end + 8'b100_10000: begin grant_onehot_w = 5'b10000; grant_index_w = LOG_NUM_REQS'(4); end + default: begin grant_onehot_w = 5'b00000; grant_index_w = 'x; end endcase end @@ -181,12 +185,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 6) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -196,38 +201,38 @@ module VX_rr_arbiter #( 9'b010_000??1, 9'b011_00???1, 9'b100_0????1, - 9'b101_?????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 9'b101_?????1: begin grant_onehot_w = 6'b000001; grant_index_w = LOG_NUM_REQS'(0); end 9'b000_????1?, 9'b001_000010, 9'b010_000?10, 9'b011_00??10, 9'b100_0???10, - 9'b101_????10: begin grant_index_w = LOG_NUM_REQS'(1); end + 9'b101_????10: begin grant_onehot_w = 6'b000010; grant_index_w = LOG_NUM_REQS'(1); end 9'b000_???10?, 9'b001_???1??, 9'b010_000100, 9'b011_00?100, 9'b100_0??100, - 9'b101_???100: begin grant_index_w = LOG_NUM_REQS'(2); end + 9'b101_???100: begin grant_onehot_w = 6'b000100; grant_index_w = LOG_NUM_REQS'(2); end 9'b000_??100?, 9'b001_??10??, 9'b010_??1???, 9'b011_001000, 9'b100_0?1000, - 9'b101_??1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 9'b101_??1000: begin grant_onehot_w = 6'b001000; grant_index_w = LOG_NUM_REQS'(3); end 9'b000_?1000?, 9'b001_?100??, 9'b010_?10???, 9'b011_?1????, 9'b100_010000, - 9'b101_?10000: begin grant_index_w = LOG_NUM_REQS'(4); end + 9'b101_?10000: begin grant_onehot_w = 6'b010000; grant_index_w = LOG_NUM_REQS'(4); end 9'b000_10000?, 9'b001_1000??, 9'b010_100???, 9'b011_10????, 9'b100_1?????, - 9'b101_100000: begin 
grant_index_w = LOG_NUM_REQS'(5); end - default: begin grant_index_w = 'x; end + 9'b101_100000: begin grant_onehot_w = 6'b100000; grant_index_w = LOG_NUM_REQS'(5); end + default: begin grant_onehot_w = 6'b000000; grant_index_w = 'x; end endcase end @@ -240,12 +245,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 7) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -256,50 +262,50 @@ module VX_rr_arbiter #( 10'b011_000???1, 10'b100_000???1, 10'b101_00????1, - 10'b110_??????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 10'b110_??????1: begin grant_onehot_w = 7'b0000001; grant_index_w = LOG_NUM_REQS'(0); end 10'b000_?????1?, 10'b001_0000010, 10'b010_0000?10, 10'b011_000??10, 10'b100_00???10, 10'b101_0????10, - 10'b110_?????10: begin grant_index_w = LOG_NUM_REQS'(1); end + 10'b110_?????10: begin grant_onehot_w = 7'b0000010; grant_index_w = LOG_NUM_REQS'(1); end 10'b000_????10?, 10'b001_????1??, 10'b010_0000100, 10'b011_000?100, 10'b100_00??100, 10'b101_0???100, - 10'b110_????100: begin grant_index_w = LOG_NUM_REQS'(2); end + 10'b110_????100: begin grant_onehot_w = 7'b0000100; grant_index_w = LOG_NUM_REQS'(2); end 10'b000_???100?, 10'b001_???10??, 10'b010_???1???, 10'b011_0001000, 10'b100_00?1000, 10'b101_0??1000, - 10'b110_???1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 10'b110_???1000: begin grant_onehot_w = 7'b0001000; grant_index_w = LOG_NUM_REQS'(3); end 10'b000_??1000?, 10'b001_??100??, 10'b010_??10???, 10'b011_??1????, 10'b100_0010000, 10'b101_0?10000, - 10'b110_??10000: begin grant_index_w = LOG_NUM_REQS'(4); end + 10'b110_??10000: begin grant_onehot_w = 7'b0010000; grant_index_w = LOG_NUM_REQS'(4); end 10'b000_?10000?, 10'b001_?1000??, 10'b010_?100???, 10'b011_?10????, 
10'b100_?1?????, 10'b101_0100000, - 10'b110_?100000: begin grant_index_w = LOG_NUM_REQS'(5); end + 10'b110_?100000: begin grant_onehot_w = 7'b0100000; grant_index_w = LOG_NUM_REQS'(5); end 10'b000_100000?, 10'b001_10000??, 10'b010_1000???, 10'b011_100????, 10'b100_10?????, 10'b101_1??????, - 10'b110_1000000: begin grant_index_w = LOG_NUM_REQS'(6); end - default: begin grant_index_w = 'x; end + 10'b110_1000000: begin grant_onehot_w = 7'b1000000; grant_index_w = LOG_NUM_REQS'(6); end + default: begin grant_onehot_w = 7'b0000000; grant_index_w = 'x; end endcase end @@ -312,12 +318,13 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (LUT_OPT && NUM_REQS == 8) begin reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; reg [LOG_NUM_REQS-1:0] state; always @(*) begin @@ -329,7 +336,7 @@ module VX_rr_arbiter #( 11'b100_000????1, 11'b101_00?????1, 11'b110_0??????1, - 11'b111_???????1: begin grant_index_w = LOG_NUM_REQS'(0); end + 11'b111_???????1: begin grant_onehot_w = 8'b00000001; grant_index_w = LOG_NUM_REQS'(0); end 11'b000_??????1?, 11'b001_00000010, 11'b010_00000?10, @@ -337,7 +344,7 @@ module VX_rr_arbiter #( 11'b100_000???10, 11'b101_00????10, 11'b110_0?????10, - 11'b111_??????10: begin grant_index_w = LOG_NUM_REQS'(1); end + 11'b111_??????10: begin grant_onehot_w = 8'b00000010; grant_index_w = LOG_NUM_REQS'(1); end 11'b000_?????10?, 11'b001_?????1??, 11'b010_00000100, @@ -345,7 +352,7 @@ module VX_rr_arbiter #( 11'b100_000??100, 11'b101_00???100, 11'b110_0????100, - 11'b111_?????100: begin grant_index_w = LOG_NUM_REQS'(2); end + 11'b111_?????100: begin grant_onehot_w = 8'b00000100; grant_index_w = LOG_NUM_REQS'(2); end 11'b000_????100?, 11'b001_????10??, 11'b010_????1???, @@ -353,7 +360,7 @@ module VX_rr_arbiter #( 11'b100_000?1000, 11'b101_00??1000, 11'b110_0???1000, - 
11'b111_????1000: begin grant_index_w = LOG_NUM_REQS'(3); end + 11'b111_????1000: begin grant_onehot_w = 8'b00001000; grant_index_w = LOG_NUM_REQS'(3); end 11'b000_???1000?, 11'b001_???100??, 11'b010_???10???, @@ -361,7 +368,7 @@ module VX_rr_arbiter #( 11'b100_00010000, 11'b101_00?10000, 11'b110_0??10000, - 11'b111_???10000: begin grant_index_w = LOG_NUM_REQS'(4); end + 11'b111_???10000: begin grant_onehot_w = 8'b00010000; grant_index_w = LOG_NUM_REQS'(4); end 11'b000_??10000?, 11'b001_??1000??, 11'b010_??100???, @@ -369,7 +376,7 @@ module VX_rr_arbiter #( 11'b100_??1?????, 11'b101_00100000, 11'b110_0?100000, - 11'b111_??100000: begin grant_index_w = LOG_NUM_REQS'(5); end + 11'b111_??100000: begin grant_onehot_w = 8'b00100000; grant_index_w = LOG_NUM_REQS'(5); end 11'b000_?100000?, 11'b001_?10000??, 11'b010_?1000???, @@ -377,7 +384,7 @@ module VX_rr_arbiter #( 11'b100_?10?????, 11'b101_?1??????, 11'b110_01000000, - 11'b111_?1000000: begin grant_index_w = LOG_NUM_REQS'(6); end + 11'b111_?1000000: begin grant_onehot_w = 8'b01000000; grant_index_w = LOG_NUM_REQS'(6); end 11'b000_1000000?, 11'b001_100000??, 11'b010_10000???, @@ -385,8 +392,8 @@ module VX_rr_arbiter #( 11'b100_100?????, 11'b101_10??????, 11'b110_1???????, - 11'b111_10000000: begin grant_index_w = LOG_NUM_REQS'(7); end - default: begin grant_index_w = 'x; end + 11'b111_10000000: begin grant_onehot_w = 8'b10000000; grant_index_w = LOG_NUM_REQS'(7); end + default: begin grant_onehot_w = 8'b00000000; grant_index_w = 'x; end endcase end @@ -399,7 +406,7 @@ module VX_rr_arbiter #( end assign grant_index = grant_index_w; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); end else if (MODEL == 1) begin From 2ca343910969dafea2f011d6cbb5d650fa9048e7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 15:52:27 -0700 Subject: [PATCH 178/488] xrt runtime update --- runtime/xrt/vortex.cpp | 65 
++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 408bf23ed..8c273cf7f 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -66,11 +66,14 @@ struct platform_info_t { }; static const platform_info_t g_platforms[] = { - {"vortex_xrtsim", 4, 0x10, 0x0}, // 64 KB banks - {"xilinx_u50", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_u200", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_u280", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_vck5000", 0, 0x21, 0xC000000000}, + {"vortex_xrtsim", 4, 16, 0x0}, // 16 x 64 KB = 1 MB + {"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 + {"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 + {"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 + {"xilinx_u280", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 + {"xilinx_u55c", 5, 29, 0x0}, // 32 x 512 MB = 16 GB HBM2 + {"xilinx_vck5000", 0, 33, 0xC000000000}, // 1 x 8 GB = 8 GB DDR4 + {"xilinx_kv260", 0, 32, 0x0}, // 1 x 4 GB = 4 GB DDR4 }; #ifdef CPP_API @@ -277,6 +280,8 @@ public: xrtDevice_ = xrtDevice; xrtKernel_ = xrtKernel; + printf("info: device name=%s.\n", device_name.c_str()); + CHECK_ERR(get_platform_info(device_name, &platform_), { fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str()); return err; @@ -286,25 +291,6 @@ public: return err; }); - uint32_t num_banks = 1 << platform_.lg2_num_banks; - uint64_t bank_size = 1ull << platform_.lg2_bank_size; - - for (uint32_t i = 0; i < num_banks; ++i) { - uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12); - uint64_t reg_value = platform_.mem_base + i * bank_size; - - CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), { - return err; - }); - - CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), { - return err; - }); - #ifndef BANK_INTERLEAVE - break; - #endif - } - CHECK_ERR(this->read_register(MMIO_DEV_ADDR, (uint32_t *)&dev_caps_), { return err; }); @@ 
-321,6 +307,37 @@ public: return err; }); + uint32_t num_banks = 1 << platform_.lg2_num_banks; + uint64_t bank_size = 1ull << platform_.lg2_bank_size; + + // adjust memory bank size to architecture limit + int isa_arch = VX_ISA_ARCH(isa_caps_); + if (isa_arch == 32) { + uint64_t max_mem_size = 1ull << 32; + uint64_t need_bank_size = max_mem_size / num_banks; + if (bank_size > need_bank_size) { + printf("info: adjusted bank size from 0x%lx to 0x%lx bytes.\n", bank_size, need_bank_size); + bank_size = need_bank_size; + platform_.lg2_bank_size = log2ceil(bank_size); + } + } + + for (uint32_t i = 0; i < num_banks; ++i) { + uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12); + uint64_t reg_value = platform_.mem_base + i * bank_size; + + CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), { + return err; + }); + + CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), { + return err; + }); + #ifndef BANK_INTERLEAVE + break; + #endif + } + global_mem_size_ = num_banks * bank_size; #ifdef BANK_INTERLEAVE From 51719f69bb127ab08892941020e3e0ad1a65b6df Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 16:51:00 -0700 Subject: [PATCH 179/488] minor update --- hw/rtl/cache/VX_cache.sv | 12 ++++++------ hw/rtl/cache/VX_cache_bank.sv | 12 ++++++------ hw/rtl/cache/VX_cache_bypass.sv | 4 ++-- hw/rtl/cache/VX_cache_cluster.sv | 6 +++--- hw/rtl/libs/VX_stream_arb.sv | 1 - 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 60493665b..90b34a1e4 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -93,8 +93,8 @@ module VX_cache import VX_gpu_pkg::*; #( localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; - localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); - localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); + localparam 
CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); + localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1); localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0; @@ -139,7 +139,7 @@ module VX_cache import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_REQS; ++i) begin VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), - .SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), @@ -198,7 +198,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( .clk (clk), @@ -388,8 +388,8 @@ module VX_cache import VX_gpu_pkg::*; #( .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), - .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF) + .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)), + .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 
0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) ) bank ( .clk (clk), .reset (bank_reset), diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 22d956dba..19c24ad5d 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -53,11 +53,11 @@ module VX_cache_bank #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, - // Core response output buffer - parameter CORE_OUT_BUF = 0, + // Core response output register + parameter CORE_OUT_REG = 0, - // Memory request output buffer - parameter MEM_OUT_BUF = 0, + // Memory request output register + parameter MEM_OUT_REG = 0, parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE), parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS), @@ -567,7 +567,7 @@ module VX_cache_bank #( VX_elastic_buffer #( .DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH), .SIZE (CRSQ_SIZE), - .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) + .OUT_REG (CORE_OUT_REG) ) core_rsp_queue ( .clk (clk), .reset (reset), @@ -632,7 +632,7 @@ module VX_cache_bank #( .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1), .DEPTH (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + .OUT_REG (MEM_OUT_REG) ) mem_req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index dc88c6c1f..7992ec9e8 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -219,7 +219,7 @@ module VX_cache_bypass #( VX_elastic_buffer #( .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .SIZE (DIRECT_PASSTHRU ? 
0 : `TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( .clk (clk), @@ -307,7 +307,7 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(CORE_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 5e0010a8c..a56c9a817 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -125,8 +125,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_SEL_IDX (TAG_SEL_IDX), .ARBITER ("R"), .REQ_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0), - .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0) - ) cache_arb ( + .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? CORE_OUT_BUF : 0) + ) core_arb ( .clk (clk), .reset (reset), .bus_in_if (core_bus_tmp_if), @@ -186,7 +186,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (MEM_TAG_WIDTH), .TAG_SEL_IDX (TAG_SEL_IDX), .ARBITER ("R"), - .REQ_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0), + .REQ_OUT_BUF ((NUM_CACHES > 1) ? MEM_OUT_BUF : 0), .RSP_OUT_BUF ((NUM_CACHES > 1) ? 
2 : 0) ) mem_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 413da98f0..13cde1cd9 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -335,7 +335,6 @@ module VX_stream_arb #( // #Inputs == #Outputs for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), From 9718a5b405e644238edd876635c8d3ebd77929b2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 19:20:07 -0700 Subject: [PATCH 180/488] fpu timing optimization --- hw/rtl/fpu/VX_fpu_dsp.sv | 103 +++++++++++++++------------------ hw/rtl/libs/VX_onehot_shift.sv | 2 + hw/rtl/libs/VX_transpose.sv | 2 + 3 files changed, 51 insertions(+), 56 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 9e8edef09..5e3738785 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,51 +54,25 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); - localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; + localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) wire [NUM_FPCORES-1:0] per_core_ready_in; - wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; wire [NUM_FPCORES-1:0] per_core_ready_out; wire [NUM_FPCORES-1:0] per_core_valid_out; wire [NUM_FPCORES-1:0] per_core_has_fflags; fflags_t [NUM_FPCORES-1:0] per_core_fflags; - wire div_ready_in, sqrt_ready_in; - wire [NUM_LANES-1:0][31:0] div_result, sqrt_result; - wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out; - wire div_ready_out, sqrt_ready_out; - wire div_valid_out, sqrt_valid_out; - wire div_has_fflags, sqrt_has_fflags; - fflags_t div_fflags, sqrt_fflags; - - reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed; - - 
wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; - - always @(*) begin - is_madd = 'x; - is_sub = 'x; - is_neg = 'x; - is_div = 'x; - is_itof = 'x; - is_signed = 'x; - case (op_type) - `INST_FPU_ADD: begin is_madd = 0; is_neg = 0; is_sub = fmt[1]; end - `INST_FPU_MUL: begin is_madd = 0; is_neg = 1; is_sub = 0; end - `INST_FPU_MADD: begin is_madd = 1; is_neg = 0; is_sub = fmt[1]; end - `INST_FPU_NMADD: begin is_madd = 1; is_neg = 1; is_sub = fmt[1]; end - `INST_FPU_DIV: begin is_div = 1; end - `INST_FPU_SQRT: begin is_div = 0; end - `INST_FPU_F2I: begin is_itof = 0; is_signed = 1; end - `INST_FPU_F2U: begin is_itof = 0; is_signed = 0; end - `INST_FPU_I2F: begin is_itof = 1; is_signed = 1; end - `INST_FPU_U2F: begin is_itof = 1; is_signed = 0; end - default: begin end - endcase - end + wire [1:0] div_sqrt_ready_in; + wire [1:0][NUM_LANES*`XLEN-1:0] div_sqrt_result; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; + wire [1:0] div_sqrt_ready_out; + wire [1:0] div_sqrt_valid_out; + wire [1:0] div_sqrt_has_fflags; + fflags_t [1:0] div_sqrt_fflags; `RESET_RELAY (fma_reset, reset); `RESET_RELAY (div_reset, reset); @@ -120,7 +94,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (datab) `UNUSED_VAR (datac) + // Decode instruction type + wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; + wire is_sqrt = op_type[0]; + wire is_itof = op_type[1]; + wire is_signed = ~op_type[0]; + wire is_madd = op_type[1]; + wire is_neg = op_type[0]; + wire is_sub = fmt[1]; + // can accept new request? 
+ assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; assign ready_in = per_core_ready_in[core_select]; VX_fpu_fma #( @@ -154,19 +138,19 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_div ( .clk (clk), .reset (div_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div), - .ready_in (div_ready_in), + .valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_sqrt), + .ready_in (div_sqrt_ready_in[0]), .mask_in (mask_in), .tag_in (tag_in), .frm (frm), .dataa (dataa_s), .datab (datab_s), - .has_fflags (div_has_fflags), - .fflags (div_fflags), - .result (div_result), - .tag_out (div_tag_out), - .valid_out (div_valid_out), - .ready_out (div_ready_out) + .has_fflags (div_sqrt_has_fflags[0]), + .fflags (div_sqrt_fflags[0]), + .result (div_sqrt_result[0]), + .tag_out (div_sqrt_tag_out[0]), + .valid_out (div_sqrt_valid_out[0]), + .ready_out (div_sqrt_ready_out[0]) ); VX_fpu_sqrt #( @@ -175,18 +159,18 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_sqrt ( .clk (clk), .reset (sqrt_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div), - .ready_in (sqrt_ready_in), + .valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_sqrt), + .ready_in (div_sqrt_ready_in[1]), .mask_in (mask_in), .tag_in (tag_in), .frm (frm), .dataa (dataa_s), - .has_fflags (sqrt_has_fflags), - .fflags (sqrt_fflags), - .result (sqrt_result), - .tag_out (sqrt_tag_out), - .valid_out (sqrt_valid_out), - .ready_out (sqrt_ready_out) + .has_fflags (div_sqrt_has_fflags[1]), + .fflags (div_sqrt_fflags[1]), + .result (div_sqrt_result[1]), + .tag_out (div_sqrt_tag_out[1]), + .valid_out (div_sqrt_valid_out[1]), + .ready_out (div_sqrt_ready_out[1]) ); wire cvt_ret_int_in = ~is_itof; @@ -246,7 +230,15 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - assign per_core_ready_in[FPU_DIVSQRT] = is_div ? 
div_ready_in : sqrt_ready_in; + wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; + for (genvar i = 0; i < 2; ++i) begin + assign div_sqrt_arb_data_in[i] = { + div_sqrt_result[i], + div_sqrt_has_fflags[i], + div_sqrt_fflags[i], + div_sqrt_tag_out[i] + }; + end VX_stream_arb #( .NUM_INPUTS (2), @@ -256,10 +248,9 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) div_sqrt_arb ( .clk (clk), .reset (reset), - .valid_in ({sqrt_valid_out, div_valid_out}), - .ready_in ({sqrt_ready_out, div_ready_out}), - .data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out}, - {div_result, div_has_fflags, div_fflags, div_tag_out}}), + .valid_in (div_sqrt_valid_out), + .ready_in (div_sqrt_ready_out), + .data_in (div_sqrt_arb_data_in), .data_out ({ per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], diff --git a/hw/rtl/libs/VX_onehot_shift.sv b/hw/rtl/libs/VX_onehot_shift.sv index 950d1f380..5ab5712a2 100644 --- a/hw/rtl/libs/VX_onehot_shift.sv +++ b/hw/rtl/libs/VX_onehot_shift.sv @@ -13,6 +13,7 @@ `include "VX_platform.vh" +`TRACING_OFF module VX_onehot_shift #( parameter N = 1, parameter M = 1 @@ -28,3 +29,4 @@ module VX_onehot_shift #( end endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv index 93a8c1683..7b2c273ef 100644 --- a/hw/rtl/libs/VX_transpose.sv +++ b/hw/rtl/libs/VX_transpose.sv @@ -13,6 +13,7 @@ `include "VX_platform.vh" +`TRACING_OFF module VX_transpose #( parameter N = 1, parameter M = 1 @@ -27,3 +28,4 @@ module VX_transpose #( end endmodule +`TRACING_ON From 6d5e71a062424f07a3ca80fff4900c17e779b9f0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 25 Aug 2024 20:12:05 -0700 Subject: [PATCH 181/488] minor update --- hw/rtl/core/VX_decode.sv | 7 ++++--- hw/rtl/core/VX_schedule.sv | 4 ++-- hw/rtl/interfaces/VX_decode_sched_if.sv | 10 +++++----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index d3ca4d6e4..897dfcc11 100644 --- 
a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -557,9 +557,10 @@ module VX_decode import VX_gpu_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; - assign decode_sched_if.valid = fetch_fire; - assign decode_sched_if.wid = fetch_if.data.wid; - assign decode_sched_if.is_wstall = is_wstall; + assign decode_sched_if.valid = fetch_fire; + assign decode_sched_if.wid = fetch_if.data.wid; + assign decode_sched_if.unlock = ~is_wstall; + `ifndef L1_ENABLE assign fetch_if.ibuf_pop = decode_if.ibuf_pop; `endif diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 9cdf879eb..fbe0bd959 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -189,7 +189,7 @@ module VX_schedule import VX_gpu_pkg::*; #( end // decode unlock - if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin + if (decode_sched_if.valid && decode_sched_if.unlock) begin stalled_warps_n[decode_sched_if.wid] = 0; end @@ -415,7 +415,7 @@ module VX_schedule import VX_gpu_pkg::*; #( timeout_ctr <= '0; timeout_enable <= 0; end else begin - if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin + if (decode_sched_if.valid && decode_sched_if.unlock) begin timeout_enable <= 1; end if (timeout_enable && active_warps !=0 && active_warps == stalled_warps) begin diff --git a/hw/rtl/interfaces/VX_decode_sched_if.sv b/hw/rtl/interfaces/VX_decode_sched_if.sv index b82aafb55..1f47c30e9 100644 --- a/hw/rtl/interfaces/VX_decode_sched_if.sv +++ b/hw/rtl/interfaces/VX_decode_sched_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,18 +16,18 @@ interface VX_decode_sched_if (); wire valid; - wire is_wstall; + wire unlock; wire [`NW_WIDTH-1:0] wid; modport master ( output valid, - output is_wstall, + output unlock, output wid ); modport slave ( input valid, - input is_wstall, + input unlock, input wid ); From 5adfd5ec68a4851121d68a1ac82d8d940f5ee9a3 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 26 Aug 2024 23:45:00 -0700 Subject: [PATCH 182/488] minor update --- hw/rtl/VX_socket.sv | 2 +- hw/rtl/core/VX_alu_muldiv.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 2 +- hw/rtl/fpu/VX_fpu_dpi.sv | 2 +- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- hw/rtl/libs/VX_priority_encoder.sv | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 9ed76814b..54822176d 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -184,7 +184,7 @@ module VX_socket import VX_gpu_pkg::*; #( .DATA_SIZE (`L1_LINE_SIZE), .TAG_WIDTH (L1_MEM_TAG_WIDTH), .TAG_SEL_IDX (0), - .ARBITER ("R"), + .ARBITER ("P"), // prioritize the icache .REQ_OUT_BUF (3), .RSP_OUT_BUF (3) ) mem_arb ( diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 650c27833..8e3a1ba4f 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -324,7 +324,7 @@ module VX_alu_muldiv #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (1) ) rsp_buf ( .clk (clk), diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 7a7e9e2db..9f1695a28 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -50,7 +50,7 @@ module VX_mem_unit import 
VX_gpu_pkg::*; #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), .RSP_OUT_BUF (1), - .ARBITER ("R") + .ARBITER ("P") ) lmem_switch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 0ba7d54f3..9670241b3 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -430,7 +430,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (0) ) div_sqrt_arb ( .clk (clk), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 5e3738785..bfe0baa05 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -243,7 +243,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (0) ) div_sqrt_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 3dc5291ee..a3928492a 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -43,7 +43,7 @@ module VX_priority_encoder #( end else if (N == 2) begin - assign onehot_out = {~reversed[0], reversed[0]}; + assign onehot_out = {reversed[1] && ~reversed[0], reversed[0]}; assign index_out = ~reversed[0]; assign valid_out = (| reversed); From 4480ed8b0e3e03a3081e0e6afdcabad22f3de8f7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 27 Aug 2024 01:19:02 -0700 Subject: [PATCH 183/488] minor update --- hw/rtl/libs/VX_cyclic_arbiter.sv | 9 ++++++--- hw/rtl/mem/VX_local_mem.sv | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index e134bea91..a6673c8b7 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -41,6 +41,7 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; wire [LOG_NUM_REQS-1:0] grant_index_um; + wire [NUM_REQS-1:0] grant_onehot_um; reg 
[LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin @@ -59,13 +60,15 @@ module VX_cyclic_arbiter #( .N (NUM_REQS) ) priority_encoder ( .data_in (requests), - `UNUSED_PIN (onehot_out), + .onehot_out (grant_onehot_um), .index_out (grant_index_um), .valid_out (grant_valid) ); - assign grant_index = requests[grant_index_r] ? grant_index_r : grant_index_um; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; + wire is_hit = requests[grant_index_r]; + + assign grant_index = is_hit ? grant_index_r : grant_index_um; + assign grant_onehot = is_hit ? (NUM_REQS'(1) << grant_index) : grant_onehot_um; end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 72e55fe8b..6f0c1270e 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -123,7 +123,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW (REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("R"), + .ARBITER ("C"), .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), From c162d04b8fe7604d6672af535a78561ec5dcb21d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 27 Aug 2024 03:17:01 -0700 Subject: [PATCH 184/488] minor update --- hw/rtl/fpu/VX_fpu_dsp.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index bfe0baa05..fcf94591c 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,12 +54,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); - localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH; + localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) wire [NUM_FPCORES-1:0] per_core_ready_in; - wire [NUM_FPCORES-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; wire 
[NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; wire [NUM_FPCORES-1:0] per_core_ready_out; wire [NUM_FPCORES-1:0] per_core_valid_out; @@ -67,7 +67,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES*`XLEN-1:0] div_sqrt_result; + wire [1:0][NUM_LANES*32:0] div_sqrt_result; wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; wire [1:0] div_sqrt_ready_out; wire [1:0] div_sqrt_valid_out; From 91b8c6e67a0634b313d6d4af1cf111e91f925b1e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 00:40:28 -0700 Subject: [PATCH 185/488] fixed xilinx fpu ip dut synthesis --- hw/syn/xilinx/dut/common.mk | 6 ++- hw/syn/xilinx/dut/core/Makefile | 1 + hw/syn/xilinx/dut/fpu/Makefile | 1 + hw/syn/xilinx/dut/project.tcl | 52 +++++++++++++------ hw/syn/xilinx/dut/top/Makefile | 1 + hw/syn/xilinx/dut/vortex/Makefile | 1 + hw/syn/xilinx/sandbox/project.tcl.in | 4 +- hw/syn/xilinx/{xrt => }/scripts/gen_ip.tcl | 23 ++++---- hw/syn/xilinx/{xrt => }/scripts/gen_xo.tcl | 0 hw/syn/xilinx/{xrt => scripts}/kill_build.sh | 0 .../xilinx/{xrt => scripts}/kill_hwserver.sh | 0 hw/syn/xilinx/{xrt => scripts}/kill_sim.sh | 0 .../{xrt => }/scripts/package_kernel.tcl | 12 ++--- hw/syn/xilinx/{xrt => }/scripts/xsim.tcl | 0 hw/syn/xilinx/xrt/Makefile | 2 +- 15 files changed, 70 insertions(+), 33 deletions(-) rename hw/syn/xilinx/{xrt => }/scripts/gen_ip.tcl (86%) rename hw/syn/xilinx/{xrt => }/scripts/gen_xo.tcl (100%) rename hw/syn/xilinx/{xrt => scripts}/kill_build.sh (100%) rename hw/syn/xilinx/{xrt => scripts}/kill_hwserver.sh (100%) rename hw/syn/xilinx/{xrt => scripts}/kill_sim.sh (100%) rename hw/syn/xilinx/{xrt => }/scripts/package_kernel.tcl (98%) rename hw/syn/xilinx/{xrt => }/scripts/xsim.tcl (100%) diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index b435b1409..f0588ede8 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -25,7 +25,11 @@ 
project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt - $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) +ifdef FPU_IP + FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts +else + $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts +endif clean: rm -rf project_1 diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index 86bb0b53c..deda5cce9 100644 --- a/hw/syn/xilinx/dut/core/Makefile +++ b/hw/syn/xilinx/dut/core/Makefile @@ -1,6 +1,7 @@ PROJECT = VX_core_top TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv +FPU_IP = 1 include ../../common.mk diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index 133a8a4e9..bb6610375 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -1,6 +1,7 @@ PROJECT = VX_fpu_dsp TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv +FPU_IP = 1 include ../../common.mk diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index c3e7e431c..bd9cb02e7 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -14,9 +14,9 @@ # Start time set start_time [clock seconds] -if { $::argc != 5 } { +if { $::argc != 6 } { puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" + puts "Usage: $::argv0 \n" exit } @@ -28,6 +28,7 @@ set device_part [lindex $::argv 1] set vcs_file [lindex $::argv 2] set xdc_file [lindex $::argv 3] set tool_dir [lindex $::argv 4] +set script_dir [lindex $::argv 5] #puts top_module #puts $device_part @@ -35,6 +36,14 @@ set tool_dir [lindex $::argv 4] #puts xdc_file #puts 
$tool_dir +# create fpu ip +if {[info exists ::env(FPU_IP)]} { + set ip_dir $::env(FPU_IP) + set argv [list $ip_dir $device_part] + set argc 2 + source ${script_dir}/gen_ip.tcl +} + source "${tool_dir}/parse_vcs_list.tcl" set vlist [parse_vcs_list "${vcs_file}"] @@ -61,25 +70,38 @@ foreach def $vdefines_list { set_property verilog_define $def $obj } +# add fpu ip +if {[info exists ::env(FPU_IP)]} { + set ip_dir $::env(FPU_IP) + add_files -norecurse -verbose ${ip_dir}/xil_fma/xil_fma.xci + add_files -norecurse -verbose ${ip_dir}/xil_fdiv/xil_fdiv.xci + add_files -norecurse -verbose ${ip_dir}/xil_fsqrt/xil_fsqrt.xci +} + +update_compile_order -fileset sources_1 + +set_property top $top_module [current_fileset] +set_property \ + -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \ + -value {-mode out_of_context -flatten_hierarchy "rebuilt"} \ + -objects [get_runs synth_1] + # Synthesis -synth_design -top $top_module -include_dirs $vincludes_list -mode out_of_context -flatten_hierarchy none +launch_runs synth_1 +wait_on_run synth_1 +open_run synth_1 write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages -# Optimize -opt_design - -# Place -place_design -write_checkpoint -force post_place.dcp -report_place_status -file place.rpt - -# Route -route_design -write_checkpoint -force post_route.dcp -report_route_status -file route.rpt +# Implementation +launch_runs impl_1 +wait_on_run impl_1 +open_run impl_1 +write_checkpoint -force post_impl.dcp # Generate the synthesis report +report_place_status -file place.rpt +report_route_status -file route.rpt report_timing_summary -file timing.rpt report_power -file power.rpt report_drc -file drc.rpt diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index bc55224f6..0480b08e5 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -1,6 +1,7 @@ PROJECT = vortex_afu TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv 
+FPU_IP = 1 include ../../common.mk diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile index ee49be436..e2525fae2 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -1,6 +1,7 @@ PROJECT = Vortex TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv +FPU_IP = 1 include ../../common.mk diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 7a25f6278..0e9a23f0a 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -397,14 +397,16 @@ update_compile_order -fileset sources_1 launch_runs synth_1 wait_on_run synth_1 open_run synth_1 +write_checkpoint -force post_synth.dcp +report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages # Implementation launch_runs impl_1 wait_on_run impl_1 open_run impl_1 +write_checkpoint -force post_impl.dcp # Generate reports -report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages report_place_status -file place.rpt report_route_status -file route.rpt report_timing_summary -file timing.rpt diff --git a/hw/syn/xilinx/xrt/scripts/gen_ip.tcl b/hw/syn/xilinx/scripts/gen_ip.tcl similarity index 86% rename from hw/syn/xilinx/xrt/scripts/gen_ip.tcl rename to hw/syn/xilinx/scripts/gen_ip.tcl index 5aae6db74..a1048fc77 100644 --- a/hw/syn/xilinx/xrt/scripts/gen_ip.tcl +++ b/hw/syn/xilinx/scripts/gen_ip.tcl @@ -1,31 +1,36 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -if { $::argc != 1 } { - puts "ERROR: Program \"$::argv0\" requires 1 arguments!\n" - puts "Usage: $::argv0 \n" +if { $::argc < 1 || $::argc > 2 } { + puts "ERROR: Program \"$::argv0\" requires 1 or 2 arguments!\n" + puts "Usage: $::argv0 []\n" exit } set ip_dir [lindex $::argv 0] +# create_ip requires that a project is open in memory. +if { $::argc == 2 } { + set device_part [lindex $::argv 1] + create_project -in_memory -part $device_part +} else { + # Create project without specifying a device part + create_project -in_memory +} + # IP folder does not exist. Create IP folder file mkdir ${ip_dir} -# create_ip requires that a project is open in memory. -# Create project but don't do anything with it -create_project -in_memory - create_ip -name floating_point -vendor xilinx.com -library ip -version 7.1 -module_name xil_fdiv -dir ${ip_dir} set_property -dict [list CONFIG.Component_Name {xil_fdiv} CONFIG.Operation_Type {Divide} CONFIG.Flow_Control {NonBlocking} CONFIG.Has_ACLKEN {true} CONFIG.C_Has_UNDERFLOW {true} CONFIG.C_Has_OVERFLOW {true} CONFIG.C_Has_INVALID_OP {true} CONFIG.C_Has_DIVIDE_BY_ZERO {true} CONFIG.A_Precision_Type {Single} CONFIG.C_A_Exponent_Width {8} CONFIG.C_A_Fraction_Width {24} CONFIG.Result_Precision_Type {Single} CONFIG.C_Result_Exponent_Width {8} CONFIG.C_Result_Fraction_Width {24} CONFIG.C_Mult_Usage {No_Usage} CONFIG.Has_RESULT_TREADY {false} CONFIG.C_Latency {28} CONFIG.C_Rate {1}] [get_ips xil_fdiv] diff --git a/hw/syn/xilinx/xrt/scripts/gen_xo.tcl b/hw/syn/xilinx/scripts/gen_xo.tcl similarity index 100% rename from hw/syn/xilinx/xrt/scripts/gen_xo.tcl rename to hw/syn/xilinx/scripts/gen_xo.tcl diff --git a/hw/syn/xilinx/xrt/kill_build.sh b/hw/syn/xilinx/scripts/kill_build.sh similarity index 100% rename from hw/syn/xilinx/xrt/kill_build.sh rename to hw/syn/xilinx/scripts/kill_build.sh diff --git a/hw/syn/xilinx/xrt/kill_hwserver.sh 
b/hw/syn/xilinx/scripts/kill_hwserver.sh similarity index 100% rename from hw/syn/xilinx/xrt/kill_hwserver.sh rename to hw/syn/xilinx/scripts/kill_hwserver.sh diff --git a/hw/syn/xilinx/xrt/kill_sim.sh b/hw/syn/xilinx/scripts/kill_sim.sh similarity index 100% rename from hw/syn/xilinx/xrt/kill_sim.sh rename to hw/syn/xilinx/scripts/kill_sim.sh diff --git a/hw/syn/xilinx/xrt/scripts/package_kernel.tcl b/hw/syn/xilinx/scripts/package_kernel.tcl similarity index 98% rename from hw/syn/xilinx/xrt/scripts/package_kernel.tcl rename to hw/syn/xilinx/scripts/package_kernel.tcl index 607e7955d..c88bca229 100644 --- a/hw/syn/xilinx/xrt/scripts/package_kernel.tcl +++ b/hw/syn/xilinx/scripts/package_kernel.tcl @@ -1,10 +1,10 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -51,15 +51,15 @@ create_project -force kernel_pack $path_to_tmp_project add_files -norecurse ${vsources_list} set obj [get_filesets sources_1] -set files [list \ +set ip_files [list \ [file normalize "${build_dir}/ip/xil_fdiv/xil_fdiv.xci"] \ [file normalize "${build_dir}/ip/xil_fma/xil_fma.xci"] \ [file normalize "${build_dir}/ip/xil_fsqrt/xil_fsqrt.xci"] \ ] -add_files -verbose -norecurse -fileset $obj $files +add_files -verbose -norecurse -fileset $obj $ip_files set_property include_dirs ${vincludes_list} [current_fileset] -#set_property verilog_define ${vdefines_list} [current_fileset] +set_property verilog_define ${vdefines_list} [current_fileset] set obj [get_filesets sources_1] set_property -verbose -name "top" -value ${krnl_name} -objects $obj @@ -238,7 +238,7 @@ for {set i 0} {$i < 1} {incr i} { set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] set_property address_offset [expr {0x040 + $i * 12}] $reg set_property size [expr {8*8}] $reg - set regparam [::ipx::add_register_parameter -quiet {ASSOCIATED_BUSIF} $reg] + set regparam [::ipx::add_register_parameter -quiet {ASSOCIATED_BUSIF} $reg] set_property value m_axi_mem_$i $regparam } diff --git a/hw/syn/xilinx/xrt/scripts/xsim.tcl b/hw/syn/xilinx/scripts/xsim.tcl similarity index 100% rename from hw/syn/xilinx/xrt/scripts/xsim.tcl rename to hw/syn/xilinx/scripts/xsim.tcl diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index e1acce8d6..9e86bd1a5 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -163,7 +163,7 @@ $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/../scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu 
sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) From f4426e012704082f069b8ee6e2e50d0150edef32 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 01:27:51 -0700 Subject: [PATCH 186/488] fpu timing optimization --- hw/rtl/fpu/VX_fpu_cvt.sv | 2 +- hw/rtl/fpu/VX_fpu_div.sv | 4 ++-- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- hw/rtl/fpu/VX_fpu_ncp.sv | 2 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 4 ++-- hw/rtl/libs/VX_pe_serializer.sv | 32 +++++++++++++------------------- 6 files changed, 20 insertions(+), 26 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 7587f8342..a2bf93988 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 68138bb7c..9cdea7867 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -67,8 +67,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(2*32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index ce99138cb..c42de701c 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -99,7 +99,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 
2 : 0) + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index bfc69316b..225033e1e 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 425f43d6e..c6961e1db 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -61,8 +61,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 2 : 0) + .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs + .OUT_BUF ((NUM_LANES != NUM_PES) ? 
2 : 0) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 8ae7900b5..7a891cfc7 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -115,31 +115,25 @@ module VX_pe_serializer #( end end - reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r; - reg [TAG_WIDTH-1:0] tag_out_r; - reg valid_out_r; + reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r, data_out_n; - wire valid_out_b = pe_valid_in && batch_out_done; - wire ready_out_b = ready_out_u || ~valid_out_u; - - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 1'b0; - end else if (ready_out_b) begin - valid_out_r <= valid_out_b; - end - if (ready_out_b) begin - data_out_r[batch_out_idx] <= pe_data_in; - tag_out_r <= pe_tag_in; + always @(*) begin + data_out_n = data_out_r; + if (pe_valid_in) begin + data_out_n[batch_out_idx] = pe_data_in; end end - assign enable = ready_out_b || ~valid_out_b; + always @(posedge clk) begin + data_out_r <= data_out_n; + end + + assign enable = ready_out_u || ~batch_out_done; assign ready_in = enable && batch_in_done; - assign valid_out_u = valid_out_r; - assign data_out_u = data_out_r; - assign tag_out_u = tag_out_r; + assign valid_out_u = batch_out_done; + assign data_out_u = data_out_n; + assign tag_out_u = pe_tag_in; end else begin From cf42025c205a71864e0f993aadb05dbdef448476 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 01:35:55 -0700 Subject: [PATCH 187/488] minor update --- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index fcf94591c..b692d2cda 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -67,7 +67,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES*32:0] div_sqrt_result; + wire 
[1:0][NUM_LANES*32-1:0] div_sqrt_result; wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; wire [1:0] div_sqrt_ready_out; wire [1:0] div_sqrt_valid_out; From 4cc7426c441c6552736223dfa1f2973f74e0326c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 02:52:20 -0700 Subject: [PATCH 188/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 6 +++--- hw/rtl/cache/VX_cache_data.sv | 4 ++-- hw/rtl/cache/VX_cache_wrap.sv | 6 +++--- hw/rtl/libs/VX_mem_adapter.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 2 +- hw/rtl/mem/VX_local_mem.sv | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 19c24ad5d..883a561a1 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -673,7 +673,7 @@ module VX_cache_bank #( end if (core_req_fire) begin if (core_req_rw) - `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); + `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); else `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); end @@ -682,9 +682,9 @@ module VX_cache_bank #( end if (mreq_queue_push) begin if (do_creq_wr_st1 && !WRITEBACK) - `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); + `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); else if (do_writeback_st1) - `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); + `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); else `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index efc873f41..318463f76 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -185,13 +185,13 @@ module VX_cache_data #( `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)); + `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)); end if (read && ~stall) begin `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)); end if (write && ~stall) begin - `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); + `TRACE(3, ("%d: 
%s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); end end `endif diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 3b1076d46..513c29b5d 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -158,7 +158,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( end if (PASSTHRU == 0) begin : cache_if - + VX_cache #( .INSTANCE_ID (INSTANCE_ID), .CACHE_SIZE (CACHE_SIZE), @@ -234,7 +234,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); + `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); else `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); end @@ -261,7 +261,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin if (mem_bus_if.req_data.rw) - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); else 
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 263df0159..068628be2 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -153,7 +153,7 @@ module VX_mem_adapter #( end assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out; `RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out), - ("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out)) + ("%t: *** out-of-order memory reponse! cur=0x%0h, expected=0x%0h", $time, mem_rsp_tag_in_x, mem_rsp_tag_out)) wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr}; diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 17e5923bd..75563b71b 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -360,7 +360,7 @@ module VX_mem_coalescer #( `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); if ($countones(out_req_pmask) > 1) begin - `TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); + `TRACE(1, ("%t: *** %s: coalesced=%0d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); end end if (out_rsp_fire) begin diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 6f0c1270e..2ba09fd61 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -334,7 +334,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); end else begin `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", @@ -352,7 +352,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin - `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); end else begin `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", From 6c1e7850046c606aac2e1b1d6b56ea31141e2def Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 03:08:08 -0700 Subject: [PATCH 189/488] minor update --- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 11 ++++------- hw/rtl/libs/VX_mem_scheduler.sv | 12 ++++++------ hw/rtl/mem/VX_gbar_unit.sv | 14 +++++++------- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 47bfcc6bf..06acfde39 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -194,7 +194,7 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (br_enable) begin - `TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + `TRACE(1, ("%d: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)); end end diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 75563b71b..e15d06564 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -342,7 +342,7 @@ module VX_mem_coalescer #( 
always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); + `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE(1, (", flags=")); `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); @@ -351,20 +351,17 @@ module VX_mem_coalescer #( `TRACE(1, (", data=")); `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); end else begin - `TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); + `TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); `TRACE(1, (", flags=")); `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); end `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); - `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); - if ($countones(out_req_pmask) > 1) begin - `TRACE(1, ("%t: *** %s: coalesced=%0d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); - end + `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)); end if (out_rsp_fire) begin - `TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); + `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS); `TRACE(1, (", offset=")); `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS); diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 5324d7ffa..c5b302177 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -584,39 +584,39 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, 
core_req_mask)); + `TRACE(1, ("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS); `TRACE(1, (", data=")); `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); end else begin - `TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); + `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); end `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s-core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); + `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS); `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)); end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%d: %s-mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); + `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); `TRACE(1, (", byteen=")); `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS); `TRACE(1, (", data=")); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); end else begin - `TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); + `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); end `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)); end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); + 
`TRACE(1, ("%d: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS); `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)); end diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index a6e5d9baa..3e5bbebcb 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_gbar_unit #( +module VX_gbar_unit #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, @@ -26,7 +26,7 @@ module VX_gbar_unit #( reg [`NB_WIDTH-1:0][`NUM_CORES-1:0] barrier_masks; wire [`CLOG2(`NUM_CORES+1)-1:0] active_barrier_count; wire [`NUM_CORES-1:0] curr_barrier_mask = barrier_masks[gbar_bus_if.req_id]; - + `POP_COUNT(active_barrier_count, curr_barrier_mask); `UNUSED_VAR (active_barrier_count) @@ -56,15 +56,15 @@ module VX_gbar_unit #( assign gbar_bus_if.rsp_valid = rsp_valid; assign gbar_bus_if.rsp_id = rsp_bar_id; assign gbar_bus_if.req_ready = 1; // global barrier unit is always ready (no dependencies) - + `ifdef DBG_TRACE_GBAR always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin - `TRACE(1, ("%d: %s-acquire: bar_id=%0d, size=%0d, core_id=%0d\n", + `TRACE(1, ("%d: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)); end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%d: %s-release: 
bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)); + `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)); end end `endif From 74a47ebbe473b80f9eb0f191944b1eb949804aaf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 04:36:13 -0700 Subject: [PATCH 190/488] displatch unit fix --- hw/rtl/core/VX_dispatch_unit.sv | 20 ++++++++++++++------ hw/rtl/core/VX_lsu_slice.sv | 4 ++-- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 5e6893e97..0bd4b45c4 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -68,8 +68,9 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( // batch select logic logic [BATCH_COUNT_W-1:0] batch_idx; - if (BATCH_COUNT != 1) begin + if (BATCH_COUNT != 1) begin + wire [BATCH_COUNT_W-1:0] batch_idx_n; wire [BATCH_COUNT-1:0] valid_batches; for (genvar i = 0; i < BATCH_COUNT; ++i) begin assign valid_batches[i] = | dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE]; @@ -82,12 +83,19 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .requests (valid_batches), - .grant_index (batch_idx), + .grant_index (batch_idx_n), `UNUSED_PIN (grant_onehot), `UNUSED_PIN (grant_valid), .grant_ready (batch_done) ); + always @(posedge clk) begin + if (reset) begin + batch_idx <= '0; + end else if (batch_done) begin + batch_idx <= batch_idx_n; + end + end end else begin assign batch_idx = 0; `UNUSED_VAR (batch_done) @@ -98,12 +106,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); end - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : blocks wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx]; wire valid_p, ready_p; - if (`NUM_THREADS != NUM_LANES) begin + if 
(`NUM_THREADS != NUM_LANES) begin : threads_split reg [NUM_PACKETS-1:0] sent_mask_p; wire [PID_WIDTH-1:0] start_p_n, start_p, end_p; wire dispatch_valid_r; @@ -206,7 +214,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( end else begin assign block_ready[block_idx] = ready_p && block_enable; end - assign block_done[block_idx] = ~dispatch_valid[issue_idx] || fire_eop; + assign block_done[block_idx] = fire_eop || ~dispatch_valid[issue_idx]; end else begin assign valid_p = dispatch_valid[issue_idx]; assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS]; @@ -217,7 +225,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign block_sop[block_idx] = 1'b1; assign block_eop[block_idx] = 1'b1; assign block_ready[block_idx] = ready_p; - assign block_done[block_idx] = ~valid_p || ready_p; + assign block_done[block_idx] = ready_p || ~valid_p; end wire [ISSUE_ISW_W-1:0] isw; diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 8c277f3e9..25a8223a8 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -517,13 +517,13 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)); `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid)); + `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); end else begin `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); `TRACE(1, (", flags=")); `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); - `TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid)); + `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, 
tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); end end if (mem_rsp_fire) begin diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index b692d2cda..0f0e551b7 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -67,7 +67,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( fflags_t [NUM_FPCORES-1:0] per_core_fflags; wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES*32-1:0] div_sqrt_result; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result; wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; wire [1:0] div_sqrt_ready_out; wire [1:0] div_sqrt_valid_out; From 41e41c9688ee04e7af107a96836f6469d22b15c1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 18:46:30 -0700 Subject: [PATCH 191/488] adjust SimX's split/join to match RTL. --- sim/simx/emulator.cpp | 13 +------------ sim/simx/emulator.h | 11 ++++++++--- sim/simx/execute.cpp | 15 ++++++++------- tests/regression/dogfood/main.cpp | 2 +- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 7ed9a10f9..14e213ba6 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -30,17 +30,6 @@ using namespace vortex; -Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask, Word PC) - : tmask(tmask) - , PC(PC) - , fallthrough(false) -{} - -Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask) - : tmask(tmask) - , fallthrough(true) -{} - Emulator::warp_t::warp_t(const Arch& arch) : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) @@ -85,7 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) - , ipdom_size_((arch.num_threads()-1) * 2) + , ipdom_size_(arch.num_threads()-1) { this->clear(); } diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 
de466d352..c37bbd68b 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -57,10 +57,15 @@ public: private: struct ipdom_entry_t { - ipdom_entry_t(const ThreadMask &tmask, Word PC); - ipdom_entry_t(const ThreadMask &tmask); + ipdom_entry_t(const ThreadMask &orig_tmask, const ThreadMask &else_tmask, Word PC) + : orig_tmask (orig_tmask) + , else_tmask (else_tmask) + , PC (PC) + , fallthrough(false) + {} - ThreadMask tmask; + ThreadMask orig_tmask; + ThreadMask else_tmask; Word PC; bool fallthrough; }; diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index db098726b..9f7e6a74a 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1347,11 +1347,9 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } else { next_tmask = else_tmask; } - // push reconvergence thread mask onto the stack - warp.ipdom_stack.emplace(warp.tmask); - // push not taken thread mask onto the stack + // push reconvergence and not-taken thread mask onto the stack auto ntaken_tmask = ~next_tmask & warp.tmask; - warp.ipdom_stack.emplace(ntaken_tmask, next_pc); + warp.ipdom_stack.emplace(warp.tmask, ntaken_tmask, next_pc); } // return divergent state for (uint32_t t = thread_start; t < num_threads; ++t) { @@ -1372,11 +1370,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::cout << "IPDOM stack is empty!\n" << std::flush; std::abort(); } - next_tmask = warp.ipdom_stack.top().tmask; - if (!warp.ipdom_stack.top().fallthrough) { + if (warp.ipdom_stack.top().fallthrough) { + next_tmask = warp.ipdom_stack.top().orig_tmask; + warp.ipdom_stack.pop(); + } else { + next_tmask = warp.ipdom_stack.top().else_tmask; next_pc = warp.ipdom_stack.top().PC; + warp.ipdom_stack.top().fallthrough = true; } - warp.ipdom_stack.pop(); } } break; case 4: { diff --git a/tests/regression/dogfood/main.cpp b/tests/regression/dogfood/main.cpp index 1fcf9d511..d308821f0 100644 --- a/tests/regression/dogfood/main.cpp +++ 
b/tests/regression/dogfood/main.cpp @@ -12,7 +12,7 @@ TestSuite* testSuite = nullptr; const char* kernel_file = "kernel.vxbin"; -int count = 1; +int count = 64; std::unordered_set selected; std::unordered_set excluded; int testid_s = 0; From 0f41774fea57748fda1bcea2ea02fe88f19f946a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 19:07:15 -0700 Subject: [PATCH 192/488] SimX's decode minor fix --- sim/simx/decode.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index dba57c4ef..c8af41b26 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -460,6 +460,11 @@ std::shared_ptr Emulator::decode(uint32_t code) const { switch (op) { case Opcode::FCI: switch (func7) { + case 0x20: // FCVT.S.D + case 0x21: // FCVT.D.S + instr->setDestReg(rd, RegType::Float); + instr->addSrcReg(rs1, RegType::Float); + break; case 0x2c: // FSQRT.S case 0x2d: // FSQRT.D instr->setDestReg(rd, RegType::Float); From a38960674ef2dca89f3d802bf532a03d0daca11f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 21:10:05 -0700 Subject: [PATCH 193/488] SimX split.N fix --- sim/simx/decode.cpp | 12 ++++++------ sim/simx/emulator.cpp | 6 ++---- sim/simx/execute.cpp | 7 ++----- sim/simx/types.h | 21 +++++++++++++++++++++ 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index c8af41b26..795a05eed 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -86,7 +86,7 @@ static const char* op_string(const Instr &instr) { auto func3 = instr.getFunc3(); auto func7 = instr.getFunc7(); auto rd = instr.getRDest(); - auto rs2 = instr.getRSrc(1); + auto rs1 = instr.getRSrc(1); auto imm = instr.getImm(); switch (opcode) { @@ -343,7 +343,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x60: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.W.S"; case 1: return "FCVT.WU.S"; case 2: return "FCVT.L.S"; @@ -352,7 +352,7 @@ static const 
char* op_string(const Instr &instr) { std::abort(); } case 0x61: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.W.D"; case 1: return "FCVT.WU.D"; case 2: return "FCVT.L.D"; @@ -361,7 +361,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x68: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.S.W"; case 1: return "FCVT.S.WU"; case 2: return "FCVT.S.L"; @@ -370,7 +370,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x69: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.D.W"; case 1: return "FCVT.D.WU"; case 2: return "FCVT.D.L"; @@ -395,7 +395,7 @@ static const char* op_string(const Instr &instr) { switch (func3) { case 0: return "TMC"; case 1: return "WSPAWN"; - case 2: return rs2 ? "SPLIT.N" : "SPLIT"; + case 2: return rs1 ? "SPLIT.N" : "SPLIT"; case 3: return "JOIN"; case 4: return "BAR"; case 5: return rd ? "PRED.N" : "PRED"; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 14e213ba6..88a0ecff3 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -162,10 +162,8 @@ instr_trace_t* Emulator::step() { uint64_t uuid = 0; #endif - DPH(1, "Fetch: cid=" << core_->id() << ", wid=" << scheduled_warp << ", tmask="); - for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) - DPN(1, warp.tmask.test(i)); - DPN(1, ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << uuid << ")" << std::endl); + DP(1, "Fetch: cid=" << core_->id() << ", wid=" << scheduled_warp << ", tmask=" << ThreadMaskOS(warp.tmask, arch_.num_threads()) + << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << uuid << ")"); // Fetch uint32_t instr_code = 0; diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 9f7e6a74a..ca4dac8d4 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1328,7 +1328,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { auto stack_size = warp.ipdom_stack.size(); ThreadMask then_tmask, else_tmask; - auto not_pred = 
rsrc2 & 0x1; + auto not_pred = (rsrc1 != 0); for (uint32_t t = 0; t < num_threads; ++t) { auto cond = (warp.ireg_file.at(t).at(rsrc0) & 0x1) ^ not_pred; then_tmask[t] = warp.tmask.test(t) && cond; @@ -1472,10 +1472,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } if (warp.tmask != next_tmask) { - DPH(3, "*** New Tmask="); - for (uint32_t i = 0; i < num_threads; ++i) - DPN(3, next_tmask.test(i)); - DPN(3, std::endl); + DP(3, "*** New Tmask=" << ThreadMaskOS(next_tmask, num_threads)); warp.tmask = next_tmask; if (!next_tmask.any()) { active_warps_.reset(wid); diff --git a/sim/simx/types.h b/sim/simx/types.h index b452dd379..17cf1685f 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -58,6 +58,27 @@ typedef std::bitset WarpMask; /////////////////////////////////////////////////////////////////////////////// +class ThreadMaskOS { +public: + ThreadMaskOS(const ThreadMask& mask, int size) + : mask_(mask) + , size_(size) + {} + + friend std::ostream& operator<<(std::ostream& os, const ThreadMaskOS& wrapper) { + for (int i = 0; i < wrapper.size_; ++i) { + os << wrapper.mask_[i]; + } + return os; + } + +private: + const ThreadMask& mask_; + int size_; +}; + +/////////////////////////////////////////////////////////////////////////////// + enum class RegType { None, Integer, From fa1fd396456aec14602437b1f394ed74440a8c20 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 28 Aug 2024 21:31:09 -0700 Subject: [PATCH 194/488] minor updates --- hw/rtl/cache/VX_cache.sv | 2 +- hw/rtl/cache/VX_cache_cluster.sv | 2 +- hw/rtl/cache/VX_cache_tags.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_commit.sv | 2 +- hw/rtl/core/VX_dispatch.sv | 2 +- hw/rtl/core/VX_fetch.sv | 2 +- hw/rtl/core/VX_fpu_unit.sv | 2 +- hw/rtl/core/VX_gather_unit.sv | 2 +- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 8 ++++---- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/core/VX_schedule.sv | 2 +- 
hw/rtl/core/VX_scoreboard.sv | 2 +- hw/rtl/core/VX_split_join.sv | 2 +- hw/rtl/fpu/VX_fpu_cvt.sv | 2 +- hw/rtl/fpu/VX_fpu_div.sv | 6 +++--- hw/rtl/fpu/VX_fpu_fma.sv | 6 +++--- hw/rtl/fpu/VX_fpu_fpnew.sv | 2 +- hw/rtl/fpu/VX_fpu_ncp.sv | 2 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 6 +++--- hw/rtl/libs/VX_avs_adapter.sv | 14 ++++++-------- 23 files changed, 37 insertions(+), 39 deletions(-) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 90b34a1e4..6d3e1351e 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -136,7 +136,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : core_rsp_bufs VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index a56c9a817..dbf4ffec7 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -102,7 +102,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : core_arbs VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), .TAG_WIDTH (TAG_WIDTH) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 6c6ac92f2..4d5b0bcd3 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -100,7 +100,7 @@ module VX_cache_tags #( wire fill_s = fill && (!WRITEBACK || ~stall); wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : ways + for (genvar i = 0; i < NUM_WAYS; ++i) begin : tag_stores wire do_fill = fill_s && evict_way[i]; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode 
diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 7ab808c70..8b2bf7363 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -55,7 +55,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alu_blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index f945c7903..160bcf4d4 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -41,7 +41,7 @@ module VX_commit import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask; wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : commit_arbs wire [`NUM_EX_UNITS-1:0] valid_in; wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in; diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 3fe98ba93..4326298a1 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -53,7 +53,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( wire [`NUM_EX_UNITS-1:0] operands_ready_in; assign operands_if.ready = operands_ready_in[operands_if.data.ex_type]; - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : buffers VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index de622bd1d..44f3e51da 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -71,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #( // This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests. // This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus. 
wire [`NUM_WARPS-1:0] pending_ibuf_full; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_reads VX_pending_size #( .SIZE (`IBUF_SIZE) ) pending_reads ( diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index ae36e4b22..0d7f02311 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -53,7 +53,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpu_blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpus `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 402824dac..69295321b 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -74,7 +74,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]]; end - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: out_bufs VX_commit_if #( .NUM_LANES (NUM_LANES) ) commit_tmp_if(); diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index 6f068d45f..f5d879f33 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : ibuf_slices + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : instr_bufs VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 425f1aeee..b155ed0d7 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import 
VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsus `RESET_RELAY_EN (slice_reset, reset, (BLOCK_SIZE > 1)); diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 9f1695a28..bb00df0b5 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -45,7 +45,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_switches VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), @@ -65,7 +65,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapter_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapters VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) @@ -131,7 +131,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescers `RESET_RELAY (mem_coalescer_reset, reset); @@ -195,7 +195,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapter_slices + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapters VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index f47b4964f..d84c1a072 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -246,7 +246,7 @@ module VX_operands import VX_gpu_pkg::*; #( assign gpr_wr_bank_idx = '0; end - for (genvar b = 0; b < NUM_BANKS; ++b) 
begin + for (genvar b = 0; b < NUM_BANKS; ++b) begin : gpr_rams wire gpr_wr_enabled; if (BANK_SEL_BITS != 0) begin assign gpr_wr_enabled = writeback_if.valid diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index fbe0bd959..b1b855aaf 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -379,7 +379,7 @@ module VX_schedule import VX_gpu_pkg::*; #( `RESET_RELAY (pending_instr_reset, reset); - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_sizes VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 503cc22c8..b2d9ff2be 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -101,7 +101,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `endif - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : stanging_bufs VX_pipe_buffer #( .DATAW (DATAW) ) stanging_buf ( diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 4b58ebc26..8689d216d 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -45,7 +45,7 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_push = valid && split.valid && split.is_dvg; wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_slices + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), .DEPTH (`DV_STACK_SIZE) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index a2bf93988..94dee7316 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -86,7 +86,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fcvt_units VX_fcvt_unit #( .LATENCY 
(`LATENCY_FCVT), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 9cdea7867..ea63387d7 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -94,7 +94,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs acl_fdiv fdiv ( .clk (clk), .areset (1'b0), @@ -112,7 +112,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs wire [3:0] tuser; xil_fdiv fdiv ( .aclk (clk), @@ -134,7 +134,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin fdivs reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index c42de701c..331074cf0 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -125,7 +125,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fmadds acl_fmadd fmadd ( .clk (clk), .areset (1'b0), @@ -143,7 +143,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fmas wire [2:0] tuser; xil_fma fma ( @@ -168,7 +168,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fmas reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index ad95f0347..85e790996 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -162,7 +162,7 @@ module VX_fpu_fpnew end `UNUSED_VAR (mask_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : fpnew_cores wire 
[(TAG_WIDTH+1)-1:0] fpu_tag; wire fpu_valid_out_uq; wire fpu_ready_in_uq; diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 225033e1e..52b2979b6 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -91,7 +91,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fncp_units VX_fncp_unit #( .LATENCY (`LATENCY_FNCP), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index c6961e1db..f6c542fc3 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -88,7 +88,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts acl_fsqrt fsqrt ( .clk (clk), .areset (1'b0), @@ -105,7 +105,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts wire tuser; xil_fsqrt fsqrt ( @@ -126,7 +126,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index f0941b028..046c32bfc 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -64,7 +64,6 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; - wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size; wire [BANK_ADDRW-1:0] req_bank_sel; wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; @@ -81,8 +80,7 @@ module VX_avs_adapter #( assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && 
(req_bank_sel == i); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin - + for (genvar i = 0; i < NUM_BANKS; ++i) begin : pending_sizes VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( @@ -94,10 +92,11 @@ module VX_avs_adapter #( `UNUSED_PIN (alm_empty), .full (req_queue_going_full[i]), `UNUSED_PIN (alm_full), - .size (req_queue_size[i]) + `UNUSED_PIN (size) ); - `UNUSED_VAR (req_queue_size) + end + for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_req_queues VX_fifo_queue #( .DATAW (TAG_WIDTH), .DEPTH (RD_QUEUE_SIZE) @@ -116,7 +115,7 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : req_out_bufs wire valid_out; wire rw_out; wire [DATA_SIZE-1:0] byteen_out; @@ -168,8 +167,7 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out; wire [NUM_BANKS-1:0] rsp_queue_empty; - for (genvar i = 0; i < NUM_BANKS; ++i) begin - + for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_rsp_queues VX_fifo_queue #( .DATAW (DATA_WIDTH), .DEPTH (RD_QUEUE_SIZE) From 105f8841291aab0235410b13e8dcc3752f9403d8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 00:48:51 -0700 Subject: [PATCH 195/488] migration from fpnew to latest cvfpu core to resolve fpnew bugs and feature limitations --- .gitmodules | 6 +++--- hw/rtl/fpu/VX_fpu_fpnew.sv | 2 +- hw/rtl/libs/VX_avs_adapter.sv | 1 - hw/syn/altera/dut/core/Makefile | 3 ++- hw/syn/altera/dut/fpu/Makefile | 3 ++- hw/syn/altera/dut/issue/Makefile | 3 ++- hw/syn/altera/dut/top/Makefile | 3 ++- hw/syn/altera/dut/unittest/Makefile | 3 ++- hw/syn/altera/dut/vortex/Makefile | 3 ++- hw/syn/altera/opae/Makefile | 3 ++- hw/syn/xilinx/dut/core/Makefile | 3 ++- hw/syn/xilinx/dut/fpu/Makefile | 3 ++- hw/syn/xilinx/dut/issue/Makefile | 3 ++- hw/syn/xilinx/dut/top/Makefile | 3 ++- hw/syn/xilinx/dut/unittest/Makefile | 3 ++- hw/syn/xilinx/dut/vortex/Makefile | 3 ++- hw/syn/xilinx/sandbox/Makefile | 3 ++- hw/syn/xilinx/xrt/Makefile 
| 3 ++- hw/syn/yosys/Makefile | 3 ++- sim/opaesim/Makefile | 7 ++++--- sim/opaesim/verilator.vlt | 8 -------- sim/opaesim/verilator.vlt.in | 8 ++++++++ sim/rtlsim/Makefile | 7 ++++--- sim/rtlsim/verilator.vlt | 5 ----- sim/rtlsim/verilator.vlt.in | 5 +++++ sim/xrtsim/Makefile | 5 +++-- sim/xrtsim/verilator.vlt | 5 ----- sim/xrtsim/verilator.vlt.in | 5 +++++ third_party/cvfpu | 1 + third_party/fpnew | 1 - 30 files changed, 66 insertions(+), 48 deletions(-) delete mode 100644 sim/opaesim/verilator.vlt create mode 100644 sim/opaesim/verilator.vlt.in delete mode 100644 sim/rtlsim/verilator.vlt create mode 100644 sim/rtlsim/verilator.vlt.in delete mode 100644 sim/xrtsim/verilator.vlt create mode 100644 sim/xrtsim/verilator.vlt.in create mode 160000 third_party/cvfpu delete mode 160000 third_party/fpnew diff --git a/.gitmodules b/.gitmodules index df3ca47e2..32abfe9cb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ -[submodule "third_party/fpnew"] - path = third_party/fpnew - url = https://github.com/pulp-platform/fpnew.git [submodule "third_party/softfloat"] path = third_party/softfloat url = https://github.com/ucb-bar/berkeley-softfloat-3.git [submodule "third_party/ramulator"] path = third_party/ramulator url = https://github.com/CMU-SAFARI/ramulator2.git +[submodule "third_party/cvfpu"] + path = third_party/cvfpu + url = https://github.com/openhwgroup/cvfpu.git diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 85e790996..a2b0e170a 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -193,7 +193,7 @@ module VX_fpu_fpnew .tag_i ({fpu_tag_in, fpu_has_fflags}), .in_valid_i (fpu_valid_in), .in_ready_o (fpu_ready_in_uq), - .flush_i (reset), + .flush_i (1'b0), .result_o (fpu_result[i]), .status_o (fpu_status_uq), .tag_o (fpu_tag), diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 046c32bfc..61322f673 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ 
-54,7 +54,6 @@ module VX_avs_adapter #( input wire avs_readdatavalid [NUM_BANKS] ); localparam DATA_SIZE = DATA_WIDTH/8; - localparam RD_QUEUE_ADDR_WIDTH = `CLOG2(RD_QUEUE_SIZE+1); localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; diff --git a/hw/syn/altera/dut/core/Makefile b/hw/syn/altera/dut/core/Makefile index eeeaa5233..0a3b19285 100644 --- a/hw/syn/altera/dut/core/Makefile +++ b/hw/syn/altera/dut/core/Makefile @@ -9,6 +9,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/fpu/Makefile b/hw/syn/altera/dut/fpu/Makefile index b7826dc68..e3cb9445b 100644 --- a/hw/syn/altera/dut/fpu/Makefile +++ b/hw/syn/altera/dut/fpu/Makefile @@ -6,6 +6,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include 
-J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) diff --git a/hw/syn/altera/dut/issue/Makefile b/hw/syn/altera/dut/issue/Makefile index c1804a398..8e3bead11 100644 --- a/hw/syn/altera/dut/issue/Makefile +++ b/hw/syn/altera/dut/issue/Makefile @@ -9,6 +9,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index 341690206..224939236 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -27,6 +27,7 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += 
-J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/dut/unittest/Makefile b/hw/syn/altera/dut/unittest/Makefile index 2bfb18e4e..d26eabe3c 100644 --- a/hw/syn/altera/dut/unittest/Makefile +++ b/hw/syn/altera/dut/unittest/Makefile @@ -6,6 +6,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/vortex/Makefile b/hw/syn/altera/dut/vortex/Makefile index 7429df414..64fb051e0 100644 --- a/hw/syn/altera/dut/vortex/Makefile +++ b/hw/syn/altera/dut/vortex/Makefile @@ -11,6 +11,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += 
-J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 62a9bb72c..5ec7a7ff8 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -58,7 +58,8 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index deda5cce9..c94fd8637 100644 --- a/hw/syn/xilinx/dut/core/Makefile +++ b/hw/syn/xilinx/dut/core/Makefile @@ 
-10,6 +10,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index bb6610375..ba76e8eb8 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -7,6 +7,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile index bb93f44d2..b5690ca01 100644 --- a/hw/syn/xilinx/dut/issue/Makefile +++ 
b/hw/syn/xilinx/dut/issue/Makefile @@ -9,6 +9,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index 0480b08e5..ab7a18162 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -28,6 +28,7 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/unittest/Makefile 
b/hw/syn/xilinx/dut/unittest/Makefile index 061e75441..7f4dfd3a3 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -6,6 +6,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile index e2525fae2..45423f7ae 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -12,6 +12,7 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core 
-I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index bcfd91f9c..c4e4db43c 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -20,7 +20,8 @@ ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 9e86bd1a5..1a7589f56 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -75,7 +75,8 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl 
-J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 80bfdae02..911361df8 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -44,7 +44,8 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 32182d5a8..6402fb475 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -58,8 +58,9 @@ RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += 
$(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip @@ -72,7 +73,7 @@ VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) diff --git a/sim/opaesim/verilator.vlt b/sim/opaesim/verilator.vlt deleted file mode 100644 index 66a59bd12..000000000 --- a/sim/opaesim/verilator.vlt +++ /dev/null @@ -1,8 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" - -lint_off -file "*/afu/opae/ccip/ccip_if_pkg.sv" -lint_off -file "*/afu/opae/local_mem_cfg_pkg.sv" diff --git a/sim/opaesim/verilator.vlt.in b/sim/opaesim/verilator.vlt.in new file mode 100644 index 000000000..0b118e05e --- /dev/null +++ b/sim/opaesim/verilator.vlt.in @@ -0,0 +1,8 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" + +lint_off -file "@VORTEX_HOME@/hw/rtl/afu/opae/ccip/ccip_if_pkg.sv" +lint_off -file "@VORTEX_HOME@/hw/rtl/afu/opae/local_mem_cfg_pkg.sv" diff 
--git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 2f38ae1f2..89ba412f5 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -30,8 +30,9 @@ RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) @@ -50,7 +51,7 @@ VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED VL_FLAGS += --x-initial unique --x-assign unique -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) diff --git a/sim/rtlsim/verilator.vlt b/sim/rtlsim/verilator.vlt deleted file mode 100644 index 9cfccbeb4..000000000 --- a/sim/rtlsim/verilator.vlt +++ /dev/null @@ -1,5 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" 
-lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" diff --git a/sim/rtlsim/verilator.vlt.in b/sim/rtlsim/verilator.vlt.in new file mode 100644 index 000000000..56de6b2cf --- /dev/null +++ b/sim/rtlsim/verilator.vlt.in @@ -0,0 +1,5 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index c63fe3d56..88dc930b7 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -57,8 +57,9 @@ RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) diff --git a/sim/xrtsim/verilator.vlt b/sim/xrtsim/verilator.vlt deleted file mode 
100644 index 9cfccbeb4..000000000 --- a/sim/xrtsim/verilator.vlt +++ /dev/null @@ -1,5 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" diff --git a/sim/xrtsim/verilator.vlt.in b/sim/xrtsim/verilator.vlt.in new file mode 100644 index 000000000..893ecbbd3 --- /dev/null +++ b/sim/xrtsim/verilator.vlt.in @@ -0,0 +1,5 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" \ No newline at end of file diff --git a/third_party/cvfpu b/third_party/cvfpu new file mode 160000 index 000000000..a6af69155 --- /dev/null +++ b/third_party/cvfpu @@ -0,0 +1 @@ +Subproject commit a6af691551ffbd76d5d9cf30774d3295a41615e4 diff --git a/third_party/fpnew b/third_party/fpnew deleted file mode 160000 index 79e453139..000000000 --- a/third_party/fpnew +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 79e453139072df42c9ec8f697132ba485d74e23d From 847dee347389193e4f6f9e30257c75aabfdc633c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 01:30:54 -0700 Subject: [PATCH 196/488] minor update --- hw/rtl/fpu/VX_fpu_div.sv | 2 +- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index ea63387d7..79b91a1f5 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -134,7 +134,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 331074cf0..3095846c1 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -125,7 +125,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `ifdef QUARTUS - for 
(genvar i = 0; i < NUM_PES; ++i) begin : fmadds + for (genvar i = 0; i < NUM_PES; ++i) begin : fmas acl_fmadd fmadd ( .clk (clk), .areset (1'b0), From 5f2bf2418b42883e045b5d3b8f8b342f423367a9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 02:40:54 -0700 Subject: [PATCH 197/488] minor update --- hw/rtl/fpu/VX_fpu_fpnew.sv | 5 ++--- hw/syn/altera/dut/core/Makefile | 1 - hw/syn/altera/dut/fpu/Makefile | 1 - hw/syn/altera/dut/issue/Makefile | 1 - hw/syn/altera/dut/top/Makefile | 1 - hw/syn/altera/dut/unittest/Makefile | 1 - hw/syn/altera/dut/vortex/Makefile | 1 - hw/syn/altera/opae/Makefile | 1 - hw/syn/xilinx/dut/core/Makefile | 1 - hw/syn/xilinx/dut/fpu/Makefile | 1 - hw/syn/xilinx/dut/issue/Makefile | 1 - hw/syn/xilinx/dut/top/Makefile | 1 - hw/syn/xilinx/dut/unittest/Makefile | 1 - hw/syn/xilinx/dut/vortex/Makefile | 1 - hw/syn/xilinx/sandbox/Makefile | 1 - hw/syn/xilinx/xrt/Makefile | 1 - hw/syn/yosys/Makefile | 1 - 17 files changed, 2 insertions(+), 19 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index a2b0e170a..030ae3557 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -176,8 +176,7 @@ module VX_fpu_fpnew .Features (FPU_FEATURES), .Implementation (FPU_IMPLEMENTATION), .TagType (logic[(TAG_WIDTH+1)-1:0]), - .TrueSIMDClass (1), - .EnableSIMDMask (1) + .DivSqrtSel (fpnew_pkg::PULP) ) fpnew_core ( .clk_i (clk), .rst_ni (~reset), @@ -189,7 +188,7 @@ module VX_fpu_fpnew .dst_fmt_i (fpu_dst_fmt), .int_fmt_i (fpu_int_fmt), .vectorial_op_i (1'b0), - .simd_mask_i (mask_in[i]), + .simd_mask_i (1'b1), .tag_i ({fpu_tag_in, fpu_has_fflags}), .in_valid_i (fpu_valid_in), .in_ready_o (fpu_ready_in_uq), diff --git a/hw/syn/altera/dut/core/Makefile b/hw/syn/altera/dut/core/Makefile index 0a3b19285..c78c4a651 100644 --- a/hw/syn/altera/dut/core/Makefile +++ b/hw/syn/altera/dut/core/Makefile @@ -10,6 +10,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring 
FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/fpu/Makefile b/hw/syn/altera/dut/fpu/Makefile index e3cb9445b..38d5c718c 100644 --- a/hw/syn/altera/dut/fpu/Makefile +++ b/hw/syn/altera/dut/fpu/Makefile @@ -7,6 +7,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) diff --git a/hw/syn/altera/dut/issue/Makefile b/hw/syn/altera/dut/issue/Makefile index 8e3bead11..45f6981d6 100644 --- a/hw/syn/altera/dut/issue/Makefile +++ b/hw/syn/altera/dut/issue/Makefile @@ -10,6 +10,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl 
-J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index 224939236..99889f4ae 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -28,6 +28,5 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/dut/unittest/Makefile b/hw/syn/altera/dut/unittest/Makefile index d26eabe3c..c4479f154 100644 --- a/hw/syn/altera/dut/unittest/Makefile +++ b/hw/syn/altera/dut/unittest/Makefile @@ -7,6 +7,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git 
a/hw/syn/altera/dut/vortex/Makefile b/hw/syn/altera/dut/vortex/Makefile index 64fb051e0..80c256021 100644 --- a/hw/syn/altera/dut/vortex/Makefile +++ b/hw/syn/altera/dut/vortex/Makefile @@ -12,6 +12,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 5ec7a7ff8..53b1210d8 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -59,7 +59,6 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile index c94fd8637..2ce824a3f 100644 --- a/hw/syn/xilinx/dut/core/Makefile +++ b/hw/syn/xilinx/dut/core/Makefile @@ -11,6 +11,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring 
FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile index ba76e8eb8..c3d3fd99f 100644 --- a/hw/syn/xilinx/dut/fpu/Makefile +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -8,6 +8,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile index b5690ca01..07e8f343d 100644 --- a/hw/syn/xilinx/dut/issue/Makefile +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -10,6 +10,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl 
-J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index ab7a18162..3a06715b5 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -29,6 +29,5 @@ endif FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile index 7f4dfd3a3..1bc66aa38 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -7,6 +7,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/vortex/Makefile 
b/hw/syn/xilinx/dut/vortex/Makefile index 45423f7ae..eb6d45a88 100644 --- a/hw/syn/xilinx/dut/vortex/Makefile +++ b/hw/syn/xilinx/dut/vortex/Makefile @@ -13,6 +13,5 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index c4e4db43c..94c054b57 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -21,7 +21,6 @@ ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 1a7589f56..6368441e0 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -76,7 +76,6 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += 
-J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 911361df8..cba0137a3 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -45,7 +45,6 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -J$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) From 961b9c3d635bfc92bf866ea80d4c4ddfecfee96d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 02:41:36 -0700 Subject: [PATCH 198/488] minor update --- sim/opaesim/Makefile | 1 - sim/rtlsim/Makefile | 1 - sim/xrtsim/Makefile | 1 - 3 files changed, 3 deletions(-) diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 6402fb475..984686d3b 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -60,7 +60,6 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg 
$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 89ba412f5..591a2c226 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -32,7 +32,6 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 88dc930b7..81f8f28b5 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -59,7 +59,6 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg 
$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl -I$(THIRD_PARTY_DIR)/cvfpu/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) From fc5bb387a25d0e98530a3b0ace6de9cf7a2d5d14 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 03:02:50 -0700 Subject: [PATCH 199/488] minor update --- ci/regression.sh.in | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index e0da29e20..aee991cd4 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -240,8 +240,14 @@ config2() ./ci/blackbox.sh --driver=opae --app=diverge # disable DPI - CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood - CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + if [ "$XLEN" == "64" ]; then + # need to disable trig on 64-bit due to a bug inside fpnew's sqrt core. 
+ CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar" + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar" + else + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + fi # custom program startup address make -C tests/regression/dogfood clean-kernel From 6eee0728fbe5eb67af87f7cacccb0a02dbb87c72 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 29 Aug 2024 03:22:09 -0700 Subject: [PATCH 200/488] minor update --- hw/syn/xilinx/dut/project.tcl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index bd9cb02e7..c89645c5f 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -65,10 +65,7 @@ read_xdc $xdc_file add_files -norecurse -verbose $vsources_list # process defines -set obj [current_fileset] -foreach def $vdefines_list { - set_property verilog_define $def $obj -} +set_property verilog_define ${vdefines_list} [current_fileset] # add fpu ip if {[info exists ::env(FPU_IP)]} { From 7d0c1411297d99a34c49ed58a60fde63077d8b83 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 01:44:41 -0700 Subject: [PATCH 201/488] minor updates --- hw/rtl/VX_platform.vh | 3 - hw/rtl/core/VX_alu_muldiv.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 1 + hw/rtl/core/VX_core.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 4 +- hw/rtl/fpu/VX_fpu_dsp.sv | 139 +++++++++++++++++++++----------- hw/rtl/libs/VX_mem_coalescer.sv | 31 +++---- hw/rtl/libs/VX_pe_serializer.sv | 2 +- hw/rtl/libs/VX_stream_arb.sv | 4 +- hw/rtl/libs/VX_stream_switch.sv | 47 ++++++----- hw/rtl/libs/VX_stream_unpack.sv | 6 +- hw/syn/xilinx/xrt/Makefile | 6 +- sim/xrtsim/Makefile | 2 +- sim/xrtsim/verilator.vlt.in | 2 +- 14 files changed, 146 insertions(+), 105 deletions(-) diff --git 
a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 730b3cd7d..74907ad4c 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -254,9 +254,6 @@ // lut(x): (x & 8) != 0 `define TO_OUT_BUF_LUTRAM(s) ((s & 8) != 0) -// rbuf(x): (x <= 2) ? 3 : x -`define TO_OUT_RBUF(s) ((s & 8) | `MAX(s & 7, 3)) - `define REPEAT(n,f,s) `_REPEAT_``n(f,s) `define _REPEAT_0(f,s) `define _REPEAT_1(f,s) `f(0) diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 8e3a1ba4f..bd498a0bb 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -325,7 +325,7 @@ module VX_alu_muldiv #( .NUM_INPUTS (2), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), .ARBITER ("P"), - .OUT_BUF (1) + .OUT_BUF (2) ) rsp_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 8b2bf7363..9b3d6deea 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -109,6 +109,7 @@ module VX_alu_unit #( `endif + // can accept new request? assign per_block_execute_if[block_idx].ready = `ifdef EXT_M_ENABLE is_muldiv_op ? 
muldiv_execute_if.ready : diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 30a774ee5..f306c5d23 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -212,7 +212,7 @@ module VX_core import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE .lmem_perf (mem_perf_tmp_if.lmem), `endif - .lsu_mem_in_if (lsu_mem_if), + .lsu_mem_if (lsu_mem_if), .dcache_bus_if (dcache_bus_if) ); diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index bb00df0b5..cd901f8ac 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -23,7 +23,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( output cache_perf_t lmem_perf, `endif - VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], + VX_lsu_mem_if.slave lsu_mem_if [`NUM_LSU_BLOCKS], VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS] ); VX_lsu_mem_if #( @@ -54,7 +54,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ) lmem_switch ( .clk (clk), .reset (reset), - .lsu_in_if (lsu_mem_in_if[i]), + .lsu_in_if (lsu_mem_if[i]), .global_out_if(lsu_dcache_if[i]), .local_out_if (lsu_lmem_if[i]) ); diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 0f0e551b7..b1f115155 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,11 +54,23 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); + localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * `XLEN); localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) + wire [NUM_FPCORES-1:0] per_core_valid_in; + wire [NUM_FPCORES-1:0][REQ_DATAW-1:0] per_core_data_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0] per_core_mask_in; + wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_in; + wire [NUM_FPCORES-1:0][`INST_FPU_BITS-1:0] per_core_op_type; + wire [NUM_FPCORES-1:0][`INST_FMT_BITS-1:0] per_core_fmt; + wire [NUM_FPCORES-1:0][`INST_FRM_BITS-1:0] per_core_frm; + 
wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_dataa; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datab; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datac; wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; wire [NUM_FPCORES-1:0] per_core_ready_out; @@ -94,18 +106,44 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (datab) `UNUSED_VAR (datac) - // Decode instruction type + // Decode fpu core type wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; - wire is_sqrt = op_type[0]; - wire is_itof = op_type[1]; - wire is_signed = ~op_type[0]; - wire is_madd = op_type[1]; - wire is_neg = op_type[0]; - wire is_sub = fmt[1]; - // can accept new request? - assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; - assign ready_in = per_core_ready_in[core_select]; + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_INPUTS (1), + .NUM_OUTPUTS (NUM_FPCORES), + .OUT_BUF (0) + ) req_switch ( + .clk (clk), + .reset (reset), + .sel_in (core_select), + .valid_in (valid_in), + .ready_in (ready_in), + .data_in ({mask_in, tag_in, op_type, fmt, frm, dataa_s, datab_s, datac_s}), + .data_out (per_core_data_in), + .valid_out (per_core_valid_in), + .ready_out (per_core_ready_in) + ); + + for (genvar i = 0; i < NUM_FPCORES; ++i) begin + assign { + per_core_mask_in[i], + per_core_tag_in[i], + per_core_op_type[i], + per_core_fmt[i], + per_core_frm[i], + per_core_dataa[i], + per_core_datab[i], + per_core_datac[i] + } = per_core_data_in[i]; + end + + // FMA core + + wire is_madd = per_core_op_type[FPU_FMA][1]; + wire is_neg = per_core_op_type[FPU_FMA][0]; + wire is_sub = per_core_fmt[FPU_FMA][1]; VX_fpu_fma #( .NUM_LANES (NUM_LANES), @@ -113,17 +151,17 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_fma ( .clk (clk), .reset (fma_reset), - .valid_in (valid_in && (core_select == FPU_FMA)), + .valid_in (per_core_valid_in[FPU_FMA]), .ready_in 
(per_core_ready_in[FPU_FMA]), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), + .mask_in (per_core_mask_in[FPU_FMA]), + .tag_in (per_core_tag_in[FPU_FMA]), + .frm (per_core_frm[FPU_FMA]), .is_madd (is_madd), .is_sub (is_sub), .is_neg (is_neg), - .dataa (dataa_s), - .datab (datab_s), - .datac (datac_s), + .dataa (per_core_dataa[FPU_FMA]), + .datab (per_core_datab[FPU_FMA]), + .datac (per_core_datac[FPU_FMA]), .has_fflags (per_core_has_fflags[FPU_FMA]), .fflags (per_core_fflags[FPU_FMA]), .result (per_core_result[FPU_FMA]), @@ -132,19 +170,24 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .valid_out (per_core_valid_out[FPU_FMA]) ); + // Div/Sqrt cores + + wire is_sqrt = per_core_op_type[FPU_DIVSQRT][0]; + assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; + VX_fpu_div #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_div ( .clk (clk), .reset (div_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_sqrt), + .valid_in (per_core_valid_in[FPU_DIVSQRT] && ~is_sqrt), .ready_in (div_sqrt_ready_in[0]), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), - .dataa (dataa_s), - .datab (datab_s), + .mask_in (per_core_mask_in[FPU_DIVSQRT]), + .tag_in (per_core_tag_in[FPU_DIVSQRT]), + .frm (per_core_frm[FPU_DIVSQRT]), + .dataa (per_core_dataa[FPU_DIVSQRT]), + .datab (per_core_datab[FPU_DIVSQRT]), .has_fflags (div_sqrt_has_fflags[0]), .fflags (div_sqrt_fflags[0]), .result (div_sqrt_result[0]), @@ -159,12 +202,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_sqrt ( .clk (clk), .reset (sqrt_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_sqrt), + .valid_in (per_core_valid_in[FPU_DIVSQRT] && is_sqrt), .ready_in (div_sqrt_ready_in[1]), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), - .dataa (dataa_s), + .mask_in (per_core_mask_in[FPU_DIVSQRT]), + .tag_in (per_core_tag_in[FPU_DIVSQRT]), + .frm (per_core_frm[FPU_DIVSQRT]), + .dataa (per_core_dataa[FPU_DIVSQRT]), .has_fflags (div_sqrt_has_fflags[1]), 
.fflags (div_sqrt_fflags[1]), .result (div_sqrt_result[1]), @@ -173,23 +216,27 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (div_sqrt_ready_out[1]) ); + // CVT core + + wire is_itof = per_core_op_type[FPU_CVT][1]; + wire is_signed = ~per_core_op_type[FPU_CVT][0]; wire cvt_ret_int_in = ~is_itof; wire cvt_ret_int_out; VX_fpu_cvt #( .NUM_LANES (NUM_LANES), - .TAG_WIDTH (TAG_WIDTH+1) + .TAG_WIDTH (1+TAG_WIDTH) ) fpu_cvt ( .clk (clk), .reset (cvt_reset), - .valid_in (valid_in && (core_select == FPU_CVT)), + .valid_in (per_core_valid_in[FPU_CVT]), .ready_in (per_core_ready_in[FPU_CVT]), - .mask_in (mask_in), - .tag_in ({cvt_ret_int_in, tag_in}), - .frm (frm), + .mask_in (per_core_mask_in[FPU_CVT]), + .tag_in ({cvt_ret_int_in, per_core_tag_in[FPU_CVT]}), + .frm (per_core_frm[FPU_CVT]), .is_itof (is_itof), .is_signed (is_signed), - .dataa (dataa_s), + .dataa (per_core_dataa[FPU_CVT]), .has_fflags (per_core_has_fflags[FPU_CVT]), .fflags (per_core_fflags[FPU_CVT]), .result (per_core_result[FPU_CVT]), @@ -198,12 +245,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (per_core_ready_out[FPU_CVT]) ); - wire ncp_ret_int_in = (op_type == `INST_FPU_CMP) - || `INST_FPU_IS_CLASS(op_type, frm) - || `INST_FPU_IS_MVXW(op_type, frm); + // NCP core + + wire ncp_ret_int_in = (per_core_op_type[FPU_NCP] == `INST_FPU_CMP) + || `INST_FPU_IS_CLASS(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]) + || `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_int_out; - wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm); + wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_sext_out; VX_fpu_ncp #( @@ -212,14 +261,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ) fpu_ncp ( .clk (clk), .reset (ncp_reset), - .valid_in (valid_in && (core_select == FPU_NCP)), + .valid_in (per_core_valid_in[FPU_NCP]), .ready_in (per_core_ready_in[FPU_NCP]), - .mask_in (mask_in), - .tag_in ({ncp_ret_sext_in, 
ncp_ret_int_in, tag_in}), - .op_type (op_type), - .frm (frm), - .dataa (dataa_s), - .datab (datab_s), + .mask_in (per_core_mask_in[FPU_NCP]), + .tag_in ({ncp_ret_sext_in, ncp_ret_int_in, per_core_tag_in[FPU_NCP]}), + .op_type (per_core_op_type[FPU_NCP]), + .frm (per_core_frm[FPU_NCP]), + .dataa (per_core_dataa[FPU_NCP]), + .datab (per_core_datab[FPU_NCP]), .result (per_core_result[FPU_NCP]), .has_fflags (per_core_has_fflags[FPU_NCP]), .fflags (per_core_fflags[FPU_NCP]), diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index e15d06564..5c283e06c 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -80,7 +80,6 @@ module VX_mem_coalescer #( `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask")); localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; - localparam NUM_REQS_W = `LOG2UP(NUM_REQS); // tag + mask + offest localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W); @@ -115,13 +114,8 @@ module VX_mem_coalescer #( logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; - wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx; - - wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; - for (genvar i = 0; i < NUM_REQS; i++) begin - assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W]; assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; end @@ -140,21 +134,18 @@ module VX_mem_coalescer #( .valid_out (batch_valid_n[i]) ); - if (OUT_REQS > 1) begin - assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx}; - end else begin - assign seed_idx[i] = batch_idx; - end - end - - for (genvar i = 0; i < OUT_REQS; ++i) begin - assign seed_addr_n[i] = in_addr_base[seed_idx[i]]; - assign seed_flags_n[i] = in_req_flags[seed_idx[i]]; - end - - for (genvar i = 0; i < OUT_REQS; ++i) begin + wire [DATA_RATIO-1:0][OUT_ADDR_WIDTH-1:0] addr_base; + wire 
[DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; for (genvar j = 0; j < DATA_RATIO; ++j) begin - assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]); + assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W]; + assign req_flags[j] = in_req_flags[DATA_RATIO * i + j]; + end + + assign seed_addr_n[i] = addr_base[batch_idx]; + assign seed_flags_n[i] = req_flags[batch_idx]; + + for (genvar j = 0; j < DATA_RATIO; ++j) begin + assign addr_matches_n[i * DATA_RATIO + j] = (addr_base[j] == seed_addr_n[i]); end end diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 7a891cfc7..2f9c83483 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -128,7 +128,7 @@ module VX_pe_serializer #( data_out_r <= data_out_n; end - assign enable = ready_out_u || ~batch_out_done; + assign enable = ready_out_u || ~valid_out_u; assign ready_in = enable && batch_in_done; assign valid_out_u = batch_out_done; diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 13cde1cd9..3a457f8b8 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -97,7 +97,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output + .OUT_BUF (3) ) fanout_slice_arb ( .clk (clk), .reset (reset), @@ -242,7 +242,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (`TO_OUT_RBUF(OUT_BUF)) // to registered output + .OUT_BUF (3) ) fanout_fork_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index c379dd7c0..f3723ebb0 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -38,36 +38,36 @@ module VX_stream_switch #( ); if (NUM_INPUTS > NUM_OUTPUTS) begin - wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] valid_in_r; - wire 
[NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_r; + wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] valid_in_w; + wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_w; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin localparam ii = i * NUM_REQS + j; if (ii < NUM_INPUTS) begin - assign valid_in_r[i][j] = valid_in[ii]; - assign data_in_r[i][j] = data_in[ii]; + assign valid_in_w[i][j] = valid_in[ii]; + assign data_in_w[i][j] = data_in[ii]; end else begin - assign valid_in_r[i][j] = 0; - assign data_in_r[i][j] = '0; + assign valid_in_w[i][j] = 0; + assign data_in_w[i][j] = '0; end end end - wire [NUM_OUTPUTS-1:0] valid_out_r; - wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r; - wire [NUM_OUTPUTS-1:0] ready_out_r; + wire [NUM_OUTPUTS-1:0] valid_out_w; + wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + wire [NUM_OUTPUTS-1:0] ready_out_w; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - assign valid_out_r[i] = valid_in_r[i][sel_in[i]]; - assign data_out_r[i] = data_in_r[i][sel_in[i]]; + assign valid_out_w[i] = valid_in_w[i][sel_in[i]]; + assign data_out_w[i] = data_in_w[i][sel_in[i]]; end for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin localparam ii = i * NUM_REQS + j; if (ii < NUM_INPUTS) begin - assign ready_in[ii] = ready_out_r[i] & (sel_in[i] == LOG_NUM_REQS'(j)); + assign ready_in[ii] = ready_out_w[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end end end @@ -80,9 +80,9 @@ module VX_stream_switch #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_out_r[i]), - .ready_in (ready_out_r[i]), - .data_in (data_out_r[i]), + .valid_in (valid_out_w[i]), + .ready_in (ready_out_w[i]), + .data_in (data_out_w[i]), .data_out (data_out[i]), .valid_out (valid_out[i]), .ready_out (ready_out[i]) @@ -91,14 +91,14 @@ module VX_stream_switch #( end else if (NUM_OUTPUTS > NUM_INPUTS) begin - wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_r; - wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_r; + wire 
[NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_w; + wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_w; for (genvar i = 0; i < NUM_INPUTS; ++i) begin for (genvar j = 0; j < NUM_REQS; ++j) begin - assign valid_out_r[i][j] = valid_in[i] & (sel_in[i] == LOG_NUM_REQS'(j)); + assign valid_out_w[i][j] = valid_in[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end - assign ready_in[i] = ready_out_r[i][sel_in[i]]; + assign ready_in[i] = ready_out_w[i][sel_in[i]]; end for (genvar i = 0; i < NUM_INPUTS; ++i) begin @@ -112,17 +112,16 @@ module VX_stream_switch #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_out_r[i][j]), - .ready_in (ready_out_r[i][j]), + .valid_in (valid_out_w[i][j]), + .ready_in (ready_out_w[i][j]), .data_in (data_in[i]), .data_out (data_out[ii]), .valid_out (valid_out[ii]), .ready_out (ready_out[ii]) ); end else begin - `UNUSED_VAR (reset) - `UNUSED_VAR (valid_out_r[i][j]) - assign ready_out_r[i][j] = '0; + `UNUSED_VAR (valid_out_w[i][j]) + assign ready_out_w[i][j] = '0; end end end diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index c81b30099..cb85d4804 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -39,9 +39,9 @@ module VX_stream_unpack #( if (NUM_REQS > 1) begin reg [NUM_REQS-1:0] rem_mask; - wire [NUM_REQS-1:0] ready_out_r; + wire [NUM_REQS-1:0] ready_out_w; - wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_r; + wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_w; wire sent_all = ~(| (mask_in & rem_mask_n)); always @(posedge clk) begin @@ -65,7 +65,7 @@ module VX_stream_unpack #( .clk (clk), .reset (reset), .valid_in (valid_in && mask_in[i] && rem_mask[i]), - .ready_in (ready_out_r[i]), + .ready_in (ready_out_w[i]), .data_in ({data_in[i], tag_in}), .data_out ({data_out[i], tag_out[i]}), .valid_out (valid_out[i]), diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 6368441e0..4e3259f34 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ 
b/hw/syn/xilinx/xrt/Makefile @@ -90,7 +90,11 @@ else ifeq ($(DEV_ARCH), versal) # versal else # alveo -VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] +ifneq ($(findstring xilinx_u55c,$(XSA)),) + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] +else + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] +endif endif VPP_FLAGS += --report_level 2 diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 81f8f28b5..e45b0bfa2 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -71,7 +71,7 @@ VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) diff --git a/sim/xrtsim/verilator.vlt.in b/sim/xrtsim/verilator.vlt.in index 893ecbbd3..56de6b2cf 100644 --- a/sim/xrtsim/verilator.vlt.in +++ b/sim/xrtsim/verilator.vlt.in @@ -2,4 +2,4 @@ lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" -lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" \ No newline at end of file +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" From 01fedb066c4f602a7f8433cd4e4a1ce0b98cf332 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 01:57:08 -0700 Subject: [PATCH 202/488] minor updates --- hw/syn/xilinx/dut/common.mk | 9 ++++-- hw/syn/xilinx/dut/project.tcl | 37 +++++++++++++++++------- hw/syn/xilinx/sandbox/Makefile | 7 ++++- hw/syn/xilinx/sandbox/project.tcl.in | 27 +++++++++++++---- hw/syn/xilinx/scripts/package_kernel.tcl | 5 ++++ 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index f0588ede8..b2a8e71c7 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk DEVICE ?= xcu55c-fsvh2892-2L-e +MAX_JOBS ?= 8 + 
VIVADO := $(XILINX_VIVADO)/bin/vivado SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/dut @@ -11,6 +13,9 @@ RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/xrt SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts +NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') +JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? $(MAX_JOBS) : $(NCPUS) ))) + CONFIGS += -DNDEBUG CONFIGS += -DVIVADO CONFIGS += -DSYNTHESIS @@ -26,9 +31,9 @@ project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt ifdef FPU_IP - FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts else - $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts endif clean: diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index c89645c5f..e23ce2997 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -15,9 +15,9 @@ set start_time [clock seconds] if { $::argc != 6 } { - puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" - exit + puts "ERROR: Program \"$::argv0\" requires 6 arguments!\n" + puts "Usage: $::argv0 \n" + exit } # Set the project name @@ -30,11 +30,20 @@ set xdc_file [lindex $::argv 3] set tool_dir [lindex $::argv 4] set script_dir [lindex $::argv 5] -#puts top_module -#puts $device_part -#puts $vcs_file -#puts xdc_file -#puts
$tool_dir +puts "Using top_module=$top_module" +puts "Using device_part=$device_part" +puts "Using vcs_file=$vcs_file" +puts "Using xdc_file=$xdc_file" +puts "Using tool_dir=$tool_dir" +puts "Using script_dir=$script_dir" + +# Set the number of jobs based on MAX_JOBS environment variable +if {[info exists ::env(MAX_JOBS)]} { + set num_jobs $::env(MAX_JOBS) + puts "using num_jobs=$num_jobs" +} else { + set num_jobs 0 +} # create fpu ip if {[info exists ::env(FPU_IP)]} { @@ -84,14 +93,22 @@ set_property \ -objects [get_runs synth_1] # Synthesis -launch_runs synth_1 +if {$num_jobs != 0} { + launch_runs synth_1 -jobs $num_jobs +} else { + launch_runs synth_1 +} wait_on_run synth_1 open_run synth_1 write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages # Implementation -launch_runs impl_1 +if {$num_jobs != 0} { + launch_runs impl_1 -jobs $num_jobs +} else { + launch_runs impl_1 +} wait_on_run impl_1 open_run impl_1 write_checkpoint -force post_impl.dcp diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index 94c054b57..d1ebf9afa 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk DEVICE ?= xcu55c-fsvh2892-2L-e +MAX_JOBS ?= 8 + VIVADO := $(XILINX_VIVADO)/bin/vivado SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/sandbox @@ -14,6 +16,9 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts KERNEL ?= fibonacci +NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') +JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? 
$(MAX_JOBS) : $(NCPUS) ))) + COE_FILE := $(shell realpath kernel.bin.coe) ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') @@ -58,7 +63,7 @@ project_1/sources.txt: build: project_1/project_1.xpr project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project2.tcl - $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) run: project_1/project_1.xpr $(VIVADO) project_1/project_1.xpr & diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 0e9a23f0a..d4fa45581 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -24,9 +24,18 @@ set device_part [lindex $::argv 0] set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] -#puts $device_part -#puts $vcs_file -#puts $tool_dir +puts "Using device_part=$device_part" +puts "Using vcs_file=$vcs_file" +puts "Using tool_dir=$tool_dir" + +# Set the number of jobs based on MAX_JOBS environment variable +if {[info exists ::env(MAX_JOBS)]} { + set num_jobs $::env(MAX_JOBS) + puts "using num_jobs=$num_jobs" + #puts $num_jobs +} else { + set num_jobs 0 +} set origin_dir [file normalize "."] @@ -394,14 +403,22 @@ add_files -norecurse -fileset sources_1 $wrapper_path update_compile_order -fileset sources_1 # Synthesis -launch_runs synth_1 +if {$num_jobs != 0} { + launch_runs synth_1 -jobs $num_jobs +} else { + launch_runs synth_1 +} wait_on_run synth_1 open_run synth_1 write_checkpoint -force post_synth.dcp report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages # Implementation -launch_runs impl_1 +if {$num_jobs != 0} { + launch_runs impl_1 -jobs $num_jobs +} else { + launch_runs impl_1 +} wait_on_run impl_1 open_run impl_1 write_checkpoint -force post_impl.dcp diff --git a/hw/syn/xilinx/scripts/package_kernel.tcl
b/hw/syn/xilinx/scripts/package_kernel.tcl index c88bca229..ed8a683ac 100644 --- a/hw/syn/xilinx/scripts/package_kernel.tcl +++ b/hw/syn/xilinx/scripts/package_kernel.tcl @@ -22,6 +22,11 @@ set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] set build_dir [lindex $::argv 3] +puts "Using krnl_name=$krnl_name" +puts "Using vcs_file=$vcs_file" +puts "Using tool_dir=$tool_dir" +puts "Using build_dir=$build_dir" + set path_to_packaged "${build_dir}/xo/packaged_kernel" set path_to_tmp_project "${build_dir}/xo/project" From 83ea236b840ecd83e55a22e28d439f6d67edde29 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 01:58:21 -0700 Subject: [PATCH 203/488] minor update --- hw/syn/altera/dut/Makefile | 7 +------ hw/syn/xilinx/dut/Makefile | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index 924b7602b..5f1dd62fe 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -9,17 +9,12 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top +.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) -dogfood: - mkdir -p dogfood/$(BUILD_DIR) - cp dogfood/Makefile dogfood/$(BUILD_DIR) - $(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 & - unittest: mkdir -p unittest/$(BUILD_DIR) cp unittest/Makefile unittest/$(BUILD_DIR) diff --git a/hw/syn/xilinx/dut/Makefile b/hw/syn/xilinx/dut/Makefile index b8f67b8a5..0255287fb 100644 --- a/hw/syn/xilinx/dut/Makefile +++ b/hw/syn/xilinx/dut/Makefile @@ -5,12 +5,7 @@ PREFIX ?= build BUILD_DIR := $(PREFIX) -.PHONY: dogfood unittest pipeline mem_unit lmem cache fpu core issue vortex top - -dogfood: - mkdir -p 
dogfood/$(BUILD_DIR) - cp dogfood/Makefile dogfood/$(BUILD_DIR) - $(MAKE) -C dogfood/$(BUILD_DIR) clean && $(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 & +.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top unittest: mkdir -p unittest/$(BUILD_DIR) From 431c0cfc46d6ac55b0550f9daeb8190ee9029a8a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 31 Aug 2024 02:14:08 -0700 Subject: [PATCH 204/488] minor update --- hw/rtl/VX_socket.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 54822176d..833ba49d7 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -106,7 +106,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITE_ENABLE (0), .NC_ENABLE (0), .CORE_OUT_BUF (3), - .MEM_OUT_BUF (0) + .MEM_OUT_BUF (2) ) icache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.icache), @@ -153,7 +153,7 @@ module VX_socket import VX_gpu_pkg::*; #( .DIRTY_BYTES (`DCACHE_WRITEBACK), .NC_ENABLE (1), .CORE_OUT_BUF (3), - .MEM_OUT_BUF (0) + .MEM_OUT_BUF (2) ) dcache ( `ifdef PERF_ENABLE .cache_perf (mem_perf_tmp_if.dcache), From 72c63a47f366704acfe936c861f0613b9a361e0a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 01:19:24 -0700 Subject: [PATCH 205/488] adding read-first mode support to block ram --- hw/rtl/VX_platform.vh | 2 +- hw/rtl/core/VX_mem_unit_top.sv | 2 +- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/core/VX_split_join.sv | 3 +- hw/rtl/libs/VX_dp_ram.sv | 474 +++++++++++++++++++++------------ hw/rtl/libs/VX_sp_ram.sv | 28 +- hw/rtl/mem/VX_local_mem.sv | 2 +- 7 files changed, 317 insertions(+), 196 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 74907ad4c..e15758d27 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -158,7 +158,7 @@ `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) -`define NO_RW_RAM_CHECK (* altera_attribute = 
"-name add_pass_through_logic_to_inferred_rams off" *) +`define NO_RW_RAM_CHECK (* ramstyle = "no_rw_check" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) `elsif VIVADO diff --git a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv index c1acb6382..1eac9da10 100644 --- a/hw/rtl/core/VX_mem_unit_top.sv +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -120,7 +120,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE .lmem_perf (lmem_perf), `endif - .lsu_mem_in_if (lsu_mem_if), + .lsu_mem_if (lsu_mem_if), .dcache_bus_if (mem_bus_if) ); diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index d84c1a072..3025b9dab 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -263,8 +263,8 @@ module VX_operands import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), - .READ_ENABLE (1), .OUT_REG (1), + .READ_ENABLE (1), .WRENW (BYTEENW), `ifdef GPR_RESET .RESET_RAM (1), diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 8689d216d..c5542e137 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -48,7 +48,8 @@ module VX_split_join import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), - .DEPTH (`DV_STACK_SIZE) + .DEPTH (`DV_STACK_SIZE), + .OUT_REG (0) ) ipdom_stack ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 6683eaecc..70df4f688 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -17,13 +17,13 @@ module VX_dp_ram #( parameter DATAW = 1, parameter SIZE = 1, - parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, - parameter NO_RWCHECK = 0, parameter LUTRAM = 0, + parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, parameter RESET_RAM = 0, + parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, 
parameter INIT_FILE = "", @@ -48,9 +48,10 @@ module VX_dp_ram #( if (INIT_FILE != "") begin \ initial $readmemh(INIT_FILE, ram); \ end else begin \ - initial \ + initial begin \ for (integer i = 0; i < SIZE; ++i) \ ram[i] = INIT_VALUE; \ + end \ end \ end @@ -61,185 +62,304 @@ module VX_dp_ram #( `RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask")); end - wire [DATAW-1:0] rdata_w; - -`ifdef SYNTHESIS - if (WRENW > 1) begin - `ifdef QUARTUS - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - end - assign rdata_w = ram[raddr]; - end else begin - reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - end - assign rdata_w = ram[raddr]; - end - end - `else - // default synthesis - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if 
(wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - end - assign rdata_w = ram[raddr]; - end else begin - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - end - assign rdata_w = ram[raddr]; - end - end - `endif - end else begin - // (WRENW == 1) - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - end - assign rdata_w = ram[raddr]; - end else begin - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - end - assign rdata_w = ram[raddr]; - end - end - end -`else - // simulation - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin - assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; - end - - reg [DATAW-1:0] prev_data; - reg [ADDRW-1:0] prev_waddr; - reg prev_write; - - always @(posedge clk) begin - if (RESET_RAM && reset) begin - for (integer i = 0; i < SIZE; ++i) begin - ram[i] <= DATAW'(INIT_VALUE); - end - end else begin - if (write) begin - ram[waddr] <= ram_n; - end - end - if (reset) begin - prev_write <= 0; - prev_data <= '0; - prev_waddr <= '0; - end else begin - prev_write <= write; - prev_data <= ram[waddr]; - prev_waddr <= waddr; - end - end - - if (LUTRAM || !NO_RWCHECK) begin - `UNUSED_VAR (prev_write) - `UNUSED_VAR (prev_data) - `UNUSED_VAR (prev_waddr) - assign rdata_w = ram[raddr]; - end else begin - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard")); - end - end -`endif - - if (OUT_REG != 0) begin + if (OUT_REG && !READ_ENABLE) begin + `UNUSED_PARAM (NO_RWCHECK) reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (READ_ENABLE && reset) begin - rdata_r <= '0; - end else if (!READ_ENABLE || read) begin - rdata_r <= rdata_w; + wire cs = read || write; + if (WRENW != 1) begin + `ifdef QUARTUS + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + end else begin + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= 
ram[raddr]; + end + end + end + end + `else + // default synthesis + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + end else begin + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + end + `endif + end else begin + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) + ram[waddr] <= wdata; + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + + end else begin + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) + ram[waddr] <= wdata; + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end end end assign rdata = rdata_r; end else begin - assign rdata = rdata_w; + // OUT_REG==0 || READ_ENABLE=1 + wire [DATAW-1:0] rdata_w; + `ifdef SYNTHESIS + if (WRENW > 1) begin + `ifdef QUARTUS + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end else begin + if (NO_RWCHECK != 0) begin + 
`NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end else begin + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end + end + `else + // default synthesis + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end else begin + if (NO_RWCHECK != 0) begin + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end else begin + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end + end + `endif + end else begin + // (WRENW == 1) + if (LUTRAM != 0) begin + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata_w = ram[raddr]; + end else begin + if (NO_RWCHECK != 0) begin + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + 
ram[waddr] <= wdata; + end + end + assign rdata_w = ram[raddr]; + end else begin + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata_w = ram[raddr]; + end + end + end + `else + // simulation + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin + assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; + end + + always @(posedge clk) begin + if (RESET_RAM && reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + end else begin + if (write) begin + ram[waddr] <= ram_n; + end + end + end + + if (LUTRAM || !NO_RWCHECK) begin + assign rdata_w = ram[raddr]; + end else begin + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; + + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + prev_data <= ram[waddr]; + prev_waddr <= waddr; + end + end + + assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; + if (RW_ASSERT) begin + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard")); + end + end + `endif + + if (OUT_REG != 0) begin + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (READ_ENABLE && reset) begin + rdata_r <= '0; + end else if (!READ_ENABLE || read) begin + rdata_r <= rdata_w; + end + end + assign rdata = rdata_r; + end else begin + assign rdata = rdata_w; + end + end endmodule diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 3e73a013f..efce4b5f2 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -17,13 +17,13 @@ module VX_sp_ram #( parameter DATAW = 1, parameter SIZE = 1, - parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, + parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, - parameter LUTRAM = 0, parameter RESET_RAM = 0, + parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", @@ -40,20 +40,20 @@ module VX_sp_ram #( output wire [DATAW-1:0] rdata ); VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .ADDR_MIN (ADDR_MIN), - .WRENW (WRENW), - .OUT_REG (OUT_REG), + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .OUT_REG (OUT_REG), + .LUTRAM (LUTRAM), .NO_RWCHECK (NO_RWCHECK), - .RW_ASSERT (RW_ASSERT), - .LUTRAM (LUTRAM), - .RESET_RAM (RESET_RAM), - .READ_ENABLE (READ_ENABLE), - .INIT_ENABLE (INIT_ENABLE), - .INIT_FILE (INIT_FILE), + .RW_ASSERT (RW_ASSERT), + .RESET_RAM (RESET_RAM), + .RESET_OUT (RESET_OUT), + .READ_ENABLE(READ_ENABLE), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), .INIT_VALUE (INIT_VALUE), - .ADDRW (ADDRW) + .ADDRW (ADDRW) ) dp_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 2ba09fd61..462103c09 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -168,8 +168,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), 
.WRENW (WORD_SIZE), - .READ_ENABLE (1), .OUT_REG (1), + .READ_ENABLE (0), .NO_RWCHECK (1) ) data_store ( .clk (clk), From d979cf277fef9ad3f19ec7ff296a290e5b422070 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 04:00:57 -0700 Subject: [PATCH 206/488] decoder logic specialization --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/cache/VX_bank_flush.sv | 8 +++- hw/rtl/cache/VX_cache_bypass.sv | 9 ++-- hw/rtl/cache/VX_cache_data.sv | 2 +- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/libs/VX_cyclic_arbiter.sv | 12 ++++- hw/rtl/libs/VX_decoder.sv | 46 +++++++++++++++++++ .../{VX_onehot_encoder.sv => VX_encoder.sv} | 2 +- hw/rtl/libs/VX_matrix_arbiter.sv | 2 +- hw/rtl/libs/VX_mem_adapter.sv | 20 +++++++- hw/rtl/libs/VX_rr_arbiter.sv | 15 ++++-- hw/rtl/libs/VX_stream_xbar.sv | 18 ++++++-- 12 files changed, 116 insertions(+), 22 deletions(-) create mode 100644 hw/rtl/libs/VX_decoder.sv rename hw/rtl/libs/{VX_onehot_encoder.sv => VX_encoder.sv} (99%) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index cb5725e78..61465103e 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -963,7 +963,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [COUT_TID_WIDTH-1:0] cout_tid; - VX_onehot_encoder #( + VX_encoder #( .N (`VX_MEM_BYTEEN_WIDTH) ) cout_tid_enc ( .data_in (vx_mem_req_byteen), diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 2d62e354c..608eefa7d 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -114,7 +114,13 @@ module VX_bank_flush #( assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin - assign flush_way = NUM_WAYS'(1) << counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]; + VX_decoder #( + .N (`CS_WAY_SEL_BITS) + ) ctr_decoder ( + .shift_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .data_in (1'b1), + .data_out (flush_way) + ); end else begin 
assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 7992ec9e8..a3d872d7f 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -56,6 +56,7 @@ module VX_cache_bypass #( localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); + localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; @@ -72,7 +73,7 @@ module VX_cache_bypass #( wire core_req_nc_valid; wire [NUM_REQS-1:0] core_req_nc_valids; wire [NUM_REQS-1:0] core_req_nc_idxs; - wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; + wire [REQ_SEL_WIDTH-1:0] core_req_nc_idx; wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_ready; @@ -261,17 +262,15 @@ module VX_cache_bypass #( .data_out (mem_rsp_tag_id_nc) ); - wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; + wire [REQ_SEL_WIDTH-1:0] rsp_idx; if (NUM_REQS > 1) begin assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; end else begin assign rsp_idx = 1'b0; end - wire [NUM_REQS-1:0] rsp_nc_valid = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid[i]; + assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 318463f76..18d44b6db 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -140,7 +140,7 @@ module VX_cache_data #( assign line_wren = fill; end - VX_onehot_encoder #( + VX_encoder #( .N (NUM_WAYS) ) way_enc ( .data_in (way_sel), diff --git a/hw/rtl/cache/VX_cache_mshr.sv 
b/hw/rtl/cache/VX_cache_mshr.sv index 4f8163269..0ca67d159 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -148,7 +148,7 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); - VX_onehot_encoder #( + VX_encoder #( .N (MSHR_SIZE) ) prev_sel ( .data_in (addr_matches & ~next_table_x), diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index a6673c8b7..592b7a03b 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -41,7 +41,7 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; wire [LOG_NUM_REQS-1:0] grant_index_um; - wire [NUM_REQS-1:0] grant_onehot_um; + wire [NUM_REQS-1:0] grant_onehot_w, grant_onehot_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin @@ -65,10 +65,18 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); + VX_decoder #( + .N (LOG_NUM_REQS) + ) grant_decoder ( + .shift_in (grant_index), + .data_in (1'b1), + .data_out (grant_onehot_w) + ); + wire is_hit = requests[grant_index_r]; assign grant_index = is_hit ? grant_index_r : grant_index_um; - assign grant_onehot = is_hit ? (NUM_REQS'(1) << grant_index) : grant_onehot_um; + assign grant_onehot = is_hit ? grant_onehot_w : grant_onehot_um; end diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv new file mode 100644 index 000000000..34a378e71 --- /dev/null +++ b/hw/rtl/libs/VX_decoder.sv @@ -0,0 +1,46 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +// Fast encoder using parallel prefix computation +// Adapted from BaseJump STL: http://bjump.org/data_out.html + +`TRACING_OFF +module VX_decoder #( + parameter N = 1, + parameter M = 1, +`ifdef VIVADO + parameter MODEL = 1, +`else + parameter MODEL = 0, +`endif + parameter D = 1 << N +) ( + input wire [N-1:0] shift_in, + input wire [M-1:0] data_in, + output wire [D-1:0][M-1:0] data_out +); + if (MODEL == 1) begin + reg [D-1:0][M-1:0] data_out_w; + always @(*) begin + data_out_w = '0; + data_out_w[shift_in] = data_in; + end + assign data_out = data_out_w; + end else begin + assign data_out = (D*M)'(data_in) << (shift_in * M); + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_onehot_encoder.sv b/hw/rtl/libs/VX_encoder.sv similarity index 99% rename from hw/rtl/libs/VX_onehot_encoder.sv rename to hw/rtl/libs/VX_encoder.sv index 6246a673c..85d72ce52 100644 --- a/hw/rtl/libs/VX_onehot_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -17,7 +17,7 @@ // Adapted from BaseJump STL: http://bjump.org/data_out.html `TRACING_OFF -module VX_onehot_encoder #( +module VX_encoder #( parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 9f0ead356..eff4eb7e1 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -74,7 +74,7 @@ module VX_matrix_arbiter #( assign grant_onehot = grant; - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) encoder ( .data_in (grant_onehot), diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 068628be2..3e84a6292 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -97,10 +97,26 @@ module VX_mem_adapter #( assign mem_req_addr_out_w = mem_req_addr_in_qual; end + VX_decoder #( + .N (D), + .M (SRC_DATA_WIDTH/8) + ) req_be_dec ( + 
.shift_in (req_idx), + .data_in (mem_req_byteen_in), + .data_out (mem_req_byteen_out_w) + ); + + VX_decoder #( + .N (D), + .M (SRC_DATA_WIDTH) + ) req_data_dec ( + .shift_in (req_idx), + .data_in (mem_req_data_in), + .data_out (mem_req_data_out_w) + ); + assign mem_req_valid_out_w = mem_req_valid_in; assign mem_req_rw_out_w = mem_req_rw_in; - assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3)); - assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW); assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx}); assign mem_req_ready_in = mem_req_ready_out_w; diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 5c279989b..4b22a4004 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -448,7 +448,7 @@ module VX_rr_arbiter #( end end - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), @@ -480,9 +480,16 @@ module VX_rr_arbiter #( end end - assign grant_index = grant_table[state]; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; - assign grant_valid = (| requests); + VX_decoder #( + .N (LOG_NUM_REQS) + ) grant_decoder ( + .shift_in (grant_index), + .data_in (grant_valid), + .data_out (grant_onehot) + ); + + assign grant_index = grant_table[state]; + assign grant_valid = (| requests); end diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 3dd30bc86..5a3b129ea 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -72,12 +72,17 @@ module VX_stream_xbar #( ); for (genvar i = 0; i < NUM_INPUTS; ++i) begin - assign per_output_valid_in[i] = NUM_OUTPUTS'(valid_in[i]) << sel_in[i]; + VX_decoder #( + .N (OUT_WIDTH) + ) sel_in_decoder ( + .shift_in (sel_in[i]), + .data_in (valid_in[i]), + .data_out (per_output_valid_in[i]) + ); assign ready_in[i] = | per_output_ready_in_w[i]; end for (genvar i = 0; i 
< NUM_OUTPUTS; ++i) begin - VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -131,8 +136,15 @@ module VX_stream_xbar #( wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + VX_decoder #( + .N (OUT_WIDTH) + ) sel_in_decoder ( + .shift_in (sel_in[0]), + .data_in (valid_in[0]), + .data_out (valid_out_w) + ); + assign ready_in[0] = ready_out_w[sel_in[0]]; - assign valid_out_w = NUM_OUTPUTS'(valid_in[0]) << sel_in[0]; assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin From 82150891944a2dcb46a12557671e23d6830da3aa Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 04:03:46 -0700 Subject: [PATCH 207/488] minor update --- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index b1f115155..00b79ba21 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -54,7 +54,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam NUM_FPCORES = 4; localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); - localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * `XLEN); + localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * 32); localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) From 32636fac70c011d494da5931ed618f32794bfdcc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 10:15:02 -0700 Subject: [PATCH 208/488] minor update --- runtime/opae/vortex.cpp | 3 +-- runtime/xrt/vortex.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 390d5acc4..970ccb12a 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -206,7 +206,6 @@ public: int get_caps(uint32_t caps_id, uint64_t * value) { uint64_t _value; 
- switch (caps_id) { case VX_CAPS_VERSION: _value = (dev_caps_ >> 0) & 0xff; @@ -227,7 +226,7 @@ public: _value = global_mem_size_; break; case VX_CAPS_LOCAL_MEM_SIZE: - _value = 1ull << ((dev_caps_ >> 48) & 0xff); + _value = 1ull << ((dev_caps_ >> 40) & 0xff); break; case VX_CAPS_ISA_FLAGS: _value = isa_caps_; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 8c273cf7f..0ee9653df 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -310,15 +310,15 @@ public: uint32_t num_banks = 1 << platform_.lg2_num_banks; uint64_t bank_size = 1ull << platform_.lg2_bank_size; - // adjust memory bank size to architecture limit + // adjust memory banks allocation to architecture limit int isa_arch = VX_ISA_ARCH(isa_caps_); if (isa_arch == 32) { uint64_t max_mem_size = 1ull << 32; - uint64_t need_bank_size = max_mem_size / num_banks; - if (bank_size > need_bank_size) { - printf("info: adjusted bank size from 0x%lx to 0x%lx bytes.\n", bank_size, need_bank_size); - bank_size = need_bank_size; - platform_.lg2_bank_size = log2ceil(bank_size); + uint32_t need_num_banks = max_mem_size / bank_size; + if (num_banks > need_num_banks) { + printf("info: adjusted number of banks from %d to %d.\n", num_banks, need_num_banks); + num_banks = need_num_banks; + platform_.lg2_num_banks = log2ceil(num_banks); } } @@ -416,7 +416,7 @@ public: _value = global_mem_size_; break; case VX_CAPS_LOCAL_MEM_SIZE: - _value = 1ull << ((dev_caps_ >> 48) & 0xff); + _value = 1ull << ((dev_caps_ >> 40) & 0xff); break; case VX_CAPS_ISA_FLAGS: _value = isa_caps_; From d7eae0c8862469e42c4d58647e720cc4b404f9bc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 02:33:30 -0700 Subject: [PATCH 209/488] minor update --- hw/rtl/VX_platform.vh | 2 +- hw/rtl/cache/VX_bank_flush.sv | 4 ++-- hw/rtl/libs/VX_cyclic_arbiter.sv | 4 ++-- hw/rtl/libs/VX_decoder.sv | 12 ++++-------- hw/rtl/libs/VX_mem_adapter.sv | 8 ++++---- hw/rtl/libs/VX_rr_arbiter.sv | 4 ++-- hw/rtl/libs/VX_stream_xbar.sv 
| 8 ++++---- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index e15758d27..74907ad4c 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -158,7 +158,7 @@ `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) -`define NO_RW_RAM_CHECK (* ramstyle = "no_rw_check" *) +`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) `elsif VIVADO diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 608eefa7d..3ceffaa6b 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -117,8 +117,8 @@ module VX_bank_flush #( VX_decoder #( .N (`CS_WAY_SEL_BITS) ) ctr_decoder ( - .shift_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), - .data_in (1'b1), + .data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .valid_in (1'b1), .data_out (flush_way) ); end else begin diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 592b7a03b..167042a3a 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -68,8 +68,8 @@ module VX_cyclic_arbiter #( VX_decoder #( .N (LOG_NUM_REQS) ) grant_decoder ( - .shift_in (grant_index), - .data_in (1'b1), + .data_in (grant_index), + .valid_in (1'b1), .data_out (grant_onehot_w) ); diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index 34a378e71..45b37b1db 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -20,26 +20,22 @@ module VX_decoder #( parameter N = 1, parameter M = 1, -`ifdef VIVADO - parameter MODEL = 1, -`else parameter MODEL = 0, -`endif parameter D = 1 << N ) ( - input wire [N-1:0] shift_in, - input wire [M-1:0] data_in, + input wire [N-1:0] data_in, + input wire [M-1:0] valid_in, output wire [D-1:0][M-1:0] data_out ); 
if (MODEL == 1) begin reg [D-1:0][M-1:0] data_out_w; always @(*) begin data_out_w = '0; - data_out_w[shift_in] = data_in; + data_out_w[data_in] = valid_in; end assign data_out = data_out_w; end else begin - assign data_out = (D*M)'(data_in) << (shift_in * M); + assign data_out = (D*M)'(valid_in) << (data_in * M); end endmodule diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 3e84a6292..6ee6060b8 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -101,8 +101,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH/8) ) req_be_dec ( - .shift_in (req_idx), - .data_in (mem_req_byteen_in), + .data_in (req_idx), + .valid_in (mem_req_byteen_in), .data_out (mem_req_byteen_out_w) ); @@ -110,8 +110,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH) ) req_data_dec ( - .shift_in (req_idx), - .data_in (mem_req_data_in), + .data_in (req_idx), + .valid_in (mem_req_data_in), .data_out (mem_req_data_out_w) ); diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 4b22a4004..a2a9a9654 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -483,8 +483,8 @@ module VX_rr_arbiter #( VX_decoder #( .N (LOG_NUM_REQS) ) grant_decoder ( - .shift_in (grant_index), - .data_in (grant_valid), + .data_in (grant_index), + .valid_in (grant_valid), .data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 5a3b129ea..d1b01125f 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -75,8 +75,8 @@ module VX_stream_xbar #( VX_decoder #( .N (OUT_WIDTH) ) sel_in_decoder ( - .shift_in (sel_in[i]), - .data_in (valid_in[i]), + .data_in (sel_in[i]), + .valid_in (valid_in[i]), .data_out (per_output_valid_in[i]) ); assign ready_in[i] = | per_output_ready_in_w[i]; @@ -139,8 +139,8 @@ module VX_stream_xbar #( VX_decoder #( .N (OUT_WIDTH) ) sel_in_decoder ( - .shift_in (sel_in[0]), - .data_in (valid_in[0]), + .data_in 
(sel_in[0]), + .valid_in (valid_in[0]), .data_out (valid_out_w) ); From 40e04a409e57798c07896f838c244a8018dcb436 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 02:34:08 -0700 Subject: [PATCH 210/488] adding PE switch --- hw/rtl/core/VX_alu_unit.sv | 111 +++++++++++--------------------- hw/rtl/core/VX_pe_switch.sv | 92 +++++++++++++++++++++++++++ hw/rtl/core/VX_sfu_unit.sv | 123 +++++++++++++----------------------- 3 files changed, 173 insertions(+), 153 deletions(-) create mode 100644 hw/rtl/core/VX_pe_switch.sv diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 9b3d6deea..120ecd5f0 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -30,16 +30,20 @@ module VX_alu_unit #( `UNUSED_SPARAM (INSTANCE_ID) localparam BLOCK_SIZE = `NUM_ALU_BLOCKS; localparam NUM_LANES = `NUM_ALU_LANES; - localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); - localparam PID_WIDTH = `UP(PID_BITS); - localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; - localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED; localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS); + localparam PE_COUNT = 1 + `EXT_M_ENABLED; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + localparam PE_IDX_INT = 0; + localparam PE_IDX_MDV = PE_IDX_INT + `EXT_M_ENABLED; VX_execute_if #( .NUM_LANES (NUM_LANES) ) per_block_execute_if[BLOCK_SIZE](); + VX_commit_if #( + .NUM_LANES (NUM_LANES) + ) per_block_commit_if[BLOCK_SIZE](); + VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), @@ -51,26 +55,41 @@ module VX_alu_unit #( .execute_if (per_block_execute_if) ); - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - wire is_muldiv_op = `EXT_M_ENABLED && 
(per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV); - VX_execute_if #( .NUM_LANES (NUM_LANES) - ) int_execute_if(); + ) pe_execute_if[PE_COUNT](); - VX_commit_if #( + VX_commit_if#( .NUM_LANES (NUM_LANES) - ) int_commit_if(); + ) pe_commit_if[PE_COUNT](); - assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op; - assign int_execute_if.data = per_block_execute_if[block_idx].data; + reg [PE_SEL_BITS-1:0] pe_select; + always @(*) begin + if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV)) + pe_select = PE_IDX_MDV; + else + pe_select = PE_IDX_INT; + end + + VX_pe_switch #( + .PE_COUNT (PE_COUNT), + .NUM_LANES (NUM_LANES), + .ARBITER ("R"), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (PARTIAL_BW ? 1 : 3) + ) pe_switch ( + .clk (clk), + .reset (block_reset), + .pe_sel (pe_select), + .execute_in_if (per_block_execute_if[block_idx]), + .commit_out_if (per_block_commit_if[block_idx]), + .execute_out_if (pe_execute_if), + .commit_in_if (pe_commit_if) + ); VX_alu_int #( .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)), @@ -79,76 +98,22 @@ module VX_alu_unit #( ) alu_int ( .clk (clk), .reset (block_reset), - .execute_if (int_execute_if), + .execute_if (pe_execute_if[PE_IDX_INT]), .branch_ctl_if (branch_ctl_if[block_idx]), - .commit_if (int_commit_if) + .commit_if (pe_commit_if[PE_IDX_INT]) ); `ifdef EXT_M_ENABLE - - VX_execute_if #( - .NUM_LANES (NUM_LANES) - ) muldiv_execute_if(); - - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) muldiv_commit_if(); - - assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op; - assign muldiv_execute_if.data = per_block_execute_if[block_idx].data; - VX_alu_muldiv #( .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)), .NUM_LANES (NUM_LANES) ) muldiv_unit ( .clk (clk), .reset (block_reset), - .execute_if (muldiv_execute_if), - .commit_if (muldiv_commit_if) + .execute_if 
(pe_execute_if[PE_IDX_MDV]), + .commit_if (pe_commit_if[PE_IDX_MDV]) ); - `endif - - // can accept new request? - assign per_block_execute_if[block_idx].ready = - `ifdef EXT_M_ENABLE - is_muldiv_op ? muldiv_execute_if.ready : - `endif - int_execute_if.ready; - - // send response - - VX_stream_arb #( - .NUM_INPUTS (RSP_ARB_SIZE), - .DATAW (RSP_ARB_DATAW), - .OUT_BUF (PARTIAL_BW ? 1 : 3), - .ARBITER ("R") - ) rsp_arb ( - .clk (clk), - .reset (block_reset), - .valid_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.valid, - `endif - int_commit_if.valid - }), - .ready_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.ready, - `endif - int_commit_if.ready - }), - .data_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.data, - `endif - int_commit_if.data - }), - .data_out (per_block_commit_if[block_idx].data), - .valid_out (per_block_commit_if[block_idx].valid), - .ready_out (per_block_commit_if[block_idx].ready), - `UNUSED_PIN (sel_out) - ); end VX_gather_unit #( diff --git a/hw/rtl/core/VX_pe_switch.sv b/hw/rtl/core/VX_pe_switch.sv new file mode 100644 index 000000000..9c8d7a7b3 --- /dev/null +++ b/hw/rtl/core/VX_pe_switch.sv @@ -0,0 +1,92 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module VX_pe_switch import VX_gpu_pkg::*; #( + parameter PE_COUNT = 0, + parameter NUM_LANES = 0, + parameter REQ_OUT_BUF = 0, + parameter RSP_OUT_BUF = 0, + parameter `STRING ARBITER = "R" +) ( + input wire clk, + input wire reset, + input wire [PE_SEL_BITS-1:0] pe_sel, + VX_execute_if.slave execute_in_if, + VX_commit_if.master commit_out_if, + VX_execute_if.master execute_out_if[PE_COUNT], + VX_commit_if .slave commit_in_if[PE_COUNT] +); + localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); + localparam PID_WIDTH = `UP(PID_BITS); + localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1; + localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + + wire [PE_COUNT-1:0] pe_req_valid; + wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data; + wire [PE_COUNT-1:0] pe_req_ready; + + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_OUTPUTS (PE_COUNT), + .OUT_BUF (REQ_OUT_BUF) + ) req_switch ( + .clk (clk), + .reset (reset), + .sel_in (pe_sel), + .valid_in (execute_in_if.valid), + .ready_in (execute_in_if.ready), + .data_in (execute_in_if.data), + .data_out (pe_req_data), + .valid_out (pe_req_valid), + .ready_out (pe_req_ready) + ); + + for (genvar i = 0; i < PE_COUNT; ++i) begin + assign execute_out_if[i].valid = pe_req_valid[i]; + assign execute_out_if[i].data = pe_req_data[i]; + assign pe_req_ready[i] = execute_out_if[i].ready; + end + + /////////////////////////////////////////////////////////////////////////// + + wire [PE_COUNT-1:0] pe_rsp_valid; + wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data; + wire [PE_COUNT-1:0] pe_rsp_ready; + + for (genvar i = 0; i < PE_COUNT; ++i) begin + assign pe_rsp_valid[i] = commit_in_if[i].valid; + assign pe_rsp_data[i] = commit_in_if[i].data; + assign commit_in_if[i].ready 
= pe_rsp_ready[i]; + end + + VX_stream_arb #( + .NUM_INPUTS (PE_COUNT), + .DATAW (RSP_DATAW), + .ARBITER (ARBITER), + .OUT_BUF (RSP_OUT_BUF) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (pe_rsp_valid), + .ready_in (pe_rsp_ready), + .data_in (pe_rsp_data), + .data_out (commit_out_if.data), + .valid_out (commit_out_if.valid), + .ready_out (commit_out_if.ready), + `UNUSED_PIN (sel_out) + ); + +endmodule diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index a77520866..de0ce9fc4 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -41,20 +41,21 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( VX_warp_ctl_if.master warp_ctl_if ); `UNUSED_SPARAM (INSTANCE_ID) - localparam BLOCK_SIZE = 1; - localparam NUM_LANES = `NUM_SFU_LANES; - localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); - localparam PID_WIDTH = `UP(PID_BITS); - - localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1; - localparam RSP_ARB_SIZE = 1 + 1; - localparam RSP_ARB_IDX_WCTL = 0; - localparam RSP_ARB_IDX_CSRS = 1; + localparam BLOCK_SIZE = 1; + localparam NUM_LANES = `NUM_SFU_LANES; + localparam PE_COUNT = 2; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + localparam PE_IDX_WCTL = 0; + localparam PE_IDX_CSRS = 1; VX_execute_if #( .NUM_LANES (NUM_LANES) ) per_block_execute_if[BLOCK_SIZE](); + VX_commit_if #( + .NUM_LANES (NUM_LANES) + ) per_block_commit_if[BLOCK_SIZE](); + VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), @@ -66,20 +67,37 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .execute_if (per_block_execute_if) ); - wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in; - wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in; - wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in; - - // Warp control block VX_execute_if #( .NUM_LANES (NUM_LANES) - ) wctl_execute_if(); + ) pe_execute_if[PE_COUNT](); + VX_commit_if#( .NUM_LANES (NUM_LANES) - ) wctl_commit_if(); + ) 
pe_commit_if[PE_COUNT](); - assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type); - assign wctl_execute_if.data = per_block_execute_if[0].data; + reg [PE_SEL_BITS-1:0] pe_select; + always @(*) begin + if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type)) + pe_select = PE_IDX_CSRS; + else + pe_select = PE_IDX_WCTL; + end + + VX_pe_switch #( + .PE_COUNT (PE_COUNT), + .NUM_LANES (NUM_LANES), + .ARBITER ("R"), + .REQ_OUT_BUF(0), + .RSP_OUT_BUF(3) + ) pe_switch ( + .clk (clk), + .reset (reset), + .pe_sel (pe_select), + .execute_in_if (per_block_execute_if[0]), + .commit_out_if (per_block_commit_if[0]), + .execute_out_if (pe_execute_if), + .commit_in_if (pe_commit_if) + ); `RESET_RELAY (wctl_reset, reset); @@ -89,26 +107,11 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( ) wctl_unit ( .clk (clk), .reset (wctl_reset), - .execute_if (wctl_execute_if), + .execute_if (pe_execute_if[PE_IDX_WCTL]), .warp_ctl_if(warp_ctl_if), - .commit_if (wctl_commit_if) + .commit_if (pe_commit_if[PE_IDX_WCTL]) ); - assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid; - assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data; - assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL]; - - // CSR unit - VX_execute_if #( - .NUM_LANES (NUM_LANES) - ) csr_execute_if(); - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) csr_commit_if(); - - assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type); - assign csr_execute_if.data = per_block_execute_if[0].data; - `RESET_RELAY (csr_reset, reset); VX_csr_unit #( @@ -120,7 +123,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .reset (csr_reset), .base_dcrs (base_dcrs), - .execute_if (csr_execute_if), + .execute_if (pe_execute_if[PE_IDX_CSRS]), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_if), @@ -133,47 +136,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .sched_csr_if (sched_csr_if), .commit_csr_if 
(commit_csr_if), - .commit_if (csr_commit_if) - ); - - assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid; - assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data; - assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSRS]; - - // can accept new request? - - reg sfu_req_ready; - always @(*) begin - case (per_block_execute_if[0].data.op_type) - `INST_SFU_CSRRW, - `INST_SFU_CSRRS, - `INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready; - default: sfu_req_ready = wctl_execute_if.ready; - endcase - end - assign per_block_execute_if[0].ready = sfu_req_ready; - - // response arbitration - - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) arb_commit_if[BLOCK_SIZE](); - - VX_stream_arb #( - .NUM_INPUTS (RSP_ARB_SIZE), - .DATAW (RSP_ARB_DATAW), - .ARBITER ("R"), - .OUT_BUF (3) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in (rsp_arb_valid_in), - .ready_in (rsp_arb_ready_in), - .data_in (rsp_arb_data_in), - .data_out (arb_commit_if[0].data), - .valid_out (arb_commit_if[0].valid), - .ready_out (arb_commit_if[0].ready), - `UNUSED_PIN (sel_out) + .commit_if (pe_commit_if[PE_IDX_CSRS]) ); VX_gather_unit #( @@ -181,9 +144,9 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES), .OUT_BUF (3) ) gather_unit ( - .clk (clk), - .reset (reset), - .commit_in_if (arb_commit_if), + .clk (clk), + .reset (reset), + .commit_in_if (per_block_commit_if), .commit_out_if (commit_if) ); From a17580375bcc8f82c01376a11be98823b3399565 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 03:11:26 -0700 Subject: [PATCH 211/488] fpu timing optimization --- hw/rtl/fpu/VX_fpu_cvt.sv | 2 +- hw/rtl/fpu/VX_fpu_div.sv | 4 ++-- hw/rtl/fpu/VX_fpu_dsp.sv | 4 +--- hw/rtl/fpu/VX_fpu_fma.sv | 4 ++-- hw/rtl/fpu/VX_fpu_ncp.sv | 2 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 4 ++-- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 94dee7316..b622f5153 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ 
b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -64,7 +64,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 79b91a1f5..b6cfeb632 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -67,8 +67,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(2*32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .PE_REG (0), + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 00b79ba21..1a6e944e2 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -111,9 +111,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( VX_stream_switch #( .DATAW (REQ_DATAW), - .NUM_INPUTS (1), - .NUM_OUTPUTS (NUM_FPCORES), - .OUT_BUF (0) + .NUM_OUTPUTS (NUM_FPCORES) ) req_switch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 3095846c1..30939be45 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -98,8 +98,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(3*32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF ((NUM_LANES != NUM_PES) ? 
2 : 0) + .PE_REG (1), // must be registered for DSPs + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 52b2979b6..cccc09b97 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -69,7 +69,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index f6c542fc3..0ca7a02df 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -61,8 +61,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .DATA_IN_WIDTH(32), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF ((NUM_LANES != NUM_PES) ? 2 : 0) + .PE_REG (0), + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), From 33bec667c2c1214ddda910fd1d6e3f69eff20e63 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 04:12:58 -0700 Subject: [PATCH 212/488] minor update --- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_pe_switch.sv | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 120ecd5f0..c853a5d60 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -67,7 +67,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) pe_commit_if[PE_COUNT](); - reg [PE_SEL_BITS-1:0] pe_select; + reg [`UP(PE_SEL_BITS)-1:0] pe_select; always @(*) begin if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV)) pe_select = PE_IDX_MDV; diff --git a/hw/rtl/core/VX_pe_switch.sv b/hw/rtl/core/VX_pe_switch.sv index 9c8d7a7b3..384fce329 100644 --- a/hw/rtl/core/VX_pe_switch.sv +++ b/hw/rtl/core/VX_pe_switch.sv @@ -18,11 
+18,12 @@ module VX_pe_switch import VX_gpu_pkg::*; #( parameter NUM_LANES = 0, parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0, - parameter `STRING ARBITER = "R" + parameter `STRING ARBITER = "R", + parameter PE_SEL_BITS = `CLOG2(PE_COUNT) ) ( input wire clk, input wire reset, - input wire [PE_SEL_BITS-1:0] pe_sel, + input wire [`UP(PE_SEL_BITS)-1:0] pe_sel, VX_execute_if.slave execute_in_if, VX_commit_if.master commit_out_if, VX_execute_if.master execute_out_if[PE_COUNT], @@ -32,7 +33,6 @@ module VX_pe_switch import VX_gpu_pkg::*; #( localparam PID_WIDTH = `UP(PID_BITS); localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1; localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; - localparam PE_SEL_BITS = `CLOG2(PE_COUNT); wire [PE_COUNT-1:0] pe_req_valid; wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data; From d16aee3ecd75f6636852a478856fa97f92426ba2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 10:37:51 -0700 Subject: [PATCH 213/488] minor update --- hw/rtl/core/VX_alu_unit.sv | 3 +-- hw/rtl/core/VX_sfu_unit.sv | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index c853a5d60..f3e0b19e7 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -69,10 +69,9 @@ module VX_alu_unit #( reg [`UP(PE_SEL_BITS)-1:0] pe_select; always @(*) begin + pe_select = PE_IDX_INT; if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV)) pe_select = PE_IDX_MDV; - else - pe_select = PE_IDX_INT; end VX_pe_switch #( diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index de0ce9fc4..93686ca55 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -77,10 +77,9 @@ module VX_sfu_unit import 
VX_gpu_pkg::*; #( reg [PE_SEL_BITS-1:0] pe_select; always @(*) begin + pe_select = PE_IDX_WCTL; if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type)) pe_select = PE_IDX_CSRS; - else - pe_select = PE_IDX_WCTL; end VX_pe_switch #( From 45ed8abf22657e4750f5d48ff5d19de6fbd2f36c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 19:39:28 -0700 Subject: [PATCH 214/488] minor update --- hw/rtl/libs/VX_stream_xbar.sv | 16 ++++++++-------- third_party/Makefile | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index d1b01125f..f2d9aa856 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -63,14 +63,6 @@ module VX_stream_xbar #( .data_out (per_output_ready_in_w) ); - VX_transpose #( - .N (NUM_INPUTS), - .M (NUM_OUTPUTS) - ) val_in_transpose ( - .data_in (per_output_valid_in), - .data_out (per_output_valid_in_w) - ); - for (genvar i = 0; i < NUM_INPUTS; ++i) begin VX_decoder #( .N (OUT_WIDTH) @@ -82,6 +74,14 @@ module VX_stream_xbar #( assign ready_in[i] = | per_output_ready_in_w[i]; end + VX_transpose #( + .N (NUM_INPUTS), + .M (NUM_OUTPUTS) + ) val_in_transpose ( + .data_in (per_output_valid_in), + .data_out (per_output_valid_in_w) + ); + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), diff --git a/third_party/Makefile b/third_party/Makefile index a2f74264e..24905e58c 100644 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -1,6 +1,6 @@ -all: fpnew softfloat ramulator +all: cvfpu softfloat ramulator -fpnew: +cvfpu: softfloat: SPECIALIZE_TYPE=RISCV SOFTFLOAT_OPTS="-fPIC -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 -DSOFTFLOAT_FAST_DIV64TO32" $(MAKE) -C softfloat/build/Linux-x86_64-GCC @@ -13,4 +13,4 @@ clean: $(MAKE) -C softfloat/build/Linux-x86_64-GCC clean rm -rf ramulator/build ramulator/libramulator.so -.PHONY: all fpnew softfloat ramulator \ No newline at end of file 
+.PHONY: all cvfpu softfloat ramulator \ No newline at end of file From c28449f51500388515c7458f8aa60f2dcba99651 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 2 Sep 2024 21:58:12 -0700 Subject: [PATCH 215/488] minor update --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d4ed68a59..5f61d06fd 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ More detailed build instructions can be found [here](docs/install_vortex.md). - [LLVM](https://llvm.org/) - [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain) - [Verilator](https://www.veripool.org/verilator) -- [FpNew](https://github.com/pulp-platform/fpnew.git) +- [cvfpu](https://github.com/openhwgroup/cvfpu.git) - [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git) - [Ramulator](https://github.com/CMU-SAFARI/ramulator.git) - [Yosys](https://github.com/YosysHQ/yosys) From 19d614202327e5b4f610911f82e2ea293a6b1cb7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 3 Sep 2024 04:54:29 -0700 Subject: [PATCH 216/488] fixed fpu serialization --- ci/regression.sh.in | 27 +++++++++------------------ hw/rtl/fpu/VX_fpu_cvt.sv | 24 +++++++++++++++++------- hw/rtl/fpu/VX_fpu_div.sv | 8 +++++--- hw/rtl/fpu/VX_fpu_fma.sv | 29 ++++++++++++++++------------- hw/rtl/fpu/VX_fpu_ncp.sv | 16 ++++++++++------ hw/rtl/fpu/VX_fpu_sqrt.sv | 6 ++++-- hw/rtl/libs/VX_pe_serializer.sv | 12 +----------- 7 files changed, 62 insertions(+), 60 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index aee991cd4..32e479c1e 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -41,31 +41,23 @@ isa() make -C tests/riscv/isa run-simx make -C tests/riscv/isa run-rtlsim - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f - make -C 
sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f if [ "$XLEN" == "64" ] then - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64d + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64d + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64f + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64fx + make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64fx fi # clean build @@ -257,8 +249,7 @@ config2() make -C tests/regression/dogfood clean-kernel # disabling M & F extensions - make -C sim/rtlsim clean && 
CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32i + make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i make -C sim/rtlsim clean # disabling ZICOND extension diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index b622f5153..5f9dc944c 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -46,21 +46,29 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][31:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; + for (genvar i = 0; i < NUM_LANES; ++i) begin + assign data_in[i][0 +: 32] = dataa[i]; + assign data_in[i][32 +: `INST_FRM_BITS] = frm; + assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof; + assign data_in[i][32 + `INST_FRM_BITS + 1 +: 1] = is_signed; + end + VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FCVT), - .DATA_IN_WIDTH(32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), @@ -69,7 +77,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (valid_in), - .data_in (dataa), + .data_in (data_in), .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), @@ -81,6 +89,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -94,9 +104,9 @@ module VX_fpu_cvt import 
VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .enable (pe_enable), - .frm (frm), - .is_itof (is_itof), - .is_signed (is_signed), + .frm (pe_data_in[0][32 +: `INST_FRM_BITS]), + .is_itof (pe_data_in[0][32 + `INST_FRM_BITS +: 1]), + .is_signed (pe_data_in[0][32 + `INST_FRM_BITS + 1 +: 1]), .dataa (pe_data_in[i][0 +: 32]), .result (pe_data_out[i][0 +: 32]), .fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS]) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index b6cfeb632..6108b2580 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -46,13 +46,15 @@ module VX_fpu_div import VX_fpu_pkg::*; #( ); `UNUSED_VAR (frm) - wire [NUM_LANES-1:0][2*32-1:0] data_in; + localparam DATAW = 2 * 32; + + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][2*32-1:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; for (genvar i = 0; i < NUM_LANES; ++i) begin @@ -64,7 +66,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FDIV), - .DATA_IN_WIDTH(2*32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 30939be45..1bcc5d008 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -49,15 +49,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 3 * 32 + `INST_FRM_BITS; - wire [NUM_LANES-1:0][3*32-1:0] data_in; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][3*32-1:0] pe_data_in; + 
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; reg [NUM_LANES-1:0][31:0] a, b, c; @@ -66,9 +66,9 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( always @(*) begin if (is_madd) begin // MADD / MSUB / NMADD / NMSUB - a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i]; + a[i] = {is_neg ^ dataa[i][31], dataa[i][30:0]}; b[i] = datab[i]; - c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i]; + c[i] = {is_neg ^ is_sub ^ datac[i][31], datac[i][30:0]}; end else begin if (is_neg) begin // MUL @@ -77,9 +77,9 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( c[i] = '0; end else begin // ADD / SUB - a[i] = 32'h3f800000; // 1.0f - b[i] = dataa[i]; - c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i]; + a[i] = dataa[i]; + b[i] = 32'h3f800000; // 1.0f + c[i] = {is_sub ^ datab[i][31], datab[i][30:0]}; end end end @@ -89,13 +89,14 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( assign data_in[i][0 +: 32] = a[i]; assign data_in[i][32 +: 32] = b[i]; assign data_in[i][64 +: 32] = c[i]; + assign data_in[i][96 +: `INST_FRM_BITS] = frm; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FMA), - .DATA_IN_WIDTH(3*32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (1), // must be registered for DSPs @@ -116,6 +117,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -177,10 +180,10 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( dpi_fmadd ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, - {32'hffffffff, pe_data_in[i][32 +: 32]}, - {32'hffffffff, pe_data_in[i][64 +: 32]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + {32'hffffffff, pe_data_in[i][32 +: 32]}, // b + {32'hffffffff, pe_data_in[i][64 +: 32]}, 
// c + pe_data_in[0][96 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index cccc09b97..3728c2932 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -45,27 +45,29 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS; - wire [NUM_LANES-1:0][2*32-1:0] data_in; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][2*32-1:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; for (genvar i = 0; i < NUM_LANES; ++i) begin assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; + assign data_in[i][64 +: `INST_FRM_BITS] = frm; + assign data_in[i][64 + `INST_FRM_BITS +: `INST_FPU_BITS] = op_type; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FNCP), - .DATA_IN_WIDTH(2*32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), @@ -86,6 +88,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -99,8 +103,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .enable (pe_enable), - .frm (frm), - .op_type (op_type), + .frm (pe_data_in[0][64 +: `INST_FRM_BITS]), + .op_type (pe_data_in[0][64 + `INST_FRM_BITS +: `INST_FPU_BITS]), .dataa (pe_data_in[i][0 +: 32]), .datab (pe_data_in[i][32 +: 32]), .result (pe_data_out[i][0 +: 32]), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 0ca7a02df..c9d97af7f 100644 --- 
a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -46,19 +46,21 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `UNUSED_VAR (frm) + localparam DATAW = 32; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][31:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FSQRT), - .DATA_IN_WIDTH(32), + .DATA_IN_WIDTH(DATAW), .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 2f9c83483..d96db52f0 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -77,17 +77,7 @@ module VX_pe_serializer #( .data_out (pe_data_out) ); - VX_pipe_register #( - .DATAW (1), - .RESETW (1), - .DEPTH (PE_REG) - ) pe_en_reg ( - .clk (clk), - .reset (reset), - .enable (1'b1), - .data_in (enable), - .data_out (pe_enable) - ); + assign pe_enable = enable; if (NUM_LANES != NUM_PES) begin From f9230bdac3f0095974b3876936259721991f4ec7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 3 Sep 2024 06:14:09 -0700 Subject: [PATCH 217/488] minor update --- hw/rtl/fpu/VX_fpu_cvt.sv | 5 +++-- hw/rtl/fpu/VX_fpu_div.sv | 18 ++++++++++-------- hw/rtl/fpu/VX_fpu_fma.sv | 5 +++-- hw/rtl/fpu/VX_fpu_ncp.sv | 5 +++-- hw/rtl/fpu/VX_fpu_sqrt.sv | 22 ++++++++++++++-------- 5 files changed, 33 insertions(+), 22 deletions(-) diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 5f9dc944c..b3d1e099a 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -49,6 +49,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] mask_out; wire 
[NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; @@ -68,8 +69,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FCVT), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 6108b2580..1a1da2758 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -44,11 +44,10 @@ module VX_fpu_div import VX_fpu_pkg::*; #( output wire valid_out, input wire ready_out ); - `UNUSED_VAR (frm) - - localparam DATAW = 2 * 32; + localparam DATAW = 2 * 32 + `INST_FRM_BITS; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; @@ -60,14 +59,15 @@ module VX_fpu_div import VX_fpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; + assign data_in[i][64 +: `INST_FRM_BITS] = frm; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FDIV), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) @@ -87,6 +87,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -145,9 +147,9 @@ module VX_fpu_div import VX_fpu_pkg::*; #( dpi_fdiv ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, - {32'hffffffff, pe_data_in[i][32 +: 32]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + {32'hffffffff, 
pe_data_in[i][32 +: 32]}, // b + pe_data_in[0][64 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 1bcc5d008..ce09830d0 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -52,6 +52,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( localparam DATAW = 3 * 32 + `INST_FRM_BITS; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; @@ -96,8 +97,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FMA), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (1), // must be registered for DSPs .OUT_BUF (2) diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 3728c2932..e39af4296 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -48,6 +48,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS; wire [NUM_LANES-1:0][DATAW-1:0] data_in; + wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; @@ -67,8 +68,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FNCP), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index c9d97af7f..557e21f20 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -43,10 +43,9 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); + localparam DATAW = 32 + `INST_FRM_BITS; - `UNUSED_VAR 
(frm) - - localparam DATAW = 32; + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; @@ -56,12 +55,17 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; + for (genvar i = 0; i < NUM_LANES; ++i) begin + assign data_in[i][0 +: 32] = dataa[i]; + assign data_in[i][32 +: `INST_FRM_BITS] = frm; + end + VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FSQRT), - .DATA_IN_WIDTH(DATAW), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), .OUT_BUF (2) @@ -69,7 +73,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (valid_in), - .data_in (dataa), + .data_in (data_in), .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), @@ -81,6 +85,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .ready_out (ready_out) ); + `UNUSED_VAR (pe_data_in) + for (genvar i = 0; i < NUM_LANES; ++i) begin assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; @@ -137,8 +143,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( dpi_fsqrt ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + pe_data_in[0][32 +: `INST_FRM_BITS], // frm r, f ); From 335b53475a4cd37151d3e06884445f556fa7bc69 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 02:01:59 -0700 Subject: [PATCH 218/488] minor updates --- hw/rtl/fpu/VX_fpu_dsp.sv | 208 +++++++++++++++++++++----------- hw/rtl/fpu/VX_fpu_fma.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 12 +- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- hw/rtl/libs/VX_stream_buffer.sv | 6 +- hw/rtl/libs/VX_stream_unpack.sv | 12 +- hw/rtl/mem/VX_local_mem.sv | 2 +- 7 files changed, 152 insertions(+), 92 deletions(-) diff --git 
a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 1a6e944e2..c12c82d87 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -61,6 +61,8 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire [NUM_FPCORES-1:0] per_core_valid_in; wire [NUM_FPCORES-1:0][REQ_DATAW-1:0] per_core_data_in; + wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0] per_core_mask_in; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_in; wire [NUM_FPCORES-1:0][`INST_FPU_BITS-1:0] per_core_op_type; @@ -69,28 +71,13 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_dataa; wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datab; wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datac; - wire [NUM_FPCORES-1:0] per_core_ready_in; + wire [NUM_FPCORES-1:0] per_core_valid_out; wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; - wire [NUM_FPCORES-1:0] per_core_ready_out; - wire [NUM_FPCORES-1:0] per_core_valid_out; wire [NUM_FPCORES-1:0] per_core_has_fflags; fflags_t [NUM_FPCORES-1:0] per_core_fflags; - - wire [1:0] div_sqrt_ready_in; - wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result; - wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; - wire [1:0] div_sqrt_ready_out; - wire [1:0] div_sqrt_valid_out; - wire [1:0] div_sqrt_has_fflags; - fflags_t [1:0] div_sqrt_fflags; - - `RESET_RELAY (fma_reset, reset); - `RESET_RELAY (div_reset, reset); - `RESET_RELAY (sqrt_reset, reset); - `RESET_RELAY (cvt_reset, reset); - `RESET_RELAY (ncp_reset, reset); + wire [NUM_FPCORES-1:0] per_core_ready_out; wire [NUM_LANES-1:0][31:0] dataa_s; wire [NUM_LANES-1:0][31:0] datab_s; @@ -118,7 +105,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .sel_in (core_select), .valid_in (valid_in), .ready_in (ready_in), - .data_in ({mask_in, tag_in, op_type, fmt, frm, dataa_s, datab_s, datac_s}), + .data_in ({mask_in, tag_in, fmt, frm, dataa_s, datab_s, datac_s, op_type}), 
.data_out (per_core_data_in), .valid_out (per_core_valid_in), .ready_out (per_core_ready_in) @@ -128,21 +115,23 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( assign { per_core_mask_in[i], per_core_tag_in[i], - per_core_op_type[i], per_core_fmt[i], per_core_frm[i], per_core_dataa[i], per_core_datab[i], - per_core_datac[i] + per_core_datac[i], + per_core_op_type[i] } = per_core_data_in[i]; end - // FMA core + // FMA core /////////////////////////////////////////////////////////////// wire is_madd = per_core_op_type[FPU_FMA][1]; wire is_neg = per_core_op_type[FPU_FMA][0]; wire is_sub = per_core_fmt[FPU_FMA][1]; + `RESET_RELAY (fma_reset, reset); + VX_fpu_fma #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) @@ -168,24 +157,95 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .valid_out (per_core_valid_out[FPU_FMA]) ); - // Div/Sqrt cores + // Div/Sqrt cores ///////////////////////////////////////////////////////// - wire is_sqrt = per_core_op_type[FPU_DIVSQRT][0]; - assign per_core_ready_in[FPU_DIVSQRT] = div_sqrt_ready_in[is_sqrt]; + wire [1:0] div_sqrt_valid_in; + wire [1:0][REQ_DATAW-1:0] div_sqrt_data_in; + wire [1:0] div_sqrt_ready_in; + + wire [1:0][NUM_LANES-1:0] div_sqrt_mask_in; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_in; + wire [1:0][`INST_FPU_BITS-1:0] div_sqrt_op_type; + wire [1:0][`INST_FMT_BITS-1:0] div_sqrt_fmt; + wire [1:0][`INST_FRM_BITS-1:0] div_sqrt_frm; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_dataa; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datab; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datac; + + wire [1:0] div_sqrt_valid_out; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; + wire [1:0] div_sqrt_has_fflags; + fflags_t [1:0] div_sqrt_fflags; + wire [1:0] div_sqrt_ready_out; + + wire div_sqrt_valid_tmp_in; + wire [REQ_DATAW-1:0] div_sqrt_data_tmp_in; + wire div_sqrt_ready_tmp_in; + + VX_elastic_buffer #( + .DATAW (REQ_DATAW) + ) div_sqrt_req_buffer ( + .clk (clk), + .reset (reset), + .valid_in 
(per_core_valid_in[FPU_DIVSQRT]), + .ready_in (per_core_ready_in[FPU_DIVSQRT]), + .data_in (per_core_data_in[FPU_DIVSQRT]), + .data_out (div_sqrt_data_tmp_in), + .valid_out (div_sqrt_valid_tmp_in), + .ready_out (div_sqrt_ready_tmp_in) + ); + + wire is_sqrt = div_sqrt_data_tmp_in[0]; // op_type[0] + + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_OUTPUTS (2) + ) div_sqrt_req_switch ( + .clk (clk), + .reset (reset), + .sel_in (is_sqrt), + .valid_in (div_sqrt_valid_tmp_in), + .ready_in (div_sqrt_ready_tmp_in), + .data_in (div_sqrt_data_tmp_in), + .data_out (div_sqrt_data_in), + .valid_out (div_sqrt_valid_in), + .ready_out (div_sqrt_ready_in) + ); + + for (genvar i = 0; i < 2; ++i) begin + assign { + div_sqrt_mask_in[i], + div_sqrt_tag_in[i], + div_sqrt_fmt[i], + div_sqrt_frm[i], + div_sqrt_dataa[i], + div_sqrt_datab[i], + div_sqrt_datac[i], + div_sqrt_op_type[i] + } = div_sqrt_data_in[i]; + end + + `UNUSED_VAR (div_sqrt_op_type) + `UNUSED_VAR (div_sqrt_fmt) + `UNUSED_VAR (div_sqrt_datab) + `UNUSED_VAR (div_sqrt_datac) + + `RESET_RELAY (div_sqrt_reset, reset); VX_fpu_div #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_div ( .clk (clk), - .reset (div_reset), - .valid_in (per_core_valid_in[FPU_DIVSQRT] && ~is_sqrt), + .reset (div_sqrt_reset), + .valid_in (div_sqrt_valid_in[0]), .ready_in (div_sqrt_ready_in[0]), - .mask_in (per_core_mask_in[FPU_DIVSQRT]), - .tag_in (per_core_tag_in[FPU_DIVSQRT]), - .frm (per_core_frm[FPU_DIVSQRT]), - .dataa (per_core_dataa[FPU_DIVSQRT]), - .datab (per_core_datab[FPU_DIVSQRT]), + .mask_in (div_sqrt_mask_in[0]), + .tag_in (div_sqrt_tag_in[0]), + .frm (div_sqrt_frm[0]), + .dataa (div_sqrt_dataa[0]), + .datab (div_sqrt_datab[0]), .has_fflags (div_sqrt_has_fflags[0]), .fflags (div_sqrt_fflags[0]), .result (div_sqrt_result[0]), @@ -199,13 +259,13 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .TAG_WIDTH (TAG_WIDTH) ) fpu_sqrt ( .clk (clk), - .reset (sqrt_reset), - .valid_in (per_core_valid_in[FPU_DIVSQRT] && is_sqrt), + .reset 
(div_sqrt_reset), + .valid_in (div_sqrt_valid_in[1]), .ready_in (div_sqrt_ready_in[1]), - .mask_in (per_core_mask_in[FPU_DIVSQRT]), - .tag_in (per_core_tag_in[FPU_DIVSQRT]), - .frm (per_core_frm[FPU_DIVSQRT]), - .dataa (per_core_dataa[FPU_DIVSQRT]), + .mask_in (div_sqrt_mask_in[1]), + .tag_in (div_sqrt_tag_in[1]), + .frm (div_sqrt_frm[1]), + .dataa (div_sqrt_dataa[1]), .has_fflags (div_sqrt_has_fflags[1]), .fflags (div_sqrt_fflags[1]), .result (div_sqrt_result[1]), @@ -214,13 +274,47 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (div_sqrt_ready_out[1]) ); - // CVT core + wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; + for (genvar i = 0; i < 2; ++i) begin + assign div_sqrt_arb_data_in[i] = { + div_sqrt_result[i], + div_sqrt_has_fflags[i], + div_sqrt_fflags[i], + div_sqrt_tag_out[i] + }; + end + + VX_stream_arb #( + .NUM_INPUTS (2), + .DATAW (RSP_DATAW), + .ARBITER ("P"), + .OUT_BUF (0) + ) div_sqrt_rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (div_sqrt_valid_out), + .ready_in (div_sqrt_ready_out), + .data_in (div_sqrt_arb_data_in), + .data_out ({ + per_core_result[FPU_DIVSQRT], + per_core_has_fflags[FPU_DIVSQRT], + per_core_fflags[FPU_DIVSQRT], + per_core_tag_out[FPU_DIVSQRT] + }), + .valid_out (per_core_valid_out[FPU_DIVSQRT]), + .ready_out (per_core_ready_out[FPU_DIVSQRT]), + `UNUSED_PIN (sel_out) + ); + + // CVT core /////////////////////////////////////////////////////////////// wire is_itof = per_core_op_type[FPU_CVT][1]; wire is_signed = ~per_core_op_type[FPU_CVT][0]; wire cvt_ret_int_in = ~is_itof; wire cvt_ret_int_out; + `RESET_RELAY (cvt_reset, reset); + VX_fpu_cvt #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (1+TAG_WIDTH) @@ -243,7 +337,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (per_core_ready_out[FPU_CVT]) ); - // NCP core + // NCP core /////////////////////////////////////////////////////////////// wire ncp_ret_int_in = (per_core_op_type[FPU_NCP] == `INST_FPU_CMP) || `INST_FPU_IS_CLASS(per_core_op_type[FPU_NCP], 
per_core_frm[FPU_NCP]) @@ -253,6 +347,8 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_sext_out; + `RESET_RELAY (ncp_reset, reset); + VX_fpu_ncp #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH+2) @@ -277,40 +373,6 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; - for (genvar i = 0; i < 2; ++i) begin - assign div_sqrt_arb_data_in[i] = { - div_sqrt_result[i], - div_sqrt_has_fflags[i], - div_sqrt_fflags[i], - div_sqrt_tag_out[i] - }; - end - - VX_stream_arb #( - .NUM_INPUTS (2), - .DATAW (RSP_DATAW), - .ARBITER ("P"), - .OUT_BUF (0) - ) div_sqrt_arb ( - .clk (clk), - .reset (reset), - .valid_in (div_sqrt_valid_out), - .ready_in (div_sqrt_ready_out), - .data_in (div_sqrt_arb_data_in), - .data_out ({ - per_core_result[FPU_DIVSQRT], - per_core_has_fflags[FPU_DIVSQRT], - per_core_fflags[FPU_DIVSQRT], - per_core_tag_out[FPU_DIVSQRT] - }), - .valid_out (per_core_valid_out[FPU_DIVSQRT]), - .ready_out (per_core_ready_out[FPU_DIVSQRT]), - `UNUSED_PIN (sel_out) - ); - - /////////////////////////////////////////////////////////////////////////// - reg [NUM_FPCORES-1:0][RSP_DATAW+2-1:0] per_core_data_out; always @(*) begin diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index ce09830d0..8ab5b10b3 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -100,7 +100,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .DATA_IN_WIDTH (DATAW), .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG (1), // must be registered for DSPs + .PE_REG (0), .OUT_BUF (2) ) pe_serializer ( .clk (clk), diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 5c283e06c..32ad728b8 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -159,13 +159,11 @@ module 
VX_mem_coalescer #( req_data_merged = 'x; for (integer i = 0; i < OUT_REQS; ++i) begin for (integer j = 0; j < DATA_RATIO; ++j) begin - if (current_pmask[i * DATA_RATIO + j]) begin - for (integer k = 0; k < DATA_IN_SIZE; ++k) begin - // perform byte-level merge since each thread may have different bytes enabled - if (in_req_byteen[DATA_RATIO * i + j][k]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; - end + for (integer k = 0; k < DATA_IN_SIZE; ++k) begin + // perform byte-level merge since each thread may have different bytes enabled + if (current_pmask[i * DATA_RATIO + j] && in_req_byteen[DATA_RATIO * i + j][k]) begin + req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; end end end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index a2a9a9654..894f4e312 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -434,7 +434,7 @@ module VX_rr_arbiter #( wire has_masked_reqs = (| masked_reqs); wire has_unmasked_reqs = (| requests); - assign grant_onehot = ({NUM_REQS{~has_masked_reqs}} & grant_unmasked) | grant_masked; + assign grant_onehot = has_masked_reqs ? 
grant_masked : grant_unmasked; always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 81978b735..5e8297f7a 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -52,16 +52,16 @@ module VX_stream_buffer #( reg no_buffer; wire fire_in = valid_in && ready_in; - wire flow_out = ready_out || ~valid_out_r; + wire flow_out = ready_out || ~valid_out; always @(posedge clk) begin if (reset) begin valid_out_r <= 0; no_buffer <= 1; end else begin - if (ready_out) begin + if (flow_out) begin no_buffer <= 1; - end else if (valid_in && valid_out) begin + end else if (valid_in) begin no_buffer <= 0; end if (flow_out) begin diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index cb85d4804..30e2a444f 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -38,18 +38,18 @@ module VX_stream_unpack #( ); if (NUM_REQS > 1) begin - reg [NUM_REQS-1:0] rem_mask; + reg [NUM_REQS-1:0] rem_mask_r; wire [NUM_REQS-1:0] ready_out_w; - wire [NUM_REQS-1:0] rem_mask_n = rem_mask & ~ready_out_w; - wire sent_all = ~(| (mask_in & rem_mask_n)); + wire [NUM_REQS-1:0] rem_mask_n = rem_mask_r & ~ready_out_w; + wire sent_all = (mask_in & rem_mask_n) == '0; always @(posedge clk) begin if (reset) begin - rem_mask <= '1; + rem_mask_r <= {NUM_REQS{1'b1}}; end else begin if (valid_in) begin - rem_mask <= sent_all ? 
'1 : rem_mask_n; + rem_mask_r <= {NUM_REQS{sent_all}} | rem_mask_n; end end end @@ -64,7 +64,7 @@ module VX_stream_unpack #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in && mask_in[i] && rem_mask[i]), + .valid_in (valid_in && mask_in[i] && rem_mask_r[i]), .ready_in (ready_out_w[i]), .data_in ({data_in[i], tag_in}), .data_out ({data_out[i], tag_out[i]}), diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 462103c09..6dbe28325 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -123,7 +123,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW (REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("C"), + .ARBITER ("P"), .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), From fd5903fef1c79165cf9102af9d9d678f51724261 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 03:34:25 -0700 Subject: [PATCH 219/488] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/syn/xilinx/dut/top/Makefile | 16 ---------------- sim/opaesim/opae_sim.cpp | 19 ++++--------------- sim/rtlsim/processor.cpp | 8 -------- sim/xrtsim/Makefile | 15 --------------- sim/xrtsim/xrt_sim.cpp | 11 ----------- 6 files changed, 5 insertions(+), 66 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 61465103e..5dcb9a430 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1011,7 +1011,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready; wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; - wire [$bits(t_local_mem_addr)-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr; + wire [LMEM_ADDR_WIDTH-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr; reg [STATE_WIDTH-1:0] state_prev; always @(posedge clk) begin diff 
--git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile index 3a06715b5..c471b7807 100644 --- a/hw/syn/xilinx/dut/top/Makefile +++ b/hw/syn/xilinx/dut/top/Makefile @@ -5,22 +5,6 @@ FPU_IP = 1 include ../../common.mk -# AFU parameters -CONFIGS += -DNOPAE -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -endif - #CONFIGS += -DNUM_CORES=2 #CONFIGS += -DNUM_WARPS=32 #CONFIGS += -DNUM_THREADS=32 diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 7a1bae3e4..9cf185abf 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,21 +35,10 @@ #include #include -//#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -//#endif - #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif -#undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) - #define CACHE_BLOCK_SIZE 64 #define CCI_LATENCY 8 @@ -419,7 +408,7 @@ private: } void avs_bus_reset() { - for (int b = 0; b < MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { pending_mem_reqs_[b].clear(); device_->avs_readdatavalid[b] = 0; device_->avs_waitrequest[b] = 0; @@ -427,7 +416,7 @@ private: } void avs_bus_eval() { - for (int b = 0; b < MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { // process memory responses 
device_->avs_readdatavalid[b] = 0; if (!pending_mem_reqs_[b].empty() @@ -443,7 +432,7 @@ private: // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - unsigned byte_addr = (device_->avs_address[b] * MEMORY_BANKS + b) * MEM_BLOCK_SIZE; + unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * MEM_BLOCK_SIZE; if (device_->avs_write[b]) { uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); @@ -530,7 +519,7 @@ private: std::unordered_map host_buffers_; int64_t host_buffer_ids_; - std::list pending_mem_reqs_[MEMORY_BANKS]; + std::list pending_mem_reqs_[PLATFORM_PARAM_LOCAL_MEMORY_BANKS]; std::list cci_reads_; std::list cci_writes_; diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index e5e00f49e..d964a3d5a 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -41,14 +41,6 @@ typedef VVortex Device; #include #include -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index e45b0bfa2..63787e5b6 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -32,21 +32,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED -# AFU parameters -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -endif -ifeq (,$(findstring 
PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -endif - DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 880983bf1..80aed7f7d 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -33,21 +33,10 @@ #include #include -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif -#undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) - #define CACHE_BLOCK_SIZE 64 #ifndef TRACE_START_TIME From 32738e0b74e203827d3de1731e353c428bc42a28 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 03:39:29 -0700 Subject: [PATCH 220/488] CI script update --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 724ec2a13..64317337b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -106,7 +106,7 @@ jobs: make tests -s > /dev/null - name: Upload Build Artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} @@ -147,7 +147,7 @@ jobs: ${{ runner.os }}-thirdparty- - name: Download Build Artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} From 039e5e2ffce77ee473a05f14bc35fa8c486d6e23 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 03:52:55 -0700 Subject: [PATCH 221/488] minor update --- runtime/opae/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/runtime/opae/Makefile b/runtime/opae/Makefile index 9650915ea..56355890d 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -25,9 +25,10 @@ SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp # set up target types ifeq ($(TARGET), opaesim) - OPAESIM = $(DESTDIR)/libopae-c-sim.so + BUILD_DEPS = $(DESTDIR)/libopae-c-sim.so CXXFLAGS += -DOPAESIM -I$(SIM_DIR)/opaesim else + BUILD_DEPS = $(ROOT_DIR)/hw/syn/altera/opae/vortex_afu.h CXXFLAGS += -I$(SYN_DIR) -I$(ROOT_DIR)/hw/syn/altera/opae endif @@ -48,12 +49,15 @@ PROJECT := libvortex-opae.so all: $(DESTDIR)/$(PROJECT) +$(ROOT_DIR)/hw/syn/altera/opae/vortex_afu.h: + $(MAKE) -C $(ROOT_DIR)/hw/syn/altera/opae swconfig + driver: $(DESTDIR)/libopae-c-sim.so $(DESTDIR)/libopae-c-sim.so: DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/opaesim $(DESTDIR)/libopae-c-sim.so -$(DESTDIR)/$(PROJECT): $(SRCS) $(OPAESIM) +$(DESTDIR)/$(PROJECT): $(SRCS) $(BUILD_DEPS) $(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@ clean-driver: From 7ca9a5e87e3b9b1a46f7a994ee56fd82d8c6b3b9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 13:39:51 -0700 Subject: [PATCH 222/488] reset relay refactory --- hw/rtl/afu/opae/vortex_afu.sv | 12 +++--------- hw/rtl/cache/VX_cache.sv | 12 +++--------- hw/rtl/cache/VX_cache_cluster.sv | 5 +---- hw/rtl/core/VX_alu_unit.sv | 8 +++----- hw/rtl/core/VX_fpu_unit.sv | 14 ++++++-------- hw/rtl/core/VX_issue.sv | 4 +--- hw/rtl/core/VX_issue_slice.sv | 15 +++++---------- hw/rtl/core/VX_lsu_slice.sv | 4 +--- hw/rtl/core/VX_lsu_unit.sv | 5 +---- hw/rtl/core/VX_mem_unit.sv | 11 +++-------- hw/rtl/core/VX_operands.sv | 8 ++------ hw/rtl/core/VX_schedule.sv | 8 ++------ hw/rtl/core/VX_sfu_unit.sv | 8 ++------ hw/rtl/fpu/VX_fpu_dsp.sv | 16 ++++------------ hw/rtl/libs/VX_stream_unpack.sv | 6 +++--- hw/rtl/mem/VX_local_mem.sv | 8 ++------ 16 files changed, 42 insertions(+), 102 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 5dcb9a430..e5ff16483 100644 --- 
a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -475,8 +475,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .TAG_WIDTH (AVS_REQ_TAGW) ) cci_vx_mem_bus_if[2](); - `RESET_RELAY (cci_adapter_reset, reset); - VX_mem_adapter #( .SRC_DATA_WIDTH (CCI_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), @@ -488,7 +486,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (0) ) cci_mem_adapter ( .clk (clk), - .reset (cci_adapter_reset), + .reset (reset), .mem_req_valid_in (cci_mem_req_valid), .mem_req_addr_in (cci_mem_req_addr), @@ -527,8 +525,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ assign vx_mem_req_valid_qual = vx_mem_req_valid && ~vx_mem_is_cout; - `RESET_RELAY (vx_adapter_reset, reset); - VX_mem_adapter #( .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), @@ -540,7 +536,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (2) ) vx_mem_adapter ( .clk (clk), - .reset (vx_adapter_reset), + .reset (reset), .mem_req_valid_in (vx_mem_req_valid_qual), .mem_req_addr_in (vx_mem_req_addr), @@ -595,8 +591,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ //-- - `RESET_RELAY (avs_adapter_reset, reset); - VX_avs_adapter #( .DATA_WIDTH (LMEM_DATA_WIDTH), .ADDR_WIDTH (LMEM_ADDR_WIDTH), @@ -608,7 +602,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (0) ) avs_adapter ( .clk (clk), - .reset (avs_adapter_reset), + .reset (reset), // Memory request .mem_req_valid (mem_bus_if[0].req_valid), diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 6d3e1351e..ebb5d1519 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -319,8 +319,6 @@ module VX_cache import VX_gpu_pkg::*; #( wire [`PERF_CTR_BITS-1:0] perf_collisions; `endif - `RESET_RELAY (req_xbar_reset, 
reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), @@ -330,7 +328,7 @@ module VX_cache import VX_gpu_pkg::*; #( .OUT_BUF (REQ_XBAR_BUF) ) req_xbar ( .clk (clk), - .reset (req_xbar_reset), + .reset (reset), `ifdef PERF_ENABLE .collisions(perf_collisions), `else @@ -369,8 +367,6 @@ module VX_cache import VX_gpu_pkg::*; #( assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id); end - `RESET_RELAY (bank_reset, reset); - VX_cache_bank #( .BANK_ID (bank_id), .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), @@ -392,7 +388,7 @@ module VX_cache import VX_gpu_pkg::*; #( .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) ) bank ( .clk (clk), - .reset (bank_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_read_misses (perf_read_miss_per_bank[bank_id]), @@ -455,8 +451,6 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; end - `RESET_RELAY (rsp_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), @@ -464,7 +458,7 @@ module VX_cache import VX_gpu_pkg::*; #( .ARBITER ("R") ) rsp_xbar ( .clk (clk), - .reset (rsp_xbar_reset), + .reset (reset), `UNUSED_PIN (collisions), .valid_in (per_bank_core_rsp_valid), .data_in (core_rsp_data_in), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index dbf4ffec7..7173444ec 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -139,9 +139,6 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( end for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches - - `RESET_RELAY (cache_reset, reset); - VX_cache_wrap #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), .CACHE_SIZE (CACHE_SIZE), @@ -169,7 +166,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .cache_perf (perf_cache_unit[i]), `endif .clk (clk), - .reset (cache_reset), + .reset (reset), .core_bus_if 
(arb_core_bus_if[i * NUM_REQS +: NUM_REQS]), .mem_bus_if (cache_mem_bus_if[i]) ); diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index f3e0b19e7..8ec044eeb 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -57,8 +57,6 @@ module VX_alu_unit #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - VX_execute_if #( .NUM_LANES (NUM_LANES) ) pe_execute_if[PE_COUNT](); @@ -82,7 +80,7 @@ module VX_alu_unit #( .RSP_OUT_BUF (PARTIAL_BW ? 1 : 3) ) pe_switch ( .clk (clk), - .reset (block_reset), + .reset (reset), .pe_sel (pe_select), .execute_in_if (per_block_execute_if[block_idx]), .commit_out_if (per_block_commit_if[block_idx]), @@ -96,7 +94,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) alu_int ( .clk (clk), - .reset (block_reset), + .reset (reset), .execute_if (pe_execute_if[PE_IDX_INT]), .branch_ctl_if (branch_ctl_if[block_idx]), .commit_if (pe_commit_if[PE_IDX_INT]) @@ -108,7 +106,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) muldiv_unit ( .clk (clk), - .reset (block_reset), + .reset (reset), .execute_if (pe_execute_if[PE_IDX_MDV]), .commit_if (pe_commit_if[PE_IDX_MDV]) ); diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 0d7f02311..10e5c236b 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -57,8 +57,6 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - // Store request info wire fpu_req_valid, fpu_req_ready; wire fpu_rsp_valid, fpu_rsp_ready; @@ -89,7 +87,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .SIZE (`FPUQ_SIZE) ) tag_store ( .clk (clk), - .reset (block_reset), + .reset (reset), .acquire_en (execute_fire), .write_addr (fpu_req_tag), .write_data ({per_block_execute_if[block_idx].data.uuid, 
per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), @@ -132,7 +130,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_dpi ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -161,7 +159,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_fpnew ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -190,7 +188,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_dsp ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -219,7 +217,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( if (PID_BITS != 0) begin fflags_t fpu_rsp_fflags_r; always @(posedge clk) begin - if (block_reset) begin + if (reset) begin fpu_rsp_fflags_r <= '0; end else if (fpu_rsp_fire) begin fpu_rsp_fflags_r <= fpu_rsp_eop ? 
'0 : (fpu_rsp_fflags_r | fpu_rsp_fflags); @@ -253,7 +251,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .SIZE (0) ) rsp_buf ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_rsp_valid), .ready_in (fpu_rsp_ready), .data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 5d5af64d9..a0f223ff5 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -77,15 +77,13 @@ module VX_issue import VX_gpu_pkg::*; #( assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop; `endif - `RESET_RELAY_EN (slice_reset, reset, (`ISSUE_WIDTH > 1)); - VX_issue_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)), .ISSUE_ID (issue_id) ) issue_slice ( `SCOPE_IO_BIND(issue_id) .clk (clk), - .reset (slice_reset), + .reset (reset), `ifdef PERF_ENABLE .issue_perf (per_issue_perf[issue_id]), `endif diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 4b4e168a2..24430a53f 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -36,16 +36,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_scoreboard_if scoreboard_if(); VX_operands_if operands_if(); - `RESET_RELAY (ibuf_reset, reset); - `RESET_RELAY (scoreboard_reset, reset); - `RESET_RELAY (operands_reset, reset); - `RESET_RELAY (dispatch_reset, reset); - VX_ibuffer #( .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) ) ibuffer ( .clk (clk), - .reset (ibuf_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.ibf_stalls), `endif @@ -57,7 +52,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID)) ) scoreboard ( .clk (clk), - .reset (scoreboard_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.scb_stalls), .perf_units_uses(issue_perf.units_uses), @@ -72,7 +67,7 @@ 
module VX_issue_slice import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID)) ) operands ( .clk (clk), - .reset (operands_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.opd_stalls), `endif @@ -85,7 +80,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID)) ) dispatch ( .clk (clk), - .reset (dispatch_reset), + .reset (reset), `ifdef PERF_ENABLE `UNUSED_PIN (perf_stalls), `endif @@ -105,7 +100,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) ) scope_tap ( .clk (clk), - .reset (scope_reset), + .reset (reset), .start (1'b0), .stop (1'b0), .triggers ({ diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 25a8223a8..49195eee6 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -311,8 +311,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire [LSU_TAG_WIDTH-1:0] lsu_mem_rsp_tag; wire lsu_mem_rsp_ready; - `RESET_RELAY (mem_scheduler_reset, reset); - VX_mem_scheduler #( .INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)), .CORE_REQS (NUM_LANES), @@ -330,7 +328,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .CORE_OUT_BUF(0) ) mem_scheduler ( .clk (clk), - .reset (mem_scheduler_reset), + .reset (reset), // Input request .core_req_valid (mem_req_valid), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index b155ed0d7..8c594f533 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -55,15 +55,12 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( ) per_block_commit_if[BLOCK_SIZE](); for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsus - - `RESET_RELAY_EN (slice_reset, reset, (BLOCK_SIZE > 1)); - VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) lsu_slice( `SCOPE_IO_BIND (block_idx) .clk (clk), - .reset (slice_reset), + .reset (reset), .execute_if 
(per_block_execute_if[block_idx]), .commit_if (per_block_commit_if[block_idx]), .lsu_mem_if (lsu_mem_if[block_idx]) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index cd901f8ac..75f60e63c 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -91,8 +91,6 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end end - `RESET_RELAY (lmem_reset, reset); - VX_local_mem #( .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), .SIZE (1 << `LMEM_LOG_SIZE), @@ -105,7 +103,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .OUT_BUF (3) ) local_mem ( .clk (clk), - .reset (lmem_reset), + .reset (reset), `ifdef PERF_ENABLE .lmem_perf (lmem_perf), `endif @@ -132,9 +130,6 @@ module VX_mem_unit import VX_gpu_pkg::*; #( if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescers - - `RESET_RELAY (mem_coalescer_reset, reset); - VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), .NUM_REQS (`NUM_LSU_LANES), @@ -146,8 +141,8 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .UUID_WIDTH (`UUID_WIDTH), .QUEUE_SIZE (`LSUQ_OUT_SIZE) ) mem_coalescer ( - .clk (clk), - .reset (mem_coalescer_reset), + .clk (clk), + .reset (reset), // Input request .in_req_valid (lsu_dcache_if[i].req_valid), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 3025b9dab..a88522ee7 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -99,8 +99,6 @@ module VX_operands import VX_gpu_pkg::*; #( assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; - `RESET_RELAY (req_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_SRC_OPDS), .NUM_OUTPUTS (NUM_BANKS), @@ -110,7 +108,7 @@ module VX_operands import VX_gpu_pkg::*; #( .OUT_BUF (0) // no output buffering ) req_xbar ( .clk (clk), - .reset (req_xbar_reset), + .reset (reset), `UNUSED_PIN(collisions), .valid_in (req_in_valid), .data_in (req_in_data), @@ -179,14 +177,12 @@ module 
VX_operands import VX_gpu_pkg::*; #( wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; - `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW - VX_pipe_buffer #( .DATAW (NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), .RESETW (NUM_SRC_OPDS * REGS_DATAW) ) pipe_reg2 ( .clk (clk), - .reset (pipe2_reset), + .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), .data_in ({src_data_st1, gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index b1b855aaf..af0ee5621 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -289,13 +289,11 @@ module VX_schedule import VX_gpu_pkg::*; #( // split/join handling - `RESET_RELAY (split_join_reset, reset); - VX_split_join #( .INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID)) ) split_join ( .clk (clk), - .reset (split_join_reset), + .reset (reset), .valid (warp_ctl_if.valid), .wid (warp_ctl_if.wid), .split (warp_ctl_if.split), @@ -377,15 +375,13 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; - `RESET_RELAY (pending_instr_reset, reset); - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_sizes VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) ) counter ( .clk (clk), - .reset (pending_instr_reset), + .reset (reset), .incr (schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))), .decr (commit_sched_if.committed_warps[i]), .empty (pending_warp_empty[i]), diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 93686ca55..5af6211f6 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -98,28 +98,24 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .commit_in_if (pe_commit_if) ); - `RESET_RELAY (wctl_reset, reset); - VX_wctl_unit #( .INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)), .NUM_LANES (NUM_LANES) ) wctl_unit ( .clk (clk), - 
.reset (wctl_reset), + .reset (reset), .execute_if (pe_execute_if[PE_IDX_WCTL]), .warp_ctl_if(warp_ctl_if), .commit_if (pe_commit_if[PE_IDX_WCTL]) ); - `RESET_RELAY (csr_reset, reset); - VX_csr_unit #( .INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)), .CORE_ID (CORE_ID), .NUM_LANES (NUM_LANES) ) csr_unit ( .clk (clk), - .reset (csr_reset), + .reset (reset), .base_dcrs (base_dcrs), .execute_if (pe_execute_if[PE_IDX_CSRS]), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index c12c82d87..22e2b652d 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -130,14 +130,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire is_neg = per_core_op_type[FPU_FMA][0]; wire is_sub = per_core_fmt[FPU_FMA][1]; - `RESET_RELAY (fma_reset, reset); - VX_fpu_fma #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_fma ( .clk (clk), - .reset (fma_reset), + .reset (reset), .valid_in (per_core_valid_in[FPU_FMA]), .ready_in (per_core_ready_in[FPU_FMA]), .mask_in (per_core_mask_in[FPU_FMA]), @@ -231,14 +229,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (div_sqrt_datab) `UNUSED_VAR (div_sqrt_datac) - `RESET_RELAY (div_sqrt_reset, reset); - VX_fpu_div #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_div ( .clk (clk), - .reset (div_sqrt_reset), + .reset (reset), .valid_in (div_sqrt_valid_in[0]), .ready_in (div_sqrt_ready_in[0]), .mask_in (div_sqrt_mask_in[0]), @@ -313,14 +309,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire cvt_ret_int_in = ~is_itof; wire cvt_ret_int_out; - `RESET_RELAY (cvt_reset, reset); - VX_fpu_cvt #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (1+TAG_WIDTH) ) fpu_cvt ( .clk (clk), - .reset (cvt_reset), + .reset (reset), .valid_in (per_core_valid_in[FPU_CVT]), .ready_in (per_core_ready_in[FPU_CVT]), .mask_in (per_core_mask_in[FPU_CVT]), @@ -347,14 +341,12 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_sext_out; - 
`RESET_RELAY (ncp_reset, reset); - VX_fpu_ncp #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH+2) ) fpu_ncp ( .clk (clk), - .reset (ncp_reset), + .reset (reset), .valid_in (per_core_valid_in[FPU_NCP]), .ready_in (per_core_ready_in[FPU_NCP]), .mask_in (per_core_mask_in[FPU_NCP]), diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index 30e2a444f..37c238a77 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -42,14 +42,14 @@ module VX_stream_unpack #( wire [NUM_REQS-1:0] ready_out_w; wire [NUM_REQS-1:0] rem_mask_n = rem_mask_r & ~ready_out_w; - wire sent_all = (mask_in & rem_mask_n) == '0; + wire sent_all = ~(| (mask_in & rem_mask_n)); always @(posedge clk) begin if (reset) begin - rem_mask_r <= {NUM_REQS{1'b1}}; + rem_mask_r <= '1; end else begin if (valid_in) begin - rem_mask_r <= {NUM_REQS{sent_all}} | rem_mask_n; + rem_mask_r <= sent_all ? '1 : rem_mask_n; end end end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 6dbe28325..700bcb48c 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -116,8 +116,6 @@ module VX_local_mem import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_ready = req_ready_in[i]; end - `RESET_RELAY (req_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), @@ -127,7 +125,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), - .reset (req_xbar_reset), + .reset (reset), `ifdef PERF_ENABLE .collisions (perf_collisions), `else @@ -226,8 +224,6 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out; wire [NUM_REQS-1:0] rsp_ready_out; - `RESET_RELAY (rsp_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), @@ -236,7 +232,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_BUF (OUT_BUF) ) rsp_xbar ( .clk (clk), - .reset 
(rsp_xbar_reset), + .reset (reset), `UNUSED_PIN (collisions), .sel_in (per_bank_rsp_idx), .valid_in (per_bank_rsp_valid), From 96fb3566a94291050795d17911eef7f3a9716eca Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 13:44:23 -0700 Subject: [PATCH 223/488] minor update --- hw/rtl/VX_socket.sv | 4 +--- hw/rtl/core/VX_core.sv | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 833ba49d7..a6e58ebd1 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -49,14 +49,12 @@ module VX_socket import VX_gpu_pkg::*; #( `ifdef GBAR_ENABLE VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE](); - `RESET_RELAY (gbar_arb_reset, reset); - VX_gbar_arb #( .NUM_REQS (`SOCKET_SIZE), .OUT_BUF ((`SOCKET_SIZE > 1) ? 2 : 0) ) gbar_arb ( .clk (clk), - .reset (gbar_arb_reset), + .reset (reset), .bus_in_if (per_core_gbar_bus_if), .bus_out_if (gbar_bus_if) ); diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index f306c5d23..f97370e89 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -75,7 +75,6 @@ module VX_core import VX_gpu_pkg::*; #( assign mem_perf_tmp_if.mem = mem_perf_if.mem; `endif - `RESET_RELAY (dcr_data_reset, reset); `RESET_RELAY (schedule_reset, reset); `RESET_RELAY (fetch_reset, reset); `RESET_RELAY (decode_reset, reset); @@ -87,7 +86,7 @@ module VX_core import VX_gpu_pkg::*; #( VX_dcr_data dcr_data ( .clk (clk), - .reset (dcr_data_reset), + .reset (reset), .dcr_bus_if (dcr_bus_if), .base_dcrs (base_dcrs) ); From 37555b12086612bd46649a57c01a62226dcc86df Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 15:18:39 -0700 Subject: [PATCH 224/488] minor update --- ci/regression.sh.in | 2 +- hw/rtl/VX_config.vh | 2 +- hw/rtl/VX_define.vh | 18 ++++++++++++++++++ sim/opaesim/opae_sim.cpp | 22 ++++++++++++---------- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 32e479c1e..2c56377c0 100755 --- 
a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -259,7 +259,7 @@ config2() AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress # test 128-bit MEM block - CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128" ./ci/blackbox.sh --driver=opae --app=mstress # test XLEN-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 9a95fb0be..a93b73b30 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -651,7 +651,7 @@ `endif `ifndef MEMORY_BANKS -`define MEMORY_BANKS 8 +`define MEMORY_BANKS 2 `endif // Number of Memory Ports from LLC diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 861d9f28c..5ef9a46d2 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -14,6 +14,24 @@ `ifndef VX_DEFINE_VH `define VX_DEFINE_VH +`ifndef MEM_BLOCK_SIZE +`ifdef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH +`define MEM_BLOCK_SIZE (`PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) +`endif +`endif + +`ifndef MEM_ADDR_WIDTH +`ifdef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH +`define MEM_ADDR_WIDTH `PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH +`endif +`endif + +`ifndef MEMORY_BANKS +`ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS +`define MEMORY_BANKS `PLATFORM_PARAM_LOCAL_MEMORY_BANKS +`endif +`endif + `include "VX_platform.vh" `include "VX_config.vh" `include "VX_types.vh" diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 9cf185abf..2f847dc20 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,6 +35,8 @@ #include #include +#define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) + #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif @@ -424,7 +426,7 @@ private: auto mem_rd_it = pending_mem_reqs_[b].begin(); auto mem_req = *mem_rd_it; device_->avs_readdatavalid[b] = 1; - memcpy(device_->avs_readdata[b], mem_req->data.data(), 
MEM_BLOCK_SIZE); + memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); uint32_t addr = mem_req->addr; pending_mem_reqs_[b].erase(mem_rd_it); delete mem_req; @@ -432,19 +434,19 @@ private: // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * MEM_BLOCK_SIZE; + unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; if (device_->avs_write[b]) { uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[byte_addr + i] = data[i]; } } /*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); + for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { + printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]); } printf("\n");*/ @@ -461,17 +463,17 @@ private: auto mem_req = new mem_req_t(); mem_req->addr = device_->avs_address[b]; mem_req->bank_id = b; - ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); + ram_->read(mem_req->data.data(), byte_addr, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); for (auto& req : pending_mem_reqs_[b]) { if (req.cycles_left != 0) - printf(" !%0x", req.addr * MEM_BLOCK_SIZE); + printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); else 
- printf(" %0x", req.addr * MEM_BLOCK_SIZE); + printf(" %0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); } printf("}\n");*/ @@ -484,7 +486,7 @@ private: } typedef struct { - std::array data; + std::array data; uint32_t addr; uint32_t bank_id; bool write; From 8d1baf677d538e1a56a801d4310a6aeb9204fa34 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 18:17:27 -0700 Subject: [PATCH 225/488] minor update --- ci/regression.sh.in | 2 +- sim/opaesim/vortex_afu_shim.sv | 62 ++++++++++++++++------------------ 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 2c56377c0..32e479c1e 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -259,7 +259,7 @@ config2() AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress # test 128-bit MEM block - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress # test XLEN-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress diff --git a/sim/opaesim/vortex_afu_shim.sv b/sim/opaesim/vortex_afu_shim.sv index 8c64c8332..2a0d63e42 100644 --- a/sim/opaesim/vortex_afu_shim.sv +++ b/sim/opaesim/vortex_afu_shim.sv @@ -1,30 +1,28 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-`include "VX_platform.vh" +`include "VX_define.vh" `IGNORE_WARNINGS_BEGIN `include "vortex_afu.vh" `IGNORE_WARNINGS_END -`include "VX_define.vh" - module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( // global signals input clk, input reset, // IF signals between CCI and AFU - input logic vcp2af_sRxPort_c0_TxAlmFull, + input logic vcp2af_sRxPort_c0_TxAlmFull, input logic vcp2af_sRxPort_c1_TxAlmFull, input t_ccip_vc vcp2af_sRxPort_c0_hdr_vc_used, @@ -35,15 +33,15 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( input t_ccip_c0_rsp vcp2af_sRxPort_c0_hdr_resp_type, input t_ccip_mdata vcp2af_sRxPort_c0_hdr_mdata, input t_ccip_clData vcp2af_sRxPort_c0_data, - input logic vcp2af_sRxPort_c0_rspValid, - input logic vcp2af_sRxPort_c0_mmioRdValid, - input logic vcp2af_sRxPort_c0_mmioWrValid, + input logic vcp2af_sRxPort_c0_rspValid, + input logic vcp2af_sRxPort_c0_mmioRdValid, + input logic vcp2af_sRxPort_c0_mmioWrValid, input t_ccip_mmioAddr vcp2af_sRxPort_c0_ReqMmioHdr_address, - input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length, + input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length, input logic vcp2af_sRxPort_c0_ReqMmioHdr_rsvd, - input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid, - + input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid, + input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used, input logic vcp2af_sRxPort_c1_hdr_rsvd1, input logic vcp2af_sRxPort_c1_hdr_hit_miss, @@ -51,34 +49,34 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( input logic vcp2af_sRxPort_c1_hdr_rsvd0, input t_ccip_clNum vcp2af_sRxPort_c1_hdr_cl_num, input t_ccip_c1_rsp vcp2af_sRxPort_c1_hdr_resp_type, - input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata, - input logic vcp2af_sRxPort_c1_rspValid, - + input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata, + input logic vcp2af_sRxPort_c1_rspValid, + output t_ccip_vc af2cp_sTxPort_c0_hdr_vc_sel, - output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1, + output logic [1:0] 
af2cp_sTxPort_c0_hdr_rsvd1, output t_ccip_clLen af2cp_sTxPort_c0_hdr_cl_len, output t_ccip_c0_req af2cp_sTxPort_c0_hdr_req_type, - output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0, + output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0, output t_ccip_clAddr af2cp_sTxPort_c0_hdr_address, output t_ccip_mdata af2cp_sTxPort_c0_hdr_mdata, - output logic af2cp_sTxPort_c0_valid, + output logic af2cp_sTxPort_c0_valid, output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd2, output t_ccip_vc af2cp_sTxPort_c1_hdr_vc_sel, output logic af2cp_sTxPort_c1_hdr_sop, - output logic af2cp_sTxPort_c1_hdr_rsvd1, + output logic af2cp_sTxPort_c1_hdr_rsvd1, output t_ccip_clLen af2cp_sTxPort_c1_hdr_cl_len, output t_ccip_c1_req af2cp_sTxPort_c1_hdr_req_type, - output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0, + output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0, output t_ccip_clAddr af2cp_sTxPort_c1_hdr_address, output t_ccip_mdata af2cp_sTxPort_c1_hdr_mdata, - output t_ccip_clData af2cp_sTxPort_c1_data, - output logic af2cp_sTxPort_c1_valid, + output t_ccip_clData af2cp_sTxPort_c1_data, + output logic af2cp_sTxPort_c1_valid, output t_ccip_tid af2cp_sTxPort_c2_hdr_tid, - output logic af2cp_sTxPort_c2_mmioRdValid, - output t_ccip_mmioData af2cp_sTxPort_c2_data, - + output logic af2cp_sTxPort_c2_mmioRdValid, + output t_ccip_mmioData af2cp_sTxPort_c2_data, + // Avalon signals for local memory access output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], @@ -119,7 +117,7 @@ always @ (*) begin c0_RxHdr.reqMmioHdr.address = vcp2af_sRxPort_c0_ReqMmioHdr_address; c0_RxHdr.reqMmioHdr.length = vcp2af_sRxPort_c0_ReqMmioHdr_length; c0_RxHdr.reqMmioHdr.rsvd = vcp2af_sRxPort_c0_ReqMmioHdr_rsvd; - c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid; + c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid; end else begin c0_RxHdr.rspMemHdr.vc_used = vcp2af_sRxPort_c0_hdr_vc_used; c0_RxHdr.rspMemHdr.rsvd1 = 
vcp2af_sRxPort_c0_hdr_rsvd1; @@ -134,7 +132,7 @@ end assign cp2af_sRxPort.c0TxAlmFull = vcp2af_sRxPort_c0_TxAlmFull; assign cp2af_sRxPort.c1TxAlmFull = vcp2af_sRxPort_c1_TxAlmFull; -assign cp2af_sRxPort.c0.hdr = c0_RxHdr; +assign cp2af_sRxPort.c0.hdr = c0_RxHdr; assign cp2af_sRxPort.c0.data = vcp2af_sRxPort_c0_data; assign cp2af_sRxPort.c0.rspValid = vcp2af_sRxPort_c0_rspValid; assign cp2af_sRxPort.c0.mmioRdValid = vcp2af_sRxPort_c0_mmioRdValid; @@ -147,8 +145,8 @@ assign cp2af_sRxPort.c1.hdr.format = vcp2af_sRxPort_c1_hdr_format; assign cp2af_sRxPort.c1.hdr.rsvd0 = vcp2af_sRxPort_c1_hdr_rsvd0; assign cp2af_sRxPort.c1.hdr.cl_num = vcp2af_sRxPort_c1_hdr_cl_num; assign cp2af_sRxPort.c1.hdr.resp_type = vcp2af_sRxPort_c1_hdr_resp_type; -assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata; -assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid; +assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata; +assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid; assign af2cp_sTxPort_c0_hdr_vc_sel = af2cp_sTxPort.c0.hdr.vc_sel; assign af2cp_sTxPort_c0_hdr_rsvd1 = af2cp_sTxPort.c0.hdr.rsvd1; @@ -168,11 +166,11 @@ assign af2cp_sTxPort_c1_hdr_req_type = af2cp_sTxPort.c1.hdr.req_type; assign af2cp_sTxPort_c1_hdr_rsvd0 = af2cp_sTxPort.c1.hdr.rsvd0; assign af2cp_sTxPort_c1_hdr_address = af2cp_sTxPort.c1.hdr.address; assign af2cp_sTxPort_c1_hdr_mdata = af2cp_sTxPort.c1.hdr.mdata; -assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data; +assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data; assign af2cp_sTxPort_c1_valid = af2cp_sTxPort.c1.valid; -assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid; -assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid; +assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid; +assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid; assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data; endmodule From 0aaca84016bfa6ed328439f134190a373ed070a8 Mon Sep 17 00:00:00 2001 From: Blaise Tine 
Date: Wed, 4 Sep 2024 18:22:37 -0700 Subject: [PATCH 226/488] minor update --- hw/rtl/core/VX_issue_slice.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 24430a53f..0d7fdea53 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -100,7 +100,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) ) scope_tap ( .clk (clk), - .reset (reset), + .reset (scope_reset), .start (1'b0), .stop (1'b0), .triggers ({ From fb0cd1c2724c028ced53891121d824f5571eabd7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 18:24:42 -0700 Subject: [PATCH 227/488] minor update --- hw/rtl/VX_define.vh | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 5ef9a46d2..861d9f28c 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -14,24 +14,6 @@ `ifndef VX_DEFINE_VH `define VX_DEFINE_VH -`ifndef MEM_BLOCK_SIZE -`ifdef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH -`define MEM_BLOCK_SIZE (`PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) -`endif -`endif - -`ifndef MEM_ADDR_WIDTH -`ifdef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH -`define MEM_ADDR_WIDTH `PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH -`endif -`endif - -`ifndef MEMORY_BANKS -`ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS -`define MEMORY_BANKS `PLATFORM_PARAM_LOCAL_MEMORY_BANKS -`endif -`endif - `include "VX_platform.vh" `include "VX_config.vh" `include "VX_types.vh" From cf9172b8fcf53fc4f83443de4e912f85700cef58 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 4 Sep 2024 20:16:54 -0700 Subject: [PATCH 228/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- hw/rtl/core/VX_lsu_slice.sv | 8 ++++---- hw/rtl/libs/VX_dp_ram.sv | 4 ++-- hw/rtl/libs/VX_fifo_queue.sv | 4 ++-- hw/rtl/libs/VX_generic_arbiter.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 4 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 7 
files changed, 14 insertions(+), 14 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 883a561a1..4dff675bd 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -394,7 +394,7 @@ module VX_cache_bank #( `UNUSED_VAR (do_write_miss_st1) // ensure mshr replay always get a hit - `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay")); + `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)); // both tag and data stores use BRAM with no read-during-write protection. // we ned to stall the pipeline to prevent read-after-write hazards. @@ -599,7 +599,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin // ensure dirty bytes match the tag info wire has_dirty_bytes = (| dirty_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))); + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))); end assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) || do_writeback_st1) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 49195eee6..7ee15bb14 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -188,8 +188,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_LANES; ++i) begin wire lsu_req_fire = execute_if.valid && execute_if.ready; `RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0), - ("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! 
(#%0d)", - execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)); + ("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", + $time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)); end // store data formatting @@ -271,8 +271,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr]; assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1); - `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!")) - `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!")) + `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time)) + `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("%t: oops! broken sop request!", $time)) `UNUSED_VAR (mem_rsp_sop) end else begin assign pkt_waddr = 0; diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 70df4f688..64b22150c 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -59,7 +59,7 @@ module VX_dp_ram #( `UNUSED_VAR (read) if (WRENW > 1) begin - `RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask")); + `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)); end if (OUT_REG && !READ_ENABLE) begin @@ -341,7 +341,7 @@ module VX_dp_ram #( assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; if (RW_ASSERT) begin - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard")); + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)); end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index e6f94b3b2..eba9532f4 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -162,8 +162,8 @@ module VX_fifo_queue #( end end - `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("runtime error: incrementing full queue")); - `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("runtime error: decrementing empty queue")); + `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("%t: runtime error: incrementing full queue", $time)); + `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("%t: runtime error: decrementing empty queue", $time)); endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index db0173349..3a3737d04 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,7 +90,7 @@ module VX_generic_arbiter #( end - `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("invalid arbiter grant!")) + `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 32ad728b8..b284a6449 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -76,8 +76,8 @@ module VX_mem_coalescer #( `UNUSED_SPARAM (INSTANCE_ID) `STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) - `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request 
mask")); - `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask")); + `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)); + `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("%t: invalid request mask", $time)); localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; // tag + mask + offest diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index c5b302177..9599adf13 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -97,7 +97,7 @@ module VX_mem_scheduler #( `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((0 == RSP_PARTIAL) || (1 == RSP_PARTIAL), ("invalid parameter")) - `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("invalid request mask")); + `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)); wire ibuf_push; wire ibuf_pop; From 8db77ea1cd4b394f9b558e38cf10a059117bcf9d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 2024 21:29:01 -0700 Subject: [PATCH 229/488] minor updates --- hw/rtl/VX_cluster.sv | 3 ++- hw/syn/xilinx/scripts/xsim.tcl | 13 +++++-------- hw/syn/xilinx/xrt/Makefile | 2 +- hw/syn/xilinx/xrt/{xrt.ini => xrt.ini.in} | 6 +++--- sim/opaesim/opae_sim.cpp | 2 +- sim/rtlsim/processor.cpp | 2 +- sim/xrtsim/xrt_sim.cpp | 2 +- 7 files changed, 14 insertions(+), 16 deletions(-) rename hw/syn/xilinx/xrt/{xrt.ini => xrt.ini.in} (51%) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index b9a43f845..ef845ae07 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -119,7 +119,8 @@ module VX_cluster import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// VX_dcr_bus_if socket_dcr_bus_tmp_if(); - assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= 
`VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); + wire is_dcr_base_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); + assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && is_dcr_base_addr; assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr; assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data; diff --git a/hw/syn/xilinx/scripts/xsim.tcl b/hw/syn/xilinx/scripts/xsim.tcl index 061bc17ab..ccdc1262f 100644 --- a/hw/syn/xilinx/scripts/xsim.tcl +++ b/hw/syn/xilinx/scripts/xsim.tcl @@ -14,12 +14,9 @@ # limitations under the License. # -#log_wave -r * -#run all -#exit +log_wave -r * -open_vcd xsim_dump.vcd -log_vcd /* -run all -close_vcd -exit +#open_vcd xsim_dump.vcd +#log_vcd /* +#run all +#close_vcd \ No newline at end of file diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 4e3259f34..fe9a56dc8 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -117,7 +117,7 @@ endif # Debugging ifdef DEBUG - VPP_FLAGS += -g --debug.protocol all + VPP_FLAGS += -g --optimize 0 --debug.protocol all ifneq ($(TARGET), hw) VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS) diff --git a/hw/syn/xilinx/xrt/xrt.ini b/hw/syn/xilinx/xrt/xrt.ini.in similarity index 51% rename from hw/syn/xilinx/xrt/xrt.ini rename to hw/syn/xilinx/xrt/xrt.ini.in index 094219112..99511f884 100644 --- a/hw/syn/xilinx/xrt/xrt.ini +++ b/hw/syn/xilinx/xrt/xrt.ini.in @@ -1,9 +1,9 @@ -[Runtime] +[Runtime] runtime_log=console [Emulation] -#debug_mode=batch -#user_pre_sim_script=xsim.tcl +debug_mode=batch +user_pre_sim_script=@VORTEX_HOME@/hw/syn/xilinx/scripts/xsim.tcl [Debug] profile=true diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 2f847dc20..8e9e8c4d8 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -125,7 +125,7 @@ 
public: } int init() { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index d964a3d5a..25d219fcf 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -98,7 +98,7 @@ void sim_trace_enable(bool enable) { class Processor::Impl { public: Impl() : dram_sim_(MEM_CLOCK_RATIO) { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 80aed7f7d..12a78c23d 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -112,7 +112,7 @@ public: } int init() { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); From efc8834c750d47b24cd2ed717fc26cb139007dd8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 2024 21:32:25 -0700 Subject: [PATCH 230/488] xilinx afu reset refactoring --- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 104 ++++++++++++++--------------- hw/rtl/afu/xrt/VX_afu_wrap.sv | 119 ++++++++++++++++------------------ 2 files changed, 105 insertions(+), 118 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 687b55a8c..a6cd31b05 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -21,7 +21,6 @@ module VX_afu_ctrl #( // axi4 lite slave signals input wire clk, input wire reset, - input wire clk_en, input wire s_axi_awvalid, input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, @@ -191,7 +190,7 @@ module VX_afu_ctrl #( cmd_scope_writing <= 0; scope_bus_ctr <= '0; scope_bus_out_r <= 0; - end else if (clk_en) begin + end else begin if (s_axi_w_fire && waddr == ADDR_SCP_0) begin scope_bus_wdata[31:0] <= 
(s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask); end @@ -244,7 +243,7 @@ module VX_afu_ctrl #( always @(posedge clk) begin if (reset) begin wstate <= WSTATE_IDLE; - end else if (clk_en) begin + end else begin case (wstate) WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE; WSTATE_DATA: wstate <= s_axi_wvalid ? WSTATE_RESP : WSTATE_DATA; @@ -256,10 +255,8 @@ module VX_afu_ctrl #( // waddr always @(posedge clk) begin - if (clk_en) begin - if (s_axi_aw_fire) - waddr <= s_axi_awaddr[ADDR_BITS-1:0]; - end + if (s_axi_aw_fire) + waddr <= s_axi_awaddr[ADDR_BITS-1:0]; end // wdata @@ -280,12 +277,13 @@ module VX_afu_ctrl #( for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin mem_r[i] <= '0; end - end else if (clk_en) begin + end else begin + dcr_wr_valid_r <= 0; + ap_reset_r <= 0; + if (ap_ready) ap_start_r <= auto_restart_r; - dcr_wr_valid_r <= 0; - if (s_axi_w_fire) begin case (waddr) ADDR_AP_CTRL: begin @@ -351,7 +349,7 @@ module VX_afu_ctrl #( always @(posedge clk) begin if (reset) begin rstate <= RSTATE_IDLE; - end else if (clk_en) begin + end else begin case (rstate) RSTATE_IDLE: rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE; RSTATE_DATA: rstate <= (s_axi_rready & s_axi_rvalid) ? 
RSTATE_IDLE : RSTATE_DATA; @@ -362,49 +360,47 @@ module VX_afu_ctrl #( // rdata always @(posedge clk) begin - if (clk_en) begin - if (s_axi_ar_fire) begin - rdata <= '0; - case (raddr) - ADDR_AP_CTRL: begin - rdata[0] <= ap_start_r; - rdata[1] <= ap_done; - rdata[2] <= ap_idle; - rdata[3] <= ap_ready; - rdata[7] <= auto_restart_r; - end - ADDR_GIE: begin - rdata <= 32'(gie_r); - end - ADDR_IER: begin - rdata <= 32'(ier_r); - end - ADDR_ISR: begin - rdata <= 32'(isr_r); - end - ADDR_DEV_0: begin - rdata <= dev_caps[31:0]; - end - ADDR_DEV_1: begin - rdata <= dev_caps[63:32]; - end - ADDR_ISA_0: begin - rdata <= isa_caps[31:0]; - end - ADDR_ISA_1: begin - rdata <= isa_caps[63:32]; - end - `ifdef SCOPE - ADDR_SCP_0: begin - rdata <= scope_bus_rdata[31:0]; - end - ADDR_SCP_1: begin - rdata <= scope_bus_rdata[63:32]; - end - `endif - default:; - endcase - end + if (s_axi_ar_fire) begin + rdata <= '0; + case (raddr) + ADDR_AP_CTRL: begin + rdata[0] <= ap_start_r; + rdata[1] <= ap_done; + rdata[2] <= ap_idle; + rdata[3] <= ap_ready; + rdata[7] <= auto_restart_r; + end + ADDR_GIE: begin + rdata <= 32'(gie_r); + end + ADDR_IER: begin + rdata <= 32'(ier_r); + end + ADDR_ISR: begin + rdata <= 32'(isr_r); + end + ADDR_DEV_0: begin + rdata <= dev_caps[31:0]; + end + ADDR_DEV_1: begin + rdata <= dev_caps[63:32]; + end + ADDR_ISA_0: begin + rdata <= isa_caps[31:0]; + end + ADDR_ISA_1: begin + rdata <= isa_caps[63:32]; + end + `ifdef SCOPE + ADDR_SCP_0: begin + rdata <= scope_bus_rdata[31:0]; + end + ADDR_SCP_1: begin + rdata <= scope_bus_rdata[63:32]; + end + `endif + default:; + endcase end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index a844802e9..e1ba82126 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -87,8 +87,7 @@ module VX_afu_wrap #( reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; reg vx_busy_wait; - reg vx_running; - + reg vx_reset = 1; // asserted at initialization wire 
vx_busy; wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS]; @@ -101,8 +100,8 @@ module VX_afu_wrap #( wire ap_reset; wire ap_start; - wire ap_idle = ~vx_running; - wire ap_done = ~(state == STATE_RUN || vx_pending_writes != 0); + wire ap_idle = vx_reset; + wire ap_done = (state == STATE_IDLE) && (vx_pending_writes == '0); wire ap_ready = 1'b1; `ifdef SCOPE @@ -111,54 +110,6 @@ module VX_afu_wrap #( wire scope_reset = reset; `endif - always @(posedge ap_clk) begin - if (reset || ap_reset) begin - state <= STATE_IDLE; - vx_busy_wait <= 0; - vx_running <= 0; - end else begin - case (state) - STATE_IDLE: begin - if (ap_start) begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)); - `endif - state <= STATE_RUN; - vx_running <= 0; - end - end - STATE_RUN: begin - if (vx_running) begin - if (vx_busy_wait) begin - // wait until processor goes busy - if (vx_busy) begin - vx_busy_wait <= 0; - end - end else begin - // wait until the processor is not busy - if (~vx_busy) begin - state <= STATE_IDLE; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)); - `TRACE(2, ("%d: STATE IDLE\n", $time)); - `endif - end - end - end else begin - // wait until the reset sequence is complete - if (vx_reset_ctr == (`RESET_DELAY-1)) begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)); - `endif - vx_running <= 1; - vx_busy_wait <= 1; - end - end - end - endcase - end - end - reg m_axi_mem_wfire; reg m_axi_mem_bfire; @@ -173,8 +124,57 @@ module VX_afu_wrap #( always @(posedge ap_clk) begin if (reset || ap_reset) begin + state <= STATE_IDLE; vx_pending_writes <= '0; + vx_reset_ctr <= (`RESET_DELAY-1); + vx_reset <= 1; end else begin + case (state) + STATE_IDLE: begin + if (ap_start) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: STATE RUN\n", $time)); + `endif + state <= STATE_RUN; + vx_reset_ctr <= 0; + vx_reset <= 1; + end + end + STATE_RUN: begin + if (vx_reset) begin + // wait until the reset network is ready + if (vx_reset_ctr == 0) 
begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: AFU: Begin execution\n", $time)); + `endif + vx_busy_wait <= 1; + vx_reset <= 0; + end + end else begin + if (vx_busy_wait) begin + // wait until processor goes busy + if (vx_busy) begin + vx_busy_wait <= 0; + end + end else begin + // wait until the processor is not busy + if (~vx_busy) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: AFU: End execution\n", $time)); + `endif + state <= STATE_IDLE; + end + end + end + end + endcase + + // ensure reset network initialization + if (vx_reset_ctr != 0) begin + vx_reset_ctr <= vx_reset_ctr - 1; + end + + // track pending writes if (m_axi_mem_wfire && ~m_axi_mem_bfire) vx_pending_writes <= vx_pending_writes + 1; if (~m_axi_mem_wfire && m_axi_mem_bfire) @@ -182,22 +182,13 @@ module VX_afu_wrap #( end end - always @(posedge ap_clk) begin - if (state == STATE_RUN) begin - vx_reset_ctr <= vx_reset_ctr + 1; - end else begin - vx_reset_ctr <= '0; - end - end - VX_afu_ctrl #( .AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) afu_ctrl ( .clk (ap_clk), - .reset (reset || ap_reset), - .clk_en (1'b1), + .reset (reset), .s_axi_awvalid (s_axi_ctrl_awvalid), .s_axi_awready (s_axi_ctrl_awready), @@ -255,7 +246,7 @@ module VX_afu_wrap #( `SCOPE_IO_BIND (1) .clk (ap_clk), - .reset (reset || ap_reset || ~vx_running), + .reset (vx_reset), .m_axi_awvalid (m_axi_mem_awvalid_a), .m_axi_awready (m_axi_mem_awready_a), @@ -319,7 +310,7 @@ module VX_afu_wrap #( interrupt, \ vx_busy_wait, \ vx_busy, \ - vx_running \ + vx_reset \ } `define PROBES { \ From 7cbb026a12ea414eb25195d67b92100cd660ca39 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 2024 21:34:44 -0700 Subject: [PATCH 231/488] minor update --- hw/rtl/core/VX_operands.sv | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index a88522ee7..e4bad5ced 100644 
--- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -54,8 +54,8 @@ module VX_operands import VX_gpu_pkg::*; #( `UNUSED_VAR (writeback_if.data.sop) wire [NUM_SRC_OPDS-1:0] src_valid; - wire [NUM_SRC_OPDS-1:0] req_in_valid, req_in_ready; - wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_in_data; + wire [NUM_SRC_OPDS-1:0] req_valid_in, req_ready_in; + wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_data_in; wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; @@ -64,7 +64,8 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; - wire pipe_valid_st1, pipe_ready_st1, pipe_in_ready; + wire pipe_ready_in; + wire pipe_valid_st1, pipe_ready_st1; wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; @@ -82,9 +83,9 @@ module VX_operands import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin if (ISSUE_WIS != 0) begin - assign req_in_data[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; + assign req_data_in[i] = {src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; end else begin - assign req_in_data[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; + assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; end if (NUM_BANKS != 1) begin assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; @@ -97,7 +98,7 @@ module VX_operands import VX_gpu_pkg::*; #( assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i]; end - assign req_in_valid = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; + assign req_valid_in = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; VX_stream_xbar #( .NUM_INPUTS (NUM_SRC_OPDS), @@ -110,17 +111,17 @@ module VX_operands import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), `UNUSED_PIN(collisions), - .valid_in (req_in_valid), - 
.data_in (req_in_data), + .valid_in (req_valid_in), + .data_in (req_data_in), .sel_in (req_bank_idx), - .ready_in (req_in_ready), + .ready_in (req_ready_in), .valid_out (gpr_rd_valid), .data_out (gpr_rd_addr), .sel_out (gpr_rd_req_idx), .ready_out (gpr_rd_ready) ); - assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}}; + assign gpr_rd_ready = {NUM_BANKS{pipe_ready_in}}; always @(*) begin has_collision_n = 0; @@ -138,7 +139,7 @@ module VX_operands import VX_gpu_pkg::*; #( if (scoreboard_if.ready) begin data_fetched_n = '0; end else begin - data_fetched_n = data_fetched_st1 | req_in_ready; + data_fetched_n = data_fetched_st1 | req_ready_in; end end @@ -154,7 +155,7 @@ module VX_operands import VX_gpu_pkg::*; #( scoreboard_if.data.uuid }; - assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; + assign scoreboard_if.ready = pipe_ready_in && ~has_collision_n; wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1; wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; @@ -166,7 +167,7 @@ module VX_operands import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), .valid_in (scoreboard_if.valid), - .ready_in (pipe_in_ready), + .ready_in (pipe_ready_in), .data_in ({data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), .data_out ({data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), .valid_out(pipe_valid_st1), @@ -285,7 +286,7 @@ module VX_operands import VX_gpu_pkg::*; #( if (reset) begin collisions_r <= '0; end else begin - collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n); + collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_ready_in && has_collision_n); end end assign perf_stalls = collisions_r; From e178eb13300ba92a57099abdcac488f778be90b9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 5 Sep 2024 21:35:10 -0700 Subject: [PATCH 232/488] operands's x-propagation bug fix (caught using vivado 
simulator) --- hw/rtl/core/VX_operands.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index e4bad5ced..ef98ea79e 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -134,12 +134,14 @@ module VX_operands import VX_gpu_pkg::*; #( end end + wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in; + always @(*) begin data_fetched_n = data_fetched_st1; if (scoreboard_if.ready) begin data_fetched_n = '0; end else begin - data_fetched_n = data_fetched_st1 | req_ready_in; + data_fetched_n = data_fetched_st1 | req_fire_in; end end From fdc62c5f98e43ec5974824362d7970b5fd3d05da Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 6 Sep 2024 01:27:54 -0700 Subject: [PATCH 233/488] minor update --- tests/regression/common.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index c4a00bc13..317e94b09 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,9 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt 
./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From bfbe642170790fa488adb863c58dfff34007f524 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 01:36:17 -0700 Subject: [PATCH 234/488] adding RTL uuigen --- hw/dpi/util_dpi.cpp | 16 -------------- hw/dpi/util_dpi.vh | 2 -- hw/rtl/core/VX_schedule.sv | 37 +++++++++++++------------------- hw/rtl/core/VX_uuid_gen.sv | 43 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 41 deletions(-) create mode 100644 hw/rtl/core/VX_uuid_gen.sv diff --git a/hw/dpi/util_dpi.cpp b/hw/dpi/util_dpi.cpp index 020816b0b..d804d4885 100644 --- a/hw/dpi/util_dpi.cpp +++ b/hw/dpi/util_dpi.cpp @@ -47,8 +47,6 @@ extern "C" { void dpi_trace(int level, const char* format, ...); void dpi_trace_start(); void dpi_trace_stop(); - - uint64_t dpi_uuid_gen(bool reset, int wid); } bool sim_trace_enabled(); @@ -204,17 +202,3 @@ void dpi_trace_start() { void dpi_trace_stop() { sim_trace_enable(false); } - -/////////////////////////////////////////////////////////////////////////////// - -std::unordered_map g_uuid_gens; - -uint64_t dpi_uuid_gen(bool reset, int wid) { - if (reset) { - g_uuid_gens.clear(); - return 0; - } - uint32_t instr_uuid = g_uuid_gens[wid]++; - uint64_t uuid = (uint64_t(wid) << 32) | instr_uuid; - return uuid; -} \ No newline at end of file diff --git a/hw/dpi/util_dpi.vh b/hw/dpi/util_dpi.vh index 0da62b041..74b095af1 100644 --- a/hw/dpi/util_dpi.vh +++ b/hw/dpi/util_dpi.vh @@ -30,6 +30,4 @@ import "DPI-C" function void dpi_trace(input int level, input string format /*ve import "DPI-C" function void dpi_trace_start(); import "DPI-C" function void dpi_trace_stop(); -import "DPI-C" function longint 
dpi_uuid_gen(input logic reset, input int wid); - `endif diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index af0ee5621..6916d3e00 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -331,30 +331,23 @@ module VX_schedule import VX_gpu_pkg::*; #( schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-5:0] }; + wire [`UUID_WIDTH-1:0] instr_uuid; `ifndef NDEBUG - localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS); - reg [`UUID_WIDTH-1:0] instr_uuid; - wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid); -`ifdef SV_DPI - always @(posedge clk) begin - if (reset) begin - instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 32'd0)); - end else if (schedule_fire) begin - instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid))); - end - end + VX_uuid_gen #( + .CORE_ID (CORE_ID) + ) uuid_gen ( + .clk (clk), + .reset (reset), + .incr (schedule_fire), + .wid (schedule_wid), + .uuid (instr_uuid) + ); `else - wire [GNW_WIDTH+16-1:0] w_uuid = {g_wid, 16'(schedule_pc)}; - always @(*) begin - instr_uuid = `UUID_WIDTH'(w_uuid); - end -`endif -`else - wire [`UUID_WIDTH-1:0] instr_uuid = '0; + assign instr_uuid = '0; `endif VX_elastic_buffer #( - .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH), + .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH + `UUID_WIDTH), .SIZE (2), // need to buffer out ready_in .OUT_REG (1) // should be registered for BRAM acces in fetch unit ) out_buf ( @@ -362,14 +355,12 @@ module VX_schedule import VX_gpu_pkg::*; #( .reset (reset), .valid_in (schedule_valid), .ready_in (schedule_ready), - .data_in ({schedule_tmask, schedule_pc, schedule_wid}), - .data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid}), + .data_in ({schedule_tmask, schedule_pc, schedule_wid, instr_uuid}), + .data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.uuid}), .valid_out (schedule_if.valid), .ready_out (schedule_if.ready) ); - assign 
schedule_if.data.uuid = instr_uuid; - // Track pending instructions per warp wire [`NUM_WARPS-1:0] pending_warp_empty; diff --git a/hw/rtl/core/VX_uuid_gen.sv b/hw/rtl/core/VX_uuid_gen.sv new file mode 100644 index 000000000..8dca50e91 --- /dev/null +++ b/hw/rtl/core/VX_uuid_gen.sv @@ -0,0 +1,43 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_uuid_gen import VX_gpu_pkg::*; #( + parameter CORE_ID = 0 +) ( + input wire clk, + input wire reset, + input wire incr, + input wire [`NW_WIDTH-1:0] wid, + output wire [`UUID_WIDTH-1:0] uuid +); + localparam GNW_WIDTH = `UUID_WIDTH - 32; + reg [31:0] uuid_cntrs [0:`NUM_WARPS-1]; + reg [`NUM_WARPS-1:0] has_uuid_cntrs; + + always @(posedge clk) begin + if (reset) begin + has_uuid_cntrs <= '0; + end else if (incr) begin + has_uuid_cntrs[wid] <= 1; + end + if (incr) begin + uuid_cntrs[wid] <= has_uuid_cntrs[wid] ? (uuid_cntrs[wid] + 1) : 1; + end + end + + wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(wid); + assign uuid = {g_wid, (has_uuid_cntrs[wid] ? 
uuid_cntrs[wid] : 0)}; + +endmodule From 2041a4ad4ad78baa5afc587a34c8f41f1ea15105 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 01:43:30 -0700 Subject: [PATCH 235/488] xrt.ini update --- {hw/syn/xilinx => runtime}/xrt/xrt.ini.in | 2 +- {hw/syn/xilinx/scripts => runtime/xrt}/xsim.tcl | 0 tests/opencl/common.mk | 4 ++-- tests/regression/common.mk | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename {hw/syn/xilinx => runtime}/xrt/xrt.ini.in (65%) rename {hw/syn/xilinx/scripts => runtime/xrt}/xsim.tcl (100%) diff --git a/hw/syn/xilinx/xrt/xrt.ini.in b/runtime/xrt/xrt.ini.in similarity index 65% rename from hw/syn/xilinx/xrt/xrt.ini.in rename to runtime/xrt/xrt.ini.in index 99511f884..90affb447 100644 --- a/hw/syn/xilinx/xrt/xrt.ini.in +++ b/runtime/xrt/xrt.ini.in @@ -3,7 +3,7 @@ runtime_log=console [Emulation] debug_mode=batch -user_pre_sim_script=@VORTEX_HOME@/hw/syn/xilinx/scripts/xsim.tcl +user_pre_sim_script=@VORTEX_HOME@/runtime/xrt/xsim.tcl [Debug] profile=true diff --git a/hw/syn/xilinx/scripts/xsim.tcl b/runtime/xrt/xsim.tcl similarity index 100% rename from hw/syn/xilinx/scripts/xsim.tcl rename to runtime/xrt/xsim.tcl diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index dd5af90db..8173a2535 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -102,9 +102,9 @@ run-opae: $(PROJECT) $(KERNEL_SRCS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin 
LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 317e94b09..4edc5c859 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,9 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(ROOT_DIR)/hw/syn/xilinx/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) 
VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From a75ed78bf2274d5ec64ef049803a8d4a74bcf086 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 03:42:46 -0700 Subject: [PATCH 236/488] fixed getopt exitcode with invalid parameters --- sim/rtlsim/main.cpp | 8 +++----- sim/simx/main.cpp | 11 +++++------ tests/opencl/conv3/main.cc | 5 ++--- tests/opencl/oclprintf/main.cc | 5 ++--- tests/opencl/psort/main.cc | 5 ++--- tests/opencl/psum/main.cc | 5 ++--- tests/opencl/saxpy/main.cc | 5 ++--- tests/opencl/sfilter/main.cc | 5 ++--- tests/opencl/sgemm/main.cc | 5 ++--- tests/opencl/sgemm2/main.cc | 5 ++--- tests/opencl/sgemm3/main.cc | 5 ++--- tests/opencl/vecadd/main.cc | 5 ++--- tests/regression/basic/main.cpp | 5 ++--- tests/regression/conv3x/main.cpp | 5 ++--- tests/regression/demo/main.cpp | 5 ++--- tests/regression/diverge/main.cpp | 5 ++--- tests/regression/dogfood/main.cpp | 5 ++--- tests/regression/fence/main.cpp | 5 ++--- tests/regression/io_addr/main.cpp | 5 ++--- tests/regression/mstress/main.cpp | 5 ++--- tests/regression/printf/main.cpp | 5 ++--- tests/regression/sgemm2x/main.cpp | 5 ++--- tests/regression/sgemmx/main.cpp | 5 ++--- tests/regression/sort/main.cpp | 5 ++--- tests/regression/stencil3d/main.cpp | 9 +++------ tests/regression/vecaddx/main.cpp | 5 ++--- 26 files changed, 57 insertions(+), 86 deletions(-) diff --git a/sim/rtlsim/main.cpp b/sim/rtlsim/main.cpp index ea0ba9b95..16ce79550 100644 --- a/sim/rtlsim/main.cpp +++ b/sim/rtlsim/main.cpp @@ -33,13 +33,11 @@ const char* program = nullptr; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "rh?")) != -1) { + while ((c = getopt(argc, 
argv, "rh")) != -1) { switch (c) { case 'h': - case '?': - show_usage(); - exit(0); - break; + show_usage(); + exit(0); default: show_usage(); exit(-1); diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index a8883c696..6e4c96a82 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -40,7 +40,7 @@ const char* program = nullptr; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "t:w:c:rsh?")) != -1) { + while ((c = getopt(argc, argv, "t:w:c:rsh")) != -1) { switch (c) { case 't': num_threads = atoi(optarg); @@ -55,13 +55,12 @@ static void parse_args(int argc, char **argv) { showStats = true; break; case 'h': - case '?': - show_usage(); - exit(0); + show_usage(); + exit(0); break; default: - show_usage(); - exit(-1); + show_usage(); + exit(-1); } } diff --git a/tests/opencl/conv3/main.cc b/tests/opencl/conv3/main.cc index cda8e74ac..1220dabdb 100644 --- a/tests/opencl/conv3/main.cc +++ b/tests/opencl/conv3/main.cc @@ -116,16 +116,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/oclprintf/main.cc b/tests/opencl/oclprintf/main.cc index ef82a33e5..c23e6dec0 100644 --- a/tests/opencl/oclprintf/main.cc +++ b/tests/opencl/oclprintf/main.cc @@ -81,16 +81,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/psort/main.cc b/tests/opencl/psort/main.cc index e0bd49b8e..8ecfdc523 100644 --- a/tests/opencl/psort/main.cc 
+++ b/tests/opencl/psort/main.cc @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "fn:h?")) != -1) { + while ((c = getopt(argc, argv, "fn:h")) != -1) { switch (c) { case 'f': float_enable = 1; @@ -96,10 +96,9 @@ static void parse_args(int argc, char **argv) { size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/psum/main.cc b/tests/opencl/psum/main.cc index 749d40619..5606de8c5 100644 --- a/tests/opencl/psum/main.cc +++ b/tests/opencl/psum/main.cc @@ -104,7 +104,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:l:h?")) != -1) { + while ((c = getopt(argc, argv, "n:l:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -113,10 +113,9 @@ static void parse_args(int argc, char **argv) { local_size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/saxpy/main.cc b/tests/opencl/saxpy/main.cc index 2d896e6a9..2397c720e 100644 --- a/tests/opencl/saxpy/main.cc +++ b/tests/opencl/saxpy/main.cc @@ -126,13 +126,12 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; - case 'h': - case '?': { + case 'h':{ show_usage(); exit(0); } break; diff --git a/tests/opencl/sfilter/main.cc b/tests/opencl/sfilter/main.cc index b9d2356b2..97cfb689e 100644 --- a/tests/opencl/sfilter/main.cc +++ b/tests/opencl/sfilter/main.cc @@ -124,16 +124,15 @@ int size = 16; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = 
atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm/main.cc b/tests/opencl/sgemm/main.cc index 41c1bc5e8..31f99d2e4 100644 --- a/tests/opencl/sgemm/main.cc +++ b/tests/opencl/sgemm/main.cc @@ -147,16 +147,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm2/main.cc b/tests/opencl/sgemm2/main.cc index 595a9fc51..c4ca06fdb 100644 --- a/tests/opencl/sgemm2/main.cc +++ b/tests/opencl/sgemm2/main.cc @@ -147,16 +147,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm3/main.cc b/tests/opencl/sgemm3/main.cc index 570cee9ae..24dd39752 100644 --- a/tests/opencl/sgemm3/main.cc +++ b/tests/opencl/sgemm3/main.cc @@ -148,7 +148,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -157,10 +157,9 @@ static void parse_args(int argc, char **argv) { tile_size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/vecadd/main.cc b/tests/opencl/vecadd/main.cc index e1316ad3f..190d29450 100644 --- a/tests/opencl/vecadd/main.cc +++ b/tests/opencl/vecadd/main.cc @@ -141,16 +141,15 @@ 
static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/basic/main.cpp b/tests/regression/basic/main.cpp index 73f3e29a2..575333c4b 100755 --- a/tests/regression/basic/main.cpp +++ b/tests/regression/basic/main.cpp @@ -38,7 +38,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -50,10 +50,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/conv3x/main.cpp b/tests/regression/conv3x/main.cpp index d5f8b4e81..3a0e192fb 100644 --- a/tests/regression/conv3x/main.cpp +++ b/tests/regression/conv3x/main.cpp @@ -109,7 +109,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:lh?")) != -1) { + while ((c = getopt(argc, argv, "n:k:lh")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -121,10 +121,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/demo/main.cpp b/tests/regression/demo/main.cpp index 4947cb64f..3fdd03601 100644 --- a/tests/regression/demo/main.cpp +++ b/tests/regression/demo/main.cpp @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) 
!= -1) { switch (c) { case 'n': count = atoi(optarg); @@ -95,8 +95,7 @@ static void parse_args(int argc, char **argv) { case 'k': kernel_file = optarg; break; - case 'h': - case '?': { + case 'h':{ show_usage(); exit(0); } break; diff --git a/tests/regression/diverge/main.cpp b/tests/regression/diverge/main.cpp index fc4384610..d858b1729 100644 --- a/tests/regression/diverge/main.cpp +++ b/tests/regression/diverge/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -44,10 +44,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/dogfood/main.cpp b/tests/regression/dogfood/main.cpp index d308821f0..f2922c632 100644 --- a/tests/regression/dogfood/main.cpp +++ b/tests/regression/dogfood/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch?")) != -1) { + while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -59,10 +59,9 @@ static void parse_args(int argc, char **argv) { stop_on_error = false; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/fence/main.cpp b/tests/regression/fence/main.cpp index ead4ad551..716036b11 100644 --- a/tests/regression/fence/main.cpp +++ b/tests/regression/fence/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -44,10 +44,9 @@ static 
void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/io_addr/main.cpp b/tests/regression/io_addr/main.cpp index 602064ffe..78d7cf56f 100644 --- a/tests/regression/io_addr/main.cpp +++ b/tests/regression/io_addr/main.cpp @@ -42,7 +42,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -51,10 +51,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/mstress/main.cpp b/tests/regression/mstress/main.cpp index 7bf0dbe0e..5a1f0d300 100644 --- a/tests/regression/mstress/main.cpp +++ b/tests/regression/mstress/main.cpp @@ -83,7 +83,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -92,10 +92,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/printf/main.cpp b/tests/regression/printf/main.cpp index 18d778c4b..eefa32592 100644 --- a/tests/regression/printf/main.cpp +++ b/tests/regression/printf/main.cpp @@ -33,7 +33,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -42,10 +42,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 
'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sgemm2x/main.cpp b/tests/regression/sgemm2x/main.cpp index 3da359ee5..f10f8fcd1 100644 --- a/tests/regression/sgemm2x/main.cpp +++ b/tests/regression/sgemm2x/main.cpp @@ -103,7 +103,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -115,10 +115,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sgemmx/main.cpp b/tests/regression/sgemmx/main.cpp index 4c2b18c30..b31af9b04 100644 --- a/tests/regression/sgemmx/main.cpp +++ b/tests/regression/sgemmx/main.cpp @@ -99,7 +99,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -108,10 +108,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sort/main.cpp b/tests/regression/sort/main.cpp index 19e9aee50..032ce18df 100644 --- a/tests/regression/sort/main.cpp +++ b/tests/regression/sort/main.cpp @@ -34,7 +34,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -43,10 +43,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: 
show_usage(); exit(-1); diff --git a/tests/regression/stencil3d/main.cpp b/tests/regression/stencil3d/main.cpp index 0536effc0..5a5fcc716 100644 --- a/tests/regression/stencil3d/main.cpp +++ b/tests/regression/stencil3d/main.cpp @@ -128,7 +128,7 @@ static void stencil_cpu(TYPE *out, const TYPE *in, uint32_t width, uint32_t heig {ny = 0;} else if (ny >= (int)height) {ny = height - 1;} - + if (nz < 0) {nz = 0;} else if (nz >= (int)depth) @@ -168,7 +168,7 @@ static void show_usage() static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { @@ -182,12 +182,9 @@ static void parse_args(int argc, char **argv) kernel_file = optarg; break; case 'h': - case '?': - { show_usage(); exit(0); - } - break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/vecaddx/main.cpp b/tests/regression/vecaddx/main.cpp index d80e2fdc1..4a79861d3 100644 --- a/tests/regression/vecaddx/main.cpp +++ b/tests/regression/vecaddx/main.cpp @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -96,10 +96,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); From aa1489d8ebbae963f74339a2c53c5c74082ae328 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 03:45:23 -0700 Subject: [PATCH 237/488] fixed trace.vcd copy --- ci/blackbox.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 5c0dfbde1..51639b201 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -61,11 +61,11 @@ parse_args() { --driver=*) DRIVER=${i#*=} ;; --app=*) APP=${i#*=} ;; --clusters=*) 
CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;; - --cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;; - --warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;; + --cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;; + --warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;; --threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;; - --l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;; - --l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;; + --l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;; + --l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;; --perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;; --debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;; --scope) SCOPE=1; ;; @@ -143,7 +143,7 @@ run_app() { fi fi status=$? - exit $status + return $status } main() { @@ -154,7 +154,7 @@ main() { # execute on default installed GPU if [ "$DRIVER" = "gpu" ]; then run_app - exit $status + exit $? fi if [ -n "$CONFIGS" ]; then @@ -189,6 +189,7 @@ main() { build_driver run_app + status=$? if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then mv -f $APP_PATH/trace.vcd . 
From 0cbdc3be9e654226f3d506d08b0312ebcb356355 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 21:32:11 -0700 Subject: [PATCH 238/488] opae afu x warning fixes --- hw/rtl/afu/opae/vortex_afu.sv | 160 ++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 77 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index e5ff16483..ffc0af282 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -64,6 +64,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_H = 16'h0004; // AFU ID Higher + localparam CMD_IDLE = 0; localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ; localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE; localparam CMD_DCR_WRITE = `AFU_IMAGE_CMD_DCR_WRITE; @@ -139,14 +140,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // MMIO controller //////////////////////////////////////////////////////////// - t_ccip_c0_ReqMmioHdr mmio_hdr; - assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); - `UNUSED_VAR (mmio_hdr) + t_ccip_c0_ReqMmioHdr mmio_req_hdr; + assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr[$bits(t_ccip_c0_ReqMmioHdr)-1:0]); + `UNUSED_VAR (mmio_req_hdr) - `STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!")) - - t_if_ccip_c2_Tx mmio_tx; - assign af2cp_sTxPort.c2 = mmio_tx; + t_if_ccip_c2_Tx mmio_rsp; + assign af2cp_sTxPort.c2 = mmio_rsp; `ifdef SCOPE @@ -178,7 +177,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end scope_bus_in <= 0; if (cp2af_sRxPort.c0.mmioWrValid - && (MMIO_SCOPE_WRITE == mmio_hdr.address)) begin + && (MMIO_SCOPE_WRITE == mmio_req_hdr.address)) begin cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data); cmd_scope_writing <= 1; scope_bus_ctr <= 63; @@ -206,6 +205,8 @@ module vortex_afu import ccip_if_pkg::*; import 
local_mem_cfg_pkg::*; import VX_ wire [COUT_QUEUE_DATAW-1:0] cout_q_dout; wire cout_q_full, cout_q_empty; + wire [COUT_QUEUE_DATAW-1:0] cout_q_dout_s = cout_q_dout & {COUT_QUEUE_DATAW{!cout_q_empty}}; + `ifdef SIMULATION `ifndef VERILATOR // disable assertions until full reset @@ -226,17 +227,79 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `endif `endif + // MMIO controller //////////////////////////////////////////////////////////// + + // Handle MMIO read requests always @(posedge clk) begin if (reset) begin - mmio_tx.mmioRdValid <= 0; - mmio_tx.hdr <= '0; + mmio_rsp.mmioRdValid <= 0; end else begin - mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; - mmio_tx.hdr.tid <= mmio_hdr.tid; + mmio_rsp.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; end - // serve MMIO write request + + mmio_rsp.hdr.tid <= mmio_req_hdr.tid; + + if (cp2af_sRxPort.c0.mmioRdValid) begin + case (mmio_req_hdr.address) + // AFU header + 16'h0000: mmio_rsp.data <= { + 4'b0001, // Feature type = AFU + 8'b0, // reserved + 4'b0, // afu minor revision = 0 + 7'b0, // reserved + 1'b1, // end of DFH list = 1 + 24'b0, // next DFH offset = 0 + 4'b0, // afu major revision = 0 + 12'b0 // feature ID = 0 + }; + AFU_ID_L: mmio_rsp.data <= afu_id[63:0]; // afu id low + AFU_ID_H: mmio_rsp.data <= afu_id[127:64]; // afu id hi + 16'h0006: mmio_rsp.data <= 64'h0; // next AFU + 16'h0008: mmio_rsp.data <= 64'h0; // reserved + MMIO_STATUS: begin + mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); + `ifdef DBG_TRACE_AFU + if (state != STATE_WIDTH'(mmio_rsp.data)) begin + `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)); + end + `endif + end + `ifdef SCOPE + MMIO_SCOPE_READ: begin + mmio_rsp.data <= cmd_scope_rdata; + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)); + `endif + end + `endif + MMIO_DEV_CAPS: begin + mmio_rsp.data <= dev_caps; + `ifdef DBG_TRACE_AFU + `TRACE(2, 
("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)); + `endif + end + MMIO_ISA_CAPS: begin + mmio_rsp.data <= isa_caps; + `ifdef DBG_TRACE_AFU + if (state != STATE_WIDTH'(mmio_rsp.data)) begin + `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)); + end + `endif + end + default: begin + mmio_rsp.data <= 64'h0; + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)); + `endif + end + endcase + end + end + + // Handle MMIO write requests + always @(posedge clk) begin if (cp2af_sRxPort.c0.mmioWrValid) begin - case (mmio_hdr.address) + case (mmio_req_hdr.address) MMIO_CMD_ARG0: begin cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU @@ -269,68 +332,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `endif default: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))); `endif end endcase end - - // serve MMIO read requests - if (cp2af_sRxPort.c0.mmioRdValid) begin - case (mmio_hdr.address) - // AFU header - 16'h0000: mmio_tx.data <= { - 4'b0001, // Feature type = AFU - 8'b0, // reserved - 4'b0, // afu minor revision = 0 - 7'b0, // reserved - 1'b1, // end of DFH list = 1 - 24'b0, // next DFH offset = 0 - 4'b0, // afu major revision = 0 - 12'b0 // feature ID = 0 - }; - AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low - AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi - 16'h0006: mmio_tx.data <= 64'h0; // next AFU - 16'h0008: mmio_tx.data <= 64'h0; // reserved - MMIO_STATUS: begin - mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)}); - `ifdef DBG_TRACE_AFU - if (state != STATE_WIDTH'(mmio_tx.data)) begin - `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_hdr.address, state)); - end - `endif - end - `ifdef SCOPE - 
MMIO_SCOPE_READ: begin - mmio_tx.data <= cmd_scope_rdata; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)); - `endif - end - `endif - MMIO_DEV_CAPS: begin - mmio_tx.data <= dev_caps; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)); - `endif - end - MMIO_ISA_CAPS: begin - mmio_tx.data <= isa_caps; - `ifdef DBG_TRACE_AFU - if (state != STATE_WIDTH'(mmio_tx.data)) begin - `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)); - end - `endif - end - default: begin - mmio_tx.data <= 64'h0; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_hdr.address)); - `endif - end - endcase - end end // COMMAND FSM //////////////////////////////////////////////////////////////// @@ -351,9 +357,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end end - wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address); + wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address); wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ? 
- CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(0); + CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE); always @(posedge clk) begin if (reset) begin @@ -978,7 +984,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire cout_q_push = vx_mem_req_valid && vx_mem_is_cout && ~cout_q_full; wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid - && (mmio_hdr.address == MMIO_STATUS) + && (mmio_req_hdr.address == MMIO_STATUS) && ~cout_q_empty; VX_fifo_queue #( @@ -1051,8 +1057,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .probes({ cmd_type, state, - mmio_hdr.address, - mmio_hdr.length, + mmio_req_hdr.address, + mmio_req_hdr.length, cp2af_sRxPort.c0.hdr.mdata, af2cp_sTxPort.c0.hdr.address, af2cp_sTxPort.c0.hdr.mdata, From 1a35d3fed120c3b035a07372d2d8f2f608cac7f9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 7 Sep 2024 21:33:45 -0700 Subject: [PATCH 239/488] fixed byteen signal on memory read --- hw/rtl/VX_define.vh | 2 +- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 4 ++-- hw/rtl/core/VX_fetch.sv | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 861d9f28c..69b14c748 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -361,7 +361,7 @@ assign dst.req_data.rw = 0; \ assign dst.req_data.addr = src.req_data.addr; \ assign dst.req_data.data = '0; \ - assign dst.req_data.byteen = '0; \ + assign dst.req_data.byteen = '1; \ assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.tag = src.req_data.tag; \ assign src.req_ready = dst.req_ready; \ diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 4dff675bd..a8f8dbdf2 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -623,7 +623,7 @@ module VX_cache_bank #( end else begin assign mreq_queue_rw = 0; assign mreq_queue_data = '0; - assign mreq_queue_byteen = '0; 
+ assign mreq_queue_byteen = '1; `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 18d44b6db..302a99e5e 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -101,7 +101,7 @@ module VX_cache_data #( assign dirty_byteen = bs_rdata[way_idx]; end else begin - assign dirty_byteen = {LINE_SIZE{1'b1}}; + assign dirty_byteen = '1; end wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata; @@ -112,7 +112,7 @@ module VX_cache_data #( end assign dirty_data = flipped_rdata[way_idx]; end else begin - assign dirty_byteen = '0; + assign dirty_byteen = '1; assign dirty_data = '0; end diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 44f3e51da..1da184288 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -118,7 +118,7 @@ module VX_fetch import VX_gpu_pkg::*; #( assign icache_bus_if.req_data.flags = '0; assign icache_bus_if.req_data.rw = 0; - assign icache_bus_if.req_data.byteen = 4'b1111; + assign icache_bus_if.req_data.byteen = '1; assign icache_bus_if.req_data.data = '0; // Icache Response From 7bef62aef81c522477a08bd899fe41f0c388899b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 01:37:20 -0700 Subject: [PATCH 240/488] minor update --- hw/rtl/VX_gpu_pkg.sv | 30 ++++++++++++++++-------------- hw/rtl/Vortex.sv | 6 ++++++ hw/rtl/afu/opae/vortex_afu.vh | 8 ++++---- tests/regression/demo/common.h | 4 ++-- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index f94714d06..1a55a18fe 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -461,6 +461,21 @@ package VX_gpu_pkg; endcase end end + `EX_SFU: begin + case (`INST_SFU_BITS'(op_type)) + `INST_SFU_TMC: `TRACE(level, ("TMC")); + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, 
("SPLIT")); end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")); + `INST_SFU_BAR: `TRACE(level, ("BAR")); + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end + `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end + default: `TRACE(level, ("?")); + endcase + end + `ifdef EXT_F_ENABLE `EX_FPU: begin case (`INST_FPU_BITS'(op_type)) `INST_FPU_ADD: begin @@ -632,20 +647,7 @@ package VX_gpu_pkg; default: `TRACE(level, ("?")); endcase end - `EX_SFU: begin - case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); - endcase - end + `endif default: `TRACE(level, ("?")); endcase endtask diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 875faf47e..8f171a486 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -189,6 +189,12 @@ module Vortex import VX_gpu_pkg::*; ( `endif + // dump device configuration + initial begin + `TRACE(0, ("CONFIGS: num_threads=%0d, num_warps=%0d, num_cores=%0d, num_clusters=%0d, 
socket_size=%0d, local_mem_base=0x%0h, num_barriers=%0d\n", + `NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, `LMEM_BASE_ADDR, `NUM_BARRIERS)); + end + `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (mem_req_fire) begin diff --git a/hw/rtl/afu/opae/vortex_afu.vh b/hw/rtl/afu/opae/vortex_afu.vh index 6aa532983..31f09ae90 100644 --- a/hw/rtl/afu/opae/vortex_afu.vh +++ b/hw/rtl/afu/opae/vortex_afu.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,9 +17,9 @@ `define AFU_ACCEL_NAME "vortex_afu" `define AFU_ACCEL_UUID 128'h35F9452B_25C2_434C_93D5_6F8C60DB361C -`define AFU_IMAGE_CMD_MEM_READ 1 +`define AFU_IMAGE_CMD_MEM_READ 1 `define AFU_IMAGE_CMD_MEM_WRITE 2 -`define AFU_IMAGE_CMD_RUN 3 +`define AFU_IMAGE_CMD_RUN 3 `define AFU_IMAGE_CMD_DCR_WRITE 4 `define AFU_IMAGE_CMD_MAX_VALUE 4 diff --git a/tests/regression/demo/common.h b/tests/regression/demo/common.h index 98b8ff587..be200ec04 100644 --- a/tests/regression/demo/common.h +++ b/tests/regression/demo/common.h @@ -2,7 +2,7 @@ #define _COMMON_H_ #ifndef TYPE -#define TYPE float +#define TYPE int #endif typedef struct { @@ -10,7 +10,7 @@ typedef struct { uint32_t task_size; uint64_t src0_addr; uint64_t src1_addr; - uint64_t dst_addr; + uint64_t dst_addr; } kernel_arg_t; #endif From 7823f5529cf7c677232000af9947880a2c9af4cd Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 01:38:48 -0700 Subject: [PATCH 241/488] minor update --- sim/opaesim/opae_sim.cpp | 14 +------------- sim/rtlsim/processor.cpp | 14 +------------- 
sim/xrtsim/xrt_sim.cpp | 14 +------------- 3 files changed, 3 insertions(+), 39 deletions(-) diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 8e9e8c4d8..f5acc3d21 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -143,19 +143,7 @@ public: #endif ram_ = new RAM(0, RAM_PAGE_SIZE); - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // reset the device this->reset(); diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 25d219fcf..f52e7c8da 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -116,19 +116,7 @@ public: #endif ram_ = nullptr; - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // reset the device this->reset(); diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 12a78c23d..21961e5dd 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -130,19 +130,7 @@ public: #endif ram_ = new RAM(0, RAM_PAGE_SIZE); - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // 
reset the device this->reset(); From 6626f9201c2820f78e12a2152df16e26f5143d0a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 02:46:32 -0700 Subject: [PATCH 242/488] minor update --- hw/rtl/VX_gpu_pkg.sv | 12 ++++++++---- hw/rtl/core/VX_issue_slice.sv | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 1a55a18fe..67ff2176b 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -320,8 +320,10 @@ package VX_gpu_pkg; case (ex_type) `EX_ALU: `TRACE(level, ("ALU")); `EX_LSU: `TRACE(level, ("LSU")); - `EX_FPU: `TRACE(level, ("FPU")); `EX_SFU: `TRACE(level, ("SFU")); + `ifdef EXT_F_ENABLE + `EX_FPU: `TRACE(level, ("FPU")); + `endif default: `TRACE(level, ("?")); endcase endtask @@ -664,14 +666,16 @@ package VX_gpu_pkg; `EX_LSU: begin `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); end - `EX_FPU: begin - `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); - end `EX_SFU: begin if (`INST_SFU_IS_CSR(op_type)) begin `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); end end + `ifdef EXT_F_ENABLE + `EX_FPU: begin + `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + end + `endif default:; endcase endtask diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 0d7fdea53..18dd41cd7 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -135,7 +135,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (operands_if.valid && operands_if.ready) begin - `TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})); trace_ex_type(1, 
operands_if.data.ex_type); `TRACE(1, (", op=")); trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); From fa11d4c5022393e374f989623649be866fe8a19c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 05:26:00 -0700 Subject: [PATCH 243/488] TRACING refactoring to support vivado/quartus simulators --- ci/trace_csv.py | 5 +- hw/rtl/VX_gpu_pkg.sv | 342 ++++++++++++++++---------------- hw/rtl/VX_platform.vh | 32 +-- hw/rtl/Vortex.sv | 8 +- hw/rtl/afu/opae/vortex_afu.sv | 58 +++--- hw/rtl/afu/xrt/VX_afu_wrap.sv | 14 +- hw/rtl/cache/VX_cache_bank.sv | 22 +- hw/rtl/cache/VX_cache_data.sv | 8 +- hw/rtl/cache/VX_cache_mshr.sv | 22 +- hw/rtl/cache/VX_cache_tags.sv | 14 +- hw/rtl/cache/VX_cache_wrap.sv | 12 +- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/core/VX_commit.sv | 8 +- hw/rtl/core/VX_dcr_data.sv | 4 +- hw/rtl/core/VX_decode.sv | 8 +- hw/rtl/core/VX_fetch.sv | 4 +- hw/rtl/core/VX_issue_slice.sv | 18 +- hw/rtl/core/VX_lsu_slice.sv | 34 ++-- hw/rtl/core/VX_scoreboard.sv | 6 +- hw/rtl/libs/VX_axi_adapter.sv | 6 +- hw/rtl/libs/VX_dp_ram.sv | 4 +- hw/rtl/libs/VX_fifo_queue.sv | 4 +- hw/rtl/libs/VX_index_queue.sv | 24 +-- hw/rtl/libs/VX_mem_coalescer.sv | 44 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 52 ++--- hw/rtl/libs/VX_scope_tap.sv | 102 +++++----- hw/rtl/mem/VX_gbar_unit.sv | 4 +- hw/rtl/mem/VX_local_mem.sv | 12 +- 28 files changed, 441 insertions(+), 432 deletions(-) diff --git a/ci/trace_csv.py b/ci/trace_csv.py index 4a36f5f6a..077f8027e 100755 --- a/ci/trace_csv.py +++ b/ci/trace_csv.py @@ -44,7 +44,8 @@ def load_config(filename): 'num_barriers': int(config_match.group(7)), } return config - return None + print("Error: missing CONFIGS: header") + sys.exit(1) def parse_simx(log_lines): pc_pattern = r"PC=(0x[0-9a-fA-F]+)" @@ -274,6 +275,8 @@ def split_log_file(log_filename): if current_sublog is not None: sublogs.append(current_sublog) + else: + sublogs.append(log_lines) return sublogs diff --git a/hw/rtl/VX_gpu_pkg.sv 
b/hw/rtl/VX_gpu_pkg.sv index 67ff2176b..7748b8eec 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -318,13 +318,13 @@ package VX_gpu_pkg; task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); case (ex_type) - `EX_ALU: `TRACE(level, ("ALU")); - `EX_LSU: `TRACE(level, ("LSU")); - `EX_SFU: `TRACE(level, ("SFU")); + `EX_ALU: `TRACE(level, ("ALU")) + `EX_LSU: `TRACE(level, ("LSU")) + `EX_SFU: `TRACE(level, ("SFU")) `ifdef EXT_F_ENABLE - `EX_FPU: `TRACE(level, ("FPU")); + `EX_FPU: `TRACE(level, ("FPU")) `endif - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase endtask @@ -340,141 +340,141 @@ package VX_gpu_pkg; if (op_args.alu.is_w) begin if (op_args.alu.use_imm) begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDIW")); - `INST_ALU_SLL: `TRACE(level, ("SLLIW")); - `INST_ALU_SRL: `TRACE(level, ("SRLIW")); - `INST_ALU_SRA: `TRACE(level, ("SRAIW")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADDIW")) + `INST_ALU_SLL: `TRACE(level, ("SLLIW")) + `INST_ALU_SRL: `TRACE(level, ("SRLIW")) + `INST_ALU_SRA: `TRACE(level, ("SRAIW")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDW")); - `INST_ALU_SUB: `TRACE(level, ("SUBW")); - `INST_ALU_SLL: `TRACE(level, ("SLLW")); - `INST_ALU_SRL: `TRACE(level, ("SRLW")); - `INST_ALU_SRA: `TRACE(level, ("SRAW")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADDW")) + `INST_ALU_SUB: `TRACE(level, ("SUBW")) + `INST_ALU_SLL: `TRACE(level, ("SLLW")) + `INST_ALU_SRL: `TRACE(level, ("SRLW")) + `INST_ALU_SRA: `TRACE(level, ("SRAW")) + default: `TRACE(level, ("?")) endcase end end else begin if (op_args.alu.use_imm) begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDI")); - `INST_ALU_SLL: `TRACE(level, ("SLLI")); - `INST_ALU_SRL: `TRACE(level, ("SRLI")); - `INST_ALU_SRA: `TRACE(level, ("SRAI")); - `INST_ALU_SLT: `TRACE(level, ("SLTI")); - 
`INST_ALU_SLTU: `TRACE(level, ("SLTIU")); - `INST_ALU_XOR: `TRACE(level, ("XORI")); - `INST_ALU_OR: `TRACE(level, ("ORI")); - `INST_ALU_AND: `TRACE(level, ("ANDI")); - `INST_ALU_LUI: `TRACE(level, ("LUI")); - `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADDI")) + `INST_ALU_SLL: `TRACE(level, ("SLLI")) + `INST_ALU_SRL: `TRACE(level, ("SRLI")) + `INST_ALU_SRA: `TRACE(level, ("SRAI")) + `INST_ALU_SLT: `TRACE(level, ("SLTI")) + `INST_ALU_SLTU: `TRACE(level, ("SLTIU")) + `INST_ALU_XOR: `TRACE(level, ("XORI")) + `INST_ALU_OR: `TRACE(level, ("ORI")) + `INST_ALU_AND: `TRACE(level, ("ANDI")) + `INST_ALU_LUI: `TRACE(level, ("LUI")) + `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADD")); - `INST_ALU_SUB: `TRACE(level, ("SUB")); - `INST_ALU_SLL: `TRACE(level, ("SLL")); - `INST_ALU_SRL: `TRACE(level, ("SRL")); - `INST_ALU_SRA: `TRACE(level, ("SRA")); - `INST_ALU_SLT: `TRACE(level, ("SLT")); - `INST_ALU_SLTU: `TRACE(level, ("SLTU")); - `INST_ALU_XOR: `TRACE(level, ("XOR")); - `INST_ALU_OR: `TRACE(level, ("OR")); - `INST_ALU_AND: `TRACE(level, ("AND")); - `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); - `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); - default: `TRACE(level, ("?")); + `INST_ALU_ADD: `TRACE(level, ("ADD")) + `INST_ALU_SUB: `TRACE(level, ("SUB")) + `INST_ALU_SLL: `TRACE(level, ("SLL")) + `INST_ALU_SRL: `TRACE(level, ("SRL")) + `INST_ALU_SRA: `TRACE(level, ("SRA")) + `INST_ALU_SLT: `TRACE(level, ("SLT")) + `INST_ALU_SLTU: `TRACE(level, ("SLTU")) + `INST_ALU_XOR: `TRACE(level, ("XOR")) + `INST_ALU_OR: `TRACE(level, ("OR")) + `INST_ALU_AND: `TRACE(level, ("AND")) + `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")) + `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")) + default: `TRACE(level, ("?")) endcase end end end `ALU_TYPE_BRANCH: begin case (`INST_BR_BITS'(op_type)) - `INST_BR_EQ: `TRACE(level, 
("BEQ")); - `INST_BR_NE: `TRACE(level, ("BNE")); - `INST_BR_LT: `TRACE(level, ("BLT")); - `INST_BR_GE: `TRACE(level, ("BGE")); - `INST_BR_LTU: `TRACE(level, ("BLTU")); - `INST_BR_GEU: `TRACE(level, ("BGEU")); - `INST_BR_JAL: `TRACE(level, ("JAL")); - `INST_BR_JALR: `TRACE(level, ("JALR")); - `INST_BR_ECALL: `TRACE(level, ("ECALL")); - `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); - `INST_BR_URET: `TRACE(level, ("URET")); - `INST_BR_SRET: `TRACE(level, ("SRET")); - `INST_BR_MRET: `TRACE(level, ("MRET")); - default: `TRACE(level, ("?")); + `INST_BR_EQ: `TRACE(level, ("BEQ")) + `INST_BR_NE: `TRACE(level, ("BNE")) + `INST_BR_LT: `TRACE(level, ("BLT")) + `INST_BR_GE: `TRACE(level, ("BGE")) + `INST_BR_LTU: `TRACE(level, ("BLTU")) + `INST_BR_GEU: `TRACE(level, ("BGEU")) + `INST_BR_JAL: `TRACE(level, ("JAL")) + `INST_BR_JALR: `TRACE(level, ("JALR")) + `INST_BR_ECALL: `TRACE(level, ("ECALL")) + `INST_BR_EBREAK:`TRACE(level, ("EBREAK")) + `INST_BR_URET: `TRACE(level, ("URET")) + `INST_BR_SRET: `TRACE(level, ("SRET")) + `INST_BR_MRET: `TRACE(level, ("MRET")) + default: `TRACE(level, ("?")) endcase end `ALU_TYPE_MULDIV: begin if (op_args.alu.is_w) begin case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MULW")); - `INST_M_DIV: `TRACE(level, ("DIVW")); - `INST_M_DIVU: `TRACE(level, ("DIVUW")); - `INST_M_REM: `TRACE(level, ("REMW")); - `INST_M_REMU: `TRACE(level, ("REMUW")); - default: `TRACE(level, ("?")); + `INST_M_MUL: `TRACE(level, ("MULW")) + `INST_M_DIV: `TRACE(level, ("DIVW")) + `INST_M_DIVU: `TRACE(level, ("DIVUW")) + `INST_M_REM: `TRACE(level, ("REMW")) + `INST_M_REMU: `TRACE(level, ("REMUW")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MUL")); - `INST_M_MULH: `TRACE(level, ("MULH")); - `INST_M_MULHSU:`TRACE(level, ("MULHSU")); - `INST_M_MULHU: `TRACE(level, ("MULHU")); - `INST_M_DIV: `TRACE(level, ("DIV")); - `INST_M_DIVU: `TRACE(level, ("DIVU")); - `INST_M_REM: `TRACE(level, ("REM")); - 
`INST_M_REMU: `TRACE(level, ("REMU")); - default: `TRACE(level, ("?")); + `INST_M_MUL: `TRACE(level, ("MUL")) + `INST_M_MULH: `TRACE(level, ("MULH")) + `INST_M_MULHSU:`TRACE(level, ("MULHSU")) + `INST_M_MULHU: `TRACE(level, ("MULHU")) + `INST_M_DIV: `TRACE(level, ("DIV")) + `INST_M_DIVU: `TRACE(level, ("DIVU")) + `INST_M_REM: `TRACE(level, ("REM")) + `INST_M_REMU: `TRACE(level, ("REMU")) + default: `TRACE(level, ("?")) endcase end end - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase end `EX_LSU: begin if (op_args.lsu.is_float) begin case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LW: `TRACE(level, ("FLW")); - `INST_LSU_LD: `TRACE(level, ("FLD")); - `INST_LSU_SW: `TRACE(level, ("FSW")); - `INST_LSU_SD: `TRACE(level, ("FSD")); - default: `TRACE(level, ("?")); + `INST_LSU_LW: `TRACE(level, ("FLW")) + `INST_LSU_LD: `TRACE(level, ("FLD")) + `INST_LSU_SW: `TRACE(level, ("FSW")) + `INST_LSU_SD: `TRACE(level, ("FSD")) + default: `TRACE(level, ("?")) endcase end else begin case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LB: `TRACE(level, ("LB")); - `INST_LSU_LH: `TRACE(level, ("LH")); - `INST_LSU_LW: `TRACE(level, ("LW")); - `INST_LSU_LD: `TRACE(level, ("LD")); - `INST_LSU_LBU:`TRACE(level, ("LBU")); - `INST_LSU_LHU:`TRACE(level, ("LHU")); - `INST_LSU_LWU:`TRACE(level, ("LWU")); - `INST_LSU_SB: `TRACE(level, ("SB")); - `INST_LSU_SH: `TRACE(level, ("SH")); - `INST_LSU_SW: `TRACE(level, ("SW")); - `INST_LSU_SD: `TRACE(level, ("SD")); - `INST_LSU_FENCE:`TRACE(level,("FENCE")); - default: `TRACE(level, ("?")); + `INST_LSU_LB: `TRACE(level, ("LB")) + `INST_LSU_LH: `TRACE(level, ("LH")) + `INST_LSU_LW: `TRACE(level, ("LW")) + `INST_LSU_LD: `TRACE(level, ("LD")) + `INST_LSU_LBU:`TRACE(level, ("LBU")) + `INST_LSU_LHU:`TRACE(level, ("LHU")) + `INST_LSU_LWU:`TRACE(level, ("LWU")) + `INST_LSU_SB: `TRACE(level, ("SB")) + `INST_LSU_SH: `TRACE(level, ("SH")) + `INST_LSU_SW: `TRACE(level, ("SW")) + `INST_LSU_SD: `TRACE(level, ("SD")) + 
`INST_LSU_FENCE:`TRACE(level,("FENCE")) + default: `TRACE(level, ("?")) endcase end end `EX_SFU: begin case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); + `INST_SFU_TMC: `TRACE(level, ("TMC")) + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")) + `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")) else `TRACE(level, ("SPLIT")) end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")) + `INST_SFU_BAR: `TRACE(level, ("BAR")) + `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")) else `TRACE(level, ("PRED")) end + `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")) else `TRACE(level, ("CSRRW")) end + `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")) else `TRACE(level, ("CSRRS")) end + `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")) else `TRACE(level, ("CSRRC")) end + default: `TRACE(level, ("?")) endcase end `ifdef EXT_F_ENABLE @@ -483,174 +483,174 @@ package VX_gpu_pkg; `INST_FPU_ADD: begin if (op_args.fpu.fmt[1]) begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); + `TRACE(level, ("FSUB.D")) else - `TRACE(level, ("FSUB.S")); + `TRACE(level, ("FSUB.S")) end else begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); + `TRACE(level, 
("FADD.D")) else - `TRACE(level, ("FADD.S")); + `TRACE(level, ("FADD.S")) end end `INST_FPU_MADD: begin if (op_args.fpu.fmt[1]) begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); + `TRACE(level, ("FMSUB.D")) else - `TRACE(level, ("FMSUB.S")); + `TRACE(level, ("FMSUB.S")) end else begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); + `TRACE(level, ("FMADD.D")) else - `TRACE(level, ("FMADD.S")); + `TRACE(level, ("FMADD.S")) end end `INST_FPU_NMADD: begin if (op_args.fpu.fmt[1]) begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); + `TRACE(level, ("FNMSUB.D")) else - `TRACE(level, ("FNMSUB.S")); + `TRACE(level, ("FNMSUB.S")) end else begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); + `TRACE(level, ("FNMADD.D")) else - `TRACE(level, ("FNMADD.S")); + `TRACE(level, ("FNMADD.S")) end end `INST_FPU_MUL: begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMUL.D")); + `TRACE(level, ("FMUL.D")) else - `TRACE(level, ("FMUL.S")); + `TRACE(level, ("FMUL.S")) end `INST_FPU_DIV: begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FDIV.D")); + `TRACE(level, ("FDIV.D")) else - `TRACE(level, ("FDIV.S")); + `TRACE(level, ("FDIV.S")) end `INST_FPU_SQRT: begin if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSQRT.D")); + `TRACE(level, ("FSQRT.D")) else - `TRACE(level, ("FSQRT.S")); + `TRACE(level, ("FSQRT.S")) end `INST_FPU_CMP: begin if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.D")); - 1: `TRACE(level, ("FLT.D")); - 2: `TRACE(level, ("FEQ.D")); - default: `TRACE(level, ("?")); + 0: `TRACE(level, ("FLE.D")) + 1: `TRACE(level, ("FLT.D")) + 2: `TRACE(level, ("FEQ.D")) + default: `TRACE(level, ("?")) endcase end else begin case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.S")); - 1: `TRACE(level, ("FLT.S")); - 2: `TRACE(level, ("FEQ.S")); - default: `TRACE(level, ("?")); + 0: `TRACE(level, ("FLE.S")) + 1: `TRACE(level, ("FLT.S")) + 2: `TRACE(level, ("FEQ.S")) + default: `TRACE(level, ("?")) endcase end end `INST_FPU_F2F: 
begin if (op_args.fpu.fmt[0]) begin - `TRACE(level, ("FCVT.D.S")); + `TRACE(level, ("FCVT.D.S")) end else begin - `TRACE(level, ("FCVT.S.D")); + `TRACE(level, ("FCVT.S.D")) end end `INST_FPU_F2I: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.D")); + `TRACE(level, ("FCVT.L.D")) end else begin - `TRACE(level, ("FCVT.W.D")); + `TRACE(level, ("FCVT.W.D")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.S")); + `TRACE(level, ("FCVT.L.S")) end else begin - `TRACE(level, ("FCVT.W.S")); + `TRACE(level, ("FCVT.W.S")) end end end `INST_FPU_F2U: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.D")); + `TRACE(level, ("FCVT.LU.D")) end else begin - `TRACE(level, ("FCVT.WU.D")); + `TRACE(level, ("FCVT.WU.D")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.S")); + `TRACE(level, ("FCVT.LU.S")) end else begin - `TRACE(level, ("FCVT.WU.S")); + `TRACE(level, ("FCVT.WU.S")) end end end `INST_FPU_I2F: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.L")); + `TRACE(level, ("FCVT.D.L")) end else begin - `TRACE(level, ("FCVT.D.W")); + `TRACE(level, ("FCVT.D.W")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.L")); + `TRACE(level, ("FCVT.S.L")) end else begin - `TRACE(level, ("FCVT.S.W")); + `TRACE(level, ("FCVT.S.W")) end end end `INST_FPU_U2F: begin if (op_args.fpu.fmt[0]) begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.LU")); + `TRACE(level, ("FCVT.D.LU")) end else begin - `TRACE(level, ("FCVT.D.WU")); + `TRACE(level, ("FCVT.D.WU")) end end else begin if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.LU")); + `TRACE(level, ("FCVT.S.LU")) end else begin - `TRACE(level, ("FCVT.S.WU")); + `TRACE(level, ("FCVT.S.WU")) end end end `INST_FPU_MISC: begin if (op_args.fpu.fmt[0]) begin case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.D")); - 1: `TRACE(level, 
("FSGNJN.D")); - 2: `TRACE(level, ("FSGNJX.D")); - 3: `TRACE(level, ("FCLASS.D")); - 4: `TRACE(level, ("FMV.X.D")); - 5: `TRACE(level, ("FMV.D.X")); - 6: `TRACE(level, ("FMIN.D")); - 7: `TRACE(level, ("FMAX.D")); + 0: `TRACE(level, ("FSGNJ.D")) + 1: `TRACE(level, ("FSGNJN.D")) + 2: `TRACE(level, ("FSGNJX.D")) + 3: `TRACE(level, ("FCLASS.D")) + 4: `TRACE(level, ("FMV.X.D")) + 5: `TRACE(level, ("FMV.D.X")) + 6: `TRACE(level, ("FMIN.D")) + 7: `TRACE(level, ("FMAX.D")) endcase end else begin case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.S")); - 1: `TRACE(level, ("FSGNJN.S")); - 2: `TRACE(level, ("FSGNJX.S")); - 3: `TRACE(level, ("FCLASS.S")); - 4: `TRACE(level, ("FMV.X.S")); - 5: `TRACE(level, ("FMV.S.X")); - 6: `TRACE(level, ("FMIN.S")); - 7: `TRACE(level, ("FMAX.S")); + 0: `TRACE(level, ("FSGNJ.S")) + 1: `TRACE(level, ("FSGNJN.S")) + 2: `TRACE(level, ("FSGNJX.S")) + 3: `TRACE(level, ("FCLASS.S")) + 4: `TRACE(level, ("FMV.X.S")) + 5: `TRACE(level, ("FMV.S.X")) + 6: `TRACE(level, ("FMIN.S")) + 7: `TRACE(level, ("FMAX.S")) endcase end end - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase end `endif - default: `TRACE(level, ("?")); + default: `TRACE(level, ("?")) endcase endtask @@ -661,19 +661,19 @@ package VX_gpu_pkg; ); case (ex_type) `EX_ALU: begin - `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); + `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)) end `EX_LSU: begin - `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); + `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)) end `EX_SFU: begin if (`INST_SFU_IS_CSR(op_type)) begin - `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); + `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)) end end `ifdef EXT_F_ENABLE `EX_FPU: begin - `TRACE(level, 
(", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); + `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)) end `endif default:; @@ -682,12 +682,12 @@ package VX_gpu_pkg; task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); case (addr) - `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); - `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); - `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); - `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); - `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); - default: `TRACE(level, ("?")); + `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")) + `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")) + `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")) + `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")) + `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")) + default: `TRACE(level, ("?")) endcase endtask diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 74907ad4c..5a4426b28 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -47,7 +47,10 @@ `define UNUSED_VAR(x) `define UNUSED_PIN(x) . 
x () `define UNUSED_ARG(x) x -`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args +`define TRACE(level, args) \ + if (level <= `DEBUG_LEVEL) begin \ + $write args; \ + end `else `ifdef VERILATOR @@ -122,9 +125,12 @@ `endif `ifdef SV_DPI -`define TRACE(level, args) dpi_trace(level, $sformatf args) +`define TRACE(level, args) dpi_trace(level, $sformatf args); `else -`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args +`define TRACE(level, args) \ + if (level <= `DEBUG_LEVEL) begin \ + $write args; \ + end `endif `endif @@ -211,23 +217,23 @@ `define SEXT(len, x) {{(len-$bits(x)+1){x[$bits(x)-1]}}, x[$bits(x)-2:0]} `define TRACE_ARRAY1D(lvl, fmt, arr, n) \ - `TRACE(lvl, ("{")); \ + `TRACE(lvl, ("{")) \ for (integer __i = (n-1); __i >= 0; --__i) begin \ - if (__i != (n-1)) `TRACE(lvl, (", ")); \ - `TRACE(lvl, (fmt, arr[__i])); \ + if (__i != (n-1)) `TRACE(lvl, (", ")) \ + `TRACE(lvl, (fmt, arr[__i])) \ end \ - `TRACE(lvl, ("}")); + `TRACE(lvl, ("}")) `define TRACE_ARRAY2D(lvl, fmt, arr, m, n) \ - `TRACE(lvl, ("{")); \ + `TRACE(lvl, ("{")) \ for (integer __i = n-1; __i >= 0; --__i) begin \ - if (__i != (n-1)) `TRACE(lvl, (", ")); \ - `TRACE(lvl, ("{")); \ + if (__i != (n-1)) `TRACE(lvl, (", ")) \ + `TRACE(lvl, ("{")) \ for (integer __j = (m-1); __j >= 0; --__j) begin \ - if (__j != (m-1)) `TRACE(lvl, (", "));\ - `TRACE(lvl, (fmt, arr[__i][__j])); \ + if (__j != (m-1)) `TRACE(lvl, (", "))\ + `TRACE(lvl, (fmt, arr[__i][__j])) \ end \ - `TRACE(lvl, ("}")); \ + `TRACE(lvl, ("}")) \ end \ `TRACE(lvl, ("}")) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 8f171a486..dc9f6f034 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -192,19 +192,19 @@ module Vortex import VX_gpu_pkg::*; ( // dump device configuration initial begin `TRACE(0, ("CONFIGS: num_threads=%0d, num_warps=%0d, num_cores=%0d, num_clusters=%0d, socket_size=%0d, local_mem_base=0x%0h, num_barriers=%0d\n", - `NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, 
`LMEM_BASE_ADDR, `NUM_BARRIERS)); + `NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, `LMEM_BASE_ADDR, `NUM_BARRIERS)) end `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)); + `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) else - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)); + `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) end if (mem_rsp_fire) begin - `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)); + `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) end end `endif diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index ffc0af282..2ebd66fcf 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -260,7 +260,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)); + `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) end `endif end @@ -268,28 +268,28 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_SCOPE_READ: begin mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)); + `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) 
`endif end `endif MMIO_DEV_CAPS: begin mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)); + `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) `endif end MMIO_ISA_CAPS: begin mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)); + `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) end `endif end default: begin mmio_rsp.data <= 64'h0; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)); + `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) `endif end endcase @@ -303,36 +303,36 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_CMD_ARG0: begin cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG1: begin cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG2: begin cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_TYPE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)); + `TRACE(2, ("%d: 
MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) `endif end `endif default: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))); + `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))) `endif end endcase @@ -372,25 +372,25 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ case (cmd_type) CMD_MEM_READ: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)); + `TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_READ; end CMD_MEM_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)); + `TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_WRITE; end CMD_DCR_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)); + `TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) `endif state <= STATE_DCR_WRITE; end CMD_RUN: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)); + `TRACE(2, ("%d: STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_running <= 0; @@ -404,7 +404,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_rd_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end end @@ -412,14 +412,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_wr_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, 
("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end end STATE_DCR_WRITE: begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end STATE_RUN: begin @@ -434,8 +434,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (~vx_busy) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)); - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%d: AFU: End execution\n", $time)) + `TRACE(2, ("%d: STATE IDLE\n", $time)) `endif end end @@ -443,7 +443,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // wait until the reset sequence is complete if (vx_reset_ctr == (`RESET_DELAY-1)) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)); + `TRACE(2, ("%d: AFU: Begin execution\n", $time)) `endif vx_running <= 1; vx_busy_wait <= 1; @@ -745,7 +745,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)); + `TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) `endif end @@ -755,13 +755,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)); + `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, 
cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) `endif end if (cci_rdq_pop) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)); + `TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) `endif end @@ -899,13 +899,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_wr_req_done <= 1; end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)); + `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) `endif end if (cci_wr_rsp_fire) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)); + `TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) `endif end end @@ -1086,13 +1086,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin if (avs_write[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])); + `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])) end if (avs_read[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])); + `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])) end if (avs_readdatavalid[i]) begin - 
`TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])); + `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])) end end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index e1ba82126..1efda8029 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -133,7 +133,7 @@ module VX_afu_wrap #( STATE_IDLE: begin if (ap_start) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)); + `TRACE(2, ("%d: STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_reset_ctr <= 0; @@ -145,7 +145,7 @@ module VX_afu_wrap #( // wait until the reset network is ready if (vx_reset_ctr == 0) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)); + `TRACE(2, ("%d: AFU: Begin execution\n", $time)) `endif vx_busy_wait <= 1; vx_reset <= 0; @@ -160,7 +160,7 @@ module VX_afu_wrap #( // wait until the processor is not busy if (~vx_busy) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)); + `TRACE(2, ("%d: AFU: End execution\n", $time)) `endif state <= STATE_IDLE; end @@ -365,16 +365,16 @@ module VX_afu_wrap #( always @(posedge ap_clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])); + `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) end if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])); + `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) end if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin - `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])); + `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, 
tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) end if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])); + `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) end end end diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index a8f8dbdf2..e18be4b66 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -394,7 +394,7 @@ module VX_cache_bank #( `UNUSED_VAR (do_write_miss_st1) // ensure mshr replay always get a hit - `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)); + `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) // both tag and data stores use BRAM with no read-during-write protection. // we ned to stall the pipeline to prevent read-after-write hazards. 
@@ -599,7 +599,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin // ensure dirty bytes match the tag info wire has_dirty_bytes = (| dirty_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))); + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) || do_writeback_st1) @@ -663,30 +663,30 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)); + `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) end if (mem_rsp_fire) begin - `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)); + `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) end if (replay_fire) begin - `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)); + `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", 
$time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin if (core_req_rw) - `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); + `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) else - `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); + `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end if (crsp_queue_fire) begin - `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)); + `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin if (do_creq_wr_st1 && !WRITEBACK) - `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); + `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) else if (do_writeback_st1) - 
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); + `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) else - `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); + `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) end end `endif diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 302a99e5e..12e0e1ca3 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -182,16 +182,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); + `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)); + `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) end if (read && ~stall) begin - `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, 
req_uuid)); + `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) end if (write && ~stall) begin - `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); + `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 0ca67d159..855b95324 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -269,33 +269,33 @@ module VX_cache_mshr #( end if (allocate_fire) `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)); + `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) if (lookup_valid) `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)); + `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) if (finalize_valid) `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)); + finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) if (fill_valid) `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, 
BANK_ID), fill_id)); + `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) if (dequeue_fire) `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)); + `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)) if (show_table) begin - `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)); + `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)) for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin - `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))); + `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))) if (write_table[i]) - `TRACE(3, ("(w)")); + `TRACE(3, ("(w)")) else - `TRACE(3, ("(r)")); + `TRACE(3, ("(r)")) if (next_table[i]) - `TRACE(3, ("->%0d", next_index[i])); + `TRACE(3, ("->%0d", next_index[i])) end end - `TRACE(3, ("\n")); + `TRACE(3, ("\n")) end end `endif diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 4d5b0bcd3..dc2e77092 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -149,25 +149,25 @@ module VX_cache_tags #( wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel}; always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))); + `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)); + `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)); + `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) end if (lookup && ~stall) begin if (tag_matches != 0) begin if (write) - `TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) else - `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) end else begin if (write) - `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); + `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) else - `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, 
req_uuid)); + `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) end end end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 513c29b5d..6210c313e 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -234,12 +234,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); + `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) else - `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); + `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) end if (core_rsp_fire) begin - `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); + `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) end end end @@ -262,14 +262,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( if (mem_req_fire) begin if 
(mem_bus_if.req_data.rw) `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) else `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)); + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) end if (mem_rsp_fire) begin `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)); + $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 06acfde39..083438e88 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -195,7 +195,7 @@ module VX_alu_int #( always @(posedge clk) begin if (br_enable) begin `TRACE(1, ("%d: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", - $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)); + $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index 160bcf4d4..f993c9648 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -178,11 +178,11 @@ module VX_commit import VX_gpu_pkg::*; #( for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, 
INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) trace_ex_type(1, j); - `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)); - `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS); - `TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid)); + `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)) + `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS) + `TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid)) end end end diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index b20d95fc7..03c5be61f 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (dcr_bus_if.write_valid) begin - `TRACE(1, ("%d: base-dcr: state=", $time)); + `TRACE(1, ("%d: base-dcr: state=", $time)) trace_base_dcr(1, dcr_bus_if.write_addr); - `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)); + `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)) end end `endif diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 897dfcc11..28d27a299 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -568,14 +568,14 @@ module VX_decode import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always 
@(posedge clk) begin if (decode_if.valid && decode_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)) trace_ex_type(1, decode_if.data.ex_type); - `TRACE(1, (", op=")); + `TRACE(1, (", op=")) trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b", - decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3)); + decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3)) trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); - `TRACE(1, (" (#%0d)\n", decode_if.data.uuid)); + `TRACE(1, (" (#%0d)\n", decode_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 1da184288..46283818a 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -168,10 +168,10 @@ module VX_fetch import VX_gpu_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin if (schedule_fire) begin - `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)); + `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) end if (fetch_fire) begin - `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, 
fetch_if.data.instr, fetch_if.data.uuid)); + `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 18dd41cd7..a99bf2c8f 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -135,18 +135,18 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (operands_if.valid && operands_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})); + `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); - `TRACE(1, (", op=")); + `TRACE(1, (", op=")) trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); - `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS); - `TRACE(1, (", rs2_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS); - `TRACE(1, (", rs3_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS); + `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS) + `TRACE(1, (", rs2_data=")) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS) + `TRACE(1, (", rs3_data=")) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS) trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); - `TRACE(1, (" (#%0d)\n", 
operands_if.data.uuid)); + `TRACE(1, (" (#%0d)\n", operands_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 7ee15bb14..bd82aee31 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -189,7 +189,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire lsu_req_fire = execute_if.valid && execute_if.ready; `RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0), ("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", - $time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)); + $time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)) end // store data formatting @@ -505,30 +505,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)); + `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)) end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); - `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)); - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); - `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); + `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) + `TRACE(1, (", 
flags=")) + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) + `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)) + `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES) + `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end else begin - `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES); - `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)); + `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) + `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end end if (mem_rsp_fire) begin `TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", - $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)); - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES); - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)); + $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)) + `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES) + `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) end end `endif diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index b2d9ff2be..14d88b8b1 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -208,7 
+208,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE `TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, - operands_busy, staging_if[w].data.uuid)); + operands_busy, staging_if[w].data.uuid)) `endif timeout_ctr <= timeout_ctr + 1; end else if (ibuffer_fire) begin @@ -220,11 +220,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT), ("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, - operands_busy, staging_if[w].data.uuid)); + operands_busy, staging_if[w].data.uuid)) `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0, ("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", - $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid)); + $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid)) `endif end diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 9cd862560..25ce1081b 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -170,7 +170,7 @@ module VX_axi_adapter #( `UNUSED_VAR (m_axi_bid[i]) `UNUSED_VAR (m_axi_bresp[i]) assign m_axi_bready[i] = 1'b1; - `RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time)); + `RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time)) end // AXI read request channel @@ -200,8 +200,8 @@ module VX_axi_adapter #( assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; - 
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)); - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)); + `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)) + `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)) end VX_stream_arb #( diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 64b22150c..49f37caff 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -59,7 +59,7 @@ module VX_dp_ram #( `UNUSED_VAR (read) if (WRENW > 1) begin - `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)); + `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)) end if (OUT_REG && !READ_ENABLE) begin @@ -341,7 +341,7 @@ module VX_dp_ram #( assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; if (RW_ASSERT) begin - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)); + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index eba9532f4..dd772ea73 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -162,8 +162,8 @@ module VX_fifo_queue #( end end - `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("%t: runtime error: incrementing full queue", $time)); - `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("%t: runtime error: decrementing empty queue", $time)); + `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("%t: runtime error: incrementing full queue", $time)) + `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("%t: runtime error: decrementing empty queue", $time)) endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_index_queue.sv b/hw/rtl/libs/VX_index_queue.sv index 23ec6ed83..e73db0ff9 100644 --- a/hw/rtl/libs/VX_index_queue.sv +++ 
b/hw/rtl/libs/VX_index_queue.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,9 +20,9 @@ module VX_index_queue #( ) ( input wire clk, input wire reset, - input wire [DATAW-1:0] write_data, + input wire [DATAW-1:0] write_data, output wire [`LOG2UP(SIZE)-1:0] write_addr, - input wire push, + input wire push, input wire pop, output wire full, output wire empty, @@ -30,33 +30,33 @@ module VX_index_queue #( output wire [DATAW-1:0] read_data ); reg [DATAW-1:0] entries [SIZE-1:0]; - reg [SIZE-1:0] valid; + reg [SIZE-1:0] valid; reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr; wire [`LOG2UP(SIZE)-1:0] rd_a, wr_a; wire enqueue, dequeue; assign rd_a = rd_ptr[`LOG2UP(SIZE)-1:0]; - assign wr_a = wr_ptr[`LOG2UP(SIZE)-1:0]; + assign wr_a = wr_ptr[`LOG2UP(SIZE)-1:0]; assign empty = (wr_ptr == rd_ptr); assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]); - assign enqueue = push; + assign enqueue = push; assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid - `RUNTIME_ASSERT(!push || !full, ("%t: *** invalid inputs", $time)); - + `RUNTIME_ASSERT(!push || !full, ("%t: *** invalid inputs", $time)) + always @(posedge clk) begin if (reset) begin rd_ptr <= '0; wr_ptr <= '0; - valid <= '0; + valid <= '0; end else begin if (enqueue) begin valid[wr_a] <= 1; wr_ptr <= wr_ptr + 1; - end + end if (dequeue) begin rd_ptr <= rd_ptr + 1; end @@ -67,7 +67,7 @@ module VX_index_queue #( if (enqueue) begin entries[wr_a] <= write_data; - end + end end assign write_addr = wr_a; diff --git 
a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index b284a6449..e56d802e1 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -76,8 +76,8 @@ module VX_mem_coalescer #( `UNUSED_SPARAM (INSTANCE_ID) `STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) - `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)); - `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("%t: invalid request mask", $time)); + `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)) + `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("%t: invalid request mask", $time)) localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; // tag + mask + offest @@ -331,30 +331,30 @@ module VX_mem_coalescer #( always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); + `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS) end else begin - `TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", flags=")); - `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS); + 
`TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) end - `TRACE(1, (", offset=")); - `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); - `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)); + `TRACE(1, (", offset=")) + `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS) + `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) end if (out_rsp_fire) begin - `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); - `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS); - `TRACE(1, (", offset=")); - `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS); - `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)); + `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) + `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS) + `TRACE(1, (", offset=")) + `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS) + `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 9599adf13..b0d8704e3 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -97,8 +97,8 @@ module VX_mem_scheduler #( `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((0 == RSP_PARTIAL) || (1 == RSP_PARTIAL), ("invalid parameter")) - `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)); - + `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid 
request mask", $time)) + wire ibuf_push; wire ibuf_pop; wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr; @@ -584,41 +584,41 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); + `TRACE(1, ("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS) end else begin - `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); + `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) end - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); + `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); - `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS); - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)); + `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) + `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS) + `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); - `TRACE_ARRAY1D(1, 
"0x%h", mem_req_addr_s, CORE_CHANNELS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); + `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS) end else begin - `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); + `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) end - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)); + `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS); - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)); + `TRACE(1, ("%d: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) + `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) + `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index c5ba778a2..5ec39438c 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,14 +14,14 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_scope_tap #( +module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width parameter TRIGGERW = 0, // trigger signals width parameter PROBEW = 0, // probe signal width parameter SIZE = 256, // trace buffer size parameter IDLE_CTRW = 16 // idle time between triggers counter width -) ( +) ( input wire clk, input wire reset, input wire start, @@ -29,16 +29,16 @@ module VX_scope_tap #( input wire [TRIGGERW-1:0] triggers, input wire [PROBEW-1:0] probes, input wire bus_in, - output wire bus_out + output wire bus_out ); localparam TX_DATAW = 64; localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); - localparam DATAW = PROBEW + TRIGGERW; + localparam DATAW = PROBEW + TRIGGERW; localparam DATA_BITS = `LOG2UP(DATAW); localparam ADDRW = `CLOG2(SIZE); localparam TRIGGER_ENABLE = (TRIGGERW != 0); - localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; - + localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; + localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; localparam CTRL_STATE_CMD = 2'd2; @@ -80,7 +80,7 @@ module VX_scope_tap #( reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; reg [GET_TYPE_BITS-1:0] get_type; - + reg [TX_DATA_BITS-1:0] ser_tx_ctr; reg [DATA_BITS-1:0] read_offset; reg [ADDRW-1:0] raddr; @@ -109,20 +109,20 @@ module VX_scope_tap #( case (tap_state) TAP_STATE_IDLE: begin - if (start || cmd_start) begin + if (start || cmd_start) begin delta <= '0; - delta_flush <= 1; + delta_flush <= 1; if (0 == start_delay) begin tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, 
("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)); + `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end else begin tap_state <= TAP_STATE_WAIT; - delay_cntr <= start_delay; + delay_cntr <= start_delay; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)); + `TRACE(2, ("%d: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) `endif end end @@ -133,13 +133,13 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)); + `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end end TAP_STATE_RUN: begin if (TRIGGER_ENABLE != 0) begin - if (delta_flush || (triggers != prev_triggers)) begin + if (delta_flush || (triggers != prev_triggers)) begin data_store[waddr] <= {probes, triggers}; delta_store[waddr] <= delta; waddr <= waddr + 1; @@ -150,7 +150,7 @@ module VX_scope_tap #( delta_flush <= (delta == (MAX_IDLE_CTR-1)); end prev_triggers <= triggers; - end else begin + end else begin data_store[waddr] <= {probes, triggers}; delta_store[waddr] <= '0; waddr <= waddr + 1; @@ -158,26 +158,26 @@ module VX_scope_tap #( if (stop || (waddr >= waddr_end)) begin waddr <= waddr; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)); + `TRACE(2, ("%d: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif - tap_state <= TAP_STATE_IDLE; + tap_state <= TAP_STATE_IDLE; end end default:; endcase - - if (ctrl_state == CTRL_STATE_SEND + + if (ctrl_state == CTRL_STATE_SEND && get_type == GET_TYPE_DATA && ser_tx_ctr == 0) begin if (~read_data) begin read_data <= 1; end else begin if (DATAW > TX_DATAW) begin - 
`IGNORE_WARNINGS_BEGIN + `IGNORE_WARNINGS_BEGIN if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin read_offset <= read_offset + DATA_BITS'(TX_DATAW); end else begin - raddr <= raddr_n; + raddr <= raddr_n; read_data <= 0; read_offset <= '0; end @@ -185,7 +185,7 @@ module VX_scope_tap #( end else begin raddr <= raddr_n; read_data <= 0; - end + end if (raddr_n == waddr) begin raddr <= 0; end @@ -197,9 +197,9 @@ module VX_scope_tap #( // // command controller // - + reg bus_out_r; - + reg [TX_DATAW-1:0] ser_buf_in; wire [TX_DATAW-1:0] ser_buf_in_n = {ser_buf_in[TX_DATAW-2:0], bus_in}; `UNUSED_VAR (ser_buf_in) @@ -210,16 +210,16 @@ module VX_scope_tap #( wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_store[raddr] >> read_offset)); wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_store[raddr]); - + always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; cmd_start <= 0; start_delay <= '0; - waddr_end <= ADDRW'(SIZE-1); - bus_out_r <= 0; + waddr_end <= ADDRW'(SIZE-1); + bus_out_r <= 0; end else begin - bus_out_r <= 0; + bus_out_r <= 0; cmd_start <= 0; case (ctrl_state) @@ -236,9 +236,9 @@ module VX_scope_tap #( ctrl_state <= (cmd_scope_id == SCOPE_ID) ? 
CTRL_STATE_CMD : CTRL_STATE_IDLE; end end - CTRL_STATE_CMD: begin + CTRL_STATE_CMD: begin ctrl_state <= CTRL_STATE_IDLE; - case (cmd_type) + case (cmd_type) CMD_SET_START: begin start_delay <= 64'(cmd_data); cmd_start <= 1; @@ -249,16 +249,16 @@ module VX_scope_tap #( CMD_GET_WIDTH, CMD_GET_START, CMD_GET_COUNT, - CMD_GET_DATA: begin - ctrl_state <= CTRL_STATE_SEND; + CMD_GET_DATA: begin + ctrl_state <= CTRL_STATE_SEND; get_type <= GET_TYPE_BITS'(cmd_type); ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); bus_out_r <= 1; end default:; - endcase + endcase `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)); + `TRACE(2, ("%d: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) `endif end CTRL_STATE_SEND: begin @@ -268,43 +268,43 @@ module VX_scope_tap #( bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)); - end - `endif + `TRACE(2, ("%d: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) + end + `endif end GET_TYPE_COUNT: begin bus_out_r <= 1'(count >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)); - end - `endif + `TRACE(2, ("%d: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) + end + `endif end GET_TYPE_START: begin - bus_out_r <= 1'(start_time >> ser_tx_ctr); + bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)); - end - `endif + `TRACE(2, ("%d: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) + end + `endif end GET_TYPE_DATA: begin bus_out_r <= 1'(get_data >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)); - end - `endif + `TRACE(2, ("%d: *** scope 
#%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) + end + `endif end default:; endcase if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; - end + end end default:; endcase - end + end end assign bus_out = bus_out_r; diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index 3e5bbebcb..7e03c1378 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -61,10 +61,10 @@ module VX_gbar_unit #( always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin `TRACE(1, ("%d: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", - $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)); + $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)) end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)); + `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) end end `endif diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 700bcb48c..1c03b0387 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -331,15 +331,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); + $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])) end else begin `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])); + $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, 
mem_bus_if[i].req_data.tag, req_uuid[i])) end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])); + $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])) end end end @@ -349,15 +349,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); + $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])) end else begin `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])); + $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])) end end if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])); + $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])) end end end From cc105eaea96f69bb8ff913893aa13cf1f03eb579 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 14:54:04 -0700 Subject: [PATCH 244/488] tracing refactoring --- hw/rtl/VX_gpu_pkg.sv | 85 +++++++++++++++++++++++++---------- hw/rtl/Vortex.sv | 5 ++- hw/rtl/cache/VX_cache_bank.sv | 12 ++--- hw/rtl/cache/VX_cache_mshr.sv | 23 ++++++---- hw/rtl/cache/VX_cache_tags.sv | 10 +++-- hw/rtl/cache/VX_cache_wrap.sv | 10 +++-- 6 files changed, 99 insertions(+), 46 deletions(-) diff 
--git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 7748b8eec..e5afefe8e 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -467,13 +467,43 @@ package VX_gpu_pkg; case (`INST_SFU_BITS'(op_type)) `INST_SFU_TMC: `TRACE(level, ("TMC")) `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")) - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")) else `TRACE(level, ("SPLIT")) end + `INST_SFU_SPLIT: begin + if (op_args.wctl.is_neg) begin + `TRACE(level, ("SPLIT.N")) + end else begin + `TRACE(level, ("SPLIT")) + end + end `INST_SFU_JOIN: `TRACE(level, ("JOIN")) `INST_SFU_BAR: `TRACE(level, ("BAR")) - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")) else `TRACE(level, ("PRED")) end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")) else `TRACE(level, ("CSRRW")) end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")) else `TRACE(level, ("CSRRS")) end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")) else `TRACE(level, ("CSRRC")) end + `INST_SFU_PRED: begin + if (op_args.wctl.is_neg) begin + `TRACE(level, ("PRED.N")) + end else begin + `TRACE(level, ("PRED")) + end + end + `INST_SFU_CSRRW: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRWI")) + end else begin + `TRACE(level, ("CSRRW")) + end + end + `INST_SFU_CSRRS: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRSI")) + end else begin + `TRACE(level, ("CSRRS")) + end + end + `INST_SFU_CSRRC: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRCI")) + end else begin + `TRACE(level, ("CSRRC")) + end + end default: `TRACE(level, ("?")) endcase end @@ -482,60 +512,69 @@ package VX_gpu_pkg; case (`INST_FPU_BITS'(op_type)) `INST_FPU_ADD: begin if (op_args.fpu.fmt[1]) begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FSUB.D")) - else + end else begin `TRACE(level, ("FSUB.S")) + end end else begin - if (op_args.fpu.fmt[0]) + if 
(op_args.fpu.fmt[0]) begin `TRACE(level, ("FADD.D")) - else + end else begin `TRACE(level, ("FADD.S")) + end end end `INST_FPU_MADD: begin if (op_args.fpu.fmt[1]) begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FMSUB.D")) - else + end else begin `TRACE(level, ("FMSUB.S")) + end end else begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FMADD.D")) - else + end else begin `TRACE(level, ("FMADD.S")) + end end end `INST_FPU_NMADD: begin if (op_args.fpu.fmt[1]) begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FNMSUB.D")) - else + end else begin `TRACE(level, ("FNMSUB.S")) + end end else begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FNMADD.D")) - else + end else begin `TRACE(level, ("FNMADD.S")) + end end end `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FMUL.D")) - else + end else begin `TRACE(level, ("FMUL.S")) + end end `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FDIV.D")) - else + end else begin `TRACE(level, ("FDIV.S")) + end end `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) + if (op_args.fpu.fmt[0]) begin `TRACE(level, ("FSQRT.D")) - else + end else begin `TRACE(level, ("FSQRT.S")) + end end `INST_FPU_CMP: begin if (op_args.fpu.fmt[0]) begin diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index dc9f6f034..0263e1790 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -198,10 +198,11 @@ module Vortex import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (mem_req_fire) begin - if (mem_req_rw) + if (mem_req_rw) begin `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) - else + end else begin `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, 
mem_req_byteen)) + end end if (mem_rsp_fire) begin `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index e18be4b66..a8355ac76 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -672,21 +672,23 @@ module VX_cache_bank #( `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin - if (core_req_rw) + if (core_req_rw) begin `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) - else + end else begin `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) + end end if (crsp_queue_fire) begin `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin - if (do_creq_wr_st1 && !WRITEBACK) + if (do_creq_wr_st1 && !WRITEBACK) begin `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) - else if (do_writeback_st1) + end else if (do_writeback_st1) begin `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) - else + end else begin `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) + end end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 855b95324..d771a20e0 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -267,32 +267,39 @@ module VX_cache_mshr #( end else begin show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; end - if (allocate_fire) + if (allocate_fire) begin `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) - if (lookup_valid) + end + if (lookup_valid) begin `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) - if (finalize_valid) + end + if (finalize_valid) begin `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) - if (fill_valid) + end + if (fill_valid) begin `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) - if (dequeue_fire) + end + if (dequeue_fire) begin `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)) + end if (show_table) begin `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)) for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))) - if (write_table[i]) + if (write_table[i]) begin `TRACE(3, ("(w)")) - else + end else begin `TRACE(3, ("(r)")) - if (next_table[i]) + end + if (next_table[i]) begin `TRACE(3, ("->%0d", next_index[i])) + end end end `TRACE(3, 
("\n")) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index dc2e77092..b6c3735b5 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -159,15 +159,17 @@ module VX_cache_tags #( end if (lookup && ~stall) begin if (tag_matches != 0) begin - if (write) + if (write) begin `TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) - else + end else begin `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + end end else begin - if (write) + if (write) begin `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) - else + end else begin `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + end end end end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 6210c313e..578768981 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -233,10 +233,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin - if (core_bus_if[i].req_data.rw) + if (core_bus_if[i].req_data.rw) begin `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) - else + end else begin `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, 
`TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) + end end if (core_rsp_fire) begin `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) @@ -260,12 +261,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin - if (mem_bus_if.req_data.rw) + if (mem_bus_if.req_data.rw) begin `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) - else + end else begin `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) + end end if (mem_rsp_fire) begin `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", From b1dc2fba42709ab2f3b4c335f4d490b9444131b8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 17:47:17 -0700 Subject: [PATCH 245/488] cache read byteenable bug fix --- hw/rtl/cache/VX_cache_bank.sv | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index a8355ac76..4abd7bad9 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -617,9 +617,18 @@ module VX_cache_bank #( assign mreq_queue_flush = creq_flush_st1; if (WRITE_ENABLE) begin - assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1; - assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1; - assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1; + if (WRITEBACK) begin + assign mreq_queue_rw = is_fill_or_flush_st1; + assign mreq_queue_data = dirty_data_st1; + assign mreq_queue_byteen = is_fill_or_flush_st1 ? 
dirty_byteen_st1 : '1; + end else begin + assign mreq_queue_rw = rw_st1; + assign mreq_queue_data = write_data_st1; + assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1; + `UNUSED_VAR (is_fill_or_flush_st1) + `UNUSED_VAR (dirty_data_st1) + `UNUSED_VAR (dirty_byteen_st1) + end end else begin assign mreq_queue_rw = 0; assign mreq_queue_data = '0; From 207840a97e36197e3ff16cf5120b2ec4e2c3ad53 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 17:49:28 -0700 Subject: [PATCH 246/488] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 122 ++++++++++++++++------------------ hw/rtl/afu/xrt/VX_afu_wrap.sv | 8 +-- 2 files changed, 62 insertions(+), 68 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 2ebd66fcf..1fbb9d1b7 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -260,7 +260,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) + `TRACE(2, ("%d: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) end `endif end @@ -268,28 +268,28 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_SCOPE_READ: begin mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) + `TRACE(2, ("%d: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) `endif end `endif MMIO_DEV_CAPS: begin mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) + `TRACE(2, ("%d: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) `endif end MMIO_ISA_CAPS: begin mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - 
`TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) + `TRACE(2, ("%d: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) end `endif end default: begin mmio_rsp.data <= 64'h0; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) + `TRACE(2, ("%d: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) `endif end endcase @@ -303,30 +303,30 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_CMD_ARG0: begin cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG1: begin cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG2: begin cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_TYPE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%d: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) + `TRACE(2, ("%d: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) `endif end `endif @@ -344,18 +344,10 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire cmd_mem_rd_done; reg cmd_mem_wr_done; - reg vx_busy_wait; - reg vx_running; - wire vx_busy; - reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; - 
always @(posedge clk) begin - if (state == STATE_RUN) begin - vx_reset_ctr <= vx_reset_ctr + $bits(vx_reset_ctr)'(1); - end else begin - vx_reset_ctr <= '0; - end - end + reg vx_busy_wait; + reg vx_reset = 1; // asserted at initialization + wire vx_busy; wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address); wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ? @@ -363,37 +355,37 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin if (reset) begin - state <= STATE_IDLE; - vx_busy_wait <= 0; - vx_running <= 0; + state <= STATE_IDLE; + vx_reset <= 1; end else begin case (state) STATE_IDLE: begin case (cmd_type) CMD_MEM_READ: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%d: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_READ; end CMD_MEM_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%d: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_WRITE; end CMD_DCR_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) + `TRACE(2, ("%d: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) `endif state <= STATE_DCR_WRITE; end CMD_RUN: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_running <= 0; + vx_reset_ctr <= (`RESET_DELAY-1); + vx_reset <= 1; end default: begin state <= state; @@ -404,54 +396,56 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; 
import VX_ if (cmd_mem_rd_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) `endif end end STATE_MEM_WRITE: begin if (cmd_mem_wr_done) begin state <= STATE_IDLE; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)) - `endif end end STATE_DCR_WRITE: begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) `endif end STATE_RUN: begin - if (vx_running) begin - if (vx_busy_wait) begin - // wait until the gpu goes busy - if (vx_busy) begin - vx_busy_wait <= 0; - end - end else begin - // wait until the gpu is not busy - if (~vx_busy) begin - state <= STATE_IDLE; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)) - `TRACE(2, ("%d: STATE IDLE\n", $time)) - `endif - end - end + if (vx_reset) begin + // wait until the reset network is ready + if (vx_reset_ctr == 0) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: AFU: Begin execution\n", $time)) + `endif + vx_busy_wait <= 1; + vx_reset <= 0; + end end else begin - // wait until the reset sequence is complete - if (vx_reset_ctr == (`RESET_DELAY-1)) begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)) - `endif - vx_running <= 1; - vx_busy_wait <= 1; - end + if (vx_busy_wait) begin + // wait until processor goes busy + if (vx_busy) begin + vx_busy_wait <= 0; + end + end else begin + // wait until the processor is not busy + if (~vx_busy) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%d: AFU: End execution\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `endif + state <= STATE_IDLE; + end + end end end default:; endcase + + // ensure reset network initialization + if (vx_reset_ctr != '0) begin + vx_reset_ctr <= vx_reset_ctr - 1; + end end end @@ -745,7 +739,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_rd_req_addr <= cci_rd_req_addr + 1; 
cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) + `TRACE(2, ("%d: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) `endif end @@ -755,13 +749,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) + `TRACE(2, ("%d: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) `endif end if (cci_rdq_pop) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) + `TRACE(2, ("%d: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) `endif end @@ -899,13 +893,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_wr_req_done <= 1; end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) + `TRACE(2, ("%d: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) `endif end if (cci_wr_rsp_fire) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) + `TRACE(2, ("%d: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) `endif end end @@ -933,7 +927,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `SCOPE_IO_BIND (1) .clk 
(clk), - .reset (reset || ~vx_running), + .reset (vx_reset), // Memory request .mem_req_valid (vx_mem_req_valid), diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 1efda8029..d2a3f4c51 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -126,17 +126,16 @@ module VX_afu_wrap #( if (reset || ap_reset) begin state <= STATE_IDLE; vx_pending_writes <= '0; - vx_reset_ctr <= (`RESET_DELAY-1); vx_reset <= 1; end else begin case (state) STATE_IDLE: begin if (ap_start) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_reset_ctr <= 0; + vx_reset_ctr <= (`RESET_DELAY-1); vx_reset <= 1; end end @@ -161,6 +160,7 @@ module VX_afu_wrap #( if (~vx_busy) begin `ifdef DBG_TRACE_AFU `TRACE(2, ("%d: AFU: End execution\n", $time)) + `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) `endif state <= STATE_IDLE; end @@ -170,7 +170,7 @@ module VX_afu_wrap #( endcase // ensure reset network initialization - if (vx_reset_ctr != 0) begin + if (vx_reset_ctr != '0) begin vx_reset_ctr <= vx_reset_ctr - 1; end From 202af1e783362f5feab4932eccf64d4d15ecb6c4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 20:33:27 -0700 Subject: [PATCH 247/488] rtl bug fix --- hw/rtl/fpu/VX_fpu_dsp.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index 22e2b652d..a04f96c3b 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -255,7 +255,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .TAG_WIDTH (TAG_WIDTH) ) fpu_sqrt ( .clk (clk), - .reset (div_sqrt_reset), + .reset (reset), .valid_in (div_sqrt_valid_in[1]), .ready_in (div_sqrt_ready_in[1]), .mask_in (div_sqrt_mask_in[1]), From b56aa00f4f282b39fc5df11929b19f11a7b84a99 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 8 Sep 2024 20:37:28 -0700 Subject: [PATCH 248/488] reset cleanup --- 
hw/rtl/VX_cluster.sv | 6 ++---- hw/rtl/core/VX_core.sv | 23 +++++++---------------- hw/rtl/core/VX_execute.sv | 14 ++++---------- 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index ef845ae07..3e9324437 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -56,14 +56,12 @@ module VX_cluster import VX_gpu_pkg::*; #( VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS](); VX_gbar_bus_if gbar_bus_if(); - `RESET_RELAY (gbar_reset, reset); - VX_gbar_arb #( .NUM_REQS (`NUM_SOCKETS), .OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure ) gbar_arb ( .clk (clk), - .reset (gbar_reset), + .reset (reset), .bus_in_if (per_socket_gbar_bus_if), .bus_out_if (gbar_bus_if) ); @@ -72,7 +70,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID)) ) gbar_unit ( .clk (clk), - .reset (gbar_reset), + .reset (reset), .gbar_bus_if (gbar_bus_if) ); diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index f97370e89..d9f3de687 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -75,13 +75,6 @@ module VX_core import VX_gpu_pkg::*; #( assign mem_perf_tmp_if.mem = mem_perf_if.mem; `endif - `RESET_RELAY (schedule_reset, reset); - `RESET_RELAY (fetch_reset, reset); - `RESET_RELAY (decode_reset, reset); - `RESET_RELAY (issue_reset, reset); - `RESET_RELAY (execute_reset, reset); - `RESET_RELAY (commit_reset, reset); - base_dcrs_t base_dcrs; VX_dcr_data dcr_data ( @@ -98,7 +91,7 @@ module VX_core import VX_gpu_pkg::*; #( .CORE_ID (CORE_ID) ) schedule ( .clk (clk), - .reset (schedule_reset), + .reset (reset), `ifdef PERF_ENABLE .sched_perf (pipeline_perf_if.sched), @@ -126,7 +119,7 @@ module VX_core import VX_gpu_pkg::*; #( ) fetch ( `SCOPE_IO_BIND (0) .clk (clk), - .reset (fetch_reset), + .reset (reset), .icache_bus_if (icache_bus_if), .schedule_if (schedule_if), .fetch_if (fetch_if) @@ -136,7 +129,7 @@ module VX_core import VX_gpu_pkg::*; #( 
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID)) ) decode ( .clk (clk), - .reset (decode_reset), + .reset (reset), .fetch_if (fetch_if), .decode_if (decode_if), .decode_sched_if(decode_sched_if) @@ -148,7 +141,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_BIND (1) .clk (clk), - .reset (issue_reset), + .reset (reset), `ifdef PERF_ENABLE .issue_perf (pipeline_perf_if.issue), @@ -166,7 +159,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_BIND (2) .clk (clk), - .reset (execute_reset), + .reset (reset), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_tmp_if), @@ -191,7 +184,7 @@ module VX_core import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID)) ) commit ( .clk (clk), - .reset (commit_reset), + .reset (reset), .commit_if (commit_if), @@ -201,13 +194,11 @@ module VX_core import VX_gpu_pkg::*; #( .commit_sched_if(commit_sched_if) ); - `RESET_RELAY (lmem_unit_reset, reset); - VX_mem_unit #( .INSTANCE_ID (INSTANCE_ID) ) mem_unit ( .clk (clk), - .reset (lmem_unit_reset), + .reset (reset), `ifdef PERF_ENABLE .lmem_perf (mem_perf_tmp_if.lmem), `endif diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index ded25918c..6c148649b 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -51,15 +51,11 @@ module VX_execute import VX_gpu_pkg::*; #( VX_fpu_csr_if fpu_csr_if[`NUM_FPU_BLOCKS](); `endif - `RESET_RELAY (alu_reset, reset); - `RESET_RELAY (lsu_reset, reset); - `RESET_RELAY (sfu_reset, reset); - VX_alu_unit #( .INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID)) ) alu_unit ( .clk (clk), - .reset (alu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if (commit_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .branch_ctl_if (branch_ctl_if) @@ -72,20 +68,18 @@ module VX_execute import VX_gpu_pkg::*; #( ) lsu_unit ( `SCOPE_IO_BIND (0) .clk (clk), - .reset (lsu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if 
(commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .lsu_mem_if (lsu_mem_if) ); `ifdef EXT_F_ENABLE - `RESET_RELAY (fpu_reset, reset); - VX_fpu_unit #( .INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID)) ) fpu_unit ( .clk (clk), - .reset (fpu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if (commit_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .fpu_csr_if (fpu_csr_if) @@ -97,7 +91,7 @@ module VX_execute import VX_gpu_pkg::*; #( .CORE_ID (CORE_ID) ) sfu_unit ( .clk (clk), - .reset (sfu_reset), + .reset (reset), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_if), .pipeline_perf_if (pipeline_perf_if), From 63840a20da52a951ec21d8902e59519b40cdc40b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 9 Sep 2024 06:10:56 -0700 Subject: [PATCH 249/488] minor update --- hw/rtl/core/VX_schedule.sv | 3 ++- hw/rtl/core/VX_uuid_gen.sv | 7 ++++--- hw/rtl/libs/VX_generic_arbiter.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 11 +++++------ 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 6916d3e00..77e00156b 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -334,7 +334,8 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`UUID_WIDTH-1:0] instr_uuid; `ifndef NDEBUG VX_uuid_gen #( - .CORE_ID (CORE_ID) + .CORE_ID (CORE_ID), + .UUID_WIDTH (`UUID_WIDTH) ) uuid_gen ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_uuid_gen.sv b/hw/rtl/core/VX_uuid_gen.sv index 8dca50e91..cbde9091d 100644 --- a/hw/rtl/core/VX_uuid_gen.sv +++ b/hw/rtl/core/VX_uuid_gen.sv @@ -14,15 +14,16 @@ `include "VX_define.vh" module VX_uuid_gen import VX_gpu_pkg::*; #( - parameter CORE_ID = 0 + parameter CORE_ID = 0, + parameter UUID_WIDTH = 48 ) ( input wire clk, input wire reset, input wire incr, input wire [`NW_WIDTH-1:0] wid, - output wire [`UUID_WIDTH-1:0] uuid + output wire [UUID_WIDTH-1:0] uuid ); - localparam GNW_WIDTH = `UUID_WIDTH - 32; + localparam 
GNW_WIDTH = UUID_WIDTH - 32; reg [31:0] uuid_cntrs [0:`NUM_WARPS-1]; reg [`NUM_WARPS-1:0] has_uuid_cntrs; diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 3a3737d04..5cc9a9aab 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -90,7 +90,7 @@ module VX_generic_arbiter #( end - `RUNTIME_ASSERT ((~(| requests) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) + `RUNTIME_ASSERT (((~(| requests) != 1) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index b0d8704e3..24ad5cdf1 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -96,9 +96,8 @@ module VX_mem_scheduler #( `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) - `STATIC_ASSERT ((0 == RSP_PARTIAL) || (1 == RSP_PARTIAL), ("invalid parameter")) `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)) - + wire ibuf_push; wire ibuf_pop; wire [CORE_QUEUE_ADDRW-1:0] ibuf_waddr; @@ -435,7 +434,7 @@ module VX_mem_scheduler #( end end - if (RSP_PARTIAL == 1) begin + if (RSP_PARTIAL != 0) begin reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; @@ -462,14 +461,14 @@ module VX_mem_scheduler #( end else begin reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_BATCHES-1:00][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if ((CORE_CHANNELS == 1) || 
mem_rsp_mask_s[i]) begin - rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i]; + rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; end end end @@ -490,7 +489,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[(i * CORE_CHANNELS + j) * WORD_WIDTH +: WORD_WIDTH]; + assign crsp_data[r] = rsp_store_n[i][j]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From 83d65e2cf16c913a7242cb8318231bed5138da35 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 10 Sep 2024 16:22:34 -0700 Subject: [PATCH 250/488] tracing update --- hw/rtl/Vortex.sv | 6 ++-- hw/rtl/afu/opae/vortex_afu.sv | 56 +++++++++++++++++------------------ hw/rtl/afu/xrt/VX_afu_wrap.sv | 16 +++++----- hw/rtl/cache/VX_cache_bank.sv | 18 +++++------ hw/rtl/cache/VX_cache_data.sv | 8 ++--- hw/rtl/cache/VX_cache_mshr.sv | 12 ++++---- hw/rtl/cache/VX_cache_tags.sv | 14 ++++----- hw/rtl/cache/VX_cache_wrap.sv | 12 ++++---- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/core/VX_commit.sv | 2 +- hw/rtl/core/VX_dcr_data.sv | 2 +- hw/rtl/core/VX_decode.sv | 2 +- hw/rtl/core/VX_fetch.sv | 4 +-- hw/rtl/core/VX_issue_slice.sv | 2 +- hw/rtl/core/VX_lsu_slice.sv | 8 ++--- hw/rtl/core/VX_scoreboard.sv | 2 +- 16 files changed, 83 insertions(+), 83 deletions(-) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 0263e1790..fd7ef0267 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -199,13 +199,13 @@ module Vortex import VX_gpu_pkg::*; ( always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) + `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) end else begin - 
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) + `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) + `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) end end `endif diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 1fbb9d1b7..d97be483d 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -260,7 +260,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) + `TRACE(2, ("%t: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) end `endif end @@ -268,28 +268,28 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_SCOPE_READ: begin mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) + `TRACE(2, ("%t: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) `endif end `endif MMIO_DEV_CAPS: begin mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) + `TRACE(2, ("%t: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) `endif end MMIO_ISA_CAPS: begin mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU if (state != STATE_WIDTH'(mmio_rsp.data)) begin - `TRACE(2, ("%d: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) + `TRACE(2, ("%t: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) end `endif end default: begin mmio_rsp.data 
<= 64'h0; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) + `TRACE(2, ("%t: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) `endif end endcase @@ -303,36 +303,36 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ MMIO_CMD_ARG0: begin cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG1: begin cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_ARG2: begin cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end MMIO_CMD_TYPE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: AFU: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) + `TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) `endif end `endif default: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))) + `TRACE(2, ("%t: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))) `endif end endcase @@ -363,25 +363,25 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ 
case (cmd_type) CMD_MEM_READ: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%t: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_READ; end CMD_MEM_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) + `TRACE(2, ("%t: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_WRITE; end CMD_DCR_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) + `TRACE(2, ("%t: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) `endif state <= STATE_DCR_WRITE; end CMD_RUN: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_reset_ctr <= (`RESET_DELAY-1); @@ -396,7 +396,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_rd_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif end end @@ -408,7 +408,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ STATE_DCR_WRITE: begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif end STATE_RUN: begin @@ -416,7 +416,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // wait until the reset network is ready if (vx_reset_ctr == 0) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)) + 
`TRACE(2, ("%t: AFU: Begin execution\n", $time)) `endif vx_busy_wait <= 1; vx_reset <= 0; @@ -431,8 +431,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // wait until the processor is not busy if (~vx_busy) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)) - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: End execution\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif state <= STATE_IDLE; end @@ -739,7 +739,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) + `TRACE(2, ("%t: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) `endif end @@ -749,13 +749,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) + `TRACE(2, ("%t: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) `endif end if (cci_rdq_pop) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) + `TRACE(2, ("%t: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) `endif end @@ -893,13 +893,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_wr_req_done <= 1; end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Wr Req: 
addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) + `TRACE(2, ("%t: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) `endif end if (cci_wr_rsp_fire) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) + `TRACE(2, ("%t: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) `endif end end @@ -1080,13 +1080,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin if (avs_write[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])) + `TRACE(2, ("%t: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])) end if (avs_read[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])) + `TRACE(2, ("%t: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])) end if (avs_readdatavalid[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])) + `TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])) end end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index d2a3f4c51..ff07cc09a 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -132,7 +132,7 @@ module VX_afu_wrap #( STATE_IDLE: begin if (ap_start) begin `ifdef 
DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Goto STATE RUN\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; vx_reset_ctr <= (`RESET_DELAY-1); @@ -144,7 +144,7 @@ module VX_afu_wrap #( // wait until the reset network is ready if (vx_reset_ctr == 0) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)) + `TRACE(2, ("%t: AFU: Begin execution\n", $time)) `endif vx_busy_wait <= 1; vx_reset <= 0; @@ -159,8 +159,8 @@ module VX_afu_wrap #( // wait until the processor is not busy if (~vx_busy) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)) - `TRACE(2, ("%d: AFU: Goto STATE IDLE\n", $time)) + `TRACE(2, ("%t: AFU: End execution\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif state <= STATE_IDLE; end @@ -365,16 +365,16 @@ module VX_afu_wrap #( always @(posedge ap_clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) + `TRACE(2, ("%t: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) end if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) + `TRACE(2, ("%t: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) end if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin - `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) + `TRACE(2, ("%t: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) end if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) + `TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", 
$time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) end end end diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 4abd7bad9..181f63fbf 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -672,31 +672,31 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) + `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) end if (mem_rsp_fire) begin - `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) + `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) end if (replay_fire) begin - `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin if (core_req_rw) begin - `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) + `TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", 
$time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) end else begin - `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end end if (crsp_queue_fire) begin - `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin if (do_creq_wr_st1 && !WRITEBACK) begin - `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else if (do_writeback_st1) begin - `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) + `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) end else begin - `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) + `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) end end end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 12e0e1ca3..c4713f813 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -182,16 +182,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) end if (read && ~stall) begin - `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) + `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) end if (write && ~stall) begin - `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, 
write_byteen[wsel], write_data[wsel], req_uuid)) + `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index d771a20e0..d51d0f0d4 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -268,27 +268,27 @@ module VX_cache_mshr #( show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; end if (allocate_fire) begin - `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) end if (lookup_valid) begin - `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) end if (finalize_valid) begin - `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) end if (fill_valid) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) end if (dequeue_fire) begin - `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)) end if (show_table) begin - `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)) + `TRACE(3, ("%t: %s table", $time, INSTANCE_ID)) for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index b6c3735b5..4d9fc81de 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -149,26 +149,26 @@ module VX_cache_tags #( wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel}; always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) + `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) end if (lookup && ~stall) begin if (tag_matches != 0) begin if (write) begin - `TRACE(3, ("%d: %s 
write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) end else begin - `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) end end else begin if (write) begin - `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) end else begin - `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) end end end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 578768981..502e5b9d4 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -234,13 +234,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", 
$time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) + `TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) end else begin - `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) + `TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) end end if (core_rsp_fire) begin - `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) + `TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) end end end @@ -262,15 +262,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin if (mem_bus_if.req_data.rw) begin - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) end else begin - `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) end end 
if (mem_rsp_fire) begin - `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)) end end diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 083438e88..04d123860 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -194,7 +194,7 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (br_enable) begin - `TRACE(1, ("%d: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + `TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)) end end diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index f993c9648..acfae9e4d 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -178,7 +178,7 @@ module VX_commit import VX_gpu_pkg::*; #( for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) trace_ex_type(1, j); `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)) `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS) diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 03c5be61f..042c87e55 100644 --- 
a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -50,7 +50,7 @@ module VX_dcr_data import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (dcr_bus_if.write_valid) begin - `TRACE(1, ("%d: base-dcr: state=", $time)) + `TRACE(1, ("%t: base-dcr: state=", $time)) trace_base_dcr(1, dcr_bus_if.write_addr); `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)) end diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 28d27a299..79a8d9c3d 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -568,7 +568,7 @@ module VX_decode import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (decode_if.valid && decode_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)) + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)) trace_ex_type(1, decode_if.data.ex_type); `TRACE(1, (", op=")) trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 46283818a..dab4772db 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -168,10 +168,10 @@ module VX_fetch import VX_gpu_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin if (schedule_fire) begin - `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) + `TRACE(1, ("%t: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) end if (fetch_fire) begin - `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 
1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) + `TRACE(1, ("%t: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index a99bf2c8f..63d811328 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -135,7 +135,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (operands_if.valid && operands_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); `TRACE(1, (", op=")) trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index bd82aee31..b880eee2e 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -505,11 +505,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)) + `TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID)) end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) `TRACE(1, (", flags=")) `TRACE_ARRAY1D(1, "%b", mem_req_flags, 
NUM_LANES) @@ -517,7 +517,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES) `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end else begin - `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) `TRACE(1, (", flags=")) `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) @@ -525,7 +525,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", + `TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)) `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES) `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 14d88b8b1..b4fd5c08c 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -206,7 +206,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end else begin if (staging_if[w].valid && ~staging_if[w].ready) begin `ifdef DBG_TRACE_PIPELINE - `TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", + `TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, operands_busy, staging_if[w].data.uuid)) `endif From ae24264a2a39a42e4b746a14d88dac1032da1c6e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 05:40:05 -0700 Subject: [PATCH 251/488] minor 
update --- hw/rtl/VX_socket.sv | 14 +++++++------- hw/rtl/core/VX_ipdom_stack.sv | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index a6e58ebd1..17a027c95 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -178,13 +178,13 @@ module VX_socket import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH); VX_mem_arb #( - .NUM_INPUTS (2), - .DATA_SIZE (`L1_LINE_SIZE), - .TAG_WIDTH (L1_MEM_TAG_WIDTH), - .TAG_SEL_IDX (0), - .ARBITER ("P"), // prioritize the icache - .REQ_OUT_BUF (3), - .RSP_OUT_BUF (3) + .NUM_INPUTS (2), + .DATA_SIZE (`L1_LINE_SIZE), + .TAG_WIDTH (L1_MEM_TAG_WIDTH), + .TAG_SEL_IDX(0), + .ARBITER ("P"), // prioritize the icache + .REQ_OUT_BUF(3), + .RSP_OUT_BUF(3) ) mem_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index 0ec05cbae..ded232f30 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -48,9 +48,9 @@ module VX_ipdom_stack #( empty_r <= 1; full_r <= 0; end else begin - `ASSERT(~push || ~full, ("runtime error: writing to a full stack!")); - `ASSERT(~pop || ~empty, ("runtime error: reading an empty stack!")); - `ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!")); + `ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time)); + `ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time)); + `ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time)); if (push) begin rd_ptr <= wr_ptr; wr_ptr <= wr_ptr + ADDRW'(1); From bb9ae8576dba625758376d9cd155ede3c8a44bcc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 06:47:33 -0700 Subject: [PATCH 252/488] adding uuid support to memory transactions --- hw/rtl/VX_define.vh | 16 +++++----- hw/rtl/VX_gpu_pkg.sv | 8 ++--- hw/rtl/Vortex.sv | 9 ++++-- 
hw/rtl/Vortex_axi.sv | 4 ++- hw/rtl/afu/xrt/VX_afu_wrap.sv | 12 +++---- hw/rtl/cache/VX_cache.sv | 54 +++++++++++++++++++------------- hw/rtl/cache/VX_cache_bank.sv | 47 ++++++++++++++++++++------- hw/rtl/cache/VX_cache_cluster.sv | 4 +-- hw/rtl/cache/VX_cache_define.vh | 1 - hw/rtl/cache/VX_cache_flush.sv | 20 ++++++++++++ hw/rtl/cache/VX_cache_wrap.sv | 9 +++--- 11 files changed, 121 insertions(+), 63 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 69b14c748..f42f0b018 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -264,14 +264,14 @@ /////////////////////////////////////////////////////////////////////////////// -`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \ - (`CLOG2(mshr_size) + `CLOG2(num_banks)) +`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width) \ + (uuid_width + `CLOG2(mshr_size) + `CLOG2(num_banks)) `define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \ (`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width) -`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \ - (`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1) +`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, uuid_width) \ + (`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1) /////////////////////////////////////////////////////////////////////////////// @@ -281,14 +281,14 @@ `define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \ (tag_width + `ARB_SEL_BITS(`UP(num_caches), 1)) -`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \ - `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches) +`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches, uuid_width) \ + 
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), num_caches) `define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \ `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches) -`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \ - `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches) +`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \ + `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index e5afefe8e..fe35fb391 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -166,7 +166,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef ICACHE_ENABLE - localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES); + localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES, `UUID_WIDTH); `else localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES); `endif @@ -197,7 +197,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef DCACHE_ENABLE - localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, 
`NUM_DCACHES); + localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES, `UUID_WIDTH); `else localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES); `endif @@ -226,7 +226,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef L2_ENABLE - localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); + localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH, `UUID_WIDTH); `else localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); `endif @@ -247,7 +247,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef L3_ENABLE - localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); + localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH, `UUID_WIDTH); `else localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); `endif diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index fd7ef0267..6dc59cad2 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -196,16 +196,19 @@ module Vortex import VX_gpu_pkg::*; ( end `ifdef DBG_TRACE_MEM + wire [`UUID_WIDTH-1:0] mem_req_uuid = mem_req_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH]; + wire [`UUID_WIDTH-1:0] mem_rsp_uuid = mem_rsp_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH]; + always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, 
`TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)) + `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid)) end else begin - `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)) + `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)) + `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 5d2f5b0a7..c5aa655c5 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -82,9 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); + localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; + `STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH)) `STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH)) - //`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH)) + `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) wire mem_req_valid; wire mem_req_rw; diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index ff07cc09a..191fbe078 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -227,12 +227,12 @@ module VX_afu_wrap #( .dcr_wr_data (dcr_wr_data) ); - wire 
[`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS]; + wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin - assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); - assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); + assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); + assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); end `SCOPE_IO_SWITCH (2) @@ -250,7 +250,7 @@ module VX_afu_wrap #( .m_axi_awvalid (m_axi_mem_awvalid_a), .m_axi_awready (m_axi_mem_awready_a), - .m_axi_awaddr (m_axi_mem_awaddr_w), + .m_axi_awaddr (m_axi_mem_awaddr_u), .m_axi_awid (m_axi_mem_awid_a), .m_axi_awlen (m_axi_mem_awlen_a), `UNUSED_PIN (m_axi_awsize), @@ -274,7 +274,7 @@ module VX_afu_wrap #( .m_axi_arvalid (m_axi_mem_arvalid_a), .m_axi_arready (m_axi_mem_arready_a), - .m_axi_araddr (m_axi_mem_araddr_w), + .m_axi_araddr (m_axi_mem_araddr_u), .m_axi_arid (m_axi_mem_arid_a), .m_axi_arlen (m_axi_mem_arlen_a), `UNUSED_PIN (m_axi_arsize), diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index ebb5d1519..3e5a486c6 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -83,7 +83,7 @@ module VX_cache import VX_gpu_pkg::*; #( localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + localparam MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH); localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; 
localparam WORD_WIDTH = WORD_SIZE * 8; localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE); @@ -92,6 +92,7 @@ module VX_cache import VX_gpu_pkg::*; #( localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; + localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH; localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1); @@ -110,6 +111,7 @@ module VX_cache import VX_gpu_pkg::*; #( ) core_bus2_if[NUM_REQS](); wire [NUM_BANKS-1:0] per_bank_flush_begin; + wire [`UP(UUID_WIDTH)-1:0] flush_uuid; wire [NUM_BANKS-1:0] per_bank_flush_end; wire [NUM_BANKS-1:0] per_bank_core_req_fire; @@ -117,6 +119,8 @@ module VX_cache import VX_gpu_pkg::*; #( VX_cache_flush #( .NUM_REQS (NUM_REQS), .NUM_BANKS (NUM_BANKS), + .UUID_WIDTH(UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency ) flush_unit ( .clk (clk), @@ -125,6 +129,7 @@ module VX_cache import VX_gpu_pkg::*; #( .core_bus_out_if (core_bus2_if), .bank_req_fire (per_bank_core_req_fire), .flush_begin (per_bank_flush_begin), + .flush_uuid (flush_uuid), .flush_end (per_bank_flush_end) ); @@ -182,6 +187,17 @@ module VX_cache import VX_gpu_pkg::*; #( .ready_out (mem_rsp_ready_s) ); + wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag; + wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id; + + if (NUM_BANKS > 1) begin + assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS]; + assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0]; + end else begin + assign bank_mem_rsp_tag = mem_rsp_tag_s; + assign mem_rsp_bank_id = 0; + end + // Memory request buffering wire mem_req_valid; @@ -190,7 +206,6 @@ module VX_cache import VX_gpu_pkg::*; #( wire [LINE_SIZE-1:0] mem_req_byteen; wire [`CS_LINE_WIDTH-1:0] 
mem_req_data; wire [MEM_TAG_WIDTH-1:0] mem_req_tag; - wire [MSHR_ADDR_WIDTH-1:0] mem_req_id; wire mem_req_flush; wire mem_req_ready; @@ -243,7 +258,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] per_bank_mem_req_rw; wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; - wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; + wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag; wire [NUM_BANKS-1:0] per_bank_mem_req_flush; wire [NUM_BANKS-1:0] per_bank_mem_req_ready; @@ -251,11 +266,7 @@ module VX_cache import VX_gpu_pkg::*; #( assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready; - if (NUM_BANKS == 1) begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready; - end else begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s)]; - end + assign mem_rsp_ready_s = per_bank_mem_rsp_ready[mem_rsp_bank_id]; // Bank requests dispatch @@ -359,13 +370,8 @@ module VX_cache import VX_gpu_pkg::*; #( // Banks access for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; - wire curr_bank_mem_rsp_valid; - if (NUM_BANKS == 1) begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; - end else begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id); - end + wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id); VX_cache_bank #( .BANK_ID (bank_id), @@ -421,17 +427,19 @@ module VX_cache import VX_gpu_pkg::*; #( .mem_req_rw (per_bank_mem_req_rw[bank_id]), .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), .mem_req_data (per_bank_mem_req_data[bank_id]), - .mem_req_id (per_bank_mem_req_id[bank_id]), + .mem_req_tag (per_bank_mem_req_tag[bank_id]), .mem_req_flush (per_bank_mem_req_flush[bank_id]), .mem_req_ready (per_bank_mem_req_ready[bank_id]), // Memory response 
.mem_rsp_valid (curr_bank_mem_rsp_valid), .mem_rsp_data (mem_rsp_data_s), - .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)), + .mem_rsp_tag (bank_mem_rsp_tag), .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), + // Flush request .flush_begin (per_bank_flush_begin[bank_id]), + .flush_uuid (flush_uuid), .flush_end (per_bank_flush_end[bank_id]) ); @@ -476,7 +484,7 @@ module VX_cache import VX_gpu_pkg::*; #( // Memory request arbitration - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin assign data_in[i] = { @@ -484,14 +492,16 @@ module VX_cache import VX_gpu_pkg::*; #( per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i], - per_bank_mem_req_id[i], + per_bank_mem_req_tag[i], per_bank_mem_req_flush[i] }; end + wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_req_tag; + VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), + .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1), .ARBITER ("R") ) mem_req_arb ( .clk (clk), @@ -499,7 +509,7 @@ module VX_cache import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, mem_req_id, mem_req_flush}), + .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}), .valid_out (mem_req_valid), .ready_out (mem_req_ready), `UNUSED_PIN (sel_out) @@ -507,9 +517,9 @@ module VX_cache import VX_gpu_pkg::*; #( if (NUM_BANKS > 1) begin wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); - assign mem_req_tag = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id}); + assign mem_req_tag = 
MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id}); end else begin - assign mem_req_tag = MEM_TAG_WIDTH'(mem_req_id); + assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag); end `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 181f63fbf..3e51052ef 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -60,6 +60,7 @@ module VX_cache_bank #( parameter MEM_OUT_REG = 0, parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE), + parameter MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH, parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS), parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS) ) ( @@ -97,18 +98,19 @@ module VX_cache_bank #( output wire mem_req_rw, output wire [LINE_SIZE-1:0] mem_req_byteen, output wire [`CS_LINE_WIDTH-1:0] mem_req_data, - output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr + output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_flush, input wire mem_req_ready, // Memory response input wire mem_rsp_valid, input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data, - input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id, + input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_rsp_ready, // flush input wire flush_begin, + input wire [`UP(UUID_WIDTH)-1:0] flush_uuid, output wire flush_end ); @@ -241,12 +243,30 @@ module VX_cache_bank #( wire flush_fire = flush_valid && flush_ready; wire core_req_fire = core_req_valid && core_req_ready; + wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0]; + + wire [TAG_WIDTH-1:0] mem_rsp_tag_s; + if (TAG_WIDTH > MEM_TAG_WIDTH) begin + assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)}; + end else begin + assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH]; + `UNUSED_VAR (mem_rsp_tag) + end + + wire [TAG_WIDTH-1:0] flush_tag; + if (UUID_WIDTH != 0) begin + assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)}; + end else begin + assign flush_tag = '0; + end + 
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign rw_sel = replay_valid ? replay_rw : core_req_rw; assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen; assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; - assign tag_sel = replay_valid ? replay_tag : core_req_tag; + assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) : + (replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag)); assign creq_flush_sel = core_req_valid && core_req_flush; assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : @@ -587,7 +607,7 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] mreq_queue_data; wire [LINE_SIZE-1:0] mreq_queue_byteen; wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; - wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id; + wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag; wire mreq_queue_rw; wire mreq_queue_flush; @@ -613,7 +633,6 @@ module VX_cache_bank #( assign mreq_queue_pop = mem_req_valid && mem_req_ready; assign mreq_queue_addr = addr_st1; - assign mreq_queue_id = mshr_id_st1; assign mreq_queue_flush = creq_flush_st1; if (WRITE_ENABLE) begin @@ -637,8 +656,14 @@ module VX_cache_bank #( `UNUSED_VAR (dirty_byteen_st1) end + if (UUID_WIDTH != 0) begin + assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1}; + end else begin + assign mreq_queue_tag = mshr_id_st1; + end + VX_fifo_queue #( - .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1), + .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), .DEPTH (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), .OUT_REG (MEM_OUT_REG) @@ -647,8 +672,8 @@ module VX_cache_bank #( .reset (reset), .push (mreq_queue_push), .pop (mreq_queue_pop), - .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}), - .data_out ({mem_req_rw, 
mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}), + .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}), + .data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}), .empty (mreq_queue_empty), .alm_full (mreq_queue_alm_full), `UNUSED_PIN (full), @@ -675,7 +700,7 @@ module VX_cache_bank #( `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) end if (mem_rsp_fire) begin - `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)) + `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) end if (replay_fire) begin `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) @@ -694,9 +719,9 @@ module VX_cache_bank #( if (do_creq_wr_st1 && !WRITEBACK) begin `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else if (do_writeback_st1) begin - `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)) + `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else begin - `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)) + `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1)) end end end diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 7173444ec..91055a548 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -82,8 +82,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( localparam PASSTHRU = (NUM_UNITS == 0); localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES); localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH, UUID_WIDTH) : + `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH)); `STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter")) diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index e6d7da167..342a40a1b 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -57,7 +57,6 @@ `define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)} `define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS] `define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0] -`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS] `define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))} `define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)} diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index 648fbebb3..a841f3ebc 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -18,6 +18,10 @@ module VX_cache_flush #( 
parameter NUM_REQS = 4, // Number of banks parameter NUM_BANKS = 1, + // Request debug identifier + parameter UUID_WIDTH = 0, + // core request tag size + parameter TAG_WIDTH = UUID_WIDTH + 1, // Bank select latency parameter BANK_SEL_LATENCY = 1 ) ( @@ -27,6 +31,7 @@ module VX_cache_flush #( VX_mem_bus_if.master core_bus_out_if [NUM_REQS], input wire [NUM_BANKS-1:0] bank_req_fire, output wire [NUM_BANKS-1:0] flush_begin, + output wire [`UP(UUID_WIDTH)-1:0] flush_uuid, input wire [NUM_BANKS-1:0] flush_end ); localparam STATE_IDLE = 0; @@ -88,6 +93,7 @@ module VX_cache_flush #( wire flush_req_enable = (| flush_req_mask); reg [NUM_REQS-1:0] lock_released, lock_released_n; + reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n; for (genvar i = 0; i < NUM_REQS; ++i) begin wire input_enable = ~flush_req_enable || lock_released[i]; @@ -102,8 +108,14 @@ module VX_cache_flush #( assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready; end + reg [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid; wire [NUM_REQS-1:0] core_bus_out_ready; for (genvar i = 0; i < NUM_REQS; ++i) begin + if (UUID_WIDTH != 0) begin + assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin + assign core_bus_out_uuid[i] = 0; + end assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready; end @@ -111,10 +123,16 @@ module VX_cache_flush #( state_n = state; flush_done_n = flush_done; lock_released_n = lock_released; + flush_uuid_n = flush_uuid_r; case (state) STATE_IDLE: begin if (flush_req_enable) begin state_n = (BANK_SEL_LATENCY != 0) ? 
STATE_WAIT1 : STATE_FLUSH; + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (flush_req_mask[i]) begin + flush_uuid_n = core_bus_out_uuid[i]; + end + end end end STATE_WAIT1: begin @@ -158,8 +176,10 @@ module VX_cache_flush #( flush_done <= flush_done_n; lock_released <= lock_released_n; end + flush_uuid_r <= flush_uuid_n; end assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}}; + assign flush_uuid = flush_uuid_r; endmodule diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 502e5b9d4..bf4f6de7e 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -84,12 +84,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) - localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + localparam CACHE_MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH); - localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : + (NC_ENABLE ? 
`CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH, UUID_WIDTH) : + CACHE_MEM_TAG_WIDTH); localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); From 230b29de6f1953bfa56e7086a200a35bad5615d4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 06:57:43 -0700 Subject: [PATCH 253/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 1 + hw/rtl/core/VX_decode.sv | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 3e51052ef..0c199577b 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -257,6 +257,7 @@ module VX_cache_bank #( if (UUID_WIDTH != 0) begin assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)}; end else begin + `UNUSED_VAR (flush_uuid) assign flush_tag = '0; end diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 79a8d9c3d..70bb181a1 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -152,7 +152,7 @@ module VX_decode import VX_gpu_pkg::*; #( always @(*) begin - ex_type = '0; + ex_type = 'x; op_type = 'x; op_args = 'x; rd_v = '0; From f00f96377bc08580194960698e435442851889ef Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 17:16:34 -0700 Subject: [PATCH 254/488] disable tracing on synthesis mode --- hw/rtl/VX_platform.vh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 5a4426b28..aa63255df 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -29,13 +29,21 @@ `endif `ifdef SYNTHESIS + `define TRACING_ON `define TRACING_OFF + `ifndef NDEBUG `define DEBUG_BLOCK(x) x + `define TRACE(level, args) \ + if (level <= `DEBUG_LEVEL) begin \ + $write args; \ + end `else `define DEBUG_BLOCK(x) + `define TRACE(level, args) `endif + `define IGNORE_UNOPTFLAT_BEGIN `define IGNORE_UNOPTFLAT_END `define IGNORE_UNUSED_BEGIN @@ -47,11 +55,9 @@ `define 
UNUSED_VAR(x) `define UNUSED_PIN(x) . x () `define UNUSED_ARG(x) x -`define TRACE(level, args) \ - if (level <= `DEBUG_LEVEL) begin \ - $write args; \ - end -`else + +`else // not SYNTHESIS + `ifdef VERILATOR `ifndef TRACING_ALL @@ -122,7 +128,7 @@ `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ -`endif +`endif // not VERILATOR `ifdef SV_DPI `define TRACE(level, args) dpi_trace(level, $sformatf args); @@ -151,7 +157,7 @@ always @(posedge clk) begin \ assert(cond) else $error msg; \ end -`else +`else // not SIMULATION `define STATIC_ASSERT(cond, msg) `define ERROR(msg) // `define ASSERT(cond, msg) // From 5c726853567bf01006db7cc490ba56bb6c6c67c0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 11 Sep 2024 17:27:36 -0700 Subject: [PATCH 255/488] minor update --- hw/rtl/libs/VX_mem_coalescer.sv | 4 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 12 ++++++------ hw/rtl/libs/VX_scope_tap.sv | 18 +++++++++--------- hw/rtl/mem/VX_gbar_unit.sv | 4 ++-- hw/rtl/mem/VX_local_mem.sv | 12 ++++++------ 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index e56d802e1..84c417bd3 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -331,7 +331,7 @@ module VX_mem_coalescer #( always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%d: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE(1, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) `TRACE(1, (", flags=")) `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) @@ -350,7 +350,7 @@ module VX_mem_coalescer #( `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) end if (out_rsp_fire) begin - `TRACE(1, ("%d: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) + `TRACE(1, 
("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS) `TRACE(1, (", offset=")) `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 24ad5cdf1..1dddaba11 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -583,39 +583,39 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%d: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE(1, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) `TRACE(1, (", byteen=")) `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS) `TRACE(1, (", data=")) `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS) end else begin - `TRACE(1, ("%d: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE(1, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) end `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) + `TRACE(1, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS) `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%d: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE(1, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) `TRACE(1, (", byteen=")) `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, 
CORE_CHANNELS) `TRACE(1, (", data=")) `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS) end else begin - `TRACE(1, ("%d: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE(1, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) end `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) + `TRACE(1, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) end diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 5ec39438c..010b6f2cc 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -116,13 +116,13 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end else begin tap_state <= TAP_STATE_WAIT; delay_cntr <= start_delay; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) + `TRACE(2, ("%t: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) `endif end end @@ -133,7 +133,7 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end end @@ -158,7 +158,7 @@ module VX_scope_tap #( if (stop || (waddr >= waddr_end)) 
begin waddr <= waddr; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) + `TRACE(2, ("%t: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif tap_state <= TAP_STATE_IDLE; end @@ -258,7 +258,7 @@ module VX_scope_tap #( default:; endcase `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) + `TRACE(2, ("%t: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) `endif end CTRL_STATE_SEND: begin @@ -268,7 +268,7 @@ module VX_scope_tap #( bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) + `TRACE(2, ("%t: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) end `endif end @@ -276,7 +276,7 @@ module VX_scope_tap #( bus_out_r <= 1'(count >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) + `TRACE(2, ("%t: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) end `endif end @@ -284,7 +284,7 @@ module VX_scope_tap #( bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) + `TRACE(2, ("%t: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) end `endif end @@ -292,7 +292,7 @@ module VX_scope_tap #( bus_out_r <= 1'(get_data >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) + `TRACE(2, ("%t: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) end `endif end diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index 7e03c1378..c9707748f 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -60,11 +60,11 @@ 
module VX_gbar_unit #( `ifdef DBG_TRACE_GBAR always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin - `TRACE(1, ("%d: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", + `TRACE(1, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)) end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%d: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) + `TRACE(1, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) end end `endif diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 1c03b0387..578f4552b 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -330,15 +330,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])) end else begin - `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(1, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])) end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin - `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(1, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])) end end @@ -348,15 +348,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (per_bank_req_valid[i] 
&& per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin - `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])) end else begin - `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])) end end if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin - `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])) end end From daec55ae95b5739c9d2534e31be27a2950185c3b Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Thu, 12 Sep 2024 11:24:37 -0400 Subject: [PATCH 256/488] change the ci version --- .github/workflows/ci.yml | 22 +++++++++++----------- configure | 2 -- hw/rtl/VX_config.vh | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6009e0c37..474b1af00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,13 +21,13 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: submodules: recursive - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -36,7 +36,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -175,7 +175,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + 
uses: actions/checkout@v4 - name: Install Dependencies run: | @@ -183,7 +183,7 @@ jobs: - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -192,7 +192,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -210,7 +210,7 @@ jobs: make tests -s > /dev/null - name: Upload Build Artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: build-${{ matrix.xlen }}-vm path: build${{ matrix.xlen }}-vm @@ -226,7 +226,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install Dependencies run: | @@ -234,7 +234,7 @@ jobs: - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -243,7 +243,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -251,7 +251,7 @@ jobs: ${{ runner.os }}-thirdparty- - name: Download Build Artifact - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: name: build-${{ matrix.xlen }}-vm path: build${{ matrix.xlen }}-vm diff --git a/configure b/configure index c8f932488..f2e4781ef 100755 --- a/configure +++ b/configure @@ -177,5 +177,3 @@ SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) THIRD_PARTY_DIR=$SCRIPT_DIR/third_party copy_files "$SCRIPT_DIR" "$CURRENT_DIR" - -echo "VM Enable: "$VM_ENABLE diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 6b6a0ad86..3826918f4 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -217,7 +217,7 @@ `ifndef IO_COUT_ADDR `define IO_COUT_ADDR `IO_BASE_ADDR `endif -`define IO_COUT_SIZE 64 +`define 
IO_COUT_SIZE `MEM_BLOCK_SIZE `ifndef IO_MPM_ADDR `define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE) From 6cf0d9f7b447995cfb67d8763077ef0e54ad1da9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 20:00:50 -0700 Subject: [PATCH 257/488] fixed generate labels lint warnings to improve hardware debugging --- hw/rtl/VX_cluster.sv | 2 +- hw/rtl/VX_define.vh | 19 +++-- hw/rtl/VX_platform.vh | 11 ++- hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- hw/rtl/Vortex_axi.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 2 +- hw/rtl/afu/xrt/VX_afu_wrap.sv | 2 +- hw/rtl/cache/VX_bank_flush.sv | 4 +- hw/rtl/cache/VX_cache.sv | 51 +++++++------- hw/rtl/cache/VX_cache_bank.sv | 55 +++++++-------- hw/rtl/cache/VX_cache_bypass.sv | 108 ++++++++++++++--------------- hw/rtl/cache/VX_cache_cluster.sv | 12 ++-- hw/rtl/cache/VX_cache_data.sv | 77 ++++++++++---------- hw/rtl/cache/VX_cache_flush.sv | 21 +++--- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/cache/VX_cache_tags.sv | 12 ++-- hw/rtl/cache/VX_cache_wrap.sv | 27 ++++---- hw/rtl/core/VX_alu_int.sv | 16 ++--- hw/rtl/core/VX_alu_muldiv.sv | 14 ++-- hw/rtl/core/VX_alu_unit.sv | 2 +- hw/rtl/core/VX_commit.sv | 12 ++-- hw/rtl/core/VX_core.sv | 4 +- hw/rtl/core/VX_csr_data.sv | 4 +- hw/rtl/core/VX_csr_unit.sv | 11 +-- hw/rtl/core/VX_dispatch.sv | 6 +- hw/rtl/core/VX_dispatch_unit.sv | 44 ++++++------ hw/rtl/core/VX_fetch.sv | 8 +-- hw/rtl/core/VX_fpu_unit.sv | 10 +-- hw/rtl/core/VX_gather_unit.sv | 19 ++--- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/core/VX_issue.sv | 8 +-- hw/rtl/core/VX_issue_top.sv | 4 +- hw/rtl/core/VX_lsu_slice.sv | 19 ++--- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 20 +++--- hw/rtl/core/VX_mem_unit_top.sv | 8 +-- hw/rtl/core/VX_operands.sv | 31 +++++---- hw/rtl/core/VX_pe_switch.sv | 4 +- hw/rtl/core/VX_schedule.sv | 6 +- hw/rtl/core/VX_scoreboard.sv | 14 ++-- hw/rtl/core/VX_split_join.sv | 2 +- hw/rtl/core/VX_wctl_unit.sv | 10 +-- hw/rtl/fpu/VX_fpu_cvt.sv | 6 +- hw/rtl/fpu/VX_fpu_div.sv 
| 10 +-- hw/rtl/fpu/VX_fpu_dpi.sv | 12 ++-- hw/rtl/fpu/VX_fpu_dsp.sv | 10 +-- hw/rtl/fpu/VX_fpu_fma.sv | 12 ++-- hw/rtl/fpu/VX_fpu_fpnew.sv | 4 +- hw/rtl/fpu/VX_fpu_ncp.sv | 6 +- hw/rtl/fpu/VX_fpu_sqrt.sv | 10 +-- hw/rtl/libs/VX_avs_adapter.sv | 24 +++---- hw/rtl/libs/VX_axi_adapter.sv | 26 +++---- hw/rtl/libs/VX_bits_insert.sv | 20 +++--- hw/rtl/libs/VX_bits_remove.sv | 18 ++--- hw/rtl/libs/VX_bypass_buffer.sv | 32 +++++---- hw/rtl/libs/VX_cyclic_arbiter.sv | 4 +- hw/rtl/libs/VX_decoder.sv | 4 +- hw/rtl/libs/VX_divider.sv | 30 ++++---- hw/rtl/libs/VX_dp_ram.sv | 72 ++++++++++--------- hw/rtl/libs/VX_elastic_buffer.sv | 8 +-- hw/rtl/libs/VX_encoder.sv | 28 ++++---- hw/rtl/libs/VX_fifo_queue.sv | 8 +-- hw/rtl/libs/VX_find_first.sv | 10 +-- hw/rtl/libs/VX_generic_arbiter.sv | 10 +-- hw/rtl/libs/VX_lzc.sv | 14 ++-- hw/rtl/libs/VX_matrix_arbiter.sv | 22 +++--- hw/rtl/libs/VX_mem_adapter.sv | 24 +++---- hw/rtl/libs/VX_mem_coalescer.sv | 37 ++++++---- hw/rtl/libs/VX_mem_scheduler.sv | 49 +++++++------ hw/rtl/libs/VX_multiplier.sv | 20 +++--- hw/rtl/libs/VX_mux.sv | 14 ++-- hw/rtl/libs/VX_onehot_mux.sv | 28 ++++---- hw/rtl/libs/VX_onehot_shift.sv | 4 +- hw/rtl/libs/VX_pe_serializer.sv | 6 +- hw/rtl/libs/VX_pending_size.sv | 12 ++-- hw/rtl/libs/VX_pipe_buffer.sv | 6 +- hw/rtl/libs/VX_pipe_register.sv | 36 +++++----- hw/rtl/libs/VX_popcount.sv | 16 ++--- hw/rtl/libs/VX_priority_arbiter.sv | 4 +- hw/rtl/libs/VX_priority_encoder.sv | 20 +++--- hw/rtl/libs/VX_reduce.sv | 40 ++++++----- hw/rtl/libs/VX_reset_relay.sv | 8 +-- hw/rtl/libs/VX_rr_arbiter.sv | 26 +++---- hw/rtl/libs/VX_scan.sv | 26 +++---- hw/rtl/libs/VX_scope_switch.sv | 16 ++--- hw/rtl/libs/VX_serial_div.sv | 17 +++-- hw/rtl/libs/VX_serial_mul.sv | 16 ++--- hw/rtl/libs/VX_shift_register.sv | 18 ++--- hw/rtl/libs/VX_skid_buffer.sv | 16 ++--- hw/rtl/libs/VX_stream_arb.sv | 72 ++++++++++--------- hw/rtl/libs/VX_stream_buffer.sv | 18 ++--- hw/rtl/libs/VX_stream_pack.sv | 9 +-- hw/rtl/libs/VX_stream_switch.sv | 42 
+++++------ hw/rtl/libs/VX_stream_unpack.sv | 6 +- hw/rtl/libs/VX_stream_xbar.sv | 16 ++--- hw/rtl/libs/VX_toggle_buffer.sv | 23 +++--- hw/rtl/libs/VX_transpose.sv | 4 +- hw/rtl/mem/VX_gbar_arb.sv | 8 +-- hw/rtl/mem/VX_lmem_switch.sv | 2 +- hw/rtl/mem/VX_local_mem.sv | 36 +++++----- hw/rtl/mem/VX_lsu_adapter.sv | 30 ++++---- hw/rtl/mem/VX_mem_arb.sv | 18 ++--- sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/xrtsim/Makefile | 2 +- 106 files changed, 939 insertions(+), 901 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 3e9324437..6109e873a 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -128,7 +128,7 @@ module VX_cluster import VX_gpu_pkg::*; #( `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1)); // Generate all sockets - for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets + for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets `RESET_RELAY (socket_reset, reset); diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index f42f0b018..fdd066434 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -377,10 +377,13 @@ assign dst.req_data.data = src.req_data.data; \ assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.flags = src.req_data.flags; \ - if (TD != TS) \ + /* verilator lint_off GENUNNAMED */ \ + if (TD != TS) begin \ assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \ - else \ + end else begin \ assign dst.req_data.tag = src.req_data.tag; \ + end \ + /* verilator lint_on GENUNNAMED */ \ assign src.req_ready = dst.req_ready; \ assign src.rsp_valid = dst.rsp_valid; \ assign src.rsp_data.data = dst.rsp_data.data; \ @@ -388,6 +391,7 @@ assign dst.rsp_ready = src.rsp_ready `define BUFFER_DCR_BUS_IF(dst, src, enable) \ + /* verilator lint_off GENUNNAMED */ \ if (enable) begin \ reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ always @(posedge clk) begin \ @@ -396,9 
+400,11 @@ assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \ end else begin \ assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \ + /* verilator lint_off GENUNNAMED */ \ if (count > 1) begin \ wire [count-1:0][width-1:0] __reduce_add_i_field; \ wire [width-1:0] __reduce_add_o_field; \ @@ -424,9 +430,11 @@ end \ end else begin \ assign dst.``field = src[0].``field; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \ + /* verilator lint_off GENUNNAMED */ \ if (block_size != 1) begin \ if (block_size != `NUM_WARPS) begin \ assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \ @@ -435,6 +443,7 @@ end \ end else begin \ assign dst = src; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `endif // VX_DEFINE_VH diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index aa63255df..f2d0f6a36 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -116,15 +116,18 @@ localparam `STRING __``x = x; \ /* verilator lint_on UNUSED */ -`define UNUSED_VAR(x) if (1) begin \ +`define UNUSED_VAR(x) /* verilator lint_off GENUNNAMED */ \ + if (1) begin \ /* verilator lint_off UNUSED */ \ wire [$bits(x)-1:0] __x = x; \ /* verilator lint_on UNUSED */ \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \ . 
x () \ /* verilator lint_on PINCONNECTEMPTY */ + `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ @@ -143,8 +146,10 @@ `ifdef SIMULATION `define STATIC_ASSERT(cond, msg) \ - generate \ + generate \ + /* verilator lint_off GENUNNAMED */ \ if (!(cond)) $error msg; \ + /* verilator lint_on GENUNNAMED */ \ endgenerate `define ERROR(msg) \ diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 17a027c95..766ff468a 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -202,7 +202,7 @@ module VX_socket import VX_gpu_pkg::*; #( `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1)); // Generate all cores - for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores + for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : g_cores `RESET_RELAY (core_reset, reset); diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 6dc59cad2..0bdbec843 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -129,7 +129,7 @@ module Vortex import VX_gpu_pkg::*; ( wire [`NUM_CLUSTERS-1:0] per_cluster_busy; // Generate all clusters - for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters + for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : g_clusters `RESET_RELAY (cluster_reset, reset); diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index c5aa655c5..a15a478ee 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -110,7 +110,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS]; wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS]; - for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin + for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin : g_padding assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]); assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]); diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 
a6cd31b05..0acf87744 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -235,7 +235,7 @@ module VX_afu_ctrl #( assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; - for (genvar i = 0; i < 4; ++i) begin + for (genvar i = 0; i < 4; ++i) begin : g_wmask assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}}; end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 191fbe078..c92d94c7c 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -230,7 +230,7 @@ module VX_afu_wrap #( wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; - for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin + for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); end diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 3ceffaa6b..e90c93cf6 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -113,7 +113,7 @@ module VX_bank_flush #( assign flush_valid = (state_r == STATE_FLUSH); assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; - if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin + if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way VX_decoder #( .N (`CS_WAY_SEL_BITS) ) ctr_decoder ( @@ -121,7 +121,7 @@ module VX_bank_flush #( .valid_in (1'b1), .data_out (flush_way) ); - end else begin + end else begin : g_flush_way_all assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 3e5a486c6..b6d3f9552 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -141,7 +141,7 @@ module VX_cache import 
VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - for (genvar i = 0; i < NUM_REQS; ++i) begin : core_rsp_bufs + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), .SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), @@ -190,10 +190,10 @@ module VX_cache import VX_gpu_pkg::*; #( wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag; wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id; - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_mem_rsp_tag_s_with_banks assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS]; assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0]; - end else begin + end else begin : g_mem_rsp_tag_s_no_bank assign bank_mem_rsp_tag = mem_rsp_tag_s; assign mem_rsp_bank_id = 0; end @@ -228,9 +228,9 @@ module VX_cache import VX_gpu_pkg::*; #( assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); - end else begin + end else begin : g_mem_bus_if_ro `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end @@ -286,7 +286,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req assign core_req_valid[i] = core_bus2_if[i].req_valid; assign core_req_rw[i] = core_bus2_if[i].req_data.rw; assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen; @@ -297,24 +297,27 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_bus2_if[i].req_ready = core_req_ready[i]; end - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (WORDS_PER_LINE > 1) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_wsel + if 
(WORDS_PER_LINE > 1) begin : g_wsel assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; - end else begin + end else begin : g_no_wsel assign core_req_wsel[i] = '0; end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_line_addr assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH]; end - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_bid + if (NUM_BANKS > 1) begin : g_multibank assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; + end else begin : g_singlebank + assign core_req_bid[i] = '0; end - end else begin - assign core_req_bid = '0; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_data_in assign core_req_data_in[i] = { core_req_line_addr[i], core_req_rw[i], @@ -355,7 +358,7 @@ module VX_cache import VX_gpu_pkg::*; #( .ready_out (per_bank_core_req_ready) ); - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_req_data_out assign { per_bank_core_req_addr[i], per_bank_core_req_rw[i], @@ -368,7 +371,7 @@ module VX_cache import VX_gpu_pkg::*; #( end // Banks access - for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks + for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : g_banks wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id); @@ -443,9 +446,9 @@ module VX_cache import VX_gpu_pkg::*; #( .flush_end (per_bank_flush_end[bank_id]) ); - if (NUM_BANKS == 1) begin + if (NUM_BANKS == 1) begin : g_per_bank_mem_req_addr_multibanks assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; - end else begin + end else begin : g_per_bank_mem_req_addr_one_bank assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); end end @@ -455,7 
+458,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in; wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_rsp_data_in assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; end @@ -478,7 +481,7 @@ module VX_cache import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_data_s assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; end @@ -486,7 +489,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in assign data_in[i] = { per_bank_mem_req_addr[i], per_bank_mem_req_rw[i], @@ -515,10 +518,10 @@ module VX_cache import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_mem_req_tag_multibanks wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id}); - end else begin + end else begin : g_mem_req_tag_one_bank assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag); end @@ -546,7 +549,7 @@ module VX_cache import VX_gpu_pkg::*; #( `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); wire [NUM_REQS-1:0] perf_crsp_stall_per_req; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_perf_crsp_stall_per_req assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready; end diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 0c199577b..5054fa333 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ 
b/hw/rtl/cache/VX_cache_bank.sv @@ -246,17 +246,17 @@ module VX_cache_bank #( wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0]; wire [TAG_WIDTH-1:0] mem_rsp_tag_s; - if (TAG_WIDTH > MEM_TAG_WIDTH) begin + if (TAG_WIDTH > MEM_TAG_WIDTH) begin : g_mem_rsp_tag_s_pad assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)}; - end else begin + end else begin : g_mem_rsp_tag_s_cut assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH]; `UNUSED_VAR (mem_rsp_tag) end wire [TAG_WIDTH-1:0] flush_tag; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_flush_tag_uuid assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)}; - end else begin + end else begin : g_flush_tag_0 `UNUSED_VAR (flush_uuid) assign flush_tag = '0; end @@ -273,20 +273,21 @@ module VX_cache_bank #( assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_data_sel assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? 
mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); - end else begin + end else begin : g_data_sel_ro assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (core_req_data) `UNUSED_VAR (replay_data) end - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin + + for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_sel assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_uuid_sel_0 assign req_uuid_sel = '0; end @@ -301,9 +302,9 @@ module VX_cache_bank #( .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) ); - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_st0 assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_uuid_st0_0 assign req_uuid_st0 = '0; end @@ -383,9 +384,9 @@ module VX_cache_bank #( // we have a tag hit wire is_hit_st1 = (| way_sel_st1); - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_uuid_st1_0 assign req_uuid_st1 = '0; end @@ -434,14 +435,14 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; wire [LINE_SIZE-1:0] dirty_byteen_st1; - if (`CS_WORDS_PER_LINE > 1) begin + if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w; always @(*) begin write_byteen_w = '0; write_byteen_w[wsel_st1] = byteen_st1; end assign write_byteen_st1 = write_byteen_w; - end else begin + end else begin : g_write_byteen_st1 assign write_byteen_st1 = byteen_st1; end @@ 
-489,9 +490,9 @@ module VX_cache_bank #( // release allocated mshr entry if we had a hit wire mshr_release_st1; - if (WRITEBACK) begin + if (WRITEBACK) begin : g_mshr_release_st1 assign mshr_release_st1 = is_hit_st1; - end else begin + end else begin : g_mshr_release_st1_ro // we need to keep missed write requests in MSHR if there is already a pending entry to the same address // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content // this can happen when writes are sent late, when the fill was already in flight. @@ -566,7 +567,7 @@ module VX_cache_bank #( // check if there are pending requests to same line in the MSHR wire [MSHR_SIZE-1:0] lookup_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin + for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches assign lookup_matches[i] = mshr_lookup_pending_st0[i] && (i != mshr_alloc_id_st0) // exclude current mshr id && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough @@ -616,8 +617,8 @@ module VX_cache_bank #( wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; - if (WRITEBACK) begin - if (DIRTY_BYTES) begin + if (WRITEBACK) begin : g_mreq_queue_push + if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| dirty_byteen_st1); `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) @@ -625,7 +626,7 @@ module VX_cache_bank #( assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) || do_writeback_st1) && ~rdw_hazard3_st1; - end else begin + end else begin : g_mreq_queue_push_ro `UNUSED_VAR (do_writeback_st1) assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) || 
do_creq_wr_st1) @@ -636,12 +637,12 @@ module VX_cache_bank #( assign mreq_queue_addr = addr_st1; assign mreq_queue_flush = creq_flush_st1; - if (WRITE_ENABLE) begin - if (WRITEBACK) begin + if (WRITE_ENABLE) begin : g_mreq_queue + if (WRITEBACK) begin : g_writeback assign mreq_queue_rw = is_fill_or_flush_st1; assign mreq_queue_data = dirty_data_st1; assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1; - end else begin + end else begin : g_writethrough assign mreq_queue_rw = rw_st1; assign mreq_queue_data = write_data_st1; assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1; @@ -649,7 +650,7 @@ module VX_cache_bank #( `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) end - end else begin + end else begin : g_mreq_queue_ro assign mreq_queue_rw = 0; assign mreq_queue_data = '0; assign mreq_queue_byteen = '1; @@ -657,9 +658,9 @@ module VX_cache_bank #( `UNUSED_VAR (dirty_byteen_st1) end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1}; - end else begin + end else begin : g_mreq_queue_tag assign mreq_queue_tag = mshr_id_st1; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index a3d872d7f..a60904d46 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -77,12 +77,12 @@ module VX_cache_bypass #( wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_ready; - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc + if (PASSTHRU != 0) begin : g_passthru assign core_req_nc_idxs[i] = 1'b1; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_nc assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO]; - end else begin + end else begin : g_no_nc assign core_req_nc_idxs[i] = 1'b0; end assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; @@ -101,7 
+101,7 @@ module VX_cache_bypass #( .grant_ready (core_req_nc_ready) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_if assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) @@ -127,7 +127,7 @@ module VX_cache_bypass #( wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc_mux_in assign core_req_nc_mux_in[i] = { core_bus_in_if[i].req_data.rw, core_bus_in_if[i].req_data.addr, @@ -158,7 +158,7 @@ module VX_cache_bypass #( wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; - if (WORDS_PER_LINE > 1) begin + if (WORDS_PER_LINE > 1) begin : g_mem_req_multi_word_line reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_w; reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_w; @@ -174,46 +174,44 @@ module VX_cache_bypass #( assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_w; assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_w; - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_multiple_requests assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); - end else begin + end else begin : g_single_request assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); end - end else begin + end else begin : g_mem_req_single_word_line assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : core_req_nc_sel_byteen; assign mem_req_out_data = mem_bus_in_if.req_valid ? 
mem_bus_in_if.req_data.data : core_req_nc_sel_data; - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_multiple_requests assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id}); - end else begin + end else begin : g_single_request assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id}); end end wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mem_req_tag_bypass_with_uuid assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; - end else begin + end else begin : g_mem_req_tag_bypass assign mem_req_tag_bypass = mem_req_tag_id_bypass; end - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_mem_req_out_tag_passthru assign mem_req_out_tag = mem_req_tag_bypass; `UNUSED_VAR (mem_bus_in_if.req_data.tag) - end else begin - if (NC_ENABLE) begin - VX_bits_insert #( - .N (MEM_TAG_OUT_WIDTH-1), - .S (1), - .POS (TAG_SEL_IDX) - ) mem_req_tag_in_nc_insert ( - .data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), - .ins_in (~mem_bus_in_if.req_valid), - .data_out (mem_req_out_tag) - ); - end else begin - assign mem_req_out_tag = mem_bus_in_if.req_data.tag; - end + end else if (NC_ENABLE) begin : g_mem_req_out_tag_nc + VX_bits_insert #( + .N (MEM_TAG_OUT_WIDTH-1), + .S (1), + .POS (TAG_SEL_IDX) + ) mem_req_tag_in_nc_insert ( + .data_in (mem_bus_in_if.req_valid ? 
(MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), + .ins_in (~mem_bus_in_if.req_valid), + .data_out (mem_req_out_tag) + ); + end else begin : g_mem_req_out_tag + assign mem_req_out_tag = mem_bus_in_if.req_data.tag; end assign mem_bus_in_if.req_ready = mem_req_out_ready; @@ -241,14 +239,12 @@ module VX_cache_bypass #( wire [NUM_REQS-1:0] core_rsp_in_ready; wire is_mem_rsp_nc; - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_is_mem_rsp_nc_passthru assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid; - end else begin - if (NC_ENABLE) begin - assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; - end else begin - assign is_mem_rsp_nc = 1'b0; - end + end else if (NC_ENABLE) begin : g_is_mem_rsp_nc + assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; + end else begin : g_is_no_mem_rsp_nc + assign is_mem_rsp_nc = 1'b0; end wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; @@ -263,47 +259,47 @@ module VX_cache_bypass #( ); wire [REQ_SEL_WIDTH-1:0] rsp_idx; - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_rsp_idx assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; - end else begin + end else begin : g_rsp_idx_0 assign rsp_idx = 1'b0; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); + end + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end - if (WORDS_PER_LINE > 1) begin - wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_data + if (WORDS_PER_LINE > 1) begin : g_wsel + wire [WSEL_BITS-1:0] rsp_wsel = 
mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; - end - end else begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + end else begin : g_no_wsel assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data; end end wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mem_rsp_tag_in_nc2_uuid assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]}; - end else begin + end else begin : g_mem_rsp_tag_in_nc2 assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]; end - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_tag + if (PASSTHRU) begin : g_passthru assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_nc assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; - end else begin + end else begin : g_no_nc assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; end end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), .SIZE (DIRECT_PASSTHRU ? 
0 : `TO_OUT_BUF_SIZE(CORE_OUT_BUF)), @@ -322,22 +318,22 @@ module VX_cache_bypass #( // handle memory responses //////////////////////////////////////////////// - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_mem_bus_in_if_passthru assign mem_bus_in_if.rsp_valid = 1'b0; assign mem_bus_in_if.rsp_data.data = '0; assign mem_bus_in_if.rsp_data.tag = '0; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_mem_bus_in_if_nc assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid && ~mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0]; - end else begin + end else begin : g_mem_bus_in_if assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid; assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc; end wire [NUM_REQS-1:0] core_rsp_out_valid; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_out_valid assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; end diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 91055a548..5a8bb9865 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -102,7 +102,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - for (genvar i = 0; i < NUM_REQS; ++i) begin : core_arbs + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_arb VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), .TAG_WIDTH (TAG_WIDTH) @@ -113,7 +113,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_tmp_if[NUM_CACHES](); - for (genvar j = 0; j < NUM_INPUTS; ++j) begin + for (genvar j = 0; j < NUM_INPUTS; ++j) begin : g_core_bus_tmp_if `ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]); end @@ 
-133,12 +133,12 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .bus_out_if (arb_core_bus_tmp_if) ); - for (genvar k = 0; k < NUM_CACHES; ++k) begin + for (genvar k = 0; k < NUM_CACHES; ++k) begin : g_arb_core_bus_if `ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]); end end - for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches + for (genvar i = 0; i < NUM_CACHES; ++i) begin : g_cache_wrap VX_cache_wrap #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), .CACHE_SIZE (CACHE_SIZE), @@ -192,9 +192,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .bus_out_if (mem_bus_tmp_if) ); - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); - end else begin + end else begin : g_mem_bus_if_ro `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if[0]); end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index c4713f813..27844fd6f 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -75,64 +75,63 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata; wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; - if (WRITEBACK) begin : dirty_bytes - if (DIRTY_BYTES) begin - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; - - for (genvar i = 0; i < NUM_WAYS; ++i) begin - wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); - assign bs_wdata[i] = init ? '0 : (way_sel[i] ? 
wdata : bs_rdata[i]); - end - - VX_sp_ram #( - .DATAW (LINE_SIZE * NUM_WAYS), - .SIZE (`CS_LINES_PER_BANK) - ) byteen_store ( - .clk (clk), - .reset (reset), - .read (write || fill || flush), - .write (init || write || fill || flush), - .wren (1'b1), - .addr (line_sel), - .wdata (bs_wdata), - .rdata (bs_rdata) - ); - - assign dirty_byteen = bs_rdata[way_idx]; - end else begin - assign dirty_byteen = '1; - end - + if (WRITEBACK) begin : g_dirty_data wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin - for (genvar j = 0; j < NUM_WAYS; ++j) begin + for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_flipped_rdata + for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j assign flipped_rdata[j][i] = line_rdata[i][j]; end end assign dirty_data = flipped_rdata[way_idx]; - end else begin - assign dirty_byteen = '1; + end else begin : g_dirty_data_0 assign dirty_data = '0; end + if (DIRTY_BYTES) begin : g_dirty_byteen + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; + + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata + wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); + assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]); + end + + VX_sp_ram #( + .DATAW (LINE_SIZE * NUM_WAYS), + .SIZE (`CS_LINES_PER_BANK) + ) byteen_store ( + .clk (clk), + .reset (reset), + .read (write || fill || flush), + .write (init || write || fill || flush), + .wren (1'b1), + .addr (line_sel), + .wdata (bs_wdata), + .rdata (bs_rdata) + ); + + assign dirty_byteen = bs_rdata[way_idx]; + end else begin : g_dirty_byteen_0 + assign dirty_byteen = '1; + end + // order the data layout to perform ways multiplexing last. // this allows converting way index to binary in parallel with BRAM readaccess and way selection. 
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata; wire [BYTEENW-1:0] line_wren; - if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin + if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin - for (genvar j = 0; j < NUM_WAYS; ++j) begin + for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i + for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i]; assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i]) & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}}; end end assign line_wren = wren_w; - end else begin + end else begin : g_line_wdata_ro `UNUSED_VAR (write) `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_data) @@ -171,9 +170,9 @@ module VX_cache_data #( ); wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; - if (`CS_WORDS_PER_LINE > 1) begin + if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel assign per_way_rdata = line_rdata[wsel]; - end else begin + end else begin : g_per_way_rdata `UNUSED_VAR (wsel) assign per_way_rdata = line_rdata; end diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index a841f3ebc..b318dc5af 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -46,13 +46,13 @@ module VX_cache_flush #( wire no_inflight_reqs; - if (BANK_SEL_LATENCY != 0) begin + if (BANK_SEL_LATENCY != 0) begin : g_bank_sel_latency localparam NUM_REQS_W = `CLOG2(NUM_REQS+1); localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1); wire [NUM_REQS-1:0] core_bus_out_fire; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_fire assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready; end @@ -79,7 +79,7 @@ module VX_cache_flush #( `UNUSED_PIN (size) ); - end else begin + end 
else begin : g_no_bank_sel_latency assign no_inflight_reqs = 0; `UNUSED_VAR (bank_req_fire) end @@ -87,7 +87,7 @@ module VX_cache_flush #( reg [NUM_BANKS-1:0] flush_done, flush_done_n; wire [NUM_REQS-1:0] flush_req_mask; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_flush_req_mask assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; end wire flush_req_enable = (| flush_req_mask); @@ -95,14 +95,14 @@ module VX_cache_flush #( reg [NUM_REQS-1:0] lock_released, lock_released_n; reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_req wire input_enable = ~flush_req_enable || lock_released[i]; assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; assign core_bus_in_if[i].req_ready = core_bus_out_if[i].req_ready && input_enable; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_in_rsp assign core_bus_in_if[i].rsp_valid = core_bus_out_if[i].rsp_valid; assign core_bus_in_if[i].rsp_data = core_bus_out_if[i].rsp_data; assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready; @@ -110,12 +110,15 @@ module VX_cache_flush #( reg [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid; wire [NUM_REQS-1:0] core_bus_out_ready; - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_uuid assign core_bus_out_uuid[i] = 0; end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_ready assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready; 
end diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index d51d0f0d4..482c110dc 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -135,7 +135,7 @@ module VX_cache_mshr #( wire dequeue_fire = dequeue_valid && dequeue_ready; wire [MSHR_SIZE-1:0] addr_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin + for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr); end diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 4d9fc81de..92497b80b 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -69,7 +69,7 @@ module VX_cache_tags #( wire [NUM_WAYS-1:0] read_valid; wire [NUM_WAYS-1:0] read_dirty; - if (NUM_WAYS > 1) begin + if (NUM_WAYS > 1) begin : g_evict_way reg [NUM_WAYS-1:0] evict_way_r; // cyclic assignment of replacement way always @(posedge clk) begin @@ -90,7 +90,7 @@ module VX_cache_tags #( .sel_in (evict_way), .data_out (evict_tag) ); - end else begin + end else begin : g_evict_way_0 `UNUSED_VAR (stall) assign evict_way = 1'b1; assign evict_tag = read_tag; @@ -100,7 +100,7 @@ module VX_cache_tags #( wire fill_s = fill && (!WRITEBACK || ~stall); wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : tag_stores + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store wire do_fill = fill_s && evict_way[i]; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode @@ -113,10 +113,10 @@ module VX_cache_tags #( wire [TAG_WIDTH-1:0] line_wdata; wire [TAG_WIDTH-1:0] line_rdata; - if (WRITEBACK) begin + if (WRITEBACK) begin : g_writeback assign line_wdata = {line_valid, write, line_tag}; assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata; - end else begin + end else begin : g_writethrough assign line_wdata = {line_valid, line_tag}; assign {read_valid[i], read_tag[i]} = line_rdata; 
assign read_dirty[i] = 1'b0; @@ -139,7 +139,7 @@ module VX_cache_tags #( ); end - for (genvar i = 0; i < NUM_WAYS; ++i) begin + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index bf4f6de7e..0b8a1f3c4 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -107,7 +107,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .TAG_WIDTH (MEM_TAG_WIDTH) ) mem_bus_tmp_if(); - if (NC_OR_BYPASS) begin : bypass_if + if (NC_OR_BYPASS) begin : g_bypass VX_cache_bypass #( .NUM_REQS (NUM_REQS), @@ -141,22 +141,22 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_out_if (mem_bus_tmp_if) ); - end else begin + end else begin : g_no_bypass - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); end `ASSIGN_VX_MEM_BUS_IF (mem_bus_tmp_if, mem_bus_cache_if); end - if (WRITE_ENABLE) begin + if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); - end else begin + end else begin : g_mem_bus_if_ro `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); end - if (PASSTHRU == 0) begin : cache_if + if (PASSTHRU == 0) begin : g_cache VX_cache #( .INSTANCE_ID (INSTANCE_ID), @@ -187,9 +187,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_if (mem_bus_cache_if) ); - end else begin + end else begin : g_passthru - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if `UNUSED_VAR (core_bus_cache_if[i].req_valid) `UNUSED_VAR (core_bus_cache_if[i].req_data) assign core_bus_cache_if[i].req_ready = 0; @@ -214,15 +214,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( end `ifdef DBG_TRACE_CACHE - - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : 
g_trace wire [`UP(UUID_WIDTH)-1:0] core_req_uuid; wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_core_rsp_uuid assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_core_rsp_uuid assign core_req_uuid = 0; assign core_rsp_uuid = 0; end @@ -247,10 +246,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid; wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; - if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin + if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin : g_mem_req_uuid assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_mem_req_uuid assign mem_req_uuid = 0; assign mem_rsp_uuid = 0; end diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 04d123860..53c7ae57a 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -71,19 +71,19 @@ module VX_alu_int #( wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.op_args.alu.use_imm ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2; wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.op_args.alu.use_imm && ~is_br_op) ? 
{NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_add_result assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i]; assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0])); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_sub_result wire [`XLEN:0] sub_in1 = {is_signed & alu_in1[i][`XLEN-1], alu_in1[i]}; wire [`XLEN:0] sub_in2 = {is_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]}; assign sub_result[i] = sub_in1 - sub_in2; assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0])); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_shr_result wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]}; always @(*) begin case (alu_op[1:0]) @@ -102,7 +102,7 @@ module VX_alu_int #( assign shr_result_w[i] = `XLEN'($signed(shr_res_w)); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_msc_result always @(*) begin case (alu_op[1:0]) 2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND @@ -114,7 +114,7 @@ module VX_alu_int #( assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_alu_result wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]}); wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? 
sub_result[i][`XLEN-1:0] : slt_br_result; always @(*) begin @@ -141,9 +141,9 @@ module VX_alu_int #( assign cbr_dest = add_result[0][1 +: `PC_BITS]; - if (LANE_BITS != 0) begin + if (LANE_BITS != 0) begin : g_tid assign tid = execute_if.data.tid[0 +: LANE_BITS]; - end else begin + end else begin : g_tid_0 assign tid = 0; end @@ -185,7 +185,7 @@ module VX_alu_int #( .data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest}) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? {(PC_r + `PC_BITS'(2)), 1'd0} : alu_result_r[i]; end diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index bd498a0bb..d374013bc 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -68,7 +68,7 @@ module VX_alu_muldiv #( wire mul_fire_in = mul_valid_in && mul_ready_in; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_tmp reg [`XLEN-1:0] mul_resultl, mul_resulth; wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i]; wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i]; @@ -103,7 +103,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN:0] mul_in1; wire [NUM_LANES-1:0][`XLEN:0] mul_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_in assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]}; assign mul_in2[i] = is_alu_w ? 
{{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]}; end @@ -149,7 +149,7 @@ module VX_alu_muldiv #( `else - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_multiplier wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]}; wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]}; @@ -184,7 +184,7 @@ module VX_alu_muldiv #( `endif - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_out `ifdef XLEN_64 assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] : (is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) : @@ -219,7 +219,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN-1:0] div_in1; wire [NUM_LANES-1:0][`XLEN-1:0] div_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_in `ifdef XLEN_64 assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i]; assign div_in2[i] = is_alu_w ? 
{{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i]; @@ -234,7 +234,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in; wire div_fire_in = div_valid_in && div_ready_in; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_in reg [`XLEN-1:0] div_quotient, div_remainder; always @(*) begin dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder); @@ -306,7 +306,7 @@ module VX_alu_muldiv #( assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_out `ifdef XLEN_64 assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) : (is_div_w_out ? 
`XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]); diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 8ec044eeb..951cd811b 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -55,7 +55,7 @@ module VX_alu_unit #( .execute_if (per_block_execute_if) ); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_alus VX_execute_if #( .NUM_LANES (NUM_LANES) diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index acfae9e4d..d2e705674 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -41,13 +41,13 @@ module VX_commit import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask; wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : commit_arbs + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_arbs wire [`NUM_EX_UNITS-1:0] valid_in; wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in; wire [`NUM_EX_UNITS-1:0] ready_in; - for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin + for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_data_in assign valid_in[j] = commit_if[j * `ISSUE_WIDTH + i].valid; assign data_in[j] = commit_if[j * `ISSUE_WIDTH + i].data; assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j]; @@ -84,7 +84,7 @@ module VX_commit import VX_gpu_pkg::*; #( assign commit_fire_any = (| per_issue_commit_fire); - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_size wire [COMMIT_SIZEW-1:0] count; `POP_COUNT(count, per_issue_commit_tmask[i]); assign commit_size[i] = count; @@ -160,7 +160,7 @@ module VX_commit import VX_gpu_pkg::*; #( // Writeback - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback assign writeback_if[i].valid = commit_arb_if[i].valid && commit_arb_if[i].data.wb; assign 
writeback_if[i].data.uuid = commit_arb_if[i].data.uuid; assign writeback_if[i].data.wis = wid_to_wis(commit_arb_if[i].data.wid); @@ -174,8 +174,8 @@ module VX_commit import VX_gpu_pkg::*; #( end `ifdef DBG_TRACE_PIPELINE - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin - for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_trace + for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_j always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index d9f3de687..1d3e12613 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -229,8 +229,8 @@ module VX_core import VX_gpu_pkg::*; #( wire [LSU_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r; wire [LSU_NUM_REQS-1:0] perf_dcache_rsp_fire; - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_perf_dcache + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_j assign perf_dcache_rd_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && ~lsu_mem_if[i].req_data.rw; assign perf_dcache_wr_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && lsu_mem_if[i].req_data.rw; assign perf_dcache_rsp_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].rsp_valid && lsu_mem_if[i].rsp_data.mask[j] && lsu_mem_if[i].rsp_ready; diff --git a/hw/rtl/core/VX_csr_data.sv b/hw/rtl/core/VX_csr_data.sv index aa9b30e05..68bf7f739 100644 --- a/hw/rtl/core/VX_csr_data.sv +++ b/hw/rtl/core/VX_csr_data.sv @@ -83,7 +83,7 @@ import VX_fpu_pkg::*; wire 
[`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid; fflags_t [`NUM_FPU_BLOCKS-1:0] fpu_write_fflags; - for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_write assign fpu_write_enable[i] = fpu_csr_if[i].write_enable; assign fpu_write_wid[i] = fpu_csr_if[i].write_wid; assign fpu_write_fflags[i] = fpu_csr_if[i].write_fflags; @@ -107,7 +107,7 @@ import VX_fpu_pkg::*; end end - for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_csr_read_frm assign fpu_csr_if[i].read_frm = fcsr[fpu_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]; end diff --git a/hw/rtl/core/VX_csr_unit.sv b/hw/rtl/core/VX_csr_unit.sv index 999c9c416..be4f7321d 100644 --- a/hw/rtl/core/VX_csr_unit.sv +++ b/hw/rtl/core/VX_csr_unit.sv @@ -66,7 +66,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][`XLEN-1:0] rs1_data; `UNUSED_VAR (rs1_data) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_rs1_data assign rs1_data[i] = execute_if.data.rs1_data[i]; end @@ -113,12 +113,15 @@ module VX_csr_unit import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][`XLEN-1:0] wtid, gtid; - for (genvar i = 0; i < NUM_LANES; ++i) begin - if (PID_BITS != 0) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_wtid + if (PID_BITS != 0) begin : g_pid assign wtid[i] = `XLEN'(execute_if.data.pid * NUM_LANES + i); - end else begin + end else begin : g_no_pid assign wtid[i] = `XLEN'(i); end + end + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_gtid assign gtid[i] = (`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(execute_if.data.wid) << `NT_BITS) + wtid[i]; end diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 4326298a1..1c24fe46d 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -33,7 +33,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( localparam DATAW = `UUID_WIDTH + 
ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH; wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_tids assign tids[i] = `NT_WIDTH'(i); end @@ -53,7 +53,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( wire [`NUM_EX_UNITS-1:0] operands_ready_in; assign operands_if.ready = operands_ready_in[operands_if.data.ex_type]; - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : buffers + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_buffers VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), @@ -88,7 +88,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( wire operands_if_stall = operands_if.valid && ~operands_if.ready; - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_stalls always @(posedge clk) begin if (reset) begin perf_stalls_r[i] <= '0; diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 0bd4b45c4..5d37d0578 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -49,7 +49,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data; wire [`ISSUE_WIDTH-1:0] dispatch_ready; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_dispatch_data assign dispatch_valid[i] = dispatch_if[i].valid; assign dispatch_data[i] = dispatch_if[i].data; assign dispatch_if[i].ready = dispatch_ready[i]; @@ -69,10 +69,10 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( logic [BATCH_COUNT_W-1:0] batch_idx; - if (BATCH_COUNT != 1) begin + if (BATCH_COUNT != 1) begin : g_batch_idx wire [BATCH_COUNT_W-1:0] batch_idx_n; wire [BATCH_COUNT-1:0] valid_batches; - for (genvar i = 0; i < BATCH_COUNT; ++i) begin + for (genvar i = 0; i < BATCH_COUNT; ++i) begin : g_valid_batches assign valid_batches[i] = | 
dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE]; end @@ -96,22 +96,22 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( batch_idx <= batch_idx_n; end end - end else begin + end else begin : g_batch_idx_0 assign batch_idx = 0; `UNUSED_VAR (batch_done) end wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_issue_indices assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); end - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : blocks + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_blocks wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx]; wire valid_p, ready_p; - if (`NUM_THREADS != NUM_LANES) begin : threads_split + if (`NUM_THREADS > NUM_LANES) begin : g_partial_threads reg [NUM_PACKETS-1:0] sent_mask_p; wire [PID_WIDTH-1:0] start_p_n, start_p, end_p; wire dispatch_valid_r; @@ -146,8 +146,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; - for (genvar i = 0; i < NUM_PACKETS; ++i) begin - for (genvar j = 0; j < NUM_LANES; ++j) begin + for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_per_packet_data + for (genvar j = 0; j < NUM_LANES; ++j) begin : g_j localparam k = i * NUM_LANES + j; assign per_packet_tmask[i][j] = dispatch_tmask[k]; assign per_packet_regs[i][0][j] = dispatch_rs1_data[k]; @@ -157,10 +157,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( end wire [NUM_PACKETS-1:0] packet_valids; - wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids; - - for (genvar i = 0; i < NUM_PACKETS; ++i) begin + for (genvar i = 0; i < NUM_PACKETS; 
++i) begin : g_packet_valids assign packet_valids[i] = (| per_packet_tmask[i]); + end + + wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids; + for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_ids assign packet_ids[i] = PID_WIDTH'(i); end @@ -209,13 +211,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign block_pid[block_idx] = start_p; assign block_sop[block_idx] = is_first_p; assign block_eop[block_idx] = is_last_p; - if (FANOUT_ENABLE) begin + if (FANOUT_ENABLE) begin : g_block_ready_fanout assign block_ready[block_idx] = dispatch_valid_r && ready_p && block_enable; - end else begin + end else begin : g_block_ready assign block_ready[block_idx] = ready_p && block_enable; end assign block_done[block_idx] = fire_eop || ~dispatch_valid[issue_idx]; - end else begin + end else begin : g_full_threads assign valid_p = dispatch_valid[issue_idx]; assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS]; assign block_regs[block_idx][0] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; @@ -229,13 +231,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( end wire [ISSUE_ISW_W-1:0] isw; - if (BATCH_COUNT != 1) begin - if (BLOCK_SIZE != 1) begin + if (BATCH_COUNT != 1) begin : g_isw_batch + if (BLOCK_SIZE != 1) begin : g_block assign isw = {batch_idx, BLOCK_SIZE_W'(block_idx)}; - end else begin + end else begin : g_no_block assign isw = batch_idx; end - end else begin + end else begin : g_isw assign isw = block_idx; end @@ -268,9 +270,9 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( .ready_out (execute_if[block_idx].ready) ); - if (`NUM_THREADS != NUM_LANES) begin + if (`NUM_THREADS != NUM_LANES) begin : g_execute_data_w_partial assign execute_data_w = execute_data; - end else begin + end else begin : g_execute_data_w_full always @(*) begin execute_data_w = execute_data; execute_data_w[2:0] = {1'b0, 1'b1, 1'b1}; // default pid, sop, and eop diff --git a/hw/rtl/core/VX_fetch.sv 
b/hw/rtl/core/VX_fetch.sv index dab4772db..a2a80ed94 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -71,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #( // This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests. // This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus. wire [`NUM_WARPS-1:0] pending_ibuf_full; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_reads + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_reads VX_pending_size #( .SIZE (`IBUF_SIZE) ) pending_reads ( @@ -164,13 +164,11 @@ module VX_fetch import VX_gpu_pkg::*; #( `endif `ifdef DBG_TRACE_MEM - wire schedule_fire = schedule_if.valid && schedule_if.ready; - wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin - if (schedule_fire) begin + if (schedule_if.valid && schedule_if.ready) begin `TRACE(1, ("%t: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) end - if (fetch_fire) begin + if (fetch_if.valid && fetch_if.ready) begin `TRACE(1, ("%t: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) end end diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 10e5c236b..1565f3728 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -53,7 +53,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : fpus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_fpus `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) @@ -98,11 
+98,11 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( `UNUSED_PIN (empty) ); - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_fpu_rsp_pid assign fpu_rsp_pid = fpu_rsp_pid_u; assign fpu_rsp_sop = fpu_rsp_sop_u; assign fpu_rsp_eop = fpu_rsp_eop_u; - end else begin + end else begin : g_no_fpu_rsp_pid `UNUSED_VAR (fpu_rsp_pid_u) `UNUSED_VAR (fpu_rsp_sop_u) `UNUSED_VAR (fpu_rsp_eop_u) @@ -214,7 +214,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( // handle CSR update fflags_t fpu_rsp_fflags_q; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_pid fflags_t fpu_rsp_fflags_r; always @(posedge clk) begin if (reset) begin @@ -224,7 +224,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( end end assign fpu_rsp_fflags_q = fpu_rsp_fflags_r | fpu_rsp_fflags; - end else begin + end else begin : g_no_pid assign fpu_rsp_fflags_q = fpu_rsp_fflags; end diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 69295321b..284d5c167 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -41,17 +41,17 @@ module VX_gather_unit import VX_gpu_pkg::*; #( wire [BLOCK_SIZE-1:0] commit_in_ready; wire [BLOCK_SIZE-1:0][ISSUE_ISW_W-1:0] commit_in_isw; - for (genvar i = 0; i < BLOCK_SIZE; ++i) begin + for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in assign commit_in_valid[i] = commit_in_if[i].valid; assign commit_in_data[i] = commit_in_if[i].data; assign commit_in_if[i].ready = commit_in_ready[i]; - if (BLOCK_SIZE != `ISSUE_WIDTH) begin - if (BLOCK_SIZE != 1) begin + if (BLOCK_SIZE != `ISSUE_WIDTH) begin : g_commit_in_isw_partial + if (BLOCK_SIZE != 1) begin : g_block assign commit_in_isw[i] = {commit_in_data[i][DATA_WIS_OFF+BLOCK_SIZE_W +: (ISSUE_ISW_W-BLOCK_SIZE_W)], BLOCK_SIZE_W'(i)}; - end else begin + end else begin : g_no_block assign commit_in_isw[i] = commit_in_data[i][DATA_WIS_OFF +: ISSUE_ISW_W]; end - end else begin + end else begin : g_commit_in_isw_full assign commit_in_isw[i] = BLOCK_SIZE_W'(i); end end @@ 
-70,11 +70,12 @@ module VX_gather_unit import VX_gpu_pkg::*; #( commit_out_data[commit_in_isw[i]] = commit_in_data[i]; end end - for (genvar i = 0; i < BLOCK_SIZE; ++i) begin + + for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in_ready assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]]; end - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: out_bufs + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: g_out_bufs VX_commit_if #( .NUM_LANES (NUM_LANES) ) commit_tmp_if(); @@ -96,7 +97,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( logic [`NUM_THREADS-1:0] commit_tmask_w; logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_w; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_commit_data_with_pid always @(*) begin commit_tmask_w = '0; commit_data_w = 'x; @@ -105,7 +106,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #( commit_data_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; end end - end else begin + end else begin : g_commit_data_no_pid assign commit_tmask_w = commit_tmp_if.data.tmask; assign commit_data_w = commit_tmp_if.data.data; end diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index f5d879f33..e1a9457de 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : instr_bufs + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_instr_bufs VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index a0f223ff5..a2e689b7c 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -36,10 +36,10 @@ module VX_issue import VX_gpu_pkg::*; #( `PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) `PERF_COUNTER_ADD (issue_perf, per_issue_perf, 
scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) `PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_issue_perf_units_uses `PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) end - for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_issue_perf_sfu_uses `PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) end `endif @@ -52,7 +52,7 @@ module VX_issue import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (`ISSUE_WIDTH) - for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices + for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices VX_decode_if #( .NUM_WARPS (PER_ISSUE_WARPS) ) per_issue_decode_if(); @@ -93,7 +93,7 @@ module VX_issue import VX_gpu_pkg::*; #( ); // Assign transposed dispatch_if - for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin + for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin : g_dispatch_if `ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]); end end diff --git a/hw/rtl/core/VX_issue_top.sv b/hw/rtl/core/VX_issue_top.sv index 0166cf770..e148b02f6 100644 --- a/hw/rtl/core/VX_issue_top.sv +++ b/hw/rtl/core/VX_issue_top.sv @@ -80,7 +80,7 @@ module VX_issue_top import VX_gpu_pkg::*; #( assign decode_if.data.rs3 = decode_rs3; assign decode_ready = decode_if.ready; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback_if assign writeback_if[i].valid = writeback_valid[i]; assign writeback_if[i].data.uuid = writeback_uuid[i]; assign writeback_if[i].data.wis = writeback_wis[i]; @@ -92,7 +92,7 @@ module VX_issue_top import VX_gpu_pkg::*; #( assign 
writeback_if[i].data.eop = writeback_eop[i]; end - for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin : g_dispatch_if assign dispatch_valid[i] = dispatch_if[i].valid; assign dispatch_uuid[i] = dispatch_if[i].data.uuid; assign dispatch_wis[i] = dispatch_if[i].data.wis; diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index b880eee2e..43f787ae9 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -59,14 +59,14 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire req_is_fence, rsp_is_fence; wire [NUM_LANES-1:0][`XLEN-1:0] full_addr; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_full_addr assign full_addr[i] = execute_if.data.rs1_data[i] + `SEXT(`XLEN, execute_if.data.op_args.lsu.offset); end // address type calculation wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_flags wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW]; // is I/O address wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT); @@ -151,13 +151,13 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_addr assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0]; assign mem_req_addr[i] = full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT]; end // byte enable formatting - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_byteen_w reg [LSU_WORD_SIZE-1:0] mem_req_byteen_w; always @(*) begin mem_req_byteen_w = '0; @@ -185,7 +185,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end // memory misalignment not supported! 
- for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_missalign wire lsu_req_fire = execute_if.valid && execute_if.ready; `RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0), ("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", @@ -193,7 +193,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end // store data formatting - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_data always @(*) begin mem_req_data[i] = execute_if.data.rs2_data[i]; case (req_align[i]) @@ -215,7 +215,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_pids reg [`LSUQ_IN_SIZE-1:0][PID_BITS:0] pkt_ctr; reg [`LSUQ_IN_SIZE-1:0] pkt_sop, pkt_eop; @@ -274,7 +274,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time)) `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("%t: oops! broken sop request!", $time)) `UNUSED_VAR (mem_rsp_sop) - end else begin + end else begin : g_no_pids assign pkt_waddr = 0; assign mem_rsp_sop_pkt = mem_rsp_sop; assign mem_rsp_eop_pkt = mem_rsp_eop; @@ -424,7 +424,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `endif `endif - for (genvar i = 0; i < NUM_LANES; i++) begin + for (genvar i = 0; i < NUM_LANES; i++) begin : g_rsp_data `ifdef XLEN_64 wire [63:0] rsp_data64 = mem_rsp_data[i]; wire [31:0] rsp_data32 = (rsp_align[i][2] ? 
mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]); @@ -481,6 +481,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .valid_out (commit_no_rsp_if.valid), .ready_out (commit_no_rsp_if.ready) ); + assign commit_no_rsp_if.data.rd = '0; assign commit_no_rsp_if.data.wb = 1'b0; assign commit_no_rsp_if.data.data = commit_rsp_if.data.data; // arbiter MUX optimization diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 8c594f533..f4a1fc4ae 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) lsu_slice( diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 75f60e63c..c02e99b29 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -45,7 +45,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lsu_lmem_if[`NUM_LSU_BLOCKS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_switches + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches VX_lmem_switch #( .REQ0_OUT_BUF (3), .REQ1_OUT_BUF (0), @@ -65,7 +65,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (LSU_TAG_WIDTH) ) lmem_bus_if[LSU_NUM_REQS](); - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : lmem_adapters + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_adapters VX_mem_bus_if #( .DATA_SIZE (LSU_WORD_SIZE), .TAG_WIDTH (LSU_TAG_WIDTH) @@ -86,7 +86,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .mem_bus_if (lmem_bus_tmp_if) ); - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_lmem_bus_if `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], 
lmem_bus_tmp_if[j]); end end @@ -115,7 +115,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE assign lmem_perf = '0; `endif - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_dcache_if `ASSIGN_VX_MEM_BUS_IF (lsu_dcache_if[i], lsu_mem_if[i]); end @@ -127,9 +127,9 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescers + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), .NUM_REQS (`NUM_LSU_LANES), @@ -182,15 +182,15 @@ module VX_mem_unit import VX_gpu_pkg::*; #( ); end - end else begin + end else begin : g_passthru - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_coalesced_if `ASSIGN_VX_MEM_BUS_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); end end - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : dcache_adapters + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_adapters VX_mem_bus_if #( .DATA_SIZE (DCACHE_WORD_SIZE), @@ -212,7 +212,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .mem_bus_if (dcache_bus_tmp_if) ); - for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin + for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin : g_dcache_bus_if `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); end diff --git a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv index 1eac9da10..17786a09b 100644 --- a/hw/rtl/core/VX_mem_unit_top.sv +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -62,7 +62,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( ) lsu_mem_if[`NUM_LSU_BLOCKS](); // LSU memory request - for (genvar i = 0; i < 
`NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_mem_req assign lsu_mem_if[i].req_valid = lsu_req_valid[i]; assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i]; assign lsu_mem_if[i].req_data.mask = lsu_req_mask[i]; @@ -75,7 +75,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( end // LSU memory response - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_rsp assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid; assign lsu_rsp_mask[i] = lsu_mem_if[i].rsp_data.mask; assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data; @@ -89,7 +89,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( ) mem_bus_if[DCACHE_NUM_REQS](); // memory request - for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_req assign mem_req_valid[i] = mem_bus_if[i].req_valid; assign mem_req_rw[i] = mem_bus_if[i].req_data.rw; assign mem_req_byteen[i] = mem_bus_if[i].req_data.byteen; @@ -101,7 +101,7 @@ module VX_mem_unit_top import VX_gpu_pkg::*; #( end // memory response - for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_bus_rsp assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index ef98ea79e..066db15cd 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -81,20 +81,23 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds; assign src_opds = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1}; - for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin - if (ISSUE_WIS != 0) begin + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_data_in + if (ISSUE_WIS != 0) begin : g_wis assign req_data_in[i] = 
{src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; - end else begin + end else begin : g_no_wis assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; end - if (NUM_BANKS != 1) begin + end + + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx + if (NUM_BANKS != 1) begin : g_banks assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; - end else begin + end else begin : g_1bank assign req_bank_idx[i] = '0; end end - for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_src_valid assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i]; end @@ -232,30 +235,30 @@ module VX_operands import VX_gpu_pkg::*; #( ); wire [PER_BANK_ADDRW-1:0] gpr_wr_addr; - if (ISSUE_WIS != 0) begin + if (ISSUE_WIS != 0) begin : g_gpr_wr_addr assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis}; - end else begin + end else begin : g_gpr_wr_addr_no_wis assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS]; end wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx; - if (NUM_BANKS != 1) begin + if (NUM_BANKS != 1) begin : g_gpr_wr_bank_idx assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0]; - end else begin + end else begin : g_gpr_wr_bank_idx_0 assign gpr_wr_bank_idx = '0; end - for (genvar b = 0; b < NUM_BANKS; ++b) begin : gpr_rams + for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams wire gpr_wr_enabled; - if (BANK_SEL_BITS != 0) begin + if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled assign gpr_wr_enabled = writeback_if.valid && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); - end else begin + end else begin : g_gpr_wr_enabled_1bank assign gpr_wr_enabled = writeback_if.valid; end wire [BYTEENW-1:0] wren; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_wren assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}}; end diff --git a/hw/rtl/core/VX_pe_switch.sv 
b/hw/rtl/core/VX_pe_switch.sv index 384fce329..163d76c64 100644 --- a/hw/rtl/core/VX_pe_switch.sv +++ b/hw/rtl/core/VX_pe_switch.sv @@ -54,7 +54,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #( .ready_out (pe_req_ready) ); - for (genvar i = 0; i < PE_COUNT; ++i) begin + for (genvar i = 0; i < PE_COUNT; ++i) begin : g_execute_out_if assign execute_out_if[i].valid = pe_req_valid[i]; assign execute_out_if[i].data = pe_req_data[i]; assign pe_req_ready[i] = execute_out_if[i].ready; @@ -66,7 +66,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #( wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data; wire [PE_COUNT-1:0] pe_rsp_ready; - for (genvar i = 0; i < PE_COUNT; ++i) begin + for (genvar i = 0; i < PE_COUNT; ++i) begin : g_commit_in_if assign pe_rsp_valid[i] = commit_in_if[i].valid; assign pe_rsp_data[i] = commit_in_if[i].data; assign commit_in_if[i].ready = pe_rsp_ready[i]; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 77e00156b..e7937fe49 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -78,7 +78,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid; wire [`NUM_ALU_BLOCKS-1:0] branch_taken; wire [`NUM_ALU_BLOCKS-1:0][`PC_BITS-1:0] branch_dest; - for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin : g_branch_init assign branch_valid[i] = branch_ctl_if[i].valid; assign branch_wid[i] = branch_ctl_if[i].wid; assign branch_taken[i] = branch_ctl_if[i].taken; @@ -322,7 +322,7 @@ module VX_schedule import VX_gpu_pkg::*; #( ); wire [`NUM_WARPS-1:0][(`NUM_THREADS + `PC_BITS)-1:0] schedule_data; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_schedule_data assign schedule_data[i] = {thread_masks[i], warp_pcs[i]}; end @@ -367,7 +367,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; - for 
(genvar i = 0; i < `NUM_WARPS; ++i) begin : pending_sizes + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_sizes VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index b4fd5c08c..1fe9a7f44 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -66,13 +66,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); wire [PER_ISSUE_WARPS-1:0] stg_valid_in; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in assign stg_valid_in[w] = staging_if[w].valid; end wire perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready)); - always @(posedge clk) begin + always @(posedge clk) begin : g_perf_stalls if (reset) begin perf_stalls <= '0; end else begin @@ -80,7 +80,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_units_uses always @(posedge clk) begin if (reset) begin perf_units_uses[i] <= '0; @@ -90,7 +90,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_perf_sfu_uses always @(posedge clk) begin if (reset) begin perf_sfu_uses[i] <= '0; @@ -101,7 +101,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `endif - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : stanging_bufs + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stanging_bufs VX_pipe_buffer #( .DATAW (DATAW) ) stanging_buf ( @@ -116,7 +116,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( ); end - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_scoreboard reg [`NUM_REGS-1:0] inuse_regs; reg [NUM_OPDS-1:0] operands_busy, 
operands_busy_n; @@ -233,7 +233,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in; wire [PER_ISSUE_WARPS-1:0] arb_ready_in; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_arb_data_in assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w]; assign arb_data_in[w] = staging_if[w].data; assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w]; diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index c5542e137..7955437a6 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -45,7 +45,7 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_push = valid && split.valid && split.is_dvg; wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin : ipdom_stacks + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), .DEPTH (`DV_STACK_SIZE), diff --git a/hw/rtl/core/VX_wctl_unit.sv b/hw/rtl/core/VX_wctl_unit.sv index 132f679d4..bb85b70c9 100644 --- a/hw/rtl/core/VX_wctl_unit.sv +++ b/hw/rtl/core/VX_wctl_unit.sv @@ -50,9 +50,9 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( wire is_bar = (execute_if.data.op_type == `INST_SFU_BAR); wire [`UP(LANE_BITS)-1:0] tid; - if (LANE_BITS != 0) begin + if (LANE_BITS != 0) begin : g_tid assign tid = execute_if.data.tid[0 +: LANE_BITS]; - end else begin + end else begin : g_no_tid assign tid = 0; end @@ -63,7 +63,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( wire not_pred = execute_if.data.op_args.wctl.is_neg; wire [NUM_LANES-1:0] taken; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_taken assign taken[i] = (execute_if.data.rs1_data[i][0] ^ not_pred); end @@ -131,7 +131,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( // wspawn wire [`NUM_WARPS-1:0] wspawn_wmask; - for (genvar i = 0; i < `NUM_WARPS; 
++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_wspawn_wmask assign wspawn_wmask[i] = (i < rs1_data[`NW_BITS:0]) && (i != execute_if.data.wid); end assign wspawn.valid = is_wspawn; @@ -162,7 +162,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( assign warp_ctl_if.sjoin = sjoin_r; assign warp_ctl_if.barrier = barrier_r; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit_if assign commit_if.data.data[i] = `XLEN'(dvstack_ptr); end diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index b3d1e099a..2d0d52753 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -58,7 +58,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: `INST_FRM_BITS] = frm; assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof; @@ -92,12 +92,12 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin : fcvt_units + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fcvt_units VX_fcvt_unit #( .LATENCY (`LATENCY_FCVT), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 1a1da2758..2238307a6 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -56,7 +56,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = 
dataa[i]; assign data_in[i][32 +: 32] = datab[i]; assign data_in[i][64 +: `INST_FRM_BITS] = frm; @@ -89,7 +89,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -98,7 +98,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs acl_fdiv fdiv ( .clk (clk), .areset (1'b0), @@ -116,7 +116,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs wire [3:0] tuser; xil_fdiv fdiv ( .aclk (clk), @@ -138,7 +138,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin : fdivs + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 9670241b3..e900e105c 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -124,7 +124,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end generate - begin : fma + begin : g_fma reg [NUM_LANES-1:0][`XLEN-1:0] result_fma; reg [NUM_LANES-1:0][63:0] result_fadd; @@ -200,7 +200,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fdiv + begin : g_fdiv reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r; reg [NUM_LANES-1:0][63:0] result_fdiv; @@ -239,7 +239,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fsqrt + begin : g_fsqrt reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r; reg [NUM_LANES-1:0][63:0] result_fsqrt; @@ -278,7 +278,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fcvt + begin : g_fcvt reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt; reg 
[NUM_LANES-1:0][63:0] result_itof; @@ -342,7 +342,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fncp + begin : g_fncp reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp; reg [NUM_LANES-1:0][63:0] result_fclss; @@ -449,7 +449,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( wire [NUM_FPC-1:0][RSP_DATAW-1:0] per_core_data_out; - for (genvar i = 0; i < NUM_FPC; ++i) begin + for (genvar i = 0; i < NUM_FPC; ++i) begin : g_per_core_data_out assign per_core_data_out[i] = {per_core_result[i], per_core_has_fflags[i], per_core_fflags[i], per_core_tag_out[i]}; end diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index a04f96c3b..af75c8a75 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -83,7 +83,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0][31:0] datab_s; wire [NUM_LANES-1:0][31:0] datac_s; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data assign dataa_s[i] = dataa[i][31:0]; assign datab_s[i] = datab[i][31:0]; assign datac_s[i] = datac[i][31:0]; @@ -111,7 +111,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (per_core_ready_in) ); - for (genvar i = 0; i < NUM_FPCORES; ++i) begin + for (genvar i = 0; i < NUM_FPCORES; ++i) begin : g_per_core_data_in assign { per_core_mask_in[i], per_core_tag_in[i], @@ -211,7 +211,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (div_sqrt_ready_in) ); - for (genvar i = 0; i < 2; ++i) begin + for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_data_in assign { div_sqrt_mask_in[i], div_sqrt_tag_in[i], @@ -271,7 +271,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( ); wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; - for (genvar i = 0; i < 2; ++i) begin + for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_arb_data_in assign div_sqrt_arb_data_in[i] = { div_sqrt_result[i], div_sqrt_has_fflags[i], @@ -403,7 +403,7 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar 
i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result `ifdef FPU_RV64F reg [`XLEN-1:0] result_w; always @(*) begin diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 8ab5b10b3..e793ff55b 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -63,7 +63,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( reg [NUM_LANES-1:0][31:0] a, b, c; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_select always @(*) begin if (is_madd) begin // MADD / MSUB / NMADD / NMSUB @@ -86,7 +86,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( end end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = a[i]; assign data_in[i][32 +: 32] = b[i]; assign data_in[i][64 +: 32] = c[i]; @@ -120,7 +120,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -129,7 +129,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin : fmas + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas acl_fmadd fmadd ( .clk (clk), .areset (1'b0), @@ -147,7 +147,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin : fmas + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas wire [2:0] tuser; xil_fma fma ( @@ -172,7 +172,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin : fmas + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 030ae3557..15a6c8d52 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ 
-162,7 +162,7 @@ module VX_fpu_fpnew end `UNUSED_VAR (mask_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin : fpnew_cores + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_fpnew_coreses wire [(TAG_WIDTH+1)-1:0] fpu_tag; wire fpu_valid_out_uq; wire fpu_ready_in_uq; @@ -201,7 +201,7 @@ module VX_fpu_fpnew `UNUSED_PIN (busy_o) ); - if (i == 0) begin + if (i == 0) begin : g_output_0 assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag; assign fpu_valid_out = fpu_valid_out_uq; assign fpu_ready_in = fpu_ready_in_uq; diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index e39af4296..21162dd6c 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -57,7 +57,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; assign data_in[i][64 +: `INST_FRM_BITS] = frm; @@ -91,12 +91,12 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin : fncp_units + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fncp_units VX_fncp_unit #( .LATENCY (`LATENCY_FNCP), .OUT_REG (1) diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index 557e21f20..fbfb86175 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -55,7 +55,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = 
dataa[i]; assign data_in[i][32 +: `INST_FRM_BITS] = frm; end @@ -87,7 +87,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `UNUSED_VAR (pe_data_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -96,7 +96,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts acl_fsqrt fsqrt ( .clk (clk), .areset (1'b0), @@ -113,7 +113,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts wire tuser; xil_fsqrt fsqrt ( @@ -134,7 +134,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin : fsqrts + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts reg [63:0] r; `UNUSED_VAR (r) fflags_t f; diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 61322f673..8d308ec36 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -67,19 +67,19 @@ module VX_avs_adapter #( wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_bank_sel assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin + end else begin : g_bank_sel assign req_bank_sel = '0; end assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS]; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin : pending_sizes + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_pending_sizes VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( @@ 
-95,7 +95,7 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_req_queues + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_req_queues VX_fifo_queue #( .DATAW (TAG_WIDTH), .DEPTH (RD_QUEUE_SIZE) @@ -114,7 +114,7 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin : req_out_bufs + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_out_bufs wire valid_out; wire rw_out; wire [DATA_SIZE-1:0] byteen_out; @@ -151,11 +151,7 @@ module VX_avs_adapter #( assign bank_req_ready[i] = ready_out_w && ~req_queue_going_full[i]; end - if (NUM_BANKS > 1) begin - assign mem_req_ready = bank_req_ready[req_bank_sel]; - end else begin - assign mem_req_ready = bank_req_ready; - end + assign mem_req_ready = bank_req_ready[req_bank_sel]; // Responses handling ///////////////////////////////////////////////////// @@ -166,7 +162,7 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out; wire [NUM_BANKS-1:0] rsp_queue_empty; - for (genvar i = 0; i < NUM_BANKS; ++i) begin : rd_rsp_queues + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_rsp_queues VX_fifo_queue #( .DATAW (DATA_WIDTH), .DEPTH (RD_QUEUE_SIZE) @@ -185,8 +181,8 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign rsp_arb_valid_in[i] = !rsp_queue_empty[i]; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rsp_arbs + assign rsp_arb_valid_in[i] = ~rsp_queue_empty[i]; assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]}; assign req_queue_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i]; end diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 25ce1081b..4755764a4 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -95,9 +95,9 @@ module VX_axi_adapter #( wire [BANK_ADDRW-1:0] req_bank_sel; - if (NUM_BANKS > 1) begin + if (NUM_BANKS > 1) begin : g_req_bank_sel assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end 
else begin + end else begin : g_req_bank_sel_0 assign req_bank_sel = '0; end @@ -106,7 +106,7 @@ module VX_axi_adapter #( reg [NUM_BANKS-1:0] m_axi_aw_ack; reg [NUM_BANKS-1:0] m_axi_w_ack; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; always @(posedge clk) begin @@ -129,20 +129,16 @@ module VX_axi_adapter #( wire axi_write_ready [NUM_BANKS]; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) && (m_axi_wready[i] || m_axi_w_ack[i]); end - // Vortex request ack - if (NUM_BANKS > 1) begin - assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - end else begin - assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0]; - end + // request ack + assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; // AXI write request address channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_awid[i] = mem_req_tag; @@ -157,7 +153,7 @@ module VX_axi_adapter #( end // AXI write request data channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; @@ -165,7 +161,7 @@ module VX_axi_adapter #( end // AXI write response channel (ignore) - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_rsp `UNUSED_VAR (m_axi_bvalid[i]) `UNUSED_VAR (m_axi_bid[i]) `UNUSED_VAR (m_axi_bresp[i]) @@ -174,7 +170,7 @@ module VX_axi_adapter #( end // AXI read request channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; assign m_axi_arid[i] = mem_req_tag; @@ -196,7 +192,7 @@ module VX_axi_adapter #( `UNUSED_VAR (m_axi_rlast) - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; diff --git a/hw/rtl/libs/VX_bits_insert.sv b/hw/rtl/libs/VX_bits_insert.sv index f0f00a2b5..dee8141bb 100644 --- a/hw/rtl/libs/VX_bits_insert.sv +++ b/hw/rtl/libs/VX_bits_insert.sv @@ -1,10 +1,10 
@@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,19 +19,19 @@ module VX_bits_insert #( parameter S = 1, parameter POS = 0 ) ( - input wire [N-1:0] data_in, - input wire [`UP(S)-1:0] ins_in, + input wire [N-1:0] data_in, + input wire [`UP(S)-1:0] ins_in, output wire [N+S-1:0] data_out -); - if (S == 0) begin +); + if (S == 0) begin : g_passthru `UNUSED_VAR (ins_in) assign data_out = data_in; - end else begin - if (POS == 0) begin + end else begin : g_insert + if (POS == 0) begin : g_pos_0 assign data_out = {data_in, ins_in}; - end else if (POS == N) begin + end else if (POS == N) begin : g_pos_N assign data_out = {ins_in, data_in}; - end else begin + end else begin : g_pos assign data_out = {data_in[N-1:POS], ins_in, data_in[POS-1:0]}; end end diff --git a/hw/rtl/libs/VX_bits_remove.sv b/hw/rtl/libs/VX_bits_remove.sv index bc2f60a70..159bd4993 100644 --- a/hw/rtl/libs/VX_bits_remove.sv +++ b/hw/rtl/libs/VX_bits_remove.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,17 +19,19 @@ module VX_bits_remove #( parameter S = 1, parameter POS = 0 ) ( - input wire [N-1:0] data_in, + input wire [N-1:0] data_in, output wire [N-S-1:0] data_out ); `STATIC_ASSERT (((0 == S) || ((POS + S) <= N)), ("invalid parameter")) - - if (POS == 0 || S == 0) begin + + if (S == 0) begin : g_passthru + assign data_out = data_in; + end else if (POS == 0) begin : g_pos_0 assign data_out = data_in[N-1:S]; - end else if ((POS + S) < N) begin - assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]}; - end else begin + end else if ((POS + S) == N) begin : g_pos_N assign data_out = data_in[POS-1:0]; + end else begin : g_pos + assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]}; end `UNUSED_VAR (data_in) diff --git a/hw/rtl/libs/VX_bypass_buffer.sv b/hw/rtl/libs/VX_bypass_buffer.sv index 4eefce440..14079395b 100644 --- a/hw/rtl/libs/VX_bypass_buffer.sv +++ b/hw/rtl/libs/VX_bypass_buffer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,30 +25,25 @@ module VX_bypass_buffer #( parameter DATAW = 1, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); - if (PASSTHRU != 0) begin - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign ready_in = ready_out; - assign valid_out = valid_in; - assign data_out = data_in; - end else begin +); + if (PASSTHRU == 0) begin : g_buffer + reg [DATAW-1:0] buffer; reg has_data; always @(posedge clk) begin if (reset) begin has_data <= 0; - end else begin + end else begin if (ready_out) begin has_data <= 0; end else if (~has_data) begin @@ -63,7 +58,16 @@ module VX_bypass_buffer #( assign ready_in = ready_out || ~has_data; assign data_out = has_data ? buffer : data_in; assign valid_out = valid_in || has_data; - end + + end else begin : g_passthru + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + + end else endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 167042a3a..ff803b910 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -26,7 +26,7 @@ module VX_cyclic_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -36,7 +36,7 @@ module VX_cyclic_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_arbiter localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index 45b37b1db..c5c7b8706 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -27,14 +27,14 @@ module VX_decoder #( input wire [M-1:0] valid_in, output wire [D-1:0][M-1:0] data_out 
); - if (MODEL == 1) begin + if (MODEL == 1) begin : g_model1 reg [D-1:0][M-1:0] data_out_w; always @(*) begin data_out_w = '0; data_out_w[data_in] = valid_in; end assign data_out = data_out_w; - end else begin + end else begin : g_model0 assign data_out = (D*M)'(valid_in) << (data_in * M); end diff --git a/hw/rtl/libs/VX_divider.sv b/hw/rtl/libs/VX_divider.sv index 551940da1..b8424843d 100644 --- a/hw/rtl/libs/VX_divider.sv +++ b/hw/rtl/libs/VX_divider.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,7 +24,7 @@ module VX_divider #( parameter LATENCY = 0 ) ( input wire clk, - input wire enable, + input wire enable, input wire [N_WIDTH-1:0] numer, input wire [D_WIDTH-1:0] denom, output wire [Q_WIDTH-1:0] quotient, @@ -37,7 +37,7 @@ module VX_divider #( wire [D_WIDTH-1:0] remainder_unqual; lpm_divide divide ( - .clock (clk), + .clock (clk), .clken (enable), .numer (numer), .denom (denom), @@ -47,7 +47,7 @@ module VX_divider #( defparam divide.lpm_type = "LPM_DIVIDE", - divide.lpm_widthn = N_WIDTH, + divide.lpm_widthn = N_WIDTH, divide.lpm_widthd = D_WIDTH, divide.lpm_nrepresentation = N_SIGNED ? "SIGNED" : "UNSIGNED", divide.lpm_drepresentation = D_SIGNED ? 
"SIGNED" : "UNSIGNED", @@ -62,36 +62,36 @@ module VX_divider #( reg [N_WIDTH-1:0] quotient_unqual; reg [D_WIDTH-1:0] remainder_unqual; - always @(*) begin + always @(*) begin begin if (N_SIGNED && D_SIGNED) begin quotient_unqual = $signed(numer) / $signed(denom); remainder_unqual = $signed(numer) % $signed(denom); - end + end else if (N_SIGNED && !D_SIGNED) begin quotient_unqual = $signed(numer) / denom; remainder_unqual = $signed(numer) % denom; - end + end else if (!N_SIGNED && D_SIGNED) begin quotient_unqual = numer / $signed(denom); remainder_unqual = numer % $signed(denom); - end + end else begin quotient_unqual = numer / denom; - remainder_unqual = numer % denom; + remainder_unqual = numer % denom; end end end - if (LATENCY == 0) begin + if (LATENCY == 0) begin : g_comb assign quotient = quotient_unqual [Q_WIDTH-1:0]; assign remainder = remainder_unqual [R_WIDTH-1:0]; - end else begin + end else begin : g_pipe reg [N_WIDTH-1:0] quotient_pipe [LATENCY-1:0]; reg [D_WIDTH-1:0] remainder_pipe [LATENCY-1:0]; - for (genvar i = 0; i < LATENCY; ++i) begin - always @(posedge clk) begin + for (genvar i = 0; i < LATENCY; ++i) begin : g_reg + always @(posedge clk) begin if (enable) begin quotient_pipe[i] <= (0 == i) ? quotient_unqual : quotient_pipe[i-1]; remainder_pipe[i] <= (0 == i) ? 
remainder_unqual : remainder_pipe[i-1]; @@ -101,7 +101,7 @@ module VX_divider #( assign quotient = quotient_pipe[LATENCY-1][Q_WIDTH-1:0]; assign remainder = remainder_pipe[LATENCY-1][R_WIDTH-1:0]; - end + end `endif diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 49f37caff..21ab03ad5 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -44,10 +44,10 @@ module VX_dp_ram #( `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) `define RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin \ - if (INIT_FILE != "") begin \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ initial $readmemh(INIT_FILE, ram); \ - end else begin \ + end else begin : g_value \ initial begin \ for (integer i = 0; i < SIZE; ++i) \ ram[i] = INIT_VALUE; \ @@ -58,17 +58,15 @@ module VX_dp_ram #( `UNUSED_PARAM (RW_ASSERT) `UNUSED_VAR (read) - if (WRENW > 1) begin - `RUNTIME_ASSERT(~write || (| wren), ("%t: invalid write enable mask", $time)) - end + `RUNTIME_ASSERT((((WRENW == 1) ) || ~write) || (| wren), ("%t: invalid write enable mask", $time)) - if (OUT_REG && !READ_ENABLE) begin + if (OUT_REG && !READ_ENABLE) begin : g_out_reg `UNUSED_PARAM (NO_RWCHECK) reg [DATAW-1:0] rdata_r; wire cs = read || write; - if (WRENW != 1) begin + if (WRENW != 1) begin : g_writeen `ifdef QUARTUS - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -86,7 +84,7 @@ module VX_dp_ram #( end end end - end else begin + end else begin : g_no_lutram reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -107,7 +105,7 @@ module VX_dp_ram #( end `else // default synthesis - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -125,7 +123,7 @@ module VX_dp_ram #( end end end - end else begin 
+ end else begin : g_no_lutram reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -145,8 +143,8 @@ module VX_dp_ram #( end end `endif - end else begin - if (LUTRAM != 0) begin + end else begin : g_no_writeen + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -161,7 +159,7 @@ module VX_dp_ram #( end end - end else begin + end else begin : g_no_lutram reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -178,13 +176,13 @@ module VX_dp_ram #( end end assign rdata = rdata_r; - end else begin + end else begin : g_no_out_reg // OUT_REG==0 || READ_ENABLE=1 wire [DATAW-1:0] rdata_w; `ifdef SYNTHESIS - if (WRENW > 1) begin + if (WRENW > 1) begin : g_writeen `ifdef QUARTUS - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -196,8 +194,8 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -209,7 +207,7 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin + end else begin : g_rwcheck reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -225,7 +223,7 @@ module VX_dp_ram #( end `else // default synthesis - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -237,8 +235,8 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION 
always @(posedge clk) begin @@ -250,7 +248,7 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin + end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -265,9 +263,9 @@ module VX_dp_ram #( end end `endif - end else begin + end else begin : g_no_writeen // (WRENW == 1) - if (LUTRAM != 0) begin + if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -276,8 +274,8 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -286,7 +284,7 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin + end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin @@ -304,7 +302,7 @@ module VX_dp_ram #( `RAM_INITIALIZATION wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin + for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; end @@ -320,9 +318,7 @@ module VX_dp_ram #( end end - if (LUTRAM || !NO_RWCHECK) begin - assign rdata_w = ram[raddr]; - end else begin + if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -340,13 +336,15 @@ module VX_dp_ram #( end assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; - if (RW_ASSERT) begin + if (RW_ASSERT) begin : g_rw_assert `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) end + end else begin : g_rdata_with_bypass + assign rdata_w = ram[raddr]; end `endif - if (OUT_REG != 0) begin + if (OUT_REG != 0) begin : g_rdata_req reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (READ_ENABLE && reset) begin @@ -356,7 +354,7 @@ module VX_dp_ram #( end end assign rdata = rdata_r; - end else begin + end else begin : g_rdata_comb assign rdata = rdata_w; end diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 3bfcdeb9c..5067a4dd3 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -31,7 +31,7 @@ module VX_elastic_buffer #( input wire ready_out, output wire valid_out ); - if (SIZE == 0) begin + if (SIZE == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -40,7 +40,7 @@ module VX_elastic_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (SIZE == 1) begin + end else if (SIZE == 1) begin : g_eb1 VX_pipe_buffer #( .DATAW (DATAW), @@ -56,7 +56,7 @@ module VX_elastic_buffer #( .ready_out (ready_out) ); - end else if (SIZE == 2 && LUTRAM == 0) begin + end else if (SIZE == 2 && LUTRAM == 0) begin : g_eb2 wire valid_out_t; wire [DATAW-1:0] data_out_t; @@ -90,7 +90,7 @@ module VX_elastic_buffer #( .ready_out (ready_out) ); - end else begin + end else begin : g_ebN wire empty, full; diff --git a/hw/rtl/libs/VX_encoder.sv b/hw/rtl/libs/VX_encoder.sv index 85d72ce52..ed65ed4f6 100644 --- a/hw/rtl/libs/VX_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -27,17 +27,17 @@ module VX_encoder #( output wire [LN-1:0] data_out, output wire valid_out ); - if (N == 1) begin + if (N == 1) begin : g_n1 assign data_out = 0; assign valid_out = data_in; - end else if (N == 2) begin + end else if (N == 2) begin : g_n2 assign data_out = data_in[!REVERSE]; assign valid_out = (| data_in); - 
end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 localparam M = 1 << LN; `IGNORE_UNOPTFLAT_BEGIN wire [LN-1:0][M-1:0] addr; @@ -47,21 +47,19 @@ module VX_encoder #( // base case, also handle padding for non-power of two inputs assign v[0] = REVERSE ? (M'(data_in) << (M - N)) : M'(data_in); - for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin + for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin : g_scan_l localparam SN = 1 << (LN - lvl); localparam SI = M / SN; localparam SW = lvl; - for (genvar s = 0; s < SN; ++s) begin + for (genvar s = 0; s < SN; ++s) begin : g_scan_s `IGNORE_UNOPTFLAT_BEGIN wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]}; `IGNORE_UNOPTFLAT_END - assign v[lvl][s*SI] = (| vs); - - if (lvl == 1) begin + if (lvl == 1) begin : g_lvl_1 assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE]; - end else begin + end else begin : g_lvl_n assign addr[lvl-1][s*SI +: SW] = { vs[!REVERSE], addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1] @@ -73,11 +71,11 @@ module VX_encoder #( assign data_out = addr[LN-1][LN-1:0]; assign valid_out = v[LN][0]; - end else if (MODEL == 2 && REVERSE == 0) begin + end else if (MODEL == 2 && REVERSE == 0) begin : g_model2 - for (genvar j = 0; j < LN; ++j) begin + for (genvar j = 0; j < LN; ++j) begin : g_data_out wire [N-1:0] mask; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_mask assign mask[i] = i[j]; end assign data_out[j] = | (mask & data_in); @@ -85,11 +83,11 @@ module VX_encoder #( assign valid_out = (| data_in); - end else begin + end else begin : g_model0 reg [LN-1:0] index_w; - if (REVERSE != 0) begin + if (REVERSE != 0) begin : g_msb always @(*) begin index_w = 'x; for (integer i = N-1; i >= 0; --i) begin @@ -98,7 +96,7 @@ module VX_encoder #( end end end - end else begin + end else begin : g_lsb always @(*) begin index_w = 'x; for (integer i = 0; i < N; ++i) begin diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 
dd772ea73..7eb760e6b 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -58,7 +58,7 @@ module VX_fifo_queue #( .size (size) ); - if (DEPTH == 1) begin + if (DEPTH == 1) begin : g_depth_1 reg [DATAW-1:0] head_r; @@ -70,11 +70,11 @@ module VX_fifo_queue #( assign data_out = head_r; - end else begin + end else begin : g_depth_n localparam ADDRW = `CLOG2(DEPTH); - if (OUT_REG != 0) begin + if (OUT_REG != 0) begin : g_out_reg wire [DATAW-1:0] dout; reg [DATAW-1:0] dout_r; @@ -128,7 +128,7 @@ module VX_fifo_queue #( assign data_out = dout_r; - end else begin + end else begin : g_no_out_reg reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] wr_ptr_r; diff --git a/hw/rtl/libs/VX_find_first.sv b/hw/rtl/libs/VX_find_first.sv index 18f345855..43666737c 100644 --- a/hw/rtl/libs/VX_find_first.sv +++ b/hw/rtl/libs/VX_find_first.sv @@ -33,20 +33,20 @@ module VX_find_first #( wire [TN-1:0][DATAW-1:0] d_n; `IGNORE_WARNINGS_END - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_reverse assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i]; assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i]; end - if (TL < (TN-N)) begin - for (genvar i = TL+N; i < TN; ++i) begin + if (TL < (TN-N)) begin : g_fill + for (genvar i = TL+N; i < TN; ++i) begin : g_i assign s_n[i] = 0; assign d_n[i] = '0; end end - for (genvar j = 0; j < LOGN; ++j) begin - for (genvar i = 0; i < (2**j); ++i) begin + for (genvar j = 0; j < LOGN; ++j) begin : g_scan + for (genvar i = 0; i < (2**j); ++i) begin : g_i assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1]; assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? 
d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1]; end diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 5cc9a9aab..5e090ebdd 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -27,7 +27,7 @@ module VX_generic_arbiter #( output wire grant_valid, input wire grant_ready ); - if (TYPE == "P") begin + if (TYPE == "P") begin : g_priority `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -42,7 +42,7 @@ module VX_generic_arbiter #( .grant_onehot (grant_onehot) ); - end else if (TYPE == "R") begin + end else if (TYPE == "R") begin : g_round_robin VX_rr_arbiter #( .NUM_REQS (NUM_REQS) @@ -56,7 +56,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "M") begin + end else if (TYPE == "M") begin : g_matrix VX_matrix_arbiter #( .NUM_REQS (NUM_REQS) @@ -70,7 +70,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "C") begin + end else if (TYPE == "C") begin : g_cyclic VX_cyclic_arbiter #( .NUM_REQS (NUM_REQS) @@ -84,7 +84,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else begin + end else begin : g_invalid `ERROR(("invalid parameter")); diff --git a/hw/rtl/libs/VX_lzc.sv b/hw/rtl/libs/VX_lzc.sv index 2589bf5a7..af2cb650d 100644 --- a/hw/rtl/libs/VX_lzc.sv +++ b/hw/rtl/libs/VX_lzc.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -23,18 +23,18 @@ module VX_lzc #( output wire [LOGN-1:0] data_out, output wire valid_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru `UNUSED_PARAM (REVERSE) assign data_out = '0; assign valid_out = data_in; - end else begin + end else begin : g_lzc wire [N-1:0][LOGN-1:0] indices; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_indices assign indices[i] = REVERSE ? LOGN'(i) : LOGN'(N-1-i); end @@ -42,7 +42,7 @@ module VX_lzc #( .N (N), .DATAW (LOGN), .REVERSE (!REVERSE) - ) find_first ( + ) find_first ( .data_in (indices), .valid_in (data_in), .data_out (data_out), @@ -50,6 +50,6 @@ module VX_lzc #( ); end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index eff4eb7e1..2840ef43e 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -26,7 +26,7 @@ module VX_matrix_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -36,32 +36,30 @@ module VX_matrix_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_arbiter reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; wire [NUM_REQS-1:0] grant; - for (genvar r = 0; r < NUM_REQS; ++r) begin - for (genvar c = 0; c < NUM_REQS; ++c) begin - if (r > c) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_pri_r + for (genvar c = 0; c < NUM_REQS; ++c) begin : g_pri_c + if (r > c) begin : g_row assign pri[r][c] = requests[c] && state[c][r]; - end - else if (r < c) begin + end else if (r < c) begin : g_col assign pri[r][c] = requests[c] && !state[r][c]; - end - else begin + end else begin : g_equal assign pri[r][c] = 0; end end end - for (genvar r = 0; r < NUM_REQS; ++r) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_grant assign grant[r] = requests[r] && ~(| pri[r]); end - for 
(genvar r = 0; r < NUM_REQS; ++r) begin - for (genvar c = r + 1; c < NUM_REQS; ++c) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_state_r + for (genvar c = r + 1; c < NUM_REQS; ++c) begin : g_state_c always @(posedge clk) begin if (reset) begin state[r][c] <= '0; diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 6ee6060b8..5f32e1aa1 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -76,7 +76,7 @@ module VX_mem_adapter #( `UNUSED_VAR (mem_rsp_tag_out) - if (DST_LDATAW > SRC_LDATAW) begin + if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -88,12 +88,12 @@ module VX_mem_adapter #( wire [P-1:0][SRC_DATA_WIDTH-1:0] mem_rsp_data_out_w = mem_rsp_data_out; - if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin + if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in_qual) assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin + end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in_qual; end @@ -125,7 +125,7 @@ module VX_mem_adapter #( assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]); assign mem_rsp_ready_out = mem_rsp_ready_in_w; - end else if (DST_LDATAW < SRC_LDATAW) begin + end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data reg [D-1:0] req_ctr, rsp_ctr; @@ -173,12 +173,12 @@ module VX_mem_adapter #( wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr}; - if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin + if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in_qual) assign mem_req_addr_out_w = 
mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin + end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in_qual; end @@ -194,17 +194,17 @@ module VX_mem_adapter #( assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out); assign mem_rsp_ready_out = mem_rsp_ready_in_w; - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) - if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin + if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in) assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin + end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in; end diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 84c417bd3..55cad2df7 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -115,11 +115,11 @@ module VX_mem_coalescer #( logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; - for (genvar i = 0; i < NUM_REQS; i++) begin + for (genvar i = 0; i < NUM_REQS; i++) begin : g_in_addr_offset assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; end - for (genvar i = 0; i < OUT_REQS; ++i) begin + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_seed_gen wire [DATA_RATIO-1:0] batch_mask; wire [DATA_RATIO_W-1:0] batch_idx; @@ -135,16 +135,19 @@ module VX_mem_coalescer #( ); wire [DATA_RATIO-1:0][OUT_ADDR_WIDTH-1:0] addr_base; - wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; - for (genvar j = 0; j < 
DATA_RATIO; ++j) begin + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_addr_base assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W]; + end + + wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags assign req_flags[j] = in_req_flags[DATA_RATIO * i + j]; end assign seed_addr_n[i] = addr_base[batch_idx]; assign seed_flags_n[i] = req_flags[batch_idx]; - for (genvar j = 0; j < DATA_RATIO; ++j) begin + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_addr_matches_n assign addr_matches_n[i * DATA_RATIO + j] = (addr_base[j] == seed_addr_n[i]); end end @@ -291,15 +294,19 @@ module VX_mem_coalescer #( assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout; wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n; - wire [NUM_REQS-1:0] in_rsp_mask_n; - - for (genvar i = 0; i < OUT_REQS; ++i) begin - for (genvar j = 0; j < DATA_RATIO; ++j) begin - assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j]; + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_in_rsp_data_n + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_j assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; end end + wire [NUM_REQS-1:0] in_rsp_mask_n; + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_in_rsp_mask_n + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_j + assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j]; + end + end + assign in_rsp_valid = out_rsp_valid; assign in_rsp_mask = in_rsp_mask_n; assign in_rsp_data = in_rsp_data_n; @@ -310,11 +317,15 @@ module VX_mem_coalescer #( wire [`UP(UUID_WIDTH)-1:0] out_req_uuid; wire [`UP(UUID_WIDTH)-1:0] out_rsp_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_out_req_uuid assign out_req_uuid = out_req_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; - assign out_rsp_uuid = 
out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_out_req_uuid_0 assign out_req_uuid = '0; + end + + if (UUID_WIDTH != 0) begin : g_out_rsp_uuid + assign out_rsp_uuid = out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin : g_out_rsp_uuid_0 assign out_rsp_uuid = '0; end diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 1dddaba11..9dada16bc 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -160,9 +160,9 @@ module VX_mem_scheduler #( wire reqq_ready_in; wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_reqq_tag_u_uuid assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - end else begin + end else begin : g_reqq_tag_u assign reqq_tag_u = ibuf_waddr; end @@ -220,7 +220,7 @@ module VX_mem_scheduler #( // Handle memory coalescing /////////////////////////////////////////////// - if (COALESCE_ENABLE) begin + if (COALESCE_ENABLE) begin : g_coalescer VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), @@ -273,8 +273,7 @@ module VX_mem_scheduler #( .out_rsp_ready (mem_rsp_ready) ); - end else begin - + end else begin : g_no_coalescer assign reqq_valid_s = reqq_valid; assign reqq_mask_s = reqq_mask; assign reqq_rw_s = reqq_rw; @@ -303,16 +302,16 @@ module VX_mem_scheduler #( wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; - for (genvar i = 0; i < MEM_BATCHES; ++i) begin - for (genvar j = 0; j < MEM_CHANNELS; ++j) begin + for (genvar i = 0; i < MEM_BATCHES; ++i) begin : g_mem_req_data_b + for (genvar j = 0; j < MEM_CHANNELS; ++j) begin : g_j localparam r = i * MEM_CHANNELS + j; - if (r < MERGED_REQS) begin + if (r < MERGED_REQS) begin : g_valid assign mem_req_mask_b[i][j] = reqq_mask_s[r]; assign mem_req_byteen_b[i][j] = reqq_byteen_s[r]; assign mem_req_addr_b[i][j] = reqq_addr_s[r]; assign mem_req_flags_b[i][j] = reqq_flags_s[r]; assign mem_req_data_b[i][j] = reqq_data_s[r]; - 
end else begin + end else begin : g_extra assign mem_req_mask_b[i][j] = 0; assign mem_req_byteen_b[i][j] = '0; assign mem_req_addr_b[i][j] = '0; @@ -329,7 +328,7 @@ module VX_mem_scheduler #( assign mem_req_flags_s = mem_req_flags_b[req_batch_idx]; assign mem_req_data_s = mem_req_data_b[req_batch_idx]; - if (MEM_BATCHES != 1) begin + if (MEM_BATCHES != 1) begin : g_batch reg [MEM_BATCH_BITS-1:0] req_batch_idx_r; wire is_degenerate_batch = ~(| mem_req_mask_s); @@ -354,7 +353,7 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs; wire [MEM_BATCH_BITS-1:0] req_batch_idx_last; - for (genvar i = 0; i < MEM_BATCHES; ++i) begin + for (genvar i = 0; i < MEM_BATCHES; ++i) begin : g_req_batch assign req_batch_valids[i] = (| mem_req_mask_b[i]); assign req_batch_idxs[i] = MEM_BATCH_BITS'(i); end @@ -375,7 +374,7 @@ module VX_mem_scheduler #( assign req_sent_all = mem_req_ready_b && (req_batch_idx_r == req_batch_idx_last); assign mem_req_tag_s = {reqq_tag_s, req_batch_idx}; - end else begin + end else begin : g_no_batch assign mem_req_valid_s = reqq_valid_s; assign req_batch_idx = '0; @@ -407,13 +406,13 @@ module VX_mem_scheduler #( wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - if (CORE_BATCHES > 1) begin + if (CORE_BATCHES > 1) begin : g_rsp_batch_idx assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; - end else begin + end else begin : g_rsp_batch_idx_0 assign rsp_batch_idx = '0; end - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; @@ -434,7 +433,7 @@ module VX_mem_scheduler #( end end - if (RSP_PARTIAL != 0) begin + if (RSP_PARTIAL != 0) begin : g_rsp_partial reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; @@ -451,14 +450,14 @@ module VX_mem_scheduler #( assign crsp_mask = curr_mask; assign 
crsp_sop = rsp_sop_r[ibuf_raddr]; - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam j = r % CORE_CHANNELS; assign crsp_data[r] = mem_rsp_data_s[j]; end assign mem_rsp_ready_s = crsp_ready; - end else begin + end else begin : g_rsp_full reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; reg [CORE_BATCHES-1:00][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; @@ -486,7 +485,7 @@ module VX_mem_scheduler #( assign crsp_mask = rsp_orig_mask[ibuf_raddr]; assign crsp_sop = 1'b1; - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; assign crsp_data[r] = rsp_store_n[i][j]; @@ -496,9 +495,9 @@ module VX_mem_scheduler #( end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_crsp_tag assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout}; - end else begin + end else begin : g_crsp_tag_0 assign crsp_tag = ibuf_dout; end @@ -524,9 +523,9 @@ module VX_mem_scheduler #( `ifdef SIMULATION wire [`UP(UUID_WIDTH)-1:0] req_dbg_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_dbg_uuid assign req_dbg_uuid = core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_dbg_uuid_0 assign req_dbg_uuid = '0; end @@ -566,11 +565,11 @@ module VX_mem_scheduler #( wire [`UP(UUID_WIDTH)-1:0] mem_rsp_dbg_uuid; wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_dbg_uuid assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_dbg_uuid_0 assign mem_req_dbg_uuid = '0; assign mem_rsp_dbg_uuid = '0; assign rsp_dbg_uuid = '0; diff --git a/hw/rtl/libs/VX_multiplier.sv 
b/hw/rtl/libs/VX_multiplier.sv index 2f046779f..11bf13a9f 100644 --- a/hw/rtl/libs/VX_multiplier.sv +++ b/hw/rtl/libs/VX_multiplier.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,7 +21,7 @@ module VX_multiplier #( parameter SIGNED = 0, parameter LATENCY = 0 ) ( - input wire clk, + input wire clk, input wire enable, input wire [A_WIDTH-1:0] dataa, input wire [B_WIDTH-1:0] datab, @@ -29,15 +29,15 @@ module VX_multiplier #( ); wire [R_WIDTH-1:0] prod_w; - if (SIGNED != 0) begin + if (SIGNED != 0) begin : g_prod_s assign prod_w = R_WIDTH'($signed(dataa) * $signed(datab)); - end else begin + end else begin : g_prod_u assign prod_w = R_WIDTH'(dataa * datab); end - - if (LATENCY == 0) begin + + if (LATENCY == 0) begin : g_passthru assign result = prod_w; - end else begin + end else begin : g_latency reg [LATENCY-1:0][R_WIDTH-1:0] prod_r; always @(posedge clk) begin if (enable) begin @@ -46,8 +46,8 @@ module VX_multiplier #( prod_r[i] <= prod_r[i-1]; end end - end - assign result = prod_r[LATENCY-1]; + end + assign result = prod_r[LATENCY-1]; end endmodule diff --git a/hw/rtl/libs/VX_mux.sv b/hw/rtl/libs/VX_mux.sv index f0bc78cae..19a06600f 100644 --- a/hw/rtl/libs/VX_mux.sv +++ b/hw/rtl/libs/VX_mux.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,13 +19,13 @@ module VX_mux #( parameter N = 1, parameter LN = `LOG2UP(N) ) ( - input wire [N-1:0][DATAW-1:0] data_in, - input wire [LN-1:0] sel_in, + input wire [N-1:0][DATAW-1:0] data_in, + input wire [LN-1:0] sel_in, output wire [DATAW-1:0] data_out -); - if (N > 1) begin +); + if (N > 1) begin : g_mux assign data_out = data_in[sel_in]; - end else begin + end else begin : g_passthru `UNUSED_VAR (sel_in) assign data_out = data_in; end diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index e13186015..8b97692f5 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -24,13 +24,13 @@ module VX_onehot_mux #( input wire [N-1:0] sel_in, output wire [DATAW-1:0] data_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru `UNUSED_VAR (sel_in) assign data_out = data_in; - end else if (LUT_OPT && N == 2) begin + end else if (LUT_OPT && N == 2) begin : g_lut2 `UNUSED_VAR (sel_in) assign data_out = sel_in[0] ? 
data_in[0] : data_in[1]; - end else if (LUT_OPT && N == 3) begin + end else if (LUT_OPT && N == 3) begin : g_lut3 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -41,7 +41,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 4) begin + end else if (LUT_OPT && N == 4) begin : g_lut4 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -53,7 +53,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 5) begin + end else if (LUT_OPT && N == 5) begin : g_lut5 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -66,7 +66,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 6) begin + end else if (LUT_OPT && N == 6) begin : g_lut6 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -80,7 +80,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 7) begin + end else if (LUT_OPT && N == 7) begin : g_lut7 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -95,7 +95,7 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (LUT_OPT && N == 8) begin + end else if (LUT_OPT && N == 8) begin : g_lut8 reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) @@ -111,19 +111,19 @@ module VX_onehot_mux #( endcase end assign data_out = data_out_w; - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 wire [N-1:0][DATAW-1:0] mask; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_mask assign mask[i] = {DATAW{sel_in[i]}} & data_in[i]; end - for (genvar i = 0; i < DATAW; ++i) begin + for (genvar i = 0; i < DATAW; ++i) begin : g_data_out wire [N-1:0] gather; - for (genvar j = 0; j < N; ++j) begin + for (genvar j = 0; j < N; ++j) begin : g_gather assign gather[j] = mask[j][i]; end assign data_out[i] = (| gather); end - end else if (MODEL == 2) begin + end else 
if (MODEL == 2) begin : g_model2 VX_find_first #( .N (N), .DATAW (DATAW) @@ -133,7 +133,7 @@ module VX_onehot_mux #( .data_out (data_out), `UNUSED_PIN (valid_out) ); - end else if (MODEL == 3) begin + end else if (MODEL == 3) begin : g_model3 reg [DATAW-1:0] data_out_w; always @(*) begin data_out_w = 'x; diff --git a/hw/rtl/libs/VX_onehot_shift.sv b/hw/rtl/libs/VX_onehot_shift.sv index 5ab5712a2..3222e3067 100644 --- a/hw/rtl/libs/VX_onehot_shift.sv +++ b/hw/rtl/libs/VX_onehot_shift.sv @@ -22,8 +22,8 @@ module VX_onehot_shift #( input wire [M-1:0] data_in1, output wire [N*M-1:0] data_out ); - for (genvar i = 0; i < M; ++i) begin - for (genvar j = 0; j < N; ++j) begin + for (genvar i = 0; i < M; ++i) begin : g_i + for (genvar j = 0; j < N; ++j) begin : g_j assign data_out[i*N + j] = data_in1[i] & data_in0[j]; end end diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index d96db52f0..58fced410 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -79,7 +79,7 @@ module VX_pe_serializer #( assign pe_enable = enable; - if (NUM_LANES != NUM_PES) begin + if (NUM_LANES != NUM_PES) begin : g_serialize localparam BATCH_SIZE = NUM_LANES / NUM_PES; localparam BATCH_SIZEW = `LOG2UP(BATCH_SIZE); @@ -87,7 +87,7 @@ module VX_pe_serializer #( reg [BATCH_SIZEW-1:0] batch_in_idx, batch_out_idx; reg batch_in_done, batch_out_done; - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_pe_data_out_w assign pe_data_out_w[i] = data_in[batch_in_idx * NUM_PES + i]; end @@ -125,7 +125,7 @@ module VX_pe_serializer #( assign data_out_u = data_out_n; assign tag_out_u = pe_tag_in; - end else begin + end else begin : g_passthru assign pe_data_out_w = data_in; diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 610c2bc04..475bbb36c 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -35,7 +35,7 @@ module VX_pending_size #( 
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) - if (SIZE == 1) begin + if (SIZE == 1) begin : g_size1 reg size_r; @@ -59,12 +59,12 @@ module VX_pending_size #( assign alm_full = 1'b1; assign size = size_r; - end else begin + end else begin : g_sizeN reg empty_r, alm_empty_r; reg full_r, alm_full_r; - if (INCRW != 1 || DECRW != 1) begin + if (INCRW != 1 || DECRW != 1) begin : g_wide_step localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); @@ -92,7 +92,7 @@ module VX_pending_size #( assign size = size_r; - end else begin + end else begin : g_single_step localparam ADDRW = `LOG2UP(SIZE); @@ -124,7 +124,7 @@ module VX_pending_size #( end end - if (SIZE > 2) begin + if (SIZE > 2) begin : g_sizeN wire is_empty_n = (used_r == ADDRW'(1)); wire is_full_n = (used_r == ADDRW'(SIZE-1)); @@ -152,7 +152,7 @@ module VX_pending_size #( end end - end else begin + end else begin : g_size2 always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv index 6ed6cf8ec..d71a78dac 100644 --- a/hw/rtl/libs/VX_pipe_buffer.sv +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -37,13 +37,13 @@ module VX_pipe_buffer #( input wire ready_out, output wire valid_out ); - if (DEPTH == 0) begin + if (DEPTH == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; assign valid_out = valid_in; assign data_out = data_in; - end else begin + end else begin : g_register wire [DEPTH:0] valid; `IGNORE_UNOPTFLAT_BEGIN wire [DEPTH:0] ready; @@ -54,7 +54,7 @@ module VX_pipe_buffer #( assign data[0] = data_in; assign ready_in = ready[0]; - for (genvar i = 0; i < DEPTH; ++i) begin + for (genvar i = 0; i < DEPTH; ++i) begin : g_pipe_regs assign ready[i] = (ready[i+1] || ~valid[i+1]); VX_pipe_register #( .DATAW (1 + DATAW), diff --git a/hw/rtl/libs/VX_pipe_register.sv b/hw/rtl/libs/VX_pipe_register.sv index 
69184898f..ef19cb58b 100644 --- a/hw/rtl/libs/VX_pipe_register.sv +++ b/hw/rtl/libs/VX_pipe_register.sv @@ -26,13 +26,13 @@ module VX_pipe_register #( input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out ); - if (DEPTH == 0) begin + if (DEPTH == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (enable) assign data_out = data_in; - end else if (DEPTH == 1) begin - if (RESETW == 0) begin + end else if (DEPTH == 1) begin : g_depth1 + if (RESETW == 0) begin : g_no_reset `UNUSED_VAR (reset) reg [DATAW-1:0] value; @@ -42,18 +42,7 @@ module VX_pipe_register #( end end assign data_out = value; - end else if (RESETW == DATAW) begin - reg [DATAW-1:0] value; - - always @(posedge clk) begin - if (reset) begin - value <= INIT_VALUE; - end else if (enable) begin - value <= data_in; - end - end - assign data_out = value; - end else begin + end else if (RESETW < DATAW) begin : g_partial_reset reg [DATAW-RESETW-1:0] value_d; reg [RESETW-1:0] value_r; @@ -71,12 +60,23 @@ module VX_pipe_register #( end end assign data_out = {value_r, value_d}; + end else begin : g_full_reset + reg [DATAW-1:0] value; + + always @(posedge clk) begin + if (reset) begin + value <= INIT_VALUE; + end else if (enable) begin + value <= data_in; + end + end + assign data_out = value; end - end else begin + end else begin : g_recursive wire [DEPTH:0][DATAW-1:0] data_delayed; assign data_delayed[0] = data_in; - - for (genvar i = 1; i <= DEPTH; ++i) begin + + for (genvar i = 1; i <= DEPTH; ++i) begin : g_pipe_reg VX_pipe_register #( .DATAW (DATAW), .RESETW (RESETW), diff --git a/hw/rtl/libs/VX_popcount.sv b/hw/rtl/libs/VX_popcount.sv index 3d94dd00f..fa8c49099 100644 --- a/hw/rtl/libs/VX_popcount.sv +++ b/hw/rtl/libs/VX_popcount.sv @@ -100,11 +100,11 @@ module VX_popcount #( `elsif QUARTUS assign data_out = $countones(data_in); `else - if (N == 1) begin + if (N == 1) begin : g_passthru assign data_out = data_in; - end else if (N <= 3) begin + end else if (N <= 3) begin : 
g_popcount3 reg [2:0] t_in; wire [1:0] t_out; @@ -115,7 +115,7 @@ module VX_popcount #( VX_popcount32 pc32(t_in, t_out); assign data_out = t_out[M-1:0]; - end else if (N <= 6) begin + end else if (N <= 6) begin : g_popcount6 reg [5:0] t_in; wire [2:0] t_out; @@ -126,7 +126,7 @@ module VX_popcount #( VX_popcount63 pc63(t_in, t_out); assign data_out = t_out[M-1:0]; - end else if (N <= 9) begin + end else if (N <= 9) begin : g_popcount9 reg [8:0] t_in; wire [4:0] t1_out; @@ -140,7 +140,7 @@ module VX_popcount #( VX_sum33 sum33(t1_out[2:0], {1'b0, t1_out[4:3]}, t2_out); assign data_out = t2_out[M-1:0]; - end else if (N <= 12) begin + end else if (N <= 12) begin : g_popcount12 reg [11:0] t_in; wire [5:0] t1_out; @@ -154,7 +154,7 @@ module VX_popcount #( VX_sum33 sum33(t1_out[2:0], t1_out[5:3], t2_out); assign data_out = t2_out[M-1:0]; - end else if (N <= 18) begin + end else if (N <= 18) begin : g_popcount18 reg [17:0] t_in; wire [8:0] t1_out; @@ -171,7 +171,7 @@ module VX_popcount #( VX_popcount32 pc32c({t1_out[2], t1_out[5], t1_out[8]}, t2_out[5:4]); assign data_out = {2'b0,t2_out[1:0]} + {1'b0,t2_out[3:2],1'b0} + {t2_out[5:4],2'b0}; - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 localparam PN = 1 << `CLOG2(N); localparam LOGPN = `CLOG2(PN); @@ -204,7 +204,7 @@ module VX_popcount #( assign data_out = tmp[LOGPN-1][0]; - end else begin + end else begin : g_model2 reg [M-1:0] cnt_w; diff --git a/hw/rtl/libs/VX_priority_arbiter.sv b/hw/rtl/libs/VX_priority_arbiter.sv index 13a940178..de5a3b3b1 100644 --- a/hw/rtl/libs/VX_priority_arbiter.sv +++ b/hw/rtl/libs/VX_priority_arbiter.sv @@ -23,13 +23,13 @@ module VX_priority_arbiter #( output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_encoder VX_priority_encoder #( .N (NUM_REQS) diff 
--git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index a3928492a..444c40683 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -27,34 +27,34 @@ module VX_priority_encoder #( ); wire [N-1:0] reversed; - if (REVERSE != 0) begin - for (genvar i = 0; i < N; ++i) begin + if (REVERSE != 0) begin : g_reverse + for (genvar i = 0; i < N; ++i) begin : g_i assign reversed[N-i-1] = data_in[i]; end - end else begin + end else begin : g_no_reverse assign reversed = data_in; end - if (N == 1) begin + if (N == 1) begin : g_n1 assign onehot_out = reversed; assign index_out = '0; assign valid_out = reversed; - end else if (N == 2) begin + end else if (N == 2) begin : g_n2 assign onehot_out = {reversed[1] && ~reversed[0], reversed[0]}; assign index_out = ~reversed[0]; assign valid_out = (| reversed); - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 `IGNORE_UNOPTFLAT_BEGIN wire [N-1:0] higher_pri_regs; `IGNORE_UNOPTFLAT_END assign higher_pri_regs[0] = 1'b0; - for (genvar i = 1; i < N; ++i) begin + for (genvar i = 1; i < N; ++i) begin : g_higher_pri_regs assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; end assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; @@ -68,7 +68,7 @@ module VX_priority_encoder #( .valid_out (valid_out) ); - end else if (MODEL == 2) begin + end else if (MODEL == 2) begin : g_model2 wire [N-1:0] scan_lo; @@ -91,7 +91,7 @@ module VX_priority_encoder #( assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - end else if (MODEL == 3) begin + end else if (MODEL == 3) begin : g_model3 assign onehot_out = reversed & -reversed; @@ -104,7 +104,7 @@ module VX_priority_encoder #( .valid_out (valid_out) ); - end else begin + end else begin : g_model0 reg [LN-1:0] index_w; reg [N-1:0] onehot_w; diff --git a/hw/rtl/libs/VX_reduce.sv b/hw/rtl/libs/VX_reduce.sv index ac0117567..15c0f0228 100644 --- a/hw/rtl/libs/VX_reduce.sv +++ 
b/hw/rtl/libs/VX_reduce.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,7 +14,7 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_reduce #( +module VX_reduce #( parameter DATAW_IN = 1, parameter DATAW_OUT = DATAW_IN, parameter N = 1, @@ -23,9 +23,9 @@ module VX_reduce #( input wire [N-1:0][DATAW_IN-1:0] data_in, output wire [DATAW_OUT-1:0] data_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru assign data_out = DATAW_OUT'(data_in[0]); - end else begin + end else begin : g_reduce localparam int N_A = N / 2; localparam int N_B = N - N_A; @@ -33,40 +33,46 @@ module VX_reduce #( wire [N_B-1:0][DATAW_IN-1:0] in_B; wire [DATAW_OUT-1:0] out_A, out_B; - for (genvar i = 0; i < N_A; i++) begin + for (genvar i = 0; i < N_A; i++) begin : g_in_A assign in_A[i] = data_in[i]; end - for (genvar i = 0; i < N_B; i++) begin + for (genvar i = 0; i < N_B; i++) begin : g_in_B assign in_B[i] = data_in[N_A + i]; end VX_reduce #( - .DATAW_IN (DATAW_IN), + .DATAW_IN (DATAW_IN), .DATAW_OUT (DATAW_OUT), .N (N_A), .OP (OP) ) reduce_A ( - .data_in (in_A), + .data_in (in_A), .data_out (out_A) ); VX_reduce #( - .DATAW_IN (DATAW_IN), + .DATAW_IN (DATAW_IN), .DATAW_OUT (DATAW_OUT), .N (N_B), .OP (OP) ) reduce_B ( - .data_in (in_B), + .data_in (in_B), .data_out (out_B) ); - if (OP == "+") assign data_out = out_A + out_B; - else if (OP == "^") assign data_out = out_A ^ out_B; - else if (OP == "&") assign data_out = out_A & out_B; - else if (OP == "|") assign data_out = out_A | out_B; - else `ERROR(("invalid parameter")); + if (OP == 
"+") begin : g_plus + assign data_out = out_A + out_B; + end else if (OP == "^") begin : g_xor + assign data_out = out_A ^ out_B; + end else if (OP == "&") begin : g_and + assign data_out = out_A & out_B; + end else if (OP == "|") begin : g_or + assign data_out = out_A | out_B; + end else begin : g_error + `ERROR(("invalid parameter")); + end end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_reset_relay.sv b/hw/rtl/libs/VX_reset_relay.sv index d7e735c25..0e2a7f4ca 100644 --- a/hw/rtl/libs/VX_reset_relay.sv +++ b/hw/rtl/libs/VX_reset_relay.sv @@ -22,19 +22,19 @@ module VX_reset_relay #( input wire reset, output wire [N-1:0] reset_o ); - if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin + if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin : g_relay localparam F = `UP(MAX_FANOUT); localparam R = N / F; `PRESERVE_NET reg [R-1:0] reset_r; - for (genvar i = 0; i < R; ++i) begin + for (genvar i = 0; i < R; ++i) begin : g_reset_r always @(posedge clk) begin reset_r[i] <= reset; end end - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_reset_o assign reset_o[i] = reset_r[i / F]; end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) assign reset_o = {N{reset}}; end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 894f4e312..3831238dc 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -28,7 +28,7 @@ module VX_rr_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -38,7 +38,7 @@ module VX_rr_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else if (LUT_OPT && NUM_REQS == 2) begin + end else if (LUT_OPT && NUM_REQS == 2) begin : g_lut2 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -66,7 +66,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign 
grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 3) begin + end else if (LUT_OPT && NUM_REQS == 3) begin : g_lut3 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -99,7 +99,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 4) begin + end else if (LUT_OPT && NUM_REQS == 4) begin : g_lut4 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -139,7 +139,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 5) begin + end else if (LUT_OPT && NUM_REQS == 5) begin : g_lut5 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -188,7 +188,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 6) begin + end else if (LUT_OPT && NUM_REQS == 6) begin : g_lut6 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -248,7 +248,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 7) begin + end else if (LUT_OPT && NUM_REQS == 7) begin : g_lut7 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -321,7 +321,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 8) begin + end else if (LUT_OPT && NUM_REQS == 8) begin : g_lut8 reg [LOG_NUM_REQS-1:0] grant_index_w; reg [NUM_REQS-1:0] grant_onehot_w; @@ -409,7 +409,7 @@ module VX_rr_arbiter #( assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 `IGNORE_UNOPTFLAT_BEGIN wire [NUM_REQS-1:0] masked_pri_reqs, unmasked_pri_reqs; @@ -419,12 +419,12 @@ module VX_rr_arbiter #( wire 
[NUM_REQS-1:0] masked_reqs = requests & reqs_mask; assign masked_pri_reqs[0] = 1'b0; - for (genvar i = 1; i < NUM_REQS; ++i) begin + for (genvar i = 1; i < NUM_REQS; ++i) begin : g_masked_pri_reqs assign masked_pri_reqs[i] = masked_pri_reqs[i-1] | masked_reqs[i-1]; end assign unmasked_pri_reqs[0] = 1'b0; - for (genvar i = 1; i < NUM_REQS; ++i) begin + for (genvar i = 1; i < NUM_REQS; ++i) begin : g_unmasked_pri_reqs assign unmasked_pri_reqs[i] = unmasked_pri_reqs[i-1] | requests[i-1]; end @@ -456,12 +456,12 @@ module VX_rr_arbiter #( .valid_out(grant_valid) ); - end else if (MODEL == 2) begin + end else if (MODEL == 2) begin : g_model2 reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] grant_table; reg [LOG_NUM_REQS-1:0] state; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_grant_table always @(*) begin grant_table[i] = 'x; for (integer j = NUM_REQS-1; j >= 0; --j) begin diff --git a/hw/rtl/libs/VX_scan.sv b/hw/rtl/libs/VX_scan.sv index 48de2964a..6effd5814 100644 --- a/hw/rtl/libs/VX_scan.sv +++ b/hw/rtl/libs/VX_scan.sv @@ -32,31 +32,31 @@ module VX_scan #( `IGNORE_UNOPTFLAT_END // reverses bits - if (REVERSE != 0) begin + if (REVERSE != 0) begin : g_data_in_reverse assign t[0] = data_in; - end else begin + end else begin : g_data_in_no_reverse assign t[0] = {<<{data_in}}; end // optimize for the common case of small and-scans - if ((N == 2) && (OP == "&")) begin + if ((N == 2) && (OP == "&")) begin : g_scan_n2_and assign t[LOGN] = {t[0][1], &t[0][1:0]}; - end else if ((N == 3) && (OP == "&")) begin + end else if ((N == 3) && (OP == "&")) begin : g_scan_n3_and assign t[LOGN] = {t[0][2], &t[0][2:1], &t[0][2:0]}; - end else if ((N == 4) && (OP == "&")) begin + end else if ((N == 4) && (OP == "&")) begin : g_scan_n4_and assign t[LOGN] = {t[0][3], &t[0][3:2], &t[0][3:1], &t[0][3:0]}; - end else begin + end else begin : g_scan // general case wire [N-1:0] fill; - for (genvar i = 0; i < LOGN; ++i) begin + for (genvar i = 0; i < LOGN; 
++i) begin : g_i wire [N-1:0] shifted = N'({fill, t[i]} >> (1< 1) begin + if (N > 1) begin : g_switch reg req_out_r [N]; reg rsp_out_r; @@ -34,7 +34,7 @@ module VX_scope_switch #( req_out_r[i] <= 0; end rsp_out_r <= 0; - end else begin + end else begin for (integer i = 0; i < N; ++i) begin req_out_r[i] <= req_in; end @@ -48,8 +48,8 @@ module VX_scope_switch #( assign req_out = req_out_r; assign rsp_out = rsp_out_r; - - end else begin + + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) diff --git a/hw/rtl/libs/VX_serial_div.sv b/hw/rtl/libs/VX_serial_div.sv index e7af40009..593be2d9a 100644 --- a/hw/rtl/libs/VX_serial_div.sv +++ b/hw/rtl/libs/VX_serial_div.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,7 +29,7 @@ module VX_serial_div #( input wire is_signed, input wire [LANES-1:0][WIDTHN-1:0] numer, - input wire [LANES-1:0][WIDTHD-1:0] denom, + input wire [LANES-1:0][WIDTHD-1:0] denom, output wire [LANES-1:0][WIDTHQ-1:0] quotient, output wire [LANES-1:0][WIDTHR-1:0] remainder @@ -49,14 +49,14 @@ module VX_serial_div #( reg [CNTRW-1:0] cntr; reg busy_r; - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_setup wire negate_numer = is_signed && numer[i][WIDTHN-1]; wire negate_denom = is_signed && denom[i][WIDTHD-1]; assign numer_qual[i] = negate_numer ? -$signed(numer[i]) : numer[i]; assign denom_qual[i] = negate_denom ? 
-$signed(denom[i]) : denom[i]; assign sub_result[i] = working[i][WIDTHN + MIN_ND : WIDTHN] - denom_r[i]; end - + always @(posedge clk) begin if (reset) begin busy_r <= 0; @@ -74,18 +74,21 @@ module VX_serial_div #( end end - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_div always @(posedge clk) begin if (strobe) begin working[i] <= {{WIDTHD{1'b0}}, numer_qual[i], 1'b0}; denom_r[i] <= denom_qual[i]; inv_quot[i] <= (denom[i] != 0) && is_signed && (numer[i][31] ^ denom[i][31]); inv_rem[i] <= is_signed && numer[i][31]; - end else if (busy_r) begin + end else if (busy_r) begin working[i] <= sub_result[i][WIDTHD] ? {working[i][WIDTHN+MIN_ND-1:0], 1'b0} : {sub_result[i][WIDTHD-1:0], working[i][WIDTHN-1:0], 1'b1}; end end + end + + for (genvar i = 0; i < LANES; ++i) begin : g_output wire [WIDTHQ-1:0] q = working[i][WIDTHQ-1:0]; wire [WIDTHR-1:0] r = working[i][WIDTHN+WIDTHR:WIDTHN+1]; assign quotient[i] = inv_quot[i] ? -$signed(q) : q; diff --git a/hw/rtl/libs/VX_serial_mul.sv b/hw/rtl/libs/VX_serial_mul.sv index 9369dfd10..d847b7111 100644 --- a/hw/rtl/libs/VX_serial_mul.sv +++ b/hw/rtl/libs/VX_serial_mul.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,7 +13,7 @@ `include "VX_platform.vh" -// Iterative integer multiplier +// Iterative integer multiplier // An adaptation of ZipCPU algorithm for a multi-lane elastic architecture. 
// https://zipcpu.com/zipcpu/2021/07/03/slowmpy.html @@ -65,7 +65,7 @@ module VX_serial_mul #( end end - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_mul wire [X_WIDTH-1:0] axb = b[i][0] ? a[i] : '0; always @(posedge clk) begin @@ -73,12 +73,12 @@ module VX_serial_mul #( if (SIGNED) begin a[i] <= X_WIDTH'($signed(dataa[i])); b[i] <= Y_WIDTH'($signed(datab[i])); - end else begin + end else begin a[i] <= dataa[i]; b[i] <= datab[i]; end p[i] <= 0; - end else if (busy_r) begin + end else if (busy_r) begin b[i] <= (b[i] >> 1); p[i][Y_WIDTH-2:0] <= p[i][Y_WIDTH-1:1]; if (SIGNED) begin @@ -93,9 +93,9 @@ module VX_serial_mul #( end end - if (SIGNED) begin + if (SIGNED) begin : g_signed assign result[i] = R_WIDTH'(p[i][P_WIDTH-1:0] + {1'b1, {(X_WIDTH-2){1'b0}}, 1'b1, {(Y_WIDTH){1'b0}}}); - end else begin + end else begin : g_unsigned assign result[i] = R_WIDTH'(p[i]); end end diff --git a/hw/rtl/libs/VX_shift_register.sv b/hw/rtl/libs/VX_shift_register.sv index 56726d2cb..b4809fe90 100644 --- a/hw/rtl/libs/VX_shift_register.sv +++ b/hw/rtl/libs/VX_shift_register.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -14,13 +14,13 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_shift_register #( +module VX_shift_register #( parameter DATAW = 1, parameter RESETW = 0, parameter DEPTH = 1, - parameter NUM_TAPS = 1, + parameter NUM_TAPS = 1, parameter TAP_START = 0, - parameter TAP_STRIDE = 1 + parameter TAP_STRIDE = 1 ) ( input wire clk, input wire reset, @@ -28,7 +28,7 @@ module VX_shift_register #( input wire [DATAW-1:0] data_in, output wire [NUM_TAPS-1:0][DATAW-1:0] data_out ); - if (DEPTH != 0) begin + if (DEPTH != 0) begin : g_shift_register reg [DEPTH-1:0][DATAW-1:0] entries; always @(posedge clk) begin @@ -36,7 +36,7 @@ module VX_shift_register #( if ((i >= (DATAW-RESETW)) && reset) begin for (integer j = 0; j < DEPTH; ++j) entries[j][i] <= 0; - end else if (enable) begin + end else if (enable) begin for (integer j = 1; j < DEPTH; ++j) entries[j-1][i] <= entries[j][i]; entries[DEPTH-1][i] <= data_in[i]; @@ -44,10 +44,10 @@ module VX_shift_register #( end end - for (genvar i = 0; i < NUM_TAPS; ++i) begin + for (genvar i = 0; i < NUM_TAPS; ++i) begin : g_data_out assign data_out[i] = entries[i * TAP_STRIDE + TAP_START]; end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (enable) diff --git a/hw/rtl/libs/VX_skid_buffer.sv b/hw/rtl/libs/VX_skid_buffer.sv index 53c213622..b77cce2a4 100644 --- a/hw/rtl/libs/VX_skid_buffer.sv +++ b/hw/rtl/libs/VX_skid_buffer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,19 +19,19 @@ module VX_skid_buffer #( parameter PASSTHRU = 0, parameter HALF_BW = 0, parameter OUT_REG = 0 -) ( +) ( input wire clk, input wire reset, - + input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out ); - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -40,7 +40,7 @@ module VX_skid_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (HALF_BW != 0) begin + end else if (HALF_BW != 0) begin : g_half_bw VX_toggle_buffer #( .DATAW (DATAW) @@ -55,7 +55,7 @@ module VX_skid_buffer #( .ready_out (ready_out) ); - end else begin + end else begin : g_full_bw VX_stream_buffer #( .DATAW (DATAW), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 3a457f8b8..ba824236e 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -37,13 +37,13 @@ module VX_stream_arb #( output wire [NUM_OUTPUTS-1:0][NUM_REQS_W-1:0] sel_out, input wire [NUM_OUTPUTS-1:0] ready_out ); - if (NUM_INPUTS > NUM_OUTPUTS) begin + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_more_inputs - if (NUM_OUTPUTS > 1) begin + if (NUM_OUTPUTS > 1) begin : g_multiple_outputs // (#inputs > #outputs) and (#outputs > 1) - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_arb_slices localparam SLICE_BEGIN = i * NUM_REQS; localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS); @@ -69,7 +69,7 @@ module VX_stream_arb #( ); end - end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin + end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin : g_fanout // (#inputs > max_fanout) and (#outputs == 1) @@ -81,7 +81,7 @@ module VX_stream_arb #( wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp; wire [NUM_SLICES-1:0] ready_tmp; - for (genvar i = 0; i < 
NUM_SLICES; ++i) begin + for (genvar i = 0; i < NUM_SLICES; ++i) begin : g_fanout_slice_arbs localparam SLICE_BEGIN = i * MAX_FANOUT; localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS); @@ -90,26 +90,24 @@ module VX_stream_arb #( wire [DATAW-1:0] data_tmp_u; wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u; - if (MAX_FANOUT != 1) begin - VX_stream_arb #( - .NUM_INPUTS (SLICE_SIZE), - .NUM_OUTPUTS (1), - .DATAW (DATAW), - .ARBITER (ARBITER), - .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3) - ) fanout_slice_arb ( - .clk (clk), - .reset (reset), - .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), - .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), - .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), - .valid_out (valid_tmp[i]), - .data_out (data_tmp_u), - .sel_out (sel_tmp_u), - .ready_out (ready_tmp[i]) - ); - end + VX_stream_arb #( + .NUM_INPUTS (SLICE_SIZE), + .NUM_OUTPUTS (1), + .DATAW (DATAW), + .ARBITER (ARBITER), + .MAX_FANOUT (MAX_FANOUT), + .OUT_BUF (3) + ) fanout_slice_arb ( + .clk (clk), + .reset (reset), + .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), + .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), + .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), + .valid_out (valid_tmp[i]), + .data_out (data_tmp_u), + .sel_out (sel_tmp_u), + .ready_out (ready_tmp[i]) + ); assign data_tmp[i] = {data_tmp_u, LOG_NUM_REQS2'(sel_tmp_u)}; end @@ -139,7 +137,7 @@ module VX_stream_arb #( assign data_out = data_out_u[LOG_NUM_REQS2 +: DATAW]; assign sel_out = {sel_out_u, data_out_u[0 +: LOG_NUM_REQS2]}; - end else begin + end else begin : g_one_output // (#inputs <= max_fanout) and (#outputs == 1) @@ -169,7 +167,7 @@ module VX_stream_arb #( assign data_in_w = data_in[arb_index]; assign arb_ready = ready_in_w; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_ready_in assign ready_in[i] = ready_in_w && arb_onehot[i]; end @@ -190,13 +188,13 @@ module VX_stream_arb #( ); end - end else if (NUM_OUTPUTS > NUM_INPUTS) begin + end else if 
(NUM_OUTPUTS > NUM_INPUTS) begin : g_more_outputs - if (NUM_INPUTS > 1) begin + if (NUM_INPUTS > 1) begin : g_multiple_inputs // (#inputs > 1) and (#outputs > #inputs) - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_arb_slices localparam SLICE_BEGIN = i * NUM_REQS; localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS); @@ -221,12 +219,12 @@ module VX_stream_arb #( `UNUSED_PIN (sel_out) ); - for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin + for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin : g_sel_out assign sel_out[j] = i; end end - end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin + end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin : g_fanout // (#inputs == 1) and (#outputs > max_fanout) @@ -255,7 +253,7 @@ module VX_stream_arb #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_SLICES; ++i) begin + for (genvar i = 0; i < NUM_SLICES; ++i) begin : g_fanout_slice_arbs localparam SLICE_BEGIN = i * MAX_FANOUT; localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS); @@ -281,7 +279,7 @@ module VX_stream_arb #( ); end - end else begin + end else begin : g_one_input // (#inputs == 1) and (#outputs <= max_fanout) @@ -309,7 +307,7 @@ module VX_stream_arb #( assign arb_ready = valid_in[0]; assign ready_in = arb_valid; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -330,11 +328,11 @@ module VX_stream_arb #( assign sel_out = 0; - end else begin + end else begin : g_passthru // #Inputs == #Outputs - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 5e8297f7a..7670b40fe 
100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -37,14 +37,8 @@ module VX_stream_buffer #( input wire ready_out, output wire valid_out ); - if (PASSTHRU != 0) begin - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign ready_in = ready_out; - assign valid_out = valid_in; - assign data_out = data_in; - end else begin - if (OUT_REG != 0) begin + if (PASSTHRU == 0) begin : g_buffer + if (OUT_REG != 0) begin : g_with_reg reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] buffer; @@ -83,7 +77,7 @@ module VX_stream_buffer #( assign valid_out = valid_out_r; assign data_out = data_out_r; - end else begin + end else begin : g_no_reg reg [1:0][DATAW-1:0] shift_reg; reg [1:0] fifo_state; @@ -115,6 +109,12 @@ module VX_stream_buffer #( assign data_out = shift_reg[fifo_state[1]]; end + end else begin : g_passthru + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; end endmodule diff --git a/hw/rtl/libs/VX_stream_pack.sv b/hw/rtl/libs/VX_stream_pack.sv index 7f024b184..944b120c2 100644 --- a/hw/rtl/libs/VX_stream_pack.sv +++ b/hw/rtl/libs/VX_stream_pack.sv @@ -38,7 +38,8 @@ module VX_stream_pack #( output wire [TAG_WIDTH-1:0] tag_out, input wire ready_out ); - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_pack + localparam LOG_NUM_REQS = `CLOG2(NUM_REQS); wire [LOG_NUM_REQS-1:0] grant_index; @@ -62,11 +63,11 @@ module VX_stream_pack #( wire [NUM_REQS-1:0] tag_matches; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_tag_matches assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]); end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_ready_in assign ready_in[i] = grant_ready & tag_matches[i]; end @@ -87,7 +88,7 @@ module VX_stream_pack #( .ready_out (ready_out) ); - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR 
(reset) diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index f3723ebb0..01217b668 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -36,18 +36,17 @@ module VX_stream_switch #( output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out, input wire [NUM_OUTPUTS-1:0] ready_out ); - if (NUM_INPUTS > NUM_OUTPUTS) begin - + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_more_inputs wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] valid_in_w; wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_w; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_data_in + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_INPUTS) begin + if (ii < NUM_INPUTS) begin : g_valid assign valid_in_w[i][j] = valid_in[ii]; assign data_in_w[i][j] = data_in[ii]; - end else begin + end else begin : g_extra assign valid_in_w[i][j] = 0; assign data_in_w[i][j] = '0; end @@ -58,21 +57,21 @@ module VX_stream_switch #( wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; wire [NUM_OUTPUTS-1:0] ready_out_w; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_data_out_w assign valid_out_w[i] = valid_in_w[i][sel_in[i]]; assign data_out_w[i] = data_in_w[i][sel_in[i]]; end - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_ready_out_w + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_INPUTS) begin + if (ii < NUM_INPUTS) begin : g_valid assign ready_in[ii] = ready_out_w[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end end end - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -89,22 +88,25 @@ module 
VX_stream_switch #( ); end - end else if (NUM_OUTPUTS > NUM_INPUTS) begin + end else if (NUM_OUTPUTS > NUM_INPUTS) begin : g_more_outputs wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_w; wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_w; - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_valid_out_w + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j assign valid_out_w[i][j] = valid_in[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end + end + + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_ready_in assign ready_in[i] = ready_out_w[i][sel_in[i]]; end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_out_buf + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_OUTPUTS) begin + if (ii < NUM_OUTPUTS) begin : g_valid VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -119,20 +121,20 @@ module VX_stream_switch #( .valid_out (valid_out[ii]), .ready_out (ready_out[ii]) ); - end else begin + end else begin : g_extra `UNUSED_VAR (valid_out_w[i][j]) assign ready_out_w[i][j] = '0; end end end - end else begin + end else begin : g_passthru // #Inputs == #Outputs `UNUSED_VAR (sel_in) - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index 37c238a77..b0cca961a 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -36,7 +36,7 @@ module VX_stream_unpack #( output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag_out, input wire [NUM_REQS-1:0] ready_out ); - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_unpack reg [NUM_REQS-1:0] rem_mask_r; wire [NUM_REQS-1:0] ready_out_w; @@ -56,7 +56,7 @@ 
module VX_stream_unpack #( assign ready_in = sent_all; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_outbuf VX_elastic_buffer #( .DATAW (DATA_WIDTH + TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -73,7 +73,7 @@ module VX_stream_unpack #( ); end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index f2d9aa856..db59f895e 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -43,9 +43,9 @@ module VX_stream_xbar #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) - if (NUM_INPUTS != 1) begin + if (NUM_INPUTS != 1) begin : g_multiple_inputs - if (NUM_OUTPUTS != 1) begin + if (NUM_OUTPUTS != 1) begin : g_multiple_outputs // (#inputs > 1) and (#outputs > 1) @@ -63,7 +63,7 @@ module VX_stream_xbar #( .data_out (per_output_ready_in_w) ); - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders VX_decoder #( .N (OUT_WIDTH) ) sel_in_decoder ( @@ -82,7 +82,7 @@ module VX_stream_xbar #( .data_out (per_output_valid_in_w) ); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_xbar_arbs VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -103,7 +103,7 @@ module VX_stream_xbar #( ); end - end else begin + end else begin : g_one_output // (#inputs >= 1) and (#outputs == 1) @@ -129,7 +129,7 @@ module VX_stream_xbar #( `UNUSED_VAR (sel_in) end - end else if (NUM_OUTPUTS != 1) begin + end else if (NUM_OUTPUTS != 1) begin : g_one_input // (#inputs == 1) and (#outputs > 1) @@ -147,7 +147,7 @@ module VX_stream_xbar #( assign ready_in[0] = ready_out_w[sel_in[0]]; assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ 
-167,7 +167,7 @@ module VX_stream_xbar #( assign sel_out = 0; - end else begin + end else begin : g_passthru // (#inputs == 1) and (#outputs == 1) diff --git a/hw/rtl/libs/VX_toggle_buffer.sv b/hw/rtl/libs/VX_toggle_buffer.sv index fb24a7f79..9d6b42720 100644 --- a/hw/rtl/libs/VX_toggle_buffer.sv +++ b/hw/rtl/libs/VX_toggle_buffer.sv @@ -1,11 +1,11 @@ // Copyright 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,23 +26,26 @@ module VX_toggle_buffer #( parameter DATAW = 1, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); - if (PASSTHRU != 0) begin +); + if (PASSTHRU != 0) begin : g_passthru + `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; - end else begin + + end else begin : g_buffer + reg [DATAW-1:0] buffer; reg has_data; @@ -54,7 +57,7 @@ module VX_toggle_buffer #( has_data <= valid_in; end else if (ready_out) begin has_data <= 0; - end + end end if (~has_data) begin buffer <= data_in; diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv index 7b2c273ef..769a78422 100644 --- a/hw/rtl/libs/VX_transpose.sv +++ b/hw/rtl/libs/VX_transpose.sv @@ -21,8 +21,8 @@ module VX_transpose #( input wire [N-1:0][M-1:0] data_in, output wire [M-1:0][N-1:0] data_out ); - for (genvar i = 0; i < N; ++i) begin - for (genvar j = 0; j < M; 
++j) begin + for (genvar i = 0; i < N; ++i) begin : g_i + for (genvar j = 0; j < M; ++j) begin : g_j assign data_out[j][i] = data_in[i][j]; end end diff --git a/hw/rtl/mem/VX_gbar_arb.sv b/hw/rtl/mem/VX_gbar_arb.sv index 9ff761ec2..2b0856980 100644 --- a/hw/rtl/mem/VX_gbar_arb.sv +++ b/hw/rtl/mem/VX_gbar_arb.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -33,7 +33,7 @@ module VX_gbar_arb #( wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; wire [NUM_REQS-1:0] req_ready_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_data_in assign req_valid_in[i] = bus_in_if[i].req_valid; assign req_data_in[i] = {bus_in_if[i].req_id, bus_in_if[i].req_size_m1, bus_in_if[i].req_core_id}; assign bus_in_if[i].req_ready = req_ready_in[i]; @@ -71,7 +71,7 @@ module VX_gbar_arb #( rsp_id <= bus_out_if.rsp_id; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_bus_in_if assign bus_in_if[i].rsp_valid = rsp_valid; assign bus_in_if[i].rsp_id = rsp_id; end diff --git a/hw/rtl/mem/VX_lmem_switch.sv b/hw/rtl/mem/VX_lmem_switch.sv index 642907785..345f357a3 100644 --- a/hw/rtl/mem/VX_lmem_switch.sv +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -32,7 +32,7 @@ module VX_lmem_switch import VX_gpu_pkg::*; #( wire req_global_ready; wire req_local_ready; - for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin + for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin : g_is_addr_local_mask assign is_addr_local_mask[i] = lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL]; 
end diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 578f4552b..7131c3f21 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -67,18 +67,18 @@ module VX_local_mem import VX_gpu_pkg::*; #( // bank selection wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + if (NUM_BANKS > 1) begin : g_req_bank_idx + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_bank_idxs assign req_bank_idx[i] = mem_bus_if[i].req_data.addr[0 +: BANK_SEL_BITS]; end - end else begin + end else begin : g_req_bank_idx_0 assign req_bank_idx = 0; end // bank addressing wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_bank_addr assign req_bank_addr[i] = mem_bus_if[i].req_data.addr[BANK_SEL_BITS +: BANK_ADDR_WIDTH]; `UNUSED_VAR (mem_bus_if[i].req_data.flags) end @@ -104,7 +104,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [`PERF_CTR_BITS-1:0] perf_collisions; `endif - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_data_in assign req_valid_in[i] = mem_bus_if[i].req_valid; assign req_data_in[i] = { mem_bus_if[i].req_data.rw, @@ -141,7 +141,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .ready_out (per_bank_req_ready) ); - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_req_data_soa assign { per_bank_req_rw[i], per_bank_req_addr[i], @@ -159,7 +159,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag; wire [NUM_BANKS-1:0] per_bank_rsp_ready; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_store wire bank_rsp_valid, bank_rsp_ready; VX_sp_ram #( @@ -216,7 +216,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][RSP_DATAW-1:0] 
per_bank_rsp_data_aos; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_rsp_data_aos assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; end @@ -244,7 +244,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_mem_bus_if assign mem_bus_if[i].rsp_valid = rsp_valid_out[i]; assign mem_bus_if[i].rsp_data = rsp_data_out[i]; assign rsp_ready_out[i] = mem_bus_if[i].rsp_ready; @@ -257,7 +257,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; wire [NUM_REQS-1:0] req_rw; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_rw assign req_rw[i] = mem_bus_if[i].req_data.rw; end @@ -303,11 +303,11 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] req_uuid; wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] rsp_uuid; - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign req_uuid[i] = mem_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_uuid assign req_uuid[i] = 0; assign rsp_uuid[i] = 0; end @@ -316,17 +316,17 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_req_uuid; wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_rsp_uuid; - for (genvar i = 0; i < NUM_BANKS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_req_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign per_bank_req_uuid[i] = per_bank_req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH]; assign per_bank_rsp_uuid[i] = per_bank_rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH]; - 
end else begin + end else begin : g_no_uuid assign per_bank_req_uuid[i] = 0; assign per_bank_rsp_uuid[i] = 0; end end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_trace always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin @@ -344,7 +344,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_bank_trace always @(posedge clk) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin diff --git a/hw/rtl/mem/VX_lsu_adapter.sv b/hw/rtl/mem/VX_lsu_adapter.sv index 822341692..eb5dd102a 100644 --- a/hw/rtl/mem/VX_lsu_adapter.sv +++ b/hw/rtl/mem/VX_lsu_adapter.sv @@ -41,7 +41,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][TAG_WIDTH-1:0] req_tag_out; wire [NUM_LANES-1:0] req_ready_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_req_data_in assign req_data_in[i] = { lsu_mem_if.req_data.rw, lsu_mem_if.req_data.addr[i], @@ -51,19 +51,6 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( }; end - for (genvar i = 0; i < NUM_LANES; ++i) begin - assign mem_bus_if[i].req_valid = req_valid_out[i]; - assign { - mem_bus_if[i].req_data.rw, - mem_bus_if[i].req_data.addr, - mem_bus_if[i].req_data.data, - mem_bus_if[i].req_data.byteen, - mem_bus_if[i].req_data.flags - } = req_data_out[i]; - assign mem_bus_if[i].req_data.tag = req_tag_out[i]; - assign req_ready_out[i] = mem_bus_if[i].req_ready; - end - VX_stream_unpack #( .NUM_REQS (NUM_LANES), .DATA_WIDTH (REQ_DATA_WIDTH), @@ -83,6 +70,19 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( .ready_out (req_ready_out) ); + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_bus_req + assign mem_bus_if[i].req_valid = req_valid_out[i]; + assign { + mem_bus_if[i].req_data.rw, + mem_bus_if[i].req_data.addr, + 
mem_bus_if[i].req_data.data, + mem_bus_if[i].req_data.byteen, + mem_bus_if[i].req_data.flags + } = req_data_out[i]; + assign mem_bus_if[i].req_data.tag = req_tag_out[i]; + assign req_ready_out[i] = mem_bus_if[i].req_ready; + end + // handle response packing wire [NUM_LANES-1:0] rsp_valid_out; @@ -90,7 +90,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][TAG_WIDTH-1:0] rsp_tag_out; wire [NUM_LANES-1:0] rsp_ready_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_bus_rsp assign rsp_valid_out[i] = mem_bus_if[i].rsp_valid; assign rsp_data_out[i] = mem_bus_if[i].rsp_data.data; assign rsp_tag_out[i] = mem_bus_if[i].rsp_data.tag; diff --git a/hw/rtl/mem/VX_mem_arb.sv b/hw/rtl/mem/VX_mem_arb.sv index f45a7ea75..321bbb270 100644 --- a/hw/rtl/mem/VX_mem_arb.sv +++ b/hw/rtl/mem/VX_mem_arb.sv @@ -47,7 +47,7 @@ module VX_mem_arb #( wire [NUM_OUTPUTS-1:0][`UP(LOG_NUM_REQS)-1:0] req_sel_out; wire [NUM_OUTPUTS-1:0] req_ready_out; - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_req_data_in assign req_valid_in[i] = bus_in_if[i].req_valid; assign req_data_in[i] = { bus_in_if[i].req_data.rw, @@ -78,7 +78,7 @@ module VX_mem_arb #( .ready_out (req_ready_out) ); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_bus_out_if wire [TAG_WIDTH-1:0] req_tag_out; VX_bits_insert #( .N (TAG_WIDTH), @@ -111,11 +111,11 @@ module VX_mem_arb #( wire [NUM_OUTPUTS-1:0][RSP_DATAW-1:0] rsp_data_in; wire [NUM_OUTPUTS-1:0] rsp_ready_in; - if (NUM_INPUTS > NUM_OUTPUTS) begin + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_rsp_enabled wire [NUM_OUTPUTS-1:0][LOG_NUM_REQS-1:0] rsp_sel_in; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in wire [TAG_WIDTH-1:0] rsp_tag_out; VX_bits_remove #( .N (TAG_WIDTH + LOG_NUM_REQS), @@ -130,9 +130,9 @@ module VX_mem_arb #( assign 
rsp_data_in[i] = {rsp_tag_out, bus_out_if[i].rsp_data.data}; assign bus_out_if[i].rsp_ready = rsp_ready_in[i]; - if (NUM_INPUTS > 1) begin + if (NUM_INPUTS > 1) begin : g_rsp_sel_in assign rsp_sel_in[i] = bus_out_if[i].rsp_data.tag[TAG_SEL_IDX +: LOG_NUM_REQS]; - end else begin + end else begin : g_no_rsp_sel_in assign rsp_sel_in[i] = '0; end end @@ -154,9 +154,9 @@ module VX_mem_arb #( .ready_out (rsp_ready_out) ); - end else begin + end else begin : g_passthru - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in assign rsp_valid_in[i] = bus_out_if[i].rsp_valid; assign rsp_data_in[i] = { bus_out_if[i].rsp_data.tag, @@ -185,7 +185,7 @@ module VX_mem_arb #( end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_output assign bus_in_if[i].rsp_valid = rsp_valid_out[i]; assign { bus_in_if[i].rsp_data.tag, diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 984686d3b..2def887e9 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -67,7 +67,7 @@ RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 591a2c226..24287aa56 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -48,7 +48,7 @@ endif VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += verilator.vlt VL_FLAGS += -DSIMULATION -DSV_DPI diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 63787e5b6..3e256ffb3 100644 --- 
a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -51,7 +51,7 @@ RTL_INCLUDE += -I$(AFU_DIR) TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) From 7208f251b771da6af7ed5f61df954d949009cbc9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 20:07:19 -0700 Subject: [PATCH 258/488] minor update --- hw/rtl/VX_cluster.sv | 13 ++++--------- hw/rtl/VX_define.vh | 22 ++++++++++++++-------- hw/rtl/VX_socket.sv | 6 +++--- hw/rtl/Vortex.sv | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 6109e873a..73d9b34ab 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -116,22 +116,17 @@ module VX_cluster import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - VX_dcr_bus_if socket_dcr_bus_tmp_if(); - wire is_dcr_base_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); - assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && is_dcr_base_addr; - assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr; - assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data; - wire [`NUM_SOCKETS-1:0] per_socket_busy; - VX_dcr_bus_if socket_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1)); - // Generate all sockets for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets `RESET_RELAY (socket_reset, reset); + VX_dcr_bus_if socket_dcr_bus_if(); + wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); + `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS 
> 1)) + VX_socket #( .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id), .INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id)) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index fdd066434..502f794bb 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -390,16 +390,22 @@ assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \ assign dst.rsp_ready = src.rsp_ready -`define BUFFER_DCR_BUS_IF(dst, src, enable) \ +`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \ /* verilator lint_off GENUNNAMED */ \ - if (enable) begin \ - reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ - always @(posedge clk) begin \ - __dst <= {src.write_valid, src.write_addr, src.write_data}; \ - end \ - assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \ + if (latency != 0) begin \ + VX_pipe_register #( \ + .DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ + .RESETW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ + .DEPTH (latency) \ + ) pipe_reg ( \ + .clk (clk), \ + .reset (reset), \ + .enable (1'b1), \ + .data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \ + .data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \ + ); \ end else begin \ - assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \ + assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \ end \ /* verilator lint_on GENUNNAMED */ diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 766ff468a..c2b5746e8 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -198,14 +198,14 @@ module VX_socket import VX_gpu_pkg::*; #( wire [`SOCKET_SIZE-1:0] per_core_busy; - VX_dcr_bus_if core_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1)); - // Generate all cores for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : g_cores `RESET_RELAY (core_reset, reset); + 
VX_dcr_bus_if core_dcr_bus_if(); + `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1)); + VX_core #( .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), .INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id)) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 0bdbec843..fad67be4c 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -134,7 +134,7 @@ module Vortex import VX_gpu_pkg::*; ( `RESET_RELAY (cluster_reset, reset); VX_dcr_bus_if cluster_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); + `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1)); VX_cluster #( .CLUSTER_ID (cluster_id), From 49ed88e59f51b73b33728911ac4f679a8e4b9318 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 20:12:18 -0700 Subject: [PATCH 259/488] minor update --- hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index c2b5746e8..69ff88a2c 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -204,7 +204,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (core_reset, reset); VX_dcr_bus_if core_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1)); + `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1)) VX_core #( .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index fad67be4c..e07aaae4d 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -134,7 +134,7 @@ module Vortex import VX_gpu_pkg::*; ( `RESET_RELAY (cluster_reset, reset); VX_dcr_bus_if cluster_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1)); + `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1)) VX_cluster #( .CLUSTER_ID (cluster_id), From 1ddd1ba1ccf36440f8f768f856d062b69cfbbbb5 Mon Sep 17 00:00:00 2001 From: Blaise Tine 
Date: Thu, 12 Sep 2024 20:15:41 -0700 Subject: [PATCH 260/488] minor update --- hw/rtl/libs/VX_decoder.sv | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index c5c7b8706..3e463326c 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -27,16 +27,16 @@ module VX_decoder #( input wire [M-1:0] valid_in, output wire [D-1:0][M-1:0] data_out ); + logic [D-1:0][M-1:0] shift; if (MODEL == 1) begin : g_model1 - reg [D-1:0][M-1:0] data_out_w; always @(*) begin - data_out_w = '0; - data_out_w[data_in] = valid_in; + shift = '0; + shift[data_in] = 1'b1; end - assign data_out = data_out_w; end else begin : g_model0 - assign data_out = (D*M)'(valid_in) << (data_in * M); + assign shift = (D*M)'(1'b1) << (data_in * M); end + assign data_out = {D{valid_in}} & shift; endmodule `TRACING_ON From 145eacc451ee552efc261997871ca56d9fb27c4b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 21:08:19 -0700 Subject: [PATCH 261/488] minor update --- hw/rtl/libs/VX_pending_size.sv | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 475bbb36c..50737634f 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -167,7 +167,15 @@ module VX_pending_size #( end end - assign size = {full_r, used_r}; + if (SIZE > 1) begin : g_sizeN + if (SIZEW > ADDRW) begin : g_not_log2 + assign size = {full_r, used_r}; + end else begin : g_log2 + assign size = used_r; + end + end else begin : g_size1 + assign size = full_r; + end end From b77fff764e2c7b1acc3bd8d46e11332b2663da46 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 12 Sep 2024 22:12:03 -0700 Subject: [PATCH 262/488] minor update --- hw/rtl/libs/VX_bypass_buffer.sv | 20 ++--- hw/rtl/libs/VX_stream_buffer.sv | 147 ++++++++++++++++---------------- 2 files changed, 83 insertions(+), 84 deletions(-) diff --git 
a/hw/rtl/libs/VX_bypass_buffer.sv b/hw/rtl/libs/VX_bypass_buffer.sv index 14079395b..7378a4fdd 100644 --- a/hw/rtl/libs/VX_bypass_buffer.sv +++ b/hw/rtl/libs/VX_bypass_buffer.sv @@ -35,7 +35,15 @@ module VX_bypass_buffer #( input wire ready_out, output wire valid_out ); - if (PASSTHRU == 0) begin : g_buffer + if (PASSTHRU != 0) begin : g_passthru + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign ready_in = ready_out; + assign valid_out = valid_in; + assign data_out = data_in; + + end else begin : g_buffer reg [DATAW-1:0] buffer; reg has_data; @@ -59,15 +67,7 @@ module VX_bypass_buffer #( assign data_out = has_data ? buffer : data_in; assign valid_out = valid_in || has_data; - end else begin : g_passthru - - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign ready_in = ready_out; - assign valid_out = valid_in; - assign data_out = data_in; - - end else + end endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 7670b40fe..51e33db60 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -37,86 +37,85 @@ module VX_stream_buffer #( input wire ready_out, output wire valid_out ); - if (PASSTHRU == 0) begin : g_buffer - if (OUT_REG != 0) begin : g_with_reg + if (PASSTHRU != 0) begin : g_passthru - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - reg valid_out_r; - reg no_buffer; - - wire fire_in = valid_in && ready_in; - wire flow_out = ready_out || ~valid_out; - - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 0; - no_buffer <= 1; - end else begin - if (flow_out) begin - no_buffer <= 1; - end else if (valid_in) begin - no_buffer <= 0; - end - if (flow_out) begin - valid_out_r <= valid_in || ~no_buffer; - end - end - end - - always @(posedge clk) begin - if (fire_in) begin - buffer <= data_in; - end - if (flow_out) begin - data_out_r <= no_buffer ? 
data_in : buffer; - end - end - - assign ready_in = no_buffer; - assign valid_out = valid_out_r; - assign data_out = data_out_r; - - end else begin : g_no_reg - - reg [1:0][DATAW-1:0] shift_reg; - reg [1:0] fifo_state; - - wire fire_in = valid_in && ready_in; - wire fire_out = valid_out && ready_out; - - always @(posedge clk) begin - if (reset) begin - fifo_state <= 2'b00; - end else begin - case ({fire_in, fire_out}) - 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 - 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 - default: fifo_state <= fifo_state; - endcase - end - end - - always @(posedge clk) begin - if (fire_in) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; - end - end - - assign ready_in = ~fifo_state[1]; - assign valid_out = fifo_state[0]; - assign data_out = shift_reg[fifo_state[1]]; - - end - end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; assign valid_out = valid_in; assign data_out = data_in; - end + + end else if (OUT_REG != 0) begin : g_with_reg + + reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] buffer; + reg valid_out_r; + reg no_buffer; + + wire fire_in = valid_in && ready_in; + wire flow_out = ready_out || ~valid_out; + + always @(posedge clk) begin + if (reset) begin + valid_out_r <= 0; + no_buffer <= 1; + end else begin + if (flow_out) begin + no_buffer <= 1; + end else if (valid_in) begin + no_buffer <= 0; + end + if (flow_out) begin + valid_out_r <= valid_in || ~no_buffer; + end + end + end + + always @(posedge clk) begin + if (fire_in) begin + buffer <= data_in; + end + if (flow_out) begin + data_out_r <= no_buffer ? 
data_in : buffer; + end + end + + assign ready_in = no_buffer; + assign valid_out = valid_out_r; + assign data_out = data_out_r; + + end else begin : g_no_reg + + reg [1:0][DATAW-1:0] shift_reg; + reg [1:0] fifo_state; + + wire fire_in = valid_in && ready_in; + wire fire_out = valid_out && ready_out; + + always @(posedge clk) begin + if (reset) begin + fifo_state <= 2'b00; + end else begin + case ({fire_in, fire_out}) + 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 + 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 + default: fifo_state <= fifo_state; + endcase + end + end + + always @(posedge clk) begin + if (fire_in) begin + shift_reg[1] <= shift_reg[0]; + shift_reg[0] <= data_in; + end + end + + assign ready_in = ~fifo_state[1]; + assign valid_out = fifo_state[0]; + assign data_out = shift_reg[fifo_state[1]]; + + end endmodule `TRACING_ON - From 263893eb7c6a9593ffaa9d09085e1c1898790387 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 13 Sep 2024 00:03:08 -0700 Subject: [PATCH 263/488] minor update --- hw/rtl/VX_define.vh | 3 +-- hw/rtl/cache/VX_cache.sv | 6 +++--- hw/rtl/cache/VX_cache_bypass.sv | 1 + hw/rtl/cache/VX_cache_data.sv | 17 ++++++++++------- hw/rtl/core/VX_operands.sv | 8 ++++---- hw/rtl/libs/VX_transpose.sv | 5 +++-- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 502f794bb..85fa40f0d 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -395,11 +395,10 @@ if (latency != 0) begin \ VX_pipe_register #( \ .DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ - .RESETW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ .DEPTH (latency) \ ) pipe_reg ( \ .clk (clk), \ - .reset (reset), \ + .reset (1'b0), \ .enable (1'b1), \ .data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \ .data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \ diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 
b6d3f9552..06887944c 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -310,7 +310,7 @@ module VX_cache import VX_gpu_pkg::*; #( end for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_bid - if (NUM_BANKS > 1) begin : g_multibank + if (NUM_BANKS > 1) begin : g_multibanks assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; end else begin : g_singlebank assign core_req_bid[i] = '0; @@ -448,7 +448,7 @@ module VX_cache import VX_gpu_pkg::*; #( if (NUM_BANKS == 1) begin : g_per_bank_mem_req_addr_multibanks assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; - end else begin : g_per_bank_mem_req_addr_one_bank + end else begin : g_per_bank_mem_req_addr_singlebank assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); end end @@ -521,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #( if (NUM_BANKS > 1) begin : g_mem_req_tag_multibanks wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id}); - end else begin : g_mem_req_tag_one_bank + end else begin : g_mem_req_tag assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag); end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index a60904d46..4b3b3a59a 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,6 +268,7 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); end + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 27844fd6f..04b0ff746 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -76,13 +76,16 @@ module 
VX_cache_data #( wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; if (WRITEBACK) begin : g_dirty_data - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_flipped_rdata - for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j - assign flipped_rdata[j][i] = line_rdata[i][j]; - end - end - assign dirty_data = flipped_rdata[way_idx]; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; + VX_transpose #( + .DATAW (`CS_WORD_WIDTH), + .N (`CS_WORDS_PER_LINE), + .M (NUM_WAYS) + ) transpose ( + .data_in (line_rdata), + .data_out (transposed_rdata) + ); + assign dirty_data = transposed_rdata[way_idx]; end else begin : g_dirty_data_0 assign dirty_data = '0; end diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 066db15cd..2ca847394 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -90,9 +90,9 @@ module VX_operands import VX_gpu_pkg::*; #( end for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx - if (NUM_BANKS != 1) begin : g_banks + if (NUM_BANKS != 1) begin : g_multibanks assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; - end else begin : g_1bank + end else begin : g_singlebank assign req_bank_idx[i] = '0; end end @@ -250,10 +250,10 @@ module VX_operands import VX_gpu_pkg::*; #( for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams wire gpr_wr_enabled; - if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled + if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled_multibanks assign gpr_wr_enabled = writeback_if.valid && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); - end else begin : g_gpr_wr_enabled_1bank + end else begin : g_gpr_wr_enabled assign gpr_wr_enabled = writeback_if.valid; end diff --git a/hw/rtl/libs/VX_transpose.sv b/hw/rtl/libs/VX_transpose.sv index 769a78422..2fc0bd695 100644 --- a/hw/rtl/libs/VX_transpose.sv +++ b/hw/rtl/libs/VX_transpose.sv @@ -15,11 +15,12 @@ `TRACING_OFF module VX_transpose #( + 
parameter DATAW = 1, parameter N = 1, parameter M = 1 ) ( - input wire [N-1:0][M-1:0] data_in, - output wire [M-1:0][N-1:0] data_out + input wire [N-1:0][M-1:0][DATAW-1:0] data_in, + output wire [M-1:0][N-1:0][DATAW-1:0] data_out ); for (genvar i = 0; i < N; ++i) begin : g_i for (genvar j = 0; j < M; ++j) begin : g_j From bbe9c0372fc893197fdd77f0095422a7596159a9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 13 Sep 2024 00:35:42 -0700 Subject: [PATCH 264/488] minor update --- hw/rtl/libs/VX_decoder.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index 3e463326c..7c0c760e5 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -31,10 +31,10 @@ module VX_decoder #( if (MODEL == 1) begin : g_model1 always @(*) begin shift = '0; - shift[data_in] = 1'b1; + shift[data_in] = {M{1'b1}}; end end else begin : g_model0 - assign shift = (D*M)'(1'b1) << (data_in * M); + assign shift = ((D*M)'({M{1'b1}})) << (data_in * M); end assign data_out = {D{valid_in}} & shift; From dc7610106873d46e280e74fe406f08d2cf9c6d2d Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 13 Sep 2024 09:09:38 -0400 Subject: [PATCH 265/488] contribution stats --- docs/contributing.md | 36 +++++++++++++++++++++++++++++++----- docs/environment_setup.md | 23 +++++++++++++++-------- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 14e0ccd0c..5264454d2 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -1,11 +1,37 @@ -# Contributing to Vortex on Github +# Contributing to Vortex -## Github Details -- There are two main repos, `vortex` (public, this one) and `vortex-dev` (private) -- todo: Most current development is on `vortex` -- If you have a legacy version of `vortex`, you can use the releases branch or tags to access the repo at that point in time +## Github +Vortex uses Github to host its git repositories. 
+There are a lot of ways to use the features on Github for collaboration. +Therefore, this documentation details the standard procedure for contributing to Vortex. +Development of Vortex is consolidated to this repo, `vortex` and any associated forks. +Previously, there was active work done on a private repo named `vortex-dev`. +`vortex-dev` has officially been deprecated and fully merged into this public repo, `vortex`. +If you are returning to this project and have legacy versions of Vortex, you can use the releases branches to access older versions. ## Contribution Process +In an effort to keep `vortex` organized, permissions to directly create branches and push code has been limited to admins. +However, contributions are strongly encouraged and keep the project moving forward! Here is the procedure for contributing: + +1. Create a fork of `vortex` +2. In your fork, create a branch that briefly explains the work you are adding (ie: `develop-documentation`) branches from `develop` and adds some documentation +3. Make your changes on your new branch in your fork. You may create as many commits as you need, which might be common if you are making multiple iterations +4. Since you are the owner of your fork, you have full permissions to push commits to your fork +4. When you are satisfied with the changes on your fork, you can open a PR from your fork using the online interface +5. If you recently made a push, you will get automatically get a prompt on Github online to create a PR, which you can press +6. Otherwise, you can go to your fork on Github online and manually create a PR (todo) +(todo): how to name and format your PR, what information you should add to the PR, does not need to be too strict if you are attending the weekly meetings* +7. Github uses the following semantics: `base repository` gets the changes from your `head repository` +8. 
Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `develop` since active development should only be added to this branch +9. And you should assign the `head repository` to `/vortex` (which represents your fork of vortex) and the `base` branch to the one created in step 2 +10. Now that your intended PR has been specified, you should review the status. Check for merge conflicts, if all your commits are present, and all the modified files make sense +11. You can still make a PR if there are issues in step 10, just make sure the structure is correct according to steps 7-9 +12. Once the PR is made, the CI pipeline will run automatically, testing your changes +13. Remember, a PR is flexible if you need to make changes to the code you can go back to your branch of the fork to commit and push any updates +14. As long as the `head repository`'s `base` branch is the one you edited, the PR will automatically get the most recent changes +15. When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR + + - You should create a new branch from develop that is clearly named with the feature that you want to add - Avoid pushing directly to the `master` branch instead you will need to make a Pull Request (PR) - There should be protections in place that prevent pushing directly to the main branch, but don't rely on it diff --git a/docs/environment_setup.md b/docs/environment_setup.md index 1c15495fa..be0a1328a 100644 --- a/docs/environment_setup.md +++ b/docs/environment_setup.md @@ -1,30 +1,35 @@ -# Environment Setup# Vortex Dev Environment Setup -These instructions apply to the development vortex repo using the *updated toolchain*. The updated toolchain is considered to be any commit of `master` pulled from *July 2, 2023* onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). 
Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`. +# Environment Setup + +These instructions apply to the development vortex repo using the _updated toolchain_. The updated toolchain is considered to be any commit of `master` pulled from _July 2, 2023_ onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`. > Note: As it stands right now, there a few test suites which are not working due to this toolchain migration. We are working to determine an exact list of which ones are working and which ones are not. For now, if the repo builds at a minimum, then you can consider all these steps to have worked successfully. ## Choosing an Development Environment + There are three primary environments you can use. Each has its own pros and cons. Refer to this section to help you determine which environment best suits your needs. + 1. Volvo 2. Docker 3. Local ### Volvo + Volvo is a server provided by Georgia Tech. As such, it provides high performance compute, but you need valid credentials to access it. If you don't already have credentials, you can get in contact with your mentor to ask about setting your account up. Pros: -1. Native x86_64 architecture, AMD EPYC 7702P 64-Core Processor (*fast*) +1. Native x86_64 architecture, AMD EPYC 7702P 64-Core Processor (_fast_) 2. 
Packages and difficult configurations are already done for you 3. Consistent environment as others, allowing for easier troubleshooting 4. Just need to SSH into Volvo, minimal impact on local computer resources 5. VScode remote development tools are phenomenal over SSH Cons: + 1. Volvo is accessed via gatech vpn, external contributors might encounter issues with it -- especially from other university networks 2. Account creation is not immediate and is subject to processing time -3. Volvo might have outtages (*pretty uncommon*) -5. SSH development requires internet and other remote development tools (*vscode works!*) +3. Volvo might have outtages (_pretty uncommon_) +4. SSH development requires internet and other remote development tools (_vscode works!_) ### Docker @@ -44,18 +49,21 @@ Cons: 2. Limited to your computer's performance, and Vortex is a large repo to build 3. Will utilize a few gigabytes of storage on your computer for saving binaries to run the container - ### Local + You can reverse engineer the Dockerfile and scripts above to get a working environment setup locally. This option is for experienced users, who have already considered the pros and cons of Volvo and Docker. ## Setup on Volvo + 1. Clone Repo Recursively: `git clone --recursive https://github.com/vortexgpgpu/vortex-dev.git` 2. Source `/opt/set_vortex_env_dev.sh` to initialize pre-installed toolchain 3. `make -s` in `vortex-dev` root directory 4. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood` ## Setup with Docker + Currently the Dockerfile is not included with the official vortex-dev repository, however you can quickly add it to repo and get started. + 1. Clone repo recursively onto your local machine: `git clone --recursive https://github.com/vortexgpgpu/vortex-dev.git` 2. Download a copy of `Dockerfile.dev` and place it in the root of the repo. 3. 
Build the Dockerfile into an image: `docker build --platform=linux/amd64 -t vortex-dev -f Dockerfile.dev .` @@ -64,8 +72,7 @@ Currently the Dockerfile is not included with the official vortex-dev repository 6. `make -s` in `vortex-dev` root directory 7. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood` - ### Additional Docker Commands + - Exit from a container (does not stop or remove it) - Resume a container you have exited or start a second terminal session `docker exec -it bash` - From 0a48d98bc12b60e65a22469cb61d4b428c9e05f3 Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 13 Sep 2024 09:39:28 -0400 Subject: [PATCH 266/488] Update README.md It has the instruction about the other branch(Vortex_vm). --- README.md | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4322f06bc..ec8d10bd5 100644 --- a/README.md +++ b/README.md @@ -59,20 +59,17 @@ sudo apt-get install git ``` ### Configure your build folder ```sh - # - # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. - # This is the example for volvo server mkdir build - mkdir out - export OUT_DIR=`pwd`/out cd build - # Run the following to disble virtual memory feature in compilation - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR - # Run the following instead to enable virtual memory feature in compilation - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1 + # for 32bit + ../configure --xlen=32 --tooldir=$HOME/tools + # for 64bit + ../configure --xlen=64 --tooldir=$HOME/tools ``` ### Install prebuilt toolchain - # We will use the precomipled tools in volvo toolchanin directory +```sh + ./ci/toolchain_install.sh --all +``` ### set environment variables ```sh # should always run before using the toolchain! 
@@ -82,7 +79,6 @@ sudo apt-get install git ```sh make -s ``` - ### Quick demo running vecadd OpenCL kernel on 2 cores ```sh ./ci/blackbox.sh --cores=2 --app=vecadd From 50458bbae04b73fa0813b92f00bc7f00767517ba Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 06:22:07 -0700 Subject: [PATCH 267/488] xilinx synthesis debugging foxes --- configure | 2 +- hw/rtl/VX_platform.vh | 17 + hw/rtl/afu/xrt/VX_afu_wrap.sv | 39 +- hw/rtl/afu/xrt/vortex_afu.v | 22 +- hw/scripts/{ip_gen.sh => altera_ip_gen.sh} | 0 hw/scripts/ila_insert.tcl | 231 ++++++ .../gen_ip.tcl => scripts/xilinx_ip_gen.tcl} | 0 hw/syn/altera/dut/Makefile | 2 +- hw/syn/altera/opae/Makefile | 2 +- hw/syn/xilinx/README | 2 +- hw/syn/xilinx/dut/common.mk | 4 +- hw/syn/xilinx/dut/project.tcl | 8 +- hw/syn/xilinx/{scripts => }/gen_xo.tcl | 6 +- hw/syn/xilinx/{scripts => }/kill_build.sh | 0 hw/syn/xilinx/{scripts => }/kill_hwserver.sh | 0 hw/syn/xilinx/{scripts => }/kill_sim.sh | 0 .../xilinx/{scripts => }/package_kernel.tcl | 39 +- hw/syn/xilinx/sandbox/Makefile | 22 +- hw/syn/xilinx/sandbox/project.tcl.in | 721 +++++++++--------- hw/syn/xilinx/xrt/Makefile | 5 +- sim/simx/cache_sim.cpp | 1 + 21 files changed, 710 insertions(+), 413 deletions(-) rename hw/scripts/{ip_gen.sh => altera_ip_gen.sh} (100%) create mode 100644 hw/scripts/ila_insert.tcl rename hw/{syn/xilinx/scripts/gen_ip.tcl => scripts/xilinx_ip_gen.tcl} (100%) rename hw/syn/xilinx/{scripts => }/gen_xo.tcl (94%) rename hw/syn/xilinx/{scripts => }/kill_build.sh (100%) rename hw/syn/xilinx/{scripts => }/kill_hwserver.sh (100%) rename hw/syn/xilinx/{scripts => }/kill_sim.sh (100%) rename hw/syn/xilinx/{scripts => }/package_kernel.tcl (86%) diff --git a/configure b/configure index de04b648b..bbeda59c9 100755 --- a/configure +++ b/configure @@ -65,7 +65,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; 
s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index f2d0f6a36..7f6805c50 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -56,8 +56,25 @@ `define UNUSED_PIN(x) . x () `define UNUSED_ARG(x) x +`define __SCOPE (* mark_debug="true" *) + +`define __SCOPE_X + +`define __SCOPE_ON \ + `undef __SCOPE_X \ + `define __SCOPE_X `__SCOPE + +`define __SCOPE_OFF \ + `undef __SCOPE_X \ + `define __SCOPE_X + `else // not SYNTHESIS +`define __SCOPE +`define __SCOPE_X +`define __SCOPE_ON +`define __SCOPE_OFF + `ifdef VERILATOR `ifndef TRACING_ALL diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index c92d94c7c..9872ae3c1 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -21,8 +21,8 @@ module VX_afu_wrap #( parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH ) ( // System signals - input wire ap_clk, - input wire ap_rst_n, + input wire clk, + input wire reset, // AXI4 master interface `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), @@ -82,8 +82,6 @@ module VX_afu_wrap #( // convert memory interface to array `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); - wire reset = ~ap_rst_n; - reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; reg vx_busy_wait; @@ -122,7 +120,7 @@ module VX_afu_wrap #( end end - always @(posedge ap_clk) begin + always @(posedge clk) begin if (reset || ap_reset) begin state <= STATE_IDLE; vx_pending_writes <= '0; @@ -187,7 +185,7 @@ module VX_afu_wrap #( .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .AXI_NUM_BANKS 
(C_M_AXI_MEM_NUM_BANKS) ) afu_ctrl ( - .clk (ap_clk), + .clk (clk), .reset (reset), .s_axi_awvalid (s_axi_ctrl_awvalid), @@ -245,7 +243,7 @@ module VX_afu_wrap #( ) vortex_axi ( `SCOPE_IO_BIND (1) - .clk (ap_clk), + .clk (clk), .reset (vx_reset), .m_axi_awvalid (m_axi_mem_awvalid_a), @@ -301,9 +299,32 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// +`ifdef CHIPSCOPE + ila_afu ila_afu_inst ( + .clk (clk), + .probe0 ({ + ap_reset, + ap_start, + ap_done, + ap_idle, + interrupt + }), + .probe1 ({ + vx_pending_writes, + vx_busy_wait, + vx_busy, + vx_reset, + dcr_wr_valid, + dcr_wr_addr, + dcr_wr_data + }) + ); +`endif + `ifdef DBG_SCOPE_AFU `define TRIGGERS { \ reset, \ + ap_reset, \ ap_start, \ ap_done, \ ap_idle, \ @@ -343,7 +364,7 @@ module VX_afu_wrap #( initial begin $assertoff(0, vortex_axi); end - always @(posedge ap_clk) begin + always @(posedge clk) begin if (reset) begin assert_delay_ctr <= '0; assert_enabled <= 0; @@ -362,7 +383,7 @@ module VX_afu_wrap #( `endif `ifdef DBG_TRACE_AFU - always @(posedge ap_clk) begin + always @(posedge clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin `TRACE(2, ("%t: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 2c31900cb..1973ec0aa 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,12 +18,12 @@ module vortex_afu #( parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH ) ( // System signals input wire ap_clk, input wire ap_rst_n, - + // AXI4 master interface `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), @@ -45,8 +45,8 @@ module vortex_afu #( output wire s_axi_ctrl_bvalid, input wire s_axi_ctrl_bready, output wire [1:0] s_axi_ctrl_bresp, - - output wire interrupt + + output wire interrupt ); VX_afu_wrap #( @@ -56,14 +56,14 @@ module vortex_afu #( .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH) ) afu_wrap ( - .ap_clk (ap_clk), - .ap_rst_n (ap_rst_n), + .clk (ap_clk), + .reset (~ap_rst_n), `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - + .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awready (s_axi_ctrl_awready), - .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), + .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), .s_axi_ctrl_wvalid (s_axi_ctrl_wvalid), .s_axi_ctrl_wready (s_axi_ctrl_wready), .s_axi_ctrl_wdata (s_axi_ctrl_wdata), @@ -81,5 +81,5 @@ module vortex_afu #( .interrupt (interrupt) ); - + endmodule diff --git a/hw/scripts/ip_gen.sh b/hw/scripts/altera_ip_gen.sh similarity index 100% rename from hw/scripts/ip_gen.sh rename to hw/scripts/altera_ip_gen.sh diff --git a/hw/scripts/ila_insert.tcl b/hw/scripts/ila_insert.tcl new file mode 100644 index 000000000..de9f0eec0 --- /dev/null +++ b/hw/scripts/ila_insert.tcl @@ -0,0 +1,231 @@ 
+###################################################################### +# Automatically inserts ILA instances in a batch flow, and calls "implement_debug_core". Can also be used in a GUI flow +# This should ONLY be invoked after synthesis, and before opt_design. If opt_design is called first, marked nets may be missing and not found +# Warning: Currently will skip a net if it has no obvious clock domain on the driver. Nets connected to input buffers will be dropped unless "mark_debug_clock" is attached to the net. +# Nets attached to VIO cores have the "mark_debug" attribute, and will be filtered out unless the "mark_debug_valid" attribute is attached. +# Supports the following additional attributes beyond "mark_debug" +# attribute mark_debug_valid of X : signal is "true"; -- Marks a net for ILA capture, even if net is also attached to a VIO core +# attribute mark_debug_clock of X : signal is "inst1_bufg/clock"; -- Specifies clock net to use for capturing this net. May create a new ILA core for that clock domain +# attribute mark_debug_depth of X : signal is "4096"; -- overrides default depth for this ILA core. valid values: 1024, 2048, ... 132072. Last attribute that is scanned will win. +# attribute mark_debug_adv_trigger of X : signal is "true"; -- specifies that advanced trigger capability will be added to ILA core +# Engineer: J. McCluskey +proc insert_ila { depth } { + # sequence through debug nets and organize them by clock in the + # clock_list array. Also create max and min array for bus indices + set dbgs [get_nets -hierarchical -filter {MARK_DEBUG}] + if {[llength $dbgs] == 0} { + puts "No debug net found. No ILA cores created" + return + } + + # process list of nets to find and reject nets that are attached to VIO cores. + # This has a side effect that VIO nets can't be monitored with an ILA + # This can be overridden by using the attribute "mark_debug_valid" = "true" on a net like this. 
+ set net_list {} + foreach net $dbgs { + if { [get_property -quiet MARK_DEBUG_VALID $net] != "true" } { + set pin_list [get_pins -of_objects [get_nets -segments $net]] + set not_vio_net 1 + foreach pin $pin_list { + if { [get_property IS_DEBUG_CORE [get_cells -of_object $pin]] == 1 } { + # It seems this net is attached to a debug core (i.e. VIO core) already, so we should skip adding it to the netlist + set not_vio_net 0 + break + } + } + if { $not_vio_net == 1 } { lappend net_list $net; } + } else { + lappend net_list $net + } + } + + # check again to see if we have any nets left now + if {[llength $net_list] == 0} { + puts "All nets with MARK_DEBUG are already connected to VIO cores. No ILA cores created" + return + } + + # Now that the netlist has been filtered, determine bus names and clock domains + foreach d $net_list { + # name is root name of a bus, index is the bit index in the bus + set name [regsub {\[[[:digit:]]+\]$} $d {}] + set index [regsub {^.*\[([[:digit:]]+)\]$} $d {\1}] + if {[string is integer -strict $index]} { + if {![info exists max($name)]} { + set max($name) $index + set min($name) $index + } elseif {$index > $max($name)} { + set max($name) $index + } elseif {$index < $min($name)} { + set min($name) $index + } + } else { + set max($name) -1 + } + # Now we search for the local clock net associated with the target net. + # There may be ambiguities or no answer in some cases + if {![info exists clocks($name)]} { + # does MARK_DEBUG_CLOCK decorate this net? If not, then search backwards to the driver cell + set clk_name [get_property -quiet MARK_DEBUG_CLOCK $d] + if { [llength $clk_name] == 0 } { + # trace to the clock net, tracing backwards via the driver pin. 
+ set driver_pin [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects [ get_nets -segments $d ]] + set driver_cell [get_cells -of_objects $driver_pin] + if { [get_property IS_SEQUENTIAL $driver_cell] == 1 } { + set timing_arc [get_timing_arcs -to $driver_pin] + set cell_clock_pin [get_pins -filter {IS_CLOCK} [get_property FROM_PIN $timing_arc]] + if { [llength $cell_clock_pin] > 1 } { + puts "Error: in insert_ila. Found more than 1 clock pin in driver cell $driver_cell with timing arc $timing_arc for net $d" + continue + } + } else { + # our driver cell is a LUT or LUTMEM in combinatorial mode, we need to trace further. + set paths [get_timing_paths -quiet -through $driver_pin ] + if { [llength $paths] > 0 } { + # note that here we arbitrarily select the start point of the FIRST timing path... there might be multiple clocks with timing paths for this net. + # use MARK_DEBUG_CLOCK to specify another clock in this case. + set cell_clock_pin [get_pins [get_property STARTPOINT_PIN [lindex $paths 0]]] + } else { + # Can't find any timing path, so skip the net, and warn the user. + puts "Critical Warning: from insert_ila.tcl Can't trace any clock domain on driver of net $d" + puts "Please attach the attribute MARK_DEBUG_CLOCK with a string containing the net name of the desired sampling clock, .i.e." + puts "attribute mark_debug_clock of $d : signal is \"inst_bufg/clk\";" + continue + } + } + # clk_net will usually be a list of net segments, which needs filtering to determine the net connected to the driver pin + set clk_net [get_nets -segments -of_objects $cell_clock_pin] + } else { + set clk_net [get_nets -segments $clk_name] + if { [llength $clk_net] == 0 } { puts "MARK_DEBUG_CLOCK attribute on net $d does not match any known net. 
Please fix."; continue; } + } + # trace forward to net actually connected to clock buffer output, not any of the lower level segment names + set clocks($name) [get_nets -of_objects [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects $clk_net]] + if { [llength $clocks($name)] == 0 } { + puts "Critical Warning: from insert_ila.tcl Can't trace any clock domain on driver of net $d" + puts "Please attach the attribute MARK_DEBUG_CLOCK with a string containing the net name of the desired sampling clock, .i.e." + puts "attribute mark_debug_clock of $d : signal is \"inst_bufg/clk\";" + continue + } + if {![info exists clock_list($clocks($name))]} { + # found a new clock + puts "New clock found is $clocks($name)" + set clock_list($clocks($name)) [list $name] + set ila_depth($clocks($name)) $depth + set ila_adv_trigger($clocks($name)) false + } else { + lappend clock_list($clocks($name)) $name + } + # Does this net have a "MARK_DEBUG_DEPTH" attribute attached? + set clk_depth [get_property -quiet MARK_DEBUG_DEPTH $d] + if { [llength $clk_depth] != 0 } { + set ila_depth($clocks($name)) $clk_depth + } + # Does this net have a "MARK_DEBUG_ADV_TRIGGER" attribute attached? + set trigger [get_property -quiet MARK_DEBUG_ADV_TRIGGER $d] + if { $trigger == "true" } { + set ila_adv_trigger($clocks($name)) true + } + } + } + + set ila_count 0 + set trig_out "" + set trig_out_ack "" + + if { [llength [array names clock_list]] > 1 } { + set enable_trigger true + } else { + set enable_trigger false + } + + foreach c [array names clock_list] { + # Now build and connect an ILA core for each clock domain + [incr ila_count ] + set ila_inst "ila_$ila_count" + # first verify if depth is a member of the set, 1024, 2048, 4096, 8192, ... 131072 + if { $ila_depth($c) < 1024 || [expr $ila_depth($c) & ($ila_depth($c) - 1)] || $ila_depth($c) > 131072 } { + # Depth is not right... 
lets fix it, and continue + if { $ila_depth($c) < 1024 } { + set new_depth 1024 + } elseif { $ila_depth($c) > 131072 } { + set new_depth 131072 + } else { + # round value to next highest power of 2, (in log space) + set new_depth [expr 1 << int( log($ila_depth($c))/log(2) + .9999 )] + } + puts "Can't create ILA core $ila_inst with depth of $ila_depth($c)! Changed capture depth to $new_depth" + set ila_depth($c) $new_depth + } + # create ILA and connect its clock + puts "Creating ILA $ila_inst with clock $c, capture depth $ila_depth($c) and advanced trigger = $ila_adv_trigger($c)" + create_debug_core $ila_inst ila + if { $ila_adv_trigger($c) } { set mu_cnt 4; } else { set mu_cnt 2; } + set_property C_DATA_DEPTH $ila_depth($c) [get_debug_cores $ila_inst] + set_property C_TRIGIN_EN $enable_trigger [get_debug_cores $ila_inst] + set_property C_TRIGOUT_EN $enable_trigger [get_debug_cores $ila_inst] + set_property C_ADV_TRIGGER $ila_adv_trigger($c) [get_debug_cores $ila_inst] + set_property C_INPUT_PIPE_STAGES 1 [get_debug_cores $ila_inst] + set_property C_EN_STRG_QUAL true [get_debug_cores $ila_inst] + set_property ALL_PROBE_SAME_MU true [get_debug_cores $ila_inst] + set_property ALL_PROBE_SAME_MU_CNT $mu_cnt [get_debug_cores $ila_inst] + set_property port_width 1 [get_debug_ports $ila_inst/clk] + connect_debug_port $ila_inst/clk $c + # hookup trigger ports in a circle if more than one ILA is created + if { $enable_trigger == true } { + create_debug_port $ila_inst trig_in + create_debug_port $ila_inst trig_in_ack + create_debug_port $ila_inst trig_out + create_debug_port $ila_inst trig_out_ack + if { $trig_out != "" } { + connect_debug_port $ila_inst/trig_in [get_nets $trig_out] + } + if { $trig_out_ack != "" } { + connect_debug_port $ila_inst/trig_in_ack [get_nets $trig_out_ack] + } + set trig_out ${ila_inst}_trig_out_$ila_count + create_net $trig_out + connect_debug_port $ila_inst/trig_out [get_nets $trig_out] + set trig_out_ack ${ila_inst}_trig_out_ack_$ila_count + 
create_net $trig_out_ack + connect_debug_port $ila_inst/trig_out_ack [get_nets $trig_out_ack] + } + # add probes + set nprobes 0 + foreach n [lsort $clock_list($c)] { + set nets {} + if {$max($n) < 0} { + lappend nets [get_nets $n] + } else { + # n is a bus name + for {set i $min($n)} {$i <= $max($n)} {incr i} { + lappend nets [get_nets $n[$i]] + } + } + set prb probe$nprobes + if {$nprobes > 0} { + create_debug_port $ila_inst probe + } + set_property port_width [llength $nets] [get_debug_ports $ila_inst/$prb] + connect_debug_port $ila_inst/$prb $nets + incr nprobes + } + } + + # at this point, we need to complete the circular connection of trigger outputs and acks + if { $enable_trigger == true } { + connect_debug_port ila_1/trig_in [get_nets $trig_out] + connect_debug_port ila_1/trig_in_ack [get_nets $trig_out_ack] + } + set project_found [get_projects -quiet] + if { $project_found != "New Project" } { + puts "Saving constraints now in project [current_project -quiet]" + save_constraints_as debug_constraints.xdc + } + + # run ILA cores implementation + implement_debug_core + + # write out probe info file + write_debug_probes -force debug_nets.ltx +} \ No newline at end of file diff --git a/hw/syn/xilinx/scripts/gen_ip.tcl b/hw/scripts/xilinx_ip_gen.tcl similarity index 100% rename from hw/syn/xilinx/scripts/gen_ip.tcl rename to hw/scripts/xilinx_ip_gen.tcl diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index 5f1dd62fe..e5655c5fd 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -13,7 +13,7 @@ IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: - $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) + $(SCRIPT_DIR)/altera_ip_gen.sh $(IP_CACHE_DIR) unittest: mkdir -p unittest/$(BUILD_DIR) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 53b1210d8..a3d373cb0 100644 --- a/hw/syn/altera/opae/Makefile +++ 
b/hw/syn/altera/opae/Makefile @@ -102,7 +102,7 @@ all: swconfig ip-gen setup build ip-gen: $(IP_CACHE_DIR)/ip-gen.log $(IP_CACHE_DIR)/ip-gen.log: - $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) + $(SCRIPT_DIR)/altera_ip_gen.sh $(IP_CACHE_DIR) swconfig: vortex_afu.h vortex_afu.h: $(SRC_DIR)/vortex_afu.json diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index 563c4c17e..17d398dfa 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -5,7 +5,7 @@ platforminfo -l xbutil validate --device 0000:09:00.1 --verbose # generate FPU IPs -vivado -mode batch -source scripts/gen_ip.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 +vivado -mode batch -source xilinx_ip_gen.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 # build FPGA PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_emu_base_1c.log 2>&1 & diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index b2a8e71c7..933621bef 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -31,9 +31,9 @@ project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt ifdef FPU_IP - MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) else - MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) $(SRC_DIR)/../scripts + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) endif clean: diff --git a/hw/syn/xilinx/dut/project.tcl 
b/hw/syn/xilinx/dut/project.tcl index e23ce2997..dcaf883fa 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -14,9 +14,9 @@ # Start time set start_time [clock seconds] -if { $::argc != 6 } { +if { $::argc != 5 } { puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" + puts "Usage: $::argv0 \n" exit } @@ -28,14 +28,12 @@ set device_part [lindex $::argv 1] set vcs_file [lindex $::argv 2] set xdc_file [lindex $::argv 3] set tool_dir [lindex $::argv 4] -set script_dir [lindex $::argv 5] puts "Using top_module=$top_module" puts "Using device_part=$device_part" puts "Using vcs_file=$vcs_file" puts "Using xdc_file=$xdc_file" puts "Using tool_dir=$tool_dir" -puts "Using script_dir=$script_dir" # Set the number of jobs based on MAX_JOBS environment variable if {[info exists ::env(MAX_JOBS)]} { @@ -50,7 +48,7 @@ if {[info exists ::env(FPU_IP)]} { set ip_dir $::env(FPU_IP) set argv [list $ip_dir $device_part] set argc 2 - source ${script_dir}/gen_ip.tcl + source ${tool_dir}/xilinx_ip_gen.tcl } source "${tool_dir}/parse_vcs_list.tcl" diff --git a/hw/syn/xilinx/scripts/gen_xo.tcl b/hw/syn/xilinx/gen_xo.tcl similarity index 94% rename from hw/syn/xilinx/scripts/gen_xo.tcl rename to hw/syn/xilinx/gen_xo.tcl index 0f95f09be..7d3342a4c 100644 --- a/hw/syn/xilinx/scripts/gen_xo.tcl +++ b/hw/syn/xilinx/gen_xo.tcl @@ -1,10 +1,10 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -12,7 +12,7 @@ # limitations under the License. 
if { $::argc != 5 } { - puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n" + puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" puts "Usage: $::argv0 \n" exit } diff --git a/hw/syn/xilinx/scripts/kill_build.sh b/hw/syn/xilinx/kill_build.sh similarity index 100% rename from hw/syn/xilinx/scripts/kill_build.sh rename to hw/syn/xilinx/kill_build.sh diff --git a/hw/syn/xilinx/scripts/kill_hwserver.sh b/hw/syn/xilinx/kill_hwserver.sh similarity index 100% rename from hw/syn/xilinx/scripts/kill_hwserver.sh rename to hw/syn/xilinx/kill_hwserver.sh diff --git a/hw/syn/xilinx/scripts/kill_sim.sh b/hw/syn/xilinx/kill_sim.sh similarity index 100% rename from hw/syn/xilinx/scripts/kill_sim.sh rename to hw/syn/xilinx/kill_sim.sh diff --git a/hw/syn/xilinx/scripts/package_kernel.tcl b/hw/syn/xilinx/package_kernel.tcl similarity index 86% rename from hw/syn/xilinx/scripts/package_kernel.tcl rename to hw/syn/xilinx/package_kernel.tcl index ed8a683ac..2c314754d 100644 --- a/hw/syn/xilinx/scripts/package_kernel.tcl +++ b/hw/syn/xilinx/package_kernel.tcl @@ -71,65 +71,60 @@ set_property -verbose -name "top" -value ${krnl_name} -objects $obj if { $chipscope == 1 } { # hw debugging - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_afu + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_afu set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {2} \ CONFIG.C_PROBE0_WIDTH {8} \ - CONFIG.C_PROBE1_WIDTH {24} \ + CONFIG.C_PROBE1_WIDTH {64} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_afu] generate_target {instantiation_template} [get_files ila_afu.xci] set_property generate_synth_checkpoint false [get_files ila_afu.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_fetch + create_ip -name ila -vendor 
xilinx.com -library ip -version 6.2 -module_name ila_fetch set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {3} \ CONFIG.C_PROBE0_WIDTH {128} \ CONFIG.C_PROBE1_WIDTH {128} \ CONFIG.C_PROBE2_WIDTH {128} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_fetch] generate_target {instantiation_template} [get_files ila_fetch.xci] set_property generate_synth_checkpoint false [get_files ila_fetch.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_issue + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_issue set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {2} \ CONFIG.C_PROBE0_WIDTH {256} \ CONFIG.C_PROBE1_WIDTH {128} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_issue] generate_target {instantiation_template} [get_files ila_issue.xci] set_property generate_synth_checkpoint false [get_files ila_issue.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_lsu + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_lsu set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {4} \ CONFIG.C_PROBE0_WIDTH {256} \ CONFIG.C_PROBE1_WIDTH {128} \ CONFIG.C_PROBE2_WIDTH {288} \ CONFIG.C_PROBE3_WIDTH {256} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_lsu] generate_target {instantiation_template} [get_files ila_lsu.xci] set_property generate_synth_checkpoint false [get_files ila_lsu.xci] - - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name 
ila_msched - set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ - CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ - CONFIG.C_NUM_OF_PROBES {4} \ - CONFIG.C_PROBE0_WIDTH {128} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {128} \ - CONFIG.C_PROBE3_WIDTH {128} \ - ] [get_ips ila_msched] - generate_target {instantiation_template} [get_files ila_msched.xci] - set_property generate_synth_checkpoint false [get_files ila_msched.xci] } update_compile_order -fileset sources_1 diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index d1ebf9afa..e4def9c4e 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -19,16 +19,16 @@ KERNEL ?= fibonacci NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? $(MAX_JOBS) : $(NCPUS) ))) -COE_FILE := $(shell realpath kernel.bin.coe) -ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g') - # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif +TEX_INCLUDE = -I$(RTL_DIR)/tex +RASTER_INCLUDE = -I$(RTL_DIR)/raster +OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -RTL_INCLUDE += $(FPU_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) RTL_INCLUDE += -I$(SRC_DIR) # compilation flags @@ -43,9 +43,6 @@ CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000 all: build -project2.tcl: project.tcl - @sed -e "s/@COE_FILE@/$(ESCAPED_COE_FILE)/g" $< > $@ - $(KERNEL).bin: $(MAKE) -C $(ROOT_DIR)/kernel clean STACK_BASE_ADDR=0xFF000 $(MAKE) -C $(ROOT_DIR)/kernel @@ -61,13 +58,14 @@ project_1/sources.txt: mkdir -p project_1 
$(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt -build: project_1/project_1.xpr -project_1/project_1.xpr: project_1/sources.txt kernel.bin.coe project2.tcl - MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source project2.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) +build: done.dcp +done.dcp: project_1/sources.txt kernel.bin.coe project.tcl + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source project.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR) + echo done > done.dcp run: project_1/project_1.xpr $(VIVADO) project_1/project_1.xpr & clean: - rm -rf project_1 project2.tcl $(KERNEL).bin kernel.bin.coe - rm -rf .Xil *.log *.jou + rm -rf project_1 project1.tcl $(KERNEL).bin kernel.bin.coe + rm -rf .Xil *.log *.jou *.dcp *.rpt diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index d4fa45581..8926b43ad 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -11,9 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Start time -set start_time [clock seconds] - if { $::argc != 3 } { puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n" puts "Usage: $::argv0 \n" @@ -24,7 +21,7 @@ set device_part [lindex $::argv 0] set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] -uuts "Using device_part=$device_part" +puts "Using device_part=$device_part" puts "Using vcs_file=$vcs_file" puts "Using tool_dir=$tool_dir" @@ -37,398 +34,436 @@ if {[info exists ::env(MAX_JOBS)]} { set num_jobs 0 } -set origin_dir [file normalize "."] +proc run_setup {} { + global device_part vcs_file tool_dir -# Use origin directory path location variable, if specified in the tcl shell -if { [info exists ::origin_dir_loc] } { - set origin_dir $::origin_dir_loc -} + # Set the project name + set project_name "project_1" -# Set the project name -set project_name "project_1" + # Use project name variable, if specified in the tcl shell + if { [info exists ::user_project_name] } { + set project_name $::user_project_name + } -# Use project name variable, if specified in the tcl shell -if { [info exists ::user_project_name] } { - set project_name $::user_project_name -} + source "${tool_dir}/parse_vcs_list.tcl" + set vlist [parse_vcs_list "${vcs_file}"] -source "${tool_dir}/parse_vcs_list.tcl" -set vlist [parse_vcs_list "${vcs_file}"] + set vsources_list [lindex $vlist 0] + set vincludes_list [lindex $vlist 1] + set vdefines_list [lindex $vlist 2] -set vsources_list [lindex $vlist 0] -set vincludes_list [lindex $vlist 1] -set vdefines_list [lindex $vlist 2] + #puts ${vsources_list} + #puts ${vincludes_list} + #puts ${vdefines_list} -#puts ${vsources_list} -#puts ${vincludes_list} -#puts ${vdefines_list} + # Create project + create_project $project_name $project_name -force -part $device_part -# Create project -create_project $project_name $project_name -force -part $device_part + # Set the directory path for the new project + set proj_dir [get_property directory [current_project]] -# Set the directory 
path for the new project -set proj_dir [get_property directory [current_project]] + # Create 'sources_1' fileset (if not found) + if {[string equal [get_filesets -quiet sources_1] ""]} { + create_fileset -srcset sources_1 + } -# Create 'sources_1' fileset (if not found) -if {[string equal [get_filesets -quiet sources_1] ""]} { - create_fileset -srcset sources_1 -} + # add source files + set obj [get_filesets sources_1] + add_files -norecurse -verbose -fileset $obj ${vsources_list} -# add source files -set obj [get_filesets sources_1] -add_files -norecurse -verbose -fileset $obj ${vsources_list} + # process defines + set obj [get_filesets sources_1] + foreach def $vdefines_list { + set_property -name "verilog_define" -value $def -objects $obj + } -# process defines -set obj [get_filesets sources_1] -foreach def $vdefines_list { - set_property -name "verilog_define" -value $def -objects $obj -} + # Set 'sources_1' fileset properties + set obj [get_filesets sources_1] + set_property -name "name" -value "sources_1" -objects $obj + set_property -name "top" -value "design_1_wrapper" -objects $obj -# Set 'sources_1' fileset properties -set obj [get_filesets sources_1] -set_property -name "name" -value "sources_1" -objects $obj -set_property -name "top" -value "design_1_wrapper" -objects $obj + # Create 'constrs_1' fileset (if not found) + if {[string equal [get_filesets -quiet constrs_1] ""]} { + create_fileset -constrset constrs_1 + } -# Create 'constrs_1' fileset (if not found) -if {[string equal [get_filesets -quiet constrs_1] ""]} { - create_fileset -constrset constrs_1 -} + # Set 'constrs_1' fileset object + set obj [get_filesets constrs_1] -# Set 'constrs_1' fileset object -set obj [get_filesets constrs_1] + # Empty (no sources present) -# Empty (no sources present) + # Set 'constrs_1' fileset properties + set obj [get_filesets constrs_1] + set_property -name "constrs_type" -value "XDC" -objects $obj + set_property -name "name" -value "constrs_1" -objects $obj + 
set_property -name "target_constrs_file" -value "" -objects $obj -# Set 'constrs_1' fileset properties -set obj [get_filesets constrs_1] -set_property -name "constrs_type" -value "XDC" -objects $obj -set_property -name "name" -value "constrs_1" -objects $obj -set_property -name "target_constrs_file" -value "" -objects $obj + # Create 'sim_1' fileset (if not found) + if {[string equal [get_filesets -quiet sim_1] ""]} { + create_fileset -simset sim_1 + } -# Create 'sim_1' fileset (if not found) -if {[string equal [get_filesets -quiet sim_1] ""]} { - create_fileset -simset sim_1 -} + set testbench_file "" + foreach file ${vsources_list} { + if {[string match "*testbench.v" $file]} { + set testbench_file [file normalize $file] + break + } + } -# Set 'sim_1' fileset object -set obj [get_filesets sim_1] -# Import local files from the original project -set files [list \ - [file normalize "testbench.v" ]\ -] -set imported_files [import_files -fileset sim_1 $files] + # Set 'sim_1' fileset object + set obj [get_filesets sim_1] + # Import local files from the original project + set files [list $testbench_file] + set imported_files [import_files -fileset sim_1 $files] -# Set 'sim_1' fileset file properties for remote files -# None + # Set 'sim_1' fileset file properties for remote files + # None -# Set 'sim_1' fileset file properties for local files + # Set 'sim_1' fileset file properties for local files set file "testbench.v" set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] -set_property -name "file_type" -value "Verilog" -objects $file_obj -set_property -name "is_enabled" -value "1" -objects $file_obj -set_property -name "is_global_include" -value "0" -objects $file_obj -set_property -name "library" -value "xil_defaultlib" -objects $file_obj -set_property -name "path_mode" -value "RelativeFirst" -objects $file_obj -set_property -name "used_in" -value "synthesis implementation simulation" -objects $file_obj -set_property -name "used_in_implementation" 
-value "1" -objects $file_obj -set_property -name "used_in_simulation" -value "1" -objects $file_obj -set_property -name "used_in_synthesis" -value "1" -objects $file_obj + set_property -name "file_type" -value "Verilog" -objects $file_obj + set_property -name "is_enabled" -value "1" -objects $file_obj + set_property -name "is_global_include" -value "0" -objects $file_obj + set_property -name "library" -value "xil_defaultlib" -objects $file_obj + set_property -name "path_mode" -value "RelativeFirst" -objects $file_obj + set_property -name "used_in" -value "synthesis implementation simulation" -objects $file_obj + set_property -name "used_in_implementation" -value "1" -objects $file_obj + set_property -name "used_in_simulation" -value "1" -objects $file_obj + set_property -name "used_in_synthesis" -value "1" -objects $file_obj -# Set 'sim_1' fileset properties -set obj [get_filesets sim_1] -set_property -name "32bit" -value "0" -objects $obj -set_property -name "force_compile_glbl" -value "0" -objects $obj -set_property -name "generate_scripts_only" -value "0" -objects $obj -set_property -name "generic" -value "" -objects $obj -set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $obj -set_property -name "include_dirs" -value "" -objects $obj -set_property -name "incremental" -value "1" -objects $obj -set_property -name "name" -value "sim_1" -objects $obj -set_property -name "source_set" -value "sources_1" -objects $obj -set_property -name "systemc_include_dirs" -value "" -objects $obj -set_property -name "top" -value "testbench" -objects $obj -set_property -name "top_auto_set" -value "0" -objects $obj -set_property -name "top_lib" -value "xil_defaultlib" -objects $obj -set_property -name "verilog_define" -value "" -objects $obj -set_property -name "verilog_uppercase" -value "0" -objects $obj + # Set 'sim_1' fileset properties + set obj [get_filesets sim_1] + set_property -name "32bit" -value "0" -objects $obj + set_property -name 
"force_compile_glbl" -value "0" -objects $obj + set_property -name "generate_scripts_only" -value "0" -objects $obj + set_property -name "generic" -value "" -objects $obj + set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $obj + set_property -name "include_dirs" -value "" -objects $obj + set_property -name "incremental" -value "1" -objects $obj + set_property -name "name" -value "sim_1" -objects $obj + set_property -name "source_set" -value "sources_1" -objects $obj + set_property -name "systemc_include_dirs" -value "" -objects $obj + set_property -name "top" -value "testbench" -objects $obj + set_property -name "top_auto_set" -value "0" -objects $obj + set_property -name "top_lib" -value "xil_defaultlib" -objects $obj + set_property -name "verilog_define" -value "" -objects $obj + set_property -name "verilog_uppercase" -value "0" -objects $obj -# Set 'utils_1' fileset object -set obj [get_filesets utils_1] -# Empty (no sources present) + # Set 'utils_1' fileset object + set obj [get_filesets utils_1] + # Empty (no sources present) -# Set 'utils_1' fileset properties -set obj [get_filesets utils_1] -set_property -name "name" -value "utils_1" -objects $obj + # Set 'utils_1' fileset properties + set obj [get_filesets utils_1] + set_property -name "name" -value "utils_1" -objects $obj -# Proc to create BD design_1 -proc cr_bd_design_1 { parentCell } { -# The design that will be created by this Tcl proc contains the following -# module references: -# Vortex_top + # Proc to create BD design_1 + proc cr_bd_design_1 { parentCell } { + # The design that will be created by this Tcl proc contains the following + # module references: + # Vortex_top -# CHANGE DESIGN NAME HERE -set design_name design_1 + # CHANGE DESIGN NAME HERE + set design_name design_1 -common::send_gid_msg -ssname BD::TCL -id 2010 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." 
+ common::send_gid_msg -ssname BD::TCL -id 2010 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." -create_bd_design $design_name + create_bd_design $design_name -set bCheckIPsPassed 1 -################################################################## -# CHECK IPs -################################################################## -set bCheckIPs 1 -if { $bCheckIPs == 1 } { - set list_check_ips "\ - xilinx.com:ip:axi_bram_ctrl:4.1\ - xilinx.com:ip:blk_mem_gen:8.4\ - " + set bCheckIPsPassed 1 + ################################################################## + # CHECK IPs + ################################################################## + set bCheckIPs 1 + if { $bCheckIPs == 1 } { + set list_check_ips "\ + xilinx.com:ip:axi_bram_ctrl:4.1\ + xilinx.com:ip:blk_mem_gen:8.4\ + " - set list_ips_missing "" - common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + set list_ips_missing "" + common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." - foreach ip_vlnv $list_check_ips { - set ip_obj [get_ipdefs -all $ip_vlnv] - if { $ip_obj eq "" } { - lappend list_ips_missing $ip_vlnv - } - } + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } - if { $list_ips_missing ne "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." 
} - set bCheckIPsPassed 0 - } + if { $list_ips_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." } + set bCheckIPsPassed 0 + } + } + + ################################################################## + # CHECK Modules + ################################################################## + set bCheckModules 1 + if { $bCheckModules == 1 } { + set list_check_mods "\ + Vortex_top\ + " + + set list_mods_missing "" + common::send_gid_msg -ssname BD::TCL -id 2020 -severity "INFO" "Checking if the following modules exist in the project's sources: $list_check_mods ." + + foreach mod_vlnv $list_check_mods { + if { [can_resolve_reference $mod_vlnv] == 0 } { + lappend list_mods_missing $mod_vlnv + } + } + + if { $list_mods_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2021 -severity "ERROR" "The following module(s) are not found in the project: $list_mods_missing" } + common::send_gid_msg -ssname BD::TCL -id 2022 -severity "INFO" "Please add source files for the missing module(s) above." + set bCheckIPsPassed 0 + } } - ################################################################## - # CHECK Modules - ################################################################## - set bCheckModules 1 - if { $bCheckModules == 1 } { - set list_check_mods "\ - Vortex_top\ - " - - set list_mods_missing "" - common::send_gid_msg -ssname BD::TCL -id 2020 -severity "INFO" "Checking if the following modules exist in the project's sources: $list_check_mods ." 
- - foreach mod_vlnv $list_check_mods { - if { [can_resolve_reference $mod_vlnv] == 0 } { - lappend list_mods_missing $mod_vlnv - } - } - - if { $list_mods_missing ne "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2021 -severity "ERROR" "The following module(s) are not found in the project: $list_mods_missing" } - common::send_gid_msg -ssname BD::TCL -id 2022 -severity "INFO" "Please add source files for the missing module(s) above." - set bCheckIPsPassed 0 - } -} - -if { $bCheckIPsPassed != 1 } { - common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." - return 3 -} - -variable script_folder - -if { $parentCell eq "" } { - set parentCell [get_bd_cells /] -} - -# Get object for parentCell -set parentObj [get_bd_cells $parentCell] -if { $parentObj == "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} - return -} - -# Make sure parentObj is hier blk -set parentType [get_property TYPE $parentObj] -if { $parentType ne "hier" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} - return -} - -# Save current instance; Restore later -set oldCurInst [current_bd_instance .] 
- -# Set parent object as current -current_bd_instance $parentObj - - -# Create interface ports - -# Create ports -set clk_100MHz [ create_bd_port -dir I -type clk -freq_hz 100000000 clk_100MHz ] -set resetn [ create_bd_port -dir I -type rst resetn ] -set_property -dict [ list \ - CONFIG.POLARITY {ACTIVE_LOW} \ -] $resetn -set vx_busy [ create_bd_port -dir O vx_busy ] -set vx_reset [ create_bd_port -dir I -type rst vx_reset ] -set_property -dict [ list \ - CONFIG.POLARITY {ACTIVE_HIGH} \ -] $vx_reset - -set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ] -set dcr_wr_addr [ create_bd_port -dir I -from 11 -to 0 dcr_wr_addr ] -set dcr_wr_data [ create_bd_port -dir I -from 31 -to 0 dcr_wr_data ] - -# Create instance: Vortex_top_0, and set properties -set block_name Vortex_top -set block_cell_name Vortex_top_0 -if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_name $block_cell_name] } errmsg] } { - catch {common::send_gid_msg -ssname BD::TCL -id 2095 -severity "ERROR" "Unable to add referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} - return 1 - } elseif { $Vortex_top_0 eq "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} - return 1 + if { $bCheckIPsPassed != 1 } { + common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." 
+ return 3 } -# Create instance: axi_bram_ctrl_0, and set properties -set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ] -set_property -dict [ list \ - CONFIG.DATA_WIDTH {512} \ - CONFIG.ECC_TYPE {0} \ -] $axi_bram_ctrl_0 + variable script_folder -# Create instance: axi_bram_ctrl_0_bram, and set properties -set axi_bram_ctrl_0_bram [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 axi_bram_ctrl_0_bram ] + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] + } -set_property -dict [ list \ - CONFIG.Assume_Synchronous_Clk {true} \ - CONFIG.Byte_Size {8} \ - CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@COE_FILE@} \ - CONFIG.EN_SAFETY_CKT {true} \ - CONFIG.Enable_32bit_Address {true} \ - CONFIG.Fill_Remaining_Memory_Locations {false} \ - CONFIG.Memory_Type {Simple_Dual_Port_RAM} \ - CONFIG.Operating_Mode_A {NO_CHANGE} \ - CONFIG.Operating_Mode_B {READ_FIRST} \ - CONFIG.Port_B_Write_Rate {0} \ - CONFIG.Read_Width_A {512} \ - CONFIG.Read_Width_B {512} \ - CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \ - CONFIG.Register_PortB_Output_of_Memory_Primitives {false} \ - CONFIG.Remaining_Memory_Locations {0} \ - CONFIG.Use_Byte_Write_Enable {true} \ - CONFIG.Use_RSTA_Pin {false} \ - CONFIG.Use_RSTB_Pin {true} \ - CONFIG.Write_Width_A {512} \ - CONFIG.Write_Depth_A {16384} \ - CONFIG.use_bram_block {Stand_Alone} \ -] $axi_bram_ctrl_0_bram + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } -# Create interface connections -connect_bd_intf_net -intf_net Vortex_top_0_m_axi_mem [get_bd_intf_pins Vortex_top_0/m_axi_mem] [get_bd_intf_pins axi_bram_ctrl_0/S_AXI] -connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTA [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTA] 
-connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTB [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTB] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTB] + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } -# Create port connections -connect_bd_net -net Vortex_top_0_busy [get_bd_ports vx_busy] [get_bd_pins Vortex_top_0/busy] -connect_bd_net -net clk_wiz_clk_out1 [get_bd_ports clk_100MHz] [get_bd_pins Vortex_top_0/clk] [get_bd_pins axi_bram_ctrl_0/s_axi_aclk] -connect_bd_net -net resetn_1 [get_bd_ports resetn] [get_bd_pins axi_bram_ctrl_0/s_axi_aresetn] -connect_bd_net -net vx_reset_1 [get_bd_ports vx_reset] [get_bd_pins Vortex_top_0/reset] -connect_bd_net -net dcr_wr_valid_1 [get_bd_ports dcr_wr_valid] [get_bd_pins Vortex_top_0/dcr_wr_valid] -connect_bd_net -net dcr_wr_addr_1 [get_bd_ports dcr_wr_addr] [get_bd_pins Vortex_top_0/dcr_wr_addr] -connect_bd_net -net dcr_wr_data_1 [get_bd_ports dcr_wr_data] [get_bd_pins Vortex_top_0/dcr_wr_data] + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] 
-# Create address segments -assign_bd_address -offset 0x00000000 -range 0x00100000 -target_address_space [get_bd_addr_spaces Vortex_top_0/m_axi_mem] [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force + # Set parent object as current + current_bd_instance $parentObj -# Perform GUI Layout -regenerate_bd_layout -layout_string { - "ActiveEmotionalView":"Default View", - "Default View_ScaleFactor":"1.0", - "Default View_TopLeft":"-195,-165", - "ExpandedHierarchyInLayout":"", - "guistr":"# # String gsaved with Nlview 7.0r4 2019-12-20 bk=1.5203 VDI=41 GEI=36 GUI=JA:10.0 TLS -# -string -flagsOSRD -preplace port clk_100MHz -pg 1 -lvl 0 -x 0 -y 40 -defaultsOSRD -preplace port resetn -pg 1 -lvl 0 -x 0 -y 20 -defaultsOSRD -preplace port vx_busy -pg 1 -lvl 4 -x 950 -y 220 -defaultsOSRD -preplace port vx_reset -pg 1 -lvl 0 -x 0 -y 110 -defaultsOSRD -preplace port dcr_wr_valid -pg 1 -lvl 0 -x 0 -y 130 -defaultsOSRD -preplace portBus dcr_wr_addr -pg 1 -lvl 0 -x 0 -y 150 -defaultsOSRD -preplace portBus dcr_wr_data -pg 1 -lvl 0 -x 0 -y 170 -defaultsOSRD -preplace inst Vortex_top_0 -pg 1 -lvl 1 -x 190 -y 130 -defaultsOSRD -preplace inst axi_bram_ctrl_0 -pg 1 -lvl 2 -x 520 -y 140 -defaultsOSRD -preplace inst axi_bram_ctrl_0_bram -pg 1 -lvl 3 -x 800 -y 140 -defaultsOSRD -preplace netloc Vortex_top_0_busy 1 1 3 360J 220 NJ 220 NJ -preplace netloc clk_wiz_clk_out1 1 0 2 20 30 370 -preplace netloc resetn_1 1 0 2 NJ 20 380J -preplace netloc vx_reset_1 1 0 1 NJ 110 -preplace netloc dcr_wr_valid_1 1 0 1 NJ 130 -preplace netloc dcr_wr_addr_1 1 0 1 NJ 150 -preplace netloc dcr_wr_data_1 1 0 1 NJ 170 -preplace netloc axi_bram_ctrl_0_BRAM_PORTB 1 2 1 N 150 -preplace netloc axi_bram_ctrl_0_BRAM_PORTA 1 2 1 N 130 -preplace netloc Vortex_top_0_m_axi_mem 1 1 1 N 120 -levelinfo -pg 1 0 190 520 800 950 -pagesize -pg 1 -db -bbox -sgen -180 0 1060 240 -" + + # Create interface ports + + # Create ports + set clk_100MHz [ create_bd_port -dir I -type clk -freq_hz 100000000 clk_100MHz ] + set resetn [ 
create_bd_port -dir I -type rst resetn ] + set_property -dict [ list \ + CONFIG.POLARITY {ACTIVE_LOW} \ + ] $resetn + set vx_busy [ create_bd_port -dir O vx_busy ] + set vx_reset [ create_bd_port -dir I -type rst vx_reset ] + set_property -dict [ list \ + CONFIG.POLARITY {ACTIVE_HIGH} \ + ] $vx_reset + + set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ] + set dcr_wr_addr [ create_bd_port -dir I -from 11 -to 0 dcr_wr_addr ] + set dcr_wr_data [ create_bd_port -dir I -from 31 -to 0 dcr_wr_data ] + + # Create instance: Vortex_top_0, and set properties + set block_name Vortex_top + set block_cell_name Vortex_top_0 + if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_name $block_cell_name] } errmsg] } { + catch {common::send_gid_msg -ssname BD::TCL -id 2095 -severity "ERROR" "Unable to add referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} + return 1 + } elseif { $Vortex_top_0 eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. 
Please add the files for ${block_name}'s definition into the project."} + return 1 + } + + # Create instance: axi_bram_ctrl_0, and set properties + set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ] + set_property -dict [ list \ + CONFIG.DATA_WIDTH {512} \ + CONFIG.ECC_TYPE {0} \ + ] $axi_bram_ctrl_0 + + # Create instance: axi_bram_ctrl_0_bram, and set properties + set axi_bram_ctrl_0_bram [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 axi_bram_ctrl_0_bram ] + + set_property -dict [ list \ + CONFIG.Assume_Synchronous_Clk {true} \ + CONFIG.Byte_Size {8} \ + CONFIG.Load_Init_File {true} \ + CONFIG.Coe_File {@CURRENTDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \ + CONFIG.EN_SAFETY_CKT {true} \ + CONFIG.Enable_32bit_Address {true} \ + CONFIG.Fill_Remaining_Memory_Locations {false} \ + CONFIG.Memory_Type {Simple_Dual_Port_RAM} \ + CONFIG.Operating_Mode_A {NO_CHANGE} \ + CONFIG.Operating_Mode_B {READ_FIRST} \ + CONFIG.Port_B_Write_Rate {0} \ + CONFIG.Read_Width_A {512} \ + CONFIG.Read_Width_B {512} \ + CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \ + CONFIG.Register_PortB_Output_of_Memory_Primitives {false} \ + CONFIG.Remaining_Memory_Locations {0} \ + CONFIG.Use_Byte_Write_Enable {true} \ + CONFIG.Use_RSTA_Pin {false} \ + CONFIG.Use_RSTB_Pin {true} \ + CONFIG.Write_Width_A {512} \ + CONFIG.Write_Depth_A {16384} \ + CONFIG.use_bram_block {Stand_Alone} \ + ] $axi_bram_ctrl_0_bram + + # Create interface connections + connect_bd_intf_net -intf_net Vortex_top_0_m_axi_mem [get_bd_intf_pins Vortex_top_0/m_axi_mem] [get_bd_intf_pins axi_bram_ctrl_0/S_AXI] + connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTA [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTA] + connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTB [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTB] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTB] + + # Create port connections + 
connect_bd_net -net Vortex_top_0_busy [get_bd_ports vx_busy] [get_bd_pins Vortex_top_0/busy] + connect_bd_net -net clk_wiz_clk_out1 [get_bd_ports clk_100MHz] [get_bd_pins Vortex_top_0/clk] [get_bd_pins axi_bram_ctrl_0/s_axi_aclk] + connect_bd_net -net resetn_1 [get_bd_ports resetn] [get_bd_pins axi_bram_ctrl_0/s_axi_aresetn] + connect_bd_net -net vx_reset_1 [get_bd_ports vx_reset] [get_bd_pins Vortex_top_0/reset] + connect_bd_net -net dcr_wr_valid_1 [get_bd_ports dcr_wr_valid] [get_bd_pins Vortex_top_0/dcr_wr_valid] + connect_bd_net -net dcr_wr_addr_1 [get_bd_ports dcr_wr_addr] [get_bd_pins Vortex_top_0/dcr_wr_addr] + connect_bd_net -net dcr_wr_data_1 [get_bd_ports dcr_wr_data] [get_bd_pins Vortex_top_0/dcr_wr_data] + + # Create address segments + assign_bd_address -offset 0x00000000 -range 0x00100000 -target_address_space [get_bd_addr_spaces Vortex_top_0/m_axi_mem] [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force + + # Perform GUI Layout + regenerate_bd_layout -layout_string { + "ActiveEmotionalView":"Default View", + "Default View_ScaleFactor":"1.0", + "Default View_TopLeft":"-195,-165", + "ExpandedHierarchyInLayout":"", + "guistr":"# # String gsaved with Nlview 7.0r4 2019-12-20 bk=1.5203 VDI=41 GEI=36 GUI=JA:10.0 TLS + # -string -flagsOSRD + preplace port clk_100MHz -pg 1 -lvl 0 -x 0 -y 40 -defaultsOSRD + preplace port resetn -pg 1 -lvl 0 -x 0 -y 20 -defaultsOSRD + preplace port vx_busy -pg 1 -lvl 4 -x 950 -y 220 -defaultsOSRD + preplace port vx_reset -pg 1 -lvl 0 -x 0 -y 110 -defaultsOSRD + preplace port dcr_wr_valid -pg 1 -lvl 0 -x 0 -y 130 -defaultsOSRD + preplace portBus dcr_wr_addr -pg 1 -lvl 0 -x 0 -y 150 -defaultsOSRD + preplace portBus dcr_wr_data -pg 1 -lvl 0 -x 0 -y 170 -defaultsOSRD + preplace inst Vortex_top_0 -pg 1 -lvl 1 -x 190 -y 130 -defaultsOSRD + preplace inst axi_bram_ctrl_0 -pg 1 -lvl 2 -x 520 -y 140 -defaultsOSRD + preplace inst axi_bram_ctrl_0_bram -pg 1 -lvl 3 -x 800 -y 140 -defaultsOSRD + preplace netloc Vortex_top_0_busy 1 1 3 360J 
220 NJ 220 NJ + preplace netloc clk_wiz_clk_out1 1 0 2 20 30 370 + preplace netloc resetn_1 1 0 2 NJ 20 380J + preplace netloc vx_reset_1 1 0 1 NJ 110 + preplace netloc dcr_wr_valid_1 1 0 1 NJ 130 + preplace netloc dcr_wr_addr_1 1 0 1 NJ 150 + preplace netloc dcr_wr_data_1 1 0 1 NJ 170 + preplace netloc axi_bram_ctrl_0_BRAM_PORTB 1 2 1 N 150 + preplace netloc axi_bram_ctrl_0_BRAM_PORTA 1 2 1 N 130 + preplace netloc Vortex_top_0_m_axi_mem 1 1 1 N 120 + levelinfo -pg 1 0 190 520 800 950 + pagesize -pg 1 -db -bbox -sgen -180 0 1060 240 + " + } + + # Restore current instance + current_bd_instance $oldCurInst + + validate_bd_design + save_bd_design + close_bd_design $design_name + } + # End of cr_bd_design_1() + cr_bd_design_1 "" + set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] + set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] + set_property IS_ENABLED "1" [get_files design_1.bd ] + set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] + #set_property IS_LOCKED "0" [get_files design_1.bd ] + set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] + set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] + set_property PFM_NAME "" [get_files design_1.bd ] + set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] + set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] + set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] + set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] + set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] + set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] + + # Call make_wrapper to create wrapper files + set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] + add_files -norecurse -fileset sources_1 $wrapper_path + + update_compile_order -fileset sources_1 } - # Restore current instance - current_bd_instance $oldCurInst +proc run_synthesis {} { + global 
num_jobs + # Synthesis + if {$num_jobs != 0} { + launch_runs synth_1 -jobs $num_jobs + } else { + launch_runs synth_1 + } + wait_on_run synth_1 + open_run synth_1 + report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages - validate_bd_design - save_bd_design - close_bd_design $design_name + write_checkpoint -force post_synth.dcp } -# End of cr_bd_design_1() -cr_bd_design_1 "" -set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] -set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] -set_property IS_ENABLED "1" [get_files design_1.bd ] -set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] -#set_property IS_LOCKED "0" [get_files design_1.bd ] -set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] -set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] -set_property PFM_NAME "" [get_files design_1.bd ] -set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] -set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] -set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] -set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] -set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] -set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] -# Call make_wrapper to create wrapper files -set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] -add_files -norecurse -fileset sources_1 $wrapper_path +proc run_implementation {} { + global tool_dir num_jobs + source "${tool_dir}/ila_insert.tcl" + insert_ila 8192 -update_compile_order -fileset sources_1 + # Implementation + if {$num_jobs != 0} { + launch_runs impl_1 -jobs $num_jobs + } else { + launch_runs impl_1 + } + wait_on_run impl_1 + open_run impl_1 + report_place_status -file place.rpt + report_route_status -file route.rpt + write_checkpoint -force post_impl.dcp +} -# Synthesis -if {$num_jobs != 0} { - launch_runs synth_1 -jobs 
$num_jobs +proc run_report {} { + # Generate reports + report_timing_summary -file timing.rpt + report_power -file power.rpt + report_drc -file drc.rpt +} + +############################################################################### + +# Start time +set start_time [clock seconds] + +# Check if the post-implementation checkpoint exists +if { [file exists post_impl.dcp] } { + puts "Resuming from post-implementation checkpoint: post_impl.dcp" + open_checkpoint post_impl.dcp + run_report +} elseif { [file exists post_synth.dcp] } { + puts "Resuming from post-synthesis checkpoint: post_synth.dcp" + open_checkpoint post_synth.dcp + run_implementation + run_report } else { - launch_runs synth_1 + # execute full pipeline + run_setup + run_synthesis + run_implementation + run_report } -wait_on_run synth_1 -open_run synth_1 -write_checkpoint -force post_synth.dcp -report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages - -# Implementation -if {$num_jobs != 0} { - launch_runs impl_1 -jobs $num_jobs -} else { - launch_runs impl_1 -} -wait_on_run impl_1 -open_run impl_1 -write_checkpoint -force post_impl.dcp - -# Generate reports -report_place_status -file place.rpt -report_route_status -file route.rpt -report_timing_summary -file timing.rpt -report_power -file power.rpt -report_drc -file drc.rpt # End time and calculation set elapsed_time [expr {[clock seconds] - $start_time}] diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index fe9a56dc8..44b04c1a2 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -122,7 +122,8 @@ ifdef DEBUG VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS) else - CFLAGS += -DNDEBUG + VPP_FLAGS += --debug.chipscope vortex_afu_1 + CFLAGS += -DNDEBUG -DCHIPSCOPE $(DBG_SCOPE_FLAGS) endif else VPP_FLAGS += --optimize 3 @@ -167,7 +168,7 @@ $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml gen-xo: $(XO_CONTAINER) 
$(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/../scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index 4f357f195..71b2f4699 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -514,6 +514,7 @@ public: bank_req.type = bank_req_t::Core; bank_req.write = core_req.write; pipeline_req = bank_req; + DT(3, simobject_->name() << " core-req: " << core_req); } if (core_req.write) From 8135f72cc9280a86ee8b4542a2fde589acc6c0b9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 06:45:22 -0700 Subject: [PATCH 268/488] configure update --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index bbeda59c9..d2483a796 100755 --- a/configure +++ b/configure @@ -166,7 +166,7 @@ if [ "$OSVERSION" == "unsupported" ]; then fi # project subdirectories to build -SUBDIRS=("." "!ci" "!perf" "hw*" "!hw/syn*" "kernel*" "runtime*" "sim*" "tests*") +SUBDIRS=("." 
"!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*") # Get the directory of the script SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" From f2c1ad783126898093190b83401f85cff23db9d4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 09:56:54 -0700 Subject: [PATCH 269/488] minor update --- hw/syn/xilinx/{ => xrt}/gen_xo.tcl | 0 hw/syn/xilinx/{ => xrt}/package_kernel.tcl | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename hw/syn/xilinx/{ => xrt}/gen_xo.tcl (100%) rename hw/syn/xilinx/{ => xrt}/package_kernel.tcl (100%) diff --git a/hw/syn/xilinx/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl similarity index 100% rename from hw/syn/xilinx/gen_xo.tcl rename to hw/syn/xilinx/xrt/gen_xo.tcl diff --git a/hw/syn/xilinx/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl similarity index 100% rename from hw/syn/xilinx/package_kernel.tcl rename to hw/syn/xilinx/xrt/package_kernel.tcl From 8908f3e006d260c4a51f5692315b4a34d4a236c2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 10:05:17 -0700 Subject: [PATCH 270/488] minor update --- hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- runtime/xrt/Makefile | 5 +++++ runtime/xrt/vortex.cpp | 12 +++++------- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index 7d3342a4c..bad41f5cd 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -31,7 +31,7 @@ if {[file exists "${xoname}"]} { set argv [list ${build_dir}/ip] set argc 1 -source ${script_path}/gen_ip.tcl +source ${script_path}/xilinx_ip_gen.tcl set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 diff --git a/runtime/xrt/Makefile b/runtime/xrt/Makefile index 66d3e481b..d4fbc51a8 100644 --- a/runtime/xrt/Makefile +++ b/runtime/xrt/Makefile @@ -39,6 +39,11 @@ ifdef SCOPE SRCS += $(COMMON_DIR)/scope.cpp endif +# Enable ILA logic analyzer +ifdef CHIPSCOPE + CXXFLAGS += -DCHIPSCOPE +endif + all: 
$(DESTDIR)/$(PROJECT) driver: $(DESTDIR)/libxrtsim.so diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index b4e6090e1..a02a84990 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -125,13 +125,6 @@ static int get_platform_info(const std::string &device_name, return -1; } -/* -static void wait_for_enter(const std::string &msg) { - std::cout << msg << std::endl; - std::cin.ignore(std::numeric_limits::max(), '\n'); -} -*/ - /////////////////////////////////////////////////////////////////////////////// class vx_device { @@ -390,6 +383,11 @@ public: } #endif + #ifdef CHIPSCOPE + std::cout << "\nPress ENTER to continue after setting up ILA trigger..." << std::endl; + std::cin.ignore(std::numeric_limits::max(), '\n'); + #endif + return 0; } From 992f8d97d3fe821caa083da6f0b8d7fe07fce131 Mon Sep 17 00:00:00 2001 From: sij814 Date: Tue, 17 Sep 2024 19:47:13 -0700 Subject: [PATCH 271/488] sliced the bypass requests --- hw/rtl/Vortex_hbm.sv | 6 +- hw/rtl/cache/VX_cache_bypass.sv | 2 + hw/rtl/cache/VX_cache_bypass_l3.sv | 355 ++++++++++++++++ hw/rtl/cache/VX_cache_l3.sv | 628 +++++++++++++++++++++++++++ hw/rtl/cache/VX_cache_wrap_l3.sv | 21 +- sim/rtlsim/Makefile | 2 +- sim/rtlsim/processor.cpp | 246 +++++------ sim/rtlsim/processor_hbm.cpp | 656 +++++++++++++++++++++++++++++ 8 files changed, 1770 insertions(+), 146 deletions(-) create mode 100644 hw/rtl/cache/VX_cache_bypass_l3.sv create mode 100644 hw/rtl/cache/VX_cache_l3.sv create mode 100644 sim/rtlsim/processor_hbm.cpp diff --git a/hw/rtl/Vortex_hbm.sv b/hw/rtl/Vortex_hbm.sv index 253c325bb..d2ffc344d 100644 --- a/hw/rtl/Vortex_hbm.sv +++ b/hw/rtl/Vortex_hbm.sv @@ -209,12 +209,12 @@ module Vortex_hbm import VX_gpu_pkg::*; ( for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin if (mem_req_fire[i]) begin if (mem_req_rw[i]) - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i])); + 
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i], i)); else - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i])); + `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], i)); end if (mem_rsp_fire[i]) begin - `TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); + `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); end end end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 379d33e8a..18dfd50ad 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -250,7 +250,9 @@ module VX_cache_bypass #( end end + `IGNORE_UNUSED_BEGIN wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; + `IGNORE_UNUSED_END VX_bits_remove #( .N (MEM_TAG_OUT_WIDTH), diff --git a/hw/rtl/cache/VX_cache_bypass_l3.sv b/hw/rtl/cache/VX_cache_bypass_l3.sv new file mode 100644 index 000000000..69393cfc6 --- /dev/null +++ b/hw/rtl/cache/VX_cache_bypass_l3.sv @@ -0,0 +1,355 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+`include "VX_cache_define.vh"
+// VX_cache_bypass_l3: sends each core request either to the cache side (core_bus_out_if) or onto the memory bus, and steers memory responses back to the cache or the originating core port.
+module VX_cache_bypass_l3 #(
+    parameter NUM_REQS = 1,
+    parameter NUM_OUTPUTS = 1,
+    parameter TAG_SEL_IDX = 0,
+
+    parameter PASSTHRU = 0,
+    parameter NC_ENABLE = 0,
+
+    parameter WORD_SIZE = 1,
+    parameter LINE_SIZE = 1,
+
+    parameter CORE_ADDR_WIDTH = 1,
+
+    parameter CORE_TAG_WIDTH = 1,
+
+    parameter MEM_ADDR_WIDTH = 1,
+    parameter MEM_TAG_IN_WIDTH = 1,
+    parameter MEM_TAG_OUT_WIDTH = 1,
+
+    parameter UUID_WIDTH = 0,
+
+    parameter CORE_OUT_BUF = 0,
+    parameter MEM_OUT_BUF = 0,
+
+    parameter CORE_DATA_WIDTH = WORD_SIZE * 8
+ ) (
+    input wire clk,
+    input wire reset,
+
+    // Core request in
+    VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
+
+    // Core request out
+    VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
+
+    // Memory request in (single interface: reference it without an index)
+    VX_mem_bus_if.slave mem_bus_in_if,
+
+    // Memory request out (single interface: reference it without an index)
+    VX_mem_bus_if.master mem_bus_out_if
+);
+    localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
+
+    localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
+    localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
+
+    localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
+    localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
+
+    localparam CORE_TAG_ID_BITS = CORE_TAG_WIDTH - UUID_WIDTH;
+    localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS;
+    localparam MEM_TAG_BYPASS_BITS = UUID_WIDTH + MEM_TAG_ID_BITS;
+
+    `STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
+
+    // handle core requests ///////////////////////////////////////////////////
+
+    wire core_req_nc_valid; // NOTE(review): has no driver in this module while core_req_nc_arb below stays commented out
+    wire [NUM_REQS-1:0] core_req_nc_valids;
+    wire [NUM_REQS-1:0] core_req_nc_idxs;
+    wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; // NOTE(review): undriven, see core_req_nc_valid
+    wire [NUM_REQS-1:0] core_req_nc_sel; // NOTE(review): undriven, see core_req_nc_valid
+    wire [NUM_REQS-1:0] core_req_nc_ready;
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        if (PASSTHRU != 0) begin
+            assign core_req_nc_idxs[i] = 1'b1; // pass-through: every request takes the bypass path
+        end else if (NC_ENABLE) begin
+            assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
+        end else begin
+            assign core_req_nc_idxs[i] = 1'b0;
+        end
+        assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
+    end
+
+    /*
+    NOTE(review): arbiter disabled; grant_index/grant_onehot/grant_valid are what would drive core_req_nc_idx/_sel/_valid.
+    VX_generic_arbiter #(
+        .NUM_REQS (NUM_REQS),
+        .TYPE (PASSTHRU ? "R" : "P")
+    ) core_req_nc_arb (
+        .clk (clk),
+        .reset (reset),
+        .requests (core_req_nc_valids),
+        .grant_index (core_req_nc_idx),
+        .grant_onehot (core_req_nc_sel),
+        .grant_valid (core_req_nc_valid),
+        .grant_ready (core_req_nc_ready)
+    );
+    */
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
+        assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
+        assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
+                                                                 : core_bus_out_if[i].req_ready;
+    end
+
+    // handle memory requests /////////////////////////////////////////////////
+
+    wire [NUM_OUTPUTS-1:0] mem_req_out_valid;
+    wire [NUM_OUTPUTS-1:0] mem_req_out_rw;
+    wire [NUM_OUTPUTS-1:0][LINE_SIZE-1:0] mem_req_out_byteen;
+    wire [NUM_OUTPUTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr;
+    wire [NUM_OUTPUTS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype;
+    wire [NUM_OUTPUTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_out_data;
+    wire [NUM_OUTPUTS-1:0][MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
+    wire [NUM_OUTPUTS-1:0] mem_req_out_ready;
+
+    wire [NUM_REQS-1:0] core_req_nc_sel_rw;
+    wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_nc_sel_byteen;
+    wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
+    wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype;
+    wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
+    wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
+
+    wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign core_req_nc_mux_in[i] = {
+            core_bus_in_if[i].req_data.rw,
+            core_bus_in_if[i].req_data.byteen,
+            core_bus_in_if[i].req_data.addr,
+            core_bus_in_if[i].req_data.atype,
+            core_bus_in_if[i].req_data.data,
+            core_bus_in_if[i].req_data.tag
+        };
+    end
+
+    assign {
+        core_req_nc_sel_rw,
+        core_req_nc_sel_byteen,
+        core_req_nc_sel_addr,
+        core_req_nc_sel_atype,
+        core_req_nc_sel_data,
+        core_req_nc_sel_tag
+    } = core_req_nc_mux_in; // NOTE(review): flat unpack of a per-request packed array; fields only line up when NUM_REQS == 1 - confirm
+
+    assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready; // cache-side memory requests take priority over bypass requests
+
+    assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid;
+    assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw;
+    assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH];
+    assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype;
+
+    wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
+
+    wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
+
+    if (WORDS_PER_LINE > 1) begin
+        reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
+        reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
+
+        wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
+
+        always @(*) begin
+            mem_req_byteen_in_r = '0;
+            mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen; // only the addressed word carries byte enables
+
+            mem_req_data_in_r = 'x;
+            mem_req_data_in_r[req_wsel] = core_req_nc_sel_data;
+        end
+
+        assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r;
+        assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
+        if (NUM_REQS > 1) begin
+            assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
+        end else begin
+            assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
+        end
+    end else begin
+        assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : core_req_nc_sel_byteen;
+        assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : core_req_nc_sel_data;
+        if (NUM_REQS > 1) begin
+            assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
+        end else begin
+            assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id});
+        end
+    end
+
+    wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
+
+    if (UUID_WIDTH != 0) begin
+        assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
+    end else begin
+        assign mem_req_tag_bypass = mem_req_tag_id_bypass;
+    end
+
+    if (PASSTHRU != 0) begin
+        assign mem_req_out_tag = mem_req_tag_bypass;
+        `UNUSED_VAR (mem_bus_in_if.req_data.tag)
+    end else begin
+        if (NC_ENABLE) begin
+            VX_bits_insert #(
+                .N (MEM_TAG_OUT_WIDTH-1),
+                .S (1),
+                .POS (TAG_SEL_IDX)
+            ) mem_req_tag_in_nc_insert (
+                .data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)),
+                .ins_in (~mem_bus_in_if.req_valid), // inserted bit is 1 for bypass requests, 0 for cache requests
+                .data_out (mem_req_out_tag)
+            );
+        end else begin
+            assign mem_req_out_tag = mem_bus_in_if.req_data.tag;
+        end
+    end
+
+    assign mem_bus_in_if.req_ready = mem_req_out_ready;
+
+    VX_elastic_buffer #(
+        .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
+        .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
+        .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
+    ) mem_req_buf (
+        .clk (clk),
+        .reset (reset),
+        .valid_in (mem_req_out_valid),
+        .ready_in (mem_req_out_ready),
+        .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
+        .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
+        .valid_out (mem_bus_out_if.req_valid),
+        .ready_out (mem_bus_out_if.req_ready)
+    );
+
+    // handle core responses //////////////////////////////////////////////////
+
+    wire [NUM_REQS-1:0] core_rsp_in_valid;
+    wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_in_data;
+    wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_in_tag;
+    wire [NUM_REQS-1:0] core_rsp_in_ready;
+
+    wire is_mem_rsp_nc;
+    if (PASSTHRU != 0) begin
+        assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid;
+    end else begin
+        if (NC_ENABLE) begin
+            assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
+        end else begin
+            assign is_mem_rsp_nc = 1'b0;
+        end
+    end
+
+    wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
+
+    VX_bits_remove #(
+        .N (MEM_TAG_OUT_WIDTH),
+        .S (NC_ENABLE),
+        .POS (TAG_SEL_IDX)
+    ) mem_rsp_tag_in_nc_remove (
+        .data_in (mem_bus_out_if.rsp_data.tag),
+        .data_out (mem_rsp_tag_id_nc)
+    );
+
+    wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
+    if (NUM_REQS > 1) begin
+        assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
+    end else begin
+        assign rsp_idx = 1'b0;
+    end
+
+    reg [NUM_REQS-1:0] rsp_nc_valid_r;
+    always @(*) begin
+        rsp_nc_valid_r = '0;
+        rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc; // bypass response is valid only for the port encoded in the tag
+    end
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
+        assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
+    end
+
+    if (WORDS_PER_LINE > 1) begin
+        wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
+        for (genvar i = 0; i < NUM_REQS; ++i) begin
+            assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
+                core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
+        end
+    end else begin
+        for (genvar i = 0; i < NUM_REQS; ++i) begin
+            assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data;
+        end
+    end
+
+    wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2;
+    if (UUID_WIDTH != 0) begin
+        assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]};
+    end else begin
+        assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0];
+    end
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        if (PASSTHRU) begin
+            assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2;
+        end else if (NC_ENABLE) begin
+            assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
+        end else begin
+            assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
+        end
+    end
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        VX_elastic_buffer #(
+            .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH),
+            .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
+            .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
+        ) core_rsp_buf (
+            .clk (clk),
+            .reset (reset),
+            .valid_in (core_rsp_in_valid[i]),
+            .ready_in (core_rsp_in_ready[i]),
+            .data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
+            .data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
+            .valid_out (core_bus_in_if[i].rsp_valid),
+            .ready_out (core_bus_in_if[i].rsp_ready)
+        );
+    end
+
+    // handle memory responses ////////////////////////////////////////////////
+
+    if (PASSTHRU != 0) begin
+        assign mem_bus_in_if.rsp_valid = 1'b0;
+        assign mem_bus_in_if.rsp_data.data = '0;
+        assign mem_bus_in_if.rsp_data.tag = '0;
+    end else if (NC_ENABLE) begin
+        assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid && ~mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX];
+        assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
+        assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0];
+    end else begin
+        assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid;
+        assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data;
+        assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc;
+    end
+
+    wire [NUM_REQS-1:0] core_rsp_out_valid;
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;
+    end
+
+    assign mem_bus_out_if.rsp_ready = is_mem_rsp_nc ? (~core_rsp_out_valid[rsp_idx] && core_rsp_in_ready[rsp_idx]) : mem_bus_in_if.rsp_ready; // a bypass response waits while the cache is answering the same core port
+
+endmodule
diff --git a/hw/rtl/cache/VX_cache_l3.sv b/hw/rtl/cache/VX_cache_l3.sv
new file mode 100644
index 000000000..326a4fc65
--- /dev/null
+++ b/hw/rtl/cache/VX_cache_l3.sv
@@ -0,0 +1,628 @@
+// Copyright © 2019-2023
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+`include "VX_cache_define.vh"
+// VX_cache_l3: banked cache top level; dispatches NUM_REQS core ports to NUM_BANKS banks through a crossbar and arbitrates bank memory requests onto NUM_MEM_PORTS memory ports.
+module VX_cache_l3 import VX_gpu_pkg::*; #(
+    parameter `STRING INSTANCE_ID = "",
+
+    // Number of Word requests per cycle
+    parameter NUM_REQS = 4,
+
+    // Size of cache in bytes
+    parameter CACHE_SIZE = 4096,
+    // Size of line inside a bank in bytes
+    parameter LINE_SIZE = 64,
+    // Number of banks
+    parameter NUM_BANKS = 1,
+    // Number of memory ports
+    parameter NUM_MEM_PORTS = 1,
+    // Number of associative ways
+    parameter NUM_WAYS = 1,
+    // Size of a word in bytes
+    parameter WORD_SIZE = `XLEN/8,
+
+    // Core Response Queue Size
+    parameter CRSQ_SIZE = 2,
+    // Miss Reserv Queue Knob
+    parameter MSHR_SIZE = 8,
+    // Memory Response Queue Size
+    parameter MRSQ_SIZE = 0,
+    // Memory Request Queue Size
+    parameter MREQ_SIZE = 4,
+
+    // Enable cache writeable
+    parameter WRITE_ENABLE = 1,
+
+    // Enable cache writeback
+    parameter WRITEBACK = 0,
+
+    // Enable dirty bytes on writeback
+    parameter DIRTY_BYTES = 0,
+
+    // Request debug identifier
+    parameter UUID_WIDTH = 0,
+
+    // core request tag size
+    parameter TAG_WIDTH = UUID_WIDTH + 1,
+
+    // Core response output register
+    parameter CORE_OUT_BUF = 0,
+
+    // Memory request output register
+    parameter MEM_OUT_BUF = 0
+ ) (
+    // PERF
+`ifdef PERF_ENABLE
+    output cache_perf_t cache_perf,
+`endif
+
+    input wire clk,
+    input wire reset,
+
+    VX_mem_bus_if.slave core_bus_if [NUM_REQS],
+    VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS]
+);
+
+    `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2"))
+    `STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
+    `STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
+
+    // In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
+    // We need to ensure that the memory request queue never fills up to avoid deadlock.
+    `STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
+
+    localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
+    localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
+    localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
+    localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
+    localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
+    localparam WORD_WIDTH = WORD_SIZE * 8;
+    localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE);
+    localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
+    localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
+    localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
+    localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
+    localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
+
+    localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
+    localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
+
+    localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
+
+`ifdef PERF_ENABLE
+    wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
+    wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
+    wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
+`endif
+
+    VX_mem_bus_if #(
+        .DATA_SIZE (WORD_SIZE),
+        .TAG_WIDTH (TAG_WIDTH)
+    ) core_bus2_if[NUM_REQS]();
+
+    wire [NUM_BANKS-1:0] per_bank_flush_begin;
+    wire [NUM_BANKS-1:0] per_bank_flush_end;
+
+    wire [NUM_BANKS-1:0] per_bank_core_req_fire;
+
+    VX_cache_flush #(
+        .NUM_REQS (NUM_REQS),
+        .NUM_BANKS (NUM_BANKS),
+        .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
+    ) flush_unit (
+        .clk (clk),
+        .reset (reset),
+        .core_bus_in_if (core_bus_if),
+        .core_bus_out_if (core_bus2_if),
+        .bank_req_fire (per_bank_core_req_fire),
+        .flush_begin (per_bank_flush_begin),
+        .flush_end (per_bank_flush_end)
+    );
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    // Core response buffering
+    wire [NUM_REQS-1:0] core_rsp_valid_s;
+    wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s;
+    wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
+    wire [NUM_REQS-1:0] core_rsp_ready_s;
+
+    `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT);
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+
+        VX_elastic_buffer #(
+            .DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
+            .SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
+            .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
+        ) core_rsp_buf (
+            .clk (clk),
+            .reset (core_rsp_reset[i]),
+            .valid_in (core_rsp_valid_s[i]),
+            .ready_in (core_rsp_ready_s[i]),
+            .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
+            .data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
+            .valid_out (core_bus2_if[i].rsp_valid),
+            .ready_out (core_bus2_if[i].rsp_ready)
+        );
+    end
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    // Memory request buffering
+    wire [NUM_MEM_PORTS-1:0] mem_req_valid_s;
+    wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
+    wire [NUM_MEM_PORTS-1:0] mem_req_rw_s;
+    wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
+    wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_s;
+    wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_s;
+    wire [NUM_MEM_PORTS-1:0] mem_req_flush_s;
+    wire [NUM_MEM_PORTS-1:0] mem_req_ready_s;
+
+    wire [NUM_MEM_PORTS-1:0] mem_bus_if_flush;
+
+    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
+        VX_elastic_buffer #(
+            .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
+            .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
+            .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
+        ) mem_req_buf (
+            .clk (clk),
+            .reset (reset),
+            .valid_in (mem_req_valid_s[i]),
+            .ready_in (mem_req_ready_s[i]),
+            .data_in ({mem_req_rw_s[i], mem_req_byteen_s[i], mem_req_addr_s[i], mem_req_data_s[i], mem_req_tag_s[i], mem_req_flush_s[i]}),
+            .data_out ({mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag, mem_bus_if_flush[i]}),
+            .valid_out (mem_bus_if[i].req_valid),
+            .ready_out (mem_bus_if[i].req_ready)
+        );
+
+        assign mem_bus_if[i].req_data.atype = mem_bus_if_flush[i] ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; // the buffered flush bit is re-encoded as the only atype flag
+
+    end
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    // Memory response buffering
+    wire [NUM_MEM_PORTS-1:0] mem_rsp_valid_s;
+    wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
+    wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
+    wire [NUM_MEM_PORTS-1:0] mem_rsp_ready_s;
+
+    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
+        VX_elastic_buffer #(
+            .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
+            .SIZE (MRSQ_SIZE),
+            .OUT_REG (MRSQ_SIZE > 2)
+        ) mem_rsp_queue (
+            .clk (clk),
+            .reset (reset),
+            .valid_in (mem_bus_if[i].rsp_valid),
+            .ready_in (mem_bus_if[i].rsp_ready),
+            .data_in ({mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data}),
+            .data_out ({mem_rsp_tag_s[i], mem_rsp_data_s[i]}),
+            .valid_out (mem_rsp_valid_s[i]),
+            .ready_out (mem_rsp_ready_s[i])
+        );
+    end
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    wire [NUM_BANKS-1:0] per_bank_core_req_valid;
+    wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
+    wire [NUM_BANKS-1:0] per_bank_core_req_rw;
+    wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_core_req_wsel;
+    wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
+    wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
+    wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
+    wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
+    wire [NUM_BANKS-1:0] per_bank_core_req_flush;
+    wire [NUM_BANKS-1:0] per_bank_core_req_ready;
+
+    wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
+    wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
+    wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
+    wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
+    wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
+
+    wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
+    wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
+    wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
+    wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
+    wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
+    wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
+    wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
+    wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
+
+    wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
+
+    assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_core_req_ready; // a core request fires on its own valid/ready handshake, not on the memory-request ready
+
+    if (NUM_BANKS == 1) begin
+        assign mem_rsp_ready_s = per_bank_mem_rsp_ready; // NOTE(review): widths only match when NUM_MEM_PORTS == 1 - confirm
+    end else begin
+        for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
+            assign mem_rsp_ready_s[i] = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[i])];
+        end
+    end
+
+    // Bank requests dispatch
+
+    wire [NUM_REQS-1:0] core_req_valid;
+    wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
+    wire [NUM_REQS-1:0] core_req_rw;
+    wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
+    wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
+    wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
+    wire [NUM_REQS-1:0] core_req_flush;
+    wire [NUM_REQS-1:0] core_req_ready;
+
+    wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
+    wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
+    wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
+
+    wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
+    wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign core_req_valid[i] = core_bus2_if[i].req_valid;
+        assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
+        assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
+        assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
+        assign core_req_data[i] = core_bus2_if[i].req_data.data;
+        assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
+        assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
+        assign core_bus2_if[i].req_ready = core_req_ready[i];
+    end
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        if (WORDS_PER_LINE > 1) begin
+            assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
+        end else begin
+            assign core_req_wsel[i] = '0;
+        end
+        assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH];
+    end
+
+    if (NUM_BANKS > 1) begin
+        for (genvar i = 0; i < NUM_REQS; ++i) begin
+            assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; // bank id sits just above the word-select bits
+        end
+    end else begin
+        assign core_req_bid = '0;
+    end
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign core_req_data_in[i] = {
+            core_req_line_addr[i],
+            core_req_rw[i],
+            core_req_wsel[i],
+            core_req_byteen[i],
+            core_req_data[i],
+            core_req_tag[i],
+            core_req_flush[i]
+        };
+    end
+
+`ifdef PERF_ENABLE
+    wire [`PERF_CTR_BITS-1:0] perf_collisions;
+`endif
+
+    `RESET_RELAY (req_xbar_reset, reset);
+
+    VX_stream_xbar #(
+        .NUM_INPUTS (NUM_REQS),
+        .NUM_OUTPUTS (NUM_BANKS),
+        .DATAW (CORE_REQ_DATAW),
+        .PERF_CTR_BITS (`PERF_CTR_BITS),
+        .ARBITER ("F"),
+        .OUT_BUF (REQ_XBAR_BUF)
+    ) req_xbar (
+        .clk (clk),
+        .reset (req_xbar_reset),
+    `ifdef PERF_ENABLE
+        .collisions(perf_collisions),
+    `else
+        `UNUSED_PIN(collisions),
+    `endif
+        .valid_in (core_req_valid),
+        .data_in (core_req_data_in),
+        .sel_in (core_req_bid),
+        .ready_in (core_req_ready),
+        .valid_out (per_bank_core_req_valid),
+        .data_out (core_req_data_out),
+        .sel_out (per_bank_core_req_idx),
+        .ready_out (per_bank_core_req_ready)
+    );
+
+    for (genvar i = 0; i < NUM_BANKS; ++i) begin
+        assign {
+            per_bank_core_req_addr[i],
+            per_bank_core_req_rw[i],
+            per_bank_core_req_wsel[i],
+            per_bank_core_req_byteen[i],
+            per_bank_core_req_data[i],
+            per_bank_core_req_tag[i],
+            per_bank_core_req_flush[i]
+        } = core_req_data_out[i];
+    end
+
+    // Banks access
+    for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
+        wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
+        wire curr_bank_mem_rsp_valid;
+
+        if (NUM_BANKS == 1) begin
+            assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
+        end else begin
+            assign curr_bank_mem_rsp_valid = mem_rsp_valid_s[bank_id] && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[bank_id]) == bank_id); // NOTE(review): mem_rsp_*_s are NUM_MEM_PORTS wide but indexed by bank_id; looks valid only when NUM_MEM_PORTS >= NUM_BANKS - confirm
+        end
+
+        `RESET_RELAY (bank_reset, reset);
+
+        VX_cache_bank #(
+            .BANK_ID (bank_id),
+            .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
+            .CACHE_SIZE (CACHE_SIZE),
+            .LINE_SIZE (LINE_SIZE),
+            .NUM_BANKS (NUM_BANKS),
+            .NUM_WAYS (NUM_WAYS),
+            .WORD_SIZE (WORD_SIZE),
+            .NUM_REQS (NUM_REQS),
+            .CRSQ_SIZE (CRSQ_SIZE),
+            .MSHR_SIZE (MSHR_SIZE),
+            .MREQ_SIZE (MREQ_SIZE),
+            .WRITE_ENABLE (WRITE_ENABLE),
+            .DIRTY_BYTES (DIRTY_BYTES),
+            .WRITEBACK (WRITEBACK),
+            .UUID_WIDTH (UUID_WIDTH),
+            .TAG_WIDTH (TAG_WIDTH),
+            .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
+            .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
+        ) bank (
+            .clk (clk),
+            .reset (bank_reset),
+
+        `ifdef PERF_ENABLE
+            .perf_read_misses (perf_read_miss_per_bank[bank_id]),
+            .perf_write_misses (perf_write_miss_per_bank[bank_id]),
+            .perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]),
+        `endif
+
+            // Core request
+            .core_req_valid (per_bank_core_req_valid[bank_id]),
+            .core_req_addr (per_bank_core_req_addr[bank_id]),
+            .core_req_rw (per_bank_core_req_rw[bank_id]),
+            .core_req_wsel (per_bank_core_req_wsel[bank_id]),
+            .core_req_byteen (per_bank_core_req_byteen[bank_id]),
+            .core_req_data (per_bank_core_req_data[bank_id]),
+            .core_req_tag (per_bank_core_req_tag[bank_id]),
+            .core_req_idx (per_bank_core_req_idx[bank_id]),
+            .core_req_flush (per_bank_core_req_flush[bank_id]),
+            .core_req_ready (per_bank_core_req_ready[bank_id]),
+
+            // Core response
+            .core_rsp_valid (per_bank_core_rsp_valid[bank_id]),
+            .core_rsp_data (per_bank_core_rsp_data[bank_id]),
+            .core_rsp_tag (per_bank_core_rsp_tag[bank_id]),
+            .core_rsp_idx (per_bank_core_rsp_idx[bank_id]),
+            .core_rsp_ready (per_bank_core_rsp_ready[bank_id]),
+
+            // Memory request
+            .mem_req_valid (per_bank_mem_req_valid[bank_id]),
+            .mem_req_addr (curr_bank_mem_req_addr),
+            .mem_req_rw (per_bank_mem_req_rw[bank_id]),
+            .mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
+            .mem_req_data (per_bank_mem_req_data[bank_id]),
+            .mem_req_id (per_bank_mem_req_id[bank_id]),
+            .mem_req_flush (per_bank_mem_req_flush[bank_id]),
+            .mem_req_ready (per_bank_mem_req_ready[bank_id]),
+
+            // Memory response
+            .mem_rsp_valid (curr_bank_mem_rsp_valid),
+            .mem_rsp_data (mem_rsp_data_s[bank_id]),
+            .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s[bank_id])),
+            .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
+
+            .flush_begin (per_bank_flush_begin[bank_id]),
+            .flush_end (per_bank_flush_end[bank_id])
+        );
+
+        if (NUM_BANKS == 1) begin
+            assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
+        end else begin
+            assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
+        end
+    end
+
+    // Bank responses gather
+
+    wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in;
+    wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out;
+
+    for (genvar i = 0; i < NUM_BANKS; ++i) begin
+        assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]};
+    end
+
+    `RESET_RELAY (rsp_xbar_reset, reset);
+
+    VX_stream_xbar #(
+        .NUM_INPUTS (NUM_BANKS),
+        .NUM_OUTPUTS (NUM_REQS),
+        .DATAW (CORE_RSP_DATAW),
+        .ARBITER ("F")
+    ) rsp_xbar (
+        .clk (clk),
+        .reset (rsp_xbar_reset),
+        `UNUSED_PIN (collisions),
+        .valid_in (per_bank_core_rsp_valid),
+        .data_in (core_rsp_data_in),
+        .sel_in (per_bank_core_rsp_idx),
+        .ready_in (per_bank_core_rsp_ready),
+        .valid_out (core_rsp_valid_s),
+        .data_out (core_rsp_data_out),
+        .ready_out (core_rsp_ready_s),
+        `UNUSED_PIN (sel_out)
+    );
+
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i];
+    end
+
+    ///////////////////////////////////////////////////////////////////////////
+
+    wire [NUM_MEM_PORTS-1:0] mem_req_valid_p;
+    wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
+    wire [NUM_MEM_PORTS-1:0] mem_req_rw_p;
+    wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_p;
+    wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_p;
+    wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_p;
+    wire [NUM_MEM_PORTS-1:0][MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
+    wire [NUM_MEM_PORTS-1:0] mem_req_flush_p;
+    wire [NUM_MEM_PORTS-1:0] mem_req_ready_p;
+
+    // Memory request arbitration
+
+    wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
+
+    for (genvar i = 0; i < NUM_BANKS; ++i) begin
+        assign data_in[i] = {
+            per_bank_mem_req_addr[i],
+            per_bank_mem_req_rw[i],
+            per_bank_mem_req_byteen[i],
+            per_bank_mem_req_data[i],
+            per_bank_mem_req_id[i],
+            per_bank_mem_req_flush[i]
+        };
+    end
+
+    VX_stream_arb #(
+        .NUM_INPUTS (NUM_BANKS),
+        .NUM_OUTPUTS (NUM_MEM_PORTS),
+        .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
+        .ARBITER ("F")
+    ) mem_req_arb (
+        .clk (clk),
+        .reset (reset),
+        .valid_in (per_bank_mem_req_valid),
+        .ready_in (per_bank_mem_req_ready),
+        .data_in (data_in),
+        .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), // NOTE(review): field-major concat of per-port arrays; presumably data_out is per-port packed, so this lines up only when NUM_MEM_PORTS == 1 - confirm
+        .valid_out (mem_req_valid_p),
+        .ready_out (mem_req_ready_p),
+        `UNUSED_PIN (sel_out)
+    );
+
+    if (NUM_BANKS > 1) begin
+        for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
+            wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p[i]);
+            assign mem_req_tag_p[i] = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p[i]}); // tag = {bank id, MSHR id} so the response can be routed back to its bank
+        end
+    end else begin
+        assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
+    end
+
+    // Memory request multi-port handling
+
+    assign mem_req_valid_s = mem_req_valid_p;
+    assign mem_req_addr_s = mem_req_addr_p;
+    assign mem_req_tag_s = mem_req_tag_p;
+    assign mem_req_flush_s = mem_req_flush_p;
+    assign mem_req_ready_p = mem_req_ready_s;
+
+    if (WRITE_ENABLE != 0) begin
+        assign mem_req_rw_s = mem_req_rw_p;
+        assign mem_req_byteen_s = mem_req_byteen_p;
+        assign mem_req_data_s = mem_req_data_p;
+    end else begin
+        `UNUSED_VAR (mem_req_byteen_p)
+        `UNUSED_VAR (mem_req_data_p)
+        `UNUSED_VAR (mem_req_rw_p)
+
+        assign mem_req_rw_s = 0;
+        assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
+        assign mem_req_data_s = '0;
+    end
+
+`ifdef PERF_ENABLE
+    // per cycle: core_reads, core_writes
+    wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
+    wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
+
+    wire [NUM_REQS-1:0] perf_core_reads_per_req;
+    wire [NUM_REQS-1:0] perf_core_writes_per_req;
+
+    // per cycle: read misses, write misses, msrq stalls, pipeline stalls
+    wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
+    wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
+    wire [`CLOG2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
+    wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
+
+    `BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
+    `BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
+
+    `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
+    `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
+    `POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
+    `POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
+    `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
+
+    wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
+    for (genvar i = 0; i < NUM_REQS; ++i) begin
+        assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
+    end
+
+    `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
+
+    wire perf_mem_stall_per_cycle = mem_bus_if[0].req_valid && ~mem_bus_if[0].req_ready; // NOTE(review): only memory port 0 is sampled; other ports' stalls are not counted
+
+    reg [`PERF_CTR_BITS-1:0] perf_core_reads;
+    reg [`PERF_CTR_BITS-1:0] perf_core_writes;
+    reg [`PERF_CTR_BITS-1:0] perf_read_misses;
+    reg [`PERF_CTR_BITS-1:0] perf_write_misses;
+    reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
+    reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
+    reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
+
+    always @(posedge clk) begin
+        if (reset) begin
+            perf_core_reads <= '0;
+            perf_core_writes <= '0;
+            perf_read_misses <= '0;
+            perf_write_misses <= '0;
+            perf_mshr_stalls <= '0;
+            perf_mem_stalls <= '0;
+            perf_crsp_stalls <= '0;
+        end else begin
+            perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
+            perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
+            perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
+            perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle);
+            perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
+            perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'(perf_mem_stall_per_cycle);
+            perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
+        end
+    end
+
+    assign cache_perf.reads = perf_core_reads;
+    assign cache_perf.writes = perf_core_writes;
+    assign cache_perf.read_misses = perf_read_misses;
+    assign cache_perf.write_misses = perf_write_misses;
+    assign cache_perf.bank_stalls = perf_collisions;
+    assign cache_perf.mshr_stalls = perf_mshr_stalls;
+    assign cache_perf.mem_stalls = perf_mem_stalls;
+    assign cache_perf.crsp_stalls = perf_crsp_stalls;
+`endif
+
+endmodule
diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv
index 9a8f1688f..403edf554 100644
--- a/hw/rtl/cache/VX_cache_wrap_l3.sv
+++ b/hw/rtl/cache/VX_cache_wrap_l3.sv
@@ -95,6 +95,8 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #(
 
     localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);
 
+    localparam NUM_REQS_P = NUM_REQS / NUM_MEM_PORTS;
+
     VX_mem_bus_if #(
         .DATA_SIZE (WORD_SIZE),
         .TAG_WIDTH (TAG_WIDTH)
@@ -108,9 +110,13 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #(
     if (NC_OR_BYPASS) begin
         `RESET_RELAY (nc_bypass_reset, reset);
 
-        for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
+        for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
+
+            localparam
SLICE_BEGIN = i * NUM_REQS_P; + localparam SLICE_END = SLICE_BEGIN + NUM_REQS_P; + VX_cache_bypass #( - .NUM_REQS (NUM_REQS), + .NUM_REQS (NUM_REQS_P), .TAG_SEL_IDX (TAG_SEL_IDX), .PASSTHRU (PASSTHRU), @@ -134,13 +140,13 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( .clk (clk), .reset (nc_bypass_reset), - .core_bus_in_if (core_bus_if), - .core_bus_out_if(core_bus_cache_if), + .core_bus_in_if (core_bus_if[SLICE_END-1:SLICE_BEGIN]), + .core_bus_out_if(core_bus_cache_if[SLICE_END-1:SLICE_BEGIN]), .mem_bus_in_if (mem_bus_cache_if[i]), .mem_bus_out_if (mem_bus_if[i]) ); - end + end end else begin @@ -183,11 +189,12 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( `RESET_RELAY (cache_reset, reset); - VX_cache #( + VX_cache_l3 #( .INSTANCE_ID (INSTANCE_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), + .NUM_MEM_PORTS (NUM_MEM_PORTS), .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), @@ -209,7 +216,7 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( .cache_perf (cache_perf), `endif .core_bus_if (core_bus_cache_if), - .mem_bus_if (mem_bus_cache_if[0]) + .mem_bus_if (mem_bus_cache_if) ); end diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 197078813..9ddccc19d 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -37,7 +37,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += $(SRC_DIR)/processor.cpp +SRCS += $(SRC_DIR)/processor_hbm.cpp ifdef AXI_BUS TOP = Vortex_axi diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 7c812f7e8..e5e00f49e 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -17,8 +17,8 @@ #include "VVortex_axi.h" typedef VVortex_axi Device; #else -#include "VVortex_hbm.h" -typedef VVortex_hbm Device; +#include "VVortex.h" +typedef VVortex 
Device; #endif #ifdef VCD_OUTPUT @@ -123,15 +123,6 @@ public: tfp_->open("trace.vcd"); #endif - pending_mem_reqs_.resize(NUM_MEM_PORTS); - dram_queue_.resize(NUM_MEM_PORTS); - - mem_rd_rsp_active_.resize(NUM_MEM_PORTS); - mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); - - mem_wr_rsp_active_.resize(NUM_MEM_PORTS); - mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); - ram_ = nullptr; #ifndef NDEBUG @@ -219,19 +210,16 @@ private: print_bufs_.clear(); - for (int i = 0; i < NUM_MEM_PORTS; ++i) { + pending_mem_reqs_.clear(); - pending_mem_reqs_.at(i).clear(); - - { - std::queue empty; - std::swap(dram_queue_.at(i), empty); - } - - mem_rd_rsp_active_.at(i) = false; - mem_wr_rsp_active_.at(i) = false; + { + std::queue empty; + std::swap(dram_queue_, empty); } + mem_rd_rsp_active_ = false; + mem_wr_rsp_active_ = false; + this->mem_bus_reset(); this->dcr_bus_reset(); @@ -262,19 +250,17 @@ private: dram_sim_.tick(); - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (!dram_queue_.at(i).empty()) { - auto mem_req = dram_queue_.at(i).front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; - } - }, mem_req)) { - dram_queue_.at(i).pop(); + if (!dram_queue_.empty()) { + auto mem_req = dram_queue_.front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; } + }, mem_req)) { + dram_queue_.pop(); } } @@ -451,126 +437,116 @@ private: #else void mem_bus_reset() { - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - device_->mem_req_ready[i] = 0; - device_->mem_rsp_valid[i] = 0; - } + device_->mem_req_ready = 0; + device_->mem_rsp_valid = 0; } void mem_bus_eval(bool clk) { - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (!clk) { - mem_rd_rsp_ready_.at(i) = device_->mem_rsp_ready[i]; - return; - } + if (!clk) { 
+ mem_rd_rsp_ready_ = device_->mem_rsp_ready; + return; } - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (ram_ == nullptr) { - device_->mem_req_ready[i] = 0; - return; - } + if (ram_ == nullptr) { + device_->mem_req_ready = 0; + return; } // process memory read responses - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (mem_rd_rsp_active_.at(i) - && device_->mem_rsp_valid[i] && mem_rd_rsp_ready_.at(i)) { - mem_rd_rsp_active_.at(i) = false; - } - if (!mem_rd_rsp_active_.at(i)) { - if (!pending_mem_reqs_.at(i).empty() - && (*pending_mem_reqs_.at(i).begin())->ready) { - device_->mem_rsp_valid[i] = 1; - auto mem_rsp_it = pending_mem_reqs_.at(i).begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - memcpy(VDataCast::get(device_->mem_rsp_data[i]), mem_rsp->block.data(), MEM_BLOCK_SIZE); - device_->mem_rsp_tag[i] = mem_rsp->tag; - pending_mem_reqs_.at(i).erase(mem_rsp_it); - mem_rd_rsp_active_.at(i) = true; - delete mem_rsp; - } else { - device_->mem_rsp_valid[i] = 0; + if (mem_rd_rsp_active_ + && device_->mem_rsp_valid && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; + } + if (!mem_rd_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready) { + device_->mem_rsp_valid = 1; + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); } + printf("\n"); + */ + memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_tag = mem_rsp->tag; + pending_mem_reqs_.erase(mem_rsp_it); + mem_rd_rsp_active_ = true; + delete mem_rsp; + } else { + device_->mem_rsp_valid = 0; } } // 
process memory requests - for (int j = 0; j < NUM_MEM_PORTS; ++j) { - if (device_->mem_req_valid[j] && running_) { - uint64_t byte_addr = (device_->mem_req_addr[j] * MEM_BLOCK_SIZE); - if (device_->mem_req_rw[j]) { - auto byteen = device_->mem_req_byteen[j]; - auto data = VDataCast::get(device_->mem_req_data[j]); + if (device_->mem_req_valid && running_) { + uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); + if (device_->mem_req_rw) { + auto byteen = device_->mem_req_byteen; + auto data = VDataCast::get(device_->mem_req_data); - if (byte_addr >= uint64_t(IO_COUT_ADDR) - && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < IO_COUT_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } + if (byte_addr >= uint64_t(IO_COUT_ADDR) + && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < IO_COUT_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); } } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%d=%02x,", i, data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[j]; - mem_req->addr = byte_addr; - mem_req->write = true; - mem_req->ready = true; - - // send dram 
request - dram_queue_.at(j).push(mem_req); } } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[j]; - mem_req->addr = byte_addr; - mem_req->write = false; - mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); - pending_mem_reqs_.at(j).emplace_back(mem_req); + // process writes + /* + printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); + } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%d=%02x,", i, data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } - //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = true; // send dram request - dram_queue_.at(j).push(mem_req); + dram_queue_.push(mem_req); } - } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag; + mem_req->addr = byte_addr; + mem_req->write = false; + mem_req->ready = false; + ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + pending_mem_reqs_.emplace_back(mem_req); - device_->mem_req_ready[j] = running_; + //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); + + // send dram request + dram_queue_.push(mem_req); + } } + + device_->mem_req_ready = running_; } #endif @@ -607,9 +583,9 @@ private: std::unordered_map print_bufs_; - std::vector> pending_mem_reqs_; + std::list pending_mem_reqs_; - std::vector> dram_queue_; + std::queue dram_queue_; DramSim dram_sim_; @@ -621,11 +597,11 @@ private: RAM* ram_; - std::vector 
mem_rd_rsp_active_; - std::vector mem_rd_rsp_ready_; + bool mem_rd_rsp_active_; + bool mem_rd_rsp_ready_; - std::vector mem_wr_rsp_active_; - std::vector mem_wr_rsp_ready_; + bool mem_wr_rsp_active_; + bool mem_wr_rsp_ready_; bool running_; }; diff --git a/sim/rtlsim/processor_hbm.cpp b/sim/rtlsim/processor_hbm.cpp new file mode 100644 index 000000000..5f7bee7ee --- /dev/null +++ b/sim/rtlsim/processor_hbm.cpp @@ -0,0 +1,656 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "processor.h" + +#ifdef AXI_BUS +#include "VVortex_axi.h" +typedef VVortex_axi Device; +#else +#include "VVortex_hbm.h" +typedef VVortex_hbm Device; +#endif + +#ifdef VCD_OUTPUT +#include +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifndef MEMORY_BANKS + #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS + #else + #define MEMORY_BANKS 2 + #endif +#endif + +#ifndef MEM_CLOCK_RATIO +#define MEM_CLOCK_RATIO 1 +#endif + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +#ifndef VERILATOR_RESET_VALUE +#define VERILATOR_RESET_VALUE 2 +#endif + +#if (XLEN == 32) +typedef uint32_t Word; +#elif (XLEN == 64) +typedef uint64_t Word; +#else +#error unsupported XLEN +#endif + +#define VL_WDATA_GETW(lwp, i, n, w) \ + VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w) + +using namespace vortex; + +static uint64_t timestamp = 0; + +double sc_time_stamp() { + return timestamp; +} + +/////////////////////////////////////////////////////////////////////////////// + +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +/////////////////////////////////////////////////////////////////////////////// + +class Processor::Impl { +public: + Impl() : dram_sim_(MEM_CLOCK_RATIO) { + // force random values for unitialized signals + Verilated::randReset(VERILATOR_RESET_VALUE); + Verilated::randSeed(50); + + // turn off assertion before reset + Verilated::assertOn(false); + + // create RTL module instance + device_ = new Device(); + + #ifdef VCD_OUTPUT + 
Verilated::traceEverOn(true); + tfp_ = new VerilatedVcdC(); + device_->trace(tfp_, 99); + tfp_->open("trace.vcd"); + #endif + + pending_mem_reqs_.resize(NUM_MEM_PORTS); + dram_queue_.resize(NUM_MEM_PORTS); + + mem_rd_rsp_active_.resize(NUM_MEM_PORTS); + mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); + + mem_wr_rsp_active_.resize(NUM_MEM_PORTS); + mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); + + ram_ = nullptr; + + #ifndef NDEBUG + // dump device configuration + std::cout << "CONFIGS:" + << " num_threads=" << NUM_THREADS + << ", num_warps=" << NUM_WARPS + << ", num_cores=" << NUM_CORES + << ", num_clusters=" << NUM_CLUSTERS + << ", socket_size=" << SOCKET_SIZE + << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec + << ", num_barriers=" << NUM_BARRIERS + << std::endl; + #endif + // reset the device + this->reset(); + + // Turn on assertion after reset + Verilated::assertOn(true); + } + + ~Impl() { + this->cout_flush(); + + #ifdef VCD_OUTPUT + tfp_->close(); + delete tfp_; + #endif + + delete device_; + } + + void cout_flush() { + for (auto& buf : print_bufs_) { + auto str = buf.second.str(); + if (!str.empty()) { + std::cout << "#" << buf.first << ": " << str << std::endl; + } + } + } + + void attach_ram(RAM* ram) { + ram_ = ram; + } + + void run() { + + #ifndef NDEBUG + std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; + #endif + + // start execution + running_ = true; + device_->reset = 0; + + /* + device_->mem_req_valid[1] = 0; + device_->mem_req_ready[1] = 0; + device_->mem_rsp_valid[1] = 0; + device_->mem_rsp_ready[1] = 0; + */ + + // wait on device to go busy + while (!device_->busy) { + this->tick(); + } + + // wait on device to go idle + while (device_->busy) { + this->tick(); + } + + // reset device + this->reset(); + + this->cout_flush(); + } + + void dcr_write(uint32_t addr, uint32_t value) { + device_->dcr_wr_valid = 1; + device_->dcr_wr_addr = addr; + device_->dcr_wr_data = value; + while (device_->dcr_wr_valid) { + this->tick(); + 
} + } + +private: + + void reset() { + running_ = false; + + print_bufs_.clear(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + + pending_mem_reqs_.at(i).clear(); + + { + std::queue empty; + std::swap(dram_queue_.at(i), empty); + } + + mem_rd_rsp_active_.at(i) = false; + mem_wr_rsp_active_.at(i) = false; + } + + this->mem_bus_reset(); + + this->dcr_bus_reset(); + + device_->reset = 1; + + for (int i = 0; i < RESET_DELAY; ++i) { + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + } + } + + void tick() { + + device_->clk = 0; + this->eval(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + this->mem_bus_eval(0, i); + } + this->dcr_bus_eval(0); + + device_->clk = 1; + this->eval(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + this->mem_bus_eval(1, i); + } + this->dcr_bus_eval(1); + + dram_sim_.tick(); + + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + if (!dram_queue_.at(i).empty()) { + auto mem_req = dram_queue_.at(i).front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { + dram_queue_.at(i).pop(); + } + } + } + + #ifndef NDEBUG + fflush(stdout); + #endif + } + + void eval() { + device_->eval(); + #ifdef VCD_OUTPUT + if (sim_trace_enabled()) { + tfp_->dump(timestamp); + } else { + exit(-1); + } + #endif + ++timestamp; + } + +#ifdef AXI_BUS + + void mem_bus_reset() { + device_->m_axi_wready[0] = 0; + device_->m_axi_awready[0] = 0; + device_->m_axi_arready[0] = 0; + device_->m_axi_rvalid[0] = 0; + device_->m_axi_bvalid[0] = 0; + } + + void mem_bus_eval(bool clk) { + if (!clk) { + mem_rd_rsp_ready_ = device_->m_axi_rready[0]; + mem_wr_rsp_ready_ = device_->m_axi_bready[0]; + return; + } + + if (ram_ == nullptr) { + device_->m_axi_wready[0] = 0; + device_->m_axi_awready[0] = 0; + device_->m_axi_arready[0] = 0; + return; + } + + // process memory read responses + if 
(mem_rd_rsp_active_ + && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; + } + if (!mem_rd_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready + && !(*pending_mem_reqs_.begin())->write) { + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); + } + printf("\n"); + */ + device_->m_axi_rvalid[0] = 1; + device_->m_axi_rid[0] = mem_rsp->tag; + device_->m_axi_rresp[0] = 0; + device_->m_axi_rlast[0] = 1; + memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); + pending_mem_reqs_.erase(mem_rsp_it); + mem_rd_rsp_active_ = true; + delete mem_rsp; + } else { + device_->m_axi_rvalid[0] = 0; + } + } + + // process memory write responses + if (mem_wr_rsp_active_ + && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { + mem_wr_rsp_active_ = false; + } + if (!mem_wr_rsp_active_) { + if (!pending_mem_reqs_.empty() + && (*pending_mem_reqs_.begin())->ready + && (*pending_mem_reqs_.begin())->write) { + auto mem_rsp_it = pending_mem_reqs_.begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr); + */ + device_->m_axi_bvalid[0] = 1; + device_->m_axi_bid[0] = mem_rsp->tag; + device_->m_axi_bresp[0] = 0; + pending_mem_reqs_.erase(mem_rsp_it); + mem_wr_rsp_active_ = true; + delete mem_rsp; + } else { + device_->m_axi_bvalid[0] = 0; + } + } + + // select the memory bank + uint32_t req_addr = device_->m_axi_wvalid[0] ? 
device_->m_axi_awaddr[0] : device_->m_axi_araddr[0]; + + // process memory requests + if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { + if (device_->m_axi_wvalid[0]) { + auto byteen = device_->m_axi_wstrb[0]; + auto base_addr = device_->m_axi_awaddr[0]; + auto data = (uint8_t*)device_->m_axi_wdata[0].data(); + + if (base_addr >= uint64_t(IO_COUT_ADDR) + && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + // process writes + /* + printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); + } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[base_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = device_->m_axi_awid[0]; + mem_req->addr = device_->m_axi_awaddr[0]; + mem_req->write = true; + mem_req->ready = false; + pending_mem_reqs_.emplace_back(mem_req); + + // send dram request + dram_queue_.push(mem_req); + } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->m_axi_arid[0]; + mem_req->addr = device_->m_axi_araddr[0]; + ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE); + mem_req->write = false; + mem_req->ready = false; + pending_mem_reqs_.emplace_back(mem_req); + + // send dram request + dram_queue_.push(mem_req); + } + } + + device_->m_axi_wready[0] = running_; + device_->m_axi_awready[0] = running_; + device_->m_axi_arready[0] = running_; + } + +#else + + void 
mem_bus_reset() { + for (int i = 0; i < NUM_MEM_PORTS; ++i) { + device_->mem_req_ready[i] = 0; + device_->mem_rsp_valid[i] = 0; + } + } + + void mem_bus_eval(bool clk, int n) { + if (!clk) { + mem_rd_rsp_ready_.at(n) = device_->mem_rsp_ready[n]; + return; + } + + if (ram_ == nullptr) { + device_->mem_req_ready[n] = 0; + return; + } + + // process memory read responses + if (mem_rd_rsp_active_.at(n) + && device_->mem_rsp_valid[n] && mem_rd_rsp_ready_.at(n)) { + mem_rd_rsp_active_.at(n) = false; + } + if (!mem_rd_rsp_active_.at(n)) { + if (!pending_mem_reqs_.at(n).empty() + && (*pending_mem_reqs_.at(n).begin())->ready) { + device_->mem_rsp_valid[n] = 1; + auto mem_rsp_it = pending_mem_reqs_.at(n).begin(); + auto mem_rsp = *mem_rsp_it; + /* + printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%02x", mem_rsp->block[i]); + } + printf("\n"); + */ + memcpy(VDataCast::get(device_->mem_rsp_data[n]), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_tag[n] = mem_rsp->tag; + pending_mem_reqs_.at(n).erase(mem_rsp_it); + mem_rd_rsp_active_.at(n) = true; + delete mem_rsp; + } else { + device_->mem_rsp_valid[n] = 0; + } + } + + // process memory requests + if (device_->mem_req_valid[n] && running_) { + uint64_t byte_addr = (device_->mem_req_addr[n] * MEM_BLOCK_SIZE); + if (device_->mem_req_rw[n]) { + auto byteen = device_->mem_req_byteen[n]; + auto data = VDataCast::get(device_->mem_req_data[n]); + + if (byte_addr >= uint64_t(IO_COUT_ADDR) + && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { + // process console output + for (int i = 0; i < IO_COUT_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + // process writes + /* + printf("%0ld: [sim] MEM Wr Req: 
tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); + for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { + printf("%x", (int)((byteen >> (4 * i)) & 0xf)); + } + printf(", data=0x"); + for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { + printf("%d=%02x,", i, data[i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag[n]; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = true; + + // send dram request + dram_queue_.at(n).push(mem_req); + } + } else { + // process reads + auto mem_req = new mem_req_t(); + mem_req->tag = device_->mem_req_tag[n]; + mem_req->addr = byte_addr; + mem_req->write = false; + mem_req->ready = false; + ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + pending_mem_reqs_.at(n).emplace_back(mem_req); + + //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); + + // send dram request + dram_queue_.at(n).push(mem_req); + } + } + + device_->mem_req_ready[n] = running_; + } + +#endif + + void dcr_bus_reset() { + device_->dcr_wr_valid = 0; + } + + void dcr_bus_eval(bool clk) { + if (!clk) { + return; + } + if (device_->dcr_wr_valid) { + device_->dcr_wr_valid = 0; + } + } + + void wait(uint32_t cycles) { + for (int i = 0; i < cycles; ++i) { + this->tick(); + } + } + +private: + + typedef struct { + Device* device; + std::array block; + uint64_t addr; + uint64_t tag; + bool write; + bool ready; + } mem_req_t; + + std::unordered_map print_bufs_; + + std::vector> pending_mem_reqs_; + + std::vector> dram_queue_; + + DramSim dram_sim_; + + Device* device_; + +#ifdef VCD_OUTPUT + VerilatedVcdC *tfp_; +#endif + + RAM* ram_; + + std::vector mem_rd_rsp_active_; + std::vector mem_rd_rsp_ready_; + + std::vector mem_wr_rsp_active_; + std::vector mem_wr_rsp_ready_; + + bool running_; +}; + 
+/////////////////////////////////////////////////////////////////////////////// + +Processor::Processor() + : impl_(new Impl()) +{} + +Processor::~Processor() { + delete impl_; +} + +void Processor::attach_ram(RAM* mem) { + impl_->attach_ram(mem); +} + +void Processor::run() { + impl_->run(); +} + +void Processor::dcr_write(uint32_t addr, uint32_t value) { + return impl_->dcr_write(addr, value); +} \ No newline at end of file From 8e3bd5696b389baf3962b65b60c0b2e210b6fd27 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 19:52:51 -0700 Subject: [PATCH 272/488] xilinx synthesis debugging fixes --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 67 ++++++++++++++-------------- hw/rtl/core/VX_fetch.sv | 10 +++++ hw/rtl/core/VX_issue_slice.sv | 11 +++++ hw/rtl/core/VX_lsu_slice.sv | 12 ++++- hw/syn/xilinx/README | 20 ++++++--- hw/syn/xilinx/xrt/Makefile | 18 ++++++-- hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- hw/syn/xilinx/xrt/package_kernel.tcl | 23 +++++----- 8 files changed, 107 insertions(+), 56 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 9872ae3c1..c2f865076 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -299,6 +299,39 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// +`ifdef DBG_SCOPE_AFU +`ifdef SCOPE + `define TRIGGERS { \ + reset, \ + ap_reset, \ + ap_start, \ + ap_done, \ + ap_idle, \ + interrupt, \ + vx_busy_wait, \ + vx_busy, \ + vx_reset \ + } + `define PROBES { \ + vx_pending_writes \ + } + VX_scope_tap #( + .SCOPE_ID (0), + .TRIGGERW ($bits(`TRIGGERS)), + .PROBEW ($bits(`PROBES)) + ) scope_tap ( + .clk (clk), + .reset (scope_reset_w[0]), + .start (1'b0), + .stop (1'b0), + .triggers (`TRIGGERS), + .probes (`PROBES), + .bus_in (scope_bus_in_w[0]), + .bus_out (scope_bus_out_w[0]) + ); +`else + `SCOPE_IO_UNUSED_W(0) +`endif `ifdef CHIPSCOPE ila_afu ila_afu_inst ( .clk (clk), @@ -320,40 +353,6 @@ module VX_afu_wrap #( }) ); 
`endif - -`ifdef DBG_SCOPE_AFU - `define TRIGGERS { \ - reset, \ - ap_reset, \ - ap_start, \ - ap_done, \ - ap_idle, \ - interrupt, \ - vx_busy_wait, \ - vx_busy, \ - vx_reset \ - } - - `define PROBES { \ - vx_pending_writes \ - } - - VX_scope_tap #( - .SCOPE_ID (0), - .TRIGGERW ($bits(`TRIGGERS)), - .PROBEW ($bits(`PROBES)) - ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers (`TRIGGERS), - .probes (`PROBES), - .bus_in (scope_bus_in_w[0]), - .bus_out (scope_bus_out_w[0]) - ); -`else - `SCOPE_IO_UNUSED_W(0) `endif `ifdef SIMULATION diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index a2a80ed94..044cd0aba 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -132,6 +132,7 @@ module VX_fetch import VX_gpu_pkg::*; #( assign icache_bus_if.rsp_ready = fetch_if.ready; `ifdef DBG_SCOPE_FETCH +`ifdef SCOPE wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; VX_scope_tap #( @@ -162,6 +163,15 @@ module VX_fetch import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`ifdef CHIPSCOPE + ila_fetch ila_fetch_inst ( + .clk (clk), + .probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}), + .probe1 ({icache_bus_if.req_valid, icache_bus_if.req_data, icache_bus_if.req_ready}), + .probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready}) + ); +`endif +`endif `ifdef DBG_TRACE_MEM always @(posedge clk) begin diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 63d811328..34b60676f 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -89,6 +89,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); `ifdef DBG_SCOPE_ISSUE +`ifdef SCOPE wire operands_if_fire = operands_if.valid && operands_if.ready; wire operands_if_not_ready = ~operands_if.ready; wire writeback_if_valid = writeback_if.valid; @@ -131,6 +132,16 @@ module 
VX_issue_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`ifdef CHIPSCOPE + ila_issue ila_issue_inst ( + .clk (clk), + .probe0 ({decode_if.valid, decode_if.data, decode_if.ready}), + .probe1 ({scoreboard_if.valid, scoreboard_if.data, scoreboard_if.ready}), + .probe2 ({operands_if.valid, operands_if.data, operands_if.ready}), + .probe3 ({writeback_if.valid, writeback_if.data}) + ); +`endif +`endif `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 43f787ae9..2664202e7 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -341,7 +341,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .core_req_tag (mem_req_tag), .core_req_ready (mem_req_ready), `UNUSED_PIN (core_req_empty), - `UNUSED_PIN (core_req_sent), + `UNUSED_PIN (core_write_notify), // Output response .core_rsp_valid (mem_rsp_valid), @@ -535,6 +535,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `endif `ifdef DBG_SCOPE_LSU +`ifdef SCOPE VX_scope_tap #( .SCOPE_ID (3), .TRIGGERW (3), @@ -552,5 +553,14 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`ifdef CHIPSCOPE + ila_lsu ila_lsu_inst ( + .clk (clk), + .probe0 ({execute_if.valid, execute_if.data, execute_if.ready}), + .probe1 ({lsu_mem_if.req_valid, lsu_mem_if.req_data, lsu_mem_if.req_ready}), + .probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready}) + ); +`endif +`endif endmodule diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index 17d398dfa..0fb83e71b 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -8,6 +8,9 @@ xbutil validate --device 0000:09:00.1 --verbose vivado -mode batch -source xilinx_ip_gen.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 # build FPGA +PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 make > build_u55c_hw_emu_base_1c.log 2>&1 & +PREFIX=build_base_1c NUM_CORES=1 TARGET=hw 
PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 make > build_u55c_hw_base_1c.log 2>&1 & + PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_emu_base_1c.log 2>&1 & PREFIX=build_base_1c NUM_CORES=1 TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_base_1c.log 2>&1 & @@ -25,14 +28,21 @@ PREFIX=build TARGET=hw_emu PLATFORM=xilinx_vck5000_gen3x16_xdma_1_202120_1 make # debug hw_emu using xsim xsim --gui xilinx_u50_gen3x16_xdma_5_202210_1-0-vortex_afu.wdb & -# debug hw using ILA +# h/w debugging using ILA +## (1) check for ILA support platforminfo --json="hardwarePlatform.extensions.chipscope_debug" xilinx_u50_gen3x16_xdma_5_202210_1 +## (2) check for XVC full path to get device id ls /dev/xfpga/xvc_pub* -ls /dev/xvc_pub* -debug_hw --xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server -debug_hw --xvc_pcie /dev/xvc_pub.u0 --hw_server +## (3) start h/w server +debug_hw --xvc_pcie /dev/xfpga/xvc_pub.<device_id> --hw_server +## (4) start application and pause +## (5) start vivado to connect to h/w server and select ILA probes debug_hw --vivado --host localhost --ltx_file ./build_xilinx_u50_gen3x16_xdma_5_202210_1_hw/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx & -make chipscope TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 +## (6) resume application + +# supported ILA Makefile targets +TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make hwserver +TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope # analyze build report vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 44b04c1a2..0e2aea5a9 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -53,6 +53,9 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE DBG_TRACE_FLAGS += -DDBG_TRACE_MEM DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE DBG_TRACE_FLAGS += -DDBG_TRACE_AFU +DBG_TRACE_FLAGS +=
-DDBG_TRACE_TEX +DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER +DBG_TRACE_FLAGS += -DDBG_TRACE_OM DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR # Control logic analyzer monitors @@ -60,6 +63,9 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU +DBG_SCOPE_FLAGS += -DDBG_SCOPE_TEX +DBG_SCOPE_FLAGS += -DDBG_SCOPE_OM +DBG_SCOPE_FLAGS += -DDBG_SCOPE_RASTER DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # cluster configuration @@ -77,8 +83,11 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif +TEX_INCLUDE = -I$(RTL_DIR)/tex +RASTER_INCLUDE = -I$(RTL_DIR)/raster +OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -RTL_INCLUDE += $(FPU_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) # Kernel compiler global settings VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache @@ -179,17 +188,18 @@ $(BIN_DIR)/emconfig.json: mkdir -p $(BIN_DIR); cd $(BUILD_DIR); emconfigutil --platform $(PLATFORM) --od ../$(BIN_DIR) report: $(XCLBIN_CONTAINER) -ifeq ($(TARGET),$(findstring $(TARGET), hw)) - cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin/runme.log +ifeq ($(TARGET), hw) + cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin/vivado.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log + [ -f "$(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx" ] && cp 
$(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx $(BUILD_DIR)/bin/debug_nets.ltx endif hwserver: debug_hw --xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server & chipscope: - debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx & + debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/bin/debug_nets.ltx & clean: $(RMDIR) $(BUILD_DIR) diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index bad41f5cd..c36c98e36 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -31,7 +31,7 @@ if {[file exists "${xoname}"]} { set argv [list ${build_dir}/ip] set argc 1 -source ${script_path}/xilinx_ip_gen.tcl +source ${tool_dir}/xilinx_ip_gen.tcl set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 diff --git a/hw/syn/xilinx/xrt/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl index 2c314754d..aa7e96f3f 100644 --- a/hw/syn/xilinx/xrt/package_kernel.tcl +++ b/hw/syn/xilinx/xrt/package_kernel.tcl @@ -89,9 +89,9 @@ if { $chipscope == 1 } { CONFIG.C_EN_STRG_QUAL {1} \ CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {3} \ - CONFIG.C_PROBE0_WIDTH {128} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {128} \ + CONFIG.C_PROBE0_WIDTH {40} \ + CONFIG.C_PROBE1_WIDTH {80} \ + CONFIG.C_PROBE2_WIDTH {40} \ CONFIG.ALL_PROBE_SAME_MU {false} \ CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_fetch] @@ -102,9 +102,11 @@ if { $chipscope == 1 } { set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ CONFIG.C_DATA_DEPTH {8192} \ - CONFIG.C_NUM_OF_PROBES {2} \ - CONFIG.C_PROBE0_WIDTH {256} \ - CONFIG.C_PROBE1_WIDTH {128} \ + CONFIG.C_NUM_OF_PROBES {4} \ + CONFIG.C_PROBE0_WIDTH {112} \ + CONFIG.C_PROBE1_WIDTH {112} \ + CONFIG.C_PROBE2_WIDTH {280} \ + CONFIG.C_PROBE3_WIDTH {112} \ CONFIG.ALL_PROBE_SAME_MU {false} \ CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_issue] @@ -115,11 +117,10 @@ if { 
$chipscope == 1 } { set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ CONFIG.C_DATA_DEPTH {8192} \ - CONFIG.C_NUM_OF_PROBES {4} \ - CONFIG.C_PROBE0_WIDTH {256} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {288} \ - CONFIG.C_PROBE3_WIDTH {256} \ + CONFIG.C_NUM_OF_PROBES {3} \ + CONFIG.C_PROBE0_WIDTH {288} \ + CONFIG.C_PROBE1_WIDTH {152} \ + CONFIG.C_PROBE2_WIDTH {72} \ CONFIG.ALL_PROBE_SAME_MU {false} \ CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_lsu] From f0bff2a4a23f21d9de2006add2862662bcfbc539 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 17 Sep 2024 20:31:12 -0700 Subject: [PATCH 273/488] minor update --- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 2664202e7..4a8e79953 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -341,7 +341,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .core_req_tag (mem_req_tag), .core_req_ready (mem_req_ready), `UNUSED_PIN (core_req_empty), - `UNUSED_PIN (core_write_notify), + `UNUSED_PIN (core_req_wr_notify), // Output response .core_rsp_valid (mem_rsp_valid), diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 9dada16bc..229ff6cf2 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -55,7 +55,7 @@ module VX_mem_scheduler #( input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, output wire core_req_empty, - output wire core_req_sent, + output wire core_req_wr_notify, // Core response output wire core_rsp_valid, @@ -187,8 +187,8 @@ module VX_mem_scheduler #( // no pending requests assign core_req_empty = !reqq_valid && ibuf_empty; - // notify request submisison - assign core_req_sent = reqq_valid && reqq_ready; + // notify write request submisison + assign core_req_wr_notify = reqq_valid && reqq_ready && 
reqq_rw; // Index buffer /////////////////////////////////////////////////////////// From 48f86a48f60ba699430dad41132ec3ada0d95e8c Mon Sep 17 00:00:00 2001 From: sij814 Date: Wed, 18 Sep 2024 22:05:40 -0700 Subject: [PATCH 274/488] changed mem_req_arb in VX_cache_l3.sv to accept data_out --- hw/rtl/cache/VX_cache_l3.sv | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_l3.sv b/hw/rtl/cache/VX_cache_l3.sv index 326a4fc65..7eb7556de 100644 --- a/hw/rtl/cache/VX_cache_l3.sv +++ b/hw/rtl/cache/VX_cache_l3.sv @@ -495,6 +495,7 @@ module VX_cache_l3 import VX_gpu_pkg::*; #( // Memory request arbitration wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; + wire [NUM_MEM_PORTS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_out; for (genvar i = 0; i < NUM_BANKS; ++i) begin assign data_in[i] = { @@ -518,12 +519,23 @@ module VX_cache_l3 import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), + .data_out (data_out), .valid_out (mem_req_valid_p), .ready_out (mem_req_ready_p), `UNUSED_PIN (sel_out) ); + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + assign { + mem_req_addr_p[i], + mem_req_rw_p[i], + mem_req_byteen_p[i], + mem_req_data_p[i], + mem_req_id_p[i], + mem_req_flush_p[i] + } = data_out[i]; + end + if (NUM_BANKS > 1) begin for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p[i]); From a37309c6b001cb25c6d760b606e853a997789652 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 04:24:20 -0700 Subject: [PATCH 275/488] xrtsim implementation --- ci/regression.sh.in | 18 +- hw/rtl/afu/xrt/vortex_afu.v | 4 +- hw/rtl/afu/xrt/vortex_afu.vh | 14 +- 
hw/rtl/cache/VX_cache_bank.sv | 6 +- hw/rtl/core/VX_operands.sv | 45 +- hw/rtl/libs/VX_axi_adapter.sv | 54 +-- runtime/common/common.h | 3 +- runtime/opae/Makefile | 2 +- runtime/xrt/vortex.cpp | 12 +- .../common/malloc.h => sim/common/mem_alloc.h | 0 sim/common/mp_macros.h | 327 +++++++++++++ sim/opaesim/fpga.cpp | 2 + sim/opaesim/opae_sim.cpp | 37 +- sim/opaesim/opae_sim.h | 2 + sim/rtlsim/Makefile | 9 +- sim/rtlsim/processor.cpp | 268 ++--------- sim/xrtsim/Makefile | 13 +- sim/xrtsim/{fpga.cpp => xrt.cpp} | 57 ++- sim/xrtsim/{fpga.h => xrt.h} | 6 +- sim/xrtsim/xrt_sim.cpp | 453 +++++++++++++++--- sim/xrtsim/xrt_sim.h | 14 + 21 files changed, 940 insertions(+), 406 deletions(-) rename runtime/common/malloc.h => sim/common/mem_alloc.h (100%) create mode 100644 sim/common/mp_macros.h rename sim/xrtsim/{fpga.cpp => xrt.cpp} (62%) rename sim/xrtsim/{fpga.h => xrt.h} (98%) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 32e479c1e..fb25ef480 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -92,10 +92,12 @@ regression() # test global barrier CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2 + CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tgbar" --cores=2 # test local barrier ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar" + ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tbar" # test temp driver mode for ./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3 @@ -230,15 +232,18 @@ config2() # test opaesim ./ci/blackbox.sh --driver=opae --app=printf ./ci/blackbox.sh --driver=opae --app=diverge + ./ci/blackbox.sh --driver=xrt --app=diverge # disable DPI if [ "$XLEN" == "64" ]; then # need to disable trig on 64-bit due to a bug inside fpnew's sqrt core. 
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar" CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar" + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar" else CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood fi # custom program startup address @@ -255,11 +260,9 @@ config2() # disabling ZICOND extension CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo - # test AXI bus - AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress - # test 128-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress # test XLEN-bit MEM block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress @@ -299,10 +302,11 @@ debug() test_csv_trace - CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" - ./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1" - + ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" + echo "debugging tests done!" 
} @@ -312,7 +316,7 @@ stress() # test verilator reset values CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood - CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache + CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache echo "stress tests done!" } diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 1973ec0aa..0e042c32b 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -17,8 +17,8 @@ module vortex_afu #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH, + parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH ) ( // System signals input wire ap_clk, diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index 3616b0794..1a14e1316 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,7 +15,15 @@ `define VORTEX_AFU_VH `ifndef M_AXI_MEM_NUM_BANKS -`define M_AXI_MEM_NUM_BANKS 1 +`define M_AXI_MEM_NUM_BANKS 4 +`endif + +`ifndef M_AXI_MEM_ADDR_WIDTH +`define M_AXI_MEM_ADDR_WIDTH 30 +`endif + +`ifndef M_AXI_MEM_DATA_WIDTH +`define M_AXI_MEM_DATA_WIDTH 512 `endif `ifndef M_AXI_MEM_ID_WIDTH diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 5054fa333..59b4be871 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -273,15 +273,15 @@ module VX_cache_bank #( assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin : g_data_sel + if (WRITE_ENABLE) begin : g_data_sel_lo assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); - end else begin : g_data_sel_ro + end else begin : g_data_sel_lo_ro assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (core_req_data) `UNUSED_VAR (replay_data) end - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel + for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel_hi assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel end diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 2ca847394..f30681263 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -69,11 +69,9 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_m_st2; - wire [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; + reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st2, src_data_m_st2; - reg [NUM_SRC_OPDS-1:0] data_fetched_n; - wire 
[NUM_SRC_OPDS-1:0] data_fetched_st1; + reg [NUM_SRC_OPDS-1:0] data_fetched_st1; reg has_collision_n; wire has_collision_st1; @@ -139,15 +137,6 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in; - always @(*) begin - data_fetched_n = data_fetched_st1; - if (scoreboard_if.ready) begin - data_fetched_n = '0; - end else begin - data_fetched_n = data_fetched_st1 | req_fire_in; - end - end - assign pipe_data = { scoreboard_if.data.wis, scoreboard_if.data.tmask, @@ -166,33 +155,37 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; VX_pipe_buffer #( - .DATAW (NUM_SRC_OPDS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), - .RESETW (NUM_SRC_OPDS) + .DATAW (NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)) ) pipe_reg1 ( .clk (clk), .reset (reset), .valid_in (scoreboard_if.valid), .ready_in (pipe_ready_in), - .data_in ({data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), - .data_out ({data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), + .data_in ({gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), + .data_out ({gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), .valid_out(pipe_valid_st1), .ready_out(pipe_ready_st1) ); - assign src_data_st1 = pipe_fire_st2 ? 
'0 : src_data_m_st2; + always @(posedge clk) begin + if (reset || scoreboard_if.ready) begin + data_fetched_st1 <= 0; + end else begin + data_fetched_st1 <= data_fetched_st1 | req_fire_in; + end + end wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; VX_pipe_buffer #( - .DATAW (NUM_SRC_OPDS * REGS_DATAW + NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (NUM_SRC_OPDS * REGS_DATAW) + .DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH) ) pipe_reg2 ( .clk (clk), .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), - .data_in ({src_data_st1, gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), - .data_out ({src_data_st2, gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), + .data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), + .data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), .valid_out(pipe_valid_st2), .ready_out(pipe_ready_st2) ); @@ -206,6 +199,14 @@ module VX_operands import VX_gpu_pkg::*; #( end end + always @(posedge clk) begin + if (reset || pipe_fire_st2) begin + src_data_st2 <= 0; + end else begin + src_data_st2 <= src_data_m_st2; + end + end + VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 4755764a4..6c231cb95 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -94,49 +94,36 @@ module VX_axi_adapter #( localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); wire [BANK_ADDRW-1:0] req_bank_sel; - if (NUM_BANKS > 1) begin : g_req_bank_sel assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; end else begin : g_req_bank_sel_0 assign req_bank_sel = '0; end - wire mem_req_fire = mem_req_valid && mem_req_ready; - - reg [NUM_BANKS-1:0] m_axi_aw_ack; - reg [NUM_BANKS-1:0] m_axi_w_ack; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w - wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; - wire m_axi_w_fire = m_axi_wvalid[i] && 
m_axi_wready[i]; - always @(posedge clk) begin - if (reset) begin - m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; - end else begin - if (mem_req_fire && (req_bank_sel == i)) begin - m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; - end else begin - if (m_axi_aw_fire) - m_axi_aw_ack[i] <= 1; - if (m_axi_w_fire) - m_axi_w_ack[i] <= 1; - end - end - end - end - - wire axi_write_ready [NUM_BANKS]; - + wire [NUM_BANKS-1:0] axi_aw_ready, axi_write_ready; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) - && (m_axi_wready[i] || m_axi_w_ack[i]); + assign axi_aw_ready[i] = m_axi_awready[i] || m_axi_aw_ack[i]; + assign axi_write_ready[i] = m_axi_wready[i] && axi_aw_ready[i]; end // request ack assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; + reg [NUM_BANKS-1:0] m_axi_aw_ack; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w + always @(posedge clk) begin + if (reset) begin + m_axi_aw_ack[i] <= 0; + end else begin + if (m_axi_wvalid[i] && m_axi_wready[i]) begin + m_axi_aw_ack[i] <= 0; + end else if (m_axi_awvalid[i] && m_axi_awready[i]) begin + m_axi_aw_ack[i] <= 1; + end + end + end + end + // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; @@ -154,7 +141,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && axi_aw_ready[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; @@ -190,14 +177,13 @@ module VX_axi_adapter #( wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in; wire 
[NUM_BANKS-1:0] rsp_arb_ready_in; - `UNUSED_VAR (m_axi_rlast) - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)) `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)) + `UNUSED_VAR (m_axi_rlast[i]) end VX_stream_arb #( diff --git a/runtime/common/common.h b/runtime/common/common.h index 1f718f938..27335455b 100644 --- a/runtime/common/common.h +++ b/runtime/common/common.h @@ -13,11 +13,12 @@ #pragma once +#include #include #include #include #include -#include +#include #include #include diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index 56355890d..b002375d9 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -10,7 +10,7 @@ SYN_DIR := $(HW_DIR)/syn/altera/opae SRC_DIR := $(VORTEX_HOME)/runtime/opae CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) +CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) # Position independent code diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index a02a84990..de65c1e85 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -18,15 +18,15 @@ #endif // XRT includes -#ifndef XRTSIM +#ifdef XRTSIM +#include +#else #include "experimental/xrt_bo.h" #include "experimental/xrt_device.h" #include "experimental/xrt_error.h" #include "experimental/xrt_ip.h" #include "experimental/xrt_kernel.h" #include "experimental/xrt_xclbin.h" -#else -#include #endif #include @@ -66,7 +66,7 @@ struct platform_info_t { }; static const platform_info_t g_platforms[] = { - {"vortex_xrtsim", 4, 16, 0x0}, // 16 x 64 KB = 1 MB + {"vortex_xrtsim", 0, 32, 0x0}, // 16 x 
256 MB = 4 GB {"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 {"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 {"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 @@ -258,7 +258,7 @@ public: return -1; }); #else - xrtKernelHandle xrtKernel = nullptr; + xrtKernelHandle xrtKernel = xrtDevice; #endif // get device name @@ -538,7 +538,6 @@ public: return err; }); #endif - DBGPRINT("*** write_register: addr=0x%x, value=0x%x\n", addr, value); return 0; } @@ -551,7 +550,6 @@ public: return err; }); #endif - DBGPRINT("*** read_register: addr=0x%x, value=0x%x\n", addr, *value); return 0; } diff --git a/runtime/common/malloc.h b/sim/common/mem_alloc.h similarity index 100% rename from runtime/common/malloc.h rename to sim/common/mem_alloc.h diff --git a/sim/common/mp_macros.h b/sim/common/mp_macros.h new file mode 100644 index 000000000..fde5ac79e --- /dev/null +++ b/sim/common/mp_macros.h @@ -0,0 +1,327 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// macro primitives + +#define MP_COMMA , +#define MP_REM(...) __VA_ARGS__ +#define MP_EAT(...) + +#define MP_STRINGIZE_(x) #x +#define MP_STRINGIZE(x) MP_STRINGIZE_(x) + +#define MP_CONCAT_(x, ...) x ## __VA_ARGS__ +#define MP_CONCAT(x, ...) 
MP_CONCAT_(x, __VA_ARGS__) + +#define MP_COUNTOF(arr) (sizeof(arr) / sizeof(arr[0])) + +// conditional macro + +#define MP_IIF_0(x, y) y +#define MP_IIF_1(x, y) x +#define MP_IIF(c) MP_CONCAT(MP_IIF_, c) + +#define MP_PAIR_FIRST(a, b) a +#define MP_PAIR_SECOND(a, b) b + +// pair macros + +#define MP_PAIR(x) MP_REM x +#define MP_PAIR_HEAD_(x, ...) MP_PAIR(x) +#define MP_PAIR_PROBE_(...) (__VA_ARGS__), +#define MP_PAIR_L_(...) MP_PAIR_HEAD_(__VA_ARGS__) +#define MP_PAIR_L(x) MP_PAIR_L_(MP_PAIR_PROBE_ x,) +#define MP_PAIR_R(x) MP_EAT x + +// separator macros + +#define MP_SEP_COMMA() , +#define MP_SEP_SEMICOLON() ; +#define MP_SEP_PLUS() + +#define MP_SEP_AND() & +#define MP_SEP_OR() | +#define MP_SEP_COLON() : +#define MP_SEP_SPACE() /**/ +#define MP_SEP_LESS() < +#define MP_SEP_GREATER() > +#define MP_SEP_ANDL() && +#define MP_SEP_ORL() || + +// MAKE_UNIQUE macro + +#define MP_MAKE_UNIQUE(x) MP_CONCAT(x, __COUNTER__) + +// increment macro + +#define MP_INC(x) MP_INC_ ## x +#define MP_INC_0 1 +#define MP_INC_1 2 +#define MP_INC_2 3 +#define MP_INC_3 4 +#define MP_INC_4 5 +#define MP_INC_5 6 +#define MP_INC_6 7 +#define MP_INC_7 8 +#define MP_INC_8 9 +#define MP_INC_9 10 +#define MP_INC_10 11 +#define MP_INC_11 12 +#define MP_INC_12 13 +#define MP_INC_13 14 +#define MP_INC_14 15 +#define MP_INC_15 16 +#define MP_INC_16 17 +#define MP_INC_17 18 +#define MP_INC_18 19 +#define MP_INC_19 20 +#define MP_INC_20 21 +#define MP_INC_21 22 +#define MP_INC_22 23 +#define MP_INC_23 24 +#define MP_INC_24 25 +#define MP_INC_25 26 +#define MP_INC_26 27 +#define MP_INC_27 28 +#define MP_INC_28 29 +#define MP_INC_29 30 +#define MP_INC_30 31 +#define MP_INC_31 32 +#define MP_INC_32 33 +#define MP_INC_33 34 +#define MP_INC_34 35 +#define MP_INC_35 36 +#define MP_INC_36 37 +#define MP_INC_37 38 +#define MP_INC_38 39 +#define MP_INC_39 40 +#define MP_INC_40 41 +#define MP_INC_41 42 +#define MP_INC_42 43 +#define MP_INC_43 44 +#define MP_INC_44 45 +#define MP_INC_45 46 +#define MP_INC_46 
47 +#define MP_INC_47 48 +#define MP_INC_48 49 +#define MP_INC_49 50 +#define MP_INC_50 51 +#define MP_INC_51 52 +#define MP_INC_52 53 +#define MP_INC_53 54 +#define MP_INC_54 55 +#define MP_INC_55 56 +#define MP_INC_56 57 +#define MP_INC_57 58 +#define MP_INC_58 59 +#define MP_INC_59 60 +#define MP_INC_60 61 +#define MP_INC_61 62 +#define MP_INC_62 63 +#define MP_INC_63 64 + +// NARG macro + +#define MP_NARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10,_11,_12,_13,_14,_15,_16, \ + _17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,_32, \ + _33,_34,_35,_36,_37,_38,_39,_40,_41,_42,_43,_44,_45,_46,_47,_48, \ + _49,_50,_51,_52,_53,_54,_55,_56,_57,_58,_59,_60,_61,_62,_63, N, ...) N + +#define MP_NARG_R() 63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48, \ + 47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32, \ + 31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16, \ + 15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +#define MP_NARG_(...) MP_NARG_N(__VA_ARGS__) +#define MP_NARG(...) MP_NARG_(__VA_ARGS__, MP_NARG_R()) + +// FOR_EACH macro + +#define MP_FOR_EACH_1(idx, func, arg, sep, ...) func(arg, idx, __VA_ARGS__) +#define MP_FOR_EACH_2(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_1(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_3(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_2(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_4(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_3(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_5(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_4(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_6(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_5(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_7(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_6(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_8(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_7(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_9(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_8(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_10(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_9(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_11(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_10(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_12(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_11(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_13(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_12(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_14(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_13(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_15(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_14(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_16(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_15(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_17(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_16(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_18(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_17(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_19(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_18(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_20(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_19(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_21(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_20(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_22(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_21(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_23(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_22(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_24(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_23(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_25(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_24(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_26(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_25(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_27(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_26(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_28(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_27(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_29(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_28(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_30(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_29(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_31(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_30(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_32(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_31(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_33(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_32(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_34(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_33(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_35(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_34(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_36(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_35(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_37(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_36(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_38(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_37(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_39(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_38(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_40(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_39(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_41(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_40(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_42(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_41(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_43(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_42(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_44(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_43(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_45(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_44(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_46(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_45(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_47(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_46(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_48(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_47(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_49(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_48(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_50(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_49(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_51(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_50(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_52(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_51(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_53(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_52(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_54(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_53(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_55(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_54(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_56(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_55(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_57(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_56(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_58(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_57(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_59(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_58(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_60(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_59(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_61(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_60(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_62(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_61(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_63(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_62(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_64(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_63(MP_INC(idx), func, arg, sep, __VA_ARGS__) + +#define MP_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_FOR_EACH_, N)(0, func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH(func, arg, sep, ...) MP_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__) + +// REVERSE_FOR_EACH macro + +#define MP_REVERSE_FOR_EACH_1(func, arg, sep, ...) func(arg, 0, __VA_ARGS__) +#define MP_REVERSE_FOR_EACH_2(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_1(func, arg, sep, __VA_ARGS__) sep() func(arg, 1, x) +#define MP_REVERSE_FOR_EACH_3(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_2(func, arg, sep, __VA_ARGS__) sep() func(arg, 2, x) +#define MP_REVERSE_FOR_EACH_4(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_3(func, arg, sep, __VA_ARGS__) sep() func(arg, 3, x) +#define MP_REVERSE_FOR_EACH_5(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_4(func, arg, sep, __VA_ARGS__) sep() func(arg, 4, x) +#define MP_REVERSE_FOR_EACH_6(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_5(func, arg, sep, __VA_ARGS__) sep() func(arg, 5, x) +#define MP_REVERSE_FOR_EACH_7(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_6(func, arg, sep, __VA_ARGS__) sep() func(arg, 6, x) +#define MP_REVERSE_FOR_EACH_8(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_7(func, arg, sep, __VA_ARGS__) sep() func(arg, 7, x) +#define MP_REVERSE_FOR_EACH_9(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_8(func, arg, sep, __VA_ARGS__) sep() func(arg, 8, x) +#define MP_REVERSE_FOR_EACH_10(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_9(func, arg, sep, __VA_ARGS__) sep() func(arg, 9, x) +#define MP_REVERSE_FOR_EACH_11(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_10(func, arg, sep, __VA_ARGS__) sep() func(arg, 10, x) +#define MP_REVERSE_FOR_EACH_12(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_11(func, arg, sep, __VA_ARGS__) sep() func(arg, 11, x) +#define MP_REVERSE_FOR_EACH_13(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_12(func, arg, sep, __VA_ARGS__) sep() func(arg, 12, x) +#define MP_REVERSE_FOR_EACH_14(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_13(func, arg, sep, __VA_ARGS__) sep() func(arg, 13, x) +#define MP_REVERSE_FOR_EACH_15(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_14(func, arg, sep, __VA_ARGS__) sep() func(arg, 14, x) +#define MP_REVERSE_FOR_EACH_16(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_15(func, arg, sep, __VA_ARGS__) sep() func(arg, 15, x) +#define MP_REVERSE_FOR_EACH_17(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_16(func, arg, sep, __VA_ARGS__) sep() func(arg, 16, x) +#define MP_REVERSE_FOR_EACH_18(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_17(func, arg, sep, __VA_ARGS__) sep() func(arg, 17, x) +#define MP_REVERSE_FOR_EACH_19(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_18(func, arg, sep, __VA_ARGS__) sep() func(arg, 18, x) +#define MP_REVERSE_FOR_EACH_20(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_19(func, arg, sep, __VA_ARGS__) sep() func(arg, 19, x) +#define MP_REVERSE_FOR_EACH_21(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_20(func, arg, sep, __VA_ARGS__) sep() func(arg, 20, x) +#define MP_REVERSE_FOR_EACH_22(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_21(func, arg, sep, __VA_ARGS__) sep() func(arg, 21, x) +#define MP_REVERSE_FOR_EACH_23(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_22(func, arg, sep, __VA_ARGS__) sep() func(arg, 22, x) +#define MP_REVERSE_FOR_EACH_24(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_23(func, arg, sep, __VA_ARGS__) sep() func(arg, 23, x) +#define MP_REVERSE_FOR_EACH_25(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_24(func, arg, sep, __VA_ARGS__) sep() func(arg, 24, x) +#define MP_REVERSE_FOR_EACH_26(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_25(func, arg, sep, __VA_ARGS__) sep() func(arg, 25, x) +#define MP_REVERSE_FOR_EACH_27(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_26(func, arg, sep, __VA_ARGS__) sep() func(arg, 26, x) +#define MP_REVERSE_FOR_EACH_28(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_27(func, arg, sep, __VA_ARGS__) sep() func(arg, 27, x) +#define MP_REVERSE_FOR_EACH_29(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_28(func, arg, sep, __VA_ARGS__) sep() func(arg, 28, x) +#define MP_REVERSE_FOR_EACH_30(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_29(func, arg, sep, __VA_ARGS__) sep() func(arg, 29, x) +#define MP_REVERSE_FOR_EACH_31(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_30(func, arg, sep, __VA_ARGS__) sep() func(arg, 30, x) +#define MP_REVERSE_FOR_EACH_32(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_31(func, arg, sep, __VA_ARGS__) sep() func(arg, 31, x) +#define MP_REVERSE_FOR_EACH_33(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_32(func, arg, sep, __VA_ARGS__) sep() func(arg, 32, x) +#define MP_REVERSE_FOR_EACH_34(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_33(func, arg, sep, __VA_ARGS__) sep() func(arg, 33, x) +#define MP_REVERSE_FOR_EACH_35(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_34(func, arg, sep, __VA_ARGS__) sep() func(arg, 34, x) +#define MP_REVERSE_FOR_EACH_36(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_35(func, arg, sep, __VA_ARGS__) sep() func(arg, 35, x) +#define MP_REVERSE_FOR_EACH_37(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_36(func, arg, sep, __VA_ARGS__) sep() func(arg, 36, x) +#define MP_REVERSE_FOR_EACH_38(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_37(func, arg, sep, __VA_ARGS__) sep() func(arg, 37, x) +#define MP_REVERSE_FOR_EACH_39(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_38(func, arg, sep, __VA_ARGS__) sep() func(arg, 38, x) +#define MP_REVERSE_FOR_EACH_40(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_39(func, arg, sep, __VA_ARGS__) sep() func(arg, 39, x) +#define MP_REVERSE_FOR_EACH_41(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_40(func, arg, sep, __VA_ARGS__) sep() func(arg, 40, x) +#define MP_REVERSE_FOR_EACH_42(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_41(func, arg, sep, __VA_ARGS__) sep() func(arg, 41, x) +#define MP_REVERSE_FOR_EACH_43(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_42(func, arg, sep, __VA_ARGS__) sep() func(arg, 42, x) +#define MP_REVERSE_FOR_EACH_44(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_43(func, arg, sep, __VA_ARGS__) sep() func(arg, 43, x) +#define MP_REVERSE_FOR_EACH_45(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_44(func, arg, sep, __VA_ARGS__) sep() func(arg, 44, x) +#define MP_REVERSE_FOR_EACH_46(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_45(func, arg, sep, __VA_ARGS__) sep() func(arg, 45, x) +#define MP_REVERSE_FOR_EACH_47(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_46(func, arg, sep, __VA_ARGS__) sep() func(arg, 46, x) +#define MP_REVERSE_FOR_EACH_48(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_47(func, arg, sep, __VA_ARGS__) sep() func(arg, 47, x) +#define MP_REVERSE_FOR_EACH_49(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_48(func, arg, sep, __VA_ARGS__) sep() func(arg, 48, x) +#define MP_REVERSE_FOR_EACH_50(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_49(func, arg, sep, __VA_ARGS__) sep() func(arg, 49, x) +#define MP_REVERSE_FOR_EACH_51(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_50(func, arg, sep, __VA_ARGS__) sep() func(arg, 50, x) +#define MP_REVERSE_FOR_EACH_52(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_51(func, arg, sep, __VA_ARGS__) sep() func(arg, 51, x) +#define MP_REVERSE_FOR_EACH_53(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_52(func, arg, sep, __VA_ARGS__) sep() func(arg, 52, x) +#define MP_REVERSE_FOR_EACH_54(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_53(func, arg, sep, __VA_ARGS__) sep() func(arg, 53, x) +#define MP_REVERSE_FOR_EACH_55(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_54(func, arg, sep, __VA_ARGS__) sep() func(arg, 54, x) +#define MP_REVERSE_FOR_EACH_56(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_55(func, arg, sep, __VA_ARGS__) sep() func(arg, 55, x) +#define MP_REVERSE_FOR_EACH_57(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_56(func, arg, sep, __VA_ARGS__) sep() func(arg, 56, x) +#define MP_REVERSE_FOR_EACH_58(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_57(func, arg, sep, __VA_ARGS__) sep() func(arg, 57, x) +#define MP_REVERSE_FOR_EACH_59(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_58(func, arg, sep, __VA_ARGS__) sep() func(arg, 58, x) +#define MP_REVERSE_FOR_EACH_60(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_59(func, arg, sep, __VA_ARGS__) sep() func(arg, 59, x) +#define MP_REVERSE_FOR_EACH_61(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_60(func, arg, sep, __VA_ARGS__) sep() func(arg, 60, x) +#define MP_REVERSE_FOR_EACH_62(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_61(func, arg, sep, __VA_ARGS__) sep() func(arg, 61, x) +#define MP_REVERSE_FOR_EACH_63(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_62(func, arg, sep, __VA_ARGS__) sep() func(arg, 62, x) +#define MP_REVERSE_FOR_EACH_64(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_63(func, arg, sep, __VA_ARGS__) sep() func(arg, 63, x) + +#define MP_REVERSE_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_REVERSE_FOR_EACH_, N)(func, arg, sep, __VA_ARGS__) +#define MP_REVERSE_FOR_EACH(func, arg, sep, ...) MP_REVERSE_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__) + +#define MP_FIRST_ARG_(N, ...) N +#define MP_FIRST_ARG(...) 
MP_FIRST_ARG_(__VA_ARGS__, ignore) + +// MP_REPEAT macro + +#define MP_REPEAT_0(func, sep) +#define MP_REPEAT_1(func, sep) func(0) +#define MP_REPEAT_2(func, sep) MP_REPEAT_1(func, sep) sep func(1) +#define MP_REPEAT_3(func, sep) MP_REPEAT_2(func, sep) sep func(2) +#define MP_REPEAT_4(func, sep) MP_REPEAT_3(func, sep) sep func(3) +#define MP_REPEAT_5(func, sep) MP_REPEAT_4(func, sep) sep func(4) +#define MP_REPEAT_6(func, sep) MP_REPEAT_5(func, sep) sep func(5) +#define MP_REPEAT_7(func, sep) MP_REPEAT_6(func, sep) sep func(6) +#define MP_REPEAT_8(func, sep) MP_REPEAT_7(func, sep) sep func(7) +#define MP_REPEAT_9(func, sep) MP_REPEAT_8(func, sep) sep func(8) +#define MP_REPEAT_10(func, sep) MP_REPEAT_9(func, sep) sep func(9) +#define MP_REPEAT_11(func, sep) MP_REPEAT_10(func, sep) sep func(10) +#define MP_REPEAT_12(func, sep) MP_REPEAT_11(func, sep) sep func(11) +#define MP_REPEAT_13(func, sep) MP_REPEAT_12(func, sep) sep func(12) +#define MP_REPEAT_14(func, sep) MP_REPEAT_13(func, sep) sep func(13) +#define MP_REPEAT_15(func, sep) MP_REPEAT_14(func, sep) sep func(14) +#define MP_REPEAT_16(func, sep) MP_REPEAT_15(func, sep) sep func(15) +#define MP_REPEAT_17(func, sep) MP_REPEAT_16(func, sep) sep func(16) +#define MP_REPEAT_18(func, sep) MP_REPEAT_17(func, sep) sep func(17) +#define MP_REPEAT_19(func, sep) MP_REPEAT_18(func, sep) sep func(18) +#define MP_REPEAT_20(func, sep) MP_REPEAT_19(func, sep) sep func(19) +#define MP_REPEAT_21(func, sep) MP_REPEAT_20(func, sep) sep func(20) +#define MP_REPEAT_22(func, sep) MP_REPEAT_21(func, sep) sep func(21) +#define MP_REPEAT_23(func, sep) MP_REPEAT_22(func, sep) sep func(22) +#define MP_REPEAT_24(func, sep) MP_REPEAT_23(func, sep) sep func(23) +#define MP_REPEAT_25(func, sep) MP_REPEAT_24(func, sep) sep func(24) +#define MP_REPEAT_26(func, sep) MP_REPEAT_25(func, sep) sep func(25) +#define MP_REPEAT_27(func, sep) MP_REPEAT_26(func, sep) sep func(26) +#define MP_REPEAT_28(func, sep) MP_REPEAT_27(func, sep) sep func(27) 
+#define MP_REPEAT_29(func, sep) MP_REPEAT_28(func, sep) sep func(28) +#define MP_REPEAT_30(func, sep) MP_REPEAT_29(func, sep) sep func(29) +#define MP_REPEAT_31(func, sep) MP_REPEAT_30(func, sep) sep func(30) +#define MP_REPEAT_32(func, sep) MP_REPEAT_31(func, sep) sep func(31) +#define MP_REPEAT(N, func, sep) MP_CONCAT(MP_REPEAT_, N)(func, sep) diff --git a/sim/opaesim/fpga.cpp b/sim/opaesim/fpga.cpp index 6c8ce8b2f..d16ef97a1 100644 --- a/sim/opaesim/fpga.cpp +++ b/sim/opaesim/fpga.cpp @@ -93,6 +93,8 @@ extern fpga_result fpgaClose(fpga_handle handle) { return FPGA_INVALID_PARAM; auto sim = reinterpret_cast(handle); + sim->shutdown(); + delete sim; return FPGA_OK; diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index f5acc3d21..430e4478b 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -110,6 +110,9 @@ public: for (auto& buffer : host_buffers_) { aligned_free(buffer.second.data); } + if (ram_) { + delete ram_; + } #ifdef VCD_OUTPUT if (tfp_) { tfp_->close(); @@ -119,9 +122,6 @@ public: if (device_) { delete device_; } - if (ram_) { - delete ram_; - } } int init() { @@ -142,11 +142,15 @@ public: tfp_->open("trace.vcd"); #endif + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); - + // reset the device this->reset(); + // Turn on assertion after reset + Verilated::assertOn(true); + // launch execution thread future_ = std::async(std::launch::async, [&]{ while (!stop_) { @@ -158,6 +162,13 @@ public: return 0; } + void shutdown() { + stop_ = true; + if (future_.valid()) { + future_.wait(); + } + } + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { auto alloc = aligned_malloc(len, CACHE_BLOCK_SIZE); if (alloc == NULL) @@ -256,9 +267,6 @@ private: device_->clk = 1; this->eval(); } - - // Turn on assertion after reset - Verilated::assertOn(true); } void tick() { @@ -279,13 +287,13 @@ private: } } + dram_sim_.tick(); + device_->clk = 0; this->eval(); device_->clk = 1; this->eval(); - dram_sim_.tick(); 
- #ifndef NDEBUG fflush(stdout); #endif @@ -399,7 +407,6 @@ private: void avs_bus_reset() { for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { - pending_mem_reqs_[b].clear(); device_->avs_readdatavalid[b] = 0; device_->avs_waitrequest[b] = 0; } @@ -422,7 +429,7 @@ private: // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - unsigned byte_addr = (device_->avs_address[b] * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; + uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; if (device_->avs_write[b]) { uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); @@ -432,7 +439,7 @@ private: } } - /*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr); + /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr); for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]); } @@ -456,7 +463,7 @@ private: mem_req->ready = false; pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); for (auto& req : pending_mem_reqs_[b]) { if (req.cycles_left != 0) printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); @@ -537,6 +544,10 @@ int opae_sim::init() { return impl_->init(); } +void opae_sim::shutdown() { + impl_->shutdown(); +} + int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { return impl_->prepare_buffer(len, buf_addr, wsid, flags); } diff --git a/sim/opaesim/opae_sim.h b/sim/opaesim/opae_sim.h index a04ade0a0..454cc1bf7 100644 --- 
a/sim/opaesim/opae_sim.h +++ b/sim/opaesim/opae_sim.h @@ -25,6 +25,8 @@ public: int init(); + void shutdown(); + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags); void release_buffer(uint64_t wsid); diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 24287aa56..ecaee717b 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -39,13 +39,6 @@ SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/processor.cpp -ifdef AXI_BUS - TOP = Vortex_axi - CXXFLAGS += -DAXI_BUS -else - TOP = Vortex -endif - VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO @@ -56,7 +49,7 @@ VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) -VL_FLAGS += --cc $(TOP) --top-module $(TOP) +VL_FLAGS += --cc Vortex --top-module Vortex CXXFLAGS += $(CONFIGS) diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index f52e7c8da..1f6af60dd 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -13,13 +13,7 @@ #include "processor.h" -#ifdef AXI_BUS -#include "VVortex_axi.h" -typedef VVortex_axi Device; -#else #include "VVortex.h" -typedef VVortex Device; -#endif #ifdef VCD_OUTPUT #include @@ -106,7 +100,7 @@ public: Verilated::assertOn(false); // create RTL module instance - device_ = new Device(); + device_ = new VVortex(); #ifdef VCD_OUTPUT Verilated::traceEverOn(true); @@ -116,7 +110,7 @@ public: #endif ram_ = nullptr; - + // reset the device this->reset(); @@ -154,9 +148,11 @@ public: std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; #endif + // reset device + this->reset(); + // start execution running_ = true; - device_->reset = 0; // wait on device to go busy while (!device_->busy) { @@ -168,9 +164,6 @@ public: this->tick(); } - // reset device - this->reset(); - this->cout_flush(); } @@ 
-178,14 +171,16 @@ public: device_->dcr_wr_valid = 1; device_->dcr_wr_addr = addr; device_->dcr_wr_data = value; - while (device_->dcr_wr_valid) { - this->tick(); - } + this->tick(); + device_->dcr_wr_valid = 0; } private: void reset() { + this->mem_bus_reset(); + this->dcr_bus_reset(); + running_ = false; print_bufs_.clear(); @@ -198,11 +193,6 @@ private: } mem_rd_rsp_active_ = false; - mem_wr_rsp_active_ = false; - - this->mem_bus_reset(); - - this->dcr_bus_reset(); device_->reset = 1; @@ -212,23 +202,19 @@ private: device_->clk = 1; this->eval(); } + + device_->reset = 0; + + for (int i = 0; i < RESET_DELAY; ++i) { + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + } } void tick() { - - device_->clk = 0; - this->eval(); - - this->mem_bus_eval(0); - this->dcr_bus_eval(0); - - device_->clk = 1; - this->eval(); - - this->mem_bus_eval(1); - this->dcr_bus_eval(1); - - dram_sim_.tick(); + this->mem_bus_eval(); if (!dram_queue_.empty()) { auto mem_req = dram_queue_.front(); @@ -244,6 +230,13 @@ private: } } + dram_sim_.tick(); + + device_->clk = 0; + this->eval(); + device_->clk = 1; + this->eval(); + #ifndef NDEBUG fflush(stdout); #endif @@ -261,207 +254,39 @@ private: ++timestamp; } -#ifdef AXI_BUS - - void mem_bus_reset() { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - device_->m_axi_rvalid[0] = 0; - device_->m_axi_bvalid[0] = 0; - } - - void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->m_axi_rready[0]; - mem_wr_rsp_ready_ = device_->m_axi_bready[0]; - return; - } - - if (ram_ == nullptr) { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - return; - } - - // process memory read responses - if (mem_rd_rsp_active_ - && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && 
!(*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - device_->m_axi_rvalid[0] = 1; - device_->m_axi_rid[0] = mem_rsp->tag; - device_->m_axi_rresp[0] = 0; - device_->m_axi_rlast[0] = 1; - memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); - pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_rvalid[0] = 0; - } - } - - // process memory write responses - if (mem_wr_rsp_active_ - && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { - mem_wr_rsp_active_ = false; - } - if (!mem_wr_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && (*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr); - */ - device_->m_axi_bvalid[0] = 1; - device_->m_axi_bid[0] = mem_rsp->tag; - device_->m_axi_bresp[0] = 0; - pending_mem_reqs_.erase(mem_rsp_it); - mem_wr_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_bvalid[0] = 0; - } - } - - // select the memory bank - uint32_t req_addr = device_->m_axi_wvalid[0] ? 
device_->m_axi_awaddr[0] : device_->m_axi_araddr[0]; - - // process memory requests - if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { - if (device_->m_axi_wvalid[0]) { - auto byteen = device_->m_axi_wstrb[0]; - auto base_addr = device_->m_axi_awaddr[0]; - auto data = (uint8_t*)device_->m_axi_wdata[0].data(); - - if (base_addr >= uint64_t(IO_COUT_ADDR) - && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_awid[0]; - mem_req->addr = device_->m_axi_awaddr[0]; - mem_req->write = true; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_arid[0]; - mem_req->addr = device_->m_axi_araddr[0]; - ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE); - mem_req->write = false; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } - - device_->m_axi_wready[0] = running_; - device_->m_axi_awready[0] = running_; - device_->m_axi_arready[0] = running_; - } - -#else - void 
mem_bus_reset() { device_->mem_req_ready = 0; device_->mem_rsp_valid = 0; } - void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->mem_rsp_ready; - return; - } - - if (ram_ == nullptr) { - device_->mem_req_ready = 0; - return; - } - + void mem_bus_eval() { // process memory read responses - if (mem_rd_rsp_active_ - && device_->mem_rsp_valid && mem_rd_rsp_ready_) { + if (mem_rd_rsp_active_ && device_->mem_rsp_ready) { + device_->mem_rsp_valid = 0; mem_rd_rsp_active_ = false; } if (!mem_rd_rsp_active_) { if (!pending_mem_reqs_.empty() && (*pending_mem_reqs_.begin())->ready) { - device_->mem_rsp_valid = 1; auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + /*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); + printf("%02x", mem_rsp->data[i]); } printf("\n"); */ - memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_valid = 1; + memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE); device_->mem_rsp_tag = mem_rsp->tag; pending_mem_reqs_.erase(mem_rsp_it); mem_rd_rsp_active_ = true; delete mem_rsp; - } else { - device_->mem_rsp_valid = 0; } } // process memory requests - if (device_->mem_req_valid && running_) { + if (device_->mem_req_valid && device_->mem_req_ready) { uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); if (device_->mem_req_rw) { auto byteen = device_->mem_req_byteen; @@ -516,7 +341,7 @@ private: mem_req->addr = byte_addr; mem_req->write = false; mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); + ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); pending_mem_reqs_.emplace_back(mem_req); //printf("%0ld: [sim] MEM Rd 
Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); @@ -529,21 +354,10 @@ private: device_->mem_req_ready = running_; } -#endif - void dcr_bus_reset() { device_->dcr_wr_valid = 0; } - void dcr_bus_eval(bool clk) { - if (!clk) { - return; - } - if (device_->dcr_wr_valid) { - device_->dcr_wr_valid = 0; - } - } - void wait(uint32_t cycles) { for (int i = 0; i < cycles; ++i) { this->tick(); @@ -553,8 +367,8 @@ private: private: typedef struct { - Device* device; - std::array block; + VVortex* device; + std::array data; uint64_t addr; uint64_t tag; bool write; @@ -569,7 +383,7 @@ private: DramSim dram_sim_; - Device* device_; + VVortex* device_; #ifdef VCD_OUTPUT VerilatedVcdC *tfp_; @@ -578,10 +392,6 @@ private: RAM* ram_; bool mem_rd_rsp_active_; - bool mem_rd_rsp_ready_; - - bool mem_wr_rsp_active_; - bool mem_wr_rsp_ready_; bool running_; }; diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 3e256ffb3..6296b88eb 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -32,11 +32,22 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED +# AFU parameters +ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS))) + CONFIGS += -DM_AXI_MEM_NUM_BANKS=1 +endif +ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS))) + CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32 +endif +ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512 +endif + DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp +SRCS += $(SRC_DIR)/xrt.cpp $(SRC_DIR)/xrt_sim.cpp RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv diff --git a/sim/xrtsim/fpga.cpp b/sim/xrtsim/xrt.cpp similarity index 62% rename from sim/xrtsim/fpga.cpp rename to 
sim/xrtsim/xrt.cpp index bc1f0cb07..c0b5aac28 100644 --- a/sim/xrtsim/fpga.cpp +++ b/sim/xrtsim/xrt.cpp @@ -19,7 +19,7 @@ #include #include #include -#include "fpga.h" +#include "xrt.h" #include "xrt_sim.h" #include #include @@ -30,6 +30,13 @@ using namespace vortex; extern "C" { #endif +typedef struct { + size_t size; + xrt_sim* sim; + uint32_t bank; + uint64_t addr; +} buffer_t; + extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) { if (index != 0) return nullptr; @@ -45,6 +52,8 @@ extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) { extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, int* ret_size) { static const char* deviceName = "vortex_xrtsim"; if (name) { + if (size < strlen(deviceName) + 1) + return -1; memcpy(name, deviceName, size); } if (ret_size) { @@ -54,7 +63,10 @@ extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, i } extern int xrtDeviceClose(xrtDeviceHandle dhdl) { + if (dhdl == nullptr) + return -1; auto sim = reinterpret_cast(dhdl); + sim->shutdown(); delete sim; return 0; } @@ -64,19 +76,38 @@ extern int xrtKernelClose(xrtKernelHandle /*kernelHandle*/) { } extern xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags flags, xrtMemoryGroup grp) { - return 0; + auto sim = reinterpret_cast(dhdl); + uint64_t addr; + int err = sim->mem_alloc(size, grp, &addr); + if (err != 0) + return nullptr; + auto buffer = new buffer_t(); + buffer->size = size; + buffer->bank = grp; + buffer->sim = sim; + buffer->addr = addr; + return buffer; } extern int xrtBOFree(xrtBufferHandle bhdl) { - return 0; + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_free(buffer->bank, buffer->addr); } -extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek) { - return 0; +extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset) { + if (bhdl == nullptr) + return -1; + auto buffer 
= reinterpret_cast(bhdl); + return buffer->sim->mem_write(buffer->bank, buffer->addr + offset, size, src); } -extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip) { - return 0; +extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset) { + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_read(buffer->bank, buffer->addr + offset, size, dst); } extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset) { @@ -84,11 +115,17 @@ extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t s } extern int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data) { - return 0; + if (kernelHandle == nullptr) + return -1; + auto sim = reinterpret_cast(kernelHandle); + return sim->register_write(offset, data); } -extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap) { - return 0; +extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data) { + if (kernelHandle == nullptr) + return -1; + auto sim = reinterpret_cast(kernelHandle); + return sim->register_read(offset, data); } extern int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len) { diff --git a/sim/xrtsim/fpga.h b/sim/xrtsim/xrt.h similarity index 98% rename from sim/xrtsim/fpga.h rename to sim/xrtsim/xrt.h index f36bbadab..0dbd5cf42 100644 --- a/sim/xrtsim/fpga.h +++ b/sim/xrtsim/xrt.h @@ -94,15 +94,15 @@ xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags fla int xrtBOFree(xrtBufferHandle bhdl); -int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek); +int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset); -int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip); +int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, 
size_t offset); int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset); int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data); -int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap); +int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data); int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len); diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 21961e5dd..822f91d94 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -32,6 +32,12 @@ #include #include #include +#include +#include + +#include + +#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 @@ -53,6 +59,8 @@ #define RAM_PAGE_SIZE 4096 +#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH) + #define CPU_GPU_LATENCY 200 using namespace vortex; @@ -80,6 +88,35 @@ void sim_trace_enable(bool enable) { /////////////////////////////////////////////////////////////////////////////// +#define MP_M_AXI_MEM_EACH(i) \ + m_axi_mem_[i].awvalid = &device_->m_axi_mem_##i##_awvalid; \ + m_axi_mem_[i].awready = &device_->m_axi_mem_##i##_awready; \ + m_axi_mem_[i].awaddr = &device_->m_axi_mem_##i##_awaddr; \ + m_axi_mem_[i].awid = &device_->m_axi_mem_##i##_awid; \ + m_axi_mem_[i].awlen = &device_->m_axi_mem_##i##_awlen; \ + m_axi_mem_[i].wvalid = &device_->m_axi_mem_##i##_wvalid; \ + m_axi_mem_[i].wready = &device_->m_axi_mem_##i##_wready; \ + m_axi_mem_[i].wdata = &device_->m_axi_mem_##i##_wdata; \ + m_axi_mem_[i].wstrb = &device_->m_axi_mem_##i##_wstrb; \ + m_axi_mem_[i].wlast = &device_->m_axi_mem_##i##_wlast; \ + m_axi_mem_[i].arvalid = &device_->m_axi_mem_##i##_arvalid; \ + m_axi_mem_[i].arready = &device_->m_axi_mem_##i##_arready; \ + m_axi_mem_[i].araddr = &device_->m_axi_mem_##i##_araddr; \ + m_axi_mem_[i].arid = &device_->m_axi_mem_##i##_arid; \ + 
m_axi_mem_[i].arlen = &device_->m_axi_mem_##i##_arlen; \ + m_axi_mem_[i].rvalid = &device_->m_axi_mem_##i##_rvalid; \ + m_axi_mem_[i].rready = &device_->m_axi_mem_##i##_rready; \ + m_axi_mem_[i].rdata = &device_->m_axi_mem_##i##_rdata; \ + m_axi_mem_[i].rlast = &device_->m_axi_mem_##i##_rlast; \ + m_axi_mem_[i].rid = &device_->m_axi_mem_##i##_rid; \ + m_axi_mem_[i].rresp = &device_->m_axi_mem_##i##_rresp; \ + m_axi_mem_[i].bvalid = &device_->m_axi_mem_##i##_bvalid; \ + m_axi_mem_[i].bready = &device_->m_axi_mem_##i##_bready; \ + m_axi_mem_[i].bresp = &device_->m_axi_mem_##i##_bresp; \ + m_axi_mem_[i].bid = &device_->m_axi_mem_##i##_bid; + +#define MP_M_AXI_MEM(n) MP_REPEAT(n, MP_M_AXI_MEM_EACH, ;) + class xrt_sim::Impl { public: Impl() @@ -97,6 +134,12 @@ public: if (future_.valid()) { future_.wait(); } + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + delete mem_alloc_[i]; + } + if (ram_) { + delete ram_; + } #ifdef VCD_OUTPUT if (tfp_) { tfp_->close(); @@ -106,9 +149,6 @@ public: if (device_) { delete device_; } - if (ram_) { - delete ram_; - } } int init() { @@ -129,22 +169,136 @@ public: tfp_->open("trace.vcd"); #endif + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); - + + // initialize AXI memory interfaces + MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS); + + // initialize memory allocator + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 4096, 64); + } + // reset the device this->reset(); + // Turn on assertion after reset + Verilated::assertOn(true); + // launch execution thread future_ = std::async(std::launch::async, [&]{ - while (!stop_) { - std::lock_guard guard(mutex_); - this->tick(); - } + while (!stop_) { + std::lock_guard guard(mutex_); + this->tick(); + } }); return 0; } + void shutdown() { + stop_ = true; + if (future_.valid()) { + future_.wait(); + } + } + + int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + return 
mem_alloc_[bank_id]->allocate(size, addr); + } + + int mem_free(uint32_t bank_id, uint64_t addr) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + return mem_alloc_[bank_id]->release(addr); + } + + int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + ram_->write(data, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + for (int i = size-1; i >= 0; --i) { + printf("%02x", ((const uint8_t*)data)[i]); + } + printf(")\n");*/ + return 0; + } + + int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { + if (bank_id >= M_AXI_MEM_NUM_BANKS) + return -1; + uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + ram_->read(data, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + for (int i = size-1; i >= 0; --i) { + printf("%02x", ((uint8_t*)data)[i]); + } + printf(")\n");*/ + return 0; + } + + int register_write(uint32_t offset, uint32_t value) { + std::lock_guard guard(mutex_); + + // write address + device_->s_axi_ctrl_awvalid = 1; + device_->s_axi_ctrl_awaddr = offset; + auto s_axi_ctrl_awready = device_->s_axi_ctrl_awready; + do { + this->tick(); + } while (!(s_axi_ctrl_awready || device_->s_axi_ctrl_awready)); + device_->s_axi_ctrl_awvalid = 0; + + // write data + device_->s_axi_ctrl_wvalid = 1; + device_->s_axi_ctrl_wdata = value; + device_->s_axi_ctrl_wstrb = 0xf; + auto s_axi_ctrl_wready = device_->s_axi_ctrl_wready; + do { + this->tick(); + } while (!(s_axi_ctrl_wready || device_->s_axi_ctrl_wready)); + device_->s_axi_ctrl_wvalid = 0; + + // write response + device_->s_axi_ctrl_bready = 1; + auto s_axi_ctrl_bvalid = device_->s_axi_ctrl_bvalid; + do { + this->tick(); + } while (!(s_axi_ctrl_bvalid || device_->s_axi_ctrl_bvalid)); + 
device_->s_axi_ctrl_bready = 0; + + return 0; + } + + int register_read(uint32_t offset, uint32_t* value) { + std::lock_guard guard(mutex_); + + // read address + device_->s_axi_ctrl_arvalid = 1; + device_->s_axi_ctrl_araddr = offset; + auto s_axi_ctrl_arready = device_->s_axi_ctrl_arready; + do { + this->tick(); + } while (!(s_axi_ctrl_arready || device_->s_axi_ctrl_arready)); + device_->s_axi_ctrl_arvalid = 0; + + // read data + device_->s_axi_ctrl_rready = 1; + auto s_axi_ctrl_rvalid = device_->s_axi_ctrl_rvalid; + do { + this->tick(); + } while (!(s_axi_ctrl_rvalid || device_->s_axi_ctrl_rvalid)); + *value = device_->s_axi_ctrl_rdata; + device_->s_axi_ctrl_rready = 0; + + return 0; + } + private: void reset() { @@ -155,9 +309,9 @@ private: reqs.clear(); } - { + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { std::queue empty; - std::swap(dram_queue_, empty); + std::swap(dram_queues_[i], empty); } device_->ap_rst_n = 0; @@ -177,36 +331,34 @@ private: device_->ap_clk = 1; this->eval(); } - - // Turn on assertion after reset - Verilated::assertOn(true); } void tick() { - this->axi_ctrl_bus_eval(); this->axi_mem_bus_eval(); - if (!dram_queue_.empty()) { - auto mem_req = dram_queue_.front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + if (!dram_queues_[i].empty()) { + auto mem_req = dram_queues_[i].front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { + dram_queues_[i].pop(); } - }, mem_req)) { - dram_queue_.pop(); } } + dram_sim_.tick(); + device_->ap_clk = 0; this->eval(); device_->ap_clk = 1; this->eval(); - dram_sim_.tick(); - #ifndef NDEBUG fflush(stdout); #endif @@ -223,65 +375,208 
@@ private: } void axi_ctrl_bus_reset() { - // address write request - device_->s_axi_ctrl_awvalid = 0; - //device_->s_axi_ctrl_awaddr = 0; - - // data write request - device_->s_axi_ctrl_wvalid = 0; - //device_->s_axi_ctrl_wdata = 0; - //device_->s_axi_ctrl_wstrb = 0; - // address read request device_->s_axi_ctrl_arvalid = 0; - //device_->s_axi_ctrl_araddr = 0; + device_->s_axi_ctrl_araddr = 0; // data read response device_->s_axi_ctrl_rready = 0; + // address write request + device_->s_axi_ctrl_awvalid = 0; + device_->s_axi_ctrl_awaddr = 0; + + // data write request + device_->s_axi_ctrl_wvalid = 0; + device_->s_axi_ctrl_wdata = 0; + device_->s_axi_ctrl_wstrb = 0; + // data write response device_->s_axi_ctrl_bready = 0; } - void axi_ctrl_bus_eval() { - //-- - } - void axi_mem_bus_reset() { - // address write request - device_->m_axi_mem_0_awready = 0; + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + // address read request + *m_axi_mem_[i].arready = 1; - // data write request - device_->m_axi_mem_0_wready = 0; + // address write request + *m_axi_mem_[i].awready = 1; - // address read request - device_->m_axi_mem_0_arready = 0; + // data write request + *m_axi_mem_[i].wready = 0; - // data read response - device_->m_axi_mem_0_rvalid = 0; - //device_->m_axi_mem_0_rdata = 0; - //device_->m_axi_mem_0_rlast = 0; - //device_->m_axi_mem_0_rid = 0; - //device_->m_axi_mem_0_rresp = 0; + // data read response + *m_axi_mem_[i].rvalid = 0; - // data write response - device_->m_axi_mem_0_bvalid = 0; - //device_->m_axi_mem_0_bresp = 0; - //device_->m_axi_mem_0_bid = 0; + // data write response + *m_axi_mem_[i].bvalid = 0; + + // states + m_axi_states_[i].write_req_pending = false; + m_axi_states_[i].write_rsp_pending = false; + m_axi_states_[i].read_rsp_pending = false; + } } void axi_mem_bus_eval() { - //-- + for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + // handle read responses + if (m_axi_states_[i].read_rsp_pending + && (*m_axi_mem_[i].rready)) { + 
*m_axi_mem_[i].rvalid = 0; + m_axi_states_[i].read_rsp_pending = false; + } + } + if (!m_axi_states_[i].read_rsp_pending) { + if (!pending_mem_reqs_[i].empty() + && (*pending_mem_reqs_[i].begin())->ready + && !(*pending_mem_reqs_[i].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[i].begin(); + auto mem_rsp = *mem_rsp_it; + *m_axi_mem_[i].rvalid = 1; + *m_axi_mem_[i].rid = mem_rsp->tag; + *m_axi_mem_[i].rresp = 0; + *m_axi_mem_[i].rlast = 1; + memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE); + pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].read_rsp_pending = true; + delete mem_rsp; + } + } + + // handle write responses + if (m_axi_states_[i].write_rsp_pending) { + if (*m_axi_mem_[i].bready) { + *m_axi_mem_[i].bvalid = 0; + m_axi_states_[i].write_rsp_pending = false; + } + } + if (!m_axi_states_[i].write_rsp_pending) { + if (!pending_mem_reqs_[i].empty() + && (*pending_mem_reqs_[i].begin())->ready + && (*pending_mem_reqs_[i].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[i].begin(); + auto mem_rsp = *mem_rsp_it; + *m_axi_mem_[i].bvalid = 1; + *m_axi_mem_[i].bid = mem_rsp->tag; + *m_axi_mem_[i].bresp = 0; + pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].write_rsp_pending = true; + delete mem_rsp; + } + } + + // handle read requests + if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { + auto mem_req = new mem_req_t(); + mem_req->tag = *m_axi_mem_[i].arid; + mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; + ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE); + mem_req->write = false; + mem_req->ready = false; + pending_mem_reqs_[i].emplace_back(mem_req); + + /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); + for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", mem_req->data[i]); + } + printf("\n");*/ + + // send dram request + 
dram_queues_[i].push(mem_req); + } + + // handle address write requests + if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !m_axi_states_[i].write_req_pending) { + m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; + m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; + m_axi_states_[i].write_req_pending = true; + } + + // handle data write requests + *m_axi_mem_[i].wready = false; + if (*m_axi_mem_[i].wvalid && m_axi_states_[i].write_req_pending) { + + auto byteen = *m_axi_mem_[i].wstrb; + auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); + auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; + + for (int i = 0; i < M_AXI_MEM_DATA_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = m_axi_states_[i].write_req_tag; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = false; + pending_mem_reqs_[i].emplace_back(mem_req); + + /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); + for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); + } + printf("\n");*/ + + // send dram request + dram_queues_[i].push(mem_req); + + m_axi_states_[i].write_req_pending = false; + + // acquire write data + *m_axi_mem_[i].wready = true; + } + } } typedef struct { - std::array data; - uint32_t addr; + uint64_t write_req_addr; + uint32_t write_req_tag; + bool write_req_pending; + bool read_rsp_pending; + bool write_rsp_pending; + } m_axi_state_t; + + typedef struct { + std::array data; + uint32_t tag; + uint64_t addr; bool write; bool ready; } mem_req_t; - Vvortex_afu_shim *device_; + typedef struct { + CData* awvalid; + CData* awready; + QData* awaddr; + IData* awid; + CData* awlen; + CData* wvalid; + CData* wready; + VlWide<16>* wdata; + QData* wstrb; + CData* wlast; + CData* arvalid; + CData* arready; + QData* 
araddr; + IData* arid; + CData* arlen; + CData* rvalid; + CData* rready; + VlWide<16>* rdata; + CData* rlast; + IData* rid; + CData* rresp; + CData* bvalid; + CData* bready; + CData* bresp; + IData* bid; + } m_axi_mem_t; + + Vvortex_afu_shim* device_; RAM* ram_; DramSim dram_sim_; @@ -290,9 +585,15 @@ private: std::mutex mutex_; - std::list pending_mem_reqs_[MEMORY_BANKS]; + std::list pending_mem_reqs_[M_AXI_MEM_NUM_BANKS]; - std::queue dram_queue_; + m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS]; + + MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS]; + + m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS]; + + std::queue dram_queues_[M_AXI_MEM_NUM_BANKS]; #ifdef VCD_OUTPUT VerilatedVcdC* tfp_; @@ -311,4 +612,32 @@ xrt_sim::~xrt_sim() { int xrt_sim::init() { return impl_->init(); +} + +void xrt_sim::shutdown() { + impl_->shutdown(); +} + +int xrt_sim::mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { + return impl_->mem_alloc(size, bank_id, addr); +} + +int xrt_sim::mem_free(uint32_t bank_id, uint64_t addr) { + return impl_->mem_free(bank_id, addr); +} + +int xrt_sim::mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + return impl_->mem_write(bank_id, addr, size, data); +} + +int xrt_sim::mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { + return impl_->mem_read(bank_id, addr, size, data); +} + +int xrt_sim::register_write(uint32_t offset, uint32_t value) { + return impl_->register_write(offset, value); +} + +int xrt_sim::register_read(uint32_t offset, uint32_t* value) { + return impl_->register_read(offset, value); } \ No newline at end of file diff --git a/sim/xrtsim/xrt_sim.h b/sim/xrtsim/xrt_sim.h index e399c33de..5823f468f 100644 --- a/sim/xrtsim/xrt_sim.h +++ b/sim/xrtsim/xrt_sim.h @@ -25,6 +25,20 @@ public: int init(); + void shutdown(); + + int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr); + + int mem_free(uint32_t bank_id, uint64_t addr); + + int mem_write(uint32_t bank_id, uint64_t addr, 
uint64_t size, const void* value); + + int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* value); + + int register_write(uint32_t offset, uint32_t value); + + int register_read(uint32_t offset, uint32_t* value); + private: class Impl; From 2d7f9eae0a84c3c80d6f516305b97b3401df743c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 04:44:00 -0700 Subject: [PATCH 276/488] minor update --- hw/rtl/libs/VX_axi_adapter.sv | 25 ++++++++++++++++--------- sim/xrtsim/xrt_sim.cpp | 16 ++++++---------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 6c231cb95..06216f2ab 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -100,26 +100,33 @@ module VX_axi_adapter #( assign req_bank_sel = '0; end - wire [NUM_BANKS-1:0] axi_aw_ready, axi_write_ready; + wire [NUM_BANKS-1:0] axi_write_ready; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_aw_ready[i] = m_axi_awready[i] || m_axi_aw_ack[i]; - assign axi_write_ready[i] = m_axi_wready[i] && axi_aw_ready[i]; + assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) + && (m_axi_wready[i] || m_axi_w_ack[i]); end // request ack assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - reg [NUM_BANKS-1:0] m_axi_aw_ack; + wire mem_req_fire = mem_req_valid && mem_req_ready; + + // AXi write request synchronization + reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w + wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; + wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; always @(posedge clk) begin - if (reset) begin + if (reset || (mem_req_fire && (req_bank_sel == i))) begin m_axi_aw_ack[i] <= 0; + m_axi_w_ack[i] <= 0; end else begin - if (m_axi_wvalid[i] && m_axi_wready[i]) begin - m_axi_aw_ack[i] <= 0; - end else if (m_axi_awvalid[i] && m_axi_awready[i]) begin + if (m_axi_aw_fire) begin m_axi_aw_ack[i] <= 1; end + if (m_axi_w_fire) begin + m_axi_w_ack[i] <= 1; + end end end end @@ -141,7 +148,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && axi_aw_ready[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 822f91d94..b8af57cfe 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -422,11 +422,9 @@ private: void axi_mem_bus_eval() { for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { // handle read responses - if (m_axi_states_[i].read_rsp_pending - && (*m_axi_mem_[i].rready)) { - *m_axi_mem_[i].rvalid = 0; - m_axi_states_[i].read_rsp_pending = false; - } + if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) { + *m_axi_mem_[i].rvalid = 0; + m_axi_states_[i].read_rsp_pending = false; } if (!m_axi_states_[i].read_rsp_pending) { if (!pending_mem_reqs_[i].empty() @@ -446,11 +444,9 @@ private: } // handle write responses - if 
(m_axi_states_[i].write_rsp_pending) { - if (*m_axi_mem_[i].bready) { - *m_axi_mem_[i].bvalid = 0; - m_axi_states_[i].write_rsp_pending = false; - } + if (m_axi_states_[i].write_rsp_pending && *m_axi_mem_[i].bready) { + *m_axi_mem_[i].bvalid = 0; + m_axi_states_[i].write_rsp_pending = false; } if (!m_axi_states_[i].write_rsp_pending) { if (!pending_mem_reqs_[i].empty() From 4fff940e42647d0546f817bd1cda921495fe3aaa Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 19 Sep 2024 13:21:14 -0700 Subject: [PATCH 277/488] two different versions of bypass connection --- hw/rtl/cache/VX_cache_wrap_l3.sv | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv index 403edf554..def7237b1 100644 --- a/hw/rtl/cache/VX_cache_wrap_l3.sv +++ b/hw/rtl/cache/VX_cache_wrap_l3.sv @@ -108,8 +108,9 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( ) mem_bus_cache_if[NUM_MEM_PORTS](); if (NC_OR_BYPASS) begin - `RESET_RELAY (nc_bypass_reset, reset); + + // Slicing version for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin localparam SLICE_BEGIN = i * NUM_REQS_P; @@ -148,6 +149,43 @@ module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( ); end + // Connect everything + /* + for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin + VX_cache_bypass #( + .NUM_REQS (NUM_REQS), + .TAG_SEL_IDX (TAG_SEL_IDX), + + .PASSTHRU (PASSTHRU), + .NC_ENABLE (PASSTHRU ? 
0 : NC_ENABLE), + + .WORD_SIZE (WORD_SIZE), + .LINE_SIZE (LINE_SIZE), + + .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), + .CORE_TAG_WIDTH (TAG_WIDTH), + + .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), + .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), + .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), + + .UUID_WIDTH (UUID_WIDTH), + + .CORE_OUT_BUF (CORE_OUT_BUF), + .MEM_OUT_BUF (MEM_OUT_BUF) + ) cache_bypass ( + .clk (clk), + .reset (nc_bypass_reset), + + .core_bus_in_if (core_bus_if), + .core_bus_out_if(core_bus_cache_if), + + .mem_bus_in_if (mem_bus_cache_if[i]), + .mem_bus_out_if (mem_bus_if[i]) + ); + end + */ + end else begin for (genvar i = 0; i < NUM_REQS; ++i) begin From 380c36d93084a312aac0923a46117e0510bb749c Mon Sep 17 00:00:00 2001 From: sij814 Date: Thu, 19 Sep 2024 13:31:25 -0700 Subject: [PATCH 278/488] merged rtlsim branch --- third_party/fpnew | 2 +- third_party/softfloat | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/fpnew b/third_party/fpnew index 79e453139..a6af69155 160000 --- a/third_party/fpnew +++ b/third_party/fpnew @@ -1 +1 @@ -Subproject commit 79e453139072df42c9ec8f697132ba485d74e23d +Subproject commit a6af691551ffbd76d5d9cf30774d3295a41615e4 diff --git a/third_party/softfloat b/third_party/softfloat index b51ef8f32..3b70b5d81 160000 --- a/third_party/softfloat +++ b/third_party/softfloat @@ -1 +1 @@ -Subproject commit b51ef8f3201669b2288104c28546fc72532a1ea4 +Subproject commit 3b70b5d8147675932c38b36cd09af6df4eedd919 From d2db612bb40754c177eb471527e9114996932d99 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 22:33:28 -0700 Subject: [PATCH 279/488] adding scope support to xrtsim --- hw/rtl/VX_scope.vh | 6 +- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 190 +++++++++++++++++++++------------- runtime/opae/vortex.cpp | 7 +- runtime/xrt/vortex.cpp | 8 +- sim/xrtsim/xrt_sim.cpp | 43 ++++---- tests/opencl/common.mk | 4 +- tests/regression/common.mk | 4 +- 8 files changed, 155 insertions(+), 109 
deletions(-) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index a74770640..a677975ce 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,7 +24,7 @@ `define SCOPE_IO_SWITCH(__count) \ wire scope_bus_in_w [__count]; \ wire scope_bus_out_w [__count]; \ - `RESET_RELAY_EX(scope_reset_w, scope_reset, __count, 4); \ + `RESET_RELAY_EX(scope_reset_w, scope_reset, __count, `MAX_FANOUT); \ VX_scope_switch #( \ .N (__count) \ ) scope_switch ( \ diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index d97be483d..7d5a10b94 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -175,7 +175,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cmd_scope_reading <= 1; scope_bus_ctr <= 63; end - scope_bus_in <= 0; if (cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_req_hdr.address)) begin cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data); @@ -189,6 +188,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; + scope_bus_in <= 0; end end if (cmd_scope_reading) begin diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 0acf87744..c842e25d5 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -132,13 +132,16 @@ module VX_afu_ctrl #( ADDR_BITS = 8; localparam - WSTATE_IDLE = 2'd0, + WSTATE_ADDR = 2'd0, WSTATE_DATA = 
2'd1, - WSTATE_RESP = 2'd2; + WSTATE_RESP = 2'd2, + WSTATE_WIDTH = 2; localparam - RSTATE_IDLE = 2'd0, - RSTATE_DATA = 2'd1; + RSTATE_ADDR = 2'd0, + RSTATE_DATA = 2'd1, + RSTATE_RESP = 2'd2, + RSTATE_WIDTH = 2; // device caps wire [63:0] dev_caps = {16'b0, @@ -152,16 +155,18 @@ module VX_afu_ctrl #( 2'(`CLOG2(`XLEN)-4), 30'(`MISA_STD)}; - reg [1:0] wstate; + reg [WSTATE_WIDTH-1:0] wstate; reg [ADDR_BITS-1:0] waddr; wire [31:0] wmask; wire s_axi_aw_fire; wire s_axi_w_fire; + wire s_axi_b_fire; - reg [1:0] rstate; + logic [RSTATE_WIDTH-1:0] rstate; reg [31:0] rdata; - wire [ADDR_BITS-1:0] raddr; + reg [ADDR_BITS-1:0] raddr; wire s_axi_ar_fire; + wire s_axi_r_fire; reg ap_reset_r; reg ap_start_r; @@ -174,15 +179,19 @@ module VX_afu_ctrl #( reg [31:0] dcrv_r; reg dcr_wr_valid_r; + logic wready_stall; + logic rvalid_stall; + `ifdef SCOPE - reg [63:0] scope_bus_wdata; - reg [63:0] scope_bus_rdata; + reg [63:0] scope_bus_wdata, scope_bus_rdata; reg [5:0] scope_bus_ctr; - reg cmd_scope_reading; - reg cmd_scope_writing; + reg cmd_scope_writing, cmd_scope_reading; reg scope_bus_out_r; + reg scope_rdata_valid; + + reg is_scope_waddr, is_scope_raddr; always @(posedge clk) begin if (reset) begin @@ -190,18 +199,33 @@ module VX_afu_ctrl #( cmd_scope_writing <= 0; scope_bus_ctr <= '0; scope_bus_out_r <= 0; + is_scope_waddr <= 0; + is_scope_raddr <= 0; + scope_bus_rdata <= '0; + scope_rdata_valid <= 0; end else begin + if (s_axi_aw_fire) begin + is_scope_waddr <= (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_0) + || (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_1); + end + if (s_axi_ar_fire) begin + is_scope_raddr <= (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_0) + || (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_1); + end if (s_axi_w_fire && waddr == ADDR_SCP_0) begin scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask); end if (s_axi_w_fire && waddr == ADDR_SCP_1) begin scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask); 
cmd_scope_writing <= 1; + scope_rdata_valid <= 0; scope_bus_out_r <= 1; scope_bus_ctr <= 63; + end if (scope_bus_in) begin cmd_scope_reading <= 1; + scope_bus_rdata <= '0; scope_bus_ctr <= 63; end if (cmd_scope_reading) begin @@ -209,6 +233,7 @@ module VX_afu_ctrl #( scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_reading <= 0; + scope_rdata_valid <= 1; end end if (cmd_scope_writing) begin @@ -216,6 +241,7 @@ module VX_afu_ctrl #( scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; + scope_bus_out_r <= '0; end end end @@ -223,40 +249,51 @@ module VX_afu_ctrl #( assign scope_bus_out = scope_bus_out_r; + assign wready_stall = is_scope_waddr && cmd_scope_writing; + assign rvalid_stall = is_scope_raddr && ~scope_rdata_valid; + +`else + + assign wready_stall = 0; + assign rvalid_stall = 0; + `endif - // AXI Write + // AXI Write Request + assign s_axi_awready = (wstate == WSTATE_ADDR); + assign s_axi_wready = (wstate == WSTATE_DATA) && ~wready_stall; - assign s_axi_awready = (wstate == WSTATE_IDLE); - assign s_axi_wready = (wstate == WSTATE_DATA); + // AXI Write Response assign s_axi_bvalid = (wstate == WSTATE_RESP); assign s_axi_bresp = 2'b00; // OKAY - assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; - assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; - for (genvar i = 0; i < 4; ++i) begin : g_wmask assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}}; end + assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; + assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; + assign s_axi_b_fire = s_axi_bvalid && s_axi_bready; + // wstate always @(posedge clk) begin if (reset) begin - wstate <= WSTATE_IDLE; + wstate <= WSTATE_ADDR; end else begin case (wstate) - WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE; - WSTATE_DATA: wstate <= s_axi_wvalid ? WSTATE_RESP : WSTATE_DATA; - WSTATE_RESP: wstate <= s_axi_bready ? 
WSTATE_IDLE : WSTATE_RESP; - default: wstate <= WSTATE_IDLE; + WSTATE_ADDR: wstate <= s_axi_aw_fire ? WSTATE_DATA : WSTATE_ADDR; + WSTATE_DATA: wstate <= s_axi_w_fire ? WSTATE_RESP : WSTATE_DATA; + WSTATE_RESP: wstate <= s_axi_b_fire ? WSTATE_ADDR : WSTATE_RESP; + default: wstate <= WSTATE_ADDR; endcase end end // waddr always @(posedge clk) begin - if (s_axi_aw_fire) + if (s_axi_aw_fire) begin waddr <= s_axi_awaddr[ADDR_BITS-1:0]; + end end // wdata @@ -335,73 +372,80 @@ module VX_afu_ctrl #( end end - // AXI Read + // AXI Read Request + assign s_axi_arready = (rstate == RSTATE_ADDR); - assign s_axi_arready = (rstate == RSTATE_IDLE); - assign s_axi_rvalid = (rstate == RSTATE_DATA); + // AXI Read Response + assign s_axi_rvalid = (rstate == RSTATE_RESP); assign s_axi_rdata = rdata; assign s_axi_rresp = 2'b00; // OKAY assign s_axi_ar_fire = s_axi_arvalid && s_axi_arready; - assign raddr = s_axi_araddr[ADDR_BITS-1:0]; + assign s_axi_r_fire = s_axi_rvalid && s_axi_rready; // rstate always @(posedge clk) begin if (reset) begin - rstate <= RSTATE_IDLE; + rstate <= RSTATE_ADDR; end else begin case (rstate) - RSTATE_IDLE: rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE; - RSTATE_DATA: rstate <= (s_axi_rready & s_axi_rvalid) ? RSTATE_IDLE : RSTATE_DATA; - default: rstate <= RSTATE_IDLE; + RSTATE_ADDR: rstate <= s_axi_ar_fire ? RSTATE_DATA : RSTATE_ADDR; + RSTATE_DATA: rstate <= (~rvalid_stall) ? RSTATE_RESP : RSTATE_DATA; + RSTATE_RESP: rstate <= s_axi_r_fire ? 
RSTATE_ADDR : RSTATE_RESP; + default: rstate <= RSTATE_ADDR; endcase end end + // raddr + always @(posedge clk) begin + if (s_axi_ar_fire) begin + raddr <= s_axi_araddr[ADDR_BITS-1:0]; + end + end + // rdata always @(posedge clk) begin - if (s_axi_ar_fire) begin - rdata <= '0; - case (raddr) - ADDR_AP_CTRL: begin - rdata[0] <= ap_start_r; - rdata[1] <= ap_done; - rdata[2] <= ap_idle; - rdata[3] <= ap_ready; - rdata[7] <= auto_restart_r; - end - ADDR_GIE: begin - rdata <= 32'(gie_r); - end - ADDR_IER: begin - rdata <= 32'(ier_r); - end - ADDR_ISR: begin - rdata <= 32'(isr_r); - end - ADDR_DEV_0: begin - rdata <= dev_caps[31:0]; - end - ADDR_DEV_1: begin - rdata <= dev_caps[63:32]; - end - ADDR_ISA_0: begin - rdata <= isa_caps[31:0]; - end - ADDR_ISA_1: begin - rdata <= isa_caps[63:32]; - end - `ifdef SCOPE - ADDR_SCP_0: begin - rdata <= scope_bus_rdata[31:0]; - end - ADDR_SCP_1: begin - rdata <= scope_bus_rdata[63:32]; - end - `endif - default:; - endcase - end + rdata <= '0; + case (raddr) + ADDR_AP_CTRL: begin + rdata[0] <= ap_start_r; + rdata[1] <= ap_done; + rdata[2] <= ap_idle; + rdata[3] <= ap_ready; + rdata[7] <= auto_restart_r; + end + ADDR_GIE: begin + rdata <= 32'(gie_r); + end + ADDR_IER: begin + rdata <= 32'(ier_r); + end + ADDR_ISR: begin + rdata <= 32'(isr_r); + end + ADDR_DEV_0: begin + rdata <= dev_caps[31:0]; + end + ADDR_DEV_1: begin + rdata <= dev_caps[63:32]; + end + ADDR_ISA_0: begin + rdata <= isa_caps[31:0]; + end + ADDR_ISA_1: begin + rdata <= isa_caps[63:32]; + end + `ifdef SCOPE + ADDR_SCP_0: begin + rdata <= scope_bus_rdata[31:0]; + end + ADDR_SCP_1: begin + rdata <= scope_bus_rdata[63:32]; + end + `endif + default:; + endcase end assign ap_reset = ap_reset_r; diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 3829abcdd..1bc913cc8 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -194,11 +194,10 @@ public: return device->api_.fpgaReadMMIO64(device->fpga_, 0, MMIO_SCOPE_READ, value); }; - int ret = 
vx_scope_start(&callback, this, 0, -1); - if (ret != 0) { + CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { api_.fpgaClose(fpga_); - return ret; - } + return err; + }); } #endif return 0; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index de65c1e85..ae551bfa2 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -375,11 +375,9 @@ public: *value = (((uint64_t)value_hi) << 32) | value_lo; return 0; }; - int ret = vx_scope_start(&callback, device, 0, -1); - if (ret != 0) { - delete device; - return ret; - } + CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { + return err; + }); } #endif diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index b8af57cfe..1aaccc392 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -217,6 +217,8 @@ public: } int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + std::lock_guard guard(mutex_); + if (bank_id >= M_AXI_MEM_NUM_BANKS) return -1; uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; @@ -230,6 +232,8 @@ public: } int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { + std::lock_guard guard(mutex_); + if (bank_id >= M_AXI_MEM_NUM_BANKS) return -1; uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; @@ -246,56 +250,57 @@ public: std::lock_guard guard(mutex_); // write address + //printf("%0ld: [sim] register_write: address=0x%x\n", timestamp, offset); device_->s_axi_ctrl_awvalid = 1; device_->s_axi_ctrl_awaddr = offset; - auto s_axi_ctrl_awready = device_->s_axi_ctrl_awready; - do { + while (!device_->s_axi_ctrl_awready) this->tick(); - } while (!(s_axi_ctrl_awready || device_->s_axi_ctrl_awready)); + this->tick(); device_->s_axi_ctrl_awvalid = 0; // write data + //printf("%0ld: [sim] register_write: data=0x%x\n", timestamp, value); device_->s_axi_ctrl_wvalid = 1; device_->s_axi_ctrl_wdata = value; device_->s_axi_ctrl_wstrb = 0xf; - auto s_axi_ctrl_wready = device_->s_axi_ctrl_wready; - do { + 
while (!device_->s_axi_ctrl_wready) this->tick(); - } while (!(s_axi_ctrl_wready || device_->s_axi_ctrl_wready)); + this->tick(); device_->s_axi_ctrl_wvalid = 0; // write response - device_->s_axi_ctrl_bready = 1; - auto s_axi_ctrl_bvalid = device_->s_axi_ctrl_bvalid; + //printf("%0ld: [sim] register_write: response\n", timestamp); do { this->tick(); - } while (!(s_axi_ctrl_bvalid || device_->s_axi_ctrl_bvalid)); + } while (!device_->s_axi_ctrl_bvalid); + device_->s_axi_ctrl_bready = 1; + this->tick(); device_->s_axi_ctrl_bready = 0; - + //printf("%0ld: [sim] register_write: done\n", timestamp); return 0; } int register_read(uint32_t offset, uint32_t* value) { std::lock_guard guard(mutex_); - // read address + //printf("%0ld: [sim] register_read: address=0x%x\n", timestamp, offset); device_->s_axi_ctrl_arvalid = 1; device_->s_axi_ctrl_araddr = offset; - auto s_axi_ctrl_arready = device_->s_axi_ctrl_arready; - do { + while (!device_->s_axi_ctrl_arready) this->tick(); - } while (!(s_axi_ctrl_arready || device_->s_axi_ctrl_arready)); + this->tick(); device_->s_axi_ctrl_arvalid = 0; - // read data - device_->s_axi_ctrl_rready = 1; - auto s_axi_ctrl_rvalid = device_->s_axi_ctrl_rvalid; + // read response + //printf("%0ld: [sim] register_read: response\n", timestamp); do { this->tick(); - } while (!(s_axi_ctrl_rvalid || device_->s_axi_ctrl_rvalid)); + } while (!device_->s_axi_ctrl_rvalid); *value = device_->s_axi_ctrl_rdata; + device_->s_axi_ctrl_rready = 1; + this->tick(); device_->s_axi_ctrl_rready = 0; - + //printf("%0ld: [sim] register_read: done (value=0x%x)\n", timestamp, *value); return 0; } diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 8173a2535..53903dd41 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -102,9 +102,9 @@ run-opae: $(PROJECT) $(KERNEL_SRCS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) 
XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 4edc5c859..0f97d4979 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,9 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json 
XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From 63cce35c1a182a8262704ebd3086d2f5f81c8688 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 19 Sep 2024 23:33:23 -0700 Subject: [PATCH 280/488] scope taps annotation --- ci/regression.sh.in | 3 ++- hw/rtl/afu/opae/vortex_afu.sv | 3 ++- hw/rtl/afu/xrt/VX_afu_wrap.sv | 26 +++++++++++++++++++++++--- hw/rtl/core/VX_fetch.sv | 11 +++++++---- hw/rtl/core/VX_issue_slice.sv | 19 ++++++++++--------- hw/rtl/core/VX_lsu_slice.sv | 22 ++++++++++++++++++---- hw/rtl/libs/VX_scope_tap.sv | 10 +++++----- runtime/xrt/vortex.cpp | 5 ++--- tests/unittest/common.mk | 2 +- tests/unittest/vx_malloc/main.cpp | 4 ++-- 10 files changed, 72 insertions(+), 33 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index fb25ef480..8c88c368a 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -306,7 +306,8 @@ debug() CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 
--l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" - + ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" + echo "debugging tests done!" } diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 7d5a10b94..126c14eba 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1016,7 +1016,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_scope_tap #( .SCOPE_ID (0), .TRIGGERW (24), - .PROBEW (431) + .PROBEW (431), + .DEPTH (4096) ) scope_tap ( .clk(clk), .reset(scope_reset_w[0]), diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index c2f865076..0484f46a7 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -310,15 +310,35 @@ module VX_afu_wrap #( interrupt, \ vx_busy_wait, \ vx_busy, \ - vx_reset \ + vx_reset, \ + m_axi_mem_awvalid_a, \ + m_axi_mem_awready_a, \ + m_axi_mem_wvalid_a, \ + m_axi_mem_wready_a, \ + m_axi_mem_bvalid_a, \ + m_axi_mem_bready_a, \ + m_axi_mem_arvalid_a, \ + m_axi_mem_arready_a, \ + m_axi_mem_rvalid_a, \ + m_axi_mem_rready_a, \ + dcr_wr_valid \ } `define PROBES { \ - vx_pending_writes \ + vx_pending_writes, \ + m_axi_mem_awaddr_u, \ + m_axi_mem_awid_a, \ + m_axi_mem_bid_a, \ + m_axi_mem_araddr_u, \ + m_axi_mem_arid_a, \ + m_axi_mem_rid_a, \ + dcr_wr_addr, \ + dcr_wr_data \ } VX_scope_tap #( .SCOPE_ID (0), .TRIGGERW ($bits(`TRIGGERS)), - .PROBEW ($bits(`PROBES)) + .PROBEW ($bits(`PROBES)), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset_w[0]), diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 044cd0aba..f07ab39f5 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -41,7 +41,11 @@ module VX_fetch import VX_gpu_pkg::*; #( wire [`UUID_WIDTH-1:0] rsp_uuid; wire [`NW_WIDTH-1:0] req_tag, rsp_tag; + wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_req_fire = 
icache_req_valid && icache_req_ready; + wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; + `UNUSED_VAR (schedule_fire) + `UNUSED_VAR (icache_rsp_fire) assign req_tag = schedule_if.data.wid; @@ -133,14 +137,13 @@ module VX_fetch import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_FETCH `ifdef SCOPE - wire schedule_fire = schedule_if.valid && schedule_if.ready; - wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; VX_scope_tap #( .SCOPE_ID (1), .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + + .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + - (ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH) + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset), diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 34b60676f..19b2ba8bb 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -36,6 +36,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_scoreboard_if scoreboard_if(); VX_operands_if operands_if(); + wire operands_if_fire = operands_if.valid && operands_if.ready; + wire writeback_if_valid = writeback_if.valid; + `UNUSED_VAR (operands_if_fire) + `UNUSED_VAR (writeback_if_valid) + VX_ibuffer #( .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) ) ibuffer ( @@ -90,24 +95,20 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_ISSUE `ifdef SCOPE - wire operands_if_fire = operands_if.valid && operands_if.ready; - wire operands_if_not_ready = ~operands_if.ready; - wire writeback_if_valid = writeback_if.valid; VX_scope_tap #( .SCOPE_ID (2), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + + .TRIGGERW (2), + .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) + 
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset), .start (1'b0), .stop (1'b0), .triggers ({ - reset, operands_if_fire, - operands_if_not_ready, writeback_if_valid }), .probes ({ @@ -145,7 +146,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin - if (operands_if.valid && operands_if.ready) begin + if (operands_if_fire) begin `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); `TRACE(1, (", op=")) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 4a8e79953..d4de245bf 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,17 +536,31 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `ifdef SCOPE + `define TRIGGERS { \ + mem_req_fire, \ + mem_rsp_fire \ + } + `define PROBES { \ + mem_req_rw, \ + full_addr, \ + mem_req_byteen, \ + mem_req_data, \ + execute_if.data.uuid, \ + rsp_data, \ + rsp_uuid \ + } VX_scope_tap #( .SCOPE_ID (3), - .TRIGGERW (3), - .PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH) + .TRIGGERW (2), + .PROBEW (1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH), + .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset), .start (1'b0), .stop (1'b0), - .triggers({reset, mem_req_fire, mem_rsp_fire}), - .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), + .triggers(`TRIGGERS), + .probes (`PROBES), .bus_in (scope_bus_in), .bus_out(scope_bus_out) ); diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 010b6f2cc..88a3e9418 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ 
-19,7 +19,7 @@ module VX_scope_tap #( parameter SCOPE_IDW = 8, // scope identifier width parameter TRIGGERW = 0, // trigger signals width parameter PROBEW = 0, // probe signal width - parameter SIZE = 256, // trace buffer size + parameter DEPTH = 256, // trace buffer depth parameter IDLE_CTRW = 16 // idle time between triggers counter width ) ( input wire clk, @@ -35,7 +35,7 @@ module VX_scope_tap #( localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + TRIGGERW; localparam DATA_BITS = `LOG2UP(DATAW); - localparam ADDRW = `CLOG2(SIZE); + localparam ADDRW = `CLOG2(DEPTH); localparam TRIGGER_ENABLE = (TRIGGERW != 0); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; @@ -64,8 +64,8 @@ module VX_scope_tap #( localparam GET_TYPE_DATA = 2'd3; localparam GET_TYPE_BITS = 2; - `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0]; - `NO_RW_RAM_CHECK reg [IDLE_CTRW-1:0] delta_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [DEPTH-1:0]; + `NO_RW_RAM_CHECK reg [IDLE_CTRW-1:0] delta_store [DEPTH-1:0]; reg [TRIGGERW-1:0] prev_triggers; reg [IDLE_CTRW-1:0] delta; @@ -216,7 +216,7 @@ module VX_scope_tap #( ctrl_state <= CTRL_STATE_IDLE; cmd_start <= 0; start_delay <= '0; - waddr_end <= ADDRW'(SIZE-1); + waddr_end <= ADDRW'(DEPTH-1); bus_out_r <= 0; end else begin bus_out_r <= 0; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index ae551bfa2..511a87be5 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -757,7 +757,7 @@ private: if (pOff) { *pOff = offset; } - printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); + //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } @@ -792,8 +792,7 @@ private: if (pOff) { *pOff = offset; } - printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, - offset); + //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } diff --git a/tests/unittest/common.mk 
b/tests/unittest/common.mk index 384a2f02c..9c3e384be 100644 --- a/tests/unittest/common.mk +++ b/tests/unittest/common.mk @@ -2,7 +2,7 @@ ROOT_DIR := $(realpath ../../..) CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_HOME)/runtime/common +CXXFLAGS += -I$(VORTEX_HOME)/sim/common # Debugging ifdef DEBUG diff --git a/tests/unittest/vx_malloc/main.cpp b/tests/unittest/vx_malloc/main.cpp index f10f986ca..d7e20b439 100644 --- a/tests/unittest/vx_malloc/main.cpp +++ b/tests/unittest/vx_malloc/main.cpp @@ -1,4 +1,4 @@ -#include +#include #include #define RT_CHECK(_expr) \ @@ -12,7 +12,7 @@ static uint64_t minAddress = 0; static uint64_t maxAddress = 0xffffffff; -static uint32_t pageAlign = 4096; +static uint32_t pageAlign = 4096; static uint32_t blockAlign = 64; int main() { From e5f2442353982eed40d9c0d3fc5e803fb2cf5141 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Fri, 20 Sep 2024 08:58:11 -0400 Subject: [PATCH 281/488] Update Virtual Memory testing --- .github/workflows/ci.yml | 139 ++++++--------------------------------- Makefile.in | 8 --- ci/regression.sh.in | 19 ++---- ci/toolchain_env.sh.in | 4 +- config.mk.in | 2 - configure | 7 +- kernel/Makefile | 4 -- runtime/simx/Makefile | 6 +- runtime/simx/vortex.cpp | 3 +- sim/simx/Makefile | 4 -- tests/kernel/common.mk | 4 +- 11 files changed, 34 insertions(+), 166 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 474b1af00..9af9352e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,111 +62,7 @@ jobs: run: | make -C third_party > /dev/null - # build: - # runs-on: ubuntu-20.04 - # needs: setup - # strategy: - # matrix: - # xlen: [32, 64] - - # steps: - # - name: Checkout code - # uses: actions/checkout@v2 - - # - name: Install Dependencies - # run: | - # sudo bash ./ci/system_updates.sh - - # - name: Cache Toolchain Directory - # id: cache-toolchain - # uses: actions/cache@v2 - # with: - # path: tools - # key: ${{ 
runner.os }}-toolchain-v0.1 - # restore-keys: | - # ${{ runner.os }}-toolchain- - - # - name: Cache Third Party Directory - # id: cache-thirdparty - # uses: actions/cache@v2 - # with: - # path: third_party - # key: ${{ runner.os }}-thirdparty-v0.1 - # restore-keys: | - # ${{ runner.os }}-thirdparty- - - # - name: Run Build - # run: | - # TOOLDIR=$PWD/tools - # mkdir -p build${{ matrix.xlen }} - # cd build${{ matrix.xlen }} - # ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} - # source ci/toolchain_env.sh - # make software -s > /dev/null - # make tests -s > /dev/null - - # - name: Upload Build Artifact - # uses: actions/upload-artifact@v2 - # with: - # name: build-${{ matrix.xlen }} - # path: build${{ matrix.xlen }} - - # tests: - # runs-on: ubuntu-20.04 - # needs: build - # strategy: - # matrix: - # name: [regression, opencl, config1, config2, debug, stress] - # xlen: [32, 64] - - # steps: - # - name: Checkout code - # uses: actions/checkout@v2 - - # - name: Install Dependencies - # run: | - # sudo bash ./ci/system_updates.sh - - # - name: Cache Toolchain Directory - # id: cache-toolchain - # uses: actions/cache@v2 - # with: - # path: tools - # key: ${{ runner.os }}-toolchain-v0.1 - # restore-keys: | - # ${{ runner.os }}-toolchain- - - # - name: Cache Third Party Directory - # id: cache-thirdparty - # uses: actions/cache@v2 - # with: - # path: third_party - # key: ${{ runner.os }}-thirdparty-v0.1 - # restore-keys: | - # ${{ runner.os }}-thirdparty- - - # - name: Download Build Artifact - # uses: actions/download-artifact@v2 - # with: - # name: build-${{ matrix.xlen }} - # path: build${{ matrix.xlen }} - - # - name: Run tests - # run: | - # cd build${{ matrix.xlen }} - # source ci/toolchain_env.sh - # chmod -R +x . 
# Ensure all files have executable permissions - # if [ "${{ matrix.name }}" == "regression" ]; then - # ./ci/regression.sh --unittest - # ./ci/regression.sh --isa - # ./ci/regression.sh --kernel - # ./ci/regression.sh --synthesis - # ./ci/regression.sh --regression - # else - # ./ci/regression.sh --${{ matrix.name }} - # fi - - build_vm: + build: runs-on: ubuntu-20.04 needs: setup strategy: @@ -202,9 +98,9 @@ jobs: - name: Run Build run: | TOOLDIR=$PWD/tools - mkdir -p build${{ matrix.xlen }}-vm - cd build${{ matrix.xlen }}-vm - ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1 + mkdir -p build${{ matrix.xlen }} + cd build${{ matrix.xlen }} + ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} source ci/toolchain_env.sh make software -s > /dev/null make tests -s > /dev/null @@ -212,12 +108,12 @@ jobs: - name: Upload Build Artifact uses: actions/upload-artifact@v4 with: - name: build-${{ matrix.xlen }}-vm - path: build${{ matrix.xlen }}-vm + name: build-${{ matrix.xlen }} + path: build${{ matrix.xlen }} - test_vm: + test: runs-on: ubuntu-20.04 - needs: build_vm + needs: build strategy: fail-fast: false matrix: @@ -253,19 +149,26 @@ jobs: - name: Download Build Artifact uses: actions/download-artifact@v4 with: - name: build-${{ matrix.xlen }}-vm - path: build${{ matrix.xlen }}-vm - + name: build-${{ matrix.xlen }} + path: build${{ matrix.xlen }} - name: Run tests run: | - cd build${{ matrix.xlen }}-vm + cd build${{ matrix.xlen }} source ci/toolchain_env.sh chmod -R +x . 
# Ensure all files have executable permissions - ./ci/regression.sh --vm + if [ "${{ matrix.name }}" == "regression" ]; then + ./ci/regression.sh --unittest + ./ci/regression.sh --isa + ./ci/regression.sh --kernel + ./ci/regression.sh --synthesis + ./ci/regression.sh --regression + else + ./ci/regression.sh --${{ matrix.name }} + fi complete: runs-on: ubuntu-20.04 - needs: test_vm + needs: test steps: - name: Check Completion diff --git a/Makefile.in b/Makefile.in index bfe944998..264738aca 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2,14 +2,6 @@ include config.mk .PHONY: build software tests -vm: - $(MAKE) -C $(VORTEX_HOME)/third_party - $(MAKE) -C hw - $(MAKE) -C sim simx - $(MAKE) -C kernel - $(MAKE) -C runtime vm - $(MAKE) -C tests - all: $(MAKE) -C $(VORTEX_HOME)/third_party $(MAKE) -C hw diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 9ba65cfee..afd23c9ff 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -129,23 +129,11 @@ opencl() vm(){ echo "begin vm tests..." - make -C sim/simx - make -C runtime/simx - - make -C tests/kernel run-simx + CONFIGS="-DVM_ENABLE" make -C sim/simx + CONFIGS="-DVM_ENABLE" make -C runtime/simx - # Regression tests - make -C tests/regression run-simx - - # test global barrier - CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 - - # test local barrier - ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" - - # OpenCL tests make -C tests/opencl run-simx - ./ci/blackbox.sh --driver=simx --app=lbm --warps=8 + make -C tests/regression run-simx echo "vm tests done!" 
} @@ -415,6 +403,7 @@ while [ "$1" != "" ]; do tests+=("regression") tests+=("opencl") tests+=("cache") + tests+=("vm") tests+=("config1") tests+=("config2") tests+=("debug") diff --git a/ci/toolchain_env.sh.in b/ci/toolchain_env.sh.in index be140d28d..dc50389a9 100755 --- a/ci/toolchain_env.sh.in +++ b/ci/toolchain_env.sh.in @@ -16,8 +16,8 @@ TOOLDIR=${TOOLDIR:=@TOOLDIR@} -# export VERILATOR_ROOT=$TOOLDIR/verilator -# export PATH=$VERILATOR_ROOT/bin:$PATH +export VERILATOR_ROOT=$TOOLDIR/verilator +export PATH=$VERILATOR_ROOT/bin:$PATH export SV2V_PATH=$TOOLDIR/sv2v export PATH=$SV2V_PATH/bin:$PATH diff --git a/config.mk.in b/config.mk.in index 8ec052094..be369b56e 100644 --- a/config.mk.in +++ b/config.mk.in @@ -35,5 +35,3 @@ VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party - -VM_ENABLE ?= @VM_ENABLE@ \ No newline at end of file diff --git a/configure b/configure index f2e4781ef..62975784b 100755 --- a/configure +++ b/configure @@ -63,7 +63,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g" "$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then @@ -114,7 +114,6 @@ default_xlen=32 default_tooldir=$HOME/tools default_osversion=$(detect_osversion) default_prefix=$CURRENT_DIR -default_vm=0 # load default configuration parameters from existing config.mk if [ -f "config.mk" ]; then @@ -127,7 +126,6 @@ if [ -f "config.mk" ]; then TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;; OSVERSION\ ?*) default_osversion=${value//\?=/} ;; PREFIX\ ?*) 
default_prefix=${value//\?=/} ;; - VM_ENABLE\ ?*) default_vm=${value//\?=/} ;; esac done < config.mk fi @@ -137,7 +135,6 @@ XLEN=${XLEN:=$default_xlen} TOOLDIR=${TOOLDIR:=$default_tooldir} OSVERSION=${OSVERSION:=$default_osversion} PREFIX=${PREFIX:=$default_prefix} -VM_ENABLE=${VM_ENABLE:=$default_vm} # parse command line arguments usage() { @@ -146,7 +143,6 @@ usage() { echo " --tooldir= Set the TOOLDIR path (default: $HOME/tools)" echo " --osversion= Set the OS Version (default: $(detect_osversion))" echo " --prefix= Set installation directory" - echo " --vm_enable= Enable Virtual Memory support (default: 0)" exit 1 } while [[ "$#" -gt 0 ]]; do @@ -155,7 +151,6 @@ while [[ "$#" -gt 0 ]]; do --tooldir=*) TOOLDIR="${1#*=}" ;; --osversion=*) OSVERSION="${1#*=}" ;; --prefix=*) PREFIX="${1#*=}" ;; - --vm_enable=*) VM_ENABLE="${1#*=}" ;; -h|--help) usage ;; *) echo "Unknown parameter passed: $1"; usage ;; esac diff --git a/kernel/Makefile b/kernel/Makefile index 16d279fa0..201ebc200 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -32,10 +32,6 @@ CFLAGS += -O3 -mcmodel=medany -fno-exceptions -fdata-sections -ffunction-section CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw CFLAGS += -DXLEN_$(XLEN) -ifeq ($(VM_ENABLE), 1) -CFLAGS += -DVM_ENABLE -endif - PROJECT := libvortex SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c diff --git a/runtime/simx/Makefile b/runtime/simx/Makefile index 31ab483e7..89ad5dd3a 100644 --- a/runtime/simx/Makefile +++ b/runtime/simx/Makefile @@ -10,10 +10,6 @@ CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMM CXXFLAGS += $(CONFIGS) CXXFLAGS += -DXLEN_$(XLEN) -ifeq ($(VM_ENABLE), 1) -CXXFLAGS += -DVM_ENABLE -endif - LDFLAGS += -shared -pthread LDFLAGS += -L$(DESTDIR) -lsimx @@ -46,4 +42,4 @@ clean-runtime: clean: clean-driver clean-runtime -.PHONY: all driver clean-driver 
clean-runtime clean \ No newline at end of file +.PHONY: all driver clean-driver clean-runtime clean diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 1c8f47eaf..c2d04400e 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -56,7 +56,8 @@ public: { // attach memory module processor_.attach_ram(&ram_); -#ifdef VM_ENABLE +#ifdef VM_ENABLE + //std::cout << "***VM ENABLED!!***"<< std::endl; CHECK_ERR(init_VM(), ); #endif } diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 33120b13c..31fde7023 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -14,10 +14,6 @@ CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += $(CONFIGS) -ifeq ($(VM_ENABLE), 1) -CXXFLAGS += -DVM_ENABLE -endif - LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index e3f6b472b..cd0d2c409 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -2,8 +2,10 @@ ROOT_DIR := $(realpath ../../..) 
ifeq ($(XLEN),64) CFLAGS += -march=rv64imafd -mabi=lp64d +STARTUP_ADDR ?= 0x180000000 else CFLAGS += -march=rv32imaf -mabi=ilp32f +STARTUP_ADDR ?= 0x80000000 endif LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) @@ -29,7 +31,7 @@ CFLAGS += -DXLEN_$(XLEN) -DNDEBUG LIBC_LIB += -L$(LIBC_VORTEX)/lib -lm -lc LIBC_LIB += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a -LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(ROOT_DIR)/kernel/libvortex.a $(LIBC_LIB) +LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortex.a $(LIBC_LIB) all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump From 9902856221685fb735a82a3b50cbb550a515c143 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Fri, 20 Sep 2024 09:05:54 -0400 Subject: [PATCH 282/488] VERILATOR --- ci/toolchain_env.sh.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/toolchain_env.sh.in b/ci/toolchain_env.sh.in index dc50389a9..2c7373237 100755 --- a/ci/toolchain_env.sh.in +++ b/ci/toolchain_env.sh.in @@ -16,8 +16,8 @@ TOOLDIR=${TOOLDIR:=@TOOLDIR@} -export VERILATOR_ROOT=$TOOLDIR/verilator -export PATH=$VERILATOR_ROOT/bin:$PATH +#export VERILATOR_ROOT=$TOOLDIR/verilator +#export PATH=$VERILATOR_ROOT/bin:$PATH export SV2V_PATH=$TOOLDIR/sv2v export PATH=$SV2V_PATH/bin:$PATH From 9cc00108350f7dc6fb9fa28a155eb0ad6e73cde6 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Fri, 20 Sep 2024 09:19:17 -0400 Subject: [PATCH 283/488] change verilator path --- ci/toolchain_env.sh.in | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/toolchain_env.sh.in b/ci/toolchain_env.sh.in index 2c7373237..9c3387c13 100755 --- a/ci/toolchain_env.sh.in +++ b/ci/toolchain_env.sh.in @@ -15,9 +15,7 @@ # limitations under the License. 
TOOLDIR=${TOOLDIR:=@TOOLDIR@} - -#export VERILATOR_ROOT=$TOOLDIR/verilator -#export PATH=$VERILATOR_ROOT/bin:$PATH +export PATH=$TOOLDIR/verilator/bin:$PATH export SV2V_PATH=$TOOLDIR/sv2v export PATH=$SV2V_PATH/bin:$PATH From 4383631543e61919f5ac6c35d740c1b3480f2387 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Fri, 20 Sep 2024 09:58:50 -0400 Subject: [PATCH 284/488] Add BARE mode test and print out VM info --- ci/regression.sh.in | 10 +++++++--- runtime/simx/vortex.cpp | 8 +++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index afd23c9ff..281635793 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -129,9 +129,13 @@ opencl() vm(){ echo "begin vm tests..." - CONFIGS="-DVM_ENABLE" make -C sim/simx - CONFIGS="-DVM_ENABLE" make -C runtime/simx - + make -C sim/simx clean && CONFIGS="-DVM_ENABLE" make -C sim/simx + make -C runtime/simx clean && CONFIGS="-DVM_ENABLE" make -C runtime/simx + make -C tests/opencl run-simx + make -C tests/regression run-simx + + make -C sim/simx clean && CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C sim/simx + make -C runtime/simx clean && CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C runtime/simx make -C tests/opencl run-simx make -C tests/regression run-simx diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index c2d04400e..673d73aa0 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -57,7 +57,7 @@ public: // attach memory module processor_.attach_ram(&ram_); #ifdef VM_ENABLE - //std::cout << "***VM ENABLED!!***"<< std::endl; + std::cout << "*** VM ENABLED!! 
***"<< std::endl; CHECK_ERR(init_VM(), ); #endif } @@ -433,6 +433,12 @@ public: uint64_t pt_addr = 0; // Reserve space for PT DBGPRINT("[RT:init_VM] Initialize VM\n"); + DBGPRINT("* VM_ADDR_MODE=0x%lx", VM_ADDR_MODE); + DBGPRINT("* PAGE_TABLE_BASE_ADDR=0x%lx", PAGE_TABLE_BASE_ADDR); + DBGPRINT("* PT_LEVEL=0x%lx", PT_LEVEL); + DBGPRINT("* PT_SIZE=0x%lx", PT_SIZE); + DBGPRINT("* PTE_SIZE=0x%lx", PTE_SIZE); + DBGPRINT("* TLB_SIZE=0x%lx", TLB_SIZE); CHECK_ERR(mem_reserve(PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, VX_MEM_READ_WRITE), { return err; }); From 5ab13559e0d03d7329e028fc34a5fee15b18761f Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 20 Sep 2024 10:08:53 -0400 Subject: [PATCH 285/488] Update README.md --- README.md | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4322f06bc..ec8d10bd5 100644 --- a/README.md +++ b/README.md @@ -59,20 +59,17 @@ sudo apt-get install git ``` ### Configure your build folder ```sh - # - # By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir. - # This is the example for volvo server mkdir build - mkdir out - export OUT_DIR=`pwd`/out cd build - # Run the following to disble virtual memory feature in compilation - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR - # Run the following instead to enable virtual memory feature in compilation - ../configure --xlen=32 --tooldir=/software/vortex-toolchain-2024-2024-08-09 --prefix=$OUT_DIR --vm_enable=1 + # for 32bit + ../configure --xlen=32 --tooldir=$HOME/tools + # for 64bit + ../configure --xlen=64 --tooldir=$HOME/tools ``` ### Install prebuilt toolchain - # We will use the precomipled tools in volvo toolchanin directory +```sh + ./ci/toolchain_install.sh --all +``` ### set environment variables ```sh # should always run before using the toolchain! 
@@ -82,7 +79,6 @@ sudo apt-get install git ```sh make -s ``` - ### Quick demo running vecadd OpenCL kernel on 2 cores ```sh ./ci/blackbox.sh --cores=2 --app=vecadd From a61f97f6c66e2837392b7b7d89319f3928e0a164 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 20 Sep 2024 08:09:46 -0700 Subject: [PATCH 286/488] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/vortex_afu.vh | 4 ++-- hw/rtl/libs/VX_axi_adapter.sv | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 126c14eba..4bfacf960 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -79,7 +79,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam COUT_TID_WIDTH = `CLOG2(`VX_MEM_BYTEEN_WIDTH); localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8; - localparam COUT_QUEUE_SIZE = 64; + localparam COUT_QUEUE_SIZE = 1024; localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS; localparam MMIO_ISA_CAPS = `AFU_IMAGE_MMIO_ISA_CAPS; diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index 1a14e1316..bf70cb885 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -15,11 +15,11 @@ `define VORTEX_AFU_VH `ifndef M_AXI_MEM_NUM_BANKS -`define M_AXI_MEM_NUM_BANKS 4 +`define M_AXI_MEM_NUM_BANKS 1 `endif `ifndef M_AXI_MEM_ADDR_WIDTH -`define M_AXI_MEM_ADDR_WIDTH 30 +`define M_AXI_MEM_ADDR_WIDTH 34 `endif `ifndef M_AXI_MEM_DATA_WIDTH diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 06216f2ab..952497186 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -100,15 +100,6 @@ module VX_axi_adapter #( assign req_bank_sel = '0; end - wire [NUM_BANKS-1:0] axi_write_ready; - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) - && (m_axi_wready[i] || m_axi_w_ack[i]); 
- end - - // request ack - assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - wire mem_req_fire = mem_req_valid && mem_req_ready; // AXi write request synchronization @@ -131,6 +122,15 @@ module VX_axi_adapter #( end end + wire [NUM_BANKS-1:0] axi_write_ready; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready + assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) + && (m_axi_wready[i] || m_axi_w_ack[i]); + end + + // request ack + assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; + // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; From 3bac7eae6aff0dbb5996ee3889630289d2ccd919 Mon Sep 17 00:00:00 2001 From: sij814 Date: Fri, 20 Sep 2024 16:52:12 -0700 Subject: [PATCH 287/488] changed fpnew commit --- third_party/fpnew | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/fpnew b/third_party/fpnew index a6af69155..79e453139 160000 --- a/third_party/fpnew +++ b/third_party/fpnew @@ -1 +1 @@ -Subproject commit a6af691551ffbd76d5d9cf30774d3295a41615e4 +Subproject commit 79e453139072df42c9ec8f697132ba485d74e23d From 7938c7be5f92cebdd02defeb1dee55691eef0516 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 20 Sep 2024 20:35:58 -0700 Subject: [PATCH 288/488] synthesis updates --- hw/rtl/afu/opae/vortex_afu.sv | 114 ++++++++++----------- hw/rtl/afu/xrt/VX_afu_wrap.sv | 44 ++++----- hw/rtl/core/VX_issue_top.sv | 7 ++ hw/rtl/core/VX_lsu_slice.sv | 17 +--- hw/rtl/fpu/VX_fpu_sqrt.sv | 8 +- hw/rtl/libs/VX_scope_tap.sv | 164 ++++++++++++++++++++----------- hw/syn/altera/dut/Makefile | 10 +- hw/syn/altera/dut/scope/Makefile | 7 ++ hw/syn/altera/opae/Makefile | 8 +- hw/syn/xilinx/dut/Makefile | 10 +- hw/syn/xilinx/dut/scope/Makefile | 7 ++ hw/syn/xilinx/xrt/Makefile | 16 +-- 
hw/syn/yosys/Makefile | 2 +- sim/opaesim/Makefile | 1 - sim/xrtsim/Makefile | 1 - 15 files changed, 237 insertions(+), 179 deletions(-) create mode 100755 hw/syn/altera/dut/scope/Makefile create mode 100644 hw/syn/xilinx/dut/scope/Makefile diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 4bfacf960..3e605462f 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -40,7 +40,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS], input wire avs_readdatavalid [NUM_LOCAL_MEM_BANKS] ); - localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); @@ -50,6 +49,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam CCI_DATA_SIZE = CCI_DATA_WIDTH / 8; localparam CCI_ADDR_WIDTH = $bits(t_ccip_clAddr); + localparam RESET_CTR_WIDTH = `CLOG2(`RESET_DELAY+1); + localparam AVS_RD_QUEUE_SIZE = 32; localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH; localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(`VX_MEM_DATA_WIDTH); @@ -185,7 +186,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end if (cmd_scope_writing) begin scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); - scope_bus_ctr <= scope_bus_ctr - 1; + scope_bus_ctr <= scope_bus_ctr - 6'd1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; scope_bus_in <= 0; @@ -193,7 +194,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end if (cmd_scope_reading) begin cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; - scope_bus_ctr <= scope_bus_ctr - 1; + scope_bus_ctr <= scope_bus_ctr - 6'd1; if (scope_bus_ctr == 0) begin cmd_scope_reading <= 0; end @@ -344,7 +345,7 @@ module vortex_afu import ccip_if_pkg::*; import 
local_mem_cfg_pkg::*; import VX_ wire cmd_mem_rd_done; reg cmd_mem_wr_done; - reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; + reg [RESET_CTR_WIDTH-1:0] vx_reset_ctr; reg vx_busy_wait; reg vx_reset = 1; // asserted at initialization wire vx_busy; @@ -384,7 +385,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_reset_ctr <= (`RESET_DELAY-1); + vx_reset_ctr <= RESET_CTR_WIDTH'(`RESET_DELAY-1); vx_reset <= 1; end default: begin @@ -414,7 +415,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ STATE_RUN: begin if (vx_reset) begin // wait until the reset network is ready - if (vx_reset_ctr == 0) begin + if (vx_reset_ctr == RESET_CTR_WIDTH'(0)) begin `ifdef DBG_TRACE_AFU `TRACE(2, ("%t: AFU: Begin execution\n", $time)) `endif @@ -443,8 +444,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ endcase // ensure reset network initialization - if (vx_reset_ctr != '0) begin - vx_reset_ctr <= vx_reset_ctr - 1; + if (vx_reset_ctr != RESET_CTR_WIDTH'(0)) begin + vx_reset_ctr <= vx_reset_ctr - RESET_CTR_WIDTH'(1); end end end @@ -1013,61 +1014,64 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end wire state_changed = (state != state_prev); + `define AFU_TRIGGERS { \ + reset, \ + state_changed, \ + mem_req_fire, \ + mem_rsp_fire, \ + avs_write_fire, \ + avs_read_fire, \ + avs_waitrequest[0], \ + avs_readdatavalid[0], \ + cp2af_sRxPort.c0.mmioRdValid, \ + cp2af_sRxPort.c0.mmioWrValid, \ + cp2af_sRxPort.c0.rspValid, \ + cp2af_sRxPort.c1.rspValid, \ + af2cp_sTxPort.c0.valid, \ + af2cp_sTxPort.c1.valid, \ + cp2af_sRxPort.c0TxAlmFull, \ + cp2af_sRxPort.c1TxAlmFull, \ + af2cp_sTxPort.c2.mmioRdValid, \ + cci_wr_req_fire, \ + cci_wr_rsp_fire, \ + cci_rd_req_fire, \ + cci_rd_rsp_fire, \ + cci_pending_reads_full, \ + cci_pending_writes_empty, \ + cci_pending_writes_full \ 
+ } + + `define AFU_PROBES { \ + cmd_type, \ + state, \ + mmio_req_hdr.address, \ + cp2af_sRxPort.c0.hdr.mdata, \ + af2cp_sTxPort.c0.hdr.address, \ + af2cp_sTxPort.c0.hdr.mdata, \ + af2cp_sTxPort.c1.hdr.address, \ + avs_address[0], \ + avs_byteenable[0], \ + avs_burstcount[0], \ + cci_mem_rd_req_ctr, \ + cci_mem_wr_req_ctr, \ + cci_rd_req_ctr, \ + cci_rd_rsp_ctr, \ + cci_wr_req_ctr, \ + mem_bus_if_addr \ + } + VX_scope_tap #( .SCOPE_ID (0), - .TRIGGERW (24), - .PROBEW (431), + .TRIGGERW ($bits(`AFU_TRIGGERS)), + .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) scope_tap ( .clk(clk), .reset(scope_reset_w[0]), .start(1'b0), .stop(1'b0), - .triggers({ - reset, - state_changed, - mem_req_fire, - mem_rsp_fire, - avs_write_fire, - avs_read_fire, - avs_waitrequest[0], - avs_readdatavalid[0], - cp2af_sRxPort.c0.mmioRdValid, - cp2af_sRxPort.c0.mmioWrValid, - cp2af_sRxPort.c0.rspValid, - cp2af_sRxPort.c1.rspValid, - af2cp_sTxPort.c0.valid, - af2cp_sTxPort.c1.valid, - cp2af_sRxPort.c0TxAlmFull, - cp2af_sRxPort.c1TxAlmFull, - af2cp_sTxPort.c2.mmioRdValid, - cci_wr_req_fire, - cci_wr_rsp_fire, - cci_rd_req_fire, - cci_rd_rsp_fire, - cci_pending_reads_full, - cci_pending_writes_empty, - cci_pending_writes_full - }), - .probes({ - cmd_type, - state, - mmio_req_hdr.address, - mmio_req_hdr.length, - cp2af_sRxPort.c0.hdr.mdata, - af2cp_sTxPort.c0.hdr.address, - af2cp_sTxPort.c0.hdr.mdata, - af2cp_sTxPort.c1.hdr.address, - avs_address[0], - avs_byteenable[0], - avs_burstcount[0], - cci_mem_rd_req_ctr, - cci_mem_wr_req_ctr, - cci_rd_req_ctr, - cci_rd_rsp_ctr, - cci_wr_req_ctr, - mem_bus_if_addr - }), + .triggers(`AFU_TRIGGERS), + .probes(`AFU_PROBES), .bus_in(scope_bus_in_w[0]), .bus_out(scope_bus_out_w[0]) ); diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 0484f46a7..e51d8f17b 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -301,7 +301,7 @@ module VX_afu_wrap #( `ifdef DBG_SCOPE_AFU `ifdef SCOPE - `define TRIGGERS { \ + 
`define AFU_TRIGGERS { \ reset, \ ap_reset, \ ap_start, \ @@ -311,41 +311,41 @@ module VX_afu_wrap #( vx_busy_wait, \ vx_busy, \ vx_reset, \ - m_axi_mem_awvalid_a, \ - m_axi_mem_awready_a, \ - m_axi_mem_wvalid_a, \ - m_axi_mem_wready_a, \ - m_axi_mem_bvalid_a, \ - m_axi_mem_bready_a, \ - m_axi_mem_arvalid_a, \ - m_axi_mem_arready_a, \ - m_axi_mem_rvalid_a, \ - m_axi_mem_rready_a, \ + m_axi_mem_awvalid_a[0], \ + m_axi_mem_awready_a[0], \ + m_axi_mem_wvalid_a[0], \ + m_axi_mem_wready_a[0], \ + m_axi_mem_bvalid_a[0], \ + m_axi_mem_bready_a[0], \ + m_axi_mem_arvalid_a[0], \ + m_axi_mem_arready_a[0], \ + m_axi_mem_rvalid_a[0], \ + m_axi_mem_rready_a[0], \ dcr_wr_valid \ } - `define PROBES { \ + `define AFU_PROBES { \ vx_pending_writes, \ - m_axi_mem_awaddr_u, \ - m_axi_mem_awid_a, \ - m_axi_mem_bid_a, \ - m_axi_mem_araddr_u, \ - m_axi_mem_arid_a, \ - m_axi_mem_rid_a, \ + m_axi_mem_awaddr_u[0], \ + m_axi_mem_awid_a[0], \ + m_axi_mem_bid_a[0], \ + m_axi_mem_araddr_u[0], \ + m_axi_mem_arid_a[0], \ + m_axi_mem_rid_a[0], \ dcr_wr_addr, \ dcr_wr_data \ } VX_scope_tap #( .SCOPE_ID (0), - .TRIGGERW ($bits(`TRIGGERS)), - .PROBEW ($bits(`PROBES)), + .TRIGGERW ($bits(`AFU_TRIGGERS)), + .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) scope_tap ( .clk (clk), .reset (scope_reset_w[0]), .start (1'b0), .stop (1'b0), - .triggers (`TRIGGERS), - .probes (`PROBES), + .triggers (`AFU_TRIGGERS), + .probes (`AFU_PROBES), .bus_in (scope_bus_in_w[0]), .bus_out (scope_bus_out_w[0]) ); diff --git a/hw/rtl/core/VX_issue_top.sv b/hw/rtl/core/VX_issue_top.sv index e148b02f6..2d81ee044 100644 --- a/hw/rtl/core/VX_issue_top.sv +++ b/hw/rtl/core/VX_issue_top.sv @@ -113,6 +113,13 @@ module VX_issue_top import VX_gpu_pkg::*; #( issue_perf_t issue_perf = '0; `endif +`ifdef SCOPE + wire [0:0] scope_reset_w = 1'b0; + wire [0:0] scope_bus_in_w = 1'b0; + wire [0:0] scope_bus_out_w; + `UNUSED_VAR (scope_bus_out_w) +`endif + VX_issue #( .INSTANCE_ID (INSTANCE_ID) ) issue ( diff --git a/hw/rtl/core/VX_lsu_slice.sv 
b/hw/rtl/core/VX_lsu_slice.sv index d4de245bf..0452d0c79 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,19 +536,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `ifdef SCOPE - `define TRIGGERS { \ - mem_req_fire, \ - mem_rsp_fire \ - } - `define PROBES { \ - mem_req_rw, \ - full_addr, \ - mem_req_byteen, \ - mem_req_data, \ - execute_if.data.uuid, \ - rsp_data, \ - rsp_uuid \ - } VX_scope_tap #( .SCOPE_ID (3), .TRIGGERW (2), @@ -559,8 +546,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .reset (scope_reset), .start (1'b0), .stop (1'b0), - .triggers(`TRIGGERS), - .probes (`PROBES), + .triggers({mem_req_fire, mem_rsp_fire}), + .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), .bus_in (scope_bus_in), .bus_out(scope_bus_out) ); diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index fbfb86175..172a42e6f 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -101,7 +101,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .clk (clk), .areset (1'b0), .en (pe_enable), - .a (pe_data_in[i]), + .a (pe_data_in[i][0 +: 32]), .q (pe_data_out[i][0 +: 32]) ); assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x; @@ -120,7 +120,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( .aclk (clk), .aclken (pe_enable), .s_axis_a_tvalid (1'b1), - .s_axis_a_tdata (pe_data_in[i]), + .s_axis_a_tdata (pe_data_in[i][0 +: 32]), `UNUSED_PIN (m_axis_result_tvalid), .m_axis_result_tdata (pe_data_out[i][0 +: 32]), .m_axis_result_tuser (tuser) @@ -143,8 +143,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( dpi_fsqrt ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, // a - pe_data_in[0][32 +: `INST_FRM_BITS], // frm + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + pe_data_in[0][32 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 88a3e9418..f44678079 100644 --- 
a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -17,82 +17,138 @@ module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width - parameter TRIGGERW = 0, // trigger signals width - parameter PROBEW = 0, // probe signal width - parameter DEPTH = 256, // trace buffer depth - parameter IDLE_CTRW = 16 // idle time between triggers counter width + parameter TRIGGERW = 16, // trigger signals width + parameter PROBEW = 256, // probe signal width + parameter DEPTH = 1024, // trace buffer depth + parameter IDLE_CTRW = 16, // idle time between triggers counter width + parameter TX_DATAW = 64 // transfer data width ) ( input wire clk, input wire reset, input wire start, input wire stop, - input wire [TRIGGERW-1:0] triggers, + input wire [`UP(TRIGGERW)-1:0] triggers, input wire [PROBEW-1:0] probes, input wire bus_in, output wire bus_out ); - localparam TX_DATAW = 64; - localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); - localparam DATAW = PROBEW + TRIGGERW; - localparam DATA_BITS = `LOG2UP(DATAW); - localparam ADDRW = `CLOG2(DEPTH); - localparam TRIGGER_ENABLE = (TRIGGERW != 0); - localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; + localparam CTR_WIDTH = 64; + localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); + localparam DATAW = PROBEW + TRIGGERW; + localparam DATA_BITS = `LOG2UP(DATAW); + localparam ADDRW = `CLOG2(DEPTH); + localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; - localparam CTRL_STATE_IDLE = 2'd0; - localparam CTRL_STATE_RECV = 2'd1; - localparam CTRL_STATE_CMD = 2'd2; - localparam CTRL_STATE_SEND = 2'd3; - localparam CTRL_STATE_BITS = 2; + localparam CTRL_STATE_IDLE = 2'd0; + localparam CTRL_STATE_RECV = 2'd1; + localparam CTRL_STATE_CMD = 2'd2; + localparam CTRL_STATE_SEND = 2'd3; + localparam CTRL_STATE_BITS = 2; - localparam TAP_STATE_IDLE = 2'd0; - localparam TAP_STATE_WAIT = 2'd1; - localparam TAP_STATE_RUN = 2'd2; - localparam TAP_STATE_BITS = 2; + localparam TAP_STATE_IDLE = 2'd0; + localparam 
TAP_STATE_WAIT = 2'd1; + localparam TAP_STATE_RUN = 2'd2; + localparam TAP_STATE_BITS = 2; - localparam CMD_GET_WIDTH = 3'd0; - localparam CMD_GET_COUNT = 3'd1; - localparam CMD_GET_START = 3'd2; - localparam CMD_GET_DATA = 3'd3; - localparam CMD_SET_START = 3'd4; - localparam CMD_SET_STOP = 3'd5; - localparam CMD_TYPE_BITS = 3; + localparam CMD_GET_WIDTH = 3'd0; + localparam CMD_GET_COUNT = 3'd1; + localparam CMD_GET_START = 3'd2; + localparam CMD_GET_DATA = 3'd3; + localparam CMD_SET_START = 3'd4; + localparam CMD_SET_STOP = 3'd5; + localparam CMD_TYPE_BITS = 3; - localparam GET_TYPE_WIDTH = 2'd0; - localparam GET_TYPE_COUNT = 2'd1; - localparam GET_TYPE_START = 2'd2; - localparam GET_TYPE_DATA = 2'd3; - localparam GET_TYPE_BITS = 2; + localparam GET_TYPE_WIDTH = 2'd0; + localparam GET_TYPE_COUNT = 2'd1; + localparam GET_TYPE_START = 2'd2; + localparam GET_TYPE_DATA = 2'd3; + localparam GET_TYPE_BITS = 2; - `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [DEPTH-1:0]; - `NO_RW_RAM_CHECK reg [IDLE_CTRW-1:0] delta_store [DEPTH-1:0]; - - reg [TRIGGERW-1:0] prev_triggers; + reg [`UP(TRIGGERW)-1:0] prev_triggers; reg [IDLE_CTRW-1:0] delta; - reg [63:0] timestamp, start_time; + reg [CTR_WIDTH-1:0] timestamp, start_time; reg [ADDRW-1:0] waddr, waddr_end; + reg write_en; reg cmd_start, delta_flush; - reg [63:0] start_delay, delay_cntr; + reg [CTR_WIDTH-1:0] start_delay, delay_cntr; reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; reg [GET_TYPE_BITS-1:0] get_type; + wire [DATAW-1:0] data_value; + wire [IDLE_CTRW-1:0] delta_value; reg [TX_DATA_BITS-1:0] ser_tx_ctr; reg [DATA_BITS-1:0] read_offset; reg [ADDRW-1:0] raddr; reg read_data; + wire [DATAW-1:0] data_in; + if (TRIGGERW != 0) begin + assign data_in = {probes, triggers}; + end else begin + assign data_in = probes; + end + + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .NO_RWCHECK (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr 
(waddr), + .wdata (data_in), + .raddr (raddr), + .rdata (data_value) + ); + + if (TRIGGERW != 0) begin + VX_dp_ram #( + .DATAW (IDLE_CTRW), + .SIZE (DEPTH), + .NO_RWCHECK (1) + ) delta_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr), + .wdata (delta), + .raddr (raddr), + .rdata (delta_value) + ); + end else begin + assign delta_value = '0; + end + // // trace capture // - wire [ADDRW-1:0] raddr_n = raddr + 1; + wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); - wire [ADDRW:0] count = (ADDRW+1)'(waddr) + 1; + wire [ADDRW:0] count = (ADDRW+1)'(waddr) + (ADDRW+1)'(1); + + always @(*) begin + write_en = 0; + if (tap_state == TAP_STATE_RUN) begin + if (TRIGGERW != 0) begin + if (delta_flush || (triggers != prev_triggers)) begin + write_en = 1; + end + end else begin + write_en = 1; + end + end + end always @(posedge clk) begin if (reset) begin @@ -105,7 +161,7 @@ module VX_scope_tap #( read_data <= 0; timestamp <= '0; end else begin - timestamp <= timestamp + 1; + timestamp <= timestamp + CTR_WIDTH'(1); case (tap_state) TAP_STATE_IDLE: begin @@ -128,7 +184,7 @@ module VX_scope_tap #( end end TAP_STATE_WAIT: begin - delay_cntr <= delay_cntr - 1; + delay_cntr <= delay_cntr - CTR_WIDTH'(1); if (1 == delay_cntr) begin tap_state <= TAP_STATE_RUN; start_time <= timestamp; @@ -138,22 +194,18 @@ module VX_scope_tap #( end end TAP_STATE_RUN: begin - if (TRIGGER_ENABLE != 0) begin + if (TRIGGERW != 0) begin if (delta_flush || (triggers != prev_triggers)) begin - data_store[waddr] <= {probes, triggers}; - delta_store[waddr] <= delta; - waddr <= waddr + 1; + waddr <= waddr + ADDRW'(1); delta <= '0; delta_flush <= 0; end else begin - delta <= delta + 1; - delta_flush <= (delta == (MAX_IDLE_CTR-1)); + delta <= delta + IDLE_CTRW'(1); + delta_flush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end prev_triggers <= triggers; end else begin - data_store[waddr] <= {probes, triggers}; - delta_store[waddr] <= '0; - waddr <= waddr + 1; + 
waddr <= waddr + ADDRW'(1); end if (stop || (waddr >= waddr_end)) begin waddr <= waddr; @@ -208,8 +260,8 @@ module VX_scope_tap #( wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; - wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_store[raddr] >> read_offset)); - wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_store[raddr]); + wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_value >> read_offset)); + wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_value); always @(posedge clk) begin if (reset) begin @@ -230,7 +282,7 @@ module VX_scope_tap #( ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); end CTRL_STATE_RECV: begin - ser_tx_ctr <= ser_tx_ctr - 1; + ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin ctrl_state <= (cmd_scope_id == SCOPE_ID) ? CTRL_STATE_CMD : CTRL_STATE_IDLE; @@ -262,7 +314,7 @@ module VX_scope_tap #( `endif end CTRL_STATE_SEND: begin - ser_tx_ctr <= ser_tx_ctr - 1; + ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); case (get_type) GET_TYPE_WIDTH: begin bus_out_r <= 1'(DATAW >> ser_tx_ctr); diff --git a/hw/syn/altera/dut/Makefile b/hw/syn/altera/dut/Makefile index e5655c5fd..173408eca 100644 --- a/hw/syn/altera/dut/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -9,7 +9,7 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top +.PHONY: unittest scope mem_unit lmem cache fpu core issue vortex top ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: @@ -20,10 +20,10 @@ unittest: cp unittest/Makefile unittest/$(BUILD_DIR) $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & -pipeline: - mkdir -p pipeline/$(BUILD_DIR) - cp 
pipeline/Makefile pipeline/$(BUILD_DIR) - $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & +scope: + mkdir -p scope/$(BUILD_DIR) + cp scope/Makefile scope/$(BUILD_DIR) + $(MAKE) -C scope/$(BUILD_DIR) clean && $(MAKE) -C scope/$(BUILD_DIR) > scope/$(BUILD_DIR)/build.log 2>&1 & mem_unit: mkdir -p mem_unit/$(BUILD_DIR) diff --git a/hw/syn/altera/dut/scope/Makefile b/hw/syn/altera/dut/scope/Makefile new file mode 100755 index 000000000..405f05e8a --- /dev/null +++ b/hw/syn/altera/dut/scope/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_scope_tap +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index a3d373cb0..e961be453 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -36,7 +36,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED ifeq ($(DEVICE_FAMILY), stratix10) CONFIGS += -DALTERA_S10 @@ -55,9 +54,12 @@ CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 CONFIGS += $(CONFIGS_$(NUM_CORES)c) -# include paths +# include sources +RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) 
-I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR) @@ -96,7 +98,7 @@ ifdef PERF endif # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DNOPAE +XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI all: swconfig ip-gen setup build diff --git a/hw/syn/xilinx/dut/Makefile b/hw/syn/xilinx/dut/Makefile index 0255287fb..fe37eb4b8 100644 --- a/hw/syn/xilinx/dut/Makefile +++ b/hw/syn/xilinx/dut/Makefile @@ -5,17 +5,17 @@ PREFIX ?= build BUILD_DIR := $(PREFIX) -.PHONY: unittest pipeline mem_unit lmem cache fpu core issue vortex top +.PHONY: unittest scope mem_unit lmem cache fpu core issue vortex top unittest: mkdir -p unittest/$(BUILD_DIR) cp unittest/Makefile unittest/$(BUILD_DIR) $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & -pipeline: - mkdir -p pipeline/$(BUILD_DIR) - cp pipeline/Makefile pipeline/$(BUILD_DIR) - $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & +scope: + mkdir -p scope/$(BUILD_DIR) + cp scope/Makefile scope/$(BUILD_DIR) + $(MAKE) -C scope/$(BUILD_DIR) clean && $(MAKE) -C scope/$(BUILD_DIR) > scope/$(BUILD_DIR)/build.log 2>&1 & mem_unit: mkdir -p mem_unit/$(BUILD_DIR) diff --git a/hw/syn/xilinx/dut/scope/Makefile b/hw/syn/xilinx/dut/scope/Makefile new file mode 100644 index 000000000..405f05e8a --- /dev/null +++ b/hw/syn/xilinx/dut/scope/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_scope_tap +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) 
-I$(RTL_DIR)/libs diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 0e2aea5a9..fa0a7873b 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -63,10 +63,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_TEX -DBG_SCOPE_FLAGS += -DDBG_SCOPE_OM -DBG_SCOPE_FLAGS += -DDBG_SCOPE_RASTER -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # cluster configuration CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 @@ -78,9 +74,11 @@ CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 CONFIGS += $(CONFIGS_$(NUM_CORES)c) -# include paths +# include sources +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif TEX_INCLUDE = -I$(RTL_DIR)/tex @@ -152,7 +150,7 @@ CFLAGS += $(CONFIGS) CFLAGS += $(RTL_INCLUDE) # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DVIVADO, $(CFLAGS)) -I$(DPI_DIR) +XML_CFLAGS = $(filter-out -DSYNTHESIS -DVIVADO, $(CFLAGS)) $(RTL_PKGS) -I$(DPI_DIR) -DSV_DPI # RTL Kernel only supports Hardware and Hardware Emulation. 
ifneq ($(TARGET),$(findstring $(TARGET), hw hw_emu)) @@ -192,14 +190,10 @@ ifeq ($(TARGET), hw) cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin/vivado.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log - [ -f "$(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx" ] && cp $(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx $(BUILD_DIR)/bin/debug_nets.ltx endif -hwserver: - debug_hw --xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server & - chipscope: - debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/bin/debug_nets.ltx & + debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/bin/vortex_afu.ltx & clean: $(RMDIR) $(BUILD_DIR) diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index cba0137a3..a09d9198d 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -29,7 +29,7 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED + # cluster configuration CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 2def887e9..ffbfece13 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -30,7 +30,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # AFU parameters CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 6296b88eb..4ac3f6edd 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -30,7 +30,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += 
-DDBG_SCOPE_MSCHED # AFU parameters ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS))) From 00feb8b424012ee3b765a4042e50eaaa13184be2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 21 Sep 2024 08:39:20 -0700 Subject: [PATCH 289/488] scope analyzer bug fixes --- hw/rtl/afu/opae/vortex_afu.sv | 74 ++++++----- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 5 +- hw/rtl/afu/xrt/VX_afu_wrap.sv | 29 ++-- hw/rtl/core/VX_fetch.sv | 4 +- hw/rtl/core/VX_issue_slice.sv | 4 +- hw/rtl/core/VX_lsu_slice.sv | 4 +- hw/rtl/core/VX_lsu_unit.sv | 2 - hw/rtl/libs/VX_scope_tap.sv | 242 ++++++++++++++++------------------ runtime/common/scope.cpp | 89 +++++++------ runtime/xrt/vortex.cpp | 16 +-- sim/xrtsim/xrt.cpp | 1 - sim/xrtsim/xrt_sim.cpp | 11 -- sim/xrtsim/xrt_sim.h | 2 - 13 files changed, 241 insertions(+), 242 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 3e605462f..b0de60cf3 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -170,8 +170,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (reset) begin cmd_scope_reading <= 0; cmd_scope_writing <= 0; - scope_bus_in <= 0; + scope_bus_in <= 0; end else begin + scope_bus_in <= 0; if (scope_bus_out) begin cmd_scope_reading <= 1; scope_bus_ctr <= 63; @@ -183,20 +184,21 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ scope_bus_ctr <= 63; scope_bus_in <= 1; end - end - if (cmd_scope_writing) begin - scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); - scope_bus_ctr <= scope_bus_ctr - 6'd1; - if (scope_bus_ctr == 0) begin - cmd_scope_writing <= 0; - scope_bus_in <= 0; + if (cmd_scope_writing) begin + scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); + scope_bus_ctr <= scope_bus_ctr - 6'd1; + if (scope_bus_ctr == 0) begin + cmd_scope_writing <= 0; + scope_bus_ctr <= 0; + end end - end - if (cmd_scope_reading) begin - cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; - scope_bus_ctr 
<= scope_bus_ctr - 6'd1; - if (scope_bus_ctr == 0) begin - cmd_scope_reading <= 0; + if (cmd_scope_reading) begin + cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; + scope_bus_ctr <= scope_bus_ctr - 6'd1; + if (scope_bus_ctr == 0) begin + cmd_scope_reading <= 0; + scope_bus_ctr <= 0; + end end end end @@ -327,7 +329,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `ifdef SCOPE MMIO_SCOPE_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)) + `TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end `endif @@ -918,7 +920,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // Vortex /////////////////////////////////////////////////////////////////// - wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state); + wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state); wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data; @@ -1002,11 +1004,10 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef DBG_SCOPE_AFU - wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready; - wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready; - wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; - wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; - wire [LMEM_ADDR_WIDTH-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr; + wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; + wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; + wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready; + wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready; reg [STATE_WIDTH-1:0] state_prev; always @(posedge clk) begin @@ -1016,9 +1017,12 @@ module vortex_afu import ccip_if_pkg::*; import 
local_mem_cfg_pkg::*; import VX_ `define AFU_TRIGGERS { \ reset, \ + vx_reset, \ + vx_busy, \ + vx_mem_req_fire, \ + vx_mem_rsp_fire, \ + vx_dcr_wr_valid, \ state_changed, \ - mem_req_fire, \ - mem_rsp_fire, \ avs_write_fire, \ avs_read_fire, \ avs_waitrequest[0], \ @@ -1044,6 +1048,15 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `define AFU_PROBES { \ cmd_type, \ state, \ + vx_mem_req_rw, \ + vx_mem_req_byteen, \ + vx_mem_req_addr, \ + vx_mem_req_data, \ + vx_mem_req_tag, \ + vx_mem_rsp_data, \ + vx_mem_rsp_tag, \ + vx_dcr_wr_addr, \ + vx_dcr_wr_data, \ mmio_req_hdr.address, \ cp2af_sRxPort.c0.hdr.mdata, \ af2cp_sTxPort.c0.hdr.address, \ @@ -1056,8 +1069,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_ctr, \ cci_rd_req_ctr, \ cci_rd_rsp_ctr, \ - cci_wr_req_ctr, \ - mem_bus_if_addr \ + cci_wr_req_ctr \ } VX_scope_tap #( @@ -1066,13 +1078,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) scope_tap ( - .clk(clk), - .reset(scope_reset_w[0]), - .start(1'b0), - .stop(1'b0), + .clk (clk), + .reset (scope_reset_w[0]), + .start (1'b0), + .stop (1'b0), .triggers(`AFU_TRIGGERS), - .probes(`AFU_PROBES), - .bus_in(scope_bus_in_w[0]), + .probes (`AFU_PROBES), + .bus_in (scope_bus_in_w[0]), .bus_out(scope_bus_out_w[0]) ); `else diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index c842e25d5..12a55ec69 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -204,6 +204,7 @@ module VX_afu_ctrl #( scope_bus_rdata <= '0; scope_rdata_valid <= 0; end else begin + scope_bus_out_r <= 0; if (s_axi_aw_fire) begin is_scope_waddr <= (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_0) || (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_1); @@ -221,7 +222,6 @@ module VX_afu_ctrl #( scope_rdata_valid <= 0; scope_bus_out_r <= 1; scope_bus_ctr <= 63; - end if (scope_bus_in) begin 
cmd_scope_reading <= 1; @@ -234,6 +234,7 @@ module VX_afu_ctrl #( if (scope_bus_ctr == 0) begin cmd_scope_reading <= 0; scope_rdata_valid <= 1; + scope_bus_ctr <= 0; end end if (cmd_scope_writing) begin @@ -241,7 +242,7 @@ module VX_afu_ctrl #( scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; - scope_bus_out_r <= '0; + scope_bus_ctr <= 0; end end end diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index e51d8f17b..d5726dc73 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -299,8 +299,8 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// -`ifdef DBG_SCOPE_AFU `ifdef SCOPE +`ifdef DBG_SCOPE_AFU `define AFU_TRIGGERS { \ reset, \ ap_reset, \ @@ -308,9 +308,9 @@ module VX_afu_wrap #( ap_done, \ ap_idle, \ interrupt, \ - vx_busy_wait, \ - vx_busy, \ vx_reset, \ + vx_busy, \ + dcr_wr_valid, \ m_axi_mem_awvalid_a[0], \ m_axi_mem_awready_a[0], \ m_axi_mem_wvalid_a[0], \ @@ -320,19 +320,18 @@ module VX_afu_wrap #( m_axi_mem_arvalid_a[0], \ m_axi_mem_arready_a[0], \ m_axi_mem_rvalid_a[0], \ - m_axi_mem_rready_a[0], \ - dcr_wr_valid \ + m_axi_mem_rready_a[0] \ } `define AFU_PROBES { \ + dcr_wr_addr, \ + dcr_wr_data, \ vx_pending_writes, \ m_axi_mem_awaddr_u[0], \ m_axi_mem_awid_a[0], \ m_axi_mem_bid_a[0], \ m_axi_mem_araddr_u[0], \ m_axi_mem_arid_a[0], \ - m_axi_mem_rid_a[0], \ - dcr_wr_addr, \ - dcr_wr_data \ + m_axi_mem_rid_a[0] \ } VX_scope_tap #( .SCOPE_ID (0), @@ -340,18 +339,19 @@ module VX_afu_wrap #( .PROBEW ($bits(`AFU_PROBES)), .DEPTH (4096) ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers (`AFU_TRIGGERS), + .clk (clk), + .reset (scope_reset_w[0]), + .start (1'b0), + .stop (1'b0), + .triggers(`AFU_TRIGGERS), .probes (`AFU_PROBES), .bus_in (scope_bus_in_w[0]), - .bus_out (scope_bus_out_w[0]) + .bus_out(scope_bus_out_w[0]) ); `else `SCOPE_IO_UNUSED_W(0) `endif 
+`endif `ifdef CHIPSCOPE ila_afu ila_afu_inst ( .clk (clk), @@ -373,7 +373,6 @@ module VX_afu_wrap #( }) ); `endif -`endif `ifdef SIMULATION `ifndef VERILATOR diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index f07ab39f5..c1c0e6a57 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -135,8 +135,8 @@ module VX_fetch import VX_gpu_pkg::*; #( assign fetch_if.data.uuid = rsp_uuid; assign icache_bus_if.rsp_ready = fetch_if.ready; -`ifdef DBG_SCOPE_FETCH `ifdef SCOPE +`ifdef DBG_SCOPE_FETCH VX_scope_tap #( .SCOPE_ID (1), .TRIGGERW (4), @@ -166,6 +166,7 @@ module VX_fetch import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`endif `ifdef CHIPSCOPE ila_fetch ila_fetch_inst ( .clk (clk), @@ -174,7 +175,6 @@ module VX_fetch import VX_gpu_pkg::*; #( .probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready}) ); `endif -`endif `ifdef DBG_TRACE_MEM always @(posedge clk) begin diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 19b2ba8bb..38e54fcc0 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -93,8 +93,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .dispatch_if (dispatch_if) ); -`ifdef DBG_SCOPE_ISSUE `ifdef SCOPE +`ifdef DBG_SCOPE_ISSUE VX_scope_tap #( .SCOPE_ID (2), .TRIGGERW (2), @@ -133,6 +133,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`endif `ifdef CHIPSCOPE ila_issue ila_issue_inst ( .clk (clk), @@ -142,7 +143,6 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .probe3 ({writeback_if.valid, writeback_if.data}) ); `endif -`endif `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 0452d0c79..d703291c4 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -534,8 +534,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( end `endif -`ifdef DBG_SCOPE_LSU `ifdef SCOPE +`ifdef DBG_SCOPE_LSU VX_scope_tap #( 
.SCOPE_ID (3), .TRIGGERW (2), @@ -554,6 +554,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `else `SCOPE_IO_UNUSED() `endif +`endif `ifdef CHIPSCOPE ila_lsu ila_lsu_inst ( .clk (clk), @@ -562,6 +563,5 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready}) ); `endif -`endif endmodule diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index f4a1fc4ae..6e9e2081c 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -31,9 +31,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( localparam BLOCK_SIZE = `NUM_LSU_BLOCKS; localparam NUM_LANES = `NUM_LSU_LANES; -`ifdef SCOPE `SCOPE_IO_SWITCH (BLOCK_SIZE); -`endif VX_execute_if #( .NUM_LANES (NUM_LANES) diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index f44678079..f77a4e744 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -20,7 +20,7 @@ module VX_scope_tap #( parameter TRIGGERW = 16, // trigger signals width parameter PROBEW = 256, // probe signal width parameter DEPTH = 1024, // trace buffer depth - parameter IDLE_CTRW = 16, // idle time between triggers counter width + parameter IDLE_CTRW = 32, // idle time between triggers counter width parameter TX_DATAW = 64 // transfer data width ) ( input wire clk, @@ -64,33 +64,52 @@ module VX_scope_tap #( localparam GET_TYPE_DATA = 2'd3; localparam GET_TYPE_BITS = 2; - reg [`UP(TRIGGERW)-1:0] prev_triggers; - reg [IDLE_CTRW-1:0] delta; - reg [CTR_WIDTH-1:0] timestamp, start_time; - - reg [ADDRW-1:0] waddr, waddr_end; - reg write_en; - - reg cmd_start, delta_flush; - - reg [CTR_WIDTH-1:0] start_delay, delay_cntr; + `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; reg [GET_TYPE_BITS-1:0] get_type; + reg [CTR_WIDTH-1:0] timestamp, start_time; + reg [CTR_WIDTH-1:0] start_delay, delay_cntr; + reg [`UP(TRIGGERW)-1:0] prev_trig; + reg 
[IDLE_CTRW-1:0] delta; + reg cmd_start, dflush; + + reg [ADDRW-1:0] waddr, waddr_end; + wire [DATAW-1:0] data_in; + wire write_en; + wire [DATAW-1:0] data_value; wire [IDLE_CTRW-1:0] delta_value; - reg [TX_DATA_BITS-1:0] ser_tx_ctr; - reg [DATA_BITS-1:0] read_offset; reg [ADDRW-1:0] raddr; - reg read_data; - wire [DATAW-1:0] data_in; - if (TRIGGERW != 0) begin - assign data_in = {probes, triggers}; - end else begin - assign data_in = probes; + // + // trace capture + // + + if (TRIGGERW != 0) begin : g_delta_store + assign data_in = {probes, triggers}; + assign write_en = (tap_state == TAP_STATE_RUN) && (dflush || (triggers != prev_trig)); + VX_dp_ram #( + .DATAW (IDLE_CTRW), + .SIZE (DEPTH), + .NO_RWCHECK (1) + ) delta_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr), + .wdata (delta), + .raddr (raddr), + .rdata (delta_value) + ); + end else begin : g_no_delta_store + assign data_in = probes; + assign write_en = (tap_state == TAP_STATE_RUN); + assign delta_value = '0; end VX_dp_ram #( @@ -109,76 +128,38 @@ module VX_scope_tap #( .rdata (data_value) ); - if (TRIGGERW != 0) begin - VX_dp_ram #( - .DATAW (IDLE_CTRW), - .SIZE (DEPTH), - .NO_RWCHECK (1) - ) delta_store ( - .clk (clk), - .reset (reset), - .read (1'b1), - .wren (1'b1), - .write (write_en), - .waddr (waddr), - .wdata (delta), - .raddr (raddr), - .rdata (delta_value) - ); - end else begin - assign delta_value = '0; - end - - // - // trace capture - // - - wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); - - wire [ADDRW:0] count = (ADDRW+1)'(waddr) + (ADDRW+1)'(1); - - always @(*) begin - write_en = 0; - if (tap_state == TAP_STATE_RUN) begin - if (TRIGGERW != 0) begin - if (delta_flush || (triggers != prev_triggers)) begin - write_en = 1; - end - end else begin - write_en = 1; - end + always @(posedge clk) begin + if (reset) begin + timestamp <= '0; + end else begin + timestamp <= timestamp + CTR_WIDTH'(1); end end always @(posedge clk) begin if 
(reset) begin - tap_state <= TAP_STATE_IDLE; - raddr <= '0; - waddr <= '0; - delta <= '0; - prev_triggers <= '0; - read_offset <= '0; - read_data <= 0; - timestamp <= '0; + tap_state <= TAP_STATE_IDLE; + delta <= '0; + dflush <= 0; + prev_trig <= '0; + waddr <= '0; end else begin - timestamp <= timestamp + CTR_WIDTH'(1); - case (tap_state) TAP_STATE_IDLE: begin if (start || cmd_start) begin - delta <= '0; - delta_flush <= 1; + delta <= '0; + dflush <= 1; if (0 == start_delay) begin tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: scope_tap%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end else begin tap_state <= TAP_STATE_WAIT; delay_cntr <= start_delay; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) + `TRACE(2, ("%t: scope_tap%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) `endif end end @@ -189,65 +170,39 @@ module VX_scope_tap #( tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) + `TRACE(2, ("%t: scope_tap%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end end TAP_STATE_RUN: begin - if (TRIGGERW != 0) begin - if (delta_flush || (triggers != prev_triggers)) begin - waddr <= waddr + ADDRW'(1); - delta <= '0; - delta_flush <= 0; + dflush <= 0; + if (!stop && (waddr < waddr_end)) begin + if (TRIGGERW != 0) begin + if (dflush || (triggers != prev_trig)) begin + waddr <= waddr + ADDRW'(1); + delta <= '0; + end else begin + delta <= delta + IDLE_CTRW'(1); + dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); + end + prev_trig <= triggers; end else begin - delta <= delta + IDLE_CTRW'(1); - delta_flush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); + waddr <= waddr + ADDRW'(1); end - 
prev_triggers <= triggers; end else begin - waddr <= waddr + ADDRW'(1); - end - if (stop || (waddr >= waddr_end)) begin - waddr <= waddr; - `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) - `endif tap_state <= TAP_STATE_IDLE; + `ifdef DBG_TRACE_SCOPE + `TRACE(2, ("%t: scope_tap%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) + `endif end end default:; endcase - - if (ctrl_state == CTRL_STATE_SEND - && get_type == GET_TYPE_DATA - && ser_tx_ctr == 0) begin - if (~read_data) begin - read_data <= 1; - end else begin - if (DATAW > TX_DATAW) begin - `IGNORE_WARNINGS_BEGIN - if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin - read_offset <= read_offset + DATA_BITS'(TX_DATAW); - end else begin - raddr <= raddr_n; - read_data <= 0; - read_offset <= '0; - end - `IGNORE_WARNINGS_END - end else begin - raddr <= raddr_n; - read_data <= 0; - end - if (raddr_n == waddr) begin - raddr <= 0; - end - end - end end end // - // command controller + // trace controller // reg bus_out_r; @@ -256,35 +211,45 @@ module VX_scope_tap #( wire [TX_DATAW-1:0] ser_buf_in_n = {ser_buf_in[TX_DATAW-2:0], bus_in}; `UNUSED_VAR (ser_buf_in) + reg [TX_DATA_BITS-1:0] ser_tx_ctr; + reg [DATA_BITS-1:0] read_offset; + reg is_read_data; + wire [CMD_TYPE_BITS-1:0] cmd_type = ser_buf_in[CMD_TYPE_BITS-1:0]; wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_value >> read_offset)); - wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_value); + wire [TX_DATAW-1:0] get_data = is_read_data ? 
data_chunk : TX_DATAW'(delta_value); + + wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; + waddr_end <= ADDRW'(DEPTH-1); cmd_start <= 0; start_delay <= '0; - waddr_end <= ADDRW'(DEPTH-1); bus_out_r <= 0; + read_offset <= '0; + raddr <= '0; + is_read_data<= 0; + ser_tx_ctr <= '0; end else begin bus_out_r <= 0; cmd_start <= 0; - case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin + ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); ctrl_state <= CTRL_STATE_RECV; end - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); end CTRL_STATE_RECV: begin ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin + // check if command is for this scope ctrl_state <= (cmd_scope_id == SCOPE_ID) ? CTRL_STATE_CMD : CTRL_STATE_IDLE; end end @@ -302,33 +267,32 @@ module VX_scope_tap #( CMD_GET_START, CMD_GET_COUNT, CMD_GET_DATA: begin - ctrl_state <= CTRL_STATE_SEND; get_type <= GET_TYPE_BITS'(cmd_type); ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); bus_out_r <= 1; + ctrl_state <= CTRL_STATE_SEND; end default:; endcase `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) + `TRACE(2, ("%t: scope_tap%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) `endif end CTRL_STATE_SEND: begin - ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); case (get_type) GET_TYPE_WIDTH: begin bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) + `TRACE(2, ("%t: scope_tap%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) end `endif end GET_TYPE_COUNT: begin - bus_out_r <= 1'(count >> ser_tx_ctr); + bus_out_r <= 1'(waddr >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)) + `TRACE(2, ("%t: scope_tap%0d: SEND count=%0d\n", $time, SCOPE_ID, waddr)) end `endif end @@ 
-336,20 +300,46 @@ module VX_scope_tap #( bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) + `TRACE(2, ("%t: scope_tap%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) end `endif end GET_TYPE_DATA: begin bus_out_r <= 1'(get_data >> ser_tx_ctr); + if (ser_tx_ctr == 0) begin + if (is_read_data) begin + if (DATAW > TX_DATAW) begin + if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin + read_offset <= read_offset + DATA_BITS'(TX_DATAW); + end else begin + read_offset <= '0; + raddr <= raddr_n; + is_read_data <= 0; // swutch delta mode + end + end else begin + raddr <= raddr_n; + is_read_data <= 0; // swutch delta mode + end + if (raddr_n == waddr) begin + raddr <= 0; // end-of-samples reset + end + end else begin + is_read_data <= 1; // switch to data mode + end + end `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%t: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)) + if (is_read_data) begin + `TRACE(2, ("%t: scope_tap%0d: SEND data=0x%0h\n", $time, SCOPE_ID, get_data)) + end else begin + `TRACE(2, ("%t: scope_tap%0d: SEND delta=0x%0h\n", $time, SCOPE_ID, get_data)) + end end `endif end default:; endcase + ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; end diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 33b13cab4..7edd67692 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -28,7 +28,7 @@ #include #include -#define FRAME_FLUSH_SIZE 100 +#define SAMPLE_FLUSH_SIZE 100 #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -58,8 +58,8 @@ struct tap_signal_t { struct tap_t { uint32_t id; uint32_t width; - uint32_t frames; - uint32_t cur_frame; + uint32_t samples; + uint32_t cur_sample; uint64_t cycle_time; std::string path; std::vector signals; @@ -135,22 
+135,25 @@ static void dump_header(std::ofstream& ofs, std::vector& taps) { ofs << "enddefinitions $end" << std::endl; } -static tap_t* find_nearest_tap(std::vector& taps) { - tap_t* nearest = nullptr; +// return the earliest tap that has data to dump +static tap_t* find_earliest_tap(std::vector& taps) { + tap_t* earliest = nullptr; for (auto& tap : taps) { - if (tap.cur_frame == tap.frames) - continue; - if (nearest != nullptr) { - if (tap.cycle_time < nearest->cycle_time) - nearest = &tap; + if (tap.samples == 0) + continue; // skip empty taps + if (tap.cur_sample == tap.samples) + continue; // skip finished taps + if (earliest != nullptr) { + if (tap.cycle_time < earliest->cycle_time) + earliest = &tap; } else { - nearest = &tap; + earliest = &tap; } } - return nearest; + return earliest; } -static uint64_t advance_time(std::ofstream& ofs, uint64_t next_time, uint64_t cur_time) { +static uint64_t advance_time(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { while (cur_time < next_time) { ofs << '#' << (cur_time * 2 + 0) << std::endl; ofs << "b0 0" << std::endl; @@ -163,7 +166,7 @@ static uint64_t advance_time(std::ofstream& ofs, uint64_t next_time, uint64_t cu static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { uint32_t signal_offset = 0; - uint32_t frame_offset = 0; + uint32_t sample_offset = 0; uint64_t word; std::vector signal_data(tap->width); @@ -176,24 +179,24 @@ static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &word)); do { - uint32_t word_offset = frame_offset % 64; + uint32_t word_offset = sample_offset % 64; signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? 
'1' : '0'; ++signal_offset; - ++frame_offset; + ++sample_offset; if (signal_offset == signal_width) { signal_data[signal_width] = 0; // string null termination ofs << 'b' << signal_data.data() << ' ' << signal_it->id << std::endl; - if (frame_offset == tap->width) { - // end-of-frame - ++tap->cur_frame; - if (tap->cur_frame != tap->frames) { + if (sample_offset == tap->width) { + // end-of-sample + ++tap->cur_sample; + if (tap->cur_sample != tap->samples) { // read next delta CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &word)); tap->cycle_time += 1 + word; - if (0 == (tap->cur_frame % FRAME_FLUSH_SIZE)) { + if (0 == (tap->cur_sample % SAMPLE_FLUSH_SIZE)) { ofs << std::flush; - std::cout << std::dec << "[SCOPE] flush tap #" << tap->id << ": "<< tap->cur_frame << "/" << tap->frames << " frames, next_time=" << tap->cycle_time << std::endl; + std::cout << std::dec << "[SCOPE] flush tap #" << tap->id << ": "<< tap->cur_sample << "/" << tap->samples << " samples, next_time=" << tap->cycle_time << std::endl; } } break; @@ -202,8 +205,8 @@ static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { ++signal_it; signal_width = signal_it->width; } - } while ((frame_offset % 64) != 0); - } while (frame_offset != tap->width); + } while ((sample_offset % 64) != 0); + } while (sample_offset != tap->width); return 0; } @@ -285,8 +288,8 @@ int vx_scope_stop(vx_device_h hdevice) { _tap.width = tap["width"].get(); _tap.path = tap["path"].get(); _tap.cycle_time = 0; - _tap.frames = 0; - _tap.cur_frame = 0; + _tap.samples = 0; + _tap.cur_sample = 0; for (auto& signal : tap["signals"]) { auto name = signal[0].get(); @@ -299,19 +302,15 @@ int vx_scope_stop(vx_device_h hdevice) { } } - // stop recording + std::cout << "[SCOPE] stop recording..." 
<< std::endl; + for (auto& tap : taps) { uint64_t cmd_stop = (0 << 11) | (tap.id << 3) | CMD_SET_STOP; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_stop)); } - std::cout << "[SCOPE] trace dump begin..." << std::endl; + std::cout << "[SCOPE] load trace info..." << std::endl; - std::ofstream ofs("scope.vcd"); - - dump_header(ofs, taps); - - // load trace info for (auto& tap : taps) { uint64_t count, start, delta; @@ -320,39 +319,53 @@ int vx_scope_stop(vx_device_h hdevice) { CHECK_ERR(g_callback.registerWrite(hdevice, cmd_count)); CHECK_ERR(g_callback.registerRead(hdevice, &count)); + if (count == 0) + continue; + // get start uint64_t cmd_start = (tap.id << 3) | CMD_GET_START; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_start)); CHECK_ERR(g_callback.registerRead(hdevice, &start)); - // get data + // get delta uint64_t cmd_data = (tap.id << 3) | CMD_GET_DATA; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &delta)); - tap.frames = count; + tap.samples = count; tap.cycle_time = 1 + start + delta; std::cout << std::dec << "[SCOPE] tap #" << tap.id << ": width=" << tap.width - << ", num_frames=" << tap.frames + << ", num_samples=" << tap.samples << ", start_time=" << tap.cycle_time << ", path=" << tap.path << std::endl; } + std::cout << "[SCOPE] dump header..." << std::endl; + + std::ofstream ofs("scope.vcd"); + + dump_header(ofs, taps); + + std::cout << "[SCOPE] dump taps..." << std::endl; + uint64_t cur_time = 0; while (true) { // find the nearest tap - auto tap = find_nearest_tap(taps); + auto tap = find_earliest_tap(taps); if (tap == nullptr) break; // advance clock - cur_time = advance_time(ofs, tap->cycle_time, cur_time); + cur_time = advance_time(ofs, cur_time, tap->cycle_time); // dump tap CHECK_ERR(dump_tap(ofs, tap, hdevice)); }; + // advance clock + advance_time(ofs, cur_time, cur_time + 1); + std::cout << "[SCOPE] trace dump done! 
- " << (cur_time/2) << " cycles" << std::endl; return 0; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 511a87be5..48926e80b 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -581,14 +581,14 @@ public: return err; }); #ifdef CPP_API - xrtBuffer.write(host_ptr, asize, bo_offset); - xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset); + xrtBuffer.write(host_ptr, size, bo_offset); + xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset); #else - CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, asize, bo_offset), { + CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); - CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset), { + CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); @@ -627,14 +627,14 @@ public: return err; }); #ifdef CPP_API - xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset); - xrtBuffer.read(host_ptr, asize, bo_offset); + xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset); + xrtBuffer.read(host_ptr, size, bo_offset); #else - CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset), { + CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); - CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, asize, bo_offset), { + CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); diff --git a/sim/xrtsim/xrt.cpp b/sim/xrtsim/xrt.cpp index c0b5aac28..2123358a0 100644 --- a/sim/xrtsim/xrt.cpp +++ b/sim/xrtsim/xrt.cpp @@ -66,7 +66,6 @@ extern int xrtDeviceClose(xrtDeviceHandle dhdl) { if (dhdl == nullptr) return -1; auto sim = reinterpret_cast(dhdl); - sim->shutdown(); delete sim; return 0; } diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 1aaccc392..a2725f32d 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ 
b/sim/xrtsim/xrt_sim.cpp @@ -197,13 +197,6 @@ public: return 0; } - void shutdown() { - stop_ = true; - if (future_.valid()) { - future_.wait(); - } - } - int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { if (bank_id >= M_AXI_MEM_NUM_BANKS) return -1; @@ -615,10 +608,6 @@ int xrt_sim::init() { return impl_->init(); } -void xrt_sim::shutdown() { - impl_->shutdown(); -} - int xrt_sim::mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { return impl_->mem_alloc(size, bank_id, addr); } diff --git a/sim/xrtsim/xrt_sim.h b/sim/xrtsim/xrt_sim.h index 5823f468f..6a2d5d7da 100644 --- a/sim/xrtsim/xrt_sim.h +++ b/sim/xrtsim/xrt_sim.h @@ -25,8 +25,6 @@ public: int init(); - void shutdown(); - int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr); int mem_free(uint32_t bank_id, uint64_t addr); From b8199decf47028b1f59cb34f16fd3fcffe50462f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 03:54:40 -0700 Subject: [PATCH 290/488] opaesim and xrtsim multi-bank memory support --- ci/regression.sh.in | 5 +- hw/rtl/Vortex_axi.sv | 222 +++++++++++++++------------ hw/rtl/afu/opae/local_mem_cfg_pkg.sv | 14 +- hw/rtl/afu/opae/vortex_afu.sv | 10 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 44 ++---- hw/rtl/afu/xrt/VX_afu_wrap.sv | 32 ++-- hw/rtl/afu/xrt/vortex_afu.v | 14 +- hw/rtl/afu/xrt/vortex_afu.vh | 20 ++- hw/rtl/libs/VX_avs_adapter.sv | 31 ++-- hw/rtl/libs/VX_axi_adapter.sv | 44 ++++-- hw/rtl/libs/VX_mem_adapter.sv | 5 +- hw/syn/altera/dut/top/Makefile | 20 ++- hw/syn/altera/opae/Makefile | 2 +- runtime/include/vortex.h | 1 + runtime/opae/vortex.cpp | 16 +- runtime/rtlsim/vortex.cpp | 3 + runtime/simx/vortex.cpp | 3 + runtime/xrt/Makefile | 1 + runtime/xrt/vortex.cpp | 144 +++-------------- sim/common/bitmanip.h | 36 ++++- sim/common/mem_alloc.h | 17 +- sim/opaesim/Makefile | 21 +-- sim/opaesim/opae_sim.cpp | 43 +++--- sim/opaesim/vortex_afu_shim.sv | 20 +-- sim/xrtsim/Makefile | 19 ++- sim/xrtsim/vortex_afu_shim.sv | 75 ++++----- 
sim/xrtsim/xrt_sim.cpp | 90 ++++++----- 27 files changed, 488 insertions(+), 464 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 8c88c368a..37f5d2b20 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -273,10 +273,11 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 # test single-bank DRAM - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress # test 27-bit DRAM address - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=xrt --app=mstress echo "configuration-2 tests done!" } diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index a15a478ee..17d5d660e 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -15,7 +15,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, - parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH, + parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8), parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, parameter AXI_NUM_BANKS = 1 )( @@ -82,11 +82,10 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); - localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; - - `STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH)) - `STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH)) - `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) + localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; 
+ localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH); + + `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) wire mem_req_valid; wire mem_req_rw; @@ -101,94 +100,6 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; wire mem_rsp_ready; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS]; - - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS]; - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS]; - - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS]; - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS]; - - for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin : g_padding - assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]); - assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]); - - assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]); - assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]); - - assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]); - assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]); - end - - VX_axi_adapter #( - .DATA_WIDTH (`VX_MEM_DATA_WIDTH), - .ADDR_WIDTH (`MEM_ADDR_WIDTH), - .TAG_WIDTH (`VX_MEM_TAG_WIDTH), - .NUM_BANKS (AXI_NUM_BANKS), - .RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 
2 : 0) - ) axi_adapter ( - .clk (clk), - .reset (reset), - - .mem_req_valid (mem_req_valid), - .mem_req_rw (mem_req_rw), - .mem_req_byteen (mem_req_byteen), - .mem_req_addr (mem_req_addr), - .mem_req_data (mem_req_data), - .mem_req_tag (mem_req_tag), - .mem_req_ready (mem_req_ready), - - .mem_rsp_valid (mem_rsp_valid), - .mem_rsp_data (mem_rsp_data), - .mem_rsp_tag (mem_rsp_tag), - .mem_rsp_ready (mem_rsp_ready), - - .m_axi_awvalid (m_axi_awvalid), - .m_axi_awready (m_axi_awready), - .m_axi_awaddr (m_axi_awaddr_unqual), - .m_axi_awid (m_axi_awid_unqual), - .m_axi_awlen (m_axi_awlen), - .m_axi_awsize (m_axi_awsize), - .m_axi_awburst (m_axi_awburst), - .m_axi_awlock (m_axi_awlock), - .m_axi_awcache (m_axi_awcache), - .m_axi_awprot (m_axi_awprot), - .m_axi_awqos (m_axi_awqos), - .m_axi_awregion (m_axi_awregion), - - .m_axi_wvalid (m_axi_wvalid), - .m_axi_wready (m_axi_wready), - .m_axi_wdata (m_axi_wdata), - .m_axi_wstrb (m_axi_wstrb), - .m_axi_wlast (m_axi_wlast), - - .m_axi_bvalid (m_axi_bvalid), - .m_axi_bready (m_axi_bready), - .m_axi_bid (m_axi_bid_unqual), - .m_axi_bresp (m_axi_bresp), - - .m_axi_arvalid (m_axi_arvalid), - .m_axi_arready (m_axi_arready), - .m_axi_araddr (m_axi_araddr_unqual), - .m_axi_arid (m_axi_arid_unqual), - .m_axi_arlen (m_axi_arlen), - .m_axi_arsize (m_axi_arsize), - .m_axi_arburst (m_axi_arburst), - .m_axi_arlock (m_axi_arlock), - .m_axi_arcache (m_axi_arcache), - .m_axi_arprot (m_axi_arprot), - .m_axi_arqos (m_axi_arqos), - .m_axi_arregion (m_axi_arregion), - - .m_axi_rvalid (m_axi_rvalid), - .m_axi_rready (m_axi_rready), - .m_axi_rdata (m_axi_rdata), - .m_axi_rlast (m_axi_rlast) , - .m_axi_rid (m_axi_rid_unqual), - .m_axi_rresp (m_axi_rresp) - ); - `SCOPE_IO_SWITCH (1) Vortex vortex ( @@ -217,4 +128,127 @@ module Vortex_axi import VX_gpu_pkg::*; #( .busy (busy) ); + wire mem_req_valid_a; + wire mem_req_rw_a; + wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a; + wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a; + wire [AXI_DATA_WIDTH-1:0] 
mem_req_data_a; + wire [AXI_TID_WIDTH-1:0] mem_req_tag_a; + wire mem_req_ready_a; + + wire mem_rsp_valid_a; + wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a; + wire [AXI_TID_WIDTH-1:0] mem_rsp_tag_a; + wire mem_rsp_ready_a; + + VX_mem_adapter #( + .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), + .DST_DATA_WIDTH (AXI_DATA_WIDTH), + .SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), + .DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), + .SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH), + .DST_TAG_WIDTH (AXI_TID_WIDTH), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (0) + ) mem_adapter ( + .clk (clk), + .reset (reset), + + .mem_req_valid_in (mem_req_valid), + .mem_req_addr_in (mem_req_addr), + .mem_req_rw_in (mem_req_rw), + .mem_req_byteen_in (mem_req_byteen), + .mem_req_data_in (mem_req_data), + .mem_req_tag_in (mem_req_tag), + .mem_req_ready_in (mem_req_ready), + + .mem_rsp_valid_in (mem_rsp_valid), + .mem_rsp_data_in (mem_rsp_data), + .mem_rsp_tag_in (mem_rsp_tag), + .mem_rsp_ready_in (mem_rsp_ready), + + .mem_req_valid_out (mem_req_valid_a), + .mem_req_addr_out (mem_req_addr_a), + .mem_req_rw_out (mem_req_rw_a), + .mem_req_byteen_out (mem_req_byteen_a), + .mem_req_data_out (mem_req_data_a), + .mem_req_tag_out (mem_req_tag_a), + .mem_req_ready_out (mem_req_ready_a), + + .mem_rsp_valid_out (mem_rsp_valid_a), + .mem_rsp_data_out (mem_rsp_data_a), + .mem_rsp_tag_out (mem_rsp_tag_a), + .mem_rsp_ready_out (mem_rsp_ready_a) + ); + + VX_axi_adapter #( + .DATA_WIDTH (AXI_DATA_WIDTH), + .ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), + .TAG_WIDTH (AXI_TID_WIDTH), + .NUM_BANKS (AXI_NUM_BANKS), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .BANK_INTERLEAVE (0), + .RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 
2 : 0) + ) axi_adapter ( + .clk (clk), + .reset (reset), + + .mem_req_valid (mem_req_valid_a), + .mem_req_rw (mem_req_rw_a), + .mem_req_byteen (mem_req_byteen_a), + .mem_req_addr (mem_req_addr_a), + .mem_req_data (mem_req_data_a), + .mem_req_tag (mem_req_tag_a), + .mem_req_ready (mem_req_ready_a), + + .mem_rsp_valid (mem_rsp_valid_a), + .mem_rsp_data (mem_rsp_data_a), + .mem_rsp_tag (mem_rsp_tag_a), + .mem_rsp_ready (mem_rsp_ready_a), + + .m_axi_awvalid (m_axi_awvalid), + .m_axi_awready (m_axi_awready), + .m_axi_awaddr (m_axi_awaddr), + .m_axi_awid (m_axi_awid), + .m_axi_awlen (m_axi_awlen), + .m_axi_awsize (m_axi_awsize), + .m_axi_awburst (m_axi_awburst), + .m_axi_awlock (m_axi_awlock), + .m_axi_awcache (m_axi_awcache), + .m_axi_awprot (m_axi_awprot), + .m_axi_awqos (m_axi_awqos), + .m_axi_awregion (m_axi_awregion), + + .m_axi_wvalid (m_axi_wvalid), + .m_axi_wready (m_axi_wready), + .m_axi_wdata (m_axi_wdata), + .m_axi_wstrb (m_axi_wstrb), + .m_axi_wlast (m_axi_wlast), + + .m_axi_bvalid (m_axi_bvalid), + .m_axi_bready (m_axi_bready), + .m_axi_bid (m_axi_bid), + .m_axi_bresp (m_axi_bresp), + + .m_axi_arvalid (m_axi_arvalid), + .m_axi_arready (m_axi_arready), + .m_axi_araddr (m_axi_araddr), + .m_axi_arid (m_axi_arid), + .m_axi_arlen (m_axi_arlen), + .m_axi_arsize (m_axi_arsize), + .m_axi_arburst (m_axi_arburst), + .m_axi_arlock (m_axi_arlock), + .m_axi_arcache (m_axi_arcache), + .m_axi_arprot (m_axi_arprot), + .m_axi_arqos (m_axi_arqos), + .m_axi_arregion (m_axi_arregion), + + .m_axi_rvalid (m_axi_rvalid), + .m_axi_rready (m_axi_rready), + .m_axi_rdata (m_axi_rdata), + .m_axi_rlast (m_axi_rlast), + .m_axi_rid (m_axi_rid), + .m_axi_rresp (m_axi_rresp) + ); + endmodule diff --git a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv index ef9fae28a..8b0ebaa0b 100644 --- a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv +++ b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv @@ -30,7 +30,17 @@ //`include "platform_afu_top_config.vh" -`ifdef 
PLATFORM_PROVIDES_LOCAL_MEMORY +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH `PLATFORM_MEMORY_ADDR_WIDTH +`endif + +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH +`endif + +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH +`endif package local_mem_cfg_pkg; @@ -57,5 +67,3 @@ package local_mem_cfg_pkg; typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask; endpackage // local_mem_cfg_pkg - -`endif // PLATFORM_PROVIDES_LOCAL_MEMORY diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index b0de60cf3..4060a3011 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -42,7 +42,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ ); localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; - localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); + localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH)); localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt); localparam CCI_DATA_WIDTH = $bits(t_ccip_clData); @@ -96,9 +96,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam STATE_DCR_WRITE = 4; localparam STATE_WIDTH = `CLOG2(STATE_DCR_WRITE+1); + localparam BANK_BYTE_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + `CLOG2(`PLATFORM_MEMORY_DATA_WIDTH/8); + wire [127:0] afu_id = `AFU_ACCEL_UUID; - wire [63:0] dev_caps = {16'b0, + wire [63:0] dev_caps = {8'b0, + 5'(BANK_BYTE_ADDR_WIDTH-16), + 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? 
`LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -601,6 +605,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .TAG_WIDTH (AVS_REQ_TAGW + 1), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), + .AVS_ADDR_WIDTH($bits(t_local_mem_addr)), + .BANK_INTERLEAVE (1), .REQ_OUT_BUF (2), .RSP_OUT_BUF (0) ) avs_adapter ( diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 12a55ec69..e30219270 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -14,21 +14,21 @@ `include "vortex_afu.vh" module VX_afu_ctrl #( - parameter AXI_ADDR_WIDTH = 8, - parameter AXI_DATA_WIDTH = 32, - parameter AXI_NUM_BANKS = 1 + parameter S_AXI_ADDR_WIDTH = 8, + parameter S_AXI_DATA_WIDTH = 32, + parameter M_AXI_ADDR_WIDTH = 25 ) ( // axi4 lite slave signals input wire clk, input wire reset, input wire s_axi_awvalid, - input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, + input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr, output wire s_axi_awready, input wire s_axi_wvalid, - input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata, - input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb, + input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata, + input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb, output wire s_axi_wready, output wire s_axi_bvalid, @@ -36,11 +36,11 @@ module VX_afu_ctrl #( input wire s_axi_bready, input wire s_axi_arvalid, - input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr, + input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_araddr, output wire s_axi_arready, output wire s_axi_rvalid, - output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata, + output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata, output wire [1:0] s_axi_rresp, input wire s_axi_rready, @@ -56,8 +56,6 @@ module VX_afu_ctrl #( output wire scope_bus_out, `endif - output wire [63:0] mem_base [AXI_NUM_BANKS], - output wire dcr_wr_valid, output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data @@ -125,10 +123,6 @@ module 
VX_afu_ctrl #( //ADDR_SCP_CTRL = 8'h3C, `endif - ADDR_MEM_0 = 8'h40, - ADDR_MEM_1 = 8'h44, - //ADDR_MEM_CTRL = 8'h48, - ADDR_BITS = 8; localparam @@ -144,7 +138,9 @@ module VX_afu_ctrl #( RSTATE_WIDTH = 2; // device caps - wire [63:0] dev_caps = {16'b0, + wire [63:0] dev_caps = {8'b0, + 5'(M_AXI_ADDR_WIDTH-16), + 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -174,7 +170,6 @@ module VX_afu_ctrl #( reg gie_r; reg [1:0] ier_r; reg [1:0] isr_r; - reg [63:0] mem_r [AXI_NUM_BANKS]; reg [31:0] dcra_r; reg [31:0] dcrv_r; reg dcr_wr_valid_r; @@ -311,10 +306,6 @@ module VX_afu_ctrl #( dcra_r <= '0; dcrv_r <= '0; dcr_wr_valid_r <= 0; - - for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin - mem_r[i] <= '0; - end end else begin dcr_wr_valid_r <= 0; ap_reset_r <= 0; @@ -353,16 +344,7 @@ module VX_afu_ctrl #( dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask); dcr_wr_valid_r <= 1; end - default: begin - for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin - if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin - mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask); - end - if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin - mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask); - end - end - end + default:; endcase if (ier_r[0] & ap_done) @@ -453,8 +435,6 @@ module VX_afu_ctrl #( assign ap_start = ap_start_r; assign interrupt = gie_r & (| isr_r); - assign mem_base = mem_r; - assign dcr_wr_valid = dcr_wr_valid_r; assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r); assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r); diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index d5726dc73..ca6fed1ae 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -16,16 +16,17 @@ module VX_afu_wrap #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter 
C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ID_WIDTH = 32, + parameter C_M_AXI_MEM_ADDR_WIDTH = 25, + parameter C_M_AXI_MEM_DATA_WIDTH = 512, + parameter C_M_AXI_MEM_NUM_BANKS = 2 ) ( // System signals input wire clk, input wire reset, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -48,7 +49,6 @@ module VX_afu_wrap #( output wire interrupt ); - localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS; localparam STATE_IDLE = 0; localparam STATE_RUN = 1; @@ -80,7 +80,7 @@ module VX_afu_wrap #( wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; // convert memory interface to array - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; @@ -88,8 +88,6 @@ module VX_afu_wrap #( reg vx_reset = 1; // asserted at initialization wire vx_busy; - wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS]; - wire dcr_wr_valid; wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data; @@ -181,9 +179,9 @@ module VX_afu_wrap #( end VX_afu_ctrl #( - .AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), - .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), - .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) + .S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), + .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), + .M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH) ) afu_ctrl ( .clk (clk), .reset (reset), @@ -218,26 +216,24 @@ module VX_afu_wrap #( .scope_bus_out (scope_bus_in), `endif - .mem_base (mem_base), - .dcr_wr_valid (dcr_wr_valid), .dcr_wr_addr (dcr_wr_addr), .dcr_wr_data (dcr_wr_data) ); - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u 
[C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing - assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); - assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); + assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); + assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); end `SCOPE_IO_SWITCH (2) Vortex_axi #( .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), - .AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH), + .AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) vortex_axi ( diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 0e042c32b..985d029cf 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -16,16 +16,17 @@ module vortex_afu #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `M_AXI_MEM_ADDR_WIDTH, - parameter C_M_AXI_MEM_DATA_WIDTH = `M_AXI_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, + parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( // System signals input wire ap_clk, input wire ap_rst_n, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, 
@@ -54,12 +55,13 @@ module vortex_afu #( .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), - .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH) + .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) afu_wrap ( .clk (ap_clk), .reset (~ap_rst_n), - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awready (s_axi_ctrl_awready), diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index bf70cb885..f35980c2a 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -14,20 +14,24 @@ `ifndef VORTEX_AFU_VH `define VORTEX_AFU_VH -`ifndef M_AXI_MEM_NUM_BANKS -`define M_AXI_MEM_NUM_BANKS 1 +`ifndef PLATFORM_MEMORY_BANKS +`define PLATFORM_MEMORY_BANKS 2 `endif -`ifndef M_AXI_MEM_ADDR_WIDTH -`define M_AXI_MEM_ADDR_WIDTH 34 +`ifndef PLATFORM_MEMORY_ADDR_WIDTH +`define PLATFORM_MEMORY_ADDR_WIDTH 25 `endif -`ifndef M_AXI_MEM_DATA_WIDTH -`define M_AXI_MEM_DATA_WIDTH 512 +`ifndef PLATFORM_MEMORY_DATA_WIDTH +`define PLATFORM_MEMORY_DATA_WIDTH 512 `endif -`ifndef M_AXI_MEM_ID_WIDTH -`define M_AXI_MEM_ID_WIDTH 32 +`ifndef PLATFORM_MEMORY_OFFSET +`define PLATFORM_MEMORY_OFFSET 0 +`endif + +`ifndef PLATFORM_MEMORY_ID_WIDTH +`define PLATFORM_MEMORY_ID_WIDTH 32 `endif `define GEN_AXI_MEM(i) \ diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 8d308ec36..fe9a9a53b 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -21,6 +21,8 @@ module VX_avs_adapter #( parameter NUM_BANKS = 1, parameter TAG_WIDTH = 1, parameter RD_QUEUE_SIZE = 1, + parameter BANK_INTERLEAVE= 0, + parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS), parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 ) ( @@ -45,7 +47,7 @@ module VX_avs_adapter #( // 
AVS bus output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS], input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] avs_address [NUM_BANKS], + output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS], input wire avs_waitrequest [NUM_BANKS], output wire avs_write [NUM_BANKS], output wire avs_read [NUM_BANKS], @@ -53,28 +55,35 @@ module VX_avs_adapter #( output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS], input wire avs_readdatavalid [NUM_BANKS] ); - localparam DATA_SIZE = DATA_WIDTH/8; - localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); - localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); - localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; + localparam DATA_SIZE = DATA_WIDTH/8; + localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; + + `STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid parameter")) // Requests handling ////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; - wire [BANK_ADDRW-1:0] req_bank_sel; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; if (NUM_BANKS > 1) begin : g_bank_sel - assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin : g_bank_sel + if (BANK_INTERLEAVE) begin : g_interleave + assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + end else begin : g_no_interleave + assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + end + end else begin : g_no_bank_sel assign req_bank_sel = '0; + assign req_bank_off = mem_req_addr; end - assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS]; - for (genvar i = 0; i < NUM_BANKS; ++i) 
begin : g_req_queue_push assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end @@ -142,7 +151,7 @@ module VX_avs_adapter #( assign avs_read[i] = valid_out && ~rw_out; assign avs_write[i] = valid_out && rw_out; - assign avs_address[i] = ADDR_WIDTH'(addr_out); + assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out); assign avs_byteenable[i] = byteen_out; assign avs_writedata[i] = data_out; assign avs_burstcount[i] = BURST_WIDTH'(1); diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 952497186..bdd699053 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -19,7 +19,8 @@ module VX_axi_adapter #( parameter ADDR_WIDTH = 32, parameter TAG_WIDTH = 8, parameter NUM_BANKS = 1, - parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), + parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), + parameter BANK_INTERLEAVE= 0, parameter RSP_OUT_BUF = 0 ) ( input wire clk, @@ -29,7 +30,7 @@ module VX_axi_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, @@ -43,7 +44,7 @@ module VX_axi_adapter #( // AXI write request address channel output wire m_axi_awvalid [NUM_BANKS], input wire m_axi_awready [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], + output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS], @@ -70,7 +71,7 @@ module VX_axi_adapter #( // AXI read address channel output wire m_axi_arvalid [NUM_BANKS], input wire m_axi_arready [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS], + output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr 
[NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS], @@ -89,15 +90,28 @@ module VX_axi_adapter #( input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS], input wire [1:0] m_axi_rresp [NUM_BANKS] ); - localparam AXSIZE = `CLOG2(DATA_WIDTH/8); - localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); - localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); + localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8); + localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; + localparam DST_ADDR_WDITH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8); - wire [BANK_ADDRW-1:0] req_bank_sel; - if (NUM_BANKS > 1) begin : g_req_bank_sel - assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin : g_req_bank_sel_0 + `STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WDITH), ("invalid tag width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WDITH)) + + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + wire [BANK_OFFSETW-1:0] req_bank_off; + + if (NUM_BANKS > 1) begin : g_bank_sel + if (BANK_INTERLEAVE) begin : g_interleave + assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + end else begin : g_no_interleave + assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + end + end else begin : g_no_bank_sel assign req_bank_sel = '0; + assign req_bank_off = mem_req_addr; end wire mem_req_fire = mem_req_valid && mem_req_ready; @@ -134,10 +148,10 @@ module VX_axi_adapter #( // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; - assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; + assign m_axi_awaddr[i] = 
AXI_ADDR_WIDTH'(req_bank_off); assign m_axi_awid[i] = mem_req_tag; assign m_axi_awlen[i] = 8'b00000000; - assign m_axi_awsize[i] = 3'(AXSIZE); + assign m_axi_awsize[i] = 3'(DATA_SIZE); assign m_axi_awburst[i] = 2'b00; assign m_axi_awlock[i] = 2'b00; assign m_axi_awcache[i] = 4'b0000; @@ -166,10 +180,10 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); - assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; + assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off); assign m_axi_arid[i] = mem_req_tag; assign m_axi_arlen[i] = 8'b00000000; - assign m_axi_arsize[i] = 3'(AXSIZE); + assign m_axi_arsize[i] = 3'(DATA_SIZE); assign m_axi_arburst[i] = 2'b00; assign m_axi_arlock[i] = 2'b00; assign m_axi_arcache[i] = 4'b0000; diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 5f32e1aa1..066de829f 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -53,8 +53,6 @@ module VX_mem_adapter #( input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out, output wire mem_rsp_ready_out ); - `STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!")) - localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8); localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH); localparam SRC_LDATAW = `CLOG2(SRC_DATA_WIDTH); @@ -74,6 +72,7 @@ module VX_mem_adapter #( wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w; wire mem_rsp_ready_in_w; + `UNUSED_VAR (mem_req_tag_in) `UNUSED_VAR (mem_rsp_tag_out) if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data @@ -122,7 +121,7 @@ module VX_mem_adapter #( assign mem_rsp_valid_in_w = mem_rsp_valid_out; assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx]; - assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]); + assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[DST_TAG_WIDTH-1:D]); assign mem_rsp_ready_out = mem_rsp_ready_in_w; end 
else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index 99889f4ae..e4dfae274 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -7,17 +7,21 @@ include ../../common.mk # AFU parameters CONFIGS += -DNOPAE CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + endif endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 endif #CONFIGS += -DNUM_CORES=2 diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index e961be453..19f9d0836 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -98,7 +98,7 @@ ifdef PERF endif # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI 
+XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=26 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI all: swconfig ip-gen setup build diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index 853da5994..8fa6c2057 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -35,6 +35,7 @@ typedef void* vx_buffer_h; #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_ISA_FLAGS 0x7 #define VX_CAPS_NUM_MEM_BANKS 0x8 +#define VX_CAPS_MEM_BANK_SIZE 0x9 // device isa flags #define VX_ISA_STD_A (1ull << ISA_STD_A) diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 1bc913cc8..f06f34bea 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -163,11 +163,6 @@ public: }); { - // retrieve FPGA global memory size - CHECK_FPGA_ERR(api_.fpgaPropertiesGetLocalMemorySize(filter, &global_mem_size_), { - global_mem_size_ = GLOBAL_MEM_SIZE; - }); - // Load ISA CAPS CHECK_FPGA_ERR(api_.fpgaReadMMIO64(fpga_, 0, MMIO_ISA_CAPS, &isa_caps_), { api_.fpgaClose(fpga_); @@ -179,6 +174,12 @@ public: api_.fpgaClose(fpga_); return -1; }); + + // Determine global memory size + uint64_t num_banks, bank_size; + this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks); + this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size); + global_mem_size_ = num_banks * bank_size; } #ifdef SCOPE @@ -231,7 +232,10 @@ public: _value = isa_caps_; break; case VX_CAPS_NUM_MEM_BANKS: - _value = MEMORY_BANKS; + _value = 1 << ((dev_caps_ >> 48) & 0x7); + break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); diff --git a/runtime/rtlsim/vortex.cpp b/runtime/rtlsim/vortex.cpp index 91df7f7e8..7ba7f9471 100644 --- a/runtime/rtlsim/vortex.cpp +++ b/runtime/rtlsim/vortex.cpp @@ -80,6 +80,9 
@@ public: case VX_CAPS_NUM_MEM_BANKS: _value = MEMORY_BANKS; break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS); + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 70ceb7fc4..eb32709ec 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -84,6 +84,9 @@ public: case VX_CAPS_NUM_MEM_BANKS: _value = MEMORY_BANKS; break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS); + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/xrt/Makefile b/runtime/xrt/Makefile index d4fbc51a8..7fadb43fd 100644 --- a/runtime/xrt/Makefile +++ b/runtime/xrt/Makefile @@ -8,6 +8,7 @@ SRC_DIR := $(VORTEX_HOME)/runtime/xrt CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common +CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += -fPIC LDFLAGS += -shared -pthread diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 48926e80b..3acb9b3c6 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -49,7 +49,6 @@ using namespace vortex; #define MMIO_ISA_ADDR 0x1C #define MMIO_DCR_ADDR 0x28 #define MMIO_SCP_ADDR 0x34 -#define MMIO_MEM_ADDR 0x40 #define CTL_AP_START (1 << 0) #define CTL_AP_DONE (1 << 1) @@ -58,24 +57,6 @@ using namespace vortex; #define CTL_AP_RESET (1 << 4) #define CTL_AP_RESTART (1 << 7) -struct platform_info_t { - const char *prefix_name; - uint8_t lg2_num_banks; - uint8_t lg2_bank_size; - uint64_t mem_base; -}; - -static const platform_info_t g_platforms[] = { - {"vortex_xrtsim", 0, 32, 0x0}, // 16 x 256 MB = 4 GB - {"xilinx_u200", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 - {"xilinx_u250", 2, 34, 0x0}, // 4 x 16 GB = 64 GB DDR4 - {"xilinx_u50", 5, 28, 0x0}, // 32 x 256 MB = 8 GB HBM2 - {"xilinx_u280", 5, 28, 0x0}, // 32 x 256 MB 
= 8 GB HBM2 - {"xilinx_u55c", 5, 29, 0x0}, // 32 x 512 MB = 16 GB HBM2 - {"xilinx_vck5000", 0, 33, 0xC000000000}, // 1 x 8 GB = 8 GB DDR4 - {"xilinx_kv260", 0, 32, 0x0}, // 1 x 4 GB = 4 GB DDR4 -}; - #ifdef CPP_API typedef xrt::device xrt_device_t; @@ -113,18 +94,6 @@ static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) { } #endif -static int get_platform_info(const std::string &device_name, - platform_info_t *platform_info) { - for (size_t i = 0; i < (sizeof(g_platforms) / sizeof(platform_info_t)); ++i) { - auto &platform = g_platforms[i]; - if (device_name.rfind(platform.prefix_name, 0) == 0) { - *platform_info = platform; - return 0; - } - } - return -1; -} - /////////////////////////////////////////////////////////////////////////////// class vx_device { @@ -181,58 +150,6 @@ public: auto xclbin = xrt::xclbin(xlbin_path_s); auto device_name = xrtDevice.get_info(); - /*{ - uint32_t num_banks = 0; - uint64_t bank_size = 0; - uint64_t mem_base = 0; - - auto mem_json = - nlohmann::json::parse(xrtDevice.get_info()); if - (!mem_json.is_null()) { uint32_t index = 0; for (auto& mem : - mem_json["board"]["memory"]["memories"]) { auto enabled = - mem["enabled"].get(); if (enabled == "true") { if (index == 0) - { mem_base = std::stoull(mem["base_address"].get(), nullptr, - 16); bank_size = std::stoull(mem["range_bytes"].get(), nullptr, - 16); - } - ++index; - } - } - num_banks = index; - } - - fprintf(stderr, "[VXDRV] memory description: base=0x%lx, size=0x%lx, - count=%d\n", mem_base, bank_size, num_banks); - }*/ - - /*{ - std::cout << "Device" << device_index << " : " << - xrtDevice.get_info() << std::endl; std::cout << " - bdf : " << xrtDevice.get_info() << std::endl; - std::cout << " kdma : " << - xrtDevice.get_info() << std::endl; std::cout << " - max_freq : " << - xrtDevice.get_info() << - std::endl; std::cout << " memory : " << - xrtDevice.get_info() << std::endl; std::cout << " - thermal : " << xrtDevice.get_info() << - std::endl; std::cout << " 
m2m : " << std::boolalpha << - xrtDevice.get_info() << std::dec << std::endl; - std::cout << " nodma : " << std::boolalpha << - xrtDevice.get_info() << std::dec << std::endl; - - std::cout << "Memory info :" << std::endl; - for (const auto& mem_bank : xclbin.get_mems()) { - std::cout << " index : " << mem_bank.get_index() << std::endl; - std::cout << " tag : " << mem_bank.get_tag() << std::endl; - std::cout << " type : " << (int)mem_bank.get_type() << std::endl; - std::cout << " base_address : 0x" << std::hex << - mem_bank.get_base_address() << std::endl; std::cout << " size : 0x" << - (mem_bank.get_size_kb() * 1000) << std::dec << std::endl; std::cout << " - used :" << mem_bank.get_used() << std::endl; - } - }*/ - #else CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), { @@ -275,11 +192,6 @@ public: printf("info: device name=%s.\n", device_name.c_str()); - CHECK_ERR(get_platform_info(device_name, &platform_), { - fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str()); - return err; - }); - CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), { return err; }); @@ -300,36 +212,13 @@ public: return err; }); - uint32_t num_banks = 1 << platform_.lg2_num_banks; - uint64_t bank_size = 1ull << platform_.lg2_bank_size; + uint64_t num_banks; + this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks); + lg2_num_banks_ = log2ceil(num_banks); - // adjust memory banks allocation to architecture limit - int isa_arch = VX_ISA_ARCH(isa_caps_); - if (isa_arch == 32) { - uint64_t max_mem_size = 1ull << 32; - uint32_t need_num_banks = max_mem_size / bank_size; - if (num_banks > need_num_banks) { - printf("info: adjusted number of banks from %d to %d.\n", num_banks, need_num_banks); - num_banks = need_num_banks; - platform_.lg2_num_banks = log2ceil(num_banks); - } - } - - for (uint32_t i = 0; i < num_banks; ++i) { - uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12); - uint64_t reg_value = platform_.mem_base + i * bank_size; - - 
CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), { - return err; - }); - - CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), { - return err; - }); - #ifndef BANK_INTERLEAVE - break; - #endif - } + uint64_t bank_size; + this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size); + lg2_bank_size_ = log2ceil(bank_size); global_mem_size_ = num_banks * bank_size; @@ -418,7 +307,10 @@ public: _value = isa_caps_; break; case VX_CAPS_NUM_MEM_BANKS: - _value = MEMORY_BANKS; + _value = 1 << ((dev_caps_ >> 48) & 0x7); + break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); @@ -734,23 +626,23 @@ private: MemoryAllocator global_mem_; xrt_device_t xrtDevice_; xrt_kernel_t xrtKernel_; - platform_info_t platform_; uint64_t dev_caps_; uint64_t isa_caps_; uint64_t global_mem_size_; DeviceConfig dcrs_; std::unordered_map> mpm_cache_; + uint32_t lg2_num_banks_; + uint32_t lg2_bank_size_; #ifdef BANK_INTERLEAVE std::vector xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { - uint32_t num_banks = 1 << platform_.lg2_num_banks; + uint32_t num_banks = 1 << lg2_num_banks_; uint64_t block_addr = addr / CACHE_BLOCK_SIZE; uint32_t index = block_addr & (num_banks - 1); - uint64_t offset = - (block_addr >> platform_.lg2_num_banks) * CACHE_BLOCK_SIZE; + uint64_t offset = (block_addr >> lg2_num_banks_) * CACHE_BLOCK_SIZE; if (pIdx) { *pIdx = index; } @@ -778,9 +670,9 @@ private: std::unordered_map xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { - uint32_t num_banks = 1 << platform_.lg2_num_banks; - uint64_t bank_size = 1ull << platform_.lg2_bank_size; - uint32_t index = addr >> platform_.lg2_bank_size; + uint32_t num_banks = 1 << lg2_num_banks_; + uint64_t bank_size = 1ull << lg2_bank_size_; + uint32_t index = addr >> lg2_bank_size_; uint64_t offset = addr & (bank_size - 1); if (index 
> num_banks) { fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr); @@ -807,7 +699,7 @@ private: } } else { printf("allocating bank%d...\n", bank_id); - uint64_t bank_size = 1ull << platform_.lg2_bank_size; + uint64_t bank_size = 1ull << lg2_bank_size_; #ifdef CPP_API xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id); #else diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index a6cd87ff1..3c5858043 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,30 +20,58 @@ constexpr uint32_t count_leading_zeros(uint32_t value) { return value ? __builtin_clz(value) : 32; } +constexpr uint32_t count_leading_zeros(uint64_t value) { + return value ? __builtin_clzll(value) : 64; +} + constexpr uint32_t count_trailing_zeros(uint32_t value) { return value ? __builtin_ctz(value) : 32; } +constexpr uint32_t count_trailing_zeros(uint64_t value) { + return value ? 
__builtin_ctzll(value) : 64; +} + constexpr bool ispow2(uint32_t value) { return value && !(value & (value - 1)); } +constexpr bool ispow2(uint64_t value) { + return value && !(value & (value - 1)); +} + constexpr uint32_t log2ceil(uint32_t value) { return 32 - count_leading_zeros(value - 1); } +constexpr uint32_t log2ceil(uint64_t value) { + return 64 - count_leading_zeros(value - 1); +} + inline unsigned log2up(uint32_t value) { return std::max(1, log2ceil(value)); } +inline unsigned log2up(uint64_t value) { + return std::max(1, log2ceil(value)); +} + constexpr unsigned log2floor(uint32_t value) { return 31 - count_leading_zeros(value); } +constexpr unsigned log2floor(uint64_t value) { + return 63 - count_leading_zeros(value); +} + constexpr unsigned ceil2(uint32_t value) { return 32 - count_leading_zeros(value); } +constexpr unsigned ceil2(uint64_t value) { + return 64 - count_leading_zeros(value); +} + inline uint64_t bit_clr(uint64_t bits, uint32_t index) { assert(index <= 63); return bits & ~(1ull << index); @@ -86,7 +114,7 @@ template T sext(const T& word, uint32_t width) { assert(width > 1); assert(width <= (sizeof(T) * 8)); - if (width == (sizeof(T) * 8)) + if (width == (sizeof(T) * 8)) return word; T mask((static_cast(1) << width) - 1); return ((word >> (width - 1)) & 0x1) ? 
(word | ~mask) : (word & mask); @@ -96,7 +124,7 @@ template T zext(const T& word, uint32_t width) { assert(width > 1); assert(width <= (sizeof(T) * 8)); - if (width == (sizeof(T) * 8)) + if (width == (sizeof(T) * 8)) return word; T mask((static_cast(1) << width) - 1); return word & mask; diff --git a/sim/common/mem_alloc.h b/sim/common/mem_alloc.h index 480c198a6..9ea6660d9 100644 --- a/sim/common/mem_alloc.h +++ b/sim/common/mem_alloc.h @@ -71,13 +71,14 @@ public: // Check if the reservation is within memory capacity bounds if (addr + size > capacity_) { - printf("error: address range out of bounds\n"); + printf("error: address range out of bounds - requested=0x%lx, capacity=0x%lx\n", (addr + size), capacity_); return -1; } // Ensure the reservation does not overlap with existing pages - if (hasPageOverlap(addr, size)) { - printf("error: address range overlaps with existing allocation\n"); + uint64_t overlapStart, overlapEnd; + if (hasPageOverlap(addr, size, &overlapStart, &overlapEnd)) { + printf("error: address range overlaps with existing allocation - requested=[0x%lx-0x%lx], existing=[0x%lx, 0x%lx]\n", addr, addr+size, overlapStart, overlapEnd); return -1; } @@ -509,15 +510,15 @@ private: return false; } - bool hasPageOverlap(uint64_t start, uint64_t size) { + bool hasPageOverlap(uint64_t start, uint64_t size, uint64_t* overlapStart, uint64_t* overlapEnd) { page_t* current = pages_; while (current != nullptr) { uint64_t pageStart = current->addr; uint64_t pageEnd = pageStart + current->size; - uint64_t requestEnd = start + size; - if ((start >= pageStart && start < pageEnd) || // Start of request is inside the page - (requestEnd > pageStart && requestEnd <= pageEnd) || // End of request is inside the page - (start <= pageStart && requestEnd >= pageEnd)) { // Request envelops the page + uint64_t end = start + size; + if ((start <= pageEnd) && (end >= pageStart)) { + *overlapStart = pageStart; + *overlapEnd = pageEnd; return true; } current = current->next; diff 
--git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index ffbfece13..ce8602c18 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -32,18 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU # AFU parameters -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + endif endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 430e4478b..2a06595df 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,7 +35,7 @@ #include #include -#define PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH/8) +#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 @@ -145,6 +145,9 @@ public: // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); + // calculate memory bank size + mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) * 
PLATFORM_MEMORY_DATA_SIZE; + // reset the device this->reset(); @@ -406,14 +409,14 @@ private: } void avs_bus_reset() { - for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { device_->avs_readdatavalid[b] = 0; device_->avs_waitrequest[b] = 0; } } void avs_bus_eval() { - for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { // process memory responses device_->avs_readdatavalid[b] = 0; if (!pending_mem_reqs_[b].empty() @@ -421,7 +424,7 @@ private: auto mem_rd_it = pending_mem_reqs_[b].begin(); auto mem_req = *mem_rd_it; device_->avs_readdatavalid[b] = 1; - memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_MEMORY_DATA_SIZE); uint32_t addr = mem_req->addr; pending_mem_reqs_[b].erase(mem_rd_it); delete mem_req; @@ -429,19 +432,20 @@ private: // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - uint64_t byte_addr = (uint64_t(device_->avs_address[b]) * PLATFORM_PARAM_LOCAL_MEMORY_BANKS + b) * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; + uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE; if (device_->avs_write[b]) { + // process write request uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); - for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[byte_addr + i] = data[i]; } } - /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, data=0x", timestamp, b, byte_addr); - for (int i = 0; i < PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE; i++) { - printf("%02x", data[(PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE-1)-i]); + /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, 
byteen); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); } printf("\n");*/ @@ -455,22 +459,20 @@ private: dram_queue_.push(mem_req); } else if (device_->avs_read[b]) { + // process read request auto mem_req = new mem_req_t(); mem_req->addr = device_->avs_address[b]; mem_req->bank_id = b; - ram_->read(mem_req->data.data(), byte_addr, PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + ram_->read(mem_req->data.data(), byte_addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, mem_req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); - for (auto& req : pending_mem_reqs_[b]) { - if (req.cycles_left != 0) - printf(" !%0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); - else - printf(" %0x", req.addr * PLATFORM_PARAM_LOCAL_MEMORY_DATA_SIZE); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, byte_addr); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", mem_req->data[i]); } - printf("}\n");*/ + printf("\n");*/ // send dram request dram_queue_.push(mem_req); @@ -481,7 +483,7 @@ private: } typedef struct { - std::array data; + std::array data; uint32_t addr; uint32_t bank_id; bool write; @@ -514,9 +516,10 @@ private: bool stop_; std::unordered_map host_buffers_; - int64_t host_buffer_ids_; + uint64_t host_buffer_ids_; + uint64_t mem_bank_size_; - std::list pending_mem_reqs_[PLATFORM_PARAM_LOCAL_MEMORY_BANKS]; + std::list pending_mem_reqs_[PLATFORM_MEMORY_BANKS]; std::list cci_reads_; std::list cci_writes_; diff --git a/sim/opaesim/vortex_afu_shim.sv b/sim/opaesim/vortex_afu_shim.sv index 2a0d63e42..e494ada8e 100644 --- a/sim/opaesim/vortex_afu_shim.sv +++ b/sim/opaesim/vortex_afu_shim.sv @@ -78,22 +78,22 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( output t_ccip_mmioData af2cp_sTxPort_c2_data, // 
Avalon signals for local memory access - output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS] + output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS], + input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS], + output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS], + input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS], + output logic avs_write [`PLATFORM_MEMORY_BANKS], + output logic avs_read [`PLATFORM_MEMORY_BANKS], + output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS], + output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS], + input avs_readdatavalid [`PLATFORM_MEMORY_BANKS] ); t_if_ccip_Rx cp2af_sRxPort; t_if_ccip_Tx af2cp_sTxPort; vortex_afu #( - .NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS) + .NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS) ) afu ( .clk(clk), .reset(reset), diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 4ac3f6edd..4b95d55bd 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -32,14 +32,21 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU # AFU parameters -ifeq (,$(findstring M_AXI_MEM_NUM_BANKS,$(CONFIGS))) - CONFIGS += -DM_AXI_MEM_NUM_BANKS=1 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring M_AXI_MEM_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DM_AXI_MEM_ADDR_WIDTH=32 +ifeq 
(,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + endif endif -ifeq (,$(findstring M_AXI_MEM_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DM_AXI_MEM_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 +endif +ifeq (,$(findstring PLATFORM_MEMORY_OFFSET,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_OFFSET=0 endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 648e25e7a..04350055b 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -11,22 +11,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -`include "VX_platform.vh" `include "vortex_afu.vh" module vortex_afu_shim #( - parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, + parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, + parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( // System signals input wire ap_clk, input wire ap_rst_n, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -50,35 +50,38 @@ module vortex_afu_shim #( output wire interrupt `IGNORE_WARNINGS_END ); - vortex_afu #( - .C_S_AXI_CTRL_ADDR_WIDTH(C_S_AXI_CTRL_ADDR_WIDTH), - 
.C_S_AXI_CTRL_DATA_WIDTH(C_S_AXI_CTRL_DATA_WIDTH), - .C_M_AXI_MEM_ID_WIDTH(C_M_AXI_MEM_ID_WIDTH), - .C_M_AXI_MEM_ADDR_WIDTH(C_M_AXI_MEM_ADDR_WIDTH), - .C_M_AXI_MEM_DATA_WIDTH(C_M_AXI_MEM_DATA_WIDTH) - ) afu ( - .ap_clk(ap_clk), - .ap_rst_n(ap_rst_n), - // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - .s_axi_ctrl_awvalid(s_axi_ctrl_awvalid), - .s_axi_ctrl_awready(s_axi_ctrl_awready), - .s_axi_ctrl_awaddr(s_axi_ctrl_awaddr), - .s_axi_ctrl_wvalid(s_axi_ctrl_wvalid), - .s_axi_ctrl_wready(s_axi_ctrl_wready), - .s_axi_ctrl_wdata(s_axi_ctrl_wdata), - .s_axi_ctrl_wstrb(s_axi_ctrl_wstrb), - .s_axi_ctrl_arvalid(s_axi_ctrl_arvalid), - .s_axi_ctrl_arready(s_axi_ctrl_arready), - .s_axi_ctrl_araddr(s_axi_ctrl_araddr), - .s_axi_ctrl_rvalid(s_axi_ctrl_rvalid), - .s_axi_ctrl_rready(s_axi_ctrl_rready), - .s_axi_ctrl_rdata(s_axi_ctrl_rdata), - .s_axi_ctrl_rresp(s_axi_ctrl_rresp), - .s_axi_ctrl_bvalid(s_axi_ctrl_bvalid), - .s_axi_ctrl_bready(s_axi_ctrl_bready), - .s_axi_ctrl_bresp(s_axi_ctrl_bresp), - .interrupt(interrupt) - ); + VX_afu_wrap #( + .C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), + .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), + .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), + .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), + .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) + ) afu_wrap ( + .clk (ap_clk), + .reset (~ap_rst_n), + + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), + + .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), + .s_axi_ctrl_awready (s_axi_ctrl_awready), + .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), + .s_axi_ctrl_wvalid (s_axi_ctrl_wvalid), + .s_axi_ctrl_wready (s_axi_ctrl_wready), + .s_axi_ctrl_wdata (s_axi_ctrl_wdata), + .s_axi_ctrl_wstrb (s_axi_ctrl_wstrb), + .s_axi_ctrl_arvalid (s_axi_ctrl_arvalid), + .s_axi_ctrl_arready (s_axi_ctrl_arready), + .s_axi_ctrl_araddr (s_axi_ctrl_araddr), + .s_axi_ctrl_rvalid (s_axi_ctrl_rvalid), + .s_axi_ctrl_rready 
(s_axi_ctrl_rready), + .s_axi_ctrl_rdata (s_axi_ctrl_rdata), + .s_axi_ctrl_rresp (s_axi_ctrl_rresp), + .s_axi_ctrl_bvalid (s_axi_ctrl_bvalid), + .s_axi_ctrl_bready (s_axi_ctrl_bready), + .s_axi_ctrl_bresp (s_axi_ctrl_bresp), + + .interrupt (interrupt) + ); endmodule diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index a2725f32d..1a63cdfdc 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -37,7 +37,7 @@ #include -#define M_AXI_MEM_DATA_SIZE (M_AXI_MEM_DATA_WIDTH/8) +#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 @@ -59,10 +59,24 @@ #define RAM_PAGE_SIZE 4096 -#define MEM_BANK_SIZE (1ull << M_AXI_MEM_ADDR_WIDTH) - #define CPU_GPU_LATENCY 200 +#if PLATFORM_MEMORY_ADDR_WIDTH > 32 + typedef QData Vl_m_addr_t; +#else + typedef IData Vl_m_addr_t; +#endif + +#if PLATFORM_MEMORY_DATA_WIDTH > 64 + typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t; +#else +#if PLATFORM_MEMORY_DATA_WIDTH > 32 + typedef QData Vl_m_data_t; +#else + typedef IData Vl_m_data_t; +#endif +#endif + using namespace vortex; static uint64_t timestamp = 0; @@ -134,7 +148,7 @@ public: if (future_.valid()) { future_.wait(); } - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { delete mem_alloc_[i]; } if (ram_) { @@ -169,15 +183,18 @@ public: tfp_->open("trace.vcd"); #endif + // calculate memory bank size + mem_bank_size_ = ((1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_BANKS) * PLATFORM_MEMORY_DATA_SIZE; + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); // initialize AXI memory interfaces - MP_M_AXI_MEM(M_AXI_MEM_NUM_BANKS); + MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS); // initialize memory allocator - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { - mem_alloc_[i] = new MemoryAllocator(0, MEM_BANK_SIZE, 4096, 64); + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + mem_alloc_[i] = new MemoryAllocator(0, mem_bank_size_, 4096, 64); } // reset 
the device @@ -198,13 +215,13 @@ public: } int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; return mem_alloc_[bank_id]->allocate(size, addr); } int mem_free(uint32_t bank_id, uint64_t addr) { - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; return mem_alloc_[bank_id]->release(addr); } @@ -212,11 +229,11 @@ public: int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { std::lock_guard guard(mutex_); - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; - uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + uint64_t base_addr = bank_id * mem_bank_size_ + addr; ram_->write(data, base_addr, size); - /*printf("%0ld: [sim] xrt-mem-write: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-write: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size); for (int i = size-1; i >= 0; --i) { printf("%02x", ((const uint8_t*)data)[i]); } @@ -227,11 +244,11 @@ public: int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { std::lock_guard guard(mutex_); - if (bank_id >= M_AXI_MEM_NUM_BANKS) + if (bank_id >= PLATFORM_MEMORY_BANKS) return -1; - uint64_t base_addr = uint64_t(bank_id) * MEM_BANK_SIZE + addr; + uint64_t base_addr = bank_id * mem_bank_size_ + addr; ram_->read(data, base_addr, size); - /*printf("%0ld: [sim] xrt-mem-read: addr=0x%lx, size=%ld, data=0x", timestamp, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-read: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size); for (int i = size-1; i >= 0; --i) { printf("%02x", ((uint8_t*)data)[i]); } @@ -307,7 +324,7 @@ private: reqs.clear(); } - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { std::queue empty; std::swap(dram_queues_[i], empty); 
} @@ -334,7 +351,7 @@ private: void tick() { this->axi_mem_bus_eval(); - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { if (!dram_queues_[i].empty()) { auto mem_req = dram_queues_[i].front(); if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) { @@ -394,7 +411,7 @@ private: } void axi_mem_bus_reset() { - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // address read request *m_axi_mem_[i].arready = 1; @@ -418,7 +435,7 @@ private: } void axi_mem_bus_eval() { - for (int i = 0; i < M_AXI_MEM_NUM_BANKS; ++i) { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // handle read responses if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) { *m_axi_mem_[i].rvalid = 0; @@ -434,7 +451,7 @@ private: *m_axi_mem_[i].rid = mem_rsp->tag; *m_axi_mem_[i].rresp = 0; *m_axi_mem_[i].rlast = 1; - memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), M_AXI_MEM_DATA_SIZE); + memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); pending_mem_reqs_[i].erase(mem_rsp_it); m_axi_states_[i].read_rsp_pending = true; delete mem_rsp; @@ -465,14 +482,14 @@ private: if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { auto mem_req = new mem_req_t(); mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = uint64_t(*m_axi_mem_[i].araddr) * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; - ram_->read(mem_req->data.data(), mem_req->addr, M_AXI_MEM_DATA_SIZE); + mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE; + ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[i].emplace_back(mem_req); /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); - for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + for (int i = 
PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { printf("%02x", mem_req->data[i]); } printf("\n");*/ @@ -494,9 +511,9 @@ private: auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = m_axi_states_[i].write_req_addr * M_AXI_MEM_NUM_BANKS + i * M_AXI_MEM_DATA_SIZE; + auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE; - for (int i = 0; i < M_AXI_MEM_DATA_SIZE; i++) { + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[byte_addr + i] = data[i]; } @@ -510,7 +527,7 @@ private: pending_mem_reqs_[i].emplace_back(mem_req); /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); - for (int i = M_AXI_MEM_DATA_SIZE-1; i >= 0; --i) { + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { printf("%02x", data[i]); } printf("\n");*/ @@ -535,7 +552,7 @@ private: } m_axi_state_t; typedef struct { - std::array data; + std::array data; uint32_t tag; uint64_t addr; bool write; @@ -545,22 +562,22 @@ private: typedef struct { CData* awvalid; CData* awready; - QData* awaddr; + Vl_m_addr_t* awaddr; IData* awid; CData* awlen; CData* wvalid; CData* wready; - VlWide<16>* wdata; + Vl_m_data_t* wdata; QData* wstrb; CData* wlast; CData* arvalid; CData* arready; - QData* araddr; + Vl_m_addr_t* araddr; IData* arid; CData* arlen; CData* rvalid; CData* rready; - VlWide<16>* rdata; + Vl_m_data_t* rdata; CData* rlast; IData* rid; CData* rresp; @@ -573,21 +590,22 @@ private: Vvortex_afu_shim* device_; RAM* ram_; DramSim dram_sim_; + uint64_t mem_bank_size_; std::future future_; bool stop_; std::mutex mutex_; - std::list pending_mem_reqs_[M_AXI_MEM_NUM_BANKS]; + std::list pending_mem_reqs_[PLATFORM_MEMORY_BANKS]; - m_axi_mem_t m_axi_mem_[M_AXI_MEM_NUM_BANKS]; + m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS]; - MemoryAllocator* mem_alloc_[M_AXI_MEM_NUM_BANKS]; + MemoryAllocator* 
mem_alloc_[PLATFORM_MEMORY_BANKS]; - m_axi_state_t m_axi_states_[M_AXI_MEM_NUM_BANKS]; + m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS]; - std::queue dram_queues_[M_AXI_MEM_NUM_BANKS]; + std::queue dram_queues_[PLATFORM_MEMORY_BANKS]; #ifdef VCD_OUTPUT VerilatedVcdC* tfp_; From 54f0c8e270c19ae4b554f365fff64a47c395d301 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:31:14 -0700 Subject: [PATCH 291/488] scope analyzer optimization --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 2 +- hw/rtl/libs/VX_fifo_queue.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- hw/rtl/libs/VX_scope_tap.sv | 45 +++++++++++++++++++++++++++------ 5 files changed, 41 insertions(+), 12 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 4060a3011..7a9ef4526 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -189,7 +189,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ scope_bus_in <= 1; end if (cmd_scope_writing) begin - scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); + scope_bus_in <= cmd_scope_wdata[scope_bus_ctr]; scope_bus_ctr <= scope_bus_ctr - 6'd1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index e30219270..4c8cc95a0 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -233,7 +233,7 @@ module VX_afu_ctrl #( end end if (cmd_scope_writing) begin - scope_bus_out_r <= 1'(scope_bus_wdata >> scope_bus_ctr); + scope_bus_out_r <= scope_bus_wdata[scope_bus_ctr]; scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 7eb760e6b..c5a4bf32e 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -40,7 +40,7 @@ module VX_fifo_queue #( `STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be 
smaller than size!")) `STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!")) `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) - `STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!")) + `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) VX_pending_size #( .SIZE (DEPTH), diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 229ff6cf2..913656bf8 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -311,7 +311,7 @@ module VX_mem_scheduler #( assign mem_req_addr_b[i][j] = reqq_addr_s[r]; assign mem_req_flags_b[i][j] = reqq_flags_s[r]; assign mem_req_data_b[i][j] = reqq_data_s[r]; - end else begin : g_extra + end else begin : g_padding assign mem_req_mask_b[i][j] = 0; assign mem_req_byteen_b[i][j] = '0; assign mem_req_addr_b[i][j] = '0; diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index f77a4e744..c3d111c05 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -17,9 +17,9 @@ module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width - parameter TRIGGERW = 16, // trigger signals width - parameter PROBEW = 256, // probe signal width - parameter DEPTH = 1024, // trace buffer depth + parameter TRIGGERW = 32, // trigger signals width + parameter PROBEW = 4999, // probe signal width + parameter DEPTH = 8192, // trace buffer depth parameter IDLE_CTRW = 32, // idle time between triggers counter width parameter TX_DATAW = 64 // transfer data width ) ( @@ -38,6 +38,7 @@ module VX_scope_tap #( localparam DATA_BITS = `LOG2UP(DATAW); localparam ADDRW = `CLOG2(DEPTH); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; + localparam TX_DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; @@ -65,6 +66,7 @@ module VX_scope_tap #( localparam GET_TYPE_BITS = 2; `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), 
("invalid parameter")) + `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; @@ -94,6 +96,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), + .OUT_REG (1), + .READ_ENABLE (0), .NO_RWCHECK (1) ) delta_store ( .clk (clk), @@ -115,6 +119,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), + .OUT_REG (1), + .READ_ENABLE (0), .NO_RWCHECK (1) ) data_store ( .clk (clk), @@ -214,14 +220,12 @@ module VX_scope_tap #( reg [TX_DATA_BITS-1:0] ser_tx_ctr; reg [DATA_BITS-1:0] read_offset; reg is_read_data; + reg [1:0] read_en; wire [CMD_TYPE_BITS-1:0] cmd_type = ser_buf_in[CMD_TYPE_BITS-1:0]; wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; - wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_value >> read_offset)); - wire [TX_DATAW-1:0] get_data = is_read_data ? 
data_chunk : TX_DATAW'(delta_value); - wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); always @(posedge clk) begin @@ -235,9 +239,11 @@ module VX_scope_tap #( raddr <= '0; is_read_data<= 0; ser_tx_ctr <= '0; + read_en <= '0; end else begin bus_out_r <= 0; cmd_start <= 0; + read_en <= '0; case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin @@ -305,7 +311,7 @@ module VX_scope_tap #( `endif end GET_TYPE_DATA: begin - bus_out_r <= 1'(get_data >> ser_tx_ctr); + read_en <= {is_read_data, 1'b1}; if (ser_tx_ctr == 0) begin if (is_read_data) begin if (DATAW > TX_DATAW) begin @@ -349,7 +355,30 @@ module VX_scope_tap #( end end - assign bus_out = bus_out_r; + wire [TX_DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; + for (genvar i = 0; i < TX_DATA_BLOCKS; ++i) begin : g_data_blocks + for (genvar j = 0; j < TX_DATAW; ++j) begin : g_j + localparam k = i * TX_DATAW + j; + if (k < DATAW) begin : g_valid + assign data_blocks[i][j] = data_value[k]; + end else begin : g_padding + assign data_blocks[i][j] = '0; + end + end + end + + wire [TX_DATAW-1:0] get_data = read_en[1] ? data_blocks[read_offset] : TX_DATAW'(delta_value); + wire bus_out_w = read_en[0] ? 
get_data[ser_tx_ctr] : bus_out_r; + + VX_pipe_register #( + .DATAW (1) + ) buf_out ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in (bus_out_w), + .data_out (bus_out) + ); endmodule `TRACING_ON From 5e123d0507da426e7852e9555a622d1ffe98268a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:31:54 -0700 Subject: [PATCH 292/488] minor update --- hw/rtl/libs/VX_stream_switch.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index 01217b668..e3848e4c3 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -46,7 +46,7 @@ module VX_stream_switch #( if (ii < NUM_INPUTS) begin : g_valid assign valid_in_w[i][j] = valid_in[ii]; assign data_in_w[i][j] = data_in[ii]; - end else begin : g_extra + end else begin : g_padding assign valid_in_w[i][j] = 0; assign data_in_w[i][j] = '0; end @@ -121,7 +121,7 @@ module VX_stream_switch #( .valid_out (valid_out[ii]), .ready_out (ready_out[ii]) ); - end else begin : g_extra + end else begin : g_padding `UNUSED_VAR (valid_out_w[i][j]) assign ready_out_w[i][j] = '0; end From f5eca75311b655a53a8c5e30d25a359e2e52b5f1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:43:48 -0700 Subject: [PATCH 293/488] handling synthesis builds with simulation enabled (e.g xrt with hw_emu) --- hw/rtl/VX_platform.vh | 118 ++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 67 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 7f6805c50..3e9042737 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -22,61 +22,31 @@ /////////////////////////////////////////////////////////////////////////////// -`ifdef VIVADO -`define STRING -`else -`define STRING string -`endif +`ifdef SIMULATION -`ifdef SYNTHESIS +`define STATIC_ASSERT(cond, msg) \ +generate \ + /* verilator lint_off GENUNNAMED */ \ + if (!(cond)) $error msg; \ + /* verilator 
lint_on GENUNNAMED */ \ +endgenerate -`define TRACING_ON -`define TRACING_OFF +`define ERROR(msg) \ + $error msg -`ifndef NDEBUG - `define DEBUG_BLOCK(x) x - `define TRACE(level, args) \ - if (level <= `DEBUG_LEVEL) begin \ - $write args; \ - end -`else - `define DEBUG_BLOCK(x) - `define TRACE(level, args) -`endif +`define ASSERT(cond, msg) \ + assert(cond) else $error msg -`define IGNORE_UNOPTFLAT_BEGIN -`define IGNORE_UNOPTFLAT_END -`define IGNORE_UNUSED_BEGIN -`define IGNORE_UNUSED_END -`define IGNORE_WARNINGS_BEGIN -`define IGNORE_WARNINGS_END -`define UNUSED_PARAM(x) -`define UNUSED_SPARAM(x) -`define UNUSED_VAR(x) -`define UNUSED_PIN(x) . x () -`define UNUSED_ARG(x) x - -`define __SCOPE (* mark_debug="true" *) - -`define __SCOPE_X - -`define __SCOPE_ON \ - `undef __SCOPE_X \ - `define __SCOPE_X `__SCOPE - -`define __SCOPE_OFF \ - `undef __SCOPE_X \ - `define __SCOPE_X - -`else // not SYNTHESIS +`define RUNTIME_ASSERT(cond, msg) \ + always @(posedge clk) begin \ + assert(cond) else $error msg; \ + end `define __SCOPE `define __SCOPE_X `define __SCOPE_ON `define __SCOPE_OFF -`ifdef VERILATOR - `ifndef TRACING_ALL `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* verilator tracing_off */ @@ -148,7 +118,6 @@ `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ -`endif // not VERILATOR `ifdef SV_DPI `define TRACE(level, args) dpi_trace(level, $sformatf args); @@ -159,31 +128,43 @@ end `endif -`endif +`else // SYNTHESIS -`ifdef SIMULATION - `define STATIC_ASSERT(cond, msg) \ - generate \ - /* verilator lint_off GENUNNAMED */ \ - if (!(cond)) $error msg; \ - /* verilator lint_on GENUNNAMED */ \ - endgenerate +`define STATIC_ASSERT(cond, msg) +`define ERROR(msg) // +`define ASSERT(cond, msg) // +`define RUNTIME_ASSERT(cond, msg) - `define ERROR(msg) \ - $error msg +`define DEBUG_BLOCK(x) +`define TRACE(level, args) - `define ASSERT(cond, msg) \ - assert(cond) else $error msg +`define TRACING_ON +`define 
TRACING_OFF + +`define IGNORE_UNOPTFLAT_BEGIN +`define IGNORE_UNOPTFLAT_END +`define IGNORE_UNUSED_BEGIN +`define IGNORE_UNUSED_END +`define IGNORE_WARNINGS_BEGIN +`define IGNORE_WARNINGS_END +`define UNUSED_PARAM(x) +`define UNUSED_SPARAM(x) +`define UNUSED_VAR(x) +`define UNUSED_PIN(x) . x () +`define UNUSED_ARG(x) x + +`define __SCOPE (* mark_debug="true" *) + +`define __SCOPE_X + +`define __SCOPE_ON \ + `undef __SCOPE_X \ + `define __SCOPE_X `__SCOPE + +`define __SCOPE_OFF \ + `undef __SCOPE_X \ + `define __SCOPE_X - `define RUNTIME_ASSERT(cond, msg) \ - always @(posedge clk) begin \ - assert(cond) else $error msg; \ - end -`else // not SIMULATION - `define STATIC_ASSERT(cond, msg) - `define ERROR(msg) // - `define ASSERT(cond, msg) // - `define RUNTIME_ASSERT(cond, msg) `endif /////////////////////////////////////////////////////////////////////////////// @@ -195,6 +176,7 @@ `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) +`define STRING string `elsif VIVADO `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) @@ -202,6 +184,7 @@ `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) `define DISABLE_BRAM (* ram_style = "registers" *) `define PRESERVE_NET (* keep = "true" *) +`define STRING `else `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) x.DATA_WIDTH @@ -209,6 +192,7 @@ `define NO_RW_RAM_CHECK `define DISABLE_BRAM `define PRESERVE_NET +`define STRING string `endif /////////////////////////////////////////////////////////////////////////////// From 15ead4acf6887aca5e2a2b02e8bcc1cc1d799fc1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:46:10 -0700 Subject: [PATCH 294/488] xrt with merge memory interface --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 9 ++++++++- hw/rtl/afu/xrt/vortex_afu.v | 17 ++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv 
b/hw/rtl/afu/xrt/VX_afu_wrap.sv index ca6fed1ae..8530ee97a 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -26,8 +26,11 @@ module VX_afu_wrap #( input wire reset, // AXI4 master interface +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA), +`else `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), - +`endif // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, output wire s_axi_ctrl_awready, @@ -80,7 +83,11 @@ module VX_afu_wrap #( wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; // convert memory interface to array +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); +`else `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); +`endif reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; reg [15:0] vx_pending_writes; diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 985d029cf..94aced3ec 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -17,16 +17,25 @@ module vortex_afu #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, +`ifdef SYNTHESIS + parameter C_M_AXI_MEM_ADDR_WIDTH = 64, + parameter C_M_AXI_MEM_NUM_BANKS = 1 +`else + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog(`PLATFORM_MEMORY_DATA_WIDTH/8), parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS +`endif ) ( // System signals input wire ap_clk, input wire ap_rst_n, // AXI4 master interface +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA), +`else `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), +`endif // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -60,9 +69,11 @@ module 
vortex_afu #( ) afu_wrap ( .clk (ap_clk), .reset (~ap_rst_n), - + `ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA), + `else `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - + `endif .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awready (s_axi_ctrl_awready), .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), From b146fab2909add2a29206f19bfce0d1ab0b8fc19 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 22 Sep 2024 22:46:55 -0700 Subject: [PATCH 295/488] xrt kernel registers update --- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 19 +++++++++---------- runtime/xrt/vortex.cpp | 7 ++++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 4c8cc95a0..a544983d6 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -107,22 +107,21 @@ module VX_afu_ctrl #( ADDR_DEV_0 = 8'h10, ADDR_DEV_1 = 8'h14, - //ADDR_DEV_CTRL = 8'h18, - ADDR_ISA_0 = 8'h1C, - ADDR_ISA_1 = 8'h20, - //ADDR_ISA_CTRL = 8'h24, + ADDR_ISA_0 = 8'h18, + ADDR_ISA_1 = 8'h1C, - ADDR_DCR_0 = 8'h28, - ADDR_DCR_1 = 8'h2C, - //ADDR_DCR_CTRL = 8'h30, + ADDR_DCR_0 = 8'h20, + ADDR_DCR_1 = 8'h24, `ifdef SCOPE - ADDR_SCP_0 = 8'h34, - ADDR_SCP_1 = 8'h38, - //ADDR_SCP_CTRL = 8'h3C, + ADDR_SCP_0 = 8'h28, + ADDR_SCP_1 = 8'h2C, `endif + ADDR_MEM_0 = 8'h30, + ADDR_MEM_1 = 8'h34, + ADDR_BITS = 8; localparam diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 3acb9b3c6..9385457f5 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -46,9 +46,10 @@ using namespace vortex; #define MMIO_CTL_ADDR 0x00 #define MMIO_DEV_ADDR 0x10 -#define MMIO_ISA_ADDR 0x1C -#define MMIO_DCR_ADDR 0x28 -#define MMIO_SCP_ADDR 0x34 +#define MMIO_ISA_ADDR 0x18 +#define MMIO_DCR_ADDR 0x20 +#define MMIO_SCP_ADDR 0x28 +#define MMIO_MEM_ADDR 0x30 #define CTL_AP_START (1 << 0) #define CTL_AP_DONE (1 << 1) From 8bb5e5ab8af42136f1372d8e4f1ca3e7edfa6741 Mon Sep 17 00:00:00 2001 From: Blaise 
Tine Date: Sun, 22 Sep 2024 22:47:23 -0700 Subject: [PATCH 296/488] build error fix --- hw/rtl/libs/VX_pending_size.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 50737634f..1e72cef19 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -35,7 +35,7 @@ module VX_pending_size #( `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) `STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) - if (SIZE == 1) begin : g_size1 + if (SIZE == 1) begin : g_size_eq1 reg size_r; @@ -59,7 +59,7 @@ module VX_pending_size #( assign alm_full = 1'b1; assign size = size_r; - end else begin : g_sizeN + end else begin : g_size_gt1 reg empty_r, alm_empty_r; reg full_r, alm_full_r; @@ -124,7 +124,7 @@ module VX_pending_size #( end end - if (SIZE > 2) begin : g_sizeN + if (SIZE > 2) begin : g_size_gt2 wire is_empty_n = (used_r == ADDRW'(1)); wire is_full_n = (used_r == ADDRW'(SIZE-1)); @@ -152,7 +152,7 @@ module VX_pending_size #( end end - end else begin : g_size2 + end else begin : g_size_eq2 always @(posedge clk) begin if (reset) begin From e38c2c1fbaa2198122251c76355d961e4dae5e1d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 02:12:47 -0700 Subject: [PATCH 297/488] xilinx xrt platforms configuration --- ci/regression.sh.in | 6 +- hw/rtl/afu/opae/local_mem_cfg_pkg.sv | 2 +- hw/rtl/afu/opae/vortex_afu.sv | 4 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 3 - hw/rtl/afu/xrt/vortex_afu.v | 2 +- hw/rtl/afu/xrt/vortex_afu.vh | 2 +- hw/syn/altera/dut/top/Makefile | 4 +- hw/syn/altera/opae/Makefile | 2 +- hw/syn/xilinx/xrt/Makefile | 21 ++--- hw/syn/xilinx/xrt/gen_xml.py | 75 +++++++++++++++++ hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- hw/syn/xilinx/xrt/package_kernel.tcl | 115 ++++----------------------- hw/syn/xilinx/xrt/platforms.mk | 51 ++++++++++++ sim/opaesim/Makefile | 4 +- sim/opaesim/opae_sim.cpp | 2 +- 
sim/xrtsim/Makefile | 4 +- sim/xrtsim/vortex_afu_shim.sv | 2 +- sim/xrtsim/xrt_sim.cpp | 2 +- 18 files changed, 167 insertions(+), 136 deletions(-) create mode 100644 hw/syn/xilinx/xrt/gen_xml.py create mode 100644 hw/syn/xilinx/xrt/platforms.mk diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 37f5d2b20..c45e8c3ff 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -275,9 +275,9 @@ config2() # test single-bank DRAM CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress - # test 27-bit DRAM address - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=xrt --app=mstress + # test 33-bit DRAM address + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress echo "configuration-2 tests done!" } diff --git a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv index 8b0ebaa0b..c63825548 100644 --- a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv +++ b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv @@ -31,7 +31,7 @@ //`include "platform_afu_top_config.vh" `ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH -`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH `PLATFORM_MEMORY_ADDR_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8)) `endif `ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 7a9ef4526..57b03cb21 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -96,12 +96,10 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam STATE_DCR_WRITE = 4; localparam STATE_WIDTH = `CLOG2(STATE_DCR_WRITE+1); - localparam BANK_BYTE_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + 
`CLOG2(`PLATFORM_MEMORY_DATA_WIDTH/8); - wire [127:0] afu_id = `AFU_ACCEL_UUID; wire [63:0] dev_caps = {8'b0, - 5'(BANK_BYTE_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index a544983d6..1db8cc4e2 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -119,9 +119,6 @@ module VX_afu_ctrl #( ADDR_SCP_1 = 8'h2C, `endif - ADDR_MEM_0 = 8'h30, - ADDR_MEM_1 = 8'h34, - ADDR_BITS = 8; localparam diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 94aced3ec..918474d52 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -22,7 +22,7 @@ module vortex_afu #( parameter C_M_AXI_MEM_ADDR_WIDTH = 64, parameter C_M_AXI_MEM_NUM_BANKS = 1 `else - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS `endif ) ( diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index f35980c2a..8018171e7 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -19,7 +19,7 @@ `endif `ifndef PLATFORM_MEMORY_ADDR_WIDTH -`define PLATFORM_MEMORY_ADDR_WIDTH 25 +`define PLATFORM_MEMORY_ADDR_WIDTH 31 `endif `ifndef PLATFORM_MEMORY_DATA_WIDTH diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile index e4dfae274..2a273e698 100644 --- a/hw/syn/altera/dut/top/Makefile +++ b/hw/syn/altera/dut/top/Makefile @@ -12,9 +12,9 @@ ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) endif ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq ($(XLEN),64) - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 else - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + CONFIGS += 
-DPLATFORM_MEMORY_ADDR_WIDTH=31 endif endif ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 19f9d0836..61935f2e4 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -98,7 +98,7 @@ ifdef PERF endif # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=26 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI +XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI all: swconfig ip-gen setup build diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index fa0a7873b..a5a38e281 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -76,6 +76,7 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include sources RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv +RTL_PKGS += $(RTL_DIR)/tex/VX_tex_pkg.sv $(RTL_DIR)/raster/VX_raster_pkg.sv $(RTL_DIR)/om/VX_om_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv @@ -91,18 +92,8 @@ RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache VPP_FLAGS += --vivado.synth.jobs $(JOBS) --vivado.impl.jobs $(JOBS) -ifeq ($(DEV_ARCH), zynquplus) -# ztnq -else ifeq ($(DEV_ARCH), versal) -# versal -else -# alveo -ifneq ($(findstring xilinx_u55c,$(XSA)),) - VPP_FLAGS += 
--connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] -else - VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] -endif -endif +# load platform settings +include $(SRC_DIR)/platforms.mk VPP_FLAGS += --report_level 2 VPP_FLAGS += --config $(SRC_DIR)/vitis.ini @@ -173,8 +164,12 @@ scope-json: $(BUILD_DIR)/scope.json $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o scope.json +gen-xml: +$(BUILD_DIR)/kernel.xml: + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SRC_DIR)/gen_xml.py -n $(M_AXI_NUM_BANKS) -d $(M_AXI_DATA_WIDTH) -a $(M_AXI_ADDRESS_WIDTH) -o kernel.xml + gen-xo: $(XO_CONTAINER) -$(XO_CONTAINER): $(BUILD_DIR)/sources.txt +$(XO_CONTAINER): $(BUILD_DIR)/sources.txt $(BUILD_DIR)/kernel.xml mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) diff --git a/hw/syn/xilinx/xrt/gen_xml.py b/hw/syn/xilinx/xrt/gen_xml.py new file mode 100644 index 000000000..4ba906b9a --- /dev/null +++ b/hw/syn/xilinx/xrt/gen_xml.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import xml.etree.ElementTree as ET +from xml.dom import minidom + +def prettify(elem): + """Return a pretty-printed XML string for the Element.""" + rough_string = ET.tostring(elem, 'utf-8') + reparsed = minidom.parseString(rough_string) + return reparsed.toprettyxml(indent=" ") + +def generate_xml(numbanks, datawidth, addresswidth, offset, output_file): + root = ET.Element("root", versionMajor="1", versionMinor="6") + kernel = ET.SubElement(root, "kernel", name="vortex_afu", language="ip_c", + vlnv="mycompany.com:kernel:vortex_afu:1.0", + attributes="", preferredWorkGroupSizeMultiple="0", + workGroupSize="1", interrupt="true") + + ports = ET.SubElement(kernel, "ports") + + # control ports + ET.SubElement(ports, "port", name="s_axi_ctrl", mode="slave", range="0x1000", dataWidth="32", portType="addressable", base="0x0") + + # memory ports + for i in range(numbanks): + port_name = f"m_axi_mem_{i}" + ET.SubElement(ports, "port", name=port_name, mode="master", range=f"0x{(1 << addresswidth) - 1:X}", dataWidth=str(datawidth), portType="addressable", base=f"0x0") + + args = ET.SubElement(kernel, "args") + + # control args + ET.SubElement(args, "arg", name="dev", addressQualifier="0", id="0", port="s_axi_ctrl", size="0x4", offset="0x010", type="uint", hostOffset="0x0", hostSize="0x4") + ET.SubElement(args, "arg", name="isa", addressQualifier="0", id="1", port="s_axi_ctrl", size="0x4", offset="0x018", type="uint", hostOffset="0x0", hostSize="0x4") + ET.SubElement(args, "arg", name="dcr", addressQualifier="0", id="2", port="s_axi_ctrl", size="0x4", offset="0x020", type="uint", hostOffset="0x0", hostSize="0x4") + ET.SubElement(args, "arg", name="scp", addressQualifier="0", id="3", port="s_axi_ctrl", size="0x4", offset="0x028", type="uint", hostOffset="0x0", hostSize="0x4") + + # memory args + for i in range(numbanks): + arg_name = f"mem_{i}" + ET.SubElement(args, "arg", name=arg_name, addressQualifier="1", id=str(4 + i), + port=f"m_axi_mem_{i}", 
size="0x8", offset=f"0x{offset + (i * 8):X}", + type="int*", hostOffset="0x0", hostSize="0x8") + + # Pretty-print and write the XML to file + with open(output_file, "w") as f: + f.write(prettify(root)) + +def main(): + parser = argparse.ArgumentParser(description="Kernel Configuration File Generator") + parser.add_argument("-n", "--numbanks", type=int, default=1, help="Number of AXI memory banks") + parser.add_argument("-d", "--datawidth", type=int, default=512, help="Data width of the AXI memory ports") + parser.add_argument("-a", "--addresswidth", type=int, default=28, help="Address width of the AXI memory ports") + parser.add_argument("-x", "--offset", type=lambda x: int(x, 0), default=0x30, help="Starting offset for kernel args (hex)") + parser.add_argument("-o", "--output", type=str, default="kernel.xml", help="Output XML file name") + args = parser.parse_args() + + # Call the generate function + generate_xml(args.numbanks, args.datawidth, args.addresswidth, args.offset, args.output) + +if __name__ == "__main__": + main() diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index c36c98e36..9301a096e 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -37,4 +37,4 @@ set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 source ${script_path}/package_kernel.tcl -package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" +package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" -kernel_xml ${build_dir}/kernel.xml diff --git a/hw/syn/xilinx/xrt/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl index aa7e96f3f..ed09639dd 100644 --- a/hw/syn/xilinx/xrt/package_kernel.tcl +++ b/hw/syn/xilinx/xrt/package_kernel.tcl @@ -41,14 +41,27 @@ set vdefines_list [lindex $vlist 2] #puts ${vincludes_list} #puts ${vdefines_list} -# find if chipscope is enabled set chipscope 0 +set num_banks 1 +set merged_mem_if 0 
+ +# parse vdefines_list for configuration parameters foreach def $vdefines_list { set fields [split $def "="] set name [lindex $fields 0] if { $name == "CHIPSCOPE" } { set chipscope 1 } + if { $name == "PLATFORM_MEMORY_BANKS" } { + set num_banks [lindex $fields 1] + } + if { $name == "PLATFORM_MERGED_MEMORY_INTERFACE" } { + set merged_mem_if 1 + } +} + +if { $merged_mem_if == 1 } { + set num_banks 1 } create_project -force kernel_pack $path_to_tmp_project @@ -143,108 +156,10 @@ foreach up [ipx::get_user_parameters] { ipx::associate_bus_interfaces -busif s_axi_ctrl -clock ap_clk $core -for {set i 0} {$i < 1} {incr i} { +for {set i 0} {$i < $num_banks} {incr i} { ipx::associate_bus_interfaces -busif m_axi_mem_$i -clock ap_clk $core } -set mem_map [::ipx::add_memory_map -quiet "s_axi_ctrl" $core] -set addr_block [::ipx::add_address_block -quiet "reg0" $mem_map] - -set reg [::ipx::add_register "CTRL" $addr_block] - set_property description "Control signals" $reg - set_property address_offset 0x000 $reg - set_property size 32 $reg - -set field [ipx::add_field AP_START $reg] - set_property ACCESS {read-write} $field - set_property BIT_OFFSET {0} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_start'.} $field - set_property MODIFIED_WRITE_VALUE {modify} $field - -set field [ipx::add_field AP_DONE $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {1} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_done'.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field AP_IDLE $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {2} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_idle'.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field AP_READY $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {3} 
$field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_ready'.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field RESERVED_1 $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {4} $field - set_property BIT_WIDTH {3} $field - set_property DESCRIPTION {Reserved. 0s on read.} $field - set_property READ_ACTION {modify} $field - -set field [ipx::add_field AUTO_RESTART $reg] - set_property ACCESS {read-write} $field - set_property BIT_OFFSET {7} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'auto_restart'.} $field - set_property MODIFIED_WRITE_VALUE {modify} $field - -set field [ipx::add_field RESERVED_2 $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {8} $field - set_property BIT_WIDTH {24} $field - set_property DESCRIPTION {Reserved. 0s on read.} $field - set_property READ_ACTION {modify} $field - -set reg [::ipx::add_register "GIER" $addr_block] - set_property description "Global Interrupt Enable Register" $reg - set_property address_offset 0x004 $reg - set_property size 32 $reg - -set reg [::ipx::add_register "IP_IER" $addr_block] - set_property description "IP Interrupt Enable Register" $reg - set_property address_offset 0x008 $reg - set_property size 32 $reg - -set reg [::ipx::add_register "IP_ISR" $addr_block] - set_property description "IP Interrupt Status Register" $reg - set_property address_offset 0x00C $reg - set_property size 32 $reg - -set reg [::ipx::add_register -quiet "DEV" $addr_block] - set_property address_offset 0x010 $reg - set_property size [expr {8*8}] $reg - -set reg [::ipx::add_register -quiet "ISA" $addr_block] - set_property address_offset 0x01C $reg - set_property size [expr {8*8}] $reg - -set reg [::ipx::add_register -quiet "DCR" $addr_block] - set_property address_offset 0x028 $reg - set_property size [expr {8*8}] $reg - -set reg [::ipx::add_register -quiet "SCP" 
$addr_block] - set_property address_offset 0x034 $reg - set_property size [expr {8*8}] $reg - -for {set i 0} {$i < 1} {incr i} { - set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] - set_property address_offset [expr {0x040 + $i * 12}] $reg - set_property size [expr {8*8}] $reg - set regparam [::ipx::add_register_parameter -quiet {ASSOCIATED_BUSIF} $reg] - set_property value m_axi_mem_$i $regparam -} - -set_property slave_memory_map_ref "s_axi_ctrl" [::ipx::get_bus_interfaces -of $core "s_axi_ctrl"] - set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} $core set_property sdx_kernel true $core set_property sdx_kernel_type rtl $core diff --git a/hw/syn/xilinx/xrt/platforms.mk b/hw/syn/xilinx/xrt/platforms.mk new file mode 100644 index 000000000..a3584942c --- /dev/null +++ b/hw/syn/xilinx/xrt/platforms.mk @@ -0,0 +1,51 @@ +# Platform specific configurations +# Add your platform specific configurations here + +M_AXI_NUM_BANKS := 1 +M_AXI_DATA_WIDTH := 512 +M_AXI_ADDRESS_WIDTH := 32 + +ifeq ($(DEV_ARCH), zynquplus) +# zynquplus +CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 +else ifeq ($(DEV_ARCH), versal) +# versal +CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 +ifneq ($(findstring xilinx_vck5000,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_OFFSET=40'hC000000000 +endif +else +# alveo +ifneq ($(findstring xilinx_u55c,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + #VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] + #CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE + VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)]) + M_AXI_NUM_BANKS := 32 + M_AXI_ADDRESS_WIDTH := 28 +else ifneq ($(findstring xilinx_u50,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=16 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] + M_AXI_NUM_BANKS := 16 + M_AXI_ADDRESS_WIDTH := 28 +else ifneq 
($(findstring xilinx_u280,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=16 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] + M_AXI_NUM_BANKS := 16 + M_AXI_ADDRESS_WIDTH := 28 +else ifneq ($(findstring xilinx_u250,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 + M_AXI_NUM_BANKS := 4 + M_AXI_ADDRESS_WIDTH := 34 +else ifneq ($(findstring xilinx_u200,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 + M_AXI_NUM_BANKS := 4 + M_AXI_ADDRESS_WIDTH := 34 +else + CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 + M_AXI_NUM_BANKS := 1 + M_AXI_ADDRESS_WIDTH := 32 +endif +endif + +CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=$(M_AXI_DATA_WIDTH) \ No newline at end of file diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index ce8602c18..b04f8ddb4 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -37,9 +37,9 @@ ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) endif ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq ($(XLEN),64) - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 else - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31 endif endif ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 2a06595df..0f0d67d9c 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -146,7 +146,7 @@ public: ram_ = new RAM(0, RAM_PAGE_SIZE); // calculate memory bank size - mem_bank_size_ = (1ull << PLATFORM_MEMORY_ADDR_WIDTH) * PLATFORM_MEMORY_DATA_SIZE; + mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH; // reset the device this->reset(); diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 4b95d55bd..83efa688f 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -37,9 +37,9 @@ ifeq (,$(findstring 
PLATFORM_MEMORY_BANKS,$(CONFIGS))) endif ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) ifeq ($(XLEN),64) - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=41 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 else - CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=25 + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31 endif endif ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 04350055b..9b3e2e8ed 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -17,7 +17,7 @@ module vortex_afu_shim #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8), + parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 1a63cdfdc..feb13dd1c 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -184,7 +184,7 @@ public: #endif // calculate memory bank size - mem_bank_size_ = ((1ull << PLATFORM_MEMORY_ADDR_WIDTH) / PLATFORM_MEMORY_BANKS) * PLATFORM_MEMORY_DATA_SIZE; + mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH; // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); From 923d2bb94c1a569fad413088d2da87bfba252830 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 02:30:34 -0700 Subject: [PATCH 298/488] mark as executable --- hw/syn/altera/power_play.sh | 0 hw/syn/xilinx/xrt/gen_xml.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 hw/syn/altera/power_play.sh mode change 100644 => 100755 hw/syn/xilinx/xrt/gen_xml.py diff --git a/hw/syn/altera/power_play.sh b/hw/syn/altera/power_play.sh old mode 100644 new mode 100755 diff --git 
a/hw/syn/xilinx/xrt/gen_xml.py b/hw/syn/xilinx/xrt/gen_xml.py old mode 100644 new mode 100755 From a80be895baae04383941da222bc7a85247efd76f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 03:05:46 -0700 Subject: [PATCH 299/488] fixed compiler errors --- runtime/opae/Makefile | 2 +- runtime/rtlsim/Makefile | 2 +- runtime/simx/Makefile | 2 +- runtime/stub/Makefile | 2 +- runtime/xrt/Makefile | 2 +- sim/common/bitmanip.h | 82 +++++++++++++++++++------------------- tests/opencl/common.mk | 2 +- tests/regression/common.mk | 2 +- tests/unittest/common.mk | 2 +- 9 files changed, 50 insertions(+), 48 deletions(-) diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index b002375d9..04545c887 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -9,7 +9,7 @@ SYN_DIR := $(HW_DIR)/syn/altera/opae SRC_DIR := $(VORTEX_HOME)/runtime/opae -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) diff --git a/runtime/rtlsim/Makefile b/runtime/rtlsim/Makefile index f6adbf8c8..a7b15d9ac 100644 --- a/runtime/rtlsim/Makefile +++ b/runtime/rtlsim/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/rtlsim -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/rtlsim -I$(COMMON_DIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) diff --git a/runtime/simx/Makefile b/runtime/simx/Makefile index c20e33b53..8eb0e6b44 100644 --- a/runtime/simx/Makefile +++ b/runtime/simx/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. 
SRC_DIR := $(VORTEX_HOME)/runtime/simx -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMMON_DIR) -I$(SIM_DIR)/common CXXFLAGS += $(CONFIGS) diff --git a/runtime/stub/Makefile b/runtime/stub/Makefile index ae6e27ed1..8315bd8af 100644 --- a/runtime/stub/Makefile +++ b/runtime/stub/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/stub -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/common CXXFLAGS += -fPIC diff --git a/runtime/xrt/Makefile b/runtime/xrt/Makefile index 7fadb43fd..f255002f2 100644 --- a/runtime/xrt/Makefile +++ b/runtime/xrt/Makefile @@ -6,7 +6,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/xrt -CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += -fPIC diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index 3c5858043..89247b89c 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -16,60 +16,62 @@ #include #include -constexpr uint32_t count_leading_zeros(uint32_t value) { - return value ? __builtin_clz(value) : 32; +template +constexpr uint32_t count_leading_zeros(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return value ? __builtin_clzll(value) : 64; + } else { + return value ? __builtin_clz(value) : 32; + } } -constexpr uint32_t count_leading_zeros(uint64_t value) { - return value ? 
__builtin_clzll(value) : 64; +template +constexpr uint32_t count_trailing_zeros(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return value ? __builtin_ctzll(value) : 64; + } else { + return value ? __builtin_ctz(value) : 32; + } } -constexpr uint32_t count_trailing_zeros(uint32_t value) { - return value ? __builtin_ctz(value) : 32; -} - -constexpr uint32_t count_trailing_zeros(uint64_t value) { - return value ? __builtin_ctzll(value) : 64; -} - -constexpr bool ispow2(uint32_t value) { +template +constexpr bool ispow2(T value) { + static_assert(std::is_integral::value, "invalid data type"); return value && !(value & (value - 1)); } -constexpr bool ispow2(uint64_t value) { - return value && !(value & (value - 1)); +template +constexpr uint32_t log2ceil(T value) { + static_assert(std::is_integral::value, "invalid data type"); + return (sizeof(T) * 8) - count_leading_zeros(value - 1); } -constexpr uint32_t log2ceil(uint32_t value) { - return 32 - count_leading_zeros(value - 1); -} - -constexpr uint32_t log2ceil(uint64_t value) { - return 64 - count_leading_zeros(value - 1); -} - -inline unsigned log2up(uint32_t value) { +template +inline unsigned log2up(T value) { + static_assert(std::is_integral::value, "invalid data type"); return std::max(1, log2ceil(value)); } -inline unsigned log2up(uint64_t value) { - return std::max(1, log2ceil(value)); +template +constexpr unsigned log2floor(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return 63 - count_leading_zeros(value); + } else { + return 31 - count_leading_zeros(value); + } } -constexpr unsigned log2floor(uint32_t value) { - return 31 - count_leading_zeros(value); -} - -constexpr unsigned log2floor(uint64_t value) { - return 63 - count_leading_zeros(value); -} - -constexpr unsigned ceil2(uint32_t value) { - return 32 - count_leading_zeros(value); -} - -constexpr unsigned ceil2(uint64_t value) { - 
return 64 - count_leading_zeros(value); +template +constexpr unsigned ceil2(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return 64 - count_leading_zeros(value); + } else { + return 32 - count_leading_zeros(value); + } } inline uint64_t bit_clr(uint64_t bits, uint32_t index) { diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 53903dd41..36d2956cb 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -40,7 +40,7 @@ VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_HOME)/kernel/scripts/link$(X VX_BINTOOL += OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing CXXFLAGS += -pthread CXXFLAGS += -I$(POCL_PATH)/include diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 0f97d4979..142d5cb2e 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -50,7 +50,7 @@ VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(ROOT_DIR)/hw LDFLAGS += -L$(VORTEX_RT_PATH) -lvortex diff --git a/tests/unittest/common.mk b/tests/unittest/common.mk index 9c3e384be..c04db4d11 100644 --- a/tests/unittest/common.mk +++ b/tests/unittest/common.mk @@ -1,7 +1,7 @@ ROOT_DIR := $(realpath ../../..) 
-CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(VORTEX_HOME)/sim/common # Debugging From 828b8827e796b731ce883c092734b45f752ca24e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 03:36:35 -0700 Subject: [PATCH 300/488] build error fix --- tests/opencl/bfs/CLHelper.h | 24 ++++++++++++------------ tests/opencl/bfs/main.cc | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/opencl/bfs/CLHelper.h b/tests/opencl/bfs/CLHelper.h index d485cbc36..e4106845b 100755 --- a/tests/opencl/bfs/CLHelper.h +++ b/tests/opencl/bfs/CLHelper.h @@ -431,7 +431,7 @@ void _clRelease() { } //-------------------------------------------------------- //--cambine:create buffer and then copy data from host to device -cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) throw(string) { +cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size, @@ -445,7 +445,7 @@ cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) throw(string) { //------------------------------------------------------- //--cambine: create read only buffer for devices //--date: 17/01/2011 -cl_mem _clMallocRW(int size, void *h_mem_ptr) throw(string) { +cl_mem _clMallocRW(int size, void *h_mem_ptr) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, @@ -459,7 +459,7 @@ cl_mem _clMallocRW(int size, void *h_mem_ptr) throw(string) { //------------------------------------------------------- //--cambine: create read and write buffer for devices //--date: 17/01/2011 -cl_mem _clMalloc(int size, void *h_mem_ptr) throw(string) { +cl_mem _clMalloc(int size, void *h_mem_ptr) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, @@ -474,7 +474,7 @@ cl_mem _clMalloc(int size, void *h_mem_ptr) 
throw(string) { //------------------------------------------------------- //--cambine: transfer data from host to device //--date: 17/01/2011 -void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string) { +void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) { oclHandles.cl_status = clEnqueueWriteBuffer( oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem_ptr, 0, NULL, NULL); #ifdef ERRMSG @@ -485,7 +485,7 @@ void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string) { //-------------------------------------------------------- //--cambine:create buffer and then copy data from host to device with pinned // memory -cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) throw(string) { +cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) { cl_mem d_mem, d_mem_pinned; float *h_mem_pinned = NULL; d_mem_pinned = clCreateBuffer(oclHandles.context, @@ -528,7 +528,7 @@ cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) throw(string) { //-------------------------------------------------------- //--cambine:create write only buffer on device -cl_mem _clMallocWO(int size) throw(string) { +cl_mem _clMallocWO(int size) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_WRITE_ONLY, size, 0, &oclHandles.cl_status); @@ -541,7 +541,7 @@ cl_mem _clMallocWO(int size) throw(string) { //-------------------------------------------------------- // transfer data from device to host -void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) throw(string) { +void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) { oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem, 0, 0, 0); #ifdef ERRMSG @@ -580,7 +580,7 @@ void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) throw(string) { //-------------------------------------------------------- // set kernel arguments void _clSetArgs(int kernel_id, int arg_idx, void *d_mem, - int size = 0) throw(string) { + int size = 0) 
{ if (!size) { oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem); @@ -657,7 +657,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void *d_mem, #endif } } -void _clFinish() throw(string) { +void _clFinish() { oclHandles.cl_status = clFinish(oclHandles.queue); #ifdef ERRMSG oclHandles.error_str = "excpetion in _clFinish"; @@ -683,7 +683,7 @@ void _clFinish() throw(string) { //-------------------------------------------------------- //--cambine:enqueue kernel void _clInvokeKernel(int kernel_id, int work_items, - int work_group_size) throw(string) { + int work_group_size) { cl_uint work_dim = WORK_DIM; //cl_event e[1]; if (work_items % work_group_size != 0) // process situations that work_items @@ -755,7 +755,7 @@ void _clInvokeKernel(int kernel_id, int work_items, // #endif } void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, - int group_y) throw(string) { + int group_y) { cl_uint work_dim = WORK_DIM; size_t local_work_size[] = {group_x, group_y}; size_t global_work_size[] = {range_x, range_y}; @@ -832,7 +832,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, //-------------------------------------------------------- // release OpenCL objects -void _clFree(cl_mem ob) throw(string) { +void _clFree(cl_mem ob) { if (ob != NULL) oclHandles.cl_status = clReleaseMemObject(ob); #ifdef ERRMSG diff --git a/tests/opencl/bfs/main.cc b/tests/opencl/bfs/main.cc index 537950603..cd55f5b3f 100755 --- a/tests/opencl/bfs/main.cc +++ b/tests/opencl/bfs/main.cc @@ -72,7 +72,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, char *h_graph_visited, - int *h_cost) throw(std::string) { + int *h_cost) { // int number_elements = height*width; char h_over; From 29ea3041c48c332ba6f2aa364f111a95223d8de5 Mon Sep 17 
00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 03:52:03 -0700 Subject: [PATCH 301/488] build fix --- hw/rtl/cache/VX_cache_bank.sv | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 59b4be871..054b7c589 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -273,18 +273,20 @@ module VX_cache_bank #( assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin : g_data_sel_lo - assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); - end else begin : g_data_sel_lo_ro - assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; + if (WRITE_ENABLE) begin : g_data_sel + for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i + if (i < `CS_WORD_WIDTH) begin : g_lo + assign data_sel[i] = replay_valid ? replay_data[i] : (mem_rsp_valid ? 
mem_rsp_data[i] : core_req_data[i]); + end else begin : g_hi + assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel + end + end + end else begin : g_data_sel_ro + assign data_sel = mem_rsp_data; `UNUSED_VAR (core_req_data) `UNUSED_VAR (replay_data) end - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin : g_data_sel_hi - assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel - end - if (UUID_WIDTH != 0) begin : g_req_uuid_sel assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin : g_req_uuid_sel_0 From 406583c0bdc091a4eb25f067a1b81340f49cbed1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 04:00:23 -0700 Subject: [PATCH 302/488] build fix --- ci/regression.sh.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index c45e8c3ff..6590da3d6 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -273,7 +273,7 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 # test single-bank DRAM - CONFIGS="-DPLATFORM_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=${XLEN}" ./ci/blackbox.sh --driver=opae --app=mstress # test 33-bit DRAM address CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress From e5e9a5c2e9fef9390c43f1d0d3a852e86b08956e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 04:03:04 -0700 Subject: [PATCH 303/488] build fix --- ci/regression.sh.in | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 6590da3d6..cead14925 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -273,7 +273,13 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 # test single-bank DRAM - 
CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=${XLEN}" ./ci/blackbox.sh --driver=opae --app=mstress + if [ "$XLEN" == "64" ]; then + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress + else + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress + fi # test 33-bit DRAM address CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress From 030071571d45ada0b68fb983f6e12584f64032e1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 04:30:28 -0700 Subject: [PATCH 304/488] test memory bank interleaving --- ci/regression.sh.in | 4 ++++ hw/rtl/afu/opae/vortex_afu.sv | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index cead14925..a0506f117 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -285,6 +285,10 @@ config2() CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress + # test DRAM banks interleaving + CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress + echo "configuration-2 tests done!" 
} diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 57b03cb21..435455ae0 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -18,6 +18,10 @@ `endif `include "VX_define.vh" +`ifndef PLATFORM_MEMORY_INTERLEAVE +`define PLATFORM_MEMORY_INTERLEAVE 1 +`endif + module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #( parameter NUM_LOCAL_MEM_BANKS = 2 ) ( @@ -604,7 +608,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .TAG_WIDTH (AVS_REQ_TAGW + 1), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), .AVS_ADDR_WIDTH($bits(t_local_mem_addr)), - .BANK_INTERLEAVE (1), + .BANK_INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE), .REQ_OUT_BUF (2), .RSP_OUT_BUF (0) ) avs_adapter ( From 818522f7e401a7251500036661b0209f7a6b7617 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 05:57:08 -0700 Subject: [PATCH 305/488] CI scripts update --- .github/workflows/ci.yml | 3 +-- ci/regression.sh.in | 6 ++++-- runtime/xrt/vortex.cpp | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 64317337b..1676aea4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -117,7 +117,7 @@ jobs: strategy: fail-fast: false matrix: - name: [regression, opencl, cache, config1, config2, debug, stress] + name: [regression, opencl, cache, config1, config2, debug, stress, synthesis] xlen: [32, 64] steps: @@ -161,7 +161,6 @@ jobs: ./ci/regression.sh --unittest ./ci/regression.sh --isa ./ci/regression.sh --kernel - ./ci/regression.sh --synthesis ./ci/regression.sh --regression else ./ci/regression.sh --${{ matrix.name }} diff --git a/ci/regression.sh.in b/ci/regression.sh.in index a0506f117..ea9aa2560 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -313,8 +313,10 @@ debug() test_csv_trace - CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache 
--debug=1 --perf=1 --app=demo --args="-n1" - CONFIGS="-O0 -DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-O0" ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" + CONFIGS="-O0" ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" + CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 9385457f5..d542e72fe 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -191,8 +191,6 @@ public: xrtDevice_ = xrtDevice; xrtKernel_ = xrtKernel; - printf("info: device name=%s.\n", device_name.c_str()); - CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), { return err; }); @@ -223,6 +221,8 @@ public: global_mem_size_ = num_banks * bank_size; + printf("info: device name=%s, memory_capacity=0x%lx bytes, memory_banks=%ld.\n", device_name.c_str(), global_mem_size_, num_banks); + #ifdef BANK_INTERLEAVE xrtBuffers_.reserve(num_banks); for (uint32_t i = 0; i < num_banks; ++i) { From 9a6dbdf1a97c77f4f95a6223e9f6014a561bda45 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 08:56:57 -0700 Subject: [PATCH 306/488] xrtsim addressing fix --- hw/rtl/Vortex_axi.sv | 16 ++++++++-------- hw/rtl/afu/opae/vortex_afu.sv | 6 +++--- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 5 ++--- hw/rtl/afu/xrt/VX_afu_wrap.sv | 20 ++++++++++++-------- hw/rtl/afu/xrt/vortex_afu.v | 3 +-- hw/rtl/libs/VX_avs_adapter.sv | 32 ++++++++++++++++++-------------- 
hw/rtl/libs/VX_axi_adapter.sv | 34 ++++++++++++++++++---------------- sim/xrtsim/vortex_afu_shim.sv | 4 ++-- sim/xrtsim/xrt_sim.cpp | 14 ++++---------- 9 files changed, 68 insertions(+), 66 deletions(-) diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 17d5d660e..758206396 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -84,7 +84,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( ); localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH); - + `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) wire mem_req_valid; @@ -182,13 +182,13 @@ module Vortex_axi import VX_gpu_pkg::*; #( ); VX_axi_adapter #( - .DATA_WIDTH (AXI_DATA_WIDTH), - .ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), - .TAG_WIDTH (AXI_TID_WIDTH), - .NUM_BANKS (AXI_NUM_BANKS), - .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), - .BANK_INTERLEAVE (0), - .RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0) + .DATA_WIDTH (AXI_DATA_WIDTH), + .ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH), + .ADDR_WIDTH_OUT (AXI_ADDR_WIDTH), + .TAG_WIDTH (AXI_TID_WIDTH), + .NUM_BANKS (AXI_NUM_BANKS), + .BANK_INTERLEAVE(0), + .RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 
2 : 0) ) axi_adapter ( .clk (clk), .reset (reset), diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 435455ae0..1440b2808 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -602,13 +602,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_avs_adapter #( .DATA_WIDTH (LMEM_DATA_WIDTH), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH_IN (LMEM_ADDR_WIDTH), + .ADDR_WIDTH_OUT($bits(t_local_mem_addr)), .BURST_WIDTH (LMEM_BURST_CTRW), .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .TAG_WIDTH (AVS_REQ_TAGW + 1), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), - .AVS_ADDR_WIDTH($bits(t_local_mem_addr)), - .BANK_INTERLEAVE (`PLATFORM_MEMORY_INTERLEAVE), + .BANK_INTERLEAVE(`PLATFORM_MEMORY_INTERLEAVE), .REQ_OUT_BUF (2), .RSP_OUT_BUF (0) ) avs_adapter ( diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 1db8cc4e2..382b31f8a 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -15,8 +15,7 @@ module VX_afu_ctrl #( parameter S_AXI_ADDR_WIDTH = 8, - parameter S_AXI_DATA_WIDTH = 32, - parameter M_AXI_ADDR_WIDTH = 25 + parameter S_AXI_DATA_WIDTH = 32 ) ( // axi4 lite slave signals input wire clk, @@ -135,7 +134,7 @@ module VX_afu_ctrl #( // device caps wire [63:0] dev_caps = {8'b0, - 5'(M_AXI_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? 
`LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 8530ee97a..235247177 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -17,8 +17,8 @@ module VX_afu_wrap #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = 32, - parameter C_M_AXI_MEM_ADDR_WIDTH = 25, parameter C_M_AXI_MEM_DATA_WIDTH = 512, + parameter C_M_AXI_MEM_ADDR_WIDTH = 25, parameter C_M_AXI_MEM_NUM_BANKS = 2 ) ( // System signals @@ -52,6 +52,11 @@ module VX_afu_wrap #( output wire interrupt ); +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS); +`else + localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH; +`endif localparam STATE_IDLE = 0; localparam STATE_RUN = 1; @@ -187,8 +192,7 @@ module VX_afu_wrap #( VX_afu_ctrl #( .S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), - .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), - .M_AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH) + .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH) ) afu_ctrl ( .clk (clk), .reset (reset), @@ -228,19 +232,19 @@ module VX_afu_wrap #( .dcr_wr_data (dcr_wr_data) ); - wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; - wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing - assign m_axi_mem_awaddr_a[i] = m_axi_mem_awaddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); - assign m_axi_mem_araddr_a[i] = m_axi_mem_araddr_u[i] + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); + assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); + 
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); end `SCOPE_IO_SWITCH (2) Vortex_axi #( .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), - .AXI_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), + .AXI_ADDR_WIDTH (M_AXI_MEM_ADDR_WIDTH), .AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) vortex_axi ( diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 918474d52..afda57f72 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -18,11 +18,10 @@ module vortex_afu #( parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, -`ifdef SYNTHESIS parameter C_M_AXI_MEM_ADDR_WIDTH = 64, +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE parameter C_M_AXI_MEM_NUM_BANKS = 1 `else - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS `endif ) ( diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index fe9a9a53b..58144e7fe 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -16,13 +16,13 @@ `TRACING_OFF module VX_avs_adapter #( parameter DATA_WIDTH = 1, - parameter ADDR_WIDTH = 1, + parameter ADDR_WIDTH_IN = 1, + parameter ADDR_WIDTH_OUT= 32, parameter BURST_WIDTH = 1, parameter NUM_BANKS = 1, parameter TAG_WIDTH = 1, parameter RD_QUEUE_SIZE = 1, parameter BANK_INTERLEAVE= 0, - parameter AVS_ADDR_WIDTH = ADDR_WIDTH - `CLOG2(NUM_BANKS), parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 ) ( @@ -33,7 +33,7 @@ module VX_avs_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, @@ -47,7 
+47,7 @@ module VX_avs_adapter #( // AVS bus output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS], input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS], - output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_BANKS], input wire avs_waitrequest [NUM_BANKS], output wire avs_write [NUM_BANKS], output wire avs_read [NUM_BANKS], @@ -58,30 +58,34 @@ module VX_avs_adapter #( localparam DATA_SIZE = DATA_WIDTH/8; localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); - localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; + localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS; // to input space + localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS; - `STATIC_ASSERT ((AVS_ADDR_WIDTH >= BANK_OFFSETW), ("invalid parameter")) + `STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN)) // Requests handling ////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; - wire [BANK_SEL_WIDTH-1:0] req_bank_sel; - wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; + wire [BANK_OFFSETW-1:0] req_bank_off; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + + wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr); + if (NUM_BANKS > 1) begin : g_bank_sel if (BANK_INTERLEAVE) begin : g_interleave - assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; - assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW]; end else begin : g_no_interleave - assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; - assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + assign req_bank_sel = 
mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0]; end end else begin : g_no_bank_sel assign req_bank_sel = '0; - assign req_bank_off = mem_req_addr; + assign req_bank_off = mem_req_addr_out; end for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push @@ -151,7 +155,7 @@ module VX_avs_adapter #( assign avs_read[i] = valid_out && ~rw_out; assign avs_write[i] = valid_out && rw_out; - assign avs_address[i] = AVS_ADDR_WIDTH'(addr_out); + assign avs_address[i] = ADDR_WIDTH_OUT'(addr_out); assign avs_byteenable[i] = byteen_out; assign avs_writedata[i] = data_out; assign avs_burstcount[i] = BURST_WIDTH'(1); diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index bdd699053..a21b8554f 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -16,10 +16,10 @@ `TRACING_OFF module VX_axi_adapter #( parameter DATA_WIDTH = 512, - parameter ADDR_WIDTH = 32, + parameter ADDR_WIDTH_IN = 1, + parameter ADDR_WIDTH_OUT = 32, parameter TAG_WIDTH = 8, parameter NUM_BANKS = 1, - parameter AXI_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), parameter BANK_INTERLEAVE= 0, parameter RSP_OUT_BUF = 0 ) ( @@ -30,7 +30,7 @@ module VX_axi_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, @@ -44,7 +44,7 @@ module VX_axi_adapter #( // AXI write request address channel output wire m_axi_awvalid [NUM_BANKS], input wire m_axi_awready [NUM_BANKS], - output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS], @@ -71,7 +71,7 @@ module 
VX_axi_adapter #( // AXI read address channel output wire m_axi_arvalid [NUM_BANKS], input wire m_axi_arready [NUM_BANKS], - output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS], output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS], @@ -93,25 +93,27 @@ module VX_axi_adapter #( localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); - localparam BANK_OFFSETW = ADDR_WIDTH - BANK_SEL_BITS; - localparam DST_ADDR_WDITH = BANK_OFFSETW + `CLOG2(DATA_WIDTH/8); + localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS - `CLOG2(DATA_WIDTH/8); // to input space + localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS; - `STATIC_ASSERT ((AXI_ADDR_WIDTH >= DST_ADDR_WDITH), ("invalid tag width: current=%0d, expected=%0d", AXI_ADDR_WIDTH, DST_ADDR_WDITH)) + `STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN)) - wire [BANK_SEL_WIDTH-1:0] req_bank_sel; wire [BANK_OFFSETW-1:0] req_bank_off; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + + wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr); if (NUM_BANKS > 1) begin : g_bank_sel if (BANK_INTERLEAVE) begin : g_interleave - assign req_bank_sel = mem_req_addr[BANK_SEL_BITS-1:0]; - assign req_bank_off = mem_req_addr[BANK_SEL_BITS +: BANK_OFFSETW]; + assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW]; end else begin : g_no_interleave - assign req_bank_sel = mem_req_addr[BANK_OFFSETW +: BANK_SEL_BITS]; - assign req_bank_off = mem_req_addr[BANK_OFFSETW-1:0]; + assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0]; end end else begin : g_no_bank_sel 
assign req_bank_sel = '0; - assign req_bank_off = mem_req_addr; + assign req_bank_off = mem_req_addr_out; end wire mem_req_fire = mem_req_valid && mem_req_ready; @@ -148,7 +150,7 @@ module VX_axi_adapter #( // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; - assign m_axi_awaddr[i] = AXI_ADDR_WIDTH'(req_bank_off); + assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_awid[i] = mem_req_tag; assign m_axi_awlen[i] = 8'b00000000; assign m_axi_awsize[i] = 3'(DATA_SIZE); @@ -180,7 +182,7 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); - assign m_axi_araddr[i] = AXI_ADDR_WIDTH'(req_bank_off); + assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_arid[i] = mem_req_tag; assign m_axi_arlen[i] = 8'b00000000; assign m_axi_arsize[i] = 3'(DATA_SIZE); diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 9b3e2e8ed..f94617f1e 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -17,8 +17,8 @@ module vortex_afu_shim #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH, parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, + parameter C_M_AXI_MEM_ADDR_WIDTH = 64, parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( // System signals @@ -54,8 +54,8 @@ module vortex_afu_shim #( .C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), - .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), 
.C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) afu_wrap ( .clk (ap_clk), diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index feb13dd1c..4ee15baa1 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -61,12 +61,6 @@ #define CPU_GPU_LATENCY 200 -#if PLATFORM_MEMORY_ADDR_WIDTH > 32 - typedef QData Vl_m_addr_t; -#else - typedef IData Vl_m_addr_t; -#endif - #if PLATFORM_MEMORY_DATA_WIDTH > 64 typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t; #else @@ -482,7 +476,7 @@ private: if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { auto mem_req = new mem_req_t(); mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr) * PLATFORM_MEMORY_DATA_SIZE; + mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr); ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; @@ -511,7 +505,7 @@ private: auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr * PLATFORM_MEMORY_DATA_SIZE; + auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr; for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { @@ -562,7 +556,7 @@ private: typedef struct { CData* awvalid; CData* awready; - Vl_m_addr_t* awaddr; + QData* awaddr; IData* awid; CData* awlen; CData* wvalid; @@ -572,7 +566,7 @@ private: CData* wlast; CData* arvalid; CData* arready; - Vl_m_addr_t* araddr; + QData* araddr; IData* arid; CData* arlen; CData* rvalid; From 2cf483ddf5755a48ae493e1a78dfffef4986fbee Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 21:01:24 -0700 Subject: [PATCH 307/488] xrt afu bug fixes --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 5 +- hw/syn/xilinx/xrt/Makefile | 6 +- 
hw/syn/xilinx/xrt/gen_xml.py | 75 -------------------- hw/syn/xilinx/xrt/gen_xo.tcl | 2 +- hw/syn/xilinx/xrt/package_kernel.tcl | 100 +++++++++++++++++++++++++++ hw/syn/xilinx/xrt/platforms.mk | 34 +++------ sim/xrtsim/xrt_sim.cpp | 4 +- 7 files changed, 116 insertions(+), 110 deletions(-) delete mode 100755 hw/syn/xilinx/xrt/gen_xml.py diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 235247177..e515b080b 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -236,8 +236,9 @@ module VX_afu_wrap #( wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing - assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); - assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET); + localparam [C_M_AXI_MEM_ADDR_WIDTH-1:0] BANK_OFFSET = C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET) + C_M_AXI_MEM_ADDR_WIDTH'(i) << M_AXI_MEM_ADDR_WIDTH; + assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + BANK_OFFSET; + assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET; end `SCOPE_IO_SWITCH (2) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index a5a38e281..957940afa 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -164,12 +164,8 @@ scope-json: $(BUILD_DIR)/scope.json $(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o scope.json -gen-xml: -$(BUILD_DIR)/kernel.xml: - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SRC_DIR)/gen_xml.py -n $(M_AXI_NUM_BANKS) -d $(M_AXI_DATA_WIDTH) -a $(M_AXI_ADDRESS_WIDTH) -o kernel.xml - gen-xo: $(XO_CONTAINER) -$(XO_CONTAINER): $(BUILD_DIR)/sources.txt $(BUILD_DIR)/kernel.xml 
+$(XO_CONTAINER): $(BUILD_DIR)/sources.txt mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) diff --git a/hw/syn/xilinx/xrt/gen_xml.py b/hw/syn/xilinx/xrt/gen_xml.py deleted file mode 100755 index 4ba906b9a..000000000 --- a/hw/syn/xilinx/xrt/gen_xml.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright © 2019-2023 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import xml.etree.ElementTree as ET -from xml.dom import minidom - -def prettify(elem): - """Return a pretty-printed XML string for the Element.""" - rough_string = ET.tostring(elem, 'utf-8') - reparsed = minidom.parseString(rough_string) - return reparsed.toprettyxml(indent=" ") - -def generate_xml(numbanks, datawidth, addresswidth, offset, output_file): - root = ET.Element("root", versionMajor="1", versionMinor="6") - kernel = ET.SubElement(root, "kernel", name="vortex_afu", language="ip_c", - vlnv="mycompany.com:kernel:vortex_afu:1.0", - attributes="", preferredWorkGroupSizeMultiple="0", - workGroupSize="1", interrupt="true") - - ports = ET.SubElement(kernel, "ports") - - # control ports - ET.SubElement(ports, "port", name="s_axi_ctrl", mode="slave", range="0x1000", dataWidth="32", portType="addressable", base="0x0") - - # memory ports - for i in range(numbanks): - port_name = f"m_axi_mem_{i}" - ET.SubElement(ports, "port", name=port_name, mode="master", range=f"0x{(1 << addresswidth) - 1:X}", dataWidth=str(datawidth), portType="addressable", base=f"0x0") - - args = ET.SubElement(kernel, "args") - - # control args - ET.SubElement(args, "arg", name="dev", addressQualifier="0", id="0", port="s_axi_ctrl", size="0x4", offset="0x010", type="uint", hostOffset="0x0", hostSize="0x4") - ET.SubElement(args, "arg", name="isa", addressQualifier="0", id="1", port="s_axi_ctrl", size="0x4", offset="0x018", type="uint", hostOffset="0x0", hostSize="0x4") - ET.SubElement(args, "arg", name="dcr", addressQualifier="0", id="2", port="s_axi_ctrl", size="0x4", offset="0x020", type="uint", hostOffset="0x0", hostSize="0x4") - ET.SubElement(args, "arg", name="scp", addressQualifier="0", id="3", port="s_axi_ctrl", size="0x4", offset="0x028", type="uint", hostOffset="0x0", hostSize="0x4") - - # memory args - for i in range(numbanks): - arg_name = f"mem_{i}" - ET.SubElement(args, "arg", name=arg_name, addressQualifier="1", id=str(4 + i), - port=f"m_axi_mem_{i}", 
size="0x8", offset=f"0x{offset + (i * 8):X}", - type="int*", hostOffset="0x0", hostSize="0x8") - - # Pretty-print and write the XML to file - with open(output_file, "w") as f: - f.write(prettify(root)) - -def main(): - parser = argparse.ArgumentParser(description="Kernel Configuration File Generator") - parser.add_argument("-n", "--numbanks", type=int, default=1, help="Number of AXI memory banks") - parser.add_argument("-d", "--datawidth", type=int, default=512, help="Data width of the AXI memory ports") - parser.add_argument("-a", "--addresswidth", type=int, default=28, help="Address width of the AXI memory ports") - parser.add_argument("-x", "--offset", type=lambda x: int(x, 0), default=0x30, help="Starting offset for kernel args (hex)") - parser.add_argument("-o", "--output", type=str, default="kernel.xml", help="Output XML file name") - args = parser.parse_args() - - # Call the generate function - generate_xml(args.numbanks, args.datawidth, args.addresswidth, args.offset, args.output) - -if __name__ == "__main__": - main() diff --git a/hw/syn/xilinx/xrt/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl index 9301a096e..d5b1e41a2 100644 --- a/hw/syn/xilinx/xrt/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -37,4 +37,4 @@ set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 source ${script_path}/package_kernel.tcl -package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" -kernel_xml ${build_dir}/kernel.xml +package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" \ No newline at end of file diff --git a/hw/syn/xilinx/xrt/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl index ed09639dd..ebe767c69 100644 --- a/hw/syn/xilinx/xrt/package_kernel.tcl +++ b/hw/syn/xilinx/xrt/package_kernel.tcl @@ -160,6 +160,106 @@ for {set i 0} {$i < $num_banks} {incr i} { ipx::associate_bus_interfaces -busif m_axi_mem_$i -clock ap_clk $core } +set mem_map 
[::ipx::add_memory_map -quiet "s_axi_ctrl" $core] +set addr_block [::ipx::add_address_block -quiet "reg0" $mem_map] + +set reg [::ipx::add_register "CTRL" $addr_block] +set_property description "Control signals" $reg +set_property address_offset 0x000 $reg +set_property size 32 $reg + +set field [ipx::add_field AP_START $reg] +set_property ACCESS {read-write} $field +set_property BIT_OFFSET {0} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_start'.} $field +set_property MODIFIED_WRITE_VALUE {modify} $field + +set field [ipx::add_field AP_DONE $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {1} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_done'.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field AP_IDLE $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {2} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_idle'.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field AP_READY $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {3} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_ready'.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field RESERVED_1 $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {4} $field +set_property BIT_WIDTH {3} $field +set_property DESCRIPTION {Reserved. 
0s on read.} $field +set_property READ_ACTION {modify} $field + +set field [ipx::add_field AUTO_RESTART $reg] +set_property ACCESS {read-write} $field +set_property BIT_OFFSET {7} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'auto_restart'.} $field +set_property MODIFIED_WRITE_VALUE {modify} $field + +set field [ipx::add_field RESERVED_2 $reg] +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {8} $field +set_property BIT_WIDTH {24} $field +set_property DESCRIPTION {Reserved. 0s on read.} $field +set_property READ_ACTION {modify} $field + +set reg [::ipx::add_register "GIER" $addr_block] +set_property description "Global Interrupt Enable Register" $reg +set_property address_offset 0x004 $reg +set_property size 32 $reg + +set reg [::ipx::add_register "IP_IER" $addr_block] +set_property description "IP Interrupt Enable Register" $reg +set_property address_offset 0x008 $reg +set_property size 32 $reg + +set reg [::ipx::add_register "IP_ISR" $addr_block] +set_property description "IP Interrupt Status Register" $reg +set_property address_offset 0x00C $reg +set_property size 32 $reg + +set reg [::ipx::add_register -quiet "DEV" $addr_block] +set_property address_offset 0x010 $reg +set_property size [expr {8*8}] $reg + +set reg [::ipx::add_register -quiet "ISA" $addr_block] +set_property address_offset 0x018 $reg +set_property size [expr {8*8}] $reg + +set reg [::ipx::add_register -quiet "DCR" $addr_block] +set_property address_offset 0x020 $reg +set_property size [expr {8*8}] $reg + +set reg [::ipx::add_register -quiet "SCP" $addr_block] +set_property address_offset 0x028 $reg +set_property size [expr {8*8}] $reg + +for {set i 0} {$i < $num_banks} {incr i} { +# Add register for each memory bank +set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] +set_property address_offset [expr {0x30 + $i * 8}] $reg +set_property size [expr {8*8}] $reg +# Associate the bus interface +set regparam 
[::ipx::add_register_parameter ASSOCIATED_BUSIF $reg] +set_property value m_axi_mem_$i $regparam +} + +set_property slave_memory_map_ref "s_axi_ctrl" [::ipx::get_bus_interfaces -of $core "s_axi_ctrl"] + set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} $core set_property sdx_kernel true $core set_property sdx_kernel_type rtl $core diff --git a/hw/syn/xilinx/xrt/platforms.mk b/hw/syn/xilinx/xrt/platforms.mk index a3584942c..5a9a88e4d 100644 --- a/hw/syn/xilinx/xrt/platforms.mk +++ b/hw/syn/xilinx/xrt/platforms.mk @@ -1,9 +1,7 @@ # Platform specific configurations # Add your platform specific configurations here -M_AXI_NUM_BANKS := 1 -M_AXI_DATA_WIDTH := 512 -M_AXI_ADDRESS_WIDTH := 32 +CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 ifeq ($(DEV_ARCH), zynquplus) # zynquplus @@ -17,35 +15,21 @@ endif else # alveo ifneq ($(findstring xilinx_u55c,$(XSA)),) - CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 - #VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] - #CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE - VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)]) - M_AXI_NUM_BANKS := 32 - M_AXI_ADDRESS_WIDTH := 28 + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=29 + CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] + #VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)]) else ifneq ($(findstring xilinx_u50,$(XSA)),) - CONFIGS += -DPLATFORM_MEMORY_BANKS=16 -DPLATFORM_MEMORY_ADDR_WIDTH=28 - VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] - M_AXI_NUM_BANKS := 16 - M_AXI_ADDRESS_WIDTH := 28 + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] else ifneq ($(findstring xilinx_u280,$(XSA)),) - CONFIGS += -DPLATFORM_MEMORY_BANKS=16 
-DPLATFORM_MEMORY_ADDR_WIDTH=28 - VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] - M_AXI_NUM_BANKS := 16 - M_AXI_ADDRESS_WIDTH := 28 + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] else ifneq ($(findstring xilinx_u250,$(XSA)),) CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 - M_AXI_NUM_BANKS := 4 - M_AXI_ADDRESS_WIDTH := 34 else ifneq ($(findstring xilinx_u200,$(XSA)),) CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 - M_AXI_NUM_BANKS := 4 - M_AXI_ADDRESS_WIDTH := 34 else CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 - M_AXI_NUM_BANKS := 1 - M_AXI_ADDRESS_WIDTH := 32 endif endif - -CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=$(M_AXI_DATA_WIDTH) \ No newline at end of file diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 4ee15baa1..96adf0858 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -476,7 +476,7 @@ private: if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { auto mem_req = new mem_req_t(); mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = i * mem_bank_size_ + uint64_t(*m_axi_mem_[i].araddr); + mem_req->addr = uint64_t(*m_axi_mem_[i].araddr); ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; @@ -505,7 +505,7 @@ private: auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = i * mem_bank_size_ + m_axi_states_[i].write_req_addr; + auto byte_addr = m_axi_states_[i].write_req_addr; for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { From a9a5ded030ebee8b76631a7cc4681adbf84605cf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 23 Sep 2024 23:54:43 -0700 Subject: [PATCH 308/488] bitmanip logceil fix --- sim/common/bitmanip.h | 22 +++++++--------------- sim/simx/cache_sim.cpp | 26 
+++++++++++++++++++++++--- sim/simx/mem_sim.cpp | 4 ++-- sim/simx/types.h | 9 +++++---- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index 89247b89c..053f254c8 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -20,9 +20,9 @@ template constexpr uint32_t count_leading_zeros(T value) { static_assert(std::is_integral::value, "invalid data type"); if constexpr (sizeof(T) > 4) { - return value ? __builtin_clzll(value) : 64; + return value ? __builtin_clzll(value) : (sizeof(T) * 8); } else { - return value ? __builtin_clz(value) : 32; + return value ? __builtin_clz(value) : (sizeof(T) * 8); } } @@ -30,9 +30,9 @@ template constexpr uint32_t count_trailing_zeros(T value) { static_assert(std::is_integral::value, "invalid data type"); if constexpr (sizeof(T) > 4) { - return value ? __builtin_ctzll(value) : 64; + return value ? __builtin_ctzll(value) : (sizeof(T) * 8); } else { - return value ? __builtin_ctz(value) : 32; + return value ? 
__builtin_ctz(value) : (sizeof(T) * 8); } } @@ -45,7 +45,7 @@ constexpr bool ispow2(T value) { template constexpr uint32_t log2ceil(T value) { static_assert(std::is_integral::value, "invalid data type"); - return (sizeof(T) * 8) - count_leading_zeros(value - 1); + return (sizeof(T) * 8) - count_leading_zeros(value - 1); } template @@ -57,21 +57,13 @@ inline unsigned log2up(T value) { template constexpr unsigned log2floor(T value) { static_assert(std::is_integral::value, "invalid data type"); - if constexpr (sizeof(T) > 4) { - return 63 - count_leading_zeros(value); - } else { - return 31 - count_leading_zeros(value); - } + return (sizeof(T) * 8 - 1) - count_leading_zeros(value); } template constexpr unsigned ceil2(T value) { static_assert(std::is_integral::value, "invalid data type"); - if constexpr (sizeof(T) > 4) { - return 64 - count_leading_zeros(value); - } else { - return 32 - count_leading_zeros(value); - } + return (sizeof(T) * 8) - count_leading_zeros(value); } inline uint64_t bit_clr(uint64_t bits, uint32_t index) { diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index 71b2f4699..27a73ba72 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -170,6 +170,25 @@ struct bank_req_t { } }; +inline std::ostream &operator<<(std::ostream &os, const bank_req_t& req) { + os << "set=" << req.set_id << ", rw=" << req.write; + os << std::dec << ", type=" << req.type; + os << ", tag=0x" << std::hex << req.tag; + os << ", req_tags={"; + bool first_port = true; + for (auto& port : req.ports) { + if (port.valid) { + if (!first_port) os << ", "; + first_port = false; + os << "[" << std::dec << port.req_id << "]=0x" << std::hex << port.req_tag; + } + } + os << "}"; + os << std::dec << ", cid=" << req.cid; + os << " (#" << req.uuid << ")"; + return os; +} + struct mshr_entry_t { bank_req_t bank_req; uint32_t line_id; @@ -542,7 +561,7 @@ private: uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs; MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid}; 
simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency); - DT(3, simobject_->name() << " core-rsp: " << core_rsp); + DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp); } void processBypassRequest(const MemReq& core_req, uint32_t req_id) { @@ -550,13 +569,13 @@ private: MemReq mem_req(core_req); mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id; bypass_switch_->ReqIn.at(1).push(mem_req, 1); - DT(3, simobject_->name() << " dram-req: " << mem_req); + DT(3, simobject_->name() << " bypass-dram-req: " << mem_req); } if (core_req.write && config_.write_reponse) { MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid}; simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1); - DT(3, simobject_->name() << " core-rsp: " << core_rsp); + DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp); } } @@ -694,6 +713,7 @@ private: // allocate MSHR auto mshr_id = bank.mshr.allocate(pipeline_req, (free_line_id != -1) ? free_line_id : repl_line_id); + DT(3, simobject_->name() << "-bank" << bank_id << " mshr-enqueue: " << pipeline_req); // send fill request if (!mshr_pending) { diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index a38f4c01c..37ea3bb88 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -77,7 +77,7 @@ public: if (!rsp_args->request.write) { MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1); - DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp << " bank: " << rsp_args->i); + DT(3, rsp_args->simobject->name() << " mem-rsp: bank=" << rsp_args->i << ", " << mem_rsp); } delete rsp_args; }, @@ -90,7 +90,7 @@ public: continue; } - DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i); + DT(3, simobject_->name() << " mem-req: bank=" << i << ", " << mem_req); simobject_->MemReqPorts.at(i).pop(); counter++; diff --git a/sim/simx/types.h b/sim/simx/types.h index 17cf1685f..2ca6dc8fb 
100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -281,17 +281,18 @@ struct LsuReq { }; inline std::ostream &operator<<(std::ostream &os, const LsuReq& req) { - os << "rw=" << req.write << ", mask=" << req.mask << ", "; + os << "rw=" << req.write << ", mask=" << req.mask << ", addr={"; + bool first_addr = true; for (size_t i = 0; i < req.mask.size(); ++i) { - os << "addr" << i << "="; + if (!first_addr) os << ", "; + first_addr = false; if (req.mask.test(i)) { os << "0x" << std::hex << req.addrs.at(i) << std::dec; } else { os << "-"; } - os << ", "; } - os << "tag=0x" << std::hex << req.tag << std::dec << ", cid=" << req.cid; + os << "}, tag=0x" << std::hex << req.tag << std::dec << ", cid=" << req.cid; os << " (#" << req.uuid << ")"; return os; } From ce4f90e843bb0ac123fdc6060cf046de4ed58d7a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 24 Sep 2024 01:20:26 -0700 Subject: [PATCH 309/488] scope analyzer updates --- hw/syn/xilinx/xrt/Makefile | 6 +++--- runtime/common/scope.cpp | 32 ++++++++++++++++++++++++++++++++ tests/opencl/common.mk | 6 ++++-- tests/regression/common.mk | 6 ++++-- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 957940afa..ee2d64219 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -160,9 +160,9 @@ gen-ast: $(BUILD_DIR)/vortex.xml $(BUILD_DIR)/vortex.xml: mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); verilator --xml-only -O0 $(XML_CFLAGS) vortex_afu.v --xml-output vortex.xml -scope-json: $(BUILD_DIR)/scope.json -$(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o scope.json +scope-json: $(BIN_DIR)/scope.json +$(BIN_DIR)/scope.json: $(BUILD_DIR)/vortex.xml + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o bin/scope.json gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt diff --git a/runtime/common/scope.cpp 
b/runtime/common/scope.cpp index 7edd67692..def7be20b 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -30,6 +30,8 @@ #define SAMPLE_FLUSH_SIZE 100 +#define TIMEOUT_TIME (60*60) + #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -67,6 +69,10 @@ struct tap_t { static scope_callback_t g_callback; +static bool g_running = false; + +static std::mutex g_stop_mutex; + using json = nlohmann::json; static std::vector split(const std::string &s, char delimiter) { @@ -264,13 +270,39 @@ int vx_scope_start(scope_callback_t* callback, vx_device_h hdevice, uint64_t sta } } + g_running = true; + + // create auto-stop thread + uint32_t timeout_time = TIMEOUT_TIME; + const char* env_timeout = std::getenv("SCOPE_TIMEOUT"); + if (env_timeout != nullptr) { + std::stringstream ss(env_timeout); + uint32_t env_value; + if (ss >> env_value) { + timeout_time = env_value; + std::cout << "[SCOPE] timeout time=" << env_value << std::endl; + } + } + std::thread([hdevice, timeout_time]() { + std::this_thread::sleep_for(std::chrono::seconds(timeout_time)); + std::cout << "[SCOPE] auto-stop timeout!" 
<< std::endl; + vx_scope_stop(hdevice); + }).detach(); + return 0; } int vx_scope_stop(vx_device_h hdevice) { + std::lock_guard lock(g_stop_mutex); + if (nullptr == hdevice) return -1; + if (!g_running) + return 0; + + g_running = false; + std::vector taps; { diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 36d2956cb..3a3de87ee 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -102,9 +102,11 @@ run-opae: $(PROJECT) $(KERNEL_SRCS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) +else ifeq ($(TARGET), hw_emu) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin 
LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 142d5cb2e..94fe840df 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -99,9 +99,11 @@ run-opae: $(PROJECT) kernel.vxbin run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) +else ifeq ($(TARGET), hw_emu) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) 
VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) From 0e3206747a6ece3b2fc6bd66f89bba7c5c37c6ec Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 24 Sep 2024 21:46:26 -0700 Subject: [PATCH 310/488] scope_tap bug fix --- ci/blackbox.sh | 4 ++ hw/rtl/libs/VX_scope_tap.sv | 136 +++++++++++++++++++++--------------- hw/syn/xilinx/xrt/Makefile | 2 +- runtime/common/scope.cpp | 28 ++++---- 4 files changed, 100 insertions(+), 70 deletions(-) diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 51639b201..27a43781b 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -195,6 +195,10 @@ main() { mv -f $APP_PATH/trace.vcd . fi + if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then + mv -f $APP_PATH/scope.vcd . + fi + exit $status } diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index c3d111c05..b1977d388 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -33,12 +33,12 @@ module VX_scope_tap #( output wire bus_out ); localparam CTR_WIDTH = 64; - localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); + localparam DATA_IDX_WISTH = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + TRIGGERW; - localparam DATA_BITS = `LOG2UP(DATAW); localparam ADDRW = `CLOG2(DEPTH); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; - localparam TX_DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); + localparam DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); + localparam BLOCK_IDX_WISTH = `LOG2UP(DATA_BLOCKS); localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; @@ -59,18 +59,18 @@ module VX_scope_tap #( localparam CMD_SET_STOP = 3'd5; localparam CMD_TYPE_BITS = 3; - localparam GET_TYPE_WIDTH = 2'd0; - localparam GET_TYPE_COUNT = 2'd1; - localparam GET_TYPE_START = 2'd2; - localparam GET_TYPE_DATA = 2'd3; - localparam GET_TYPE_BITS = 2; + localparam SEND_TYPE_WIDTH = 2'd0; + localparam 
SEND_TYPE_COUNT = 2'd1; + localparam SEND_TYPE_START = 2'd2; + localparam SEND_TYPE_DATA = 2'd3; + localparam SEND_TYPE_BITS = 2; `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; - reg [GET_TYPE_BITS-1:0] get_type; + reg [SEND_TYPE_BITS-1:0] send_type; reg [CTR_WIDTH-1:0] timestamp, start_time; reg [CTR_WIDTH-1:0] start_delay, delay_cntr; @@ -217,42 +217,73 @@ module VX_scope_tap #( wire [TX_DATAW-1:0] ser_buf_in_n = {ser_buf_in[TX_DATAW-2:0], bus_in}; `UNUSED_VAR (ser_buf_in) - reg [TX_DATA_BITS-1:0] ser_tx_ctr; - reg [DATA_BITS-1:0] read_offset; + wire [DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; + logic [BLOCK_IDX_WISTH-1:0] data_block_idx; + reg [DATA_IDX_WISTH-1:0] ser_tx_ctr; reg is_read_data; - reg [1:0] read_en; + reg is_get_data; wire [CMD_TYPE_BITS-1:0] cmd_type = ser_buf_in[CMD_TYPE_BITS-1:0]; wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; + for (genvar i = 0; i < DATA_BLOCKS; ++i) begin : g_data_blocks + for (genvar j = 0; j < TX_DATAW; ++j) begin : g_j + localparam k = i * TX_DATAW + j; + if (k < DATAW) begin : g_valid + assign data_blocks[i][j] = data_value[k]; + end else begin : g_padding + assign data_blocks[i][j] = '0; + end + end + end + + if (DATA_BLOCKS > 1) begin : g_data_block_idx + always @(posedge clk) begin + if (reset) begin + data_block_idx <= '0; + end else if ((ctrl_state == CTRL_STATE_SEND) + && (send_type == SEND_TYPE_DATA) + && (ser_tx_ctr == 0) + && is_read_data) begin + if (data_block_idx < BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin + data_block_idx <= data_block_idx + BLOCK_IDX_WISTH'(1); + end else begin + data_block_idx <= '0; + end + end + end + end else begin : g_data_block_idx_0 + assign data_block_idx = 0; + end + wire [ADDRW-1:0] raddr_n = raddr 
+ ADDRW'(1); always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; + send_type <= SEND_TYPE_BITS'(SEND_TYPE_WIDTH); waddr_end <= ADDRW'(DEPTH-1); cmd_start <= 0; start_delay <= '0; bus_out_r <= 0; - read_offset <= '0; raddr <= '0; is_read_data<= 0; ser_tx_ctr <= '0; - read_en <= '0; + is_get_data <= 0; end else begin bus_out_r <= 0; cmd_start <= 0; - read_en <= '0; + is_get_data <= 0; case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); + ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_RECV; end end CTRL_STATE_RECV: begin - ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); + ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin // check if command is for this scope @@ -273,10 +304,10 @@ module VX_scope_tap #( CMD_GET_START, CMD_GET_COUNT, CMD_GET_DATA: begin - get_type <= GET_TYPE_BITS'(cmd_type); - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); - bus_out_r <= 1; + send_type <= SEND_TYPE_BITS'(cmd_type); + ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_SEND; + bus_out_r <= 1; end default:; endcase @@ -285,8 +316,8 @@ module VX_scope_tap #( `endif end CTRL_STATE_SEND: begin - case (get_type) - GET_TYPE_WIDTH: begin + case (send_type) + SEND_TYPE_WIDTH: begin bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin @@ -294,7 +325,7 @@ module VX_scope_tap #( end `endif end - GET_TYPE_COUNT: begin + SEND_TYPE_COUNT: begin bus_out_r <= 1'(waddr >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin @@ -302,7 +333,7 @@ module VX_scope_tap #( end `endif end - GET_TYPE_START: begin + SEND_TYPE_START: begin bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin @@ -310,24 +341,16 @@ module VX_scope_tap #( end `endif end - GET_TYPE_DATA: begin - read_en <= {is_read_data, 1'b1}; + SEND_TYPE_DATA: begin + is_get_data <= 1; if (ser_tx_ctr == 0) begin if 
(is_read_data) begin - if (DATAW > TX_DATAW) begin - if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin - read_offset <= read_offset + DATA_BITS'(TX_DATAW); - end else begin - read_offset <= '0; - raddr <= raddr_n; - is_read_data <= 0; // swutch delta mode - end - end else begin + if (data_block_idx == BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin raddr <= raddr_n; - is_read_data <= 0; // swutch delta mode - end - if (raddr_n == waddr) begin - raddr <= 0; // end-of-samples reset + is_read_data <= 0; // switch to delta mode + if (raddr_n == waddr) begin + raddr <= 0; // end-of-samples reset + end end end else begin is_read_data <= 1; // switch to data mode @@ -345,7 +368,7 @@ module VX_scope_tap #( end default:; endcase - ser_tx_ctr <= ser_tx_ctr - TX_DATA_BITS'(1); + ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; end @@ -355,23 +378,26 @@ module VX_scope_tap #( end end - wire [TX_DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; - for (genvar i = 0; i < TX_DATA_BLOCKS; ++i) begin : g_data_blocks - for (genvar j = 0; j < TX_DATAW; ++j) begin : g_j - localparam k = i * TX_DATAW + j; - if (k < DATAW) begin : g_valid - assign data_blocks[i][j] = data_value[k]; - end else begin : g_padding - assign data_blocks[i][j] = '0; - end - end - end - - wire [TX_DATAW-1:0] get_data = read_en[1] ? data_blocks[read_offset] : TX_DATAW'(delta_value); - wire bus_out_w = read_en[0] ? get_data[ser_tx_ctr] : bus_out_r; + wire [BLOCK_IDX_WISTH-1:0] data_block_idx_r; + wire [DATA_IDX_WISTH-1:0] ser_tx_ctr_r; + wire is_read_data_r; VX_pipe_register #( - .DATAW (1) + .DATAW (1 + DATA_IDX_WISTH + BLOCK_IDX_WISTH) + ) data_sel_buf ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in ({is_read_data, ser_tx_ctr, data_block_idx}), + .data_out ({is_read_data_r, ser_tx_ctr_r, data_block_idx_r}) + ); + + wire [TX_DATAW-1:0] get_data = is_read_data_r ? data_blocks[data_block_idx_r] : TX_DATAW'(delta_value); + wire bus_out_w = is_get_data ? 
get_data[ser_tx_ctr_r] : bus_out_r; + + VX_pipe_register #( + .DATAW (1), + .DEPTH (1) ) buf_out ( .clk (clk), .reset (reset), diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index ee2d64219..67eccf841 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -131,7 +131,7 @@ endif # Enable scope analyzer ifdef SCOPE CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) - SCOPE_JSON += $(BUILD_DIR)/scope.json + SCOPE_JSON += $(BIN_DIR)/scope.json endif # compilation flags diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index def7be20b..820fa54f5 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -159,7 +159,7 @@ static tap_t* find_earliest_tap(std::vector& taps) { return earliest; } -static uint64_t advance_time(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { +static uint64_t advance_clock(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { while (cur_time < next_time) { ofs << '#' << (cur_time * 2 + 0) << std::endl; ofs << "b0 0" << std::endl; @@ -383,20 +383,20 @@ int vx_scope_stop(vx_device_h hdevice) { std::cout << "[SCOPE] dump taps..." << std::endl; uint64_t cur_time = 0; - - while (true) { - // find the nearest tap - auto tap = find_earliest_tap(taps); - if (tap == nullptr) - break; + auto tap = find_earliest_tap(taps); + if (tap != nullptr) { + cur_time = (tap->cycle_time > 0) ? (tap->cycle_time-1) : 0; + do { + // advance clock + cur_time = advance_clock(ofs, cur_time, tap->cycle_time); + // dump tap + CHECK_ERR(dump_tap(ofs, tap, hdevice)); + // find the nearest tap + tap = find_earliest_tap(taps); + } while (tap != nullptr); // advance clock - cur_time = advance_time(ofs, cur_time, tap->cycle_time); - // dump tap - CHECK_ERR(dump_tap(ofs, tap, hdevice)); - }; - - // advance clock - advance_time(ofs, cur_time, cur_time + 1); + advance_clock(ofs, cur_time, cur_time + 1); + } std::cout << "[SCOPE] trace dump done! 
- " << (cur_time/2) << " cycles" << std::endl; From 4f11278d2cce934e50b5c13d1a2bc839b5ff6429 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 25 Sep 2024 10:28:19 -0700 Subject: [PATCH 311/488] scope_tap bug fixes and improvements --- hw/rtl/VX_define.vh | 12 +++ hw/rtl/VX_scope.vh | 59 +++++++++------ hw/rtl/Vortex_axi.sv | 2 +- hw/rtl/afu/opae/vortex_afu.sv | 131 +++++++++++++++------------------ hw/rtl/afu/xrt/VX_afu_wrap.sv | 84 +++++++++------------ hw/rtl/core/VX_core.sv | 2 +- hw/rtl/core/VX_execute.sv | 2 +- hw/rtl/core/VX_fetch.sv | 30 +++----- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_issue_slice.sv | 28 +++---- hw/rtl/core/VX_lsu_slice.sv | 33 +++++---- hw/rtl/libs/VX_edge_trigger.sv | 43 +++++++++++ hw/rtl/libs/VX_scope_switch.sv | 9 ++- hw/rtl/libs/VX_scope_tap.sv | 130 ++++++++++++++++---------------- runtime/common/scope.cpp | 17 ++++- runtime/opae/vortex.cpp | 2 +- runtime/xrt/vortex.cpp | 2 +- 17 files changed, 312 insertions(+), 276 deletions(-) create mode 100644 hw/rtl/libs/VX_edge_trigger.sv diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 85fa40f0d..7c1590dff 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -317,6 +317,18 @@ /////////////////////////////////////////////////////////////////////////////// +`define NEG_EDGE(dst, src) \ + wire dst; \ + VX_edge_trigger #( \ + .POS (0), \ + .INIT (0) \ + ) __``dst``__ ( \ + .clk (clk), \ + .reset (1'b0), \ + .data_in (src), \ + .data_out (dst) \ + ) + `define BUFFER_EX(dst, src, ena, latency) \ VX_pipe_register #( \ .DATAW ($bits(dst)), \ diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index a677975ce..b88a2718b 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -21,10 +21,20 @@ input wire scope_bus_in, \ output wire scope_bus_out, +`define SCOPE_IO_BIND(__i) \ + .scope_reset (scope_reset_w[__i]), \ + .scope_bus_in (scope_bus_in_w[__i]), \ + .scope_bus_out (scope_bus_out_w[__i]), + +`define SCOPE_IO_UNUSED(__i) \ + `UNUSED_VAR (scope_reset_w[__i]); \ + 
`UNUSED_VAR (scope_bus_in_w[__i]); \ + assign scope_bus_out_w[__i] = 0; + `define SCOPE_IO_SWITCH(__count) \ - wire scope_bus_in_w [__count]; \ - wire scope_bus_out_w [__count]; \ - `RESET_RELAY_EX(scope_reset_w, scope_reset, __count, `MAX_FANOUT); \ + wire [__count-1:0] scope_bus_in_w; \ + wire [__count-1:0] scope_bus_out_w; \ + wire [__count-1:0] scope_reset_w = {__count{scope_reset}}; \ VX_scope_switch #( \ .N (__count) \ ) scope_switch ( \ @@ -34,35 +44,42 @@ .rsp_out (scope_bus_out), \ .req_out (scope_bus_in_w), \ .rsp_in (scope_bus_out_w) \ - ); + ) -`define SCOPE_IO_BIND(__i) \ - .scope_reset (scope_reset_w[__i]), \ - .scope_bus_in (scope_bus_in_w[__i]), \ - .scope_bus_out (scope_bus_out_w[__i]), +`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __start, __stop, __depth) \ + VX_scope_tap #( \ + .SCOPE_ID (__id), \ + .TRIGGERW (__triggers_w), \ + .PROBEW (__probes_w), \ + .DEPTH (__depth) \ + ) scope_tap_``idx ( \ + .clk (clk), \ + .reset (scope_reset_w[__idx]), \ + .start (__start), \ + .stop (__stop), \ + .triggers(__triggers), \ + .probes (__probes), \ + .bus_in (scope_bus_in_w[__idx]), \ + .bus_out(scope_bus_out_w[__idx]) \ + ) -`define SCOPE_IO_UNUSED() \ - `UNUSED_VAR (scope_reset); \ - `UNUSED_VAR (scope_bus_in); \ - assign scope_bus_out = 0; - -`define SCOPE_IO_UNUSED_W(__i) \ - `UNUSED_VAR (scope_reset_w[__i]); \ - `UNUSED_VAR (scope_bus_in_w[__i]); \ - assign scope_bus_out_w[__i] = 0; +`define SCOPE_TAP(__idx, __id, __triggers, __probes, __start, __stop, __depth) \ + `SCOPE_TAP_EX(__idx, __id, $bits(__triggers), $bits(__probes), __triggers, __probes, __start, __stop, __depth) `else `define SCOPE_IO_DECL -`define SCOPE_IO_SWITCH(__count) - `define SCOPE_IO_BIND(__i) -`define SCOPE_IO_UNUSED_W(__i) - `define SCOPE_IO_UNUSED(__i) +`define SCOPE_IO_SWITCH(__count) + +`define SCOPE_TAP(__idx, __id, __triggers, __probes, __depth) + +`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __depth) 
+ `endif `endif // VX_SCOPE_VH diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 758206396..7d238aacd 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -100,7 +100,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; wire mem_rsp_ready; - `SCOPE_IO_SWITCH (1) + `SCOPE_IO_SWITCH (1); Vortex vortex ( `SCOPE_IO_BIND (0) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 1440b2808..38994c1c5 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -932,7 +932,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data; - `SCOPE_IO_SWITCH (2) + `SCOPE_IO_SWITCH (2); Vortex vortex ( `SCOPE_IO_BIND (1) @@ -1023,80 +1023,65 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ end wire state_changed = (state != state_prev); - `define AFU_TRIGGERS { \ - reset, \ - vx_reset, \ - vx_busy, \ - vx_mem_req_fire, \ - vx_mem_rsp_fire, \ - vx_dcr_wr_valid, \ - state_changed, \ - avs_write_fire, \ - avs_read_fire, \ - avs_waitrequest[0], \ - avs_readdatavalid[0], \ - cp2af_sRxPort.c0.mmioRdValid, \ - cp2af_sRxPort.c0.mmioWrValid, \ - cp2af_sRxPort.c0.rspValid, \ - cp2af_sRxPort.c1.rspValid, \ - af2cp_sTxPort.c0.valid, \ - af2cp_sTxPort.c1.valid, \ - cp2af_sRxPort.c0TxAlmFull, \ - cp2af_sRxPort.c1TxAlmFull, \ - af2cp_sTxPort.c2.mmioRdValid, \ - cci_wr_req_fire, \ - cci_wr_rsp_fire, \ - cci_rd_req_fire, \ - cci_rd_rsp_fire, \ - cci_pending_reads_full, \ - cci_pending_writes_empty, \ - cci_pending_writes_full \ - } + `NEG_EDGE (reset_negedge, reset); - `define AFU_PROBES { \ - cmd_type, \ - state, \ - vx_mem_req_rw, \ - vx_mem_req_byteen, \ - vx_mem_req_addr, \ - vx_mem_req_data, \ - vx_mem_req_tag, \ - vx_mem_rsp_data, \ - vx_mem_rsp_tag, \ - vx_dcr_wr_addr, \ - vx_dcr_wr_data, \ - 
mmio_req_hdr.address, \ - cp2af_sRxPort.c0.hdr.mdata, \ - af2cp_sTxPort.c0.hdr.address, \ - af2cp_sTxPort.c0.hdr.mdata, \ - af2cp_sTxPort.c1.hdr.address, \ - avs_address[0], \ - avs_byteenable[0], \ - avs_burstcount[0], \ - cci_mem_rd_req_ctr, \ - cci_mem_wr_req_ctr, \ - cci_rd_req_ctr, \ - cci_rd_rsp_ctr, \ - cci_wr_req_ctr \ - } - - VX_scope_tap #( - .SCOPE_ID (0), - .TRIGGERW ($bits(`AFU_TRIGGERS)), - .PROBEW ($bits(`AFU_PROBES)), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers(`AFU_TRIGGERS), - .probes (`AFU_PROBES), - .bus_in (scope_bus_in_w[0]), - .bus_out(scope_bus_out_w[0]) - ); + `SCOPE_TAP (0, 0, { + vx_reset, + vx_busy, + vx_mem_req_fire, + vx_mem_rsp_fire, + vx_dcr_wr_valid, + state_changed, + avs_write_fire, + avs_read_fire, + avs_waitrequest[0], + avs_readdatavalid[0], + cp2af_sRxPort.c0.mmioRdValid, + cp2af_sRxPort.c0.mmioWrValid, + cp2af_sRxPort.c0.rspValid, + cp2af_sRxPort.c1.rspValid, + af2cp_sTxPort.c0.valid, + af2cp_sTxPort.c1.valid, + cp2af_sRxPort.c0TxAlmFull, + cp2af_sRxPort.c1TxAlmFull, + af2cp_sTxPort.c2.mmioRdValid, + cci_wr_req_fire, + cci_wr_rsp_fire, + cci_rd_req_fire, + cci_rd_rsp_fire, + cci_pending_reads_full, + cci_pending_writes_empty, + cci_pending_writes_full + },{ + cmd_type, + state, + vx_mem_req_rw, + vx_mem_req_byteen, + vx_mem_req_addr, + vx_mem_req_data, + vx_mem_req_tag, + vx_mem_rsp_data, + vx_mem_rsp_tag, + vx_dcr_wr_addr, + vx_dcr_wr_data, + mmio_req_hdr.address, + cp2af_sRxPort.c0.hdr.mdata, + af2cp_sTxPort.c0.hdr.address, + af2cp_sTxPort.c0.hdr.mdata, + af2cp_sTxPort.c1.hdr.address, + avs_address[0], + avs_byteenable[0], + avs_burstcount[0], + cci_mem_rd_req_ctr, + cci_mem_wr_req_ctr, + cci_rd_req_ctr, + cci_rd_rsp_ctr, + cci_wr_req_ctr + }, + reset_negedge, 1'b0, 4096 + ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) `endif /////////////////////////////////////////////////////////////////////////////// diff --git 
a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index e515b080b..73da63e58 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -241,7 +241,7 @@ module VX_afu_wrap #( assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET; end - `SCOPE_IO_SWITCH (2) + `SCOPE_IO_SWITCH (2); Vortex_axi #( .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), @@ -309,55 +309,41 @@ module VX_afu_wrap #( `ifdef SCOPE `ifdef DBG_SCOPE_AFU - `define AFU_TRIGGERS { \ - reset, \ - ap_reset, \ - ap_start, \ - ap_done, \ - ap_idle, \ - interrupt, \ - vx_reset, \ - vx_busy, \ - dcr_wr_valid, \ - m_axi_mem_awvalid_a[0], \ - m_axi_mem_awready_a[0], \ - m_axi_mem_wvalid_a[0], \ - m_axi_mem_wready_a[0], \ - m_axi_mem_bvalid_a[0], \ - m_axi_mem_bready_a[0], \ - m_axi_mem_arvalid_a[0], \ - m_axi_mem_arready_a[0], \ - m_axi_mem_rvalid_a[0], \ - m_axi_mem_rready_a[0] \ - } - `define AFU_PROBES { \ - dcr_wr_addr, \ - dcr_wr_data, \ - vx_pending_writes, \ - m_axi_mem_awaddr_u[0], \ - m_axi_mem_awid_a[0], \ - m_axi_mem_bid_a[0], \ - m_axi_mem_araddr_u[0], \ - m_axi_mem_arid_a[0], \ - m_axi_mem_rid_a[0] \ - } - VX_scope_tap #( - .SCOPE_ID (0), - .TRIGGERW ($bits(`AFU_TRIGGERS)), - .PROBEW ($bits(`AFU_PROBES)), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers(`AFU_TRIGGERS), - .probes (`AFU_PROBES), - .bus_in (scope_bus_in_w[0]), - .bus_out(scope_bus_out_w[0]) - ); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP (0, 0, { + ap_reset, + ap_start, + ap_done, + ap_idle, + interrupt, + vx_reset, + vx_busy, + dcr_wr_valid, + m_axi_mem_awvalid_a[0], + m_axi_mem_awready_a[0], + m_axi_mem_wvalid_a[0], + m_axi_mem_wready_a[0], + m_axi_mem_bvalid_a[0], + m_axi_mem_bready_a[0], + m_axi_mem_arvalid_a[0], + m_axi_mem_arready_a[0], + m_axi_mem_rvalid_a[0], + m_axi_mem_rready_a[0] + }, { + dcr_wr_addr, + dcr_wr_data, + vx_pending_writes, + m_axi_mem_awaddr_u[0], + 
m_axi_mem_awid_a[0], + m_axi_mem_bid_a[0], + m_axi_mem_araddr_u[0], + m_axi_mem_arid_a[0], + m_axi_mem_rid_a[0] + }, + reset_negedge, 1'b0, 4096 + ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 1d3e12613..260cedca3 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -84,7 +84,7 @@ module VX_core import VX_gpu_pkg::*; #( .base_dcrs (base_dcrs) ); - `SCOPE_IO_SWITCH (3) + `SCOPE_IO_SWITCH (3); VX_schedule #( .INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)), diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index 6c148649b..4f66757f1 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -61,7 +61,7 @@ module VX_execute import VX_gpu_pkg::*; #( .branch_ctl_if (branch_ctl_if) ); - `SCOPE_IO_SWITCH (1) + `SCOPE_IO_SWITCH (1); VX_lsu_unit #( .INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID)) diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index c1c0e6a57..baeb152f2 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -137,34 +137,24 @@ module VX_fetch import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_FETCH - VX_scope_tap #( - .SCOPE_ID (1), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + - ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + - (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers ({ - reset, + `SCOPE_IO_SWITCH (1); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 1, 3, ( + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + + ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH + ), { schedule_fire, icache_req_fire, icache_rsp_fire - }), - .probes ({ + }, { schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, 
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag - }), - .bus_in (scope_bus_in), - .bus_out (scope_bus_out) + }, + reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index a2e689b7c..84bcc0072 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -50,7 +50,7 @@ module VX_issue import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0] decode_ready_in; assign decode_if.ready = decode_ready_in[decode_isw]; - `SCOPE_IO_SWITCH (`ISSUE_WIDTH) + `SCOPE_IO_SWITCH (`ISSUE_WIDTH); for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices VX_decode_if #( diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 38e54fcc0..a496af8e3 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -95,23 +95,16 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_ISSUE - VX_scope_tap #( - .SCOPE_ID (2), - .TRIGGERW (2), - .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + + `SCOPE_IO_SWITCH (1); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 2, 2, ( + `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers ({ + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 + ), { operands_if_fire, writeback_if_valid - }), - .probes ({ + }, { operands_if.data.uuid, operands_if.data.tmask, operands_if.data.ex_type, @@ -126,12 +119,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #( writeback_if.data.rd, writeback_if.data.data, writeback_if.data.eop - }), - .bus_in (scope_bus_in), - .bus_out (scope_bus_out) + }, + 
reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index d703291c4..962bcd70c 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,23 +536,26 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_LSU - VX_scope_tap #( - .SCOPE_ID (3), - .TRIGGERW (2), - .PROBEW (1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH), - .DEPTH (4096) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers({mem_req_fire, mem_rsp_fire}), - .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), - .bus_in (scope_bus_in), - .bus_out(scope_bus_out) + `SCOPE_IO_SWITCH (1); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 3, 2, ( + 1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH + ), { + mem_req_fire, + mem_rsp_fire + }, { + mem_req_rw, + full_addr, + mem_req_byteen, + mem_req_data, + execute_if.data.uuid, + rsp_data, + rsp_uuid + }, + reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) `endif `endif `ifdef CHIPSCOPE diff --git a/hw/rtl/libs/VX_edge_trigger.sv b/hw/rtl/libs/VX_edge_trigger.sv new file mode 100644 index 000000000..9e876985c --- /dev/null +++ b/hw/rtl/libs/VX_edge_trigger.sv @@ -0,0 +1,43 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +`TRACING_OFF +module VX_edge_trigger #( + parameter POS = 0, + parameter INIT = 0 +) ( + input wire clk, + input wire reset, + input wire data_in, + output wire data_out +); + reg prev; + + always @(posedge clk) begin + if (reset) begin + prev <= INIT; + end else begin + prev <= data_in; + end + end + + if (POS != 0) begin : g_pos + assign data_out = data_in & ~prev; + end else begin : g_neg + assign data_out = ~data_in & prev; + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_scope_switch.sv b/hw/rtl/libs/VX_scope_switch.sv index da5f13bcb..2e964f31e 100644 --- a/hw/rtl/libs/VX_scope_switch.sv +++ b/hw/rtl/libs/VX_scope_switch.sv @@ -20,8 +20,8 @@ module VX_scope_switch #( input wire clk, input wire reset, input wire req_in, - output wire req_out [N], - input wire rsp_in [N], + output wire [N-1:0] req_out, + input wire [N-1:0] rsp_in, output wire rsp_out ); if (N > 1) begin : g_switch @@ -46,7 +46,10 @@ module VX_scope_switch #( end end - assign req_out = req_out_r; + for (genvar i = 0; i < N; ++i) begin : g_req_out + assign req_out[i] = req_out_r[i]; + end + assign rsp_out = rsp_out_r; end else begin : g_passthru diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index b1977d388..8b6eee65e 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -33,12 +33,13 @@ module VX_scope_tap #( output wire bus_out ); localparam CTR_WIDTH = 64; - localparam DATA_IDX_WISTH = `LOG2UP(TX_DATAW); + localparam SER_CTR_WIDTH = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + TRIGGERW; 
localparam ADDRW = `CLOG2(DEPTH); + localparam SIZEW = `CLOG2(DEPTH+1); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; localparam DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); - localparam BLOCK_IDX_WISTH = `LOG2UP(DATA_BLOCKS); + localparam BLOCK_IDX_WIDTH = `LOG2UP(DATA_BLOCKS); localparam CTRL_STATE_IDLE = 2'd0; localparam CTRL_STATE_RECV = 2'd1; @@ -47,8 +48,8 @@ module VX_scope_tap #( localparam CTRL_STATE_BITS = 2; localparam TAP_STATE_IDLE = 2'd0; - localparam TAP_STATE_WAIT = 2'd1; - localparam TAP_STATE_RUN = 2'd2; + localparam TAP_STATE_RUN = 2'd1; + localparam TAP_STATE_DONE = 2'd2; localparam TAP_STATE_BITS = 2; localparam CMD_GET_WIDTH = 3'd0; @@ -57,13 +58,14 @@ module VX_scope_tap #( localparam CMD_GET_DATA = 3'd3; localparam CMD_SET_START = 3'd4; localparam CMD_SET_STOP = 3'd5; + localparam CMD_SET_DEPTH = 3'd6; localparam CMD_TYPE_BITS = 3; - localparam SEND_TYPE_WIDTH = 2'd0; - localparam SEND_TYPE_COUNT = 2'd1; - localparam SEND_TYPE_START = 2'd2; - localparam SEND_TYPE_DATA = 2'd3; - localparam SEND_TYPE_BITS = 2; + localparam SEND_TYPE_WIDTH = 2'd0; + localparam SEND_TYPE_COUNT = 2'd1; + localparam SEND_TYPE_START = 2'd2; + localparam SEND_TYPE_DATA = 2'd3; + localparam SEND_TYPE_BITS = 2; `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) @@ -73,12 +75,13 @@ module VX_scope_tap #( reg [SEND_TYPE_BITS-1:0] send_type; reg [CTR_WIDTH-1:0] timestamp, start_time; - reg [CTR_WIDTH-1:0] start_delay, delay_cntr; + reg [CTR_WIDTH-1:0] start_delay, stop_delay; reg [`UP(TRIGGERW)-1:0] prev_trig; reg [IDLE_CTRW-1:0] delta; - reg cmd_start, dflush; + reg cmd_start, cmd_stop; + reg dflush; - reg [ADDRW-1:0] waddr, waddr_end; + reg [SIZEW-1:0] waddr, waddr_end; wire [DATAW-1:0] data_in; wire write_en; @@ -105,7 +108,7 @@ module VX_scope_tap #( .read (1'b1), .wren (1'b1), .write (write_en), - .waddr (waddr), + .waddr (waddr[ADDRW-1:0]), .wdata (delta), .raddr (raddr), .rdata (delta_value) 
@@ -128,7 +131,7 @@ module VX_scope_tap #( .read (1'b1), .wren (1'b1), .write (write_en), - .waddr (waddr), + .waddr (waddr[ADDRW-1:0]), .wdata (data_in), .raddr (raddr), .rdata (data_value) @@ -144,35 +147,16 @@ module VX_scope_tap #( always @(posedge clk) begin if (reset) begin - tap_state <= TAP_STATE_IDLE; - delta <= '0; - dflush <= 0; - prev_trig <= '0; - waddr <= '0; + tap_state <= TAP_STATE_IDLE; + delta <= '0; + dflush <= 0; + prev_trig <= '0; + waddr <= '0; end else begin case (tap_state) TAP_STATE_IDLE: begin if (start || cmd_start) begin - delta <= '0; - dflush <= 1; - if (0 == start_delay) begin - tap_state <= TAP_STATE_RUN; - start_time <= timestamp; - `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: scope_tap%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) - `endif - end else begin - tap_state <= TAP_STATE_WAIT; - delay_cntr <= start_delay; - `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%t: scope_tap%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)) - `endif - end - end - end - TAP_STATE_WAIT: begin - delay_cntr <= delay_cntr - CTR_WIDTH'(1); - if (1 == delay_cntr) begin + dflush <= 1; tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE @@ -182,10 +166,10 @@ module VX_scope_tap #( end TAP_STATE_RUN: begin dflush <= 0; - if (!stop && (waddr < waddr_end)) begin + if (!(stop || cmd_stop) && (waddr < waddr_end)) begin if (TRIGGERW != 0) begin if (dflush || (triggers != prev_trig)) begin - waddr <= waddr + ADDRW'(1); + waddr <= waddr + SIZEW'(1); delta <= '0; end else begin delta <= delta + IDLE_CTRW'(1); @@ -193,10 +177,10 @@ module VX_scope_tap #( end prev_trig <= triggers; end else begin - waddr <= waddr + ADDRW'(1); + waddr <= waddr + SIZEW'(1); end end else begin - tap_state <= TAP_STATE_IDLE; + tap_state <= TAP_STATE_DONE; `ifdef DBG_TRACE_SCOPE `TRACE(2, ("%t: scope_tap%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif @@ -218,8 +202,8 @@ module VX_scope_tap #( `UNUSED_VAR 
(ser_buf_in) wire [DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; - logic [BLOCK_IDX_WISTH-1:0] data_block_idx; - reg [DATA_IDX_WISTH-1:0] ser_tx_ctr; + logic [BLOCK_IDX_WIDTH-1:0] data_block_idx; + reg [SER_CTR_WIDTH-1:0] ser_tx_ctr; reg is_read_data; reg is_get_data; @@ -246,8 +230,8 @@ module VX_scope_tap #( && (send_type == SEND_TYPE_DATA) && (ser_tx_ctr == 0) && is_read_data) begin - if (data_block_idx < BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin - data_block_idx <= data_block_idx + BLOCK_IDX_WISTH'(1); + if (data_block_idx < BLOCK_IDX_WIDTH'(DATA_BLOCKS-1)) begin + data_block_idx <= data_block_idx + BLOCK_IDX_WIDTH'(1); end else begin data_block_idx <= '0; end @@ -257,15 +241,15 @@ module VX_scope_tap #( assign data_block_idx = 0; end - wire [ADDRW-1:0] raddr_n = raddr + ADDRW'(1); - always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; send_type <= SEND_TYPE_BITS'(SEND_TYPE_WIDTH); - waddr_end <= ADDRW'(DEPTH-1); + waddr_end <= SIZEW'(DEPTH); cmd_start <= 0; + cmd_stop <= 0; start_delay <= '0; + stop_delay <= '0; bus_out_r <= 0; raddr <= '0; is_read_data<= 0; @@ -273,17 +257,28 @@ module VX_scope_tap #( is_get_data <= 0; end else begin bus_out_r <= 0; - cmd_start <= 0; is_get_data <= 0; + + if (start_delay != 0) begin + start_delay <= start_delay - CTR_WIDTH'(1); + end + + if (stop_delay != 0) begin + stop_delay <= stop_delay - CTR_WIDTH'(1); + end + + cmd_start <= (start_delay == CTR_WIDTH'(1)); + cmd_stop <= (stop_delay == CTR_WIDTH'(1)); + case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin - ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); + ser_tx_ctr <= SER_CTR_WIDTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_RECV; end end CTRL_STATE_RECV: begin - ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); + ser_tx_ctr <= ser_tx_ctr - SER_CTR_WIDTH'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin // check if command is for this scope @@ -294,18 +289,22 @@ module VX_scope_tap #( ctrl_state <= CTRL_STATE_IDLE; case (cmd_type) CMD_SET_START: 
begin - start_delay <= 64'(cmd_data); - cmd_start <= 1; + start_delay <= CTR_WIDTH'(cmd_data); + cmd_start <= (cmd_data == 0); end CMD_SET_STOP: begin - waddr_end <= ADDRW'(cmd_data); + stop_delay <= CTR_WIDTH'(cmd_data); + cmd_stop <= (cmd_data == 0); + end + CMD_SET_DEPTH: begin + waddr_end <= SIZEW'(cmd_data); end CMD_GET_WIDTH, CMD_GET_START, CMD_GET_COUNT, CMD_GET_DATA: begin - send_type <= SEND_TYPE_BITS'(cmd_type); - ser_tx_ctr <= DATA_IDX_WISTH'(TX_DATAW-1); + send_type <= SEND_TYPE_BITS'(cmd_type); + ser_tx_ctr <= SER_CTR_WIDTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_SEND; bus_out_r <= 1; end @@ -345,12 +344,9 @@ module VX_scope_tap #( is_get_data <= 1; if (ser_tx_ctr == 0) begin if (is_read_data) begin - if (data_block_idx == BLOCK_IDX_WISTH'(DATA_BLOCKS-1)) begin - raddr <= raddr_n; + if (data_block_idx == BLOCK_IDX_WIDTH'(DATA_BLOCKS-1)) begin + raddr <= raddr + ADDRW'(1); is_read_data <= 0; // switch to delta mode - if (raddr_n == waddr) begin - raddr <= 0; // end-of-samples reset - end end end else begin is_read_data <= 1; // switch to data mode @@ -368,7 +364,7 @@ module VX_scope_tap #( end default:; endcase - ser_tx_ctr <= ser_tx_ctr - DATA_IDX_WISTH'(1); + ser_tx_ctr <= ser_tx_ctr - SER_CTR_WIDTH'(1); if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; end @@ -378,12 +374,12 @@ module VX_scope_tap #( end end - wire [BLOCK_IDX_WISTH-1:0] data_block_idx_r; - wire [DATA_IDX_WISTH-1:0] ser_tx_ctr_r; + wire [BLOCK_IDX_WIDTH-1:0] data_block_idx_r; + wire [SER_CTR_WIDTH-1:0] ser_tx_ctr_r; wire is_read_data_r; VX_pipe_register #( - .DATAW (1 + DATA_IDX_WISTH + BLOCK_IDX_WISTH) + .DATAW (1 + SER_CTR_WIDTH + BLOCK_IDX_WIDTH) ) data_sel_buf ( .clk (clk), .reset (reset), diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 820fa54f5..361a327ca 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -32,6 +32,8 @@ #define TIMEOUT_TIME (60*60) +#define MAX_DELAY_CYCLES 10000 + #define MMIO_SCOPE_READ 
(AFU_IMAGE_MMIO_SCOPE_READ * 4) #define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -41,6 +43,7 @@ #define CMD_GET_DATA 3 #define CMD_SET_START 4 #define CMD_SET_STOP 5 +#define CMD_SET_DEPTH 6 #define CHECK_ERR(_expr) \ do { \ @@ -96,7 +99,7 @@ static void dump_module(std::ofstream& ofs, auto itt = tails.find(name); if (itt != tails.end()) { for (auto& signal : itt->second->signals) { - ofs << indent << " $var reg " << signal.width << " " << signal.id << " " << signal.name << " $end" << std::endl; + ofs << indent << " $var wire " << signal.width << " " << signal.id << " " << signal.name << " $end" << std::endl; } } @@ -114,7 +117,7 @@ static void dump_header(std::ofstream& ofs, std::vector& taps) { ofs << "$version Generated by Vortex Scope Analyzer $end" << std::endl; ofs << "$timescale 1 ns $end" << std::endl; ofs << "$scope module TOP $end" << std::endl; - ofs << " $var reg 1 0 clk $end" << std::endl; + ofs << " $var wire 1 0 clk $end" << std::endl; std::unordered_map> hierarchy; std::unordered_set heads; @@ -160,6 +163,14 @@ static tap_t* find_earliest_tap(std::vector& taps) { } static uint64_t advance_clock(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { + uint64_t delta = next_time - cur_time; + if (delta > MAX_DELAY_CYCLES) { + ofs << '#' << (cur_time * 2 + 0) << std::endl; + ofs << "bx 0" << std::endl; + ofs << '#' << (cur_time * 2 + 1) << std::endl; + ofs << "bx 0" << std::endl; + cur_time = next_time - MAX_DELAY_CYCLES; + } while (cur_time < next_time) { ofs << '#' << (cur_time * 2 + 0) << std::endl; ofs << "b0 0" << std::endl; @@ -350,7 +361,6 @@ int vx_scope_stop(vx_device_h hdevice) { uint64_t cmd_count = (tap.id << 3) | CMD_GET_COUNT; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_count)); CHECK_ERR(g_callback.registerRead(hdevice, &count)); - if (count == 0) continue; @@ -385,7 +395,6 @@ int vx_scope_stop(vx_device_h hdevice) { uint64_t cur_time = 0; auto tap = find_earliest_tap(taps); if (tap != nullptr) { - cur_time = 
(tap->cycle_time > 0) ? (tap->cycle_time-1) : 0; do { // advance clock cur_time = advance_clock(ofs, cur_time, tap->cycle_time); diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index f06f34bea..a7f77ee97 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -195,7 +195,7 @@ public: return device->api_.fpgaReadMMIO64(device->fpga_, 0, MMIO_SCOPE_READ, value); }; - CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { + CHECK_ERR(vx_scope_start(&callback, this, -1, -1), { api_.fpgaClose(fpga_); return err; }); diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index d542e72fe..ffc7870d4 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -265,7 +265,7 @@ public: *value = (((uint64_t)value_hi) << 32) | value_lo; return 0; }; - CHECK_ERR(vx_scope_start(&callback, this, 0, -1), { + CHECK_ERR(vx_scope_start(&callback, this, -1, -1), { return err; }); } From 27543e240edd5763df0c275db9f1bf19ef2380de Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 25 Sep 2024 19:11:40 -0700 Subject: [PATCH 312/488] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 27 +++++++++------------------ hw/rtl/core/VX_fetch.sv | 15 +++++++-------- hw/rtl/core/VX_issue_slice.sv | 14 +++++--------- hw/rtl/core/VX_lsu_slice.sv | 10 +++++----- 4 files changed, 26 insertions(+), 40 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 38994c1c5..ff5ce4179 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1012,11 +1012,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef DBG_SCOPE_AFU - wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0]; - wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; - wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready; - wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready; - reg [STATE_WIDTH-1:0] 
state_prev; always @(posedge clk) begin state_prev <= state; @@ -1028,12 +1023,15 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `SCOPE_TAP (0, 0, { vx_reset, vx_busy, - vx_mem_req_fire, - vx_mem_rsp_fire, + vx_mem_req_valid, + vx_mem_req_ready, + vx_mem_rsp_valid, + vx_mem_rsp_ready, vx_dcr_wr_valid, state_changed, - avs_write_fire, - avs_read_fire, + avs_read[0], + avs_write[0], + avs_waitrequest[0], avs_waitrequest[0], avs_readdatavalid[0], cp2af_sRxPort.c0.mmioRdValid, @@ -1044,14 +1042,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ af2cp_sTxPort.c1.valid, cp2af_sRxPort.c0TxAlmFull, cp2af_sRxPort.c1TxAlmFull, - af2cp_sTxPort.c2.mmioRdValid, - cci_wr_req_fire, - cci_wr_rsp_fire, - cci_rd_req_fire, - cci_rd_rsp_fire, - cci_pending_reads_full, - cci_pending_writes_empty, - cci_pending_writes_full + af2cp_sTxPort.c2.mmioRdValid },{ cmd_type, state, @@ -1081,7 +1072,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED(0) + `SCOPE_IO_UNUSED_W(0) `endif /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index baeb152f2..d96ef7abd 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -41,11 +41,7 @@ module VX_fetch import VX_gpu_pkg::*; #( wire [`UUID_WIDTH-1:0] rsp_uuid; wire [`NW_WIDTH-1:0] req_tag, rsp_tag; - wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_req_fire = icache_req_valid && icache_req_ready; - wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; - `UNUSED_VAR (schedule_fire) - `UNUSED_VAR (icache_rsp_fire) assign req_tag = schedule_if.data.wid; @@ -139,13 +135,16 @@ module VX_fetch import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_FETCH `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 1, 3, ( + `SCOPE_TAP_EX (0, 1, 6, ( 
`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH ), { - schedule_fire, - icache_req_fire, - icache_rsp_fire + schedule_if.valid, + schedule_if.ready, + icache_bus_if.req_valid, + icache_bus_if.req_ready, + icache_bus_if.rsp_valid, + icache_bus_if.rsp_ready }, { schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index a496af8e3..583967cc8 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -36,11 +36,6 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_scoreboard_if scoreboard_if(); VX_operands_if operands_if(); - wire operands_if_fire = operands_if.valid && operands_if.ready; - wire writeback_if_valid = writeback_if.valid; - `UNUSED_VAR (operands_if_fire) - `UNUSED_VAR (writeback_if_valid) - VX_ibuffer #( .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) ) ibuffer ( @@ -97,13 +92,14 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_ISSUE `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 2, 2, ( + `SCOPE_TAP_EX (0, 2, 3, ( `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 ), { - operands_if_fire, - writeback_if_valid + operands_if.valid, + operands_if.ready, + writeback_if.valid }, { operands_if.data.uuid, operands_if.data.tmask, @@ -138,7 +134,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin - if (operands_if_fire) begin + if (operands_if.valid && operands_if.ready) begin `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) 
trace_ex_type(1, operands_if.data.ex_type); `TRACE(1, (", op=")) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 962bcd70c..4ca88c7b3 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -102,8 +102,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire mem_req_fire = mem_req_valid && mem_req_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; - `UNUSED_VAR (mem_req_fire) - `UNUSED_VAR (mem_rsp_fire) wire mem_rsp_sop_pkt, mem_rsp_eop_pkt; wire no_rsp_buf_valid, no_rsp_buf_ready; @@ -538,11 +536,13 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 3, 2, ( + `SCOPE_TAP_EX (0, 3, 4, ( 1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH ), { - mem_req_fire, - mem_rsp_fire + mem_req_valid, + mem_req_ready, + mem_rsp_valid, + mem_rsp_ready }, { mem_req_rw, full_addr, From 9a3eb7405188ef6b391ec67aa0d5a563aee5d62f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 26 Sep 2024 09:50:38 -0700 Subject: [PATCH 313/488] adding scope.py support for structs --- hw/rtl/VX_scope.vh | 8 +-- hw/rtl/afu/opae/vortex_afu.sv | 1 - hw/scripts/scope.py | 117 ++++++++++++++++++++++++---------- 3 files changed, 88 insertions(+), 38 deletions(-) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index b88a2718b..43ad91e85 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -38,12 +38,12 @@ VX_scope_switch #( \ .N (__count) \ ) scope_switch ( \ - .clk (clk), \ - .reset (scope_reset), \ - .req_in (scope_bus_in), \ + .clk (clk), \ + .reset (scope_reset), \ + .req_in (scope_bus_in), \ .rsp_out (scope_bus_out), \ .req_out (scope_bus_in_w), \ - .rsp_in (scope_bus_out_w) \ + .rsp_in (scope_bus_out_w) \ ) `define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __start, __stop, __depth) \ diff --git a/hw/rtl/afu/opae/vortex_afu.sv 
b/hw/rtl/afu/opae/vortex_afu.sv index ff5ce4179..37afa9326 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1032,7 +1032,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ avs_read[0], avs_write[0], avs_waitrequest[0], - avs_waitrequest[0], avs_readdatavalid[0], cp2af_sRxPort.c0.mmioRdValid, cp2af_sRxPort.c0.mmioWrValid, diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 5361e8afe..931371643 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,9 +19,9 @@ import xml.etree.ElementTree as ET import re import json -vl_int_re = re.compile(r"\d+'s*h([\da-fA-F]+)") +vl_int_re = re.compile(r"\d+'s*h([\da-fA-F]+)") -def parse_vl_int(text): +def parse_vl_int(text): str_hex = re.sub(vl_int_re, r'\1', text) return int(str_hex, 16) @@ -34,15 +34,17 @@ def source_loc(xml_doc, xml_loc): end_col = loc[4] file = xml_doc.find(".//file/[@id='" + file_id + "']").get("filename") return file + " (" + start_line + ":" + start_col + "-" + end_line + ":" + end_col + ")" - + def parse_dtype_width(xml_doc, dtype_id): xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") - if xml_type.tag == "packarraydtype" or xml_type.tag == "unpackarraydtype": + if xml_type.tag in ["packarraydtype", "unpackarraydtype"]: sub_dtype_id = xml_type.get("sub_dtype_id") base_width = parse_dtype_width(xml_doc, sub_dtype_id) - const = xml_type.iter("const") - left = parse_vl_int(next(const).get("name")) - right = parse_vl_int(next(const).get("name")) + const_iter = xml_type.iter("const") + first_const = next(const_iter) + second_const = next(const_iter) + left = parse_vl_int(first_const.get("name")) + right = parse_vl_int(second_const.get("name")) return base_width * (left - right + 1) elif xml_type.tag == "structdtype": width = 0 @@ -65,31 +67,74 @@ def parse_dtype_width(xml_doc, dtype_id): if left != None and right != None: return int(left) - int(right) + 1 return 1 - + def parse_var_name(xml_doc, xml_node): if xml_node.tag == "varref": return xml_node.get("name") elif xml_node.tag == "varxref": name = xml_node.get("name") dotted = xml_node.get("dotted") - return dotted + '.' 
+ name + return f"{dotted}.{name}" else: raise ET.ParseError("invalid probe entry" + source_loc(xml_doc, xml_node.get("loc"))) return name -def parse_sel_name(xml_doc, xml_node): - name = parse_var_name(xml_doc, xml_node.find("*")) - const = xml_node.iter("const") - offset = parse_vl_int(next(const).get("name")) - #size = parse_vl_int(next(const).get("name")) - return name + '_' + str(offset) +def parse_sel_field(xml_doc, dtype_id, offset, width): + xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") + name = xml_type.get("name") + if xml_type.tag == "structdtype": + bit_offset = 0 + members = list(xml_type.findall("memberdtype")) + members.reverse() + for member in members: + sub_dtype_id = member.get("sub_dtype_id") + member_name = member.get("name") + member_width = parse_dtype_width(xml_doc, sub_dtype_id) + if bit_offset <= offset < bit_offset + member_width: + if sub_dtype_id: + sub_field = parse_sel_field(xml_doc, sub_dtype_id, offset - bit_offset, width) + return f".{member_name}{sub_field}" + else: + return f".{member_name}" + bit_offset += member_width + raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) + elif xml_type.tag in ["packarraydtype", "unpackarraydtype"]: + sub_dtype_id = xml_type.get("sub_dtype_id") + base_width = parse_dtype_width(xml_doc, sub_dtype_id) + if width > base_width: + return "" + array_index = offset // base_width + sub_offset = offset % base_width + array_sel_name = f"[{array_index}]" + sub_field = parse_sel_field(xml_doc, sub_dtype_id, sub_offset, width) + return f"{array_sel_name}{sub_field}" + elif xml_type.tag == "basicdtype": + if (offset == 0): + return "" + return f"_{offset}" + else: + raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) + return None -def parse_array_name(xml_doc, xml_node): +def parse_sel_name(xml_doc, xml_node): + first_child = xml_node.find("*") + name = parse_var_name(xml_doc, first_child) + dtype_id = 
first_child.get("dtype_id") + const_iter = xml_node.iter("const") + first_const = next(const_iter) + second_const = next(const_iter) + offset = parse_vl_int(first_const.get("name")) + width = parse_vl_int(second_const.get("name")) + return name + parse_sel_field(xml_doc, dtype_id, offset, width) + +def parse_arraysel_name(xml_doc, xml_node): if xml_node.tag == "arraysel": - name = parse_array_name(xml_doc, xml_node.find("*")) - xml_size = xml_node.find("const").get("name") - array_size = parse_vl_int(xml_size) - name = name + '_' + str(array_size) + first_child = xml_node.find("*") + name = parse_arraysel_name(xml_doc, first_child) + const_iter = xml_node.iter("const") + first_const = next(const_iter) + offset = parse_vl_int(first_const.get("name")) + name = f"{name}[{offset}]" else: name = parse_var_name(xml_doc, xml_node) return name @@ -97,9 +142,10 @@ def parse_array_name(xml_doc, xml_node): def parse_vl_port(xml_doc, xml_node, signals): total_width = 0 if xml_node.tag == "concat": - for xml_child in xml_node.findall("*"): + child_nodes = xml_node.findall("*") + for xml_child in child_nodes: total_width = total_width + parse_vl_port(xml_doc, xml_child, signals) - elif xml_node.tag == "varref" or xml_node.tag == "varxref": + elif xml_node.tag in ["varref", "varxref"]: name = parse_var_name(xml_doc, xml_node) dtype_id = xml_node.get("dtype_id") signal_width = parse_dtype_width(xml_doc, dtype_id) @@ -112,20 +158,25 @@ def parse_vl_port(xml_doc, xml_node, signals): signals.append([name, signal_width]) total_width = total_width + signal_width elif xml_node.tag == "arraysel": - name = parse_array_name(xml_doc, xml_node) + name = parse_arraysel_name(xml_doc, xml_node) dtype_id = xml_node.get("dtype_id") signal_width = parse_dtype_width(xml_doc, dtype_id) signals.append([name, signal_width]) total_width = total_width + signal_width else: raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_node.get("loc"))) + # Check for duplicate signal names + 
signal_names = [signal[0] for signal in signals] + duplicates = set([name for name in signal_names if signal_names.count(name) > 1]) + if len(duplicates) > 0: + raise ET.ParseError("duplicate signal names: " + ", ".join(duplicates)) return total_width def parse_xml(filename, max_taps): xml_doc = ET.parse(filename) modules = {} xml_modules = xml_doc.findall(".//module/[@origName='VX_scope_tap']") - for xml_module in xml_modules: + for xml_module in xml_modules: scope_id = parse_vl_int(xml_module.find(".//var/[@name='SCOPE_ID']/const").get("name")) triggerw = parse_vl_int(xml_module.find(".//var/[@name='TRIGGERW']/const").get("name")) probew = parse_vl_int(xml_module.find(".//var/[@name='PROBEW']/const").get("name")) @@ -133,16 +184,16 @@ def parse_xml(filename, max_taps): modules[module_name] = [scope_id, triggerw, probew] taps = [] - xml_instances = xml_doc.iter("instance") - for xml_instance in xml_instances: + xml_instances = xml_doc.iter("instance") + for xml_instance in xml_instances: if (max_taps != -1 and len(taps) >= max_taps): - break + break defName = xml_instance.get("defName") module = modules.get(defName) if module is None: continue triggers = [] - probes = [] + probes = [] w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='triggers']/*"), triggers) if w != module[1]: raise ET.ParseError("invalid triggers width: actual=" + str(w) + ", expected=" + str(module[1])) @@ -157,19 +208,19 @@ def parse_xml(filename, max_taps): path = hier.rsplit(".", 1)[0] taps.append({"id":module[0], "width":module[1] + module[2], - "signals":signals, + "signals":signals, "path":path}) return {"version":"0.1.0", "taps":taps} -def main(): +def main(): parser = argparse.ArgumentParser(description='Scope headers generator.') parser.add_argument('-o', nargs='?', default='scope.json', metavar='o', help='Output JSON manifest') parser.add_argument('-n', nargs='?', default=-1, metavar='n', type=int, help='Maximum number of taps to read') parser.add_argument('xml', 
help='Design XML descriptor file') args = parser.parse_args() #print("args=", args) - scope_taps = parse_xml(args.xml, args.n) + scope_taps = parse_xml(args.xml, args.n) with open(args.o, "w") as f: json.dump(scope_taps, f, ensure_ascii=False, indent=4) From 5db1937a5efa45fc71a6a413ed8bbd1b72914aa6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 07:52:38 -0700 Subject: [PATCH 314/488] fixed scope parser array indexing --- hw/scripts/scope.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 931371643..db5fda1a9 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -33,7 +33,7 @@ def source_loc(xml_doc, xml_loc): end_line = loc[3] end_col = loc[4] file = xml_doc.find(".//file/[@id='" + file_id + "']").get("filename") - return file + " (" + start_line + ":" + start_col + "-" + end_line + ":" + end_col + ")" + return f"{file} ({start_line}:{start_col}-{end_line}:{end_col})" def parse_dtype_width(xml_doc, dtype_id): xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") @@ -75,6 +75,8 @@ def parse_var_name(xml_doc, xml_node): name = xml_node.get("name") dotted = xml_node.get("dotted") return f"{dotted}.{name}" + elif xml_node.tag == "arraysel": + return parse_arraysel_name(xml_doc, xml_node) else: raise ET.ParseError("invalid probe entry" + source_loc(xml_doc, xml_node.get("loc"))) return name @@ -91,7 +93,7 @@ def parse_sel_field(xml_doc, dtype_id, offset, width): member_name = member.get("name") member_width = parse_dtype_width(xml_doc, sub_dtype_id) if bit_offset <= offset < bit_offset + member_width: - if sub_dtype_id: + if width != member_width and sub_dtype_id: sub_field = parse_sel_field(xml_doc, sub_dtype_id, offset - bit_offset, width) return f".{member_name}{sub_field}" else: @@ -105,13 +107,14 @@ def parse_sel_field(xml_doc, dtype_id, offset, width): return "" array_index = offset // base_width sub_offset = offset % base_width - 
array_sel_name = f"[{array_index}]" + array_sel_name = f"_{array_index}" # array indexing is not supported in VCD sub_field = parse_sel_field(xml_doc, sub_dtype_id, sub_offset, width) return f"{array_sel_name}{sub_field}" elif xml_type.tag == "basicdtype": - if (offset == 0): - return "" - return f"_{offset}" + if width == 1: + return F"[{offset}]" + end = width - 1 + offset + return F"[{end}:{offset}]" else: raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) return None @@ -134,7 +137,7 @@ def parse_arraysel_name(xml_doc, xml_node): const_iter = xml_node.iter("const") first_const = next(const_iter) offset = parse_vl_int(first_const.get("name")) - name = f"{name}[{offset}]" + name = f"{name}_{offset}" # array indexing is not supported in VCD else: name = parse_var_name(xml_doc, xml_node) return name From e9f19a0bf9fbfdcf16d2f5a2f3235c858346b412 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 09:13:24 -0700 Subject: [PATCH 315/488] fixed BRAM multi-dimensional array bug on Xilinx Vivado --- hw/rtl/libs/VX_mem_scheduler.sv | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 913656bf8..ef41a89f0 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -460,14 +460,15 @@ module VX_mem_scheduler #( end else begin : g_rsp_full reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES-1:00][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; + integer k = (rsp_batch_idx * 
CORE_CHANNELS * WORD_WIDTH) + (i * WORD_WIDTH); + rsp_store_n[k +: WORD_WIDTH] = mem_rsp_data_s[i]; end end end @@ -488,7 +489,8 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[i][j]; + localparam k = (i * CORE_CHANNELS * WORD_WIDTH) + (j * WORD_WIDTH); + assign crsp_data[r] = rsp_store_n[k +: WORD_WIDTH]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From 533ddffc476caef4673b99bd77b5d42d4f8d546f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 09:48:05 -0700 Subject: [PATCH 316/488] cleanup multi-dimensional array to improve synthesis compatibility --- hw/rtl/core/VX_operands.sv | 2 +- hw/rtl/fpu/VX_fpu_fpnew.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 20 ++++++++++++-------- hw/rtl/libs/VX_mem_scheduler.sv | 4 ++-- hw/rtl/libs/VX_pe_serializer.sv | 2 +- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index f30681263..42a91e4c2 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -69,7 +69,7 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_OPDS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st2, src_data_m_st2; + reg [NUM_SRC_OPDS-1:0][(`NUM_THREADS * `XLEN)-1:0] src_data_st2, src_data_m_st2; reg [NUM_SRC_OPDS-1:0] data_fetched_st1; diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 15a6c8d52..596a86513 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -90,7 +90,7 @@ module VX_fpu_fpnew reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out; - reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands; + logic [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands; wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result; fpnew_pkg::status_t fpu_status; diff --git 
a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 55cad2df7..c27f04da4 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -154,23 +154,27 @@ module VX_mem_coalescer #( wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r; - reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; - reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged; + wire [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; + wire [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged; - always @(*) begin - req_byteen_merged = '0; - req_data_merged = 'x; - for (integer i = 0; i < OUT_REQS; ++i) begin + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_data_merged + reg [DATA_RATIO-1:0][DATA_IN_SIZE-1:0] byteen_merged; + reg [DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] data_merged; + always @(*) begin + byteen_merged = '0; + data_merged = 'x; for (integer j = 0; j < DATA_RATIO; ++j) begin for (integer k = 0; k < DATA_IN_SIZE; ++k) begin // perform byte-level merge since each thread may have different bytes enabled if (current_pmask[i * DATA_RATIO + j] && in_req_byteen[DATA_RATIO * i + j][k]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; + byteen_merged[in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + data_merged[in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; end end end end + assign req_byteen_merged[i] = byteen_merged; + assign req_data_merged[i] = data_merged; end wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & req_rem_mask_r)); diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index ef41a89f0..3d6884f1a 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,8 +459,8 @@ module VX_mem_scheduler #( end else begin : 
g_rsp_full - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility + reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index 58fced410..4a66a6399 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -105,7 +105,7 @@ module VX_pe_serializer #( end end - reg [BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r, data_out_n; + reg [BATCH_SIZE-1:0][(NUM_PES * DATA_OUT_WIDTH)-1:0] data_out_r, data_out_n; always @(*) begin data_out_n = data_out_r; From f2c970868e9788cddf789d92f389cfb4ae24955e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 10:02:59 -0700 Subject: [PATCH 317/488] minor update --- hw/rtl/libs/VX_mem_scheduler.sv | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 3d6884f1a..73647911a 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,16 +459,16 @@ module VX_mem_scheduler #( end else begin : g_rsp_full + // use flattened arrays for BRAM synthesis compatibility reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store_n; // use flattened array for BRAM synthesis compatibility + reg [(CORE_BATCHES * CORE_CHANNELS)-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if 
((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - integer k = (rsp_batch_idx * CORE_CHANNELS * WORD_WIDTH) + (i * WORD_WIDTH); - rsp_store_n[k +: WORD_WIDTH] = mem_rsp_data_s[i]; + rsp_store_n[rsp_batch_idx * CORE_CHANNELS + i] = mem_rsp_data_s[i]; end end end @@ -489,8 +489,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - localparam k = (i * CORE_CHANNELS * WORD_WIDTH) + (j * WORD_WIDTH); - assign crsp_data[r] = rsp_store_n[k +: WORD_WIDTH]; + assign crsp_data[r] = rsp_store_n[i * CORE_CHANNELS + j]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From 6e401620279ca89863199df85d777c3c5487932d Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 11:36:31 -0700 Subject: [PATCH 318/488] extending scope triggering to capture continous firing events --- hw/rtl/VX_scope.vh | 16 ++++++++------- hw/rtl/afu/opae/vortex_afu.sv | 28 +++++++++++++++++--------- hw/rtl/afu/xrt/VX_afu_wrap.sv | 12 +++++++++++- hw/rtl/core/VX_fetch.sv | 9 ++++++++- hw/rtl/core/VX_issue_slice.sv | 9 ++++++--- hw/rtl/core/VX_lsu_slice.sv | 5 ++++- hw/rtl/libs/VX_scope_tap.sv | 35 +++++++++++++++++++++------------ hw/scripts/scope.py | 37 ++++++++++++++++++++++++----------- 8 files changed, 105 insertions(+), 46 deletions(-) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 43ad91e85..b3d427ede 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -46,10 +46,11 @@ .rsp_in (scope_bus_out_w) \ ) -`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __start, __stop, __depth) \ +`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __htriggers_w, __probes_w, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \ VX_scope_tap #( \ .SCOPE_ID (__id), \ - .TRIGGERW (__triggers_w), \ + .XTRIGGERW(__xtriggers_w), \ + .HTRIGGERW(__htriggers_w), \ .PROBEW (__probes_w), \ .DEPTH (__depth) \ ) scope_tap_``idx ( \ @@ -57,14 +58,15 
@@ .reset (scope_reset_w[__idx]), \ .start (__start), \ .stop (__stop), \ - .triggers(__triggers), \ + .xtriggers(__xtriggers), \ + .htriggers(__htriggers), \ .probes (__probes), \ .bus_in (scope_bus_in_w[__idx]), \ .bus_out(scope_bus_out_w[__idx]) \ ) -`define SCOPE_TAP(__idx, __id, __triggers, __probes, __start, __stop, __depth) \ - `SCOPE_TAP_EX(__idx, __id, $bits(__triggers), $bits(__probes), __triggers, __probes, __start, __stop, __depth) +`define SCOPE_TAP(__idx, __id, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \ + `SCOPE_TAP_EX(__idx, __id, $bits(__xtriggers), $bits(__htriggers), $bits(__probes), __xtriggers, __htriggers, __probes, __start, __stop, __depth) `else @@ -76,9 +78,9 @@ `define SCOPE_IO_SWITCH(__count) -`define SCOPE_TAP(__idx, __id, __triggers, __probes, __depth) +`define SCOPE_TAP(__idx, __id, __xtriggers, __probes, __depth) -`define SCOPE_TAP_EX(__idx, __id, __triggers_w, __probes_w, __triggers, __probes, __depth) +`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __probes_w, __xtriggers, __probes, __depth) `endif diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 37afa9326..b872efa97 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1016,10 +1016,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin state_prev <= state; end - wire state_changed = (state != state_prev); + wire state_changed = (state != state_prev); + wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready; + wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready; + wire avs_req_fire = (avs_write[0] || avs_read[0]) && ~avs_waitrequest[0]; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP (0, 0, { vx_reset, vx_busy, @@ -1027,21 +1029,29 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ vx_mem_req_ready, vx_mem_rsp_valid, vx_mem_rsp_ready, - vx_dcr_wr_valid, - state_changed, avs_read[0], avs_write[0], 
avs_waitrequest[0], - avs_readdatavalid[0], - cp2af_sRxPort.c0.mmioRdValid, - cp2af_sRxPort.c0.mmioWrValid, cp2af_sRxPort.c0.rspValid, cp2af_sRxPort.c1.rspValid, af2cp_sTxPort.c0.valid, af2cp_sTxPort.c1.valid, cp2af_sRxPort.c0TxAlmFull, - cp2af_sRxPort.c1TxAlmFull, - af2cp_sTxPort.c2.mmioRdValid + cp2af_sRxPort.c1TxAlmFull + },{ + state_changed, + vx_dcr_wr_valid, // ack-free + avs_readdatavalid[0], // ack-free + cp2af_sRxPort.c0.mmioRdValid, // ack-free + cp2af_sRxPort.c0.mmioWrValid, // ack-free + af2cp_sTxPort.c2.mmioRdValid, // ack-free + cp2af_sRxPort.c0.rspValid, // ack-free + cp2af_sRxPort.c1.rspValid, // ack-free + cci_rd_req_fire, + cci_wr_req_fire, + avs_req_fire, + vx_mem_req_fire, + vx_mem_rsp_fire },{ cmd_type, state, diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 73da63e58..c4ff50d45 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -309,6 +309,11 @@ module VX_afu_wrap #( `ifdef SCOPE `ifdef DBG_SCOPE_AFU + wire m_axi_mem_awfire_0 = m_axi_mem_awvalid_a[0] & m_axi_mem_awready_a[0]; + wire m_axi_mem_arfire_0 = m_axi_mem_arvalid_a[0] & m_axi_mem_arready_a[0]; + wire m_axi_mem_wfire_0 = m_axi_mem_wvalid_a[0] & m_axi_mem_wready_a[0]; + wire m_axi_mem_bfire_0 = m_axi_mem_bvalid_a[0] & m_axi_mem_bready_a[0]; + `NEG_EDGE (reset_negedge, reset); `SCOPE_TAP (0, 0, { ap_reset, @@ -318,7 +323,6 @@ module VX_afu_wrap #( interrupt, vx_reset, vx_busy, - dcr_wr_valid, m_axi_mem_awvalid_a[0], m_axi_mem_awready_a[0], m_axi_mem_wvalid_a[0], @@ -330,6 +334,12 @@ module VX_afu_wrap #( m_axi_mem_rvalid_a[0], m_axi_mem_rready_a[0] }, { + dcr_wr_valid, + m_axi_mem_awfire_0, + m_axi_mem_arfire_0, + m_axi_mem_wfire_0, + m_axi_mem_bfire_0 + },{ dcr_wr_addr, dcr_wr_data, vx_pending_writes, diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index d96ef7abd..35e106037 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -134,8 +134,11 @@ module VX_fetch import VX_gpu_pkg::*; #( `ifdef 
SCOPE `ifdef DBG_SCOPE_FETCH `SCOPE_IO_SWITCH (1); + wire schedule_fire = schedule_if.valid && schedule_if.ready; + wire icache_bus_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready; + wire icache_bus_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 1, 6, ( + `SCOPE_TAP_EX (0, 1, 6, 3, ( `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH ), { @@ -146,6 +149,10 @@ module VX_fetch import VX_gpu_pkg::*; #( icache_bus_if.rsp_valid, icache_bus_if.rsp_ready }, { + schedule_fire, + icache_bus_req_fire, + icache_bus_rsp_fire + },{ schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 583967cc8..5032065d3 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -91,15 +91,18 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_ISSUE `SCOPE_IO_SWITCH (1); + wire operands_fire = operands_if.valid && operands_if.ready; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 2, 3, ( + `SCOPE_TAP_EX (0, 2, 2, 2, ( `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 ), { operands_if.valid, - operands_if.ready, - writeback_if.valid + operands_if.ready + }, { + operands_fire, + writeback_if.valid // ack-free }, { operands_if.data.uuid, operands_if.data.tmask, diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 4ca88c7b3..0f947af78 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -536,13 +536,16 @@ module VX_lsu_slice import 
VX_gpu_pkg::*; #( `ifdef DBG_SCOPE_LSU `SCOPE_IO_SWITCH (1); `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 3, 4, ( + `SCOPE_TAP_EX (0, 3, 4, 2, ( 1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH ), { mem_req_valid, mem_req_ready, mem_rsp_valid, mem_rsp_ready + }, { + mem_req_fire, + mem_rsp_fire }, { mem_req_rw, full_addr, diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 8b6eee65e..d3c42c5b5 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -17,9 +17,10 @@ module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width - parameter TRIGGERW = 32, // trigger signals width - parameter PROBEW = 4999, // probe signal width - parameter DEPTH = 8192, // trace buffer depth + parameter XTRIGGERW = 0, // changed trigger signals width + parameter HTRIGGERW = 0, // high trigger signals width + parameter PROBEW = 1, // probe signal width + parameter DEPTH = 256, // trace buffer depth parameter IDLE_CTRW = 32, // idle time between triggers counter width parameter TX_DATAW = 64 // transfer data width ) ( @@ -27,14 +28,15 @@ module VX_scope_tap #( input wire reset, input wire start, input wire stop, - input wire [`UP(TRIGGERW)-1:0] triggers, + input wire [`UP(XTRIGGERW)-1:0] xtriggers, + input wire [`UP(HTRIGGERW)-1:0] htriggers, input wire [PROBEW-1:0] probes, input wire bus_in, output wire bus_out ); localparam CTR_WIDTH = 64; localparam SER_CTR_WIDTH = `LOG2UP(TX_DATAW); - localparam DATAW = PROBEW + TRIGGERW; + localparam DATAW = PROBEW + XTRIGGERW + HTRIGGERW; localparam ADDRW = `CLOG2(DEPTH); localparam SIZEW = `CLOG2(DEPTH+1); localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; @@ -76,7 +78,7 @@ module VX_scope_tap #( reg [CTR_WIDTH-1:0] timestamp, start_time; reg [CTR_WIDTH-1:0] start_delay, stop_delay; - reg [`UP(TRIGGERW)-1:0] prev_trig; + reg [`UP(XTRIGGERW)-1:0] prev_xtrig; reg 
[IDLE_CTRW-1:0] delta; reg cmd_start, cmd_stop; reg dflush; @@ -93,9 +95,16 @@ module VX_scope_tap #( // trace capture // - if (TRIGGERW != 0) begin : g_delta_store - assign data_in = {probes, triggers}; - assign write_en = (tap_state == TAP_STATE_RUN) && (dflush || (triggers != prev_trig)); + if (XTRIGGERW != 0 || HTRIGGERW != 0) begin : g_delta_store + if (XTRIGGERW != 0 && HTRIGGERW != 0) begin : g_data_in_pxh + assign data_in = {probes, xtriggers, htriggers}; + end else if (XTRIGGERW != 0) begin : g_data_in_px + assign data_in = {probes, xtriggers}; + end else begin : g_data_in_ph + assign data_in = {probes, htriggers}; + end + wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != 0); + assign write_en = (tap_state == TAP_STATE_RUN) && (has_triggered || dflush); VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), @@ -150,7 +159,7 @@ module VX_scope_tap #( tap_state <= TAP_STATE_IDLE; delta <= '0; dflush <= 0; - prev_trig <= '0; + prev_xtrig <= '0; waddr <= '0; end else begin case (tap_state) @@ -167,15 +176,15 @@ module VX_scope_tap #( TAP_STATE_RUN: begin dflush <= 0; if (!(stop || cmd_stop) && (waddr < waddr_end)) begin - if (TRIGGERW != 0) begin - if (dflush || (triggers != prev_trig)) begin + if (XTRIGGERW != 0) begin + if (dflush || (xtriggers != prev_xtrig)) begin waddr <= waddr + SIZEW'(1); delta <= '0; end else begin delta <= delta + IDLE_CTRW'(1); dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end - prev_trig <= triggers; + prev_xtrig <= xtriggers; end else begin waddr <= waddr + SIZEW'(1); end diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index db5fda1a9..9503fd757 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -181,10 +181,11 @@ def parse_xml(filename, max_taps): xml_modules = xml_doc.findall(".//module/[@origName='VX_scope_tap']") for xml_module in xml_modules: scope_id = parse_vl_int(xml_module.find(".//var/[@name='SCOPE_ID']/const").get("name")) - triggerw = 
parse_vl_int(xml_module.find(".//var/[@name='TRIGGERW']/const").get("name")) + xtriggerw = parse_vl_int(xml_module.find(".//var/[@name='XTRIGGERW']/const").get("name")) + htriggerw = parse_vl_int(xml_module.find(".//var/[@name='HTRIGGERW']/const").get("name")) probew = parse_vl_int(xml_module.find(".//var/[@name='PROBEW']/const").get("name")) module_name = xml_module.get("name") - modules[module_name] = [scope_id, triggerw, probew] + modules[module_name] = [scope_id, xtriggerw, htriggerw, probew] taps = [] xml_instances = xml_doc.iter("instance") @@ -195,22 +196,36 @@ def parse_xml(filename, max_taps): module = modules.get(defName) if module is None: continue - triggers = [] + + xtriggers = [] + htriggers = [] probes = [] - w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='triggers']/*"), triggers) - if w != module[1]: - raise ET.ParseError("invalid triggers width: actual=" + str(w) + ", expected=" + str(module[1])) + + if module[1] > 0: + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='xtriggers']/*"), xtriggers) + if w != module[1]: + raise ET.ParseError("invalid xtriggers width: actual=" + str(w) + ", expected=" + str(module[1])) + + if module[2] > 0: + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='htriggers']/*"), htriggers) + if w != module[2]: + raise ET.ParseError("invalid htriggers width: actual=" + str(w) + ", expected=" + str(module[2])) + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='probes']/*"), probes) - if w != module[2]: - raise ET.ParseError("invalid probes width: actual=" + str(w) + ", expected=" + str(module[2])) + if w != module[3]: + raise ET.ParseError("invalid probes width: actual=" + str(w) + ", expected=" + str(module[3])) + signals = probes - for trigger in triggers: - signals.append(trigger) + for xtrigger in xtriggers: + signals.append(xtrigger) + for htrigger in htriggers: + signals.append(htrigger) + loc = xml_instance.get("loc") hier = xml_doc.find(".//cell/[@loc='" + loc + 
"']").get("hier") path = hier.rsplit(".", 1)[0] taps.append({"id":module[0], - "width":module[1] + module[2], + "width":module[1] + module[2] + module[3], "signals":signals, "path":path}) From ec8cc4c84ddf12f96375c8f4d5299b82f6c24a9a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 14:21:09 -0700 Subject: [PATCH 319/488] minor update --- hw/rtl/libs/VX_scope_tap.sv | 61 ++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index d3c42c5b5..c4bf918ff 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -34,6 +34,7 @@ module VX_scope_tap #( input wire bus_in, output wire bus_out ); + localparam HAS_TRIGGERS = XTRIGGERW != 0 || HTRIGGERW != 0; localparam CTR_WIDTH = 64; localparam SER_CTR_WIDTH = `LOG2UP(TX_DATAW); localparam DATAW = PROBEW + XTRIGGERW + HTRIGGERW; @@ -95,7 +96,9 @@ module VX_scope_tap #( // trace capture // - if (XTRIGGERW != 0 || HTRIGGERW != 0) begin : g_delta_store + wire do_capture; + + if (HAS_TRIGGERS) begin : g_delta_store if (XTRIGGERW != 0 && HTRIGGERW != 0) begin : g_data_in_pxh assign data_in = {probes, xtriggers, htriggers}; end else if (XTRIGGERW != 0) begin : g_data_in_px @@ -103,8 +106,9 @@ module VX_scope_tap #( end else begin : g_data_in_ph assign data_in = {probes, htriggers}; end - wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != 0); - assign write_en = (tap_state == TAP_STATE_RUN) && (has_triggered || dflush); + wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != '0); + assign do_capture = dflush || has_triggered; + assign write_en = (tap_state == TAP_STATE_RUN) && do_capture; VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), @@ -112,20 +116,21 @@ module VX_scope_tap #( .READ_ENABLE (0), .NO_RWCHECK (1) ) delta_store ( - .clk (clk), - .reset (reset), - .read (1'b1), - .wren (1'b1), - .write (write_en), - .waddr (waddr[ADDRW-1:0]), - .wdata (delta), - .raddr 
(raddr), - .rdata (delta_value) + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr[ADDRW-1:0]), + .wdata (delta), + .raddr (raddr), + .rdata (delta_value) ); end else begin : g_no_delta_store - assign data_in = probes; + assign data_in = probes; assign write_en = (tap_state == TAP_STATE_RUN); assign delta_value = '0; + assign do_capture = 1; end VX_dp_ram #( @@ -135,15 +140,15 @@ module VX_scope_tap #( .READ_ENABLE (0), .NO_RWCHECK (1) ) data_store ( - .clk (clk), - .reset (reset), - .read (1'b1), - .wren (1'b1), - .write (write_en), - .waddr (waddr[ADDRW-1:0]), - .wdata (data_in), - .raddr (raddr), - .rdata (data_value) + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr[ADDRW-1:0]), + .wdata (data_in), + .raddr (raddr), + .rdata (data_value) ); always @(posedge clk) begin @@ -159,7 +164,7 @@ module VX_scope_tap #( tap_state <= TAP_STATE_IDLE; delta <= '0; dflush <= 0; - prev_xtrig <= '0; + prev_xtrig <= '0; waddr <= '0; end else begin case (tap_state) @@ -176,17 +181,17 @@ module VX_scope_tap #( TAP_STATE_RUN: begin dflush <= 0; if (!(stop || cmd_stop) && (waddr < waddr_end)) begin - if (XTRIGGERW != 0) begin - if (dflush || (xtriggers != prev_xtrig)) begin - waddr <= waddr + SIZEW'(1); + if (do_capture) begin + waddr <= waddr + SIZEW'(1); + end + if (HAS_TRIGGERS) begin + if (do_capture) begin delta <= '0; end else begin delta <= delta + IDLE_CTRW'(1); dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end prev_xtrig <= xtriggers; - end else begin - waddr <= waddr + SIZEW'(1); end end else begin tap_state <= TAP_STATE_DONE; From 989341a77dc3012b3c1113f9e3d2ffd7843665a0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 15:13:42 -0700 Subject: [PATCH 320/488] minor udpate --- hw/rtl/libs/VX_scope_tap.sv | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 
c4bf918ff..6a9b70ff1 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -80,13 +80,13 @@ module VX_scope_tap #( reg [CTR_WIDTH-1:0] timestamp, start_time; reg [CTR_WIDTH-1:0] start_delay, stop_delay; reg [`UP(XTRIGGERW)-1:0] prev_xtrig; + reg [`UP(HTRIGGERW)-1:0] prev_htrig; reg [IDLE_CTRW-1:0] delta; reg cmd_start, cmd_stop; reg dflush; reg [SIZEW-1:0] waddr, waddr_end; wire [DATAW-1:0] data_in; - wire write_en; wire [DATAW-1:0] data_value; wire [IDLE_CTRW-1:0] delta_value; @@ -98,6 +98,8 @@ module VX_scope_tap #( wire do_capture; + wire write_en = (tap_state == TAP_STATE_RUN) && do_capture; + if (HAS_TRIGGERS) begin : g_delta_store if (XTRIGGERW != 0 && HTRIGGERW != 0) begin : g_data_in_pxh assign data_in = {probes, xtriggers, htriggers}; @@ -106,9 +108,7 @@ module VX_scope_tap #( end else begin : g_data_in_ph assign data_in = {probes, htriggers}; end - wire has_triggered = (xtriggers != prev_xtrig) || (htriggers != '0); - assign do_capture = dflush || has_triggered; - assign write_en = (tap_state == TAP_STATE_RUN) && do_capture; + assign do_capture = dflush || (xtriggers != prev_xtrig) || (htriggers != prev_htrig) || (htriggers != '0); VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), @@ -128,7 +128,6 @@ module VX_scope_tap #( ); end else begin : g_no_delta_store assign data_in = probes; - assign write_en = (tap_state == TAP_STATE_RUN); assign delta_value = '0; assign do_capture = 1; end @@ -165,6 +164,7 @@ module VX_scope_tap #( delta <= '0; dflush <= 0; prev_xtrig <= '0; + prev_htrig <= '0; waddr <= '0; end else begin case (tap_state) @@ -192,6 +192,7 @@ module VX_scope_tap #( dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); end prev_xtrig <= xtriggers; + prev_htrig <= htriggers; end end else begin tap_state <= TAP_STATE_DONE; From 9027555e6a446295905c4a4af55c5d5916deb7cb Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 20:30:57 -0700 Subject: [PATCH 321/488] minor update --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index b872efa97..4737eb43c 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -1081,7 +1081,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) `endif /////////////////////////////////////////////////////////////////////////////// From eee037ffcd65b3f9b535b0715fc56e7585acc762 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 27 Sep 2024 20:59:29 -0700 Subject: [PATCH 322/488] minor update --- hw/rtl/VX_config.vh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index a93b73b30..1e10aca8e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -221,8 +221,10 @@ `endif `ifndef SV_DPI +`ifndef DPI_DISABLE `define DPI_DISABLE `endif +`endif `ifndef FPU_FPNEW `ifndef FPU_DSP From 87e613d29dea5a08db59509e38dcd26489745413 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 05:20:37 -0700 Subject: [PATCH 323/488] fixed XRT AFU deadlock on exit --- hw/rtl/afu/xrt/VX_afu_wrap.sv | 67 +++++++++++++++++++++------------ hw/rtl/libs/VX_axi_adapter.sv | 37 +++++++----------- hw/rtl/libs/VX_axi_write_ack.sv | 60 +++++++++++++++++++++++++++++ sim/opaesim/opae_sim.cpp | 7 ---- sim/rtlsim/processor.cpp | 28 ++------------ sim/xrtsim/xrt_sim.cpp | 56 ++++++++++++--------------- 6 files changed, 143 insertions(+), 112 deletions(-) create mode 100644 hw/rtl/libs/VX_axi_write_ack.sv diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index c4ff50d45..2b1bfb7c2 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -61,6 +61,9 @@ module VX_afu_wrap #( localparam STATE_IDLE = 0; localparam STATE_RUN = 1; + localparam PENDING_SIZEW = 12; // max outstanding requests size + localparam C_M_AXI_MEM_NUM_BANKS_SW = 
`CLOG2(C_M_AXI_MEM_NUM_BANKS+1); + wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS]; wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS]; wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS]; @@ -95,7 +98,7 @@ module VX_afu_wrap #( `endif reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; - reg [15:0] vx_pending_writes; + reg [PENDING_SIZEW-1:0] vx_pending_writes; reg vx_busy_wait; reg vx_reset = 1; // asserted at initialization wire vx_busy; @@ -118,23 +121,10 @@ module VX_afu_wrap #( wire scope_reset = reset; `endif - reg m_axi_mem_wfire; - reg m_axi_mem_bfire; - - always @(*) begin - m_axi_mem_wfire = 0; - m_axi_mem_bfire = 0; - for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin - m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]; - m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i]; - end - end - always @(posedge clk) begin if (reset || ap_reset) begin - state <= STATE_IDLE; - vx_pending_writes <= '0; - vx_reset <= 1; + state <= STATE_IDLE; + vx_reset <= 1; end else begin case (state) STATE_IDLE: begin @@ -181,12 +171,39 @@ module VX_afu_wrap #( if (vx_reset_ctr != '0) begin vx_reset_ctr <= vx_reset_ctr - 1; end + end + end - // track pending writes - if (m_axi_mem_wfire && ~m_axi_mem_bfire) - vx_pending_writes <= vx_pending_writes + 1; - if (~m_axi_mem_wfire && m_axi_mem_bfire) - vx_pending_writes <= vx_pending_writes - 1; + wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire; + wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps; + + for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_awfire + VX_axi_write_ack axi_write_ack ( + .clk (clk), + .reset (reset), + .awvalid(m_axi_mem_awvalid_a[i]), + .awready(m_axi_mem_awready_a[i]), + .wvalid (m_axi_mem_wvalid_a[i]), + .wready (m_axi_mem_wready_a[i]), + .tx_ack (m_axi_wr_req_fire[i]), + `UNUSED_PIN (aw_ack), + `UNUSED_PIN (w_ack), + `UNUSED_PIN (tx_rdy) + ); + assign m_axi_wr_rsp_fire[i] = m_axi_mem_bvalid_a[i] & 
m_axi_mem_bready_a[i]; + end + + `POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire); + `POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire); + + wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) - + (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps); + + always @(posedge clk) begin + if (reset) begin + vx_pending_writes <= '0; + end else begin + vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub); end end @@ -408,16 +425,16 @@ module VX_afu_wrap #( always @(posedge clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin - `TRACE(2, ("%t: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) + `TRACE(2, ("%t: AXI Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) end if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin - `TRACE(2, ("%t: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) + `TRACE(2, ("%t: AXI Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) end if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin - `TRACE(2, ("%t: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) + `TRACE(2, ("%t: AXI Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) end if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin - `TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) + `TRACE(2, ("%t: AXI Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) end end end diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index a21b8554f..f0144ff91 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -116,32 +116,21 @@ module VX_axi_adapter #( assign req_bank_off = mem_req_addr_out; end - wire mem_req_fire = 
mem_req_valid && mem_req_ready; - // AXi write request synchronization - reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack; - for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w - wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; - wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; - always @(posedge clk) begin - if (reset || (mem_req_fire && (req_bank_sel == i))) begin - m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; - end else begin - if (m_axi_aw_fire) begin - m_axi_aw_ack[i] <= 1; - end - if (m_axi_w_fire) begin - m_axi_w_ack[i] <= 1; - end - end - end - end - - wire [NUM_BANKS-1:0] axi_write_ready; + reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready - assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) - && (m_axi_wready[i] || m_axi_w_ack[i]); + VX_axi_write_ack axi_write_ack ( + .clk (clk), + .reset (reset), + .awvalid(m_axi_awvalid[i]), + .awready(m_axi_awready[i]), + .wvalid (m_axi_wvalid[i]), + .wready (m_axi_wready[i]), + .aw_ack (m_axi_aw_ack[i]), + .w_ack (m_axi_w_ack[i]), + .tx_rdy (axi_write_ready[i]), + `UNUSED_PIN (tx_ack) + ); end // request ack diff --git a/hw/rtl/libs/VX_axi_write_ack.sv b/hw/rtl/libs/VX_axi_write_ack.sv new file mode 100644 index 000000000..257ef18e5 --- /dev/null +++ b/hw/rtl/libs/VX_axi_write_ack.sv @@ -0,0 +1,60 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +`TRACING_OFF +module VX_axi_write_ack ( + input wire clk, + input wire reset, + input wire awvalid, + input wire awready, + input wire wvalid, + input wire wready, + output wire aw_ack, + output wire w_ack, + output wire tx_ack, + output wire tx_rdy +); + reg awfired; + reg wfired; + + wire awfire = awvalid && awready; + wire wfire = wvalid && wready; + + always @(posedge clk) begin + if (reset) begin + awfired <= 0; + wfired <= 0; + end else begin + if (awfire) begin + awfired <= 1; + end + if (wfire) begin + wfired <= 1; + end + if (tx_ack) begin + awfired <= 0; + wfired <= 0; + end + end + end + + assign aw_ack = awfired; + assign w_ack = wfired; + + assign tx_ack = (awfire || awfired) && (wfire || wfired); + assign tx_rdy = (awready || awfired) && (wready || wfired); + +endmodule +`TRACING_ON diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 0f0d67d9c..5f619a743 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -263,13 +263,6 @@ private: } device_->reset = 0; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - } } void tick() { diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 1f6af60dd..f651ad9d8 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -151,9 +151,6 @@ public: // reset device this->reset(); - // start execution - running_ = true; - // wait on device to go busy while (!device_->busy) { this->tick(); @@ -181,8 +178,6 @@ private: this->mem_bus_reset(); this->dcr_bus_reset(); - running_ = false; - print_bufs_.clear(); pending_mem_reqs_.clear(); @@ -192,8 +187,6 @@ private: std::swap(dram_queue_, empty); } - mem_rd_rsp_active_ = false; - device_->reset = 1; for (int i = 0; i < RESET_DELAY; ++i) { @@ -204,13 +197,7 @@ private: } device_->reset = 0; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - } + 
device_->mem_req_ready = 1; } void tick() { @@ -261,11 +248,10 @@ private: void mem_bus_eval() { // process memory read responses - if (mem_rd_rsp_active_ && device_->mem_rsp_ready) { + if (device_->mem_rsp_valid && device_->mem_rsp_ready) { device_->mem_rsp_valid = 0; - mem_rd_rsp_active_ = false; } - if (!mem_rd_rsp_active_) { + if (!device_->mem_rsp_valid) { if (!pending_mem_reqs_.empty() && (*pending_mem_reqs_.begin())->ready) { auto mem_rsp_it = pending_mem_reqs_.begin(); @@ -280,7 +266,6 @@ private: memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE); device_->mem_rsp_tag = mem_rsp->tag; pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; delete mem_rsp; } } @@ -291,7 +276,6 @@ private: if (device_->mem_req_rw) { auto byteen = device_->mem_req_byteen; auto data = VDataCast::get(device_->mem_req_data); - if (byte_addr >= uint64_t(IO_COUT_ADDR) && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { // process console output @@ -350,8 +334,6 @@ private: dram_queue_.push(mem_req); } } - - device_->mem_req_ready = running_; } void dcr_bus_reset() { @@ -390,10 +372,6 @@ private: #endif RAM* ram_; - - bool mem_rd_rsp_active_; - - bool running_; }; /////////////////////////////////////////////////////////////////////////////// diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 96adf0858..d572b9479 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -333,12 +333,9 @@ private: } device_->ap_rst_n = 1; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->ap_clk = 0; - this->eval(); - device_->ap_clk = 1; - this->eval(); + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + *m_axi_mem_[i].arready = 1; + *m_axi_mem_[i].awready = 1; } } @@ -407,10 +404,10 @@ private: void axi_mem_bus_reset() { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // address read request - *m_axi_mem_[i].arready = 1; + *m_axi_mem_[i].arready = 0; // address write request - *m_axi_mem_[i].awready = 1; + 
*m_axi_mem_[i].awready = 0; // data write request *m_axi_mem_[i].wready = 0; @@ -423,19 +420,16 @@ private: // states m_axi_states_[i].write_req_pending = false; - m_axi_states_[i].write_rsp_pending = false; - m_axi_states_[i].read_rsp_pending = false; } } void axi_mem_bus_eval() { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // handle read responses - if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) { - *m_axi_mem_[i].rvalid = 0; - m_axi_states_[i].read_rsp_pending = false; + if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) { + *m_axi_mem_[i].rvalid = 0; } - if (!m_axi_states_[i].read_rsp_pending) { + if (!*m_axi_mem_[i].rvalid) { if (!pending_mem_reqs_[i].empty() && (*pending_mem_reqs_[i].begin())->ready && !(*pending_mem_reqs_[i].begin())->write) { @@ -447,17 +441,15 @@ private: *m_axi_mem_[i].rlast = 1; memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].read_rsp_pending = true; delete mem_rsp; } } // handle write responses - if (m_axi_states_[i].write_rsp_pending && *m_axi_mem_[i].bready) { + if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) { *m_axi_mem_[i].bvalid = 0; - m_axi_states_[i].write_rsp_pending = false; } - if (!m_axi_states_[i].write_rsp_pending) { + if (!*m_axi_mem_[i].bvalid) { if (!pending_mem_reqs_[i].empty() && (*pending_mem_reqs_[i].begin())->ready && (*pending_mem_reqs_[i].begin())->write) { @@ -467,7 +459,6 @@ private: *m_axi_mem_[i].bid = mem_rsp->tag; *m_axi_mem_[i].bresp = 0; pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].write_rsp_pending = true; delete mem_rsp; } } @@ -492,17 +483,21 @@ private: dram_queues_[i].push(mem_req); } - // handle address write requests - if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !m_axi_states_[i].write_req_pending) { + if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) { + *m_axi_mem_[i].wready = 0; + } + + // handle address write requestsls + if 
(*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) { m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; - m_axi_states_[i].write_req_pending = true; + // activate data channel + *m_axi_mem_[i].wready = 1; + m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid; } // handle data write requests - *m_axi_mem_[i].wready = false; - if (*m_axi_mem_[i].wvalid && m_axi_states_[i].write_req_pending) { - + if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) { auto byteen = *m_axi_mem_[i].wstrb; auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); auto byte_addr = m_axi_states_[i].write_req_addr; @@ -529,10 +524,11 @@ private: // send dram request dram_queues_[i].push(mem_req); - m_axi_states_[i].write_req_pending = false; - - // acquire write data - *m_axi_mem_[i].wready = true; + // deactivate data channel + if (m_axi_states_[i].write_req_pending) { + *m_axi_mem_[i].wready = 0; + m_axi_states_[i].write_req_pending = false; + } } } } @@ -541,8 +537,6 @@ private: uint64_t write_req_addr; uint32_t write_req_tag; bool write_req_pending; - bool read_rsp_pending; - bool write_rsp_pending; } m_axi_state_t; typedef struct { From b634f9f47d88baff1ab96196e147abf7b5302770 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 20:15:03 -0700 Subject: [PATCH 324/488] count_leading_zeros fix --- sim/common/bitmanip.h | 4 ++-- sim/simx/constants.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index 053f254c8..4bfe56961 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -20,9 +20,9 @@ template constexpr uint32_t count_leading_zeros(T value) { static_assert(std::is_integral::value, "invalid data type"); if constexpr (sizeof(T) > 4) { - return value ? __builtin_clzll(value) : (sizeof(T) * 8); + return value ? 
__builtin_clzll(value) - (64 - sizeof(T) * 8) : sizeof(T) * 8; } else { - return value ? __builtin_clz(value) : (sizeof(T) * 8); + return value ? __builtin_clz(value) - (32 - sizeof(T) * 8) : sizeof(T) * 8; } } diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 0c707b55c..33fa9979c 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -21,14 +21,14 @@ #define MEM_CLOCK_RATIO 1 #endif -#define LSU_WORD_SIZE (XLEN / 8) -#define LSU_CHANNELS NUM_LSU_LANES -#define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS) +inline constexpr int LSU_WORD_SIZE = (XLEN / 8); +inline constexpr int LSU_CHANNELS = NUM_LSU_LANES; +inline constexpr int LSU_NUM_REQS = (NUM_LSU_BLOCKS * LSU_CHANNELS); -#define DCACHE_WORD_SIZE LSU_LINE_SIZE -#define DCACHE_CHANNELS UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE) -#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS) +inline constexpr int DCACHE_WORD_SIZE = LSU_LINE_SIZE; +inline constexpr int DCACHE_CHANNELS = UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE); +inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS); -#define NUM_SOCKETS UP(NUM_CORES / SOCKET_SIZE) +inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE); -#define PER_ISSUE_WARPS NUM_WARPS / ISSUE_WIDTH \ No newline at end of file +inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH; \ No newline at end of file From 4329e3f968c99f2cd5c1ba0c87cd12fb0a0903da Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 20:28:57 -0700 Subject: [PATCH 325/488] minor update --- sim/simx/constants.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 33fa9979c..c651bbfc4 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -13,6 +13,8 @@ #pragma once +#include + #ifndef RAM_PAGE_SIZE #define RAM_PAGE_SIZE 4096 #endif From b8475c65dc9864d87920bd71fa6b805b9a59426e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 21:25:55 -0700 Subject: 
[PATCH 326/488] adjusting platform caps --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/afu/xrt/VX_afu_ctrl.sv | 2 +- runtime/opae/vortex.cpp | 2 +- runtime/xrt/vortex.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 4737eb43c..7e0bcfaed 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -103,7 +103,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [127:0] afu_id = `AFU_ACCEL_UUID; wire [63:0] dev_caps = {8'b0, - 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 382b31f8a..d14328c7d 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -134,7 +134,7 @@ module VX_afu_ctrl #( // device caps wire [63:0] dev_caps = {8'b0, - 5'(`PLATFORM_MEMORY_ADDR_WIDTH-16), + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? 
`LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index a7f77ee97..38ee514ab 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -235,7 +235,7 @@ public: _value = 1 << ((dev_caps_ >> 48) & 0x7); break; case VX_CAPS_MEM_BANK_SIZE: - _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); + _value = 1ull << (20 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index ffc7870d4..d71f2e142 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -311,7 +311,7 @@ public: _value = 1 << ((dev_caps_ >> 48) & 0x7); break; case VX_CAPS_MEM_BANK_SIZE: - _value = 1ull << (16 + ((dev_caps_ >> 51) & 0x1f)); + _value = 1ull << (20 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); From 30571d716cf6d44e2e80d30025b2a4b86bd47af8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Sep 2024 21:37:48 -0700 Subject: [PATCH 327/488] updated scope CI test --- .github/workflows/ci.yml | 2 +- ci/regression.sh.in | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1676aea4c..d2bbd9a9f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -117,7 +117,7 @@ jobs: strategy: fail-fast: false matrix: - name: [regression, opencl, cache, config1, config2, debug, stress, synthesis] + name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis] xlen: [32, 64] steps: diff --git a/ci/regression.sh.in b/ci/regression.sh.in index ea9aa2560..71172599e 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -318,10 +318,18 @@ debug() CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" 
./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + + echo "debugging tests done!" +} + +scope() +{ + echo "begin scope tests..." + ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" - echo "debugging tests done!" + echo "debugging scope done!" } stress() @@ -348,7 +356,7 @@ synthesis() show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]" } declare -a tests=() @@ -386,6 +394,9 @@ while [ "$1" != "" ]; do --debug ) tests+=("debug") ;; + --scope ) + tests+=("scope") + ;; --stress ) tests+=("stress") ;; @@ -403,6 +414,7 @@ while [ "$1" != "" ]; do tests+=("config1") tests+=("config2") tests+=("debug") + tests+=("scope") tests+=("stress") tests+=("synthesis") ;; From 5c694a997c0ca6b97d8f0d68993a2b4cd64978c5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 00:09:25 -0700 Subject: [PATCH 328/488] update scope tap testing --- ci/regression.sh.in | 23 ++++++++++++++--------- runtime/common/scope.cpp | 14 ++++++++++++++ sim/opaesim/opae_sim.cpp | 29 +++++++++++++++++------------ 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 71172599e..f2ce1b17d 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -260,11 +260,11 @@ config2() # disabling ZICOND extension CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo - # test 128-bit MEM block + # test 128-bit memory 
block CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress - # test XLEN-bit MEM block + # test XLEN-bit memory block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress @@ -272,7 +272,7 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8 CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 - # test single-bank DRAM + # test single-bank memory if [ "$XLEN" == "64" ]; then CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress @@ -281,11 +281,16 @@ config2() CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress fi - # test 33-bit DRAM address - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress - CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress + # test larger memory address + if [ "$XLEN" == "64" ]; then + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress + else + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress + fi - # test DRAM banks interleaving + # test memory banks interleaving CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress @@ -326,8 +331,8 @@ scope() { echo "begin scope tests..." 
- ./ci/blackbox.sh --driver=opae --scope --app=demo --args="-n1" - ./ci/blackbox.sh --driver=xrt --scope --app=demo --args="-n1" + SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope + SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope echo "debugging scope done!" } diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 361a327ca..8f8670944 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -261,6 +261,20 @@ int vx_scope_start(scope_callback_t* callback, vx_device_h hdevice, uint64_t sta } } + // setup capture size + const char* capture_size_env = std::getenv("SCOPE_DEPTH"); + if (capture_size_env != nullptr) { + std::stringstream ss(capture_size_env); + uint32_t capture_size; + if (ss >> capture_size) { + for (auto& tap : json_obj["taps"]) { + auto id = tap["id"].get(); + uint64_t cmd_depth = (capture_size << 11) | (id << 3) | CMD_SET_DEPTH; + CHECK_ERR(g_callback.registerWrite(hdevice, cmd_depth)); + } + } + } + // set stop time if (stop_time != uint64_t(-1)) { std::cout << "[SCOPE] stop time: " << std::dec << stop_time << "s" << std::endl; diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 5f619a743..fe1832c1b 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -78,8 +78,9 @@ static uint64_t trace_stop_time = TRACE_STOP_TIME; bool sim_trace_enabled() { if (timestamp >= trace_start_time - && timestamp < trace_stop_time) + && timestamp < trace_stop_time) { return true; + } return trace_enabled; } @@ -156,10 +157,10 @@ public: // launch execution thread future_ = std::async(std::launch::async, [&]{ - while (!stop_) { - std::lock_guard guard(mutex_); - this->tick(); - } + while (!stop_) { + std::lock_guard guard(mutex_); + this->tick(); + } }); return 0; @@ -178,7 +179,7 @@ public: return -1; // set uninitialized data to "baadf00d" for (uint32_t i = 0; i < len; ++i) { - ((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff; + 
((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff; } host_buffer_t buffer; buffer.data = (uint64_t*)alloc; @@ -207,8 +208,9 @@ public: std::lock_guard guard(mutex_); // simulate CPU-GPU latency - for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) + for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) { this->tick(); + } // simulate mmio request device_->vcp2af_sRxPort_c0_mmioRdValid = 1; @@ -225,8 +227,9 @@ public: std::lock_guard guard(mutex_); // simulate CPU-GPU latency - for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) + for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) { this->tick(); + } // simulate mmio request device_->vcp2af_sRxPort_c0_mmioWrValid = 1; @@ -324,13 +327,14 @@ private: void sRxPort_bus_eval() { // check mmio request bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid - || device_->vcp2af_sRxPort_c0_mmioWrValid; + || device_->vcp2af_sRxPort_c0_mmioWrValid; // schedule CCI read responses std::list::iterator cci_rd_it(cci_reads_.end()); for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { - if (it->cycles_left > 0) + if (it->cycles_left > 0) { it->cycles_left -= 1; + } if ((cci_rd_it == ie) && (it->cycles_left == 0)) { cci_rd_it = it; } @@ -339,8 +343,9 @@ private: // schedule CCI write responses std::list::iterator cci_wr_it(cci_writes_.end()); for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { - if (it->cycles_left > 0) + if (it->cycles_left > 0) { it->cycles_left -= 1; + } if ((cci_wr_it == ie) && (it->cycles_left == 0)) { cci_wr_it = it; } @@ -358,7 +363,7 @@ private: // send CCI read response (ensure mmio disabled) device_->vcp2af_sRxPort_c0_rspValid = 0; if (!mmio_req_enabled - && (cci_rd_it != cci_reads_.end())) { + && (cci_rd_it != cci_reads_.end())) { device_->vcp2af_sRxPort_c0_rspValid = 1; device_->vcp2af_sRxPort_c0_hdr_resp_type = 0; memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE); From cf3909a9101026350d68f4acabd187b55f55ebf9 Mon Sep 17 
00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 07:52:53 -0700 Subject: [PATCH 329/488] minor update --- hw/rtl/libs/VX_encoder.sv | 12 +++++------- hw/rtl/libs/VX_find_first.sv | 16 +++++++++------- hw/rtl/libs/VX_pipe_buffer.sv | 3 +-- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/hw/rtl/libs/VX_encoder.sv b/hw/rtl/libs/VX_encoder.sv index ed65ed4f6..86ccad792 100644 --- a/hw/rtl/libs/VX_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -40,8 +40,8 @@ module VX_encoder #( end else if (MODEL == 1) begin : g_model1 localparam M = 1 << LN; `IGNORE_UNOPTFLAT_BEGIN - wire [LN-1:0][M-1:0] addr; - wire [LN:0][M-1:0] v; + wire [M-1:0] addr [LN]; + wire [M-1:0] v [LN+1]; `IGNORE_UNOPTFLAT_END // base case, also handle padding for non-power of two inputs @@ -50,19 +50,17 @@ module VX_encoder #( for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin : g_scan_l localparam SN = 1 << (LN - lvl); localparam SI = M / SN; - localparam SW = lvl; - for (genvar s = 0; s < SN; ++s) begin : g_scan_s `IGNORE_UNOPTFLAT_BEGIN wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]}; `IGNORE_UNOPTFLAT_END assign v[lvl][s*SI] = (| vs); if (lvl == 1) begin : g_lvl_1 - assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE]; + assign addr[lvl-1][s*SI +: lvl] = vs[!REVERSE]; end else begin : g_lvl_n - assign addr[lvl-1][s*SI +: SW] = { + assign addr[lvl-1][s*SI +: lvl] = { vs[!REVERSE], - addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1] + addr[lvl-2][s*SI +: lvl-1] | addr[lvl-2][s*SI+(SI>>1) +: lvl-1] }; end end diff --git a/hw/rtl/libs/VX_find_first.sv b/hw/rtl/libs/VX_find_first.sv index 43666737c..2a1714e18 100644 --- a/hw/rtl/libs/VX_find_first.sv +++ b/hw/rtl/libs/VX_find_first.sv @@ -28,10 +28,10 @@ module VX_find_first #( localparam TL = (1 << LOGN) - 1; localparam TN = (1 << (LOGN+1)) - 1; -`IGNORE_WARNINGS_BEGIN - wire [TN-1:0] s_n; - wire [TN-1:0][DATAW-1:0] d_n; -`IGNORE_WARNINGS_END +`IGNORE_UNOPTFLAT_BEGIN + wire s_n [TN]; + wire [DATAW-1:0] d_n [TN]; 
+`IGNORE_UNOPTFLAT_END for (genvar i = 0; i < N; ++i) begin : g_reverse assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i]; @@ -46,9 +46,11 @@ module VX_find_first #( end for (genvar j = 0; j < LOGN; ++j) begin : g_scan - for (genvar i = 0; i < (2**j); ++i) begin : g_i - assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1]; - assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1]; + localparam I = 1 << j; + for (genvar i = 0; i < I; ++i) begin : g_i + localparam K = I+i-1; + assign s_n[K] = s_n[2*K+1] | s_n[2*K+2]; + assign d_n[K] = s_n[2*K+1] ? d_n[2*K+1] : d_n[2*K+2]; end end diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv index d71a78dac..5ba23bc08 100644 --- a/hw/rtl/libs/VX_pipe_buffer.sv +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -46,7 +46,7 @@ module VX_pipe_buffer #( end else begin : g_register wire [DEPTH:0] valid; `IGNORE_UNOPTFLAT_BEGIN - wire [DEPTH:0] ready; + wire ready [DEPTH+1]; `IGNORE_UNOPTFLAT_END wire [DEPTH:0][DATAW-1:0] data; @@ -71,7 +71,6 @@ module VX_pipe_buffer #( assign valid_out = valid[DEPTH]; assign data_out = data[DEPTH]; assign ready[DEPTH] = ready_out; - end endmodule From 60860ec684d20d7592dd2f978fb5da48dc799413 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 09:03:24 -0700 Subject: [PATCH 330/488] minor update --- sim/rtlsim/processor.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index f651ad9d8..2e0189a71 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -143,7 +143,6 @@ public: } void run() { - #ifndef NDEBUG std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; #endif @@ -151,6 +150,9 @@ public: // reset device this->reset(); + // start + device_->reset = 0; + // wait on device to go busy while (!device_->busy) { this->tick(); @@ -161,6 +163,9 @@ public: this->tick(); } + // stop + device_->reset = 1; + 
this->cout_flush(); } @@ -196,7 +201,6 @@ private: this->eval(); } - device_->reset = 0; device_->mem_req_ready = 1; } From a3031922ce88d704eb6dbcc898c566f22f3e8829 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Sep 2024 09:07:45 -0700 Subject: [PATCH 331/488] minor update --- ci/regression.sh.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index f2ce1b17d..9827199bb 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -331,8 +331,8 @@ scope() { echo "begin scope tests..." - SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope - SCOPE_DEPTH=1024 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope + SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope + SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope echo "debugging scope done!" } From 2d00cec9d3f31e5bf3487bae8a83bc8c9ca44438 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 02:12:30 -0700 Subject: [PATCH 332/488] minor update --- hw/rtl/libs/VX_mem_scheduler.sv | 9 ++++----- hw/syn/xilinx/xrt/Makefile | 6 +++--- sim/rtlsim/processor.cpp | 36 +++++++++++++++++++++------------ 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 73647911a..1a0b2c597 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -433,7 +433,7 @@ module VX_mem_scheduler #( end end - if (RSP_PARTIAL != 0) begin : g_rsp_partial + if (RSP_PARTIAL != 0 || CORE_REQS == 1) begin : g_rsp_partial reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; @@ -459,16 +459,15 @@ module VX_mem_scheduler #( end else begin : g_rsp_full - // use flattened arrays for BRAM synthesis compatibility reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [(CORE_BATCHES * CORE_CHANNELS)-1:0][WORD_WIDTH-1:0] rsp_store_n; + reg 
[CORE_BATCHES-1:0][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; always @(*) begin rsp_store_n = rsp_store[ibuf_raddr]; for (integer i = 0; i < CORE_CHANNELS; ++i) begin if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[rsp_batch_idx * CORE_CHANNELS + i] = mem_rsp_data_s[i]; + rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; end end end @@ -489,7 +488,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[i * CORE_CHANNELS + j]; + assign crsp_data[r] = rsp_store_n[i][j]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 67eccf841..f5997352c 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -178,9 +178,9 @@ $(BIN_DIR)/emconfig.json: report: $(XCLBIN_CONTAINER) ifeq ($(TARGET), hw) - cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin/vivado.log - cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log - cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log + cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin endif chipscope: diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 2e0189a71..32f4b4e1e 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -152,6 +152,7 @@ public: // start device_->reset = 0; + device_->mem_req_ready = 1; // wait on device to go busy while (!device_->busy) { @@ -175,6 +176,7 @@ public: device_->dcr_wr_data = value; this->tick(); 
device_->dcr_wr_valid = 0; + this->tick(); } private: @@ -184,7 +186,6 @@ private: this->dcr_bus_reset(); print_bufs_.clear(); - pending_mem_reqs_.clear(); { @@ -200,12 +201,21 @@ private: device_->clk = 1; this->eval(); } - - device_->mem_req_ready = 1; } void tick() { - this->mem_bus_eval(); + + device_->clk = 0; + this->eval(); + + this->mem_bus_eval(0); + + device_->clk = 1; + this->eval(); + + this->mem_bus_eval(1); + + dram_sim_.tick(); if (!dram_queue_.empty()) { auto mem_req = dram_queue_.front(); @@ -221,13 +231,6 @@ private: } } - dram_sim_.tick(); - - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - #ifndef NDEBUG fflush(stdout); #endif @@ -250,9 +253,14 @@ private: device_->mem_rsp_valid = 0; } - void mem_bus_eval() { + void mem_bus_eval(bool clk) { + if (!clk) { + mem_rd_rsp_ready_ = device_->mem_rsp_ready; + return; + } + // process memory read responses - if (device_->mem_rsp_valid && device_->mem_rsp_ready) { + if (device_->mem_rsp_valid && mem_rd_rsp_ready_) { device_->mem_rsp_valid = 0; } if (!device_->mem_rsp_valid) { @@ -375,6 +383,8 @@ private: VerilatedVcdC *tfp_; #endif + bool mem_rd_rsp_ready_; + RAM* ram_; }; From 1deb13c469f87003d3d99498df3cc0ba84284f6f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 03:36:00 -0700 Subject: [PATCH 333/488] minor update --- hw/rtl/cache/VX_bank_flush.sv | 3 ++- hw/rtl/libs/VX_cyclic_arbiter.sv | 3 ++- hw/rtl/libs/VX_rr_arbiter.sv | 3 ++- hw/rtl/libs/VX_stream_xbar.sv | 6 ++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index e90c93cf6..a01ae0e0b 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -115,7 +115,8 @@ module VX_bank_flush #( if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way VX_decoder #( - .N (`CS_WAY_SEL_BITS) + .N (`CS_WAY_SEL_BITS), + .D (NUM_WAYS) ) ctr_decoder ( .data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), 
.valid_in (1'b1), diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index ff803b910..a4dead008 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -66,7 +66,8 @@ module VX_cyclic_arbiter #( ); VX_decoder #( - .N (LOG_NUM_REQS) + .N (LOG_NUM_REQS), + .D (NUM_REQS) ) grant_decoder ( .data_in (grant_index), .valid_in (1'b1), diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 3831238dc..efe9838d6 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -481,7 +481,8 @@ module VX_rr_arbiter #( end VX_decoder #( - .N (LOG_NUM_REQS) + .N (LOG_NUM_REQS), + .D (NUM_REQS) ) grant_decoder ( .data_in (grant_index), .valid_in (grant_valid), diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index db59f895e..febfd0465 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -65,7 +65,8 @@ module VX_stream_xbar #( for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders VX_decoder #( - .N (OUT_WIDTH) + .N (OUT_WIDTH), + .D (NUM_OUTPUTS) ) sel_in_decoder ( .data_in (sel_in[i]), .valid_in (valid_in[i]), @@ -137,7 +138,8 @@ module VX_stream_xbar #( wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; VX_decoder #( - .N (OUT_WIDTH) + .N (OUT_WIDTH), + .D (NUM_OUTPUTS) ) sel_in_decoder ( .data_in (sel_in[0]), .valid_in (valid_in[0]), From 6f81df5edb1828f610327a94772fc51017550e02 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 06:25:50 -0700 Subject: [PATCH 334/488] axi_adapter large tags support --- hw/rtl/VX_define.vh | 4 +++ hw/rtl/Vortex_axi.sv | 18 +++++----- hw/rtl/core/VX_fetch.sv | 11 +++--- hw/rtl/libs/VX_axi_adapter.sv | 67 ++++++++++++++++++++++++++--------- hw/rtl/libs/VX_mem_adapter.sv | 4 +++ hw/scripts/scope.py | 6 ++-- 6 files changed, 79 insertions(+), 31 deletions(-) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 7c1590dff..8b59bc910 100644 --- a/hw/rtl/VX_define.vh +++ 
b/hw/rtl/VX_define.vh @@ -52,8 +52,12 @@ `ifndef NDEBUG `define UUID_WIDTH 44 `else +`ifdef SCOPE +`define UUID_WIDTH 44 +`else `define UUID_WIDTH 1 `endif +`endif `define PC_BITS (`XLEN-1) `define OFFSET_BITS 12 diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 7d238aacd..483773223 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -82,10 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); - localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH; - localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + `CLOG2(`VX_MEM_DATA_WIDTH) - `CLOG2(AXI_DATA_WIDTH); - - `STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag width: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH)) + localparam DST_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH); + localparam SRC_LDATAW = `CLOG2(AXI_DATA_WIDTH); + localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW; + localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0); + localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + SUB_LDATAW; wire mem_req_valid; wire mem_req_rw; @@ -133,12 +134,12 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a; wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a; wire [AXI_DATA_WIDTH-1:0] mem_req_data_a; - wire [AXI_TID_WIDTH-1:0] mem_req_tag_a; + wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a; wire mem_req_ready_a; wire mem_rsp_valid_a; wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a; - wire [AXI_TID_WIDTH-1:0] mem_rsp_tag_a; + wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a; wire mem_rsp_ready_a; VX_mem_adapter #( @@ -147,7 +148,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( .SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), .DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), .SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH), - .DST_TAG_WIDTH (AXI_TID_WIDTH), + .DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH), .REQ_OUT_BUF (0), .RSP_OUT_BUF (0) ) mem_adapter ( @@ -185,7 +186,8 @@ module Vortex_axi import VX_gpu_pkg::*; #( .DATA_WIDTH (AXI_DATA_WIDTH), 
.ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH), .ADDR_WIDTH_OUT (AXI_ADDR_WIDTH), - .TAG_WIDTH (AXI_TID_WIDTH), + .TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH), + .TAG_WIDTH_OUT (AXI_TID_WIDTH), .NUM_BANKS (AXI_NUM_BANKS), .BANK_INTERLEAVE(0), .RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 2 : 0) diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 35e106037..cf862aa06 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -137,10 +137,13 @@ module VX_fetch import VX_gpu_pkg::*; #( wire schedule_fire = schedule_if.valid && schedule_if.ready; wire icache_bus_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready; wire icache_bus_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; + wire [`UUID_WIDTH-1:0] icache_bus_req_uuid = icache_bus_if.req_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH]; + wire [`UUID_WIDTH-1:0] icache_bus_rsp_uuid = icache_bus_if.rsp_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH]; `NEG_EDGE (reset_negedge, reset); `SCOPE_TAP_EX (0, 1, 6, 3, ( - `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + - ICACHE_ADDR_WIDTH + (ICACHE_WORD_SIZE * 8) + ICACHE_TAG_WIDTH + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + + `UUID_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + + `UUID_WIDTH + (ICACHE_WORD_SIZE * 8) ), { schedule_if.valid, schedule_if.ready, @@ -154,8 +157,8 @@ module VX_fetch import VX_gpu_pkg::*; #( icache_bus_rsp_fire },{ schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, - icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, - icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag + icache_bus_req_uuid, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, + icache_bus_rsp_uuid, icache_bus_if.rsp_data.data }, reset_negedge, 1'b0, 4096 ); diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index f0144ff91..255789fd7 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ 
b/hw/rtl/libs/VX_axi_adapter.sv @@ -18,9 +18,11 @@ module VX_axi_adapter #( parameter DATA_WIDTH = 512, parameter ADDR_WIDTH_IN = 1, parameter ADDR_WIDTH_OUT = 32, - parameter TAG_WIDTH = 8, + parameter TAG_WIDTH_IN = 8, + parameter TAG_WIDTH_OUT = 8, parameter NUM_BANKS = 1, parameter BANK_INTERLEAVE= 0, + parameter TAG_BUFFER_SIZE= 32, parameter RSP_OUT_BUF = 0 ) ( input wire clk, @@ -32,20 +34,20 @@ module VX_axi_adapter #( input wire [DATA_WIDTH/8-1:0] mem_req_byteen, input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, - input wire [TAG_WIDTH-1:0] mem_req_tag, + input wire [TAG_WIDTH_IN-1:0] mem_req_tag, output wire mem_req_ready, // Vortex response output wire mem_rsp_valid, output wire [DATA_WIDTH-1:0] mem_rsp_data, - output wire [TAG_WIDTH-1:0] mem_rsp_tag, + output wire [TAG_WIDTH_IN-1:0] mem_rsp_tag, input wire mem_rsp_ready, // AXI write request address channel output wire m_axi_awvalid [NUM_BANKS], input wire m_axi_awready [NUM_BANKS], output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS], - output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS], + output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS], output wire [1:0] m_axi_awburst [NUM_BANKS], @@ -65,14 +67,14 @@ module VX_axi_adapter #( // AXI write response channel input wire m_axi_bvalid [NUM_BANKS], output wire m_axi_bready [NUM_BANKS], - input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS], + input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_BANKS], input wire [1:0] m_axi_bresp [NUM_BANKS], // AXI read address channel output wire m_axi_arvalid [NUM_BANKS], input wire m_axi_arready [NUM_BANKS], output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS], - output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], + output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS], output wire [1:0] m_axi_arburst 
[NUM_BANKS], @@ -87,7 +89,7 @@ module VX_axi_adapter #( output wire m_axi_rready [NUM_BANKS], input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_BANKS], input wire m_axi_rlast [NUM_BANKS], - input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS], + input wire [TAG_WIDTH_OUT-1:0] m_axi_rid [NUM_BANKS], input wire [1:0] m_axi_rresp [NUM_BANKS] ); localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8); @@ -133,14 +135,47 @@ module VX_axi_adapter #( ); end + wire tbuf_full; + wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out; + wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out; + + // handle tag width mismatch + if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf + localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE); + wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr; + VX_index_buffer #( + .DATAW (TAG_WIDTH_IN), + .SIZE (TAG_BUFFER_SIZE) + ) tag_buf ( + .clk (clk), + .reset (reset), + .acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready), + .write_addr (tbuf_waddr), + .write_data (mem_req_tag), + .read_data (mem_rsp_tag), + .read_addr (tbuf_raddr), + .release_en (mem_rsp_valid && mem_rsp_ready), + .full (tbuf_full), + `UNUSED_PIN (empty) + ); + assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr); + assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0]; + `UNUSED_VAR (mem_rsp_tag_out) + end else begin : g_no_tag_buf + assign tbuf_full = 0; + assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag); + assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0]; + `UNUSED_VAR (mem_rsp_tag_out) + end + // request ack - assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; + assign mem_req_ready = (mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full; // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr - assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; + assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i]; assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); - assign m_axi_awid[i] = mem_req_tag; + assign m_axi_awid[i] = mem_req_tag_out; assign m_axi_awlen[i] = 8'b00000000; assign m_axi_awsize[i] = 3'(DATA_SIZE); assign m_axi_awburst[i] = 2'b00; @@ -153,7 +188,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; @@ -170,9 +205,9 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req - assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i); + assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full; assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); - assign m_axi_arid[i] = mem_req_tag; + assign m_axi_arid[i] = mem_req_tag_out; assign m_axi_arlen[i] = 8'b00000000; assign m_axi_arsize[i] = 3'(DATA_SIZE); assign m_axi_arburst[i] = 2'b00; @@ -186,7 +221,7 @@ module VX_axi_adapter #( // AXI read response channel wire [NUM_BANKS-1:0] rsp_arb_valid_in; - wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in; + wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH_OUT-1:0] rsp_arb_data_in; wire [NUM_BANKS-1:0] rsp_arb_ready_in; for 
(genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp @@ -200,7 +235,7 @@ module VX_axi_adapter #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (DATA_WIDTH + TAG_WIDTH), + .DATAW (DATA_WIDTH + TAG_WIDTH_OUT), .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( @@ -209,7 +244,7 @@ module VX_axi_adapter #( .valid_in (rsp_arb_valid_in), .data_in (rsp_arb_data_in), .ready_in (rsp_arb_ready_in), - .data_out ({mem_rsp_data, mem_rsp_tag}), + .data_out ({mem_rsp_data, mem_rsp_tag_out}), .valid_out (mem_rsp_valid), .ready_out (mem_rsp_ready), `UNUSED_PIN (sel_out) diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 066de829f..4ece7cf69 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -59,6 +59,10 @@ module VX_mem_adapter #( localparam D = `ABS(DST_LDATAW - SRC_LDATAW); localparam P = 2**D; + localparam EXPECTED_TAG_WIDTH = SRC_TAG_WIDTH + ((DST_LDATAW > SRC_LDATAW) ? D : 0); + + `STATIC_ASSERT(DST_TAG_WIDTH >= EXPECTED_TAG_WIDTH, ("invalid DST_TAG_WIDTH parameter, current=%0d, expected=%0d", DST_TAG_WIDTH, EXPECTED_TAG_WIDTH)) + wire mem_req_valid_out_w; wire [DST_ADDR_WIDTH-1:0] mem_req_addr_out_w; wire mem_req_rw_out_w; diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 9503fd757..f6d93961b 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -78,7 +78,7 @@ def parse_var_name(xml_doc, xml_node): elif xml_node.tag == "arraysel": return parse_arraysel_name(xml_doc, xml_node) else: - raise ET.ParseError("invalid probe entry" + source_loc(xml_doc, xml_node.get("loc"))) + raise ET.ParseError("invalid probe entry: tag=" + xml_node.tag + ", " + source_loc(xml_doc, xml_node.get("loc"))) return name def parse_sel_field(xml_doc, dtype_id, offset, width): @@ -116,7 +116,7 @@ def parse_sel_field(xml_doc, dtype_id, offset, width): end = width - 1 + offset return F"[{end}:{offset}]" else: - raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) + raise 
ET.ParseError("invalid probe entry: tag=" + xml_type.tag + ", " + source_loc(xml_doc, xml_type.get("loc"))) return None def parse_sel_name(xml_doc, xml_node): @@ -167,7 +167,7 @@ def parse_vl_port(xml_doc, xml_node, signals): signals.append([name, signal_width]) total_width = total_width + signal_width else: - raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_node.get("loc"))) + raise ET.ParseError("invalid probe entry: tag=" + xml_node.tag + ", " + source_loc(xml_doc, xml_node.get("loc"))) # Check for duplicate signal names signal_names = [signal[0] for signal in signals] duplicates = set([name for name in signal_names if signal_names.count(name) > 1]) From ee690248414cff7650b259ce7e9cc0b2ab1a7c41 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 09:17:42 -0700 Subject: [PATCH 335/488] minor update --- hw/rtl/Vortex_axi.sv | 6 +++--- hw/rtl/core/VX_schedule.sv | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 483773223..418a2aa5c 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -82,11 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); - localparam DST_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH); - localparam SRC_LDATAW = `CLOG2(AXI_DATA_WIDTH); + localparam DST_LDATAW = `CLOG2(AXI_DATA_WIDTH); + localparam SRC_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH); localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW; localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0); - localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH + SUB_LDATAW; + localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH - SUB_LDATAW; wire mem_req_valid; wire mem_req_rw; diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index e7937fe49..9b49ae268 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -332,7 +332,7 @@ module VX_schedule import VX_gpu_pkg::*; #( }; wire [`UUID_WIDTH-1:0] instr_uuid; -`ifndef NDEBUG 
+`ifdef UUID_ENABLE VX_uuid_gen #( .CORE_ID (CORE_ID), .UUID_WIDTH (`UUID_WIDTH) From a3aca502b7db8d67448b36aa5841089f18290d29 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 30 Sep 2024 14:20:48 -0700 Subject: [PATCH 336/488] minor update --- hw/rtl/VX_define.vh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 8b59bc910..4ccb00880 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -50,9 +50,11 @@ `define PERF_CTR_BITS 44 `ifndef NDEBUG +`define UUID_ENABLE `define UUID_WIDTH 44 `else `ifdef SCOPE +`define UUID_ENABLE `define UUID_WIDTH 44 `else `define UUID_WIDTH 1 From 44ebc12ed4011fd997083f340372f130d3d9eb33 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 1 Oct 2024 00:55:45 -0700 Subject: [PATCH 337/488] minor update --- hw/rtl/core/VX_issue_slice.sv | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 5032065d3..f287525c7 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -91,29 +91,47 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `ifdef SCOPE `ifdef DBG_SCOPE_ISSUE `SCOPE_IO_SWITCH (1); + wire decode_fire = decode_if.valid && decode_if.ready; wire operands_fire = operands_if.valid && operands_if.ready; `NEG_EDGE (reset_negedge, reset); - `SCOPE_TAP_EX (0, 2, 2, 2, ( - `UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + - 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1 + `SCOPE_TAP_EX (0, 2, 4, 3, ( + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS * 4 + + `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (3 * `XLEN) + + `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 ), { + decode_if.valid, + decode_if.ready, operands_if.valid, operands_if.ready }, { + 
decode_fire, operands_fire, writeback_if.valid // ack-free }, { + decode_if.data.uuid, + decode_if.data.wid, + decode_if.data.tmask, + decode_if.data.PC, + decode_if.data.ex_type, + decode_if.data.op_type, + decode_if.data.wb, + decode_if.data.rd, + decode_if.data.rs1, + decode_if.data.rs2, + decode_if.data.rs3, operands_if.data.uuid, + operands_if.data.wis, operands_if.data.tmask, + operands_if.data.PC, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.wb, operands_if.data.rd, - operands_if.data.rs1_data, - operands_if.data.rs2_data, - operands_if.data.rs3_data, + operands_if.data.rs1_data[0], + operands_if.data.rs2_data[0], + operands_if.data.rs3_data[0], writeback_if.data.uuid, + writeback_if.data.wis, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.data, From 5cb033ae13cd288622ac5e102db7b1c45b76fa58 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 07:12:30 -0700 Subject: [PATCH 338/488] minor update --- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/libs/VX_mem_scheduler.sv | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 0f947af78..1f39ab5a7 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire lsu_mem_rsp_ready; VX_mem_scheduler #( - .INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)), + .INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)), .CORE_REQS (NUM_LANES), .MEM_CHANNELS(NUM_LANES), .WORD_SIZE (LSU_WORD_SIZE), diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 1a0b2c597..1426d59c0 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,15 +459,15 @@ module VX_mem_scheduler #( end else begin : g_rsp_full - reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES-1:0][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] 
rsp_store_n; + reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; - always @(*) begin - rsp_store_n = rsp_store[ibuf_raddr]; - for (integer i = 0; i < CORE_CHANNELS; ++i) begin + for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store_n + always @(*) begin + rsp_store_n[i] = rsp_store[ibuf_raddr][i]; if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i]; + rsp_store_n[i][rsp_batch_idx] = mem_rsp_data_s[i]; end end end @@ -488,7 +488,7 @@ module VX_mem_scheduler #( for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[i][j]; + assign crsp_data[r] = rsp_store_n[j][i]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; From ad7377c8bab97d909f16b2d498e56815597c79eb Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 07:41:29 -0700 Subject: [PATCH 339/488] minor udpate --- hw/rtl/libs/VX_mem_scheduler.sv | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 1426d59c0..4ba8bf147 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -459,16 +459,21 @@ module VX_mem_scheduler #( end else begin : g_rsp_full - reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; + wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; - for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store_n - always @(*) begin - rsp_store_n[i] = rsp_store[ibuf_raddr][i]; - if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - 
rsp_store_n[i][rsp_batch_idx] = mem_rsp_data_s[i]; + for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store + for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j + reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + wire rsp_wren = mem_rsp_fire_s + && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) + && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); + always @(posedge clk) begin + if (rsp_wren) begin + rsp_store[ibuf_raddr] <= mem_rsp_data_s[i]; + end end + assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr]; end end @@ -476,9 +481,6 @@ module VX_mem_scheduler #( if (ibuf_push) begin rsp_orig_mask[ibuf_waddr] <= core_req_mask; end - if (mem_rsp_valid_s) begin - rsp_store[ibuf_raddr] <= rsp_store_n; - end end assign crsp_valid = mem_rsp_valid_s && rsp_complete; From 4b8ca42e85186ba73597b87fa378645a5dbe0e68 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 09:27:26 -0700 Subject: [PATCH 340/488] minor update --- hw/rtl/libs/VX_stream_buffer.sv | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 51e33db60..4b77df83d 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -45,7 +45,7 @@ module VX_stream_buffer #( assign valid_out = valid_in; assign data_out = data_in; - end else if (OUT_REG != 0) begin : g_with_reg + end else if (OUT_REG != 0) begin : g_out_reg reg [DATAW-1:0] data_out_r; reg [DATAW-1:0] buffer; @@ -84,23 +84,27 @@ module VX_stream_buffer #( assign valid_out = valid_out_r; assign data_out = data_out_r; - end else begin : g_no_reg + end else begin : g_no_out_reg reg [1:0][DATAW-1:0] shift_reg; - reg [1:0] fifo_state; + reg [1:0] fifo_state, fifo_state_n; - wire fire_in = valid_in && ready_in; + wire fire_in = valid_in && ready_in; wire fire_out = valid_out && ready_out; + always @(*) begin + case ({fire_in, fire_out}) + 2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 
-> 01, 01 -> 10 + 2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 + default: fifo_state_n = fifo_state; + endcase + end + always @(posedge clk) begin if (reset) begin fifo_state <= 2'b00; end else begin - case ({fire_in, fire_out}) - 2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 - 2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 - default: fifo_state <= fifo_state; - endcase + fifo_state <= fifo_state_n; end end From 83badaac86cd4578484ebfd3b1b11fe089f666b7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 2 Oct 2024 11:10:33 -0700 Subject: [PATCH 341/488] minor update --- hw/rtl/libs/VX_elastic_buffer.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 5067a4dd3..c90aa0616 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -97,8 +97,10 @@ module VX_elastic_buffer #( wire [DATAW-1:0] data_out_t; wire ready_out_t; + wire valid_out_t = ~empty; + wire push = valid_in && ready_in; - wire pop = ~empty && ready_out_t; + wire pop = valid_out_t && ready_out_t; VX_fifo_queue #( .DATAW (DATAW), @@ -127,7 +129,7 @@ module VX_elastic_buffer #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (~empty), + .valid_in (valid_out_t), .data_in (data_out_t), .ready_in (ready_out_t), .valid_out (valid_out), From d1175a03c9606dce16cdc8f16772fba701fbb0af Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Wed, 2 Oct 2024 14:16:57 -0400 Subject: [PATCH 342/488] update the code accessing registers in obsoleted way --- sim/simx/emulator.cpp | 2 +- sim/simx/execute.cpp | 2 +- sim/simx/func_unit.cpp | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 8d57f69fa..08c51845c 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -74,11 +74,11 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , 
core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) + , ipdom_size_(arch.num_threads()-1) // [TBC] Currently, tradeoff between scratchpad size & performance has not been evaluated. Scratchpad is // considered to be big enough to hold input tiles for one output tile. // In future versions, scratchpad size should be fixed to an appropriate value. , scratchpad(std::vector(32 * 32 * 32768)) - , ipdom_size_(arch.num_threads()-1) { this->clear(); } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index a7d8a937d..e70d45cb2 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1473,7 +1473,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->fu_type = FUType::LSU; trace->lsu_type = LsuType::TCU_LOAD; - trace->used_iregs.set(rsrc0); + trace->src_regs[0] = {RegType::Integer, rsrc0}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index 2de58639b..a182f6d8b 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -222,7 +222,10 @@ void LsuUnit::tick() { input.pop(); } } -///////// TENSOR code TBC //////////////////////////////// +/* TO BE FIXED:Tensor_core code + send_request is not used anymore. 
Need to be modified number of load +*/ +/* int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { int count = 0; @@ -275,6 +278,7 @@ int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { } return count; } +*/ /////////////////////////////////////////////////////////////////////////////// From b7531c9de1d4acbc33e2040fb6f4f100eb96d015 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Wed, 2 Oct 2024 17:46:01 -0400 Subject: [PATCH 343/488] support 64bit --- tests/regression/matmul/kernel.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp index b0b4753c7..5fa976df4 100644 --- a/tests/regression/matmul/kernel.cpp +++ b/tests/regression/matmul/kernel.cpp @@ -8,9 +8,9 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { int32_t* src0_ptr = (int32_t*)arg->src0_addr; int32_t* src1_ptr = (int32_t*)arg->src1_addr; int32_t* dst_ptr = (int32_t*)arg->dst_addr; - unsigned a_addr = reinterpret_cast(src0_ptr); - unsigned b_addr = reinterpret_cast(src1_ptr); - unsigned c_addr = reinterpret_cast(dst_ptr); + uint64_t a_addr = reinterpret_cast(src0_ptr); + uint64_t b_addr = reinterpret_cast(src1_ptr); + uint64_t c_addr = reinterpret_cast(dst_ptr); uint32_t tc_size = arg->tc_size; uint32_t TC_per_warp = arg->TC_per_warp; @@ -100,9 +100,9 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) { //TODO :: change this for new task->thread distribution if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit) { - unsigned a_addr_base = a_addr + offset*arg->data_size; - unsigned b_addr_base = b_addr + offset*arg->data_size; - unsigned c_addr_base = c_addr + offset_c*arg->data_size; + uint64_t a_addr_base = a_addr + offset*arg->data_size; + uint64_t b_addr_base = b_addr + offset*arg->data_size; + uint64_t c_addr_base = c_addr + offset_c*arg->data_size; csr_write(VX_MAT_MUL_SIZE,n_tiles); csr_write(VX_TC_NUM,TC_per_warp); 
csr_write(VX_TC_SIZE,tc_size); From 5cf6797bd36b737ed1dda5482389a5ee4162c750 Mon Sep 17 00:00:00 2001 From: jaewon-lee-github Date: Thu, 3 Oct 2024 15:19:39 -0400 Subject: [PATCH 344/488] - Change STARTUP_ADDR to use the same 0x80000000 address - Fix environment variable for vortex kernel directories --- .github/workflows/ci.yml | 29 +++++++++++++++-------------- ci/regression.sh.in | 6 +----- config.mk.in | 3 --- hw/rtl/VX_config.vh | 4 ++-- tests/kernel/common.mk | 3 +-- tests/opencl/common.mk | 3 +-- tests/regression/common.mk | 3 +-- 7 files changed, 21 insertions(+), 30 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 404edc12c..f4f5902a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,13 +21,13 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 with: submodules: recursive - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -36,7 +36,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -71,7 +71,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 - name: Install Dependencies run: | @@ -79,7 +79,7 @@ jobs: - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -88,7 +88,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -106,23 +106,23 @@ jobs: make tests -s > /dev/null - name: Upload Build Artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} - test: + 
tests: runs-on: ubuntu-20.04 needs: build strategy: fail-fast: false matrix: - name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm ] + name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm] xlen: [32, 64] steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 - name: Install Dependencies run: | @@ -130,7 +130,7 @@ jobs: - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -139,7 +139,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -147,10 +147,11 @@ jobs: ${{ runner.os }}-thirdparty- - name: Download Build Artifact - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v3 with: name: build-${{ matrix.xlen }} path: build${{ matrix.xlen }} + - name: Run tests run: | cd build${{ matrix.xlen }} @@ -167,7 +168,7 @@ jobs: complete: runs-on: ubuntu-20.04 - needs: test + needs: tests steps: - name: Check Completion diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 92a56d56f..4297eee8d 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -269,11 +269,7 @@ config2() # custom program startup address make -C tests/regression/dogfood clean-kernel - if [ "$XLEN" == "64" ]; then - STARTUP_ADDR=0x180000000 make -C tests/regression/dogfood - else - STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood - fi + STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood ./ci/blackbox.sh --driver=simx --app=dogfood ./ci/blackbox.sh --driver=rtlsim --app=dogfood make -C tests/regression/dogfood clean-kernel diff --git a/config.mk.in b/config.mk.in index be369b56e..57f77059e 100644 --- a/config.mk.in +++ b/config.mk.in @@ -31,7 +31,4 @@ RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain RISCV_PREFIX ?= 
riscv$(XLEN)-unknown-elf RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) -VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime -VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel - THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 69f72b7a1..c349f367a 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -177,7 +177,7 @@ `endif `ifndef STARTUP_ADDR -`define STARTUP_ADDR 64'h180000000 +`define STARTUP_ADDR 64'h080000000 `endif `ifndef USER_BASE_ADDR @@ -190,7 +190,7 @@ `ifdef VM_ENABLE `ifndef PAGE_TABLE_BASE_ADDR -`define PAGE_TABLE_BASE_ADDR 64'h1F0000000 +`define PAGE_TABLE_BASE_ADDR 64'h0F0000000 `endif `endif diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index 8fd609d58..7829ffb14 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -2,11 +2,10 @@ ROOT_DIR := $(realpath ../../..) ifeq ($(XLEN),64) CFLAGS += -march=rv64imafd -mabi=lp64d -STARTUP_ADDR ?= 0x180000000 else CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 endif +STARTUP_ADDR ?= 0x80000000 VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel diff --git a/tests/opencl/common.mk b/tests/opencl/common.mk index 3a3de87ee..bb7b1e0d6 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -5,13 +5,12 @@ TARGET ?= opaesim XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt XRT_DEVICE_INDEX ?= 0 +STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d -STARTUP_ADDR ?= 0x180000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=64 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=32 endif diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 94fe840df..2cba5ef9a 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -8,12 +8,11 @@ XRT_DEVICE_INDEX ?= 0 VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel +STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd 
-mabi=lp64d -STARTUP_ADDR ?= 0x180000000 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 endif LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) From dd16d70515e7f37e5efb15c3f7196c2cefaf82e3 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Thu, 3 Oct 2024 17:29:21 -0400 Subject: [PATCH 345/488] contributing and fpga docs --- docs/contributing.md | 2 +- docs/fpga_setup.md | 148 +++++++++++++++++++++++++++++++++++++++++++ docs/simulation.md | 2 +- 3 files changed, 150 insertions(+), 2 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 5264454d2..e87158272 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -41,4 +41,4 @@ However, contributions are strongly encouraged and keep the project moving forwa ## Creating and Adding Tests -see `testing.md` \ No newline at end of file +The CI pipeline runs the Vortex tests. If you are contributing code changes, review `testing.md` to learn how to integrate your own tests. \ No newline at end of file diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 80d71e45f..2a673f8fc 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -1,5 +1,153 @@ # FPGA Startup and Configuration Guide +## Gaining Access to FPGAs with CRNCH +If you are associated with Georgia Tech and need remote access to the FPGAs, you can utilize CRNCH's server. + +## What is CRNCH? + +**C**enter for **R**esearch into **N**ovel **C**omputing **H**ierarchies + +## What does CRNCH Offer? + +**The Rogues Gallery (RG)**: a new concept focused on developing our understanding of next-generation hardware with a focus on unorthodox and uncommon technologies. **RG** will acquire new and unique hardware (i.e., the aforementioned “*rogues*”) from vendors, research labs, and startups and make this hardware available to students, faculty, and industry collaborators within a managed data center environment. + +## Why are the Rogues Important? 
+ +By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore’s Law era of “cheap transistors” ends*. + +## How is the Rogues Gallery Funded? + +The Rogues Gallery testbed is primarily supported by the National Science Foundation (NSF) under NSF Award Number [#2016701](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2016701&HistoricalAwards=false) + +## Rogues Gallery Documentation + +You can read about RG in more detail on its official documentation [page](https://gt-crnch-rg.readthedocs.io/en/main/index.html#). + +You can listen to a talk about RG [here](https://mediaspace.gatech.edu/media/Jeff%20Young%20-%20Rogues%20Gallery%20-%20CRNCH%20Summit%202021/1_lqlgr0jj) + +[CRNCH Summit 2023](https://github.com/gt-crnch/crnch-summit-2023/tree/main) + +## Request Access for Rogues Gallery + +You should use [this form](https://crnch-rg.cc.gatech.edu/request-rogues-gallery-access/) to request access to RG’s reconfigurable computing (vortex fpga) resources. You should receive an email confirming that your ticket has been created. Once it gets processed, you should get an email confirming that your access has been granted. It might take some time to get processed. + +## How to Access Rogues Gallery? + +CRNCH resources do not require any VPN access for GT members, so you can head to the web URL for Open OnDemand: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/) + +Alternatively, you can `ssh` into RG with: `ssh <username>@rg-login.crnch.gatech.edu` + +(`ssh usubramanya3@rg-login.crnch.gatech.edu`) + +Once you’ve logged in, you can use Slurm to request other nodes within the testbed. See more information on Slurm at [this page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). + +Note that you can also use VSCode to log into the Rogues Gallery via its terminal functionality. 
See [this page for more details](https://gt-crnch-rg.readthedocs.io/en/main/general/visual-studio-code.html). + +## **What Machines are Available in the Rogues Gallery?** + +Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). + +## Which Machine do we Need from RG? + +There are three primary nodes you might use. The table below summarizes: + +| Name | Device | Description | +| --- | --- | --- | +| flubber1 | u50 | can synthesize vortex | +| flubber4 | u250 | missing HBM | +| flubber5 | u280 | can synthesize vortex | + + +*Note*: The `USERSCRATCH` folder is synchronized between all RG nodes. That means you can upload your files to `rg-login` and have them available on `flubber[1,4-5`. Changes on one node will be reflected across all nodes. + +## How to Access flubber for Synthesis? + +Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. 
First `ssh` into the rouges gallery: + +```bash +ssh [@rg-login.crnch.gatech.edu](mailto:usubramanya3@rg-login.crnch.gatech.edu) +``` + +Then, to access the hardware node you need to `ssh` into flubber: + +```bash +ssh flubber1 +``` + +## Synthesis for Xillinx Boards + +XRT Environment Setup +---------------------- + + $ source /opt/xilinx/Vitis/2023.1/settings64.sh + $ source /opt/xilinx/xrt/setup.sh + + +Check Installed FPGA Platforms +------------------------------ + + $ platforminfo -l + + +Build FPGA image +---------------- + + $ cd hw/syn/xilinx/xrt + $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make + +Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" + +The generated bitstream will be located under /bin/vortex_afu.xclbin + +Sample FPGA Run Test +-------------------- + +Ensure you have the correct opae runtime for the FPGA target + + $ make -C runtime/xrt clean + $ TARGET=hw make -C runtime/xrt + +Run the following from your Vortex build directory + + $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" + +--- + +The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. + +For long-running jobs, invocation of this makefile can be made of the following form: + +`[CONFIGS=] [PREFIX=] [NUM_CORES=<#>] TARGET=hw|hw_emu PLATFORM= nohup make > 2>&1 &` + +For example: + +```bash +CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 nohup make > build_u280_hw_4c.log 2>&1 & +``` + +The build is complete when the bitstream file `vortex_afu.xclbin` exists in `hw|hw_emu/bin`. 
+ +## Running a Program on FPGA + +The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt driver using the following command: + +`FPGA_BIN_DIR= TARGET=hw|hw_emu PLATFORM= ./ci/blackbox.sh --driver=xrt --app=` + +For example: + +`FPGA_BIN_DIR=`realpath hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin` TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo` + +## Synthesis for Intel (Altera) Boards + +To set up the environment, source the XRT setup.sh and other Xilinx scripts. For example: + +``` +source /opt/xilinx/xrt/setup.sh +source /tools/reconfig/xilinx/Vivado/2022.1/settings64.sh +source /tools/reconfig/xilinx/Vitis/2022.1/settings64.sh + +``` + OPAE Environment Setup ---------------------- diff --git a/docs/simulation.md b/docs/simulation.md index 86ce1f135..e1c578034 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -10,7 +10,7 @@ SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant ### FGPA Simulation -The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) +The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Intel (Altera) based FPGAs. 
### How to Test From 6a447350b7000c959c325198bc573d89a963c2b8 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Thu, 3 Oct 2024 17:42:47 -0400 Subject: [PATCH 346/488] remove redundant docs after consolidating --- docs/altera_fpga_guide.md | 79 --------------------------------------- docs/xilinx_fpga_guide.md | 36 ------------------ 2 files changed, 115 deletions(-) delete mode 100644 docs/altera_fpga_guide.md delete mode 100644 docs/xilinx_fpga_guide.md diff --git a/docs/altera_fpga_guide.md b/docs/altera_fpga_guide.md deleted file mode 100644 index 61d1ae26e..000000000 --- a/docs/altera_fpga_guide.md +++ /dev/null @@ -1,79 +0,0 @@ -# FPGA Startup and Configuration Guide - -OPAE Environment Setup ----------------------- - - $ source /opt/inteldevstack/init_env_user.sh - $ export OPAE_HOME=/opt/opae/1.1.2 - $ export PATH=$OPAE_HOME/bin:$PATH - $ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH - $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH - $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH - -OPAE Build ------------------- - -The FPGA has to following configuration options: -- DEVICE_FAMILY=arria10 | stratix10 -- NUM_CORES=#n - -Command line: - - $ cd hw/syn/altera/opae - $ PREFIX=test1 TARGET=fpga NUM_CORES=4 make - -A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete. -Setting TARGET=ase will build the project for simulation using Intel ASE. 
- - -OPAE Build Configuration ------------------------- - -The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured: -- `NUM_WARPS`: Number of warps per cores -- `NUM_THREADS`: Number of threads per warps -- `PERF_ENABLE`: enable the use of all profile counters - -You configure the syntesis build from the command line: - - $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make - -OPAE Build Progress -------------------- - -You could check the last 10 lines in the build log for possible errors until build completion. - - $ tail -n 10 /build.log - -Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. - - $ ps -u - -If the build fails and you need to restart it, clean up the build folder using the following command: - - $ make clean - -The bitstream file `vortex_afu.gbs` should exist when the build is done: - - $ ls -lsa /synth/vortex_afu.gbs - - -Signing the bitstream and Programming the FPGA ----------------------------------------------- - - $ cd - $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs - $ fpgasupdate vortex_afu_unsigned_ssl.gbs - -Sample FPGA Run Test --------------------- - -Ensure you have the correct opae runtime for the FPGA target - - $ make -C runtime/opae clean - $ TARGET=FPGA make -C runtime/opae - -Run the following from your Vortex build directory - - $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" - diff --git a/docs/xilinx_fpga_guide.md b/docs/xilinx_fpga_guide.md deleted file mode 100644 index f2960deb6..000000000 --- a/docs/xilinx_fpga_guide.md +++ /dev/null @@ -1,36 +0,0 @@ -# FPGA Startup and Configuration Guide - -XRT Environment Setup ----------------------- - - $ source /opt/xilinx/Vitis/2023.1/settings64.sh - $ source /opt/xilinx/xrt/setup.sh - - -Check Installed FPGA Platforms 
------------------------------- - - $ platforminfo -l - - -Build FPGA image ----------------- - - $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make - -Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" - -The generated bitstream will be located under /bin/vortex_afu.xclbin - -Sample FPGA Run Test --------------------- - -Ensure you have the correct opae runtime for the FPGA target - - $ make -C runtime/xrt clean - $ TARGET=hw make -C runtime/xrt - -Run the following from your Vortex build directory - - $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" \ No newline at end of file From 32b0376b28a68ec5d8158229210999a39638ee9c Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Thu, 3 Oct 2024 17:43:39 -0400 Subject: [PATCH 347/488] remove old artifacts --- sim/common/bfloat.cpp | 221 ------------------------------------------ sim/common/bfloat.hpp | 0 2 files changed, 221 deletions(-) delete mode 100644 sim/common/bfloat.cpp delete mode 100644 sim/common/bfloat.hpp diff --git a/sim/common/bfloat.cpp b/sim/common/bfloat.cpp deleted file mode 100644 index e44f81b8b..000000000 --- a/sim/common/bfloat.cpp +++ /dev/null @@ -1,221 +0,0 @@ -#include -#include - -#include -#include - -// get float "in-memory" to exploit iee754 binary representation of floating point values -// use a u to trick compiler into letting you access float's bits directly -// bitwise operations cannot be done directly on iee754 representations per compiler settings -// ordering of the fields is important here -class MyFloat -{ -private: - void printBinary(int n, int i) - { - // Prints the binary representation - // of a number n up to i-bits. 
- int k; - for (k = i - 1; k >= 0; k--) - { - - if ((n >> k) & 1) - std::cout << "1"; - else - std::cout << "0"; - } - } - -public: - union BFloat_t - { - float f; - int i; - struct - { - uint32_t dead : 16; // don't use these, just place-holders - uint32_t mantissa : 7; // Mantissa (fractional part) of the number - uint32_t exponent : 8; // Exponent (power of 2) of the number - uint32_t sign : 1; - } parts; - }; - - void printBFloat(BFloat_t b) - { - std::cout << b.parts.sign << " | "; - printBinary(b.parts.exponent, 8); - std::cout << " | "; - printBinary(b.parts.mantissa, 7); - std::cout << std::endl; - } - - BFloat_t in_mem; - - MyFloat(float x) - { - in_mem.f = x; - printBFloat(in_mem); - } - - MyFloat(uint8_t mantissa, uint8_t exponent, bool sign) - { - in_mem.parts.mantissa = mantissa & 0x7F; - in_mem.parts.exponent = exponent; - in_mem.parts.sign = (int)sign; - - std::cout << "inside constructor" << std::endl; - std::cout << "bfloat:" << in_mem.f << std::endl; - printBFloat(in_mem); - } - - friend MyFloat operator+(const MyFloat &a, const MyFloat &b) - { - // get fields - bool a_sign = (bool)a.in_mem.parts.sign; - uint8_t a_exp = a.in_mem.parts.exponent - 127; - uint8_t a_mantissa = a.in_mem.parts.mantissa | 0x80; // add in the implicit bit - - bool b_sign = (bool)b.in_mem.parts.sign; - uint8_t b_exp = b.in_mem.parts.exponent - 127; - uint8_t b_mantissa = b.in_mem.parts.mantissa | 0x80; // add in the implicit bit - - // align mantissas by shifting the smaller exponent to the larger exponent - if (a_exp < b_exp) - { - a_mantissa >>= (b_exp - a_exp); - a_exp = b_exp; - } - else - { - b_mantissa >>= (a_exp - b_exp); - b_exp = a_exp; - } - - // add mantissas and adjust exponent if necessary - int sum_mantissa = a_mantissa + b_mantissa; - if (sum_mantissa & 0x100) - { // this val check might be wrong - sum_mantissa >>= 1; - a_exp++; - } - - // build binary representation of result - return MyFloat(sum_mantissa, a_exp, a_sign); - } - - friend MyFloat 
operator*(const MyFloat &a, const MyFloat &b) - { - uint16_t a_exp = a.in_mem.parts.exponent; - uint16_t b_exp = b.in_mem.parts.exponent; - uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit - uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bi - - std::bitset<8> bits(a_exp); - std::cout << "Binary a exp: " << bits << std::endl; - - bool product_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; - - if (a_exp == 0xFF || b_exp == 0xff) - { - return MyFloat(0, 0xFF, product_sign); - } - // Multiply mantissas - uint32_t product_mantissa = static_cast(a_mantissa) * static_cast(b_mantissa); - - // Add exponents - int product_exp = a_exp + b_exp - 127; - - product_mantissa = (product_mantissa + 0x40) >> 7; - - // Round to nearest even (round half to even) - if ((product_mantissa & 0x7F) == 0x40 && (product_mantissa & 0x1) != 0) - { - product_mantissa++; - } - if (product_mantissa & 0x0100) - { // Check if the implicit bit shifted to the left - product_mantissa >>= 1; - product_exp++; - } - else - { - product_mantissa &= 0x7F; // Remove the implicit bit - } - return MyFloat(product_mantissa, product_exp, product_sign); - } - - friend MyFloat operator/(const MyFloat &a, const MyFloat &b) - { - uint16_t a_exp = a.in_mem.parts.exponent; - uint16_t b_exp = b.in_mem.parts.exponent; - std::bitset<8> bits(b_exp); - std::cout << "Binary b exp: " << bits << std::endl; - uint16_t a_mantissa = a.in_mem.parts.mantissa | 0x0080; // Add implicit bit - uint16_t b_mantissa = b.in_mem.parts.mantissa | 0x0080; // Add implicit bit - - bool quotient_sign = a.in_mem.parts.sign ^ b.in_mem.parts.sign; - - // Check if divisor is zero - if (b_exp == 0 && b_mantissa == 0) - { - std::cout << "HERE" << std::endl; - return MyFloat(0, 0xFF, quotient_sign); // Return infinity with the appropriate sign - } - - // Check for infinity or zero in dividend - if (a_exp == 0xFF || a_exp == 0) - { - return MyFloat(0, a_exp, quotient_sign); - } - - // Subtract 
exponents - int quotient_exp = a_exp - b_exp + 127; - - // Divide mantissas - uint32_t quotient_mantissa = (static_cast(a_mantissa) << 8) / static_cast(b_mantissa); - - quotient_mantissa = (quotient_mantissa + 0x40) >> 8; - - // Round to nearest even (round half to even) - if ((quotient_mantissa & 0x1) != 0 && (quotient_mantissa & 0x7F) == 0x40) - { - quotient_mantissa--; - } - else if ((quotient_mantissa & 0x7F) == 0x40) - { - quotient_mantissa++; - } - - if (quotient_mantissa & 0x0100) - { // Check if the implicit bit shifted to the left - quotient_mantissa >>= 1; - quotient_exp++; - } - else - { - quotient_mantissa &= 0x7F; // Remove the implicit bit - } - return MyFloat(quotient_mantissa, quotient_exp, quotient_sign); - } -}; - -int main() -{ - float a = 8; - float b = 0; - std::cout << a << std::endl; - - std::bitset bits(*reinterpret_cast(&a)); - std::cout << "Binary representation of " << a << " is \n" - << bits << std::endl; - std::cout << "Binary representation of " << b << " is \n" - << bits << std::endl; - - MyFloat bfloat_version_of_a(a); - MyFloat bfloat_version_of_b(b); - MyFloat c = bfloat_version_of_a / bfloat_version_of_b; - - // You can now print the result stored in c or perform other operations with it. - - return 0; -} diff --git a/sim/common/bfloat.hpp b/sim/common/bfloat.hpp deleted file mode 100644 index e69de29bb..000000000 From 208c5b3804636dd6cf0690e00d681ddebfb5bb92 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 4 Oct 2024 08:56:49 -0400 Subject: [PATCH 348/488] reorg docs --- README.md | 10 +++--- docs/contributing.md | 17 +++------- docs/fpga_setup.md | 77 +++++++++++++++++--------------------------- docs/index.md | 25 +------------- docs/simulation.md | 24 ++++++++++++-- 5 files changed, 63 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index ec8d10bd5..553939b50 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Vortex GPGPU -Vortex is a full-stack open-source RISC-V GPGPU. 
+Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backend drivers*, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. ## Specifications @@ -29,12 +29,14 @@ Vortex is a full-stack open-source RISC-V GPGPU. - `ci`: Continuous integration scripts. - `miscs`: Miscellaneous resources. -## Build Instructions -More detailed build instructions can be found [here](docs/install_vortex.md). +## Quick Start +The following steps demonstrate how to run Vortex with the default driver: simx. If you are interested in a different backend, look [here](docs/simulation.md). + ### Supported OS Platforms - Ubuntu 18.04, 20.04 - Centos 7 ### Toolchain Dependencies +The following dependencies will be fetched prebuilt by `toolchain_install.sh`. - [POCL](http://portablecl.org/) - [LLVM](https://llvm.org/) - [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain) @@ -107,4 +109,4 @@ echo "source /ci/toolchain_env.sh" >> ~/.bashrc ```sh ./ci/blackbox.sh --app=demo --debug=3 ``` -- For additional information, check out the /docs. +- For additional information, check out the [documentation](docs/index.md) diff --git a/docs/contributing.md b/docs/contributing.md index e87158272..f10f4017b 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -22,7 +22,7 @@ However, contributions are strongly encouraged and keep the project moving forwa 6.
Otherwise, you can go to your fork on Github online and manually create a PR (todo) (todo): how to name and format your PR, what information you should add to the PR, does not need to be too strict if you are attending the weekly meetings* 7. Github uses the following semantics: `base repository` gets the changes from your `head repository` -8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `develop` since active development should only be added to this branch +8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `master` since the master branch is protected by reviewed PRs. 9. And you should assign the `head repository` to `/vortex` (which represents your fork of vortex) and the `base` branch to the one created in step 2 10. Now that your intended PR has been specified, you should review the status. Check for merge conflicts, if all your commits are present, and all the modified files make sense 11. You can still make a PR if there are issues in step 10, just make sure the structure is correct according to steps 7-9 @@ -31,14 +31,7 @@ However, contributions are strongly encouraged and keep the project moving forwa 14. As long as the `head repository`'s `base` branch is the one you edited, the PR will automatically get the most recent changes 15. 
When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR - -- You should create a new branch from develop that is clearly named with the feature that you want to add -- Avoid pushing directly to the `master` branch instead you will need to make a Pull Request (PR) -- There should be protections in place that prevent pushing directly to the main branch, but don't rely on it -- When you make a PR it will be tested against the continuous integration (ci) pipeline (see `continuous_integration.md`) -- It is not sufficient to just write some tests, they need to be incorporated into the ci pipeline to make sure they are run -- During a PR, you might receive feedback regarding your changes and you might need to make further commits to your branch - - -## Creating and Adding Tests -The CI pipeline the vortex tests. If you are contributing code changes, then review `testing.md` to learn how to integrate your own tests \ No newline at end of file +## What Makes a Good Contribution? +- If you are contributing code changes, then review `testing.md` to ensure your tests are integrated into the CI pipeline +- During a PR, you should consider the advice you are provided by your reviewers. Remember you can keep adding commits to an open PR! +- If your change aims to fix an issue opened on Github, please tag that issue in the PR itself \ No newline at end of file diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 2a673f8fc..78ed63e25 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -1,7 +1,7 @@ # FPGA Startup and Configuration Guide ## Gaining Access to FPGA's with CRNCH -If you are associated with Georgia Tech and need remote access to the FPGA's, you can utilize CRNCH's server. +If you are associated with Georgia Tech (or related workshops) you can use CRNCH's server to gain remote access to FPGA's. Otherwise, you can skip to the Xilinx or Intel (Altera) synthesis steps below. ## What is CRNCH?
@@ -37,11 +37,10 @@ CRNCH resources do not require any VPN access for GT members so you can head to Alternatively, you can `ssh` into rg with: `ssh @rg-login.crnch.gatech.edu` -(`ssh usubramanya3@rg-login.crnch.gatech.edu`) +(`ssh gburdell3@rg-login.crnch.gatech.edu`) -Once you’ve logged in, you can use Slurm to request other nodes within the testbed. See more information on Slurm at [this page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). - -Note that you can also use VSCode to log into the Rogues Gallery via its terminal functionality. See [this page for more details](https://gt-crnch-rg.readthedocs.io/en/main/general/visual-studio-code.html). +## Synthesis for Xilinx Boards +First, you need to get access to the server with the Xilinx FPGAs. ## **What Machines are Available in the Rogues Gallery?** @@ -49,7 +48,7 @@ Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io ## Which Machine do we Need from RG? -There are three primary nodes you might use. The table below summarizes: +There are three primary nodes you might use for Xilinx FPGAs. The table below summarizes: | Name | Device | Description | | --- | --- | --- | @@ -62,58 +61,42 @@ There are three primary nodes you might use. The table below summarizes: ## How to Access flubber for Synthesis? -Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. First `ssh` into the rouges gallery: +Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. First `ssh` into the rouges gallery, if you have not already. 
```bash ssh [@rg-login.crnch.gatech.edu](mailto:usubramanya3@rg-login.crnch.gatech.edu) ``` -Then, to access the hardware node you need to `ssh` into flubber: +Once you’ve logged in, you can use Slurm to request an interactive job. First, view the available Slurm Partitions [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). Then, the example requests can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). +In our case we might run: ```bash -ssh flubber1 +salloc -p rg-fpga --nodes=1 --ntasks-per-node=1 --nodelist flubber1 --time=01:00:00 ``` -## Synthesis for Xillinx Boards +## Environment Setup +Once you are logged in, you will need to complete some first time configurations. -XRT Environment Setup ----------------------- +### Clone Repo - $ source /opt/xilinx/Vitis/2023.1/settings64.sh - $ source /opt/xilinx/xrt/setup.sh +### Source Configuration Scripts +``` +$ source /opt/xilinx/xrt/setup.sh +$ source /opt/xilinx/Vitis/2023.1/settings64.sh +``` + +### Check Installed FPGA Platforms +`platforminfo -l` -Check Installed FPGA Platforms ------------------------------- - - $ platforminfo -l - - -Build FPGA image ----------------- - - $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make - -Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" - -The generated bitstream will be located under /bin/vortex_afu.xclbin - -Sample FPGA Run Test --------------------- - -Ensure you have the correct opae runtime for the FPGA target - - $ make -C runtime/xrt clean - $ TARGET=hw make -C runtime/xrt - -Run the following from your Vortex build directory - - $ TARGET=hw FPGA_BIN_DIR=/bin ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n128" - ---- - +### Build FPGA image The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex.
+``` + $ cd hw/syn/xilinx/xrt + $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make build_u50_hw_4c.log 2>&1 & +``` +Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" +The generated bitstream will be located under /bin/vortex_afu.xclbin For long-running jobs, invocation of this makefile can be made of the following form: @@ -127,7 +110,7 @@ CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET The build is complete when the bitstream file `vortex_afu.xclbin` exists in `hw|hw_emu/bin`. -## Running a Program on FPGA +### Running a Program on Xilinx FPGA The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt driver using the following command: @@ -135,9 +118,9 @@ The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt d For example: -`FPGA_BIN_DIR=`realpath hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin` TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo` +```FPGA_BIN_DIR= hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo``` -## Synthesis for Intel (Altera) Boards +### Synthesis for Intel (Altera) Boards To set up the environment, source the XRT setup.sh and other Xilinx scripts. 
For example: diff --git a/docs/index.md b/docs/index.md index 14a45f335..a53a2fd15 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,29 +5,6 @@ - [Codebase Layout](codebase.md) - [Microarchitecture](microarchitecture.md) - [Cache Subsystem](cache_subsystem.md) -- [Software](software.md) - [Simulation](simulation.md) -- [Altera FPGA Setup Guide](altera_fpga_guide.md) -- [Xilinx FPGA Setup Guide](xilinx_fpga_guide.md) +- [Contributing](contributing.md) - [Debugging](debugging.md) -- [Useful Links](references.md) - -## Installation - -- For the different environments Vortex supports, [read this document](environment_setup.md). -- To install on your own system, [follow this document](install_vortex.md). - -## Quick Start Scenarios - -Running Vortex simulators with different configurations: -- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads - - $ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic - -- Run demo driver test with opae driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads - - $ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo - -- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads - - $ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood diff --git a/docs/simulation.md b/docs/simulation.md index e1c578034..d55b3cd94 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -6,11 +6,14 @@ ### Cycle-Approximate Simulation -SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simX` folder. +SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simx` folder. The [readme](README.md) has the most detailed instructions for building and running simX. + +- To install on your own system, [follow this document](install_vortex.md). 
+- For the different Georgia Tech environments Vortex supports, [read this document](environment_setup.md). ### FGPA Simulation -The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Intel (Altera) based FPGAs. +The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Altera based FPGAs. ### How to Test @@ -47,4 +50,19 @@ PERF: core1: instrs=90693, cycles=53108, IPC=1.707709 PERF: core2: instrs=90849, cycles=53107, IPC=1.710678 PERF: core3: instrs=90836, cycles=50347, IPC=1.804199 PERF: instrs=363180, cycles=53108, IPC=6.838518 -``` \ No newline at end of file +``` + +## Additional Quick Start Scenarios + +Running Vortex simulators with different configurations: +- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads + + $ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic + +- Run demo driver test with opae driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads + + $ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo + +- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads + + $ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood \ No newline at end of file From 0bf79a0f0526621932d9cf502be0ee50ae5d49af Mon Sep 17 00:00:00 2001 From: Jaewon Lee Date: Fri, 4 Oct 2024 10:13:31 -0400 Subject: [PATCH 349/488] Revert "Initial HBM changes for RTL" --- hw/rtl/Vortex_hbm.sv | 229 ---------- hw/rtl/cache/VX_cache_bypass.sv | 2 - hw/rtl/cache/VX_cache_bypass_l3.sv | 355 ---------------- hw/rtl/cache/VX_cache_l3.sv | 640 ---------------------------- hw/rtl/cache/VX_cache_wrap_l3.sv | 331 --------------- sim/rtlsim/Makefile | 4 +- sim/rtlsim/processor_hbm.cpp | 656 ----------------------------- 
third_party/softfloat | 2 +- 8 files changed, 3 insertions(+), 2216 deletions(-) delete mode 100644 hw/rtl/Vortex_hbm.sv delete mode 100644 hw/rtl/cache/VX_cache_bypass_l3.sv delete mode 100644 hw/rtl/cache/VX_cache_l3.sv delete mode 100644 hw/rtl/cache/VX_cache_wrap_l3.sv delete mode 100644 sim/rtlsim/processor_hbm.cpp diff --git a/hw/rtl/Vortex_hbm.sv b/hw/rtl/Vortex_hbm.sv deleted file mode 100644 index d2ffc344d..000000000 --- a/hw/rtl/Vortex_hbm.sv +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_define.vh" - -module Vortex_hbm import VX_gpu_pkg::*; ( - `SCOPE_IO_DECL - - // Clock - input wire clk, - input wire reset, - - // Memory request - output wire mem_req_valid [`NUM_MEM_PORTS], - output wire mem_req_rw [`NUM_MEM_PORTS], - output wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen [`NUM_MEM_PORTS], - output wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr [`NUM_MEM_PORTS], - output wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data [`NUM_MEM_PORTS], - output wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag [`NUM_MEM_PORTS], - input wire mem_req_ready [`NUM_MEM_PORTS], - - // Memory response - input wire mem_rsp_valid [`NUM_MEM_PORTS], - input wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data [`NUM_MEM_PORTS], - input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag [`NUM_MEM_PORTS], - output wire mem_rsp_ready [`NUM_MEM_PORTS], - - // DCR write request - input wire dcr_wr_valid, - input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, - input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data, - - // Status - output wire busy -); - -`ifdef SCOPE - localparam scope_cluster = 0; - `SCOPE_IO_SWITCH (`NUM_CLUSTERS); -`endif - -`ifdef PERF_ENABLE - VX_mem_perf_if mem_perf_if(); - assign mem_perf_if.icache = 'x; - assign mem_perf_if.dcache = 'x; - assign mem_perf_if.l2cache = 'x; - assign mem_perf_if.lmem = 'x; -`endif - - VX_mem_bus_if #( - .DATA_SIZE (`L2_LINE_SIZE), - .TAG_WIDTH (L2_MEM_TAG_WIDTH) - ) per_cluster_mem_bus_if[`NUM_CLUSTERS](); - - VX_mem_bus_if #( - .DATA_SIZE (`L3_LINE_SIZE), - .TAG_WIDTH (L3_MEM_TAG_WIDTH) - ) mem_bus_if[`NUM_MEM_PORTS](); - - `RESET_RELAY (l3_reset, reset); - - VX_cache_wrap_l3 #( - .INSTANCE_ID ("l3cache"), - .CACHE_SIZE (`L3_CACHE_SIZE), - .LINE_SIZE (`L3_LINE_SIZE), - .NUM_BANKS (`L3_NUM_BANKS), - .NUM_WAYS (`L3_NUM_WAYS), - .WORD_SIZE (L3_WORD_SIZE), - .NUM_MEM_PORTS (`NUM_MEM_PORTS), - .NUM_REQS (L3_NUM_REQS), - .CRSQ_SIZE (`L3_CRSQ_SIZE), - .MSHR_SIZE (`L3_MSHR_SIZE), - .MRSQ_SIZE (`L3_MRSQ_SIZE), - .MREQ_SIZE (`L3_WRITEBACK ? 
`L3_MSHR_SIZE : `L3_MREQ_SIZE), - .TAG_WIDTH (L2_MEM_TAG_WIDTH), - .WRITE_ENABLE (1), - .WRITEBACK (`L3_WRITEBACK), - .DIRTY_BYTES (`L3_WRITEBACK), - .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), - .NC_ENABLE (1), - .PASSTHRU (!`L3_ENABLED) - ) l3cache ( - .clk (clk), - .reset (l3_reset), - - `ifdef PERF_ENABLE - .cache_perf (mem_perf_if.l3cache), - `endif - - .core_bus_if (per_cluster_mem_bus_if), - .mem_bus_if (mem_bus_if) - ); - - wire mem_req_fire[`NUM_MEM_PORTS-1:0]; - wire mem_rsp_fire[`NUM_MEM_PORTS-1:0]; - - for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin - assign mem_req_valid[i] = mem_bus_if[i].req_valid; - assign mem_req_rw[i] = mem_bus_if[i].req_data.rw; - assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen; - assign mem_req_addr[i] = mem_bus_if[i].req_data.addr; - assign mem_req_data[i] = mem_bus_if[i].req_data.data; - assign mem_req_tag[i] = mem_bus_if[i].req_data.tag; - assign mem_bus_if[i].req_ready = mem_req_ready[i]; - `UNUSED_VAR (mem_bus_if[i].req_data.atype) - - assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; - assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; - assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; - assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready; - - assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i]; - assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i]; - `UNUSED_VAR (mem_req_fire[i]) - `UNUSED_VAR (mem_rsp_fire[i]) - end - - VX_dcr_bus_if dcr_bus_if(); - assign dcr_bus_if.write_valid = dcr_wr_valid; - assign dcr_bus_if.write_addr = dcr_wr_addr; - assign dcr_bus_if.write_data = dcr_wr_data; - - wire [`NUM_CLUSTERS-1:0] per_cluster_busy; - - // Generate all clusters - for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters - - `RESET_RELAY (cluster_reset, reset); - - VX_dcr_bus_if cluster_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); - - VX_cluster #( - .CLUSTER_ID (cluster_id), - .INSTANCE_ID 
($sformatf("cluster%0d", cluster_id)) - ) cluster ( - `SCOPE_IO_BIND (scope_cluster + cluster_id) - - .clk (clk), - .reset (cluster_reset), - - `ifdef PERF_ENABLE - .mem_perf_if (mem_perf_if), - `endif - - .dcr_bus_if (cluster_dcr_bus_if), - - .mem_bus_if (per_cluster_mem_bus_if[cluster_id]), - - .busy (per_cluster_busy[cluster_id]) - ); - end - - `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1)); - -`ifdef PERF_ENABLE - - reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads; - mem_perf_t mem_perf; - - for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin - always @(posedge clk) begin - if (reset) begin - perf_mem_pending_reads <= '0; - end else begin - perf_mem_pending_reads <= $signed(perf_mem_pending_reads) + - `PERF_CTR_BITS'($signed(2'(mem_req_fire[i] && ~mem_bus_if[i].req_data.rw) - 2'(mem_rsp_fire[i]))); - end - end - end - - wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0]; - wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0]; - - for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin - assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw; - assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw; - end - - always @(posedge clk) begin - if (reset) begin - mem_perf <= '0; - end else begin - for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin - mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(mem_rd_req_fire[i]); - mem_perf.writes <= mem_perf.writes + `PERF_CTR_BITS'(mem_wr_req_fire[i]); - end - mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads; - end - end - assign mem_perf_if.mem = mem_perf; - -`endif - -`ifdef DBG_TRACE_MEM - always @(posedge clk) begin - for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin - if (mem_req_fire[i]) begin - if (mem_req_rw[i]) - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i], i)); - else - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h, bank=%d\n", $time, 
`TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], i)); - end - if (mem_rsp_fire[i]) begin - `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i])); - end - end - end -`endif - -`ifdef SIMULATION - always @(posedge clk) begin - $fflush(); // flush stdout buffer - end -`endif - -endmodule diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 18dfd50ad..379d33e8a 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -250,9 +250,7 @@ module VX_cache_bypass #( end end - `IGNORE_UNUSED_BEGIN wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; - `IGNORE_UNUSED_END VX_bits_remove #( .N (MEM_TAG_OUT_WIDTH), diff --git a/hw/rtl/cache/VX_cache_bypass_l3.sv b/hw/rtl/cache/VX_cache_bypass_l3.sv deleted file mode 100644 index 69393cfc6..000000000 --- a/hw/rtl/cache/VX_cache_bypass_l3.sv +++ /dev/null @@ -1,355 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_cache_define.vh" - -module VX_cache_bypass_l3 #( - parameter NUM_REQS = 1, - parameter NUM_OUTPUTS = 1, - parameter TAG_SEL_IDX = 0, - - parameter PASSTHRU = 0, - parameter NC_ENABLE = 0, - - parameter WORD_SIZE = 1, - parameter LINE_SIZE = 1, - - parameter CORE_ADDR_WIDTH = 1, - - parameter CORE_TAG_WIDTH = 1, - - parameter MEM_ADDR_WIDTH = 1, - parameter MEM_TAG_IN_WIDTH = 1, - parameter MEM_TAG_OUT_WIDTH = 1, - - parameter UUID_WIDTH = 0, - - parameter CORE_OUT_BUF = 0, - parameter MEM_OUT_BUF = 0, - - parameter CORE_DATA_WIDTH = WORD_SIZE * 8 - ) ( - input wire clk, - input wire reset, - - // Core request in - VX_mem_bus_if.slave core_bus_in_if [NUM_REQS], - - // Core request out - VX_mem_bus_if.master core_bus_out_if [NUM_REQS], - - // Memory request in - VX_mem_bus_if.slave mem_bus_in_if, - - // Memory request out - VX_mem_bus_if.master mem_bus_out_if -); - localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); - - localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); - localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; - - localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; - localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE); - - localparam CORE_TAG_ID_BITS = CORE_TAG_WIDTH - UUID_WIDTH; - localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS; - localparam MEM_TAG_BYPASS_BITS = UUID_WIDTH + MEM_TAG_ID_BITS; - - `STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter")) - - // handle core requests /////////////////////////////////////////////////// - - wire core_req_nc_valid; - wire [NUM_REQS-1:0] core_req_nc_valids; - wire [NUM_REQS-1:0] core_req_nc_idxs; - wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; - wire [NUM_REQS-1:0] core_req_nc_sel; - wire [NUM_REQS-1:0] core_req_nc_ready; - - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU != 0) begin - assign core_req_nc_idxs[i] = 1'b1; - end else if (NC_ENABLE) begin - assign 
core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; - end else begin - assign core_req_nc_idxs[i] = 1'b0; - end - assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; - end - - /* - - VX_generic_arbiter #( - .NUM_REQS (NUM_REQS), - .TYPE (PASSTHRU ? "R" : "P") - ) core_req_nc_arb ( - .clk (clk), - .reset (reset), - .requests (core_req_nc_valids), - .grant_index (core_req_nc_idx), - .grant_onehot (core_req_nc_sel), - .grant_valid (core_req_nc_valid), - .grant_ready (core_req_nc_ready) - ); - */ - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; - assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; - assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i]) - : core_bus_out_if[i].req_ready; - end - - // handle memory requests ///////////////////////////////////////////////// - - wire [NUM_OUTPUTS-1:0] mem_req_out_valid; - wire [NUM_OUTPUTS-1:0] mem_req_out_rw; - wire [NUM_OUTPUTS-1:0][LINE_SIZE-1:0] mem_req_out_byteen; - wire [NUM_OUTPUTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr; - wire [NUM_OUTPUTS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype; - wire [NUM_OUTPUTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_out_data; - wire [NUM_OUTPUTS-1:0][MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; - wire [NUM_OUTPUTS-1:0] mem_req_out_ready; - - wire [NUM_REQS-1:0] core_req_nc_sel_rw; - wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_nc_sel_byteen; - wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; - wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype; - wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_nc_sel_data; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; - - wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_nc_mux_in[i] = { - core_bus_in_if[i].req_data.rw, - 
core_bus_in_if[i].req_data.byteen, - core_bus_in_if[i].req_data.addr, - core_bus_in_if[i].req_data.atype, - core_bus_in_if[i].req_data.data, - core_bus_in_if[i].req_data.tag - }; - end - - assign { - core_req_nc_sel_rw, - core_req_nc_sel_byteen, - core_req_nc_sel_addr, - core_req_nc_sel_atype, - core_req_nc_sel_data, - core_req_nc_sel_tag - } = core_req_nc_mux_in; - - assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready; - - assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid; - assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw; - assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH]; - assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype; - - wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; - - wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; - - if (WORDS_PER_LINE > 1) begin - reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; - reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; - - wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; - - always @(*) begin - mem_req_byteen_in_r = '0; - mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen; - - mem_req_data_in_r = 'x; - mem_req_data_in_r[req_wsel] = core_req_nc_sel_data; - end - - assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r; - assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; - if (NUM_REQS > 1) begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); - end else begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); - end - end else begin - assign mem_req_out_byteen = mem_bus_in_if[0].req_valid ? 
mem_bus_in_if[0].req_data.byteen : core_req_nc_sel_byteen; - assign mem_req_out_data = mem_bus_in_if[0].req_valid ? mem_bus_in_if[0].req_data.data : core_req_nc_sel_data; - if (NUM_REQS > 1) begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id}); - end else begin - assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id}); - end - end - - wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; - - if (UUID_WIDTH != 0) begin - assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; - end else begin - assign mem_req_tag_bypass = mem_req_tag_id_bypass; - end - - if (PASSTHRU != 0) begin - assign mem_req_out_tag = mem_req_tag_bypass; - `UNUSED_VAR (mem_bus_in_if[0].req_data.tag) - end else begin - if (NC_ENABLE) begin - VX_bits_insert #( - .N (MEM_TAG_OUT_WIDTH-1), - .S (1), - .POS (TAG_SEL_IDX) - ) mem_req_tag_in_nc_insert ( - .data_in (mem_bus_in_if[0].req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if[0].req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), - .ins_in (~mem_bus_in_if[0].req_valid), - .data_out (mem_req_out_tag) - ); - end else begin - assign mem_req_out_tag = mem_bus_in_if[0].req_data.tag; - end - end - - assign mem_bus_in_if[0].req_ready = mem_req_out_ready; - - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? 
`TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_out_valid), - .ready_in (mem_req_out_ready), - .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), - .data_out ({mem_bus_out_if[0].req_data.rw, mem_bus_out_if[0].req_data.byteen, mem_bus_out_if[0].req_data.addr, mem_bus_out_if[0].req_data.atype, mem_bus_out_if[0].req_data.data, mem_bus_out_if[0].req_data.tag}), - .valid_out (mem_bus_out_if[0].req_valid), - .ready_out (mem_bus_out_if[0].req_ready) - ); - - // handle core responses ////////////////////////////////////////////////// - - wire [NUM_REQS-1:0] core_rsp_in_valid; - wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_in_data; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_in_tag; - wire [NUM_REQS-1:0] core_rsp_in_ready; - - wire is_mem_rsp_nc; - if (PASSTHRU != 0) begin - assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid; - end else begin - if (NC_ENABLE) begin - assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid && mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX]; - end else begin - assign is_mem_rsp_nc = 1'b0; - end - end - - wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; - - VX_bits_remove #( - .N (MEM_TAG_OUT_WIDTH), - .S (NC_ENABLE), - .POS (TAG_SEL_IDX) - ) mem_rsp_tag_in_nc_remove ( - .data_in (mem_bus_out_if[0].rsp_data.tag), - .data_out (mem_rsp_tag_id_nc) - ); - - wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; - if (NUM_REQS > 1) begin - assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; - end else begin - assign rsp_idx = 1'b0; - end - - reg [NUM_REQS-1:0] rsp_nc_valid_r; - always @(*) begin - rsp_nc_valid_r = '0; - rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; - assign core_bus_out_if[i].rsp_ready = 
core_rsp_in_ready[i]; - end - - if (WORDS_PER_LINE > 1) begin - wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? - core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; - end - end else begin - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data; - end - end - - wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2; - if (UUID_WIDTH != 0) begin - assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]}; - end else begin - assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU) begin - assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2; - end else if (NC_ENABLE) begin - assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; - end else begin - assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; - end - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - VX_elastic_buffer #( - .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) - ) core_rsp_buf ( - .clk (clk), - .reset (reset), - .valid_in (core_rsp_in_valid[i]), - .ready_in (core_rsp_in_ready[i]), - .data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}), - .data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}), - .valid_out (core_bus_in_if[i].rsp_valid), - .ready_out (core_bus_in_if[i].rsp_ready) - ); - end - - // handle memory responses //////////////////////////////////////////////// - - if (PASSTHRU != 0) begin - assign mem_bus_in_if[0].rsp_valid = 1'b0; - assign mem_bus_in_if[0].rsp_data.data = '0; - assign mem_bus_in_if[0].rsp_data.tag = '0; - end else if (NC_ENABLE) begin - assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid && ~mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX]; - assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data; - assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0]; - end else begin - assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid; - assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data; - assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc; - end - - wire [NUM_REQS-1:0] core_rsp_out_valid; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; - end - - assign mem_bus_out_if[0].rsp_ready = is_mem_rsp_nc ? (~core_rsp_out_valid[rsp_idx] && core_rsp_in_ready[rsp_idx]) : mem_bus_in_if[0].rsp_ready; - -endmodule diff --git a/hw/rtl/cache/VX_cache_l3.sv b/hw/rtl/cache/VX_cache_l3.sv deleted file mode 100644 index 7eb7556de..000000000 --- a/hw/rtl/cache/VX_cache_l3.sv +++ /dev/null @@ -1,640 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_cache_define.vh" - -module VX_cache_l3 import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "", - - // Number of Word requests per cycle - parameter NUM_REQS = 4, - - // Size of cache in bytes - parameter CACHE_SIZE = 4096, - // Size of line inside a bank in bytes - parameter LINE_SIZE = 64, - // Number of banks - parameter NUM_BANKS = 1, - // Number of memory ports - parameter NUM_MEM_PORTS = 1, - // Number of associative ways - parameter NUM_WAYS = 1, - // Size of a word in bytes - parameter WORD_SIZE = `XLEN/8, - - // Core Response Queue Size - parameter CRSQ_SIZE = 2, - // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, - // Memory Response Queue Size - parameter MRSQ_SIZE = 0, - // Memory Request Queue Size - parameter MREQ_SIZE = 4, - - // Enable cache writeable - parameter WRITE_ENABLE = 1, - - // Enable cache writeback - parameter WRITEBACK = 0, - - // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, - - // Request debug identifier - parameter UUID_WIDTH = 0, - - // core request tag size - parameter TAG_WIDTH = UUID_WIDTH + 1, - - // Core response output register - parameter CORE_OUT_BUF = 0, - - // Memory request output register - parameter MEM_OUT_BUF = 0 - ) ( - // PERF -`ifdef PERF_ENABLE - output cache_perf_t cache_perf, -`endif - - input wire clk, - input wire reset, - - VX_mem_bus_if.slave core_bus_if [NUM_REQS], - VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS] -); - - `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2")) - 
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable")) - `STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback")) - - // In writeback mode, memory fill response may issue a new memory request to handle evicted blocks. - // We need to ensure that the memory request queue never fills up to avoid deadlock. - `STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE")) - - localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); - localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); - localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; - localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; - localparam WORD_WIDTH = WORD_SIZE * 8; - localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE); - localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); - localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); - localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); - localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; - localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; - - localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); - localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); - - localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 
2 : 0; - -`ifdef PERF_ENABLE - wire [NUM_BANKS-1:0] perf_read_miss_per_bank; - wire [NUM_BANKS-1:0] perf_write_miss_per_bank; - wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank; -`endif - - VX_mem_bus_if #( - .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) - ) core_bus2_if[NUM_REQS](); - - wire [NUM_BANKS-1:0] per_bank_flush_begin; - wire [NUM_BANKS-1:0] per_bank_flush_end; - - wire [NUM_BANKS-1:0] per_bank_core_req_fire; - - VX_cache_flush #( - .NUM_REQS (NUM_REQS), - .NUM_BANKS (NUM_BANKS), - .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency - ) flush_unit ( - .clk (clk), - .reset (reset), - .core_bus_in_if (core_bus_if), - .core_bus_out_if (core_bus2_if), - .bank_req_fire (per_bank_core_req_fire), - .flush_begin (per_bank_flush_begin), - .flush_end (per_bank_flush_end) - ); - - /////////////////////////////////////////////////////////////////////////// - - // Core response buffering - wire [NUM_REQS-1:0] core_rsp_valid_s; - wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s; - wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; - wire [NUM_REQS-1:0] core_rsp_ready_s; - - `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT); - - for (genvar i = 0; i < NUM_REQS; ++i) begin - - VX_elastic_buffer #( - .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), - .SIZE (CORE_REQ_BUF_ENABLE ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) - ) core_rsp_buf ( - .clk (clk), - .reset (core_rsp_reset[i]), - .valid_in (core_rsp_valid_s[i]), - .ready_in (core_rsp_ready_s[i]), - .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), - .data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}), - .valid_out (core_bus2_if[i].rsp_valid), - .ready_out (core_bus2_if[i].rsp_ready) - ); - end - - /////////////////////////////////////////////////////////////////////////// - - // Memory request buffering - wire [NUM_MEM_PORTS-1:0] mem_req_valid_s; - wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire [NUM_MEM_PORTS-1:0] mem_req_rw_s; - wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; - wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_s; - wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_s; - wire [NUM_MEM_PORTS-1:0] mem_req_flush_s; - wire [NUM_MEM_PORTS-1:0] mem_req_ready_s; - - wire [NUM_MEM_PORTS-1:0] mem_bus_if_flush; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_valid_s[i]), - .ready_in (mem_req_ready_s[i]), - .data_in ({mem_req_rw_s[i], mem_req_byteen_s[i], mem_req_addr_s[i], mem_req_data_s[i], mem_req_tag_s[i], mem_req_flush_s[i]}), - .data_out ({mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag, mem_bus_if_flush[i]}), - .valid_out (mem_bus_if[i].req_valid), - .ready_out (mem_bus_if[i].req_ready) - ); - - assign mem_bus_if[i].req_data.atype = mem_bus_if_flush[i] ? 
`ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; - - end - - /////////////////////////////////////////////////////////////////////////// - - // Memory response buffering - wire [NUM_MEM_PORTS-1:0] mem_rsp_valid_s; - wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_rsp_data_s; - wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; - wire [NUM_MEM_PORTS-1:0] mem_rsp_ready_s; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - VX_elastic_buffer #( - .DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH), - .SIZE (MRSQ_SIZE), - .OUT_REG (MRSQ_SIZE > 2) - ) mem_rsp_queue ( - .clk (clk), - .reset (reset), - .valid_in (mem_bus_if[i].rsp_valid), - .ready_in (mem_bus_if[i].rsp_ready), - .data_in ({mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data}), - .data_out ({mem_rsp_tag_s[i], mem_rsp_data_s[i]}), - .valid_out (mem_rsp_valid_s[i]), - .ready_out (mem_rsp_ready_s[i]) - ); - end - - /////////////////////////////////////////////////////////////////////////// - - wire [NUM_BANKS-1:0] per_bank_core_req_valid; - wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; - wire [NUM_BANKS-1:0] per_bank_core_req_rw; - wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_core_req_wsel; - wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; - wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data; - wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx; - wire [NUM_BANKS-1:0] per_bank_core_req_flush; - wire [NUM_BANKS-1:0] per_bank_core_req_ready; - - wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; - wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data; - wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag; - wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx; - wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; - - wire [NUM_BANKS-1:0] per_bank_mem_req_valid; - wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr; - wire [NUM_BANKS-1:0] 
per_bank_mem_req_rw; - wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; - wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; - wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; - wire [NUM_BANKS-1:0] per_bank_mem_req_flush; - wire [NUM_BANKS-1:0] per_bank_mem_req_ready; - - wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; - - assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready; - - if (NUM_BANKS == 1) begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready; - end else begin - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign mem_rsp_ready_s[i] = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[i])]; - end - end - - // Bank requests dispatch - - wire [NUM_REQS-1:0] core_req_valid; - wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr; - wire [NUM_REQS-1:0] core_req_rw; - wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; - wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data; - wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag; - wire [NUM_REQS-1:0] core_req_flush; - wire [NUM_REQS-1:0] core_req_ready; - - wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr; - wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid; - wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel; - - wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; - wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_valid[i] = core_bus2_if[i].req_valid; - assign core_req_rw[i] = core_bus2_if[i].req_data.rw; - assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen; - assign core_req_addr[i] = core_bus2_if[i].req_data.addr; - assign core_req_data[i] = core_bus2_if[i].req_data.data; - assign core_req_tag[i] = core_bus2_if[i].req_data.tag; - assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; - assign core_bus2_if[i].req_ready = core_req_ready[i]; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - if 
(WORDS_PER_LINE > 1) begin - assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; - end else begin - assign core_req_wsel[i] = '0; - end - assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH]; - end - - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; - end - end else begin - assign core_req_bid = '0; - end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_data_in[i] = { - core_req_line_addr[i], - core_req_rw[i], - core_req_wsel[i], - core_req_byteen[i], - core_req_data[i], - core_req_tag[i], - core_req_flush[i] - }; - end - -`ifdef PERF_ENABLE - wire [`PERF_CTR_BITS-1:0] perf_collisions; -`endif - - `RESET_RELAY (req_xbar_reset, reset); - - VX_stream_xbar #( - .NUM_INPUTS (NUM_REQS), - .NUM_OUTPUTS (NUM_BANKS), - .DATAW (CORE_REQ_DATAW), - .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), - .OUT_BUF (REQ_XBAR_BUF) - ) req_xbar ( - .clk (clk), - .reset (req_xbar_reset), - `ifdef PERF_ENABLE - .collisions(perf_collisions), - `else - `UNUSED_PIN(collisions), - `endif - .valid_in (core_req_valid), - .data_in (core_req_data_in), - .sel_in (core_req_bid), - .ready_in (core_req_ready), - .valid_out (per_bank_core_req_valid), - .data_out (core_req_data_out), - .sel_out (per_bank_core_req_idx), - .ready_out (per_bank_core_req_ready) - ); - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign { - per_bank_core_req_addr[i], - per_bank_core_req_rw[i], - per_bank_core_req_wsel[i], - per_bank_core_req_byteen[i], - per_bank_core_req_data[i], - per_bank_core_req_tag[i], - per_bank_core_req_flush[i] - } = core_req_data_out[i]; - end - - // Banks access - for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks - wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; - wire curr_bank_mem_rsp_valid; - - if (NUM_BANKS == 1) begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; - end else 
begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s[bank_id] && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[bank_id]) == bank_id); - end - - `RESET_RELAY (bank_reset, reset); - - VX_cache_bank #( - .BANK_ID (bank_id), - .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), - .CACHE_SIZE (CACHE_SIZE), - .LINE_SIZE (LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .NUM_WAYS (NUM_WAYS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQS (NUM_REQS), - .CRSQ_SIZE (CRSQ_SIZE), - .MSHR_SIZE (MSHR_SIZE), - .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .DIRTY_BYTES (DIRTY_BYTES), - .WRITEBACK (WRITEBACK), - .UUID_WIDTH (UUID_WIDTH), - .TAG_WIDTH (TAG_WIDTH), - .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF) - ) bank ( - .clk (clk), - .reset (bank_reset), - - `ifdef PERF_ENABLE - .perf_read_misses (perf_read_miss_per_bank[bank_id]), - .perf_write_misses (perf_write_miss_per_bank[bank_id]), - .perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]), - `endif - - // Core request - .core_req_valid (per_bank_core_req_valid[bank_id]), - .core_req_addr (per_bank_core_req_addr[bank_id]), - .core_req_rw (per_bank_core_req_rw[bank_id]), - .core_req_wsel (per_bank_core_req_wsel[bank_id]), - .core_req_byteen (per_bank_core_req_byteen[bank_id]), - .core_req_data (per_bank_core_req_data[bank_id]), - .core_req_tag (per_bank_core_req_tag[bank_id]), - .core_req_idx (per_bank_core_req_idx[bank_id]), - .core_req_flush (per_bank_core_req_flush[bank_id]), - .core_req_ready (per_bank_core_req_ready[bank_id]), - - // Core response - .core_rsp_valid (per_bank_core_rsp_valid[bank_id]), - .core_rsp_data (per_bank_core_rsp_data[bank_id]), - .core_rsp_tag (per_bank_core_rsp_tag[bank_id]), - .core_rsp_idx (per_bank_core_rsp_idx[bank_id]), - .core_rsp_ready (per_bank_core_rsp_ready[bank_id]), - - // Memory request - .mem_req_valid (per_bank_mem_req_valid[bank_id]), - .mem_req_addr (curr_bank_mem_req_addr), - .mem_req_rw 
(per_bank_mem_req_rw[bank_id]), - .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), - .mem_req_data (per_bank_mem_req_data[bank_id]), - .mem_req_id (per_bank_mem_req_id[bank_id]), - .mem_req_flush (per_bank_mem_req_flush[bank_id]), - .mem_req_ready (per_bank_mem_req_ready[bank_id]), - - // Memory response - .mem_rsp_valid (curr_bank_mem_rsp_valid), - .mem_rsp_data (mem_rsp_data_s[bank_id]), - .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s[bank_id])), - .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), - - .flush_begin (per_bank_flush_begin[bank_id]), - .flush_end (per_bank_flush_end[bank_id]) - ); - - if (NUM_BANKS == 1) begin - assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; - end else begin - assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); - end - end - - // Bank responses gather - - wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in; - wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; - end - - `RESET_RELAY (rsp_xbar_reset, reset); - - VX_stream_xbar #( - .NUM_INPUTS (NUM_BANKS), - .NUM_OUTPUTS (NUM_REQS), - .DATAW (CORE_RSP_DATAW), - .ARBITER ("F") - ) rsp_xbar ( - .clk (clk), - .reset (rsp_xbar_reset), - `UNUSED_PIN (collisions), - .valid_in (per_bank_core_rsp_valid), - .data_in (core_rsp_data_in), - .sel_in (per_bank_core_rsp_idx), - .ready_in (per_bank_core_rsp_ready), - .valid_out (core_rsp_valid_s), - .data_out (core_rsp_data_out), - .ready_out (core_rsp_ready_s), - `UNUSED_PIN (sel_out) - ); - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; - end - - /////////////////////////////////////////////////////////////////////////// - - wire [NUM_MEM_PORTS-1:0] mem_req_valid_p; - wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; - wire [NUM_MEM_PORTS-1:0] 
mem_req_rw_p; - wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_p; - wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_p; - wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_p; - wire [NUM_MEM_PORTS-1:0][MSHR_ADDR_WIDTH-1:0] mem_req_id_p; - wire [NUM_MEM_PORTS-1:0] mem_req_flush_p; - wire [NUM_MEM_PORTS-1:0] mem_req_ready_p; - - // Memory request arbitration - - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; - wire [NUM_MEM_PORTS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_out; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign data_in[i] = { - per_bank_mem_req_addr[i], - per_bank_mem_req_rw[i], - per_bank_mem_req_byteen[i], - per_bank_mem_req_data[i], - per_bank_mem_req_id[i], - per_bank_mem_req_flush[i] - }; - end - - VX_stream_arb #( - .NUM_INPUTS (NUM_BANKS), - .NUM_OUTPUTS (NUM_MEM_PORTS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), - .ARBITER ("F") - ) mem_req_arb ( - .clk (clk), - .reset (reset), - .valid_in (per_bank_mem_req_valid), - .ready_in (per_bank_mem_req_ready), - .data_in (data_in), - .data_out (data_out), - .valid_out (mem_req_valid_p), - .ready_out (mem_req_ready_p), - `UNUSED_PIN (sel_out) - ); - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign { - mem_req_addr_p[i], - mem_req_rw_p[i], - mem_req_byteen_p[i], - mem_req_data_p[i], - mem_req_id_p[i], - mem_req_flush_p[i] - } = data_out[i]; - end - - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p[i]); - assign mem_req_tag_p[i] = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p[i]}); - end - end else begin - assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); - end - - // Memory request multi-port handling - - assign mem_req_valid_s = mem_req_valid_p; - assign mem_req_addr_s = mem_req_addr_p; - 
assign mem_req_tag_s = mem_req_tag_p; - assign mem_req_flush_s = mem_req_flush_p; - assign mem_req_ready_p = mem_req_ready_s; - - if (WRITE_ENABLE != 0) begin - assign mem_req_rw_s = mem_req_rw_p; - assign mem_req_byteen_s = mem_req_byteen_p; - assign mem_req_data_s = mem_req_data_p; - end else begin - `UNUSED_VAR (mem_req_byteen_p) - `UNUSED_VAR (mem_req_data_p) - `UNUSED_VAR (mem_req_rw_p) - - assign mem_req_rw_s = 0; - assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; - assign mem_req_data_s = '0; - end - -`ifdef PERF_ENABLE - // per cycle: core_reads, core_writes - wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; - wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle; - - wire [NUM_REQS-1:0] perf_core_reads_per_req; - wire [NUM_REQS-1:0] perf_core_writes_per_req; - - // per cycle: read misses, write misses, msrq stalls, pipeline stalls - wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle; - wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle; - wire [`CLOG2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle; - wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; - - `BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw); - `BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw); - - `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req); - `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req); - `POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank); - `POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank); - `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); - - wire [NUM_REQS-1:0] perf_crsp_stall_per_req; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready; - end - - `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req); - - wire perf_mem_stall_per_cycle = mem_bus_if[0].req_valid && ~mem_bus_if[0].req_ready; - - reg [`PERF_CTR_BITS-1:0] 
perf_core_reads; - reg [`PERF_CTR_BITS-1:0] perf_core_writes; - reg [`PERF_CTR_BITS-1:0] perf_read_misses; - reg [`PERF_CTR_BITS-1:0] perf_write_misses; - reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls; - reg [`PERF_CTR_BITS-1:0] perf_mem_stalls; - reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls; - - always @(posedge clk) begin - if (reset) begin - perf_core_reads <= '0; - perf_core_writes <= '0; - perf_read_misses <= '0; - perf_write_misses <= '0; - perf_mshr_stalls <= '0; - perf_mem_stalls <= '0; - perf_crsp_stalls <= '0; - end else begin - perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle); - perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle); - perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle); - perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle); - perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle); - perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'(perf_mem_stall_per_cycle); - perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle); - end - end - - assign cache_perf.reads = perf_core_reads; - assign cache_perf.writes = perf_core_writes; - assign cache_perf.read_misses = perf_read_misses; - assign cache_perf.write_misses = perf_write_misses; - assign cache_perf.bank_stalls = perf_collisions; - assign cache_perf.mshr_stalls = perf_mshr_stalls; - assign cache_perf.mem_stalls = perf_mem_stalls; - assign cache_perf.crsp_stalls = perf_crsp_stalls; -`endif - -endmodule diff --git a/hw/rtl/cache/VX_cache_wrap_l3.sv b/hw/rtl/cache/VX_cache_wrap_l3.sv deleted file mode 100644 index def7237b1..000000000 --- a/hw/rtl/cache/VX_cache_wrap_l3.sv +++ /dev/null @@ -1,331 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_cache_define.vh" - -module VX_cache_wrap_l3 import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "", - - parameter TAG_SEL_IDX = 0, - - // Number of Word requests per cycle - parameter NUM_REQS = 4, - - - // Size of cache in bytes - parameter CACHE_SIZE = 4096, - // Size of line inside a bank in bytes - parameter LINE_SIZE = 64, - // Number of banks - parameter NUM_BANKS = 1, - // Number of associative ways - parameter NUM_WAYS = 1, - // Size of a word in bytes - parameter WORD_SIZE = 4, - // Number of memory ports - parameter NUM_MEM_PORTS = 4, - - // Core Response Queue Size - parameter CRSQ_SIZE = 2, - // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, - // Memory Response Queue Size - parameter MRSQ_SIZE = 0, - // Memory Request Queue Size - parameter MREQ_SIZE = 4, - - // Enable cache writeable - parameter WRITE_ENABLE = 1, - - // Enable cache writeback - parameter WRITEBACK = 0, - - // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, - - // Request debug identifier - parameter UUID_WIDTH = 0, - - // core request tag size - parameter TAG_WIDTH = UUID_WIDTH + 1, - - // enable bypass for non-cacheable addresses - parameter NC_ENABLE = 0, - - // Force bypass for all requests - parameter PASSTHRU = 0, - - // Core response output buffer - parameter CORE_OUT_BUF = 0, - - // Memory request output buffer - parameter MEM_OUT_BUF = 0 - ) ( - - input wire clk, - input wire reset, - - // PERF -`ifdef PERF_ENABLE - output cache_perf_t cache_perf, -`endif - - VX_mem_bus_if.slave core_bus_if [NUM_REQS], - VX_mem_bus_if.master 
mem_bus_if [NUM_MEM_PORTS] -); - - `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) - - localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; - - localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); - - localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); - - localparam NUM_REQS_P = NUM_REQS / NUM_MEM_PORTS; - - VX_mem_bus_if #( - .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) - ) core_bus_cache_if[NUM_REQS](); - - VX_mem_bus_if #( - .DATA_SIZE (LINE_SIZE), - .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) - ) mem_bus_cache_if[NUM_MEM_PORTS](); - - if (NC_OR_BYPASS) begin - `RESET_RELAY (nc_bypass_reset, reset); - - // Slicing version - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - - localparam SLICE_BEGIN = i * NUM_REQS_P; - localparam SLICE_END = SLICE_BEGIN + NUM_REQS_P; - - VX_cache_bypass #( - .NUM_REQS (NUM_REQS_P), - .TAG_SEL_IDX (TAG_SEL_IDX), - - .PASSTHRU (PASSTHRU), - .NC_ENABLE (PASSTHRU ? 
0 : NC_ENABLE), - - .WORD_SIZE (WORD_SIZE), - .LINE_SIZE (LINE_SIZE), - - .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), - .CORE_TAG_WIDTH (TAG_WIDTH), - - .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), - .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), - .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), - - .UUID_WIDTH (UUID_WIDTH), - - .CORE_OUT_BUF (CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_OUT_BUF) - ) cache_bypass ( - .clk (clk), - .reset (nc_bypass_reset), - - .core_bus_in_if (core_bus_if[SLICE_END-1:SLICE_BEGIN]), - .core_bus_out_if(core_bus_cache_if[SLICE_END-1:SLICE_BEGIN]), - - .mem_bus_in_if (mem_bus_cache_if[i]), - .mem_bus_out_if (mem_bus_if[i]) - ); - end - - // Connect everything - /* - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - VX_cache_bypass #( - .NUM_REQS (NUM_REQS), - .TAG_SEL_IDX (TAG_SEL_IDX), - - .PASSTHRU (PASSTHRU), - .NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE), - - .WORD_SIZE (WORD_SIZE), - .LINE_SIZE (LINE_SIZE), - - .CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH), - .CORE_TAG_WIDTH (TAG_WIDTH), - - .MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH), - .MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH), - .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH), - - .UUID_WIDTH (UUID_WIDTH), - - .CORE_OUT_BUF (CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_OUT_BUF) - ) cache_bypass ( - .clk (clk), - .reset (nc_bypass_reset), - - .core_bus_in_if (core_bus_if), - .core_bus_out_if(core_bus_cache_if), - - .mem_bus_in_if (mem_bus_cache_if[i]), - .mem_bus_out_if (mem_bus_if[i]) - ); - end - */ - - end else begin - - for (genvar i = 0; i < NUM_REQS; ++i) begin - `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); - end - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if[i], mem_bus_cache_if[i]); - end - end - - if (PASSTHRU != 0) begin - - for (genvar i = 0; i < NUM_REQS; ++i) begin - `UNUSED_VAR (core_bus_cache_if[i].req_valid) - `UNUSED_VAR (core_bus_cache_if[i].req_data) - assign core_bus_cache_if[i].req_ready = 0; - - assign core_bus_cache_if[i].rsp_valid = 0; - assign core_bus_cache_if[i].rsp_data = '0; 
- `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) - end - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign mem_bus_cache_if[i].req_valid = 0; - assign mem_bus_cache_if[i].req_data = '0; - `UNUSED_VAR (mem_bus_cache_if[i].req_ready) - - `UNUSED_VAR (mem_bus_cache_if[i].rsp_valid) - `UNUSED_VAR (mem_bus_cache_if[i].rsp_data) - assign mem_bus_cache_if[i].rsp_ready = 0; - end - - `ifdef PERF_ENABLE - assign cache_perf = '0; - `endif - - end else begin - - `RESET_RELAY (cache_reset, reset); - - VX_cache_l3 #( - .INSTANCE_ID (INSTANCE_ID), - .CACHE_SIZE (CACHE_SIZE), - .LINE_SIZE (LINE_SIZE), - .NUM_BANKS (NUM_BANKS), - .NUM_MEM_PORTS (NUM_MEM_PORTS), - .NUM_WAYS (NUM_WAYS), - .WORD_SIZE (WORD_SIZE), - .NUM_REQS (NUM_REQS), - .CRSQ_SIZE (CRSQ_SIZE), - .MSHR_SIZE (MSHR_SIZE), - .MRSQ_SIZE (MRSQ_SIZE), - .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), - .UUID_WIDTH (UUID_WIDTH), - .TAG_WIDTH (TAG_WIDTH), - .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), - .MEM_OUT_BUF (NC_OR_BYPASS ? 
1 : MEM_OUT_BUF) - ) cache ( - .clk (clk), - .reset (cache_reset), - `ifdef PERF_ENABLE - .cache_perf (cache_perf), - `endif - .core_bus_if (core_bus_cache_if), - .mem_bus_if (mem_bus_cache_if) - ); - - end - -`ifdef DBG_TRACE_CACHE - - for (genvar i = 0; i < NUM_REQS; ++i) begin - wire [`UP(UUID_WIDTH)-1:0] core_req_uuid; - wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid; - - if (UUID_WIDTH != 0) begin - assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin - assign core_req_uuid = 0; - assign core_rsp_uuid = 0; - end - - wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready; - wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready; - - always @(posedge clk) begin - if (core_req_fire) begin - if (core_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); - else - `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); - end - if (core_rsp_fire) begin - `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); - end - end - end - - wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_req_uuid; - wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin - assign mem_req_uuid[i] = mem_bus_if[i].req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; - assign mem_rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[MEM_TAG_WIDTH-1 -: 
UUID_WIDTH]; - end else begin - assign mem_req_uuid[i] = 0; - assign mem_rsp_uuid[i] = 0; - end - end - - wire mem_req_fire [NUM_MEM_PORTS-1:0]; - wire mem_rsp_fire [NUM_MEM_PORTS-1:0]; - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - assign mem_req_fire[i] = mem_bus_if[i].req_valid && mem_bus_if[i].req_ready; - assign mem_rsp_fire[i] = mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready; - end - - for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin - always @(posedge clk) begin - if (mem_req_fire[i]) begin - if (mem_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d) bank=%d\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_req_uuid[i], i)); - else - `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d) bank=%d\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_req_uuid[i], i)); - end - if (mem_rsp_fire[i]) begin - `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n", - $time, INSTANCE_ID, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data, mem_rsp_uuid[i])); - end - end - end -`endif - -endmodule diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 9ddccc19d..3deffc759 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -37,13 +37,13 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += $(SRC_DIR)/processor_hbm.cpp +SRCS += $(SRC_DIR)/processor.cpp ifdef AXI_BUS TOP = Vortex_axi CXXFLAGS += -DAXI_BUS else - TOP = Vortex_hbm + TOP = Vortex endif VL_FLAGS = --exe diff --git a/sim/rtlsim/processor_hbm.cpp b/sim/rtlsim/processor_hbm.cpp deleted file mode 100644 index 5f7bee7ee..000000000 --- 
a/sim/rtlsim/processor_hbm.cpp +++ /dev/null @@ -1,656 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "processor.h" - -#ifdef AXI_BUS -#include "VVortex_axi.h" -typedef VVortex_axi Device; -#else -#include "VVortex_hbm.h" -typedef VVortex_hbm Device; -#endif - -#ifdef VCD_OUTPUT -#include -#endif - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - -#ifndef MEM_CLOCK_RATIO -#define MEM_CLOCK_RATIO 1 -#endif - -#ifndef TRACE_START_TIME -#define TRACE_START_TIME 0ull -#endif - -#ifndef TRACE_STOP_TIME -#define TRACE_STOP_TIME -1ull -#endif - -#ifndef VERILATOR_RESET_VALUE -#define VERILATOR_RESET_VALUE 2 -#endif - -#if (XLEN == 32) -typedef uint32_t Word; -#elif (XLEN == 64) -typedef uint64_t Word; -#else -#error unsupported XLEN -#endif - -#define VL_WDATA_GETW(lwp, i, n, w) \ - VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w) - -using namespace vortex; - -static uint64_t timestamp = 0; - -double sc_time_stamp() { - return timestamp; -} - -/////////////////////////////////////////////////////////////////////////////// - -static bool trace_enabled = false; -static uint64_t trace_start_time = TRACE_START_TIME; -static uint64_t trace_stop_time = TRACE_STOP_TIME; - -bool 
sim_trace_enabled() { - if (timestamp >= trace_start_time - && timestamp < trace_stop_time) - return true; - return trace_enabled; -} - -void sim_trace_enable(bool enable) { - trace_enabled = enable; -} - -/////////////////////////////////////////////////////////////////////////////// - -class Processor::Impl { -public: - Impl() : dram_sim_(MEM_CLOCK_RATIO) { - // force random values for unitialized signals - Verilated::randReset(VERILATOR_RESET_VALUE); - Verilated::randSeed(50); - - // turn off assertion before reset - Verilated::assertOn(false); - - // create RTL module instance - device_ = new Device(); - - #ifdef VCD_OUTPUT - Verilated::traceEverOn(true); - tfp_ = new VerilatedVcdC(); - device_->trace(tfp_, 99); - tfp_->open("trace.vcd"); - #endif - - pending_mem_reqs_.resize(NUM_MEM_PORTS); - dram_queue_.resize(NUM_MEM_PORTS); - - mem_rd_rsp_active_.resize(NUM_MEM_PORTS); - mem_rd_rsp_ready_.resize(NUM_MEM_PORTS); - - mem_wr_rsp_active_.resize(NUM_MEM_PORTS); - mem_wr_rsp_ready_.resize(NUM_MEM_PORTS); - - ram_ = nullptr; - - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif - // reset the device - this->reset(); - - // Turn on assertion after reset - Verilated::assertOn(true); - } - - ~Impl() { - this->cout_flush(); - - #ifdef VCD_OUTPUT - tfp_->close(); - delete tfp_; - #endif - - delete device_; - } - - void cout_flush() { - for (auto& buf : print_bufs_) { - auto str = buf.second.str(); - if (!str.empty()) { - std::cout << "#" << buf.first << ": " << str << std::endl; - } - } - } - - void attach_ram(RAM* ram) { - ram_ = ram; - } - - void run() { - - #ifndef NDEBUG - std::cout << std::dec << timestamp << ": [sim] run()" << 
std::endl; - #endif - - // start execution - running_ = true; - device_->reset = 0; - - /* - device_->mem_req_valid[1] = 0; - device_->mem_req_ready[1] = 0; - device_->mem_rsp_valid[1] = 0; - device_->mem_rsp_ready[1] = 0; - */ - - // wait on device to go busy - while (!device_->busy) { - this->tick(); - } - - // wait on device to go idle - while (device_->busy) { - this->tick(); - } - - // reset device - this->reset(); - - this->cout_flush(); - } - - void dcr_write(uint32_t addr, uint32_t value) { - device_->dcr_wr_valid = 1; - device_->dcr_wr_addr = addr; - device_->dcr_wr_data = value; - while (device_->dcr_wr_valid) { - this->tick(); - } - } - -private: - - void reset() { - running_ = false; - - print_bufs_.clear(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - - pending_mem_reqs_.at(i).clear(); - - { - std::queue empty; - std::swap(dram_queue_.at(i), empty); - } - - mem_rd_rsp_active_.at(i) = false; - mem_wr_rsp_active_.at(i) = false; - } - - this->mem_bus_reset(); - - this->dcr_bus_reset(); - - device_->reset = 1; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - } - } - - void tick() { - - device_->clk = 0; - this->eval(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - this->mem_bus_eval(0, i); - } - this->dcr_bus_eval(0); - - device_->clk = 1; - this->eval(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - this->mem_bus_eval(1, i); - } - this->dcr_bus_eval(1); - - dram_sim_.tick(); - - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - if (!dram_queue_.at(i).empty()) { - auto mem_req = dram_queue_.at(i).front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; - } - }, mem_req)) { - dram_queue_.at(i).pop(); - } - } - } - - #ifndef NDEBUG - fflush(stdout); - #endif - } - - void eval() { - device_->eval(); - #ifdef VCD_OUTPUT - if 
(sim_trace_enabled()) { - tfp_->dump(timestamp); - } else { - exit(-1); - } - #endif - ++timestamp; - } - -#ifdef AXI_BUS - - void mem_bus_reset() { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - device_->m_axi_rvalid[0] = 0; - device_->m_axi_bvalid[0] = 0; - } - - void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->m_axi_rready[0]; - mem_wr_rsp_ready_ = device_->m_axi_bready[0]; - return; - } - - if (ram_ == nullptr) { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - return; - } - - // process memory read responses - if (mem_rd_rsp_active_ - && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && !(*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - device_->m_axi_rvalid[0] = 1; - device_->m_axi_rid[0] = mem_rsp->tag; - device_->m_axi_rresp[0] = 0; - device_->m_axi_rlast[0] = 1; - memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); - pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_rvalid[0] = 0; - } - } - - // process memory write responses - if (mem_wr_rsp_active_ - && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { - mem_wr_rsp_active_ = false; - } - if (!mem_wr_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && (*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr); - */ 
- device_->m_axi_bvalid[0] = 1; - device_->m_axi_bid[0] = mem_rsp->tag; - device_->m_axi_bresp[0] = 0; - pending_mem_reqs_.erase(mem_rsp_it); - mem_wr_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_bvalid[0] = 0; - } - } - - // select the memory bank - uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0]; - - // process memory requests - if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { - if (device_->m_axi_wvalid[0]) { - auto byteen = device_->m_axi_wstrb[0]; - auto base_addr = device_->m_axi_awaddr[0]; - auto data = (uint8_t*)device_->m_axi_wdata[0].data(); - - if (base_addr >= uint64_t(IO_COUT_ADDR) - && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_awid[0]; - mem_req->addr = device_->m_axi_awaddr[0]; - mem_req->write = true; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_arid[0]; - mem_req->addr = device_->m_axi_araddr[0]; - ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], 
MEM_BLOCK_SIZE); - mem_req->write = false; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } - - device_->m_axi_wready[0] = running_; - device_->m_axi_awready[0] = running_; - device_->m_axi_arready[0] = running_; - } - -#else - - void mem_bus_reset() { - for (int i = 0; i < NUM_MEM_PORTS; ++i) { - device_->mem_req_ready[i] = 0; - device_->mem_rsp_valid[i] = 0; - } - } - - void mem_bus_eval(bool clk, int n) { - if (!clk) { - mem_rd_rsp_ready_.at(n) = device_->mem_rsp_ready[n]; - return; - } - - if (ram_ == nullptr) { - device_->mem_req_ready[n] = 0; - return; - } - - // process memory read responses - if (mem_rd_rsp_active_.at(n) - && device_->mem_rsp_valid[n] && mem_rd_rsp_ready_.at(n)) { - mem_rd_rsp_active_.at(n) = false; - } - if (!mem_rd_rsp_active_.at(n)) { - if (!pending_mem_reqs_.at(n).empty() - && (*pending_mem_reqs_.at(n).begin())->ready) { - device_->mem_rsp_valid[n] = 1; - auto mem_rsp_it = pending_mem_reqs_.at(n).begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - memcpy(VDataCast::get(device_->mem_rsp_data[n]), mem_rsp->block.data(), MEM_BLOCK_SIZE); - device_->mem_rsp_tag[n] = mem_rsp->tag; - pending_mem_reqs_.at(n).erase(mem_rsp_it); - mem_rd_rsp_active_.at(n) = true; - delete mem_rsp; - } else { - device_->mem_rsp_valid[n] = 0; - } - } - - // process memory requests - if (device_->mem_req_valid[n] && running_) { - uint64_t byte_addr = (device_->mem_req_addr[n] * MEM_BLOCK_SIZE); - if (device_->mem_req_rw[n]) { - auto byteen = device_->mem_req_byteen[n]; - auto data = VDataCast::get(device_->mem_req_data[n]); - - if (byte_addr >= uint64_t(IO_COUT_ADDR) - && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < 
IO_COUT_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%d=%02x,", i, data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[n]; - mem_req->addr = byte_addr; - mem_req->write = true; - mem_req->ready = true; - - // send dram request - dram_queue_.at(n).push(mem_req); - } - } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->mem_req_tag[n]; - mem_req->addr = byte_addr; - mem_req->write = false; - mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE); - pending_mem_reqs_.at(n).emplace_back(mem_req); - - //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); - - // send dram request - dram_queue_.at(n).push(mem_req); - } - } - - device_->mem_req_ready[n] = running_; - } - -#endif - - void dcr_bus_reset() { - device_->dcr_wr_valid = 0; - } - - void dcr_bus_eval(bool clk) { - if (!clk) { - return; - } - if (device_->dcr_wr_valid) { - device_->dcr_wr_valid = 0; - } - } - - void wait(uint32_t cycles) { - for (int i = 0; i < cycles; ++i) { - this->tick(); - } - } - -private: - - typedef struct { - Device* device; - std::array block; - uint64_t addr; - uint64_t tag; - bool write; - bool ready; - } mem_req_t; - - std::unordered_map print_bufs_; - - std::vector> 
pending_mem_reqs_; - - std::vector> dram_queue_; - - DramSim dram_sim_; - - Device* device_; - -#ifdef VCD_OUTPUT - VerilatedVcdC *tfp_; -#endif - - RAM* ram_; - - std::vector mem_rd_rsp_active_; - std::vector mem_rd_rsp_ready_; - - std::vector mem_wr_rsp_active_; - std::vector mem_wr_rsp_ready_; - - bool running_; -}; - -/////////////////////////////////////////////////////////////////////////////// - -Processor::Processor() - : impl_(new Impl()) -{} - -Processor::~Processor() { - delete impl_; -} - -void Processor::attach_ram(RAM* mem) { - impl_->attach_ram(mem); -} - -void Processor::run() { - impl_->run(); -} - -void Processor::dcr_write(uint32_t addr, uint32_t value) { - return impl_->dcr_write(addr, value); -} \ No newline at end of file diff --git a/third_party/softfloat b/third_party/softfloat index 3b70b5d81..b51ef8f32 160000 --- a/third_party/softfloat +++ b/third_party/softfloat @@ -1 +1 @@ -Subproject commit 3b70b5d8147675932c38b36cd09af6df4eedd919 +Subproject commit b51ef8f3201669b2288104c28546fc72532a1ea4 From 2eeb2ac532eadf158ac4b41d2cc98cda2f350cb9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 5 Oct 2024 13:46:10 -0700 Subject: [PATCH 350/488] fixed memory flags propagation through the cache hierarchy --- ci/regression.sh.in | 12 +- hw/rtl/VX_cluster.sv | 1 + hw/rtl/VX_config.vh | 8 -- hw/rtl/VX_socket.sv | 2 + hw/rtl/Vortex.sv | 1 + hw/rtl/cache/VX_bank_flush.sv | 4 +- hw/rtl/cache/VX_cache.sv | 45 +++++--- hw/rtl/cache/VX_cache_bank.sv | 114 ++++++++++--------- hw/rtl/cache/VX_cache_cluster.sv | 4 + hw/rtl/cache/VX_cache_data.sv | 182 ++++++++++++++++--------------- hw/rtl/cache/VX_cache_tags.sv | 30 ++--- hw/rtl/cache/VX_cache_wrap.sv | 4 + hw/rtl/libs/VX_cyclic_arbiter.sv | 4 +- hw/rtl/libs/VX_decoder.sv | 27 +++-- hw/rtl/libs/VX_mem_adapter.sv | 8 +- hw/rtl/libs/VX_mem_coalescer.sv | 21 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 29 +++-- hw/rtl/libs/VX_rr_arbiter.sv | 4 +- hw/rtl/libs/VX_stream_xbar.sv | 8 +- 19 files changed, 279 
insertions(+), 229 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 9827199bb..443b34f5a 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -142,8 +142,8 @@ cache() CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx # test cache ways - CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx - CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx + CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx # test cache banking CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx @@ -154,10 +154,10 @@ cache() CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx # test writeback - CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress - CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress - CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress - CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress + CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress + CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress # cache clustering CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 
-DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2 diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 73d9b34ab..b5e9e0a5c 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -100,6 +100,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .WRITEBACK (`L2_WRITEBACK), .DIRTY_BYTES (`L2_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), .MEM_OUT_BUF (3), .NC_ENABLE (1), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 1e10aca8e..fb4756633 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -564,12 +564,8 @@ // Cache Size `ifndef L2_CACHE_SIZE -`ifdef ALTERA_S10 -`define L2_CACHE_SIZE 2097152 -`else `define L2_CACHE_SIZE 1048576 `endif -`endif // Number of Banks `ifndef L2_NUM_BANKS @@ -610,11 +606,7 @@ // Cache Size `ifndef L3_CACHE_SIZE -`ifdef ALTERA_S10 `define L3_CACHE_SIZE 2097152 -`else -`define L3_CACHE_SIZE 1048576 -`endif `endif // Number of Banks diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 69ff88a2c..9c7fe1287 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -100,6 +100,7 @@ module VX_socket import VX_gpu_pkg::*; #( .MRSQ_SIZE (`ICACHE_MRSQ_SIZE), .MREQ_SIZE (`ICACHE_MREQ_SIZE), .TAG_WIDTH (ICACHE_TAG_WIDTH), + .FLAGS_WIDTH (0), .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (0), .NC_ENABLE (0), @@ -146,6 +147,7 @@ module VX_socket import VX_gpu_pkg::*; #( .MREQ_SIZE (`DCACHE_WRITEBACK ? 
`DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE), .TAG_WIDTH (DCACHE_TAG_WIDTH), .UUID_WIDTH (`UUID_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`DCACHE_WRITEBACK), .DIRTY_BYTES (`DCACHE_WRITEBACK), diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index e07aaae4d..4f9f495ce 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -86,6 +86,7 @@ module Vortex import VX_gpu_pkg::*; ( .WRITEBACK (`L3_WRITEBACK), .DIRTY_BYTES (`L3_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), .MEM_OUT_BUF (3), .NC_ENABLE (1), diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index a01ae0e0b..3228bd3a5 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -118,8 +118,8 @@ module VX_bank_flush #( .N (`CS_WAY_SEL_BITS), .D (NUM_WAYS) ) ctr_decoder ( - .data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), - .valid_in (1'b1), + .sel_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .data_in (1'b1), .data_out (flush_way) ); end else begin : g_flush_way_all diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 06887944c..d749e6ee9 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -54,6 +54,9 @@ module VX_cache import VX_gpu_pkg::*; #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // Core response output register parameter CORE_OUT_BUF = 0, @@ -90,7 +93,7 @@ module VX_cache import VX_gpu_pkg::*; #( localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); - localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; + localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + `UP(FLAGS_WIDTH); localparam CORE_RSP_DATAW = WORD_WIDTH + 
TAG_WIDTH; localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH; @@ -206,13 +209,13 @@ module VX_cache import VX_gpu_pkg::*; #( wire [LINE_SIZE-1:0] mem_req_byteen; wire [`CS_LINE_WIDTH-1:0] mem_req_data; wire [MEM_TAG_WIDTH-1:0] mem_req_tag; - wire mem_req_flush; + wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags; wire mem_req_ready; - wire mem_req_flush_b; + wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flush_b; VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -220,13 +223,18 @@ module VX_cache import VX_gpu_pkg::*; #( .reset (reset), .valid_in (mem_req_valid), .ready_in (mem_req_ready), - .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), + .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flags}), .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), .valid_out (mem_bus_tmp_if.req_valid), .ready_out (mem_bus_tmp_if.req_ready) ); - assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? 
`MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; + if (FLAGS_WIDTH != 0) begin : g_mem_req_flags + assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b; + end else begin : g_no_mem_req_flags + assign mem_bus_tmp_if.req_data.flags = '0; + `UNUSED_VAR (mem_req_flush_b) + end if (WRITE_ENABLE) begin : g_mem_bus_if `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); @@ -244,7 +252,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx; - wire [NUM_BANKS-1:0] per_bank_core_req_flush; + wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_core_req_flags; wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; @@ -259,7 +267,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag; - wire [NUM_BANKS-1:0] per_bank_mem_req_flush; + wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_mem_req_flags; wire [NUM_BANKS-1:0] per_bank_mem_req_ready; wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; @@ -276,7 +284,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen; wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data; wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag; - wire [NUM_REQS-1:0] core_req_flush; + wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags; wire [NUM_REQS-1:0] core_req_ready; wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr; @@ -293,7 +301,7 @@ module VX_cache import VX_gpu_pkg::*; #( assign core_req_addr[i] = core_bus2_if[i].req_data.addr; assign core_req_data[i] = core_bus2_if[i].req_data.data; assign core_req_tag[i] = core_bus2_if[i].req_data.tag; - assign core_req_flush[i] = 
core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; + assign core_req_flags[i] = `UP(FLAGS_WIDTH)'(core_bus2_if[i].req_data.flags); assign core_bus2_if[i].req_ready = core_req_ready[i]; end @@ -325,7 +333,7 @@ module VX_cache import VX_gpu_pkg::*; #( core_req_byteen[i], core_req_data[i], core_req_tag[i], - core_req_flush[i] + core_req_flags[i] }; end @@ -366,7 +374,7 @@ module VX_cache import VX_gpu_pkg::*; #( per_bank_core_req_byteen[i], per_bank_core_req_data[i], per_bank_core_req_tag[i], - per_bank_core_req_flush[i] + per_bank_core_req_flags[i] } = core_req_data_out[i]; end @@ -393,6 +401,7 @@ module VX_cache import VX_gpu_pkg::*; #( .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)), .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) ) bank ( @@ -414,7 +423,7 @@ module VX_cache import VX_gpu_pkg::*; #( .core_req_data (per_bank_core_req_data[bank_id]), .core_req_tag (per_bank_core_req_tag[bank_id]), .core_req_idx (per_bank_core_req_idx[bank_id]), - .core_req_flush (per_bank_core_req_flush[bank_id]), + .core_req_flags (per_bank_core_req_flags[bank_id]), .core_req_ready (per_bank_core_req_ready[bank_id]), // Core response @@ -431,7 +440,7 @@ module VX_cache import VX_gpu_pkg::*; #( .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), .mem_req_data (per_bank_mem_req_data[bank_id]), .mem_req_tag (per_bank_mem_req_tag[bank_id]), - .mem_req_flush (per_bank_mem_req_flush[bank_id]), + .mem_req_flags (per_bank_mem_req_flags[bank_id]), .mem_req_ready (per_bank_mem_req_ready[bank_id]), // Memory response @@ -487,7 +496,7 @@ module VX_cache import VX_gpu_pkg::*; #( // Memory request arbitration - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH))-1:0] 
data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in assign data_in[i] = { @@ -496,7 +505,7 @@ module VX_cache import VX_gpu_pkg::*; #( per_bank_mem_req_byteen[i], per_bank_mem_req_data[i], per_bank_mem_req_tag[i], - per_bank_mem_req_flush[i] + per_bank_mem_req_flags[i] }; end @@ -504,7 +513,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1), + .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .ARBITER ("R") ) mem_req_arb ( .clk (clk), @@ -512,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #( .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}), + .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flags}), .valid_out (mem_req_valid), .ready_out (mem_req_ready), `UNUSED_PIN (sel_out) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 054b7c589..c1fea14b2 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -53,6 +53,9 @@ module VX_cache_bank #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // Core response output register parameter CORE_OUT_REG = 0, @@ -82,7 +85,7 @@ module VX_cache_bank #( input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id) input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array - input wire core_req_flush, // flush enable + input wire [`UP(FLAGS_WIDTH)-1:0] core_req_flags, output wire core_req_ready, // Core Response @@ -99,7 +102,7 @@ module VX_cache_bank #( output wire [LINE_SIZE-1:0] mem_req_byteen, output 
wire [`CS_LINE_WIDTH-1:0] mem_req_data, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, - output wire mem_req_flush, + output wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags, input wire mem_req_ready, // Memory response @@ -143,22 +146,25 @@ module VX_cache_bank #( wire [NUM_WAYS-1:0] flush_way_st0; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1; + wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; + wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; wire rw_sel, rw_st0, rw_st1; - wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1; + wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; + wire [`CS_WORD_WIDTH-1:0] write_data_st0, write_data_st1; wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; - wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1; + wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; + wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; wire valid_sel, valid_st0, valid_st1; wire is_creq_st0, is_creq_st1; wire is_fill_st0, is_fill_st1; wire is_replay_st0, is_replay_st1; - wire creq_flush_sel, creq_flush_st0, creq_flush_st1; + wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire evict_dirty_st0, evict_dirty_st1; - wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1; + wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1; wire [NUM_WAYS-1:0] tag_matches_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1; @@ -264,11 +270,11 @@ module VX_cache_bank #( assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign rw_sel = replay_valid ? replay_rw : core_req_rw; assign byteen_sel = replay_valid ? 
replay_byteen : core_req_byteen; - assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel; + assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) : (replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag)); - assign creq_flush_sel = core_req_valid && core_req_flush; + assign flags_sel = core_req_valid ? core_req_flags : '0; assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); @@ -294,14 +300,14 @@ module VX_cache_bank #( end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}), - .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) + .data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}), + .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0}) ); if 
(UUID_WIDTH != 0) begin : g_req_uuid_st0 @@ -321,9 +327,10 @@ module VX_cache_bank #( wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0; wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0; - wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; + assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; - assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; + assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; + assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); wire [NUM_WAYS-1:0] evict_way_st0; wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0; @@ -353,7 +360,9 @@ module VX_cache_bank #( .write (do_cache_wr_st0), .lookup (do_lookup_st0), .line_addr (addr_st0), - .way_sel (flush_way_st0), + .way_idx (flush_way_st0), + + // tag matches .tag_matches(tag_matches_st0), // replacement @@ -362,29 +371,29 @@ module VX_cache_bank #( .evict_tag (evict_tag_st0) ); - wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0; + wire [`CS_TAG_SEL_BITS-1:0] line_tag2_st0; wire is_flush2_st0 = WRITEBACK && is_flush_st0; assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0; - assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0; + assign way_idx_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0; - assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0; + assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? 
evict_tag_st0 : line_tag_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) ); // we have a tag hit - wire is_hit_st1 = (| way_sel_st1); + wire is_hit_st1 = (| way_idx_st1); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; @@ -413,9 +422,7 @@ module VX_cache_bank #( wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1; wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1; - assign 
line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0]; - - `UNUSED_VAR (do_write_miss_st1) + assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) @@ -426,28 +433,16 @@ module VX_cache_bank #( assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceeding write always @(posedge clk) begin // stall reads following writes to same line address - rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1) + rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_idx_st0 == line_idx_st1) && ~rdw_hazard3_st1; // release pipeline stall end - wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}}; + assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1; - wire [LINE_SIZE-1:0] write_byteen_st1; wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; wire [LINE_SIZE-1:0] dirty_byteen_st1; - if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel - reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w; - always @(*) begin - write_byteen_w = '0; - write_byteen_w[wsel_st1] = byteen_st1; - end - assign write_byteen_st1 = write_byteen_w; - end else begin : g_write_byteen_st1 - assign write_byteen_st1 = byteen_st1; - end - VX_cache_data #( .INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)), .BANK_ID (BANK_ID), @@ -473,12 +468,12 @@ module VX_cache_bank #( .fill (do_fill_st1), .flush (do_flush_st1), .write (do_cache_wr_st1), - .way_sel (way_sel_st1), + .way_idx (way_idx_st1), .line_addr (addr_st1), - .wsel (wsel_st1), + .word_idx (word_idx_st1), .fill_data (fill_data_st1), .write_data (write_data_st1), - .write_byteen(write_byteen_st1), + .write_byteen(byteen_st1), .read_data (read_data_st1), .dirty_data (dirty_data_st1), .dirty_byteen(dirty_byteen_st1) @@ -488,13 +483,14 @@ module VX_cache_bank #( wire 
[MSHR_SIZE-1:0] mshr_lookup_rw_st0; wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall; wire mshr_lookup_st0 = mshr_allocate_st0; + wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall; // release allocated mshr entry if we had a hit wire mshr_release_st1; - if (WRITEBACK) begin : g_mshr_release_st1 + if (WRITEBACK) begin : g_mshr_release assign mshr_release_st1 = is_hit_st1; - end else begin : g_mshr_release_st1_ro + end else begin : g_mshr_release_ro // we need to keep missed write requests in MSHR if there is already a pending entry to the same address // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content // this can happen when writes are sent late, when the fill was already in flight. @@ -548,7 +544,7 @@ module VX_cache_bank #( .allocate_valid (mshr_allocate_st0), .allocate_addr (addr_st0), .allocate_rw (rw_st0), - .allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), + .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_prev (mshr_prev_st0), `UNUSED_PIN (allocate_ready), @@ -571,7 +567,7 @@ module VX_cache_bank #( wire [MSHR_SIZE-1:0] lookup_matches; for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches assign lookup_matches[i] = mshr_lookup_pending_st0[i] - && (i != mshr_alloc_id_st0) // exclude current mshr id + && (i != mshr_id_st0) // exclude current mshr id && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough end assign mshr_pending_st0 = (| lookup_matches); @@ -613,7 +609,7 @@ module VX_cache_bank #( wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag; wire mreq_queue_rw; - wire mreq_queue_flush; + wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags; wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1; wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; @@ -629,6 +625,7 @@ module 
VX_cache_bank #( || do_writeback_st1) && ~rdw_hazard3_st1; end else begin : g_mreq_queue_push_ro + `UNUSED_VAR (do_write_miss_st1) `UNUSED_VAR (do_writeback_st1) assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) || do_creq_wr_st1) @@ -637,7 +634,7 @@ module VX_cache_bank #( assign mreq_queue_pop = mem_req_valid && mem_req_ready; assign mreq_queue_addr = addr_st1; - assign mreq_queue_flush = creq_flush_st1; + assign mreq_queue_flags = flags_st1; if (WRITE_ENABLE) begin : g_mreq_queue if (WRITEBACK) begin : g_writeback @@ -645,9 +642,18 @@ module VX_cache_bank #( assign mreq_queue_data = dirty_data_st1; assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1; end else begin : g_writethrough + wire [LINE_SIZE-1:0] line_byteen; + VX_decoder #( + .N (`CS_WORD_SEL_BITS), + .M (WORD_SIZE) + ) byteen_dec ( + .sel_in (word_idx_st1), + .data_in (byteen_st1), + .data_out (line_byteen) + ); assign mreq_queue_rw = rw_st1; - assign mreq_queue_data = write_data_st1; - assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1; + assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}}; + assign mreq_queue_byteen = rw_st1 ? 
line_byteen : '1; `UNUSED_VAR (is_fill_or_flush_st1) `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) @@ -667,17 +673,17 @@ module VX_cache_bank #( end VX_fifo_queue #( - .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .DEPTH (MREQ_SIZE), - .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), + .ALM_FULL (MREQ_SIZE - PIPELINE_STAGES), .OUT_REG (MEM_OUT_REG) ) mem_req_queue ( .clk (clk), .reset (reset), .push (mreq_queue_push), .pop (mreq_queue_pop), - .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}), - .data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}), + .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flags}), + .data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flags}), .empty (mreq_queue_empty), .alm_full (mreq_queue_alm_full), `UNUSED_PIN (full), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 5a8bb9865..71a2ad00b 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -58,6 +58,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // enable bypass for non-cacheable addresses parameter NC_ENABLE = 0, @@ -156,6 +159,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (ARB_TAG_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .TAG_SEL_IDX (TAG_SEL_IDX), .CORE_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_BUF), .MEM_OUT_BUF ((NUM_CACHES > 1) ? 
2 : MEM_OUT_BUF), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 04b0ff746..aa2a1d0ef 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -50,11 +50,11 @@ module VX_cache_data #( input wire flush, input wire write, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel, + input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, - input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data, - input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen, - input wire [NUM_WAYS-1:0] way_sel, + input wire [`CS_WORD_WIDTH-1:0] write_data, + input wire [WORD_SIZE-1:0] write_byteen, + input wire [NUM_WAYS-1:0] way_idx, output wire [`CS_WORD_WIDTH-1:0] read_data, output wire [`CS_LINE_WIDTH-1:0] dirty_data, output wire [LINE_SIZE-1:0] dirty_byteen @@ -68,132 +68,144 @@ module VX_cache_data #( `UNUSED_VAR (read) `UNUSED_VAR (flush) - localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1; + localparam BYTEENW = (WRITE_ENABLE != 0) ? 
LINE_SIZE : 1; - wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; + wire [`LOG2UP(NUM_WAYS)-1:0] way_idx_bin; + wire [`CS_LINE_SEL_BITS-1:0] line_idx; - wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; + assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; + + VX_encoder #( + .N (NUM_WAYS) + ) way_idx_enc ( + .data_in (way_idx), + .data_out (way_idx_bin), + `UNUSED_PIN (valid_out) + ); if (WRITEBACK) begin : g_dirty_data - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; - VX_transpose #( - .DATAW (`CS_WORD_WIDTH), - .N (`CS_WORDS_PER_LINE), - .M (NUM_WAYS) - ) transpose ( - .data_in (line_rdata), - .data_out (transposed_rdata) - ); - assign dirty_data = transposed_rdata[way_idx]; + assign dirty_data = line_rdata[way_idx_bin]; end else begin : g_dirty_data_0 assign dirty_data = '0; end if (DIRTY_BYTES) begin : g_dirty_byteen - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_rdata; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_wdata; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata - wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); - assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]); + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j + wire [WORD_SIZE-1:0] word_mask = {WORD_SIZE{(WORD_SIZE == 1) || (word_idx == j)}}; + wire [WORD_SIZE-1:0] wdata = write ? (bs_rdata[i][j] | (write_byteen & word_mask)) : ((fill || flush) ? '0 : bs_rdata[i][j]); + assign bs_wdata[i][j] = init ? '0 : (way_idx[i] ? 
wdata : bs_rdata[i][j]); + end end + wire bs_read = write || fill || flush; + wire bs_write = init || write || fill || flush; + VX_sp_ram #( .DATAW (LINE_SIZE * NUM_WAYS), .SIZE (`CS_LINES_PER_BANK) ) byteen_store ( .clk (clk), .reset (reset), - .read (write || fill || flush), - .write (init || write || fill || flush), + .read (bs_read && ~stall), + .write (bs_write && ~stall), .wren (1'b1), - .addr (line_sel), + .addr (line_idx), .wdata (bs_wdata), .rdata (bs_rdata) ); - assign dirty_byteen = bs_rdata[way_idx]; + assign dirty_byteen = bs_rdata[way_idx_bin]; end else begin : g_dirty_byteen_0 assign dirty_byteen = '1; end - // order the data layout to perform ways multiplexing last. - // this allows converting way index to binary in parallel with BRAM readaccess and way selection. + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store - wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; + wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [BYTEENW-1:0] line_wren; + wire line_write; + wire line_read; - if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata - wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i - for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j - assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i]; - assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i]) - & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}}; + wire way_en = (NUM_WAYS == 1) || way_idx[i]; + + if (WRITE_ENABLE != 0) begin : g_line_data + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j + wire word_en = (WORD_SIZE == 1) || (word_idx == j); + assign line_wdata[j] = fill ? 
fill_data[j] : write_data; + assign wren_w[j] = {WORD_SIZE{fill}} | (write_byteen & {WORD_SIZE{word_en}}); end + assign line_wren = wren_w; + assign line_write = (fill || write) && way_en; + if (WRITEBACK) begin : g_line_read_wb + assign line_read = (read || fill || flush); + end else begin : g_line_read_wt + assign line_read = read; + end + end else begin : g_line_data_ro + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_data) + assign line_wdata = fill_data; + assign line_wren = 1'b1; + assign line_write = fill && way_en; + assign line_read = read; end - assign line_wren = wren_w; - end else begin : g_line_wdata_ro - `UNUSED_VAR (write) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) - assign line_wdata = fill_data; - assign line_wren = fill; + + VX_sp_ram #( + .DATAW (`CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (BYTEENW), + .NO_RWCHECK (1), + .RW_ASSERT (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (line_read && ~stall), + .write (line_write && ~stall), + .wren (line_wren), + .addr (line_idx), + .wdata (line_wdata), + .rdata (line_rdata[i]) + ); end - VX_encoder #( - .N (NUM_WAYS) - ) way_enc ( - .data_in (way_sel), - .data_out (way_idx), - `UNUSED_PIN (valid_out) - ); - - wire line_read = (read && ~stall) - || (WRITEBACK && (fill || flush)); - - wire line_write = write || fill; - - VX_sp_ram #( - .DATAW (`CS_LINE_WIDTH * NUM_WAYS), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), - .NO_RWCHECK (1), - .RW_ASSERT (1) - ) data_store ( - .clk (clk), - .reset (reset), - .read (line_read), - .write (line_write), - .wren (line_wren), - .addr (line_sel), - .wdata (line_wdata), - .rdata (line_rdata) - ); - - wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; - if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel - assign per_way_rdata = line_rdata[wsel]; - end else begin : g_per_way_rdata - `UNUSED_VAR (wsel) - assign per_way_rdata = line_rdata; + if (`CS_WORDS_PER_LINE > 1) begin : g_read_data + // order 
the data layout to perform ways multiplexing last. + // this allows converting way index to binary in parallel with BRAM readaccess and way selection. + wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; + VX_transpose #( + .DATAW (`CS_WORD_WIDTH), + .N (NUM_WAYS), + .M (`CS_WORDS_PER_LINE) + ) transpose ( + .data_in (line_rdata), + .data_out (transposed_rdata) + ); + assign read_data = transposed_rdata[word_idx][way_idx_bin]; + end else begin : g_read_data_1w + `UNUSED_VAR (word_idx) + assign read_data = line_rdata[way_idx_bin]; end - assign read_data = per_way_rdata[way_idx]; `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) end if (flush && ~stall) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) end if (read && ~stall) begin - `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) + `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) end if (write && ~stall) begin - `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, 
byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)) + `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 92497b80b..cc8fa8571 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -47,7 +47,7 @@ module VX_cache_tags #( input wire write, input wire lookup, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] way_sel, + input wire [NUM_WAYS-1:0] way_idx, output wire [NUM_WAYS-1:0] tag_matches, // eviction @@ -62,7 +62,7 @@ module VX_cache_tags #( // valid, dirty, tag localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS; - wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0]; + wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; @@ -80,7 +80,7 @@ module VX_cache_tags #( end end - assign evict_way = fill ? evict_way_r : way_sel; + assign evict_way = fill ? 
evict_way_r : way_idx; VX_onehot_mux #( .DATAW (`CS_TAG_SEL_BITS), @@ -103,7 +103,7 @@ module VX_cache_tags #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store wire do_fill = fill_s && evict_way[i]; - wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode + wire do_flush = flush_s && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode wire do_write = WRITEBACK && write && tag_matches[i]; wire line_read = (WRITEBACK && (fill_s || flush_s)); @@ -130,10 +130,10 @@ module VX_cache_tags #( ) tag_store ( .clk (clk), .reset (reset), - .read (line_read), - .write (line_write), + .read (line_read && ~stall), + .write (line_write && ~stall), .wren (1'b1), - .addr (line_sel), + .addr (line_idx), .wdata (line_wdata), .rdata (line_rdata) ); @@ -146,29 +146,29 @@ module VX_cache_tags #( assign evict_dirty = | (read_dirty & evict_way); `ifdef DBG_TRACE_CACHE - wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel}; + wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) + `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) end if (flush && ~stall) begin - `TRACE(3, ("%t: %s flush: 
addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) end if (lookup && ~stall) begin if (tag_matches != 0) begin if (write) begin - `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end end else begin if (write) begin - `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end end end diff --git 
a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 0b8a1f3c4..d958736c4 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -57,6 +57,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, + // core request flags + parameter FLAGS_WIDTH = 0, + // enable bypass for non-cacheable addresses parameter NC_ENABLE = 0, @@ -175,6 +178,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF), .MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF) ) cache ( diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index a4dead008..2899b55fd 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -69,8 +69,8 @@ module VX_cyclic_arbiter #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( - .data_in (grant_index), - .valid_in (1'b1), + .sel_in (grant_index), + .data_in (1'b1), .data_out (grant_onehot_w) ); diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv index 7c0c760e5..ce2c509e6 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_decoder.sv @@ -18,25 +18,30 @@ `TRACING_OFF module VX_decoder #( - parameter N = 1, + parameter N = 0, parameter M = 1, parameter MODEL = 0, parameter D = 1 << N ) ( - input wire [N-1:0] data_in, - input wire [M-1:0] valid_in, + input wire [`UP(N)-1:0] sel_in, + input wire [M-1:0] data_in, output wire [D-1:0][M-1:0] data_out ); - logic [D-1:0][M-1:0] shift; - if (MODEL == 1) begin : g_model1 - always @(*) begin - shift = '0; - shift[data_in] = {M{1'b1}}; + if (N != 0) begin : g_decoder + logic [D-1:0][M-1:0] shift; + if (MODEL == 1) begin : g_model1 + always @(*) begin + shift = '0; + shift[sel_in] = {M{1'b1}}; + end + end else begin : g_model0 + assign shift = ((D*M)'({M{1'b1}})) << (sel_in * M); end - end else begin : 
g_model0 - assign shift = ((D*M)'({M{1'b1}})) << (data_in * M); + assign data_out = {D{data_in}} & shift; + end else begin : g_passthru + `UNUSED_VAR (sel_in) + assign data_out = data_in; end - assign data_out = {D{valid_in}} & shift; endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 4ece7cf69..2cae6fead 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -104,8 +104,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH/8) ) req_be_dec ( - .data_in (req_idx), - .valid_in (mem_req_byteen_in), + .sel_in (req_idx), + .data_in (mem_req_byteen_in), .data_out (mem_req_byteen_out_w) ); @@ -113,8 +113,8 @@ module VX_mem_adapter #( .N (D), .M (SRC_DATA_WIDTH) ) req_data_dec ( - .data_in (req_idx), - .valid_in (mem_req_data_in), + .sel_in (req_idx), + .data_in (mem_req_data_in), .data_out (mem_req_data_out_w) ); diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index c27f04da4..760290a1c 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -18,7 +18,7 @@ module VX_mem_coalescer #( parameter `STRING INSTANCE_ID = "", parameter NUM_REQS = 1, parameter ADDR_WIDTH = 32, - parameter FLAGS_WIDTH = 1, + parameter FLAGS_WIDTH = 0, parameter DATA_IN_SIZE = 4, parameter DATA_OUT_SIZE = 64, parameter TAG_WIDTH = 8, @@ -43,7 +43,7 @@ module VX_mem_coalescer #( input wire [NUM_REQS-1:0] in_req_mask, input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr, - input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags, + input wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] in_req_flags, input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data, input wire [TAG_WIDTH-1:0] in_req_tag, output wire in_req_ready, @@ -61,7 +61,7 @@ module VX_mem_coalescer #( output wire [OUT_REQS-1:0] out_req_mask, output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen, output wire 
[OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr, - output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags, + output wire [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags, output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data, output wire [OUT_TAG_WIDTH-1:0] out_req_tag, input wire out_req_ready, @@ -92,7 +92,7 @@ module VX_mem_coalescer #( logic out_req_rw_r, out_req_rw_n; logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; - logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n; + logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags_r, out_req_flags_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n; logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; @@ -110,7 +110,7 @@ module VX_mem_coalescer #( logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; - logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n; + logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] seed_flags_r, seed_flags_n; logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; @@ -139,7 +139,7 @@ module VX_mem_coalescer #( assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W]; end - wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; + wire [DATA_RATIO-1:0][`UP(FLAGS_WIDTH)-1:0] req_flags; for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags assign req_flags[j] = in_req_flags[DATA_RATIO * i + j]; end @@ -221,7 +221,7 @@ module VX_mem_coalescer #( end VX_pipe_register #( - .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), + .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + 
OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), .RESETW (1 + NUM_REQS + 1), .INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0}) ) pipe_reg ( @@ -270,7 +270,12 @@ module VX_mem_coalescer #( assign out_req_mask = out_req_mask_r; assign out_req_byteen = out_req_byteen_r; assign out_req_addr = out_req_addr_r; - assign out_req_flags = out_req_flags_r; + if (FLAGS_WIDTH != 0) begin : g_out_req_flags + assign out_req_flags = out_req_flags_r; + end else begin : g_out_req_flags_0 + `UNUSED_VAR (out_req_flags_r) + assign out_req_flags = '0; + end assign out_req_data = out_req_data_r; assign out_req_tag = out_req_tag_r; diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 4ba8bf147..abd68da24 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -21,7 +21,7 @@ module VX_mem_scheduler #( parameter WORD_SIZE = 4, parameter LINE_SIZE = WORD_SIZE, parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), - parameter FLAGS_WIDTH = 1, + parameter FLAGS_WIDTH = 0, parameter TAG_WIDTH = 8, parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter CORE_QUEUE_SIZE= 8, @@ -50,7 +50,7 @@ module VX_mem_scheduler #( input wire [CORE_REQS-1:0] core_req_mask, input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr, - input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags, + input wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags, input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data, input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, @@ -72,7 +72,7 @@ module VX_mem_scheduler #( output wire [MEM_CHANNELS-1:0] mem_req_mask, output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen, output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags, + output wire 
[MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags, output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, input wire mem_req_ready, @@ -112,7 +112,7 @@ module VX_mem_scheduler #( wire reqq_rw; wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen; wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr; - wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags; + wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags; wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data; wire [REQQ_TAG_WIDTH-1:0] reqq_tag; wire reqq_ready; @@ -122,7 +122,7 @@ module VX_mem_scheduler #( wire reqq_rw_s; wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s; wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s; - wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s; + wire [MERGED_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags_s; wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s; wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s; wire reqq_ready_s; @@ -132,7 +132,7 @@ module VX_mem_scheduler #( wire mem_req_rw_s; wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s; + wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_s; wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; wire mem_req_ready_s; @@ -167,7 +167,7 @@ module VX_mem_scheduler #( end VX_elastic_buffer #( - .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), + .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + `UP(FLAGS_WIDTH) + WORD_WIDTH) + REQQ_TAG_WIDTH), .SIZE (CORE_QUEUE_SIZE), .OUT_REG (1) ) req_queue ( @@ -297,7 +297,7 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b; - wire 
[MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b; + wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b; wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; @@ -385,8 +385,10 @@ module VX_mem_scheduler #( assign reqq_ready_s = req_sent_all; + wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_u; + VX_elastic_buffer #( - .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), + .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + `UP(FLAGS_WIDTH) + LINE_WIDTH) + MEM_TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -395,11 +397,18 @@ module VX_mem_scheduler #( .valid_in (mem_req_valid_s), .ready_in (mem_req_ready_s), .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}), - .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}), + .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags_u, mem_req_data, mem_req_tag}), .valid_out (mem_req_valid), .ready_out (mem_req_ready) ); + if (FLAGS_WIDTH != 0) begin : g_mem_req_flags + assign mem_req_flags = mem_req_flags_u; + end else begin : g_mem_req_flags_0 + `UNUSED_VAR (mem_req_flags_u) + assign mem_req_flags = '0; + end + // Handle memory responses //////////////////////////////////////////////// reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index efe9838d6..3ca1f57e5 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -484,8 +484,8 @@ module VX_rr_arbiter #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( - .data_in (grant_index), - .valid_in (grant_valid), + .sel_in (grant_index), + .data_in (grant_valid), 
.data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index febfd0465..0c4eff2f1 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -68,8 +68,8 @@ module VX_stream_xbar #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) ) sel_in_decoder ( - .data_in (sel_in[i]), - .valid_in (valid_in[i]), + .sel_in (sel_in[i]), + .data_in (valid_in[i]), .data_out (per_output_valid_in[i]) ); assign ready_in[i] = | per_output_ready_in_w[i]; @@ -141,8 +141,8 @@ module VX_stream_xbar #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) ) sel_in_decoder ( - .data_in (sel_in[0]), - .valid_in (valid_in[0]), + .sel_in (sel_in[0]), + .data_in (valid_in[0]), .data_out (valid_out_w) ); From 07ce16e75cc808458cdd7c9ca3037cc161859278 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 5 Oct 2024 17:42:26 -0700 Subject: [PATCH 351/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 32 +++++++++++++------------------- hw/rtl/cache/VX_cache_data.sv | 21 +++++++++------------ hw/rtl/cache/VX_cache_tags.sv | 25 +++++++++---------------- 3 files changed, 31 insertions(+), 47 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index c1fea14b2..bbf896584 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -351,14 +351,12 @@ module VX_cache_bank #( .req_uuid (req_uuid_st0), - .stall (pipe_stall), - // init/flush/fill/write/lookup .init (do_init_st0), - .flush (do_flush_st0), - .fill (do_fill_st0), - .write (do_cache_wr_st0), - .lookup (do_lookup_st0), + .flush (do_flush_st0 && ~pipe_stall), + .fill (do_fill_st0 && ~pipe_stall), + .write (do_cache_wr_st0 && ~pipe_stall), + .lookup (do_lookup_st0 && ~pipe_stall), .line_addr (addr_st0), .way_idx (flush_way_st0), @@ -458,16 +456,12 @@ module VX_cache_bank #( ) cache_data ( .clk (clk), .reset (reset), - .req_uuid (req_uuid_st1), - - .stall (pipe_stall), - .init (do_init_st1), - .read (do_cache_rd_st1), - .fill (do_fill_st1), - .flush 
(do_flush_st1), - .write (do_cache_wr_st1), + .fill (do_fill_st1 && ~pipe_stall), + .flush (do_flush_st1 && ~pipe_stall), + .write (do_cache_wr_st1 && ~pipe_stall), + .read (do_cache_rd_st1 && ~pipe_stall), .way_idx (way_idx_st1), .line_addr (addr_st1), .word_idx (word_idx_st1), @@ -481,10 +475,10 @@ module VX_cache_bank #( wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0; wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0; - wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall; + wire mshr_allocate_st0 = valid_st0 && is_creq_st0; wire mshr_lookup_st0 = mshr_allocate_st0; - wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall; + wire mshr_finalize_st1 = valid_st1 && is_creq_st1; // release allocated mshr entry if we had a hit wire mshr_release_st1; @@ -541,7 +535,7 @@ module VX_cache_bank #( .dequeue_ready (replay_ready), // allocate - .allocate_valid (mshr_allocate_st0), + .allocate_valid (mshr_allocate_st0 && ~pipe_stall), .allocate_addr (addr_st0), .allocate_rw (rw_st0), .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), @@ -550,13 +544,13 @@ module VX_cache_bank #( `UNUSED_PIN (allocate_ready), // lookup - .lookup_valid (mshr_lookup_st0), + .lookup_valid (mshr_lookup_st0 && ~pipe_stall), .lookup_addr (addr_st0), .lookup_pending (mshr_lookup_pending_st0), .lookup_rw (mshr_lookup_rw_st0), // finalize - .finalize_valid (mshr_finalize_st1), + .finalize_valid (mshr_finalize_st1 && ~pipe_stall), .finalize_release(mshr_release_st1), .finalize_pending(mshr_pending_st1), .finalize_id (mshr_id_st1), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index aa2a1d0ef..641934303 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -42,13 +42,11 @@ module VX_cache_data #( input wire[`UP(UUID_WIDTH)-1:0] req_uuid, `IGNORE_UNUSED_END - input wire stall, - input wire init, - input wire read, input wire fill, input wire flush, input wire write, + input wire read, input wire 
[`CS_LINE_ADDR_WIDTH-1:0] line_addr, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, @@ -62,7 +60,6 @@ module VX_cache_data #( `UNUSED_SPARAM (INSTANCE_ID) `UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (WORD_SIZE) - `UNUSED_VAR (stall) `UNUSED_VAR (line_addr) `UNUSED_VAR (init) `UNUSED_VAR (read) @@ -111,8 +108,8 @@ module VX_cache_data #( ) byteen_store ( .clk (clk), .reset (reset), - .read (bs_read && ~stall), - .write (bs_write && ~stall), + .read (bs_read), + .write (bs_write), .wren (1'b1), .addr (line_idx), .wdata (bs_wdata), @@ -166,8 +163,8 @@ module VX_cache_data #( ) data_store ( .clk (clk), .reset (reset), - .read (line_read && ~stall), - .write (line_write && ~stall), + .read (line_read), + .write (line_write), .wren (line_wren), .addr (line_idx), .wdata (line_wdata), @@ -195,16 +192,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin - if (fill && ~stall) begin + if (fill) begin `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) end - if (flush && ~stall) begin + if (flush) begin `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) end - if (read && ~stall) begin + if (read) begin `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) end - if (write && ~stall) begin + if (write) begin `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) end end diff --git a/hw/rtl/cache/VX_cache_tags.sv 
b/hw/rtl/cache/VX_cache_tags.sv index cc8fa8571..354a57b0b 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -38,8 +38,6 @@ module VX_cache_tags #( input wire [`UP(UUID_WIDTH)-1:0] req_uuid, `IGNORE_UNUSED_END - input wire stall, - // init/fill/lookup input wire init, input wire flush, @@ -75,7 +73,7 @@ module VX_cache_tags #( always @(posedge clk) begin if (reset) begin evict_way_r <= 1; - end else if (~stall) begin // holding the value on stalls prevents filling different slots twice + end else if (lookup) begin evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]}; end end @@ -91,22 +89,17 @@ module VX_cache_tags #( .data_out (evict_tag) ); end else begin : g_evict_way_0 - `UNUSED_VAR (stall) assign evict_way = 1'b1; assign evict_tag = read_tag; end - // fill and flush need to also read in writeback mode - wire fill_s = fill && (!WRITEBACK || ~stall); - wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - wire do_fill = fill_s && evict_way[i]; - wire do_flush = flush_s && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode + wire do_fill = fill && evict_way[i]; + wire do_flush = flush && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode wire do_write = WRITEBACK && write && tag_matches[i]; - wire line_read = (WRITEBACK && (fill_s || flush_s)); + wire line_read = (WRITEBACK && (fill || flush)); wire line_write = init || do_fill || do_flush || do_write; wire line_valid = ~(init || flush); @@ -130,8 +123,8 @@ module VX_cache_tags #( ) tag_store ( .clk (clk), .reset (reset), - .read (line_read && ~stall), - .write (line_write && ~stall), + .read (line_read), + .write (line_write), .wren (1'b1), .addr (line_idx), .wdata (line_wdata), @@ -148,16 +141,16 @@ module VX_cache_tags #( `ifdef DBG_TRACE_CACHE wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; always @(posedge clk) begin - if (fill && ~stall) 
begin + if (fill) begin `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) end - if (flush && ~stall) begin + if (flush) begin `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) end - if (lookup && ~stall) begin + if (lookup) begin if (tag_matches != 0) begin if (write) begin `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) From c91f9684fcd5e8e876143b7da9028456fa7692a8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 5 Oct 2024 18:35:26 -0700 Subject: [PATCH 352/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index bbf896584..cbc8d30b4 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -491,13 +491,15 @@ module VX_cache_bank #( assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1); end + wire mshr_dequeue = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall; + VX_pending_size #( .SIZE (MSHR_SIZE) ) mshr_pending_size ( .clk (clk), .reset (reset), .incr (core_req_fire), - .decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)), + .decr (replay_fire || mshr_dequeue), .empty (mshr_empty), `UNUSED_PIN (alm_empty), .full (mshr_alm_full), From ee96d4334b19397836c6ba1fa88ac1a540db5aad Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 8 Oct 2024 23:01:01 -0700 
Subject: [PATCH 353/488] VX_onehot_encoder update --- hw/rtl/VX_platform.vh | 11 ++++++++--- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 2 +- hw/rtl/cache/VX_cache_mshr.sv | 5 +++-- hw/rtl/libs/VX_matrix_arbiter.sv | 2 +- .../libs/{VX_encoder.sv => VX_onehot_encoder.sv} | 4 ++-- hw/rtl/libs/VX_pending_size.sv | 14 ++++++++------ hw/rtl/libs/VX_rr_arbiter.sv | 2 +- 8 files changed, 25 insertions(+), 17 deletions(-) rename hw/rtl/libs/{VX_encoder.sv => VX_onehot_encoder.sv} (97%) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 3e9042737..4f78fee24 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -37,9 +37,11 @@ endgenerate `define ASSERT(cond, msg) \ assert(cond) else $error msg -`define RUNTIME_ASSERT(cond, msg) \ - always @(posedge clk) begin \ - assert(cond) else $error msg; \ +`define RUNTIME_ASSERT(cond, msg) \ + always @(posedge clk) begin \ + if (!reset) begin \ + `ASSERT(cond, msg); \ + end \ end `define __SCOPE @@ -172,6 +174,7 @@ endgenerate `ifdef QUARTUS `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) +`define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) @@ -180,6 +183,7 @@ endgenerate `elsif VIVADO `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) +`define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) `define DISABLE_BRAM (* ram_style = "registers" *) @@ -188,6 +192,7 @@ endgenerate `else `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) x.DATA_WIDTH +`define USE_BLOCK_BRAM `define USE_FAST_BRAM `define NO_RW_RAM_CHECK `define DISABLE_BRAM diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 7e0bcfaed..f21f851c0 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv 
+++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -968,7 +968,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [COUT_TID_WIDTH-1:0] cout_tid; - VX_encoder #( + VX_onehot_encoder #( .N (`VX_MEM_BYTEEN_WIDTH) ) cout_tid_enc ( .data_in (vx_mem_req_byteen), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 641934303..d749cdcd4 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -73,7 +73,7 @@ module VX_cache_data #( assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; - VX_encoder #( + VX_onehot_encoder #( .N (NUM_WAYS) ) way_idx_enc ( .data_in (way_idx), diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 482c110dc..4e86f25c7 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -148,11 +148,12 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); - VX_encoder #( + VX_priority_encoder #( .N (MSHR_SIZE) ) prev_sel ( .data_in (addr_matches & ~next_table_x), - .data_out (prev_idx), + .index_out (prev_idx), + `UNUSED_PIN (onehot_out), `UNUSED_PIN (valid_out) ); diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 2840ef43e..b6b88e47a 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -72,7 +72,7 @@ module VX_matrix_arbiter #( assign grant_onehot = grant; - VX_encoder #( + VX_onehot_encoder #( .N (NUM_REQS) ) encoder ( .data_in (grant_onehot), diff --git a/hw/rtl/libs/VX_encoder.sv b/hw/rtl/libs/VX_onehot_encoder.sv similarity index 97% rename from hw/rtl/libs/VX_encoder.sv rename to hw/rtl/libs/VX_onehot_encoder.sv index 86ccad792..08198e430 100644 --- a/hw/rtl/libs/VX_encoder.sv +++ b/hw/rtl/libs/VX_onehot_encoder.sv @@ -13,11 +13,11 @@ `include "VX_platform.vh" -// Fast encoder using parallel prefix computation +// Fast one-hot encoder using parallel prefix computation // Adapted from BaseJump STL: http://bjump.org/data_out.html `TRACING_OFF -module 
VX_encoder #( +module VX_onehot_encoder #( parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 1e72cef19..b94889e6e 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -66,11 +66,13 @@ module VX_pending_size #( if (INCRW != 1 || DECRW != 1) begin : g_wide_step - localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); + localparam DELTAW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); logic [SIZEW-1:0] size_n, size_r; - assign size_n = $signed(size_r) + SIZEW'($signed(SUBW'(incr) - SUBW'(decr))); + wire [DELTAW-1:0] delta = DELTAW'(incr) - DELTAW'(decr); + + assign size_n = $signed(size_r) + SIZEW'($signed(delta)); always @(posedge clk) begin if (reset) begin @@ -80,8 +82,8 @@ module VX_pending_size #( alm_full_r <= 0; size_r <= '0; end else begin - `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); - `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); + `ASSERT((DELTAW'(incr) <= DELTAW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); + `ASSERT((DELTAW'(incr) >= DELTAW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); empty_r <= (size_n == SIZEW'(0)); full_r <= (size_n == SIZEW'(SIZE)); alm_empty_r <= (size_n <= SIZEW'(ALM_EMPTY)); @@ -129,7 +131,7 @@ module VX_pending_size #( wire is_empty_n = (used_r == ADDRW'(1)); wire is_full_n = (used_r == ADDRW'(SIZE-1)); - wire [1:0] push_minus_pop = {~incr & decr, incr ^ decr}; + wire [1:0] delta = {~incr & decr, incr ^ decr}; always @(posedge clk) begin if (reset) begin @@ -148,7 +150,7 @@ module VX_pending_size #( if (is_empty_n) empty_r <= 1; end - used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop)); + used_r <= $signed(used_r) + ADDRW'($signed(delta)); end end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 3ca1f57e5..f5304b023 100644 --- 
a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -448,7 +448,7 @@ module VX_rr_arbiter #( end end - VX_encoder #( + VX_onehot_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), From f49084b2987b011ea8f4527546c86e8ffc7630fd Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 8 Oct 2024 23:44:36 -0700 Subject: [PATCH 354/488] improving block rams inference with registered read address. --- hw/rtl/libs/VX_dp_ram.sv | 159 +++++++++++++++++++++++++++++---------- hw/rtl/libs/VX_sp_ram.sv | 2 + 2 files changed, 121 insertions(+), 40 deletions(-) diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 21ab03ad5..c27827552 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,6 +19,7 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, + parameter RADDR_REG = 0, parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, @@ -57,8 +58,7 @@ module VX_dp_ram #( `UNUSED_PARAM (RW_ASSERT) `UNUSED_VAR (read) - - `RUNTIME_ASSERT((((WRENW == 1) ) || ~write) || (| wren), ("%t: invalid write enable mask", $time)) + `UNUSED_VAR (wren) if (OUT_REG && !READ_ENABLE) begin : g_out_reg `UNUSED_PARAM (NO_RWCHECK) @@ -78,7 +78,7 @@ module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -96,7 +96,7 @@ module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -104,7 +104,7 @@ module VX_dp_ram #( end end `else - // default synthesis + // Not Quartus if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -117,7 +117,7 @@ module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -135,7 +135,7 @@ module VX_dp_ram #( end end if (RESET_OUT && reset) begin - rdata_r <= 
'0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -152,7 +152,7 @@ module VX_dp_ram #( if (write) ram[waddr] <= wdata; if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -167,7 +167,7 @@ module VX_dp_ram #( if (write) ram[waddr] <= wdata; if (RESET_OUT && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else begin rdata_r <= ram[raddr]; end @@ -179,6 +179,7 @@ module VX_dp_ram #( end else begin : g_no_out_reg // OUT_REG==0 || READ_ENABLE=1 wire [DATAW-1:0] rdata_w; + reg [ADDRW-1:0] raddr_reg; `ifdef SYNTHESIS if (WRENW > 1) begin : g_writeen `ifdef QUARTUS @@ -192,8 +193,16 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; @@ -205,8 +214,16 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_rwcheck reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -217,8 +234,16 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end end `else @@ -233,8 +258,16 @@ module VX_dp_ram #( ram[waddr][i * 
WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; @@ -246,8 +279,16 @@ module VX_dp_ram #( ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -258,8 +299,16 @@ module VX_dp_ram #( ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; end end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end end `endif @@ -272,8 +321,16 @@ module VX_dp_ram #( if (write) begin ram[waddr] <= wdata; end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; @@ -282,8 +339,16 @@ module VX_dp_ram #( if (write) begin ram[waddr] <= wdata; end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + 
`UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end else begin : g_rwcheck reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION @@ -291,8 +356,16 @@ module VX_dp_ram #( if (write) begin ram[waddr] <= wdata; end + if (read) begin + raddr_reg <= raddr; + end + end + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + assign rdata_w = ram[raddr]; + `UNUSED_VAR (raddr_reg) end - assign rdata_w = ram[raddr]; end end end @@ -316,39 +389,46 @@ module VX_dp_ram #( ram[waddr] <= ram_n; end end + if (read) begin + raddr_reg <= raddr; + end end - if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass - reg [DATAW-1:0] prev_data; - reg [ADDRW-1:0] prev_waddr; - reg prev_write; + if (RADDR_REG != 0) begin : g_rdata_async + assign rdata_w = ram[raddr_reg]; + end else begin : g_rdata_sync + `UNUSED_VAR (raddr_reg) + if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; - always @(posedge clk) begin - if (reset) begin - prev_write <= 0; - prev_data <= '0; - prev_waddr <= '0; - end else begin - prev_write <= write; - prev_data <= ram[waddr]; - prev_waddr <= waddr; + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + prev_data <= ram[waddr]; + prev_waddr <= waddr; + end end - end - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_assert - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) + assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; + if (RW_ASSERT) begin : g_rw_assert + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) + end + end else begin : g_rdata_with_bypass + assign rdata_w = ram[raddr]; end - end else begin : g_rdata_with_bypass - assign rdata_w = ram[raddr]; end `endif - if (OUT_REG != 0) begin : g_rdata_req reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (READ_ENABLE && reset) begin - rdata_r <= '0; + rdata_r <= INIT_VALUE; end else if (!READ_ENABLE || read) begin rdata_r <= rdata_w; end @@ -357,7 +437,6 @@ module VX_dp_ram #( end else begin : g_rdata_comb assign rdata = rdata_w; end - end endmodule diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index efce4b5f2..7974cb679 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -26,6 +26,7 @@ module VX_sp_ram #( parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, + parameter RADDR_REG = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, parameter ADDRW = `LOG2UP(SIZE) @@ -44,6 +45,7 @@ module VX_sp_ram #( .SIZE (SIZE), .WRENW (WRENW), .OUT_REG (OUT_REG), + .RADDR_REG (RADDR_REG), .LUTRAM (LUTRAM), .NO_RWCHECK (NO_RWCHECK), .RW_ASSERT (RW_ASSERT), From a5381fd78867525209fab3de5952f326ace6def5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 9 Oct 2024 04:14:15 -0700 Subject: [PATCH 355/488] async bram optimization --- hw/rtl/cache/VX_cache_bank.sv | 50 ++++-------- hw/rtl/cache/VX_cache_data.sv | 8 +- hw/rtl/cache/VX_cache_mshr.sv | 134 +++++++++++++++------------------ hw/rtl/cache/VX_cache_tags.sv | 14 ++-- hw/rtl/core/VX_fetch.sv | 5 +- hw/rtl/core/VX_ipdom_stack.sv | 28 ++++--- hw/rtl/core/VX_split_join.sv | 3 +- hw/rtl/libs/VX_fifo_queue.sv | 47 +++++++----- hw/rtl/libs/VX_index_buffer.sv | 2 +- 9 files changed, 135 insertions(+), 156 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index cbc8d30b4..6a7fcaf52 100644 --- 
a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -167,7 +167,6 @@ module VX_cache_bank #( wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1; wire [NUM_WAYS-1:0] tag_matches_st0; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; - wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1; wire mshr_pending_st0, mshr_pending_st1; wire mshr_empty; @@ -380,14 +379,14 @@ module VX_cache_bank #( assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, 
word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) ); // we have a tag hit @@ -473,25 +472,20 @@ module VX_cache_bank #( .dirty_byteen(dirty_byteen_st1) ); - wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0; - wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0; wire mshr_allocate_st0 = valid_st0 && is_creq_st0; - wire mshr_lookup_st0 = mshr_allocate_st0; - - wire mshr_finalize_st1 = valid_st1 && is_creq_st1; // release allocated mshr entry if we had a hit wire mshr_release_st1; if (WRITEBACK) begin : g_mshr_release - assign mshr_release_st1 = is_hit_st1; + assign mshr_release_st1 = valid_st1 && is_creq_st1 && is_hit_st1; end else begin : g_mshr_release_ro // we need to keep missed write requests in MSHR if there is already a pending entry to the same address // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content // this can happen when writes are sent late, when the fill was already in flight. 
- assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1); + assign mshr_release_st1 = valid_st1 && is_creq_st1 && (is_hit_st1 || (rw_st1 && ~mshr_pending_st1)); end - wire mshr_dequeue = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall; + wire mshr_dequeue = mshr_release_st1 && ~pipe_stall; VX_pending_size #( .SIZE (MSHR_SIZE) @@ -513,6 +507,8 @@ module VX_cache_bank #( .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), .MSHR_SIZE (MSHR_SIZE), + .WRITEBACK (WRITEBACK), + .RDW_STALL (1), .UUID_WIDTH (UUID_WIDTH), .DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH) ) cache_mshr ( @@ -520,8 +516,8 @@ module VX_cache_bank #( .reset (reset), .deq_req_uuid (req_uuid_sel), - .lkp_req_uuid (req_uuid_st0), - .fin_req_uuid (req_uuid_st1), + .alc_req_uuid (req_uuid_st0), + .rel_req_uuid (req_uuid_st1), // memory fill .fill_valid (mem_rsp_fire), @@ -542,32 +538,14 @@ module VX_cache_bank #( .allocate_rw (rw_st0), .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), - .allocate_prev (mshr_prev_st0), + .allocate_pending(mshr_pending_st0), `UNUSED_PIN (allocate_ready), - // lookup - .lookup_valid (mshr_lookup_st0 && ~pipe_stall), - .lookup_addr (addr_st0), - .lookup_pending (mshr_lookup_pending_st0), - .lookup_rw (mshr_lookup_rw_st0), - - // finalize - .finalize_valid (mshr_finalize_st1 && ~pipe_stall), - .finalize_release(mshr_release_st1), - .finalize_pending(mshr_pending_st1), - .finalize_id (mshr_id_st1), - .finalize_prev (mshr_prev_st1) + // release + .release_valid (mshr_release_st1 && ~pipe_stall), + .release_id (mshr_id_st1) ); - // check if there are pending requests to same line in the MSHR - wire [MSHR_SIZE-1:0] lookup_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches - assign lookup_matches[i] = mshr_lookup_pending_st0[i] - && (i != mshr_id_st0) // exclude current mshr id - && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write 
requests if writethrough - end - assign mshr_pending_st0 = (| lookup_matches); - // schedule core response wire crsp_queue_valid, crsp_queue_ready; diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index d749cdcd4..54a78e357 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -193,16 +193,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) end if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) end if (read) begin - `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) + `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) end if (write) begin - `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) + `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 4e86f25c7..ff3ead64f 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -24,36 +24,22 @@ // arrival and are dequeued in the same order. // Each entry has a next pointer to the next entry pending for the same cache line. // -// During the fill operation, the MSHR will release the MSHR entry at fill_id +// During the fill request, the MSHR will release the MSHR entry at fill_id // which represents the first request in the pending list that initiated the memory fill. // -// The dequeue operation directly follows the fill operation and will release +// The dequeue response directly follows the fill request and will release // all the subsequent entries linked to fill_id (pending the same cache line). // -// During the allocation operation, the MSHR will allocate the next free slot +// During the allocation request, the MSHR will allocate the next free slot // for the incoming core request. We return the allocated slot id as well as // the slot id of the previous entry for the same cache line. This is used to -// link the new entry to the pending list during finalization. +// link the new entry to the pending list. // -// The lookup operation is used to find all pending entries for a given cache line. -// This is used to by the cache bank to determine if a cache miss is already pending -// and therefore avoid issuing a memory fill request. -// -// The finalize operation is used to release the allocated MSHR entry if we had a hit. -// If we had a miss and finalize_pending is true, we link the allocated entry to -// its corresponding pending list (via finalize_prev). +// The release request is used to invalidate the allocated MSHR entry if we had a cache hit. 
// // Warning: This MSHR implementation is strongly coupled with the bank pipeline // and as such changes to either module requires careful evaluation. // -// This architecture implements three pipeline stages: -// - Arbitration: cache bank arbitration before entering pipeline. -// fill and dequeue operations are executed at this stage. -// - stage 0: cache bank tag access stage. -// allocate and lookup operations are executed at this stage. -// - stage 1: cache bank tdatag access stage. -// finalize operation is executed at this stage. -// module VX_cache_mshr #( parameter `STRING INSTANCE_ID= "", @@ -68,6 +54,11 @@ module VX_cache_mshr #( parameter UUID_WIDTH = 0, // MSHR parameters parameter DATA_WIDTH = 1, + // Enable cache writeback + parameter WRITEBACK = 0, + // Cache stall on read during write + RDW_STALL = 0, + parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE) ) ( input wire clk, @@ -75,8 +66,8 @@ module VX_cache_mshr #( `IGNORE_UNUSED_BEGIN input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid, - input wire[`UP(UUID_WIDTH)-1:0] lkp_req_uuid, - input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid, + input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid, + input wire[`UP(UUID_WIDTH)-1:0] rel_req_uuid, `IGNORE_UNUSED_END // memory fill @@ -98,21 +89,12 @@ module VX_cache_mshr #( input wire allocate_rw, input wire [DATA_WIDTH-1:0] allocate_data, output wire [MSHR_ADDR_WIDTH-1:0] allocate_id, - output wire [MSHR_ADDR_WIDTH-1:0] allocate_prev, + output wire allocate_pending, output wire allocate_ready, - // lookup - input wire lookup_valid, - input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr, - output wire [MSHR_SIZE-1:0] lookup_pending, - output wire [MSHR_SIZE-1:0] lookup_rw, - - // finalize - input wire finalize_valid, - input wire finalize_release, - input wire finalize_pending, - input wire [MSHR_ADDR_WIDTH-1:0] finalize_id, - input wire [MSHR_ADDR_WIDTH-1:0] finalize_prev + // release + input wire release_valid, + input wire [MSHR_ADDR_WIDTH-1:0] release_id ); `UNUSED_PARAM (BANK_ID) 
@@ -130,13 +112,15 @@ module VX_cache_mshr #( reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n; wire [MSHR_ADDR_WIDTH-1:0] prev_idx; + reg [MSHR_ADDR_WIDTH-1:0] post_alloc_id, post_alloc_previd; + reg post_alloc_val; wire allocate_fire = allocate_valid && allocate_ready; wire dequeue_fire = dequeue_valid && dequeue_ready; wire [MSHR_SIZE-1:0] addr_matches; for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches - assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr); + assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr); end VX_lzc #( @@ -148,6 +132,7 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); + // find matching tail-entry VX_priority_encoder #( .N (MSHR_SIZE) ) prev_sel ( @@ -172,18 +157,19 @@ module VX_cache_mshr #( valid_table_n[dequeue_id] = 0; if (next_table[dequeue_id]) begin dequeue_id_n = next_index[dequeue_id]; + end else if (!RDW_STALL && post_alloc_val && (post_alloc_previd == dequeue_id)) begin + dequeue_id_n = post_alloc_id; end else begin dequeue_val_n = 0; end end - if (finalize_valid) begin - if (finalize_release) begin - valid_table_n[finalize_id] = 0; - end - if (finalize_pending) begin - next_table_x[finalize_prev] = 1; - end + if (release_valid) begin + valid_table_n[release_id] = 0; + end + + if (post_alloc_val) begin + next_table_x[post_alloc_previd] = 1; end next_table_n = next_table_x; @@ -198,39 +184,43 @@ module VX_cache_mshr #( valid_table <= '0; allocate_rdy <= 0; dequeue_val <= 0; + post_alloc_val <= 0; end else begin valid_table <= valid_table_n; allocate_rdy <= allocate_rdy_n; dequeue_val <= dequeue_val_n; + post_alloc_val <= allocate_fire && allocate_pending; end if (allocate_fire) begin - addr_table[allocate_id] <= allocate_addr; + addr_table[allocate_id] <= allocate_addr; write_table[allocate_id] <= allocate_rw; end - if (finalize_valid && finalize_pending) begin - next_index[finalize_prev] <= finalize_id; + if (post_alloc_val) begin + 
next_index[post_alloc_previd] <= post_alloc_id; end dequeue_id_r <= dequeue_id_n; allocate_id_r <= allocate_id_n; next_table <= next_table_n; + post_alloc_id <= allocate_id; + post_alloc_previd <= prev_idx; end `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid)) + `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid)) - `RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) + `RUNTIME_ASSERT((~release_valid || valid_table[release_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_table[release_id], BANK_ID), release_id, rel_req_uuid)) `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) VX_dp_ram #( - .DATAW (DATA_WIDTH), - .SIZE (MSHR_SIZE), - .LUTRAM (1) + .DATAW (DATA_WIDTH), + .SIZE (MSHR_SIZE), + .RADDR_REG (1) ) entries ( .clk (clk), .reset (reset), @@ -239,7 +229,7 @@ module VX_cache_mshr #( .wren (1'b1), .waddr (allocate_id_r), .wdata (allocate_data), - .raddr (dequeue_id_r), + .raddr (dequeue_id_n), .rdata (dequeue_data) ); @@ -247,18 +237,17 @@ module VX_cache_mshr #( assign allocate_ready = allocate_rdy; assign allocate_id = allocate_id_r; - assign allocate_prev = prev_idx; + if (WRITEBACK) begin : g_pending_wb + assign allocate_pending = |addr_matches; + end else begin : g_pending_wt + // exclude write requests if writethrough + assign allocate_pending = |(addr_matches & ~write_table); + end - assign dequeue_valid = dequeue_val; - assign dequeue_addr = addr_table[dequeue_id_r]; - assign dequeue_rw = 
write_table[dequeue_id_r]; - assign dequeue_id = dequeue_id_r; - - // return pending entries for the given cache line - assign lookup_pending = addr_matches; - assign lookup_rw = write_table; - - `UNUSED_VAR (lookup_valid) + assign dequeue_valid = dequeue_val; + assign dequeue_addr = addr_table[dequeue_id_r]; + assign dequeue_rw = write_table[dequeue_id_r]; + assign dequeue_id = dequeue_id_r; `ifdef DBG_TRACE_CACHE reg show_table; @@ -266,23 +255,18 @@ module VX_cache_mshr #( if (reset) begin show_table <= 0; end else begin - show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; + show_table <= allocate_fire || post_alloc_val || release_valid || fill_valid || dequeue_fire; end if (allocate_fire) begin - `TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) + `TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid)) end - if (lookup_valid) begin - `TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) - end - if (finalize_valid) begin - `TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) + if (release_valid) begin + `TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, release_id, rel_req_uuid)) end if (fill_valid) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) + `TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(fill_addr, 
BANK_ID), fill_id)) end if (dequeue_fire) begin `TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 354a57b0b..678f7af76 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -142,26 +142,26 @@ module VX_cache_tags #( wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; always @(posedge clk) begin if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) + `TRACE(3, ("%t: %s init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) end if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) end if (lookup) begin if (tag_matches != 0) begin if (write) begin - `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) end end else begin if (write) begin - `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s write-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end else begin - `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) + `TRACE(3, ("%t: %s read-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) end end end diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index cf862aa06..eb1f3d761 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -51,9 +51,8 @@ module VX_fetch import VX_gpu_pkg::*; #( wire [`NUM_THREADS-1:0] rsp_tmask; VX_dp_ram #( - .DATAW (`PC_BITS + `NUM_THREADS), - .SIZE (`NUM_WARPS), - .LUTRAM (1) + .DATAW (`PC_BITS + `NUM_THREADS), + .SIZE (`NUM_WARPS) ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index ded232f30..04efd91d3 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -16,7 +16,6 @@ module VX_ipdom_stack #( parameter WIDTH = 1, parameter DEPTH = 1, - parameter OUT_REG = 0, 
parameter ADDRW = `LOG2UP(DEPTH) ) ( input wire clk, @@ -33,7 +32,7 @@ module VX_ipdom_stack #( ); reg slot_set [DEPTH-1:0]; - reg [ADDRW-1:0] rd_ptr, wr_ptr; + reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr; reg empty_r, full_r; @@ -41,35 +40,42 @@ module VX_ipdom_stack #( wire d_set_n = slot_set[rd_ptr]; + always @(*) begin + rd_ptr_n = rd_ptr; + if (push) begin + rd_ptr_n = wr_ptr; + end else if (pop) begin + rd_ptr_n = rd_ptr - ADDRW'(d_set_n); + end + end + always @(posedge clk) begin if (reset) begin - rd_ptr <= '0; wr_ptr <= '0; empty_r <= 1; full_r <= 0; + rd_ptr <= '0; end else begin `ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time)); `ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time)); `ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time)); if (push) begin - rd_ptr <= wr_ptr; wr_ptr <= wr_ptr + ADDRW'(1); empty_r <= 0; full_r <= (ADDRW'(DEPTH-1) == wr_ptr); end else if (pop) begin wr_ptr <= wr_ptr - ADDRW'(d_set_n); - rd_ptr <= rd_ptr - ADDRW'(d_set_n); empty_r <= (rd_ptr == 0) && (d_set_n == 1); full_r <= 0; end + rd_ptr <= rd_ptr_n; end end VX_dp_ram #( - .DATAW (WIDTH * 2), - .SIZE (DEPTH), - .OUT_REG (OUT_REG ? 1 : 0), - .LUTRAM (OUT_REG ? 
0 : 1) + .DATAW (WIDTH * 2), + .SIZE (DEPTH), + .RADDR_REG (1) ) store ( .clk (clk), .reset (reset), @@ -78,7 +84,7 @@ module VX_ipdom_stack #( .wren (1'b1), .waddr (wr_ptr), .wdata ({q1, q0}), - .raddr (rd_ptr), + .raddr (rd_ptr_n), .rdata ({d1, d0}) ); @@ -94,7 +100,7 @@ module VX_ipdom_stack #( VX_pipe_register #( .DATAW (1), - .DEPTH (OUT_REG) + .DEPTH (0) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 7955437a6..c3f1f73f3 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -48,8 +48,7 @@ module VX_split_join import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), - .DEPTH (`DV_STACK_SIZE), - .OUT_REG (0) + .DEPTH (`DV_STACK_SIZE) ) ipdom_stack ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index c5a4bf32e..03521ce1a 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -20,7 +20,7 @@ module VX_fifo_queue #( parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, - parameter LUTRAM = 1, + parameter LUTRAM = 0, parameter SIZEW = `CLOG2(DEPTH+1) ) ( input wire clk, @@ -80,30 +80,38 @@ module VX_fifo_queue #( reg [DATAW-1:0] dout_r; reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r; + reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n; + + always @(*) begin + rd_ptr_n_n = rd_ptr_r; + if (pop) begin + if (DEPTH > 2) begin + rd_ptr_n_n = rd_ptr_r + ADDRW'(2); + end else begin // (DEPTH == 2); + rd_ptr_n_n = ~rd_ptr_n_r; + end + end + end always @(posedge clk) begin if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - rd_ptr_n_r <= 1; + wr_ptr_r <= '0; + rd_ptr_r <= '0; + rd_ptr_n_r <= '0; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); if (pop) begin rd_ptr_r <= rd_ptr_n_r; - if (DEPTH > 2) begin - rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); - end else begin // 
(DEPTH == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; - end end + rd_ptr_n_r <= rd_ptr_n_n; end end VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM) + .LUTRAM (LUTRAM), + .RADDR_REG (1) ) dp_ram ( .clk (clk), .reset (reset), @@ -112,7 +120,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n_r), + .raddr (rd_ptr_n_n), .rdata (dout) ); @@ -130,23 +138,28 @@ module VX_fifo_queue #( end else begin : g_no_out_reg - reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; reg [ADDRW-1:0] wr_ptr_r; + always @(*) begin + rd_ptr_n = rd_ptr_r + ADDRW'(pop); + end + always @(posedge clk) begin if (reset) begin - rd_ptr_r <= '0; wr_ptr_r <= '0; + rd_ptr_r <= '0; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_r + ADDRW'(pop); + rd_ptr_r <= rd_ptr_n; end end VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM) + .LUTRAM (LUTRAM), + .RADDR_REG (1) ) dp_ram ( .clk (clk), .reset (reset), @@ -155,7 +168,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_r), + .raddr (rd_ptr_n), .rdata (data_out) ); diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 4e8439818..61875b7fb 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -17,7 +17,7 @@ module VX_index_buffer #( parameter DATAW = 1, parameter SIZE = 1, - parameter LUTRAM = 1, + parameter LUTRAM = 0, parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, From d3df61abb06eef0f89bf4113ff9e9909dae3ffc7 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Wed, 9 Oct 2024 12:32:49 -0400 Subject: [PATCH 356/488] add initial development and production dockerfiles --- Dockerfile.dev | 20 +++++++++++ .../{Dockerfile.ubuntu => Dockerfile.prod} | 33 +++++++++++++------ 2 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 Dockerfile.dev rename miscs/docker/{Dockerfile.ubuntu => Dockerfile.prod} (61%) diff --git a/Dockerfile.dev 
b/Dockerfile.dev new file mode 100644 index 000000000..22cd74155 --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,20 @@ +FROM ubuntu:20.04 + +LABEL "Udit Subramanya"="usubramanya3@gatech.edu" + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y build-essential valgrind git wget libpng-dev libboost-all-dev uuid-dev ccache cmake + +# Third-Party Repository to Install g++11 on Ubuntu 18.04 +RUN apt-get install -y manpages-dev software-properties-common +RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test + +RUN apt-get install -y gcc-11 g++-11 + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11 +RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11 + +# create a directory for mounting the volume +WORKDIR /root/vortex \ No newline at end of file diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.prod similarity index 61% rename from miscs/docker/Dockerfile.ubuntu rename to miscs/docker/Dockerfile.prod index 64bb5813d..e1a8d94b5 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.prod @@ -17,29 +17,42 @@ FROM ubuntu:20.04 # Set non-interactive installation to avoid user input during build ARG DEBIAN_FRONTEND=noninteractive -# Update and install necessary dependencies -RUN apt-get update && apt-get install -y \ +# Install necessary dependencies and upgrade installed components +RUN apt-get update -y && \ + apt-get install -y \ software-properties-common \ build-essential \ python3 \ git \ wget \ curl \ - ca-certificates && \ + ca-certificates \ + valgrind \ + libstdc++6 \ + binutils \ + uuid-dev \ + ccache \ + cmake && \ + apt-get upgrade -y && \ + gcc_version=$(gcc -dumpversion) && \ + if dpkg --compare-versions "$gcc_version" lt 11; then \ + echo "GCC version is less than 11. Installing GCC 11..." 
&& \ + add-apt-repository -y ppa:ubuntu-toolchain-r/test && \ + apt-get update -y && \ + apt-get install -y g++-11 gcc-11 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100; \ + else \ + echo "GCC version is 11 or greater. No need to install GCC 11."; \ + fi && \ rm -rf /var/lib/apt/lists/* -# upgrade installed components -RUN apt-get upgrade && apt-get update - # Clone the Vortex repository RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /vortex # Set the initial working directory WORKDIR /vortex -# install system dependencies -RUN ./ci/install_dependencies.sh - # Configure the build folder RUN mkdir build && cd build && ../configure @@ -50,4 +63,4 @@ RUN cd build && ./ci/toolchain_install.sh --all RUN echo "source /vortex/build/ci/toolchain_env.sh" >> ~/.bashrc # Set the working directory to /vortex/build -WORKDIR /vortex/build \ No newline at end of file +WORKDIR /vortex/build From 8155173aab19b723e7fd8a612881ff5ec871e5fe Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 11 Oct 2024 07:40:21 -0700 Subject: [PATCH 357/488] add documentation based on intial feedback --- README.md | 5 ++++- docs/contributing.md | 6 +++--- docs/testing.md | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 553939b50..83a81a421 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backend drivers*, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before completing going forward with an RTL implementation. 
Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. +## Website +Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) + ## Specifications - Support RISC-V RV32IMAF and RV64IMAFD @@ -30,7 +33,7 @@ Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backe - `miscs`: Miscellaneous resources. ## Quick Start -The following steps demonstrate how to run Vortex with the default driver: simx. If you are interested in a different backend, look [here](docs/simulation.md). +If you are interested in a stable release of Vortex, you can download the latest release [here](https://github.com/vortexgpgpu/vortex/releases/latest). Otherwise, you can pull the most recent, but (potentially) unstable version as shown below. The following steps demonstrate how to build and run Vortex with the default driver: SimX. If you are interested in a different backend, look [here](docs/simulation.md). ### Supported OS Platforms - Ubuntu 18.04, 20.04 diff --git a/docs/contributing.md b/docs/contributing.md index f10f4017b..0250e9f9f 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -14,8 +14,8 @@ In an effort to keep `vortex` organized, permissions to directly create branches However, contributions are strongly encouraged and keep the project moving forward! Here is the procedure for contributing: 1. Create a fork of `vortex` -2. In your fork, create a branch that briefly explains the work you are adding (ie: `develop-documentation`) branches from `develop` and adds some documentation -3. Make your changes on your new branch in your fork. You may create as many commits as you need, which might be common if you are making multiple iterations +2. In your fork, create a branch from `master` that briefly explains the work you are adding (ie: `develop-documentation`) +3. Make your changes on the new branch in your fork. 
You may create as many commits as you need, which might be common if you are making multiple iterations 4. Since you are the owner of your fork, you have full permissions to push commits to your fork 4. When you are satisfied with the changes on your fork, you can open a PR from your fork using the online interface 5. If you recently made a push, you will get automatically get a prompt on Github online to create a PR, which you can press @@ -32,6 +32,6 @@ However, contributions are strongly encouraged and keep the project moving forwa 15. When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR ## What Makes a Good Contribution? -- If you are contributing code changes, then review `testing.md` to ensure your tests are integrated into the CI pipeline +- If you are contributing code changes, then review [testing.md](./testing.md) to ensure your tests are integrated into the [CI pipeline](continuous_integration.md) - During a PR, you should consider the advice you are provided by your reviewers. Remember you keep adding commits to an open PR! - If your change aims to fix an issue opened on Github, please tag that issue in the PR itself \ No newline at end of file diff --git a/docs/testing.md b/docs/testing.md index b2ae8fb2c..0ec46bda9 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -49,4 +49,4 @@ Compile your test: `$ make -C tests/regression/` Run your test: `$ ./ci/blackbox.sh --driver=simx --app= --debug` ## Adding Your Tests to the CI Pipeline -See `continuous_integration.md` \ No newline at end of file +If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md). 
\ No newline at end of file From 28bf27e951b7343087368b295f9e8b9b429217ae Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 03:40:45 -0700 Subject: [PATCH 358/488] rtl cache redesign to support xilinx bram types --- ci/regression.sh.in | 1 + hw/rtl/VX_cluster.sv | 2 +- hw/rtl/VX_config.vh | 15 ++ hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- hw/rtl/cache/VX_bank_flush.sv | 38 ++-- hw/rtl/cache/VX_cache_bank.sv | 376 ++++++++++++++++++++-------------- hw/rtl/cache/VX_cache_data.sv | 177 ++++++++-------- hw/rtl/cache/VX_cache_mshr.sv | 67 +++--- hw/rtl/cache/VX_cache_tags.sv | 123 ++++------- 10 files changed, 419 insertions(+), 384 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 443b34f5a..ddd4f12bd 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -155,6 +155,7 @@ cache() # test writeback CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index b5e9e0a5c..366d1bbac 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -98,7 +98,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (L2_TAG_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`L2_WRITEBACK), - .DIRTY_BYTES (`L2_WRITEBACK), + .DIRTY_BYTES (`L2_DIRTYBYTES), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index fb4756633..4f666ce20 100644 --- 
a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -542,6 +542,11 @@ `define DCACHE_WRITEBACK 0 `endif +// Enable Cache Dirty bytes +`ifndef DCACHE_DIRTYBYTES +`define DCACHE_DIRTYBYTES 0 +`endif + // LMEM Configurable Knobs //////////////////////////////////////////////////// `ifndef LMEM_DISABLE @@ -602,6 +607,11 @@ `define L2_WRITEBACK 0 `endif +// Enable Cache Dirty bytes +`ifndef L2_DIRTYBYTES +`define L2_DIRTYBYTES 0 +`endif + // L3cache Configurable Knobs ///////////////////////////////////////////////// // Cache Size @@ -644,6 +654,11 @@ `define L3_WRITEBACK 0 `endif +// Enable Cache Dirty bytes +`ifndef L3_DIRTYBYTES +`define L3_DIRTYBYTES 0 +`endif + `ifndef MEMORY_BANKS `define MEMORY_BANKS 2 `endif diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 9c7fe1287..4ce547c7e 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -150,7 +150,7 @@ module VX_socket import VX_gpu_pkg::*; #( .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`DCACHE_WRITEBACK), - .DIRTY_BYTES (`DCACHE_WRITEBACK), + .DIRTY_BYTES (`DCACHE_DIRTYBYTES), .NC_ENABLE (1), .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 4f9f495ce..40f95a81a 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -84,7 +84,7 @@ module Vortex import VX_gpu_pkg::*; ( .TAG_WIDTH (L2_MEM_TAG_WIDTH), .WRITE_ENABLE (1), .WRITEBACK (`L3_WRITEBACK), - .DIRTY_BYTES (`L3_WRITEBACK), + .DIRTY_BYTES (`L3_DIRTYBYTES), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 3228bd3a5..ca28d749b 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -48,20 +48,20 @@ module VX_bank_flush #( localparam STATE_WAIT2 = 4; localparam STATE_DONE = 5; - reg [2:0] state_r, state_n; + reg [2:0] state, state_n; - reg [CTR_WIDTH-1:0] counter_r; + reg [CTR_WIDTH-1:0] counter; always @(*) begin - state_n = state_r; - case 
(state_r) + state_n = state; + case (state) STATE_IDLE: begin if (flush_begin) begin state_n = STATE_WAIT1; end end STATE_INIT: begin - if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin + if (counter == ((2 ** `CS_LINE_SEL_BITS)-1)) begin state_n = STATE_IDLE; end end @@ -72,7 +72,7 @@ module VX_bank_flush #( end end STATE_FLUSH: begin - if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin + if (counter == ((2 ** CTR_WIDTH)-1) && flush_ready) begin state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2; end end @@ -93,32 +93,32 @@ module VX_bank_flush #( always @(posedge clk) begin if (reset) begin - state_r <= STATE_INIT; - counter_r <= '0; + state <= STATE_INIT; + counter <= '0; end else begin - state_r <= state_n; - if (state_r != STATE_IDLE) begin - if ((state_r == STATE_INIT) - || ((state_r == STATE_FLUSH) && flush_ready)) begin - counter_r <= counter_r + CTR_WIDTH'(1); + state <= state_n; + if (state != STATE_IDLE) begin + if ((state == STATE_INIT) + || ((state == STATE_FLUSH) && flush_ready)) begin + counter <= counter + CTR_WIDTH'(1); end end else begin - counter_r <= '0; + counter <= '0; end end end - assign flush_end = (state_r == STATE_DONE); - assign flush_init = (state_r == STATE_INIT); - assign flush_valid = (state_r == STATE_FLUSH); - assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; + assign flush_end = (state == STATE_DONE); + assign flush_init = (state == STATE_INIT); + assign flush_valid = (state == STATE_FLUSH); + assign flush_line = counter[`CS_LINE_SEL_BITS-1:0]; if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way VX_decoder #( .N (`CS_WAY_SEL_BITS), .D (NUM_WAYS) ) ctr_decoder ( - .sel_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), .data_in (1'b1), .data_out (flush_way) ); diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 6a7fcaf52..d32e9423f 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ 
-141,13 +141,18 @@ module VX_cache_bank #( wire [MSHR_ADDR_WIDTH-1:0] replay_id; wire replay_ready; - wire is_init_st0, is_init_st1; + + wire valid_sel, valid_st0, valid_st1; + wire is_init_st0; + wire is_creq_st0, is_creq_st1; + wire is_fill_st0, is_fill_st1; wire is_flush_st0, is_flush_st1; wire [NUM_WAYS-1:0] flush_way_st0; + wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; - wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; + wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1; wire rw_sel, rw_st0, rw_st1; wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; @@ -158,16 +163,10 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; - wire valid_sel, valid_st0, valid_st1; - wire is_creq_st0, is_creq_st1; - wire is_fill_st0, is_fill_st1; wire is_replay_st0, is_replay_st1; wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; - wire evict_dirty_st0, evict_dirty_st1; - wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1; - wire [NUM_WAYS-1:0] tag_matches_st0; - wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; wire mshr_pending_st0, mshr_pending_st1; + wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_id_st0, mshr_prev_id_st1; wire mshr_empty; wire flush_valid; @@ -201,11 +200,9 @@ module VX_cache_bank #( .bank_empty (no_pending_req) ); - wire rdw_hazard1_sel; - wire rdw_hazard2_sel; - reg rdw_hazard3_st1; + logic rdw_hazard, post_hazard; - wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1; + wire pipe_stall = crsp_queue_stall || rdw_hazard; // inputs arbitration: // mshr replay has highest priority to maximize utilization since there is no miss. 
@@ -224,17 +221,14 @@ module VX_cache_bank #( wire creq_enable = creq_grant && core_req_valid; assign replay_ready = replay_grant - && ~rdw_hazard1_sel && ~pipe_stall; assign mem_rsp_ready = fill_grant && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions - && ~rdw_hazard2_sel && ~pipe_stall; assign flush_ready = flush_grant && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions - && ~rdw_hazard2_sel && ~pipe_stall; assign core_req_ready = creq_grant @@ -298,6 +292,12 @@ module VX_cache_bank #( assign req_uuid_sel = '0; end + wire is_init_sel = init_valid; + wire is_creq_sel = creq_enable || replay_enable; + wire is_fill_sel = fill_enable; + wire is_flush_sel = flush_enable; + wire is_replay_sel = replay_enable; + VX_pipe_register #( .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) @@ -305,8 +305,8 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}), - .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0}) + .data_in ({valid_sel, is_init_sel, is_fill_sel, is_flush_sel, is_creq_sel, is_replay_sel, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}), + .data_out ({valid_st0, is_init_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st0 @@ -315,82 +315,67 @@ module VX_cache_bank #( assign req_uuid_st0 = '0; end 
- wire do_init_st0 = valid_st0 && is_init_st0; - wire do_flush_st0 = valid_st0 && is_flush_st0; - wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0; - wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0; - wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0; - wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0; - wire do_fill_st0 = valid_st0 && is_fill_st0; - wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0; - wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0; - wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0; + wire is_read_st0 = is_creq_st0 && ~rw_st0; + wire is_write_st0 = is_creq_st0 && rw_st0; + + wire do_init_st0 = valid_st0 && is_init_st0; + wire do_flush_st0 = valid_st0 && is_flush_st0; + wire do_read_st0 = valid_st0 && is_read_st0; + wire do_write_st0 = valid_st0 && is_write_st0; + wire do_fill_st0 = valid_st0 && is_fill_st0; assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; - assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); - wire [NUM_WAYS-1:0] evict_way_st0; - wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0; + wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; + wire [NUM_WAYS-1:0] tag_matches_st1; + + wire do_lookup_st0 = do_read_st0 || do_write_st0; VX_cache_tags #( - .INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)), - .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), - .WRITEBACK (WRITEBACK), - .UUID_WIDTH (UUID_WIDTH) + .WRITEBACK (WRITEBACK) ) cache_tags ( .clk (clk), .reset (reset), - - .req_uuid (req_uuid_st0), - - // init/flush/fill/write/lookup + .stall (pipe_stall), + // inputs .init (do_init_st0), .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), - .write (do_cache_wr_st0 && ~pipe_stall), .lookup (do_lookup_st0 && ~pipe_stall), .line_addr (addr_st0), - .way_idx (flush_way_st0), - - // tag matches - .tag_matches(tag_matches_st0), 
- - // replacement - .evict_dirty(evict_dirty_st0), + .flush_way (flush_way_st0), + // outputs + .tag_matches_r(tag_matches_st1), + .line_tag_r (line_tag_st1), + .evict_tag_r(evict_tag_st1), .evict_way (evict_way_st0), - .evict_tag (evict_tag_st0) + .evict_way_r(evict_way_st1) ); - wire [`CS_TAG_SEL_BITS-1:0] line_tag2_st0; - wire is_flush2_st0 = WRITEBACK && is_flush_st0; - - assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0; - - assign way_idx_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0; - - assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0; + wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; + assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_id_st0, mshr_pending_st0}), + .data_out 
({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_id_st1, mshr_pending_st1}) ); // we have a tag hit - wire is_hit_st1 = (| way_idx_st1); + wire is_hit_st1 = (| tag_matches_st1); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; @@ -398,51 +383,71 @@ module VX_cache_bank #( assign req_uuid_st1 = '0; end - wire is_read_st1 = is_creq_st1 && ~rw_st1; - wire is_write_st1 = is_creq_st1 && rw_st1; + wire is_read_st1 = is_creq_st1 && ~rw_st1; + wire is_write_st1 = is_creq_st1 && rw_st1; - wire do_init_st1 = valid_st1 && is_init_st1; - wire do_fill_st1 = valid_st1 && is_fill_st1; - wire do_flush_st1 = valid_st1 && is_flush_st1; - - wire do_creq_rd_st1 = valid_st1 && is_read_st1; - wire do_creq_wr_st1 = valid_st1 && is_write_st1; - wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1; - wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1; - - wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1; - wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1; - - wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1; - wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1; - - wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1; - wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1; + wire do_read_st1 = valid_st1 && is_read_st1; + wire do_write_st1 = valid_st1 && is_write_st1; + wire do_fill_st1 = valid_st1 && is_fill_st1; + wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) - // both tag and data stores use BRAM with no read-during-write protection. - // we ned to stall the pipeline to prevent read-after-write hazards. 
- assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill - assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceeding write - always @(posedge clk) begin - // stall reads following writes to same line address - rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_idx_st0 == line_idx_st1) - && ~rdw_hazard3_st1; // release pipeline stall + if (WRITE_ENABLE) begin : g_rdw_hazard + // This implementation uses single-port BRAMs for the tags and data stores. + // Using different stages for read and write operations requires a pipeline stall in between due to address port sharing. + // Tags fill/flush can perform read and write in the same stage, since no dependency between. + // Data fill/flush can perform read and write in the same stage, since way_idx is available in st0. + // A data read should happen in st0 for its result to be available in st1. + // A data write should happen in st1 when the tag hit status is available. + wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; + wire is_read_sel = is_creq_sel && !rw_sel; + wire is_write_sel = is_creq_sel && rw_sel; + wire is_same_read_sel = is_read_sel && (line_idx_sel == line_idx_st0); + always @(posedge clk) begin + if (reset) begin + post_hazard <= 0; + rdw_hazard <= 0; + end else begin + if (!crsp_queue_stall) begin + post_hazard <= rdw_hazard; + rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || is_same_read_sel || (is_flush_sel && !WRITEBACK)); + end + end + end + end else begin : g_rdw_hazard_ro + assign rdw_hazard = 0; + assign post_hazard = 0; end assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; - wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1; + `UNUSED_VAR (data_st1) - wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; - wire [LINE_SIZE-1:0] dirty_byteen_st1; + wire [`CS_LINE_WIDTH-1:0] evict_data_st1; + wire [LINE_SIZE-1:0] evict_byteen_st1; + wire line_dirty_st1; + + wire data_write; + wire 
[`CS_LINE_SEL_BITS-1:0] data_line_idx; + + if (WRITE_ENABLE) begin : g_data_ctrl + // by default all data accesses happen in sto and use line_idx_st0. + // data writes should happen in st1 when the tag hit is available, + // and use line_idx_st1 to ensure the correct line is updated. + // if a rdw hazard is active due to conflict, ensure we don't write twice. + assign data_write = do_write_st1 && !post_hazard && ~crsp_queue_stall; + assign data_line_idx = data_write ? line_idx_st1 : line_idx_st0; + end else begin : g_data_ctrl_ro + `UNUSED_VAR (post_hazard) + `UNUSED_VAR (do_write_st1) + assign data_write = 0; + assign data_line_idx = line_idx_st0; + end VX_cache_data #( - .INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)), - .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -450,50 +455,58 @@ module VX_cache_bank #( .WORD_SIZE (WORD_SIZE), .WRITE_ENABLE (WRITE_ENABLE), .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), - .UUID_WIDTH (UUID_WIDTH) + .DIRTY_BYTES (DIRTY_BYTES) ) cache_data ( .clk (clk), .reset (reset), - .req_uuid (req_uuid_st1), - .init (do_init_st1), - .fill (do_fill_st1 && ~pipe_stall), - .flush (do_flush_st1 && ~pipe_stall), - .write (do_cache_wr_st1 && ~pipe_stall), - .read (do_cache_rd_st1 && ~pipe_stall), - .way_idx (way_idx_st1), - .line_addr (addr_st1), - .word_idx (word_idx_st1), - .fill_data (fill_data_st1), + .stall (pipe_stall), + // inputs + .init (do_init_st0), + .fill (do_fill_st0 && ~pipe_stall), + .flush (do_flush_st0 && ~pipe_stall), + .read (do_read_st0 && ~pipe_stall), + .write (data_write), + .evict_way (evict_way_st0), + .tag_matches(tag_matches_st1), + .line_idx (data_line_idx), + .fill_data (data_st0), .write_data (write_data_st1), + .word_idx (word_idx_st1), .write_byteen(byteen_st1), + // outputs .read_data (read_data_st1), - .dirty_data (dirty_data_st1), - .dirty_byteen(dirty_byteen_st1) + .line_dirty (line_dirty_st1), + .evict_data (evict_data_st1), + 
.evict_byteen(evict_byteen_st1) ); - wire mshr_allocate_st0 = valid_st0 && is_creq_st0; + wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~is_replay_st0; + wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~is_replay_st1; // release allocated mshr entry if we had a hit wire mshr_release_st1; if (WRITEBACK) begin : g_mshr_release - assign mshr_release_st1 = valid_st1 && is_creq_st1 && is_hit_st1; + assign mshr_release_st1 = is_hit_st1; end else begin : g_mshr_release_ro - // we need to keep missed write requests in MSHR if there is already a pending entry to the same address - // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content - // this can happen when writes are sent late, when the fill was already in flight. - assign mshr_release_st1 = valid_st1 && is_creq_st1 && (is_hit_st1 || (rw_st1 && ~mshr_pending_st1)); + // we need to keep missed write requests in MSHR if there is already a pending entry to the same address. + // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content. + // this can happen when writes are sent to memory late, when a related fill was already in flight. 
+ assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1); end - wire mshr_dequeue = mshr_release_st1 && ~pipe_stall; + wire mshr_release_fire = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall; + + wire [1:0] mshr_dequeue; + `POP_COUNT(mshr_dequeue, {replay_fire, mshr_release_fire}); VX_pending_size #( - .SIZE (MSHR_SIZE) + .SIZE (MSHR_SIZE), + .DECRW (2) ) mshr_pending_size ( .clk (clk), .reset (reset), .incr (core_req_fire), - .decr (replay_fire || mshr_dequeue), + .decr (mshr_dequeue), .empty (mshr_empty), `UNUSED_PIN (alm_empty), .full (mshr_alm_full), @@ -508,7 +521,6 @@ module VX_cache_bank #( .NUM_BANKS (NUM_BANKS), .MSHR_SIZE (MSHR_SIZE), .WRITEBACK (WRITEBACK), - .RDW_STALL (1), .UUID_WIDTH (UUID_WIDTH), .DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH) ) cache_mshr ( @@ -517,7 +529,7 @@ module VX_cache_bank #( .deq_req_uuid (req_uuid_sel), .alc_req_uuid (req_uuid_st0), - .rel_req_uuid (req_uuid_st1), + .fin_req_uuid (req_uuid_st1), // memory fill .fill_valid (mem_rsp_fire), @@ -539,11 +551,15 @@ module VX_cache_bank #( .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_pending(mshr_pending_st0), + .allocate_previd(mshr_prev_id_st0), `UNUSED_PIN (allocate_ready), - // release - .release_valid (mshr_release_st1 && ~pipe_stall), - .release_id (mshr_id_st1) + // finalize + .finalize_valid (mshr_finalize_st1 && ~pipe_stall), + .finalize_is_release(mshr_release_st1), + .finalize_is_pending(mshr_pending_st1), + .finalize_id (mshr_id_st1), + .finalize_previd(mshr_prev_id_st1) ); // schedule core response @@ -553,7 +569,7 @@ module VX_cache_bank #( wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx; wire [TAG_WIDTH-1:0] crsp_queue_tag; - assign crsp_queue_valid = do_cache_rd_st1; + assign crsp_queue_valid = do_read_st1 && is_hit_st1; assign crsp_queue_idx = req_idx_st1; assign crsp_queue_data = read_data_st1; assign crsp_queue_tag = tag_st1; @@ 
-565,7 +581,7 @@ module VX_cache_bank #( ) core_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (crsp_queue_valid && ~rdw_hazard3_st1), + .valid_in (crsp_queue_valid && ~rdw_hazard), .ready_in (crsp_queue_ready), .data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}), .data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}), @@ -585,37 +601,26 @@ module VX_cache_bank #( wire mreq_queue_rw; wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags; - wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1; + wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK); wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; - wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; - - if (WRITEBACK) begin : g_mreq_queue_push - if (DIRTY_BYTES) begin : g_dirty_bytes - // ensure dirty bytes match the tag info - wire has_dirty_bytes = (| dirty_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) - end - assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) - || do_writeback_st1) - && ~rdw_hazard3_st1; - end else begin : g_mreq_queue_push_ro - `UNUSED_VAR (do_write_miss_st1) - `UNUSED_VAR (do_writeback_st1) - assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) - || do_creq_wr_st1) - && ~rdw_hazard3_st1; - end - - assign mreq_queue_pop = mem_req_valid && mem_req_ready; - assign mreq_queue_addr = addr_st1; - assign mreq_queue_flags = flags_st1; + wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1; + wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1}; if (WRITE_ENABLE) begin : g_mreq_queue - if (WRITEBACK) begin : g_writeback + if (WRITEBACK) begin : g_wb + if (DIRTY_BYTES) begin : g_dirty_bytes + // ensure dirty bytes match the tag info + wire has_dirty_bytes = (| 
evict_byteen_st1); + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) + end + assign mreq_queue_push = (((do_read_st1 || do_write_st1) && ~is_hit_st1 && ~mshr_pending_st1) + || do_writeback_st1) + && ~pipe_stall; + assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1; assign mreq_queue_rw = is_fill_or_flush_st1; - assign mreq_queue_data = dirty_data_st1; - assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1; - end else begin : g_writethrough + assign mreq_queue_data = evict_data_st1; + assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1; + end else begin : g_wt wire [LINE_SIZE-1:0] line_byteen; VX_decoder #( .N (`CS_WORD_SEL_BITS), @@ -625,19 +630,30 @@ module VX_cache_bank #( .data_in (byteen_st1), .data_out (line_byteen) ); + assign mreq_queue_push = ((do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) + || do_write_st1) + && ~pipe_stall; + assign mreq_queue_addr = addr_st1; assign mreq_queue_rw = rw_st1; assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}}; assign mreq_queue_byteen = rw_st1 ? 
line_byteen : '1; `UNUSED_VAR (is_fill_or_flush_st1) - `UNUSED_VAR (dirty_data_st1) - `UNUSED_VAR (dirty_byteen_st1) + `UNUSED_VAR (do_writeback_st1) + `UNUSED_VAR (evict_addr_st1) + `UNUSED_VAR (evict_data_st1) + `UNUSED_VAR (evict_byteen_st1) end end else begin : g_mreq_queue_ro + assign mreq_queue_push = (do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) + && ~pipe_stall; + assign mreq_queue_addr = addr_st1; assign mreq_queue_rw = 0; assign mreq_queue_data = '0; assign mreq_queue_byteen = '1; - `UNUSED_VAR (dirty_data_st1) - `UNUSED_VAR (dirty_byteen_st1) + `UNUSED_VAR (do_writeback_st1) + `UNUSED_VAR (evict_addr_st1) + `UNUSED_VAR (evict_data_st1) + `UNUSED_VAR (evict_byteen_st1) end if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid @@ -646,6 +662,9 @@ module VX_cache_bank #( assign mreq_queue_tag = mshr_id_st1; end + assign mreq_queue_pop = mem_req_valid && mem_req_ready; + assign mreq_queue_flags = flags_st1; + VX_fifo_queue #( .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)), .DEPTH (MREQ_SIZE), @@ -667,6 +686,10 @@ module VX_cache_bank #( assign mem_req_valid = ~mreq_queue_empty; + `UNUSED_VAR (do_fill_st1) + `UNUSED_VAR (do_flush_st1) + `UNUSED_VAR (evict_way_st1) + /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE @@ -681,7 +704,7 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) + `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) end if (mem_rsp_fire) begin `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) @@ -696,13 +719,54 @@ module VX_cache_bank #( `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end end + if (do_init_st0) begin + `TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0)) + end + if (do_fill_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + end + if (do_flush_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + end + if (do_read_st1 && ~pipe_stall) begin + if (is_hit_st1) begin + `TRACE(3, ("%t: %s tags-rd-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end else begin + `TRACE(3, ("%t: %s tags-rd-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) + end + end + if (do_write_st1 && ~pipe_stall) begin + if (is_hit_st1) begin + `TRACE(3, ("%t: %s tags-wr-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end else begin + `TRACE(3, ("%t: %s tags-wr-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) + end + end + if (do_fill_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s 
data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) + end + if (do_flush_st0 && ~pipe_stall) begin + `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + end + if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin + `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) + end + if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin + `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) + end if (crsp_queue_fire) begin `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin - if (do_creq_wr_st1 && !WRITEBACK) begin + if (!WRITEBACK && do_write_st1) begin `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) - end else if (do_writeback_st1) begin + end else if (WRITEBACK && do_writeback_st1) begin `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else begin `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, 
BANK_ID), mshr_id_st1, req_uuid_st1)) diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 54a78e357..278caccd5 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -14,8 +14,6 @@ `include "VX_cache_define.vh" module VX_cache_data #( - parameter `STRING INSTANCE_ID= "", - parameter BANK_ID = 0, // Size of cache in bytes parameter CACHE_SIZE = 1024, // Size of line inside a bank in bytes @@ -31,94 +29,105 @@ module VX_cache_data #( // Enable cache writeback parameter WRITEBACK = 0, // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, - // Request debug identifier - parameter UUID_WIDTH = 0 + parameter DIRTY_BYTES = 0 ) ( input wire clk, input wire reset, - -`IGNORE_UNUSED_BEGIN - input wire[`UP(UUID_WIDTH)-1:0] req_uuid, -`IGNORE_UNUSED_END - + input wire stall, + // inputs input wire init, input wire fill, input wire flush, - input wire write, input wire read, - input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, + input wire write, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx, + input wire [NUM_WAYS-1:0] evict_way, + input wire [NUM_WAYS-1:0] tag_matches, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, input wire [`CS_WORD_WIDTH-1:0] write_data, input wire [WORD_SIZE-1:0] write_byteen, - input wire [NUM_WAYS-1:0] way_idx, + input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, + // outputs output wire [`CS_WORD_WIDTH-1:0] read_data, - output wire [`CS_LINE_WIDTH-1:0] dirty_data, - output wire [LINE_SIZE-1:0] dirty_byteen + output wire line_dirty, + output wire [`CS_LINE_WIDTH-1:0] evict_data, + output wire [LINE_SIZE-1:0] evict_byteen ); - `UNUSED_SPARAM (INSTANCE_ID) - `UNUSED_PARAM (BANK_ID) `UNUSED_PARAM (WORD_SIZE) - `UNUSED_VAR (line_addr) - `UNUSED_VAR (init) - `UNUSED_VAR (read) - `UNUSED_VAR (flush) + `UNUSED_VAR (stall) localparam BYTEENW = (WRITE_ENABLE != 0) ? 
LINE_SIZE : 1; wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - wire [`LOG2UP(NUM_WAYS)-1:0] way_idx_bin; - wire [`CS_LINE_SEL_BITS-1:0] line_idx; - assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; + if (WRITEBACK != 0) begin : g_writeback + localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0); + wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r; - VX_onehot_encoder #( - .N (NUM_WAYS) - ) way_idx_enc ( - .data_in (way_idx), - .data_out (way_idx_bin), - `UNUSED_PIN (valid_out) - ); + VX_onehot_encoder #( + .N (NUM_WAYS) + ) fill_way_enc ( + .data_in (evict_way), + .data_out (evict_way_idx), + `UNUSED_PIN (valid_out) + ); - if (WRITEBACK) begin : g_dirty_data - assign dirty_data = line_rdata[way_idx_bin]; - end else begin : g_dirty_data_0 - assign dirty_data = '0; - end + `BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1); - if (DIRTY_BYTES) begin : g_dirty_byteen - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_rdata; - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_wdata; + wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata; + wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata; + wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j - wire [WORD_SIZE-1:0] word_mask = {WORD_SIZE{(WORD_SIZE == 1) || (word_idx == j)}}; - wire [WORD_SIZE-1:0] wdata = write ? (bs_rdata[i][j] | (write_byteen & word_mask)) : ((fill || flush) ? '0 : bs_rdata[i][j]); - assign bs_wdata[i][j] = init ? '0 : (way_idx[i] ? wdata : bs_rdata[i][j]); + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata + wire dirty_data = write; // only asserted on writes + wire dirty_wren = init || (write ? 
tag_matches[i] : evict_way[i]); + + if (DIRTY_BYTES != 0) begin : g_dirty_bytes + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_data; + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_wren; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j + wire word_sel = tag_matches[i] && ((WORD_SIZE == 1) || (word_idx == j)); + wire [WORD_SIZE-1:0] word_en = write_byteen & {WORD_SIZE{word_sel}}; + assign bytes_data[j] = {WORD_SIZE{write}}; // only asserted on writes + assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? word_en : {WORD_SIZE{evict_way[i]}}); + end + assign byteen_wdata[i] = {dirty_data, bytes_data}; + assign byteen_wren[i] = {dirty_wren, bytes_wren}; + assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; + end else begin : g_no_dirty_bytes + assign byteen_wdata[i] = dirty_data; + assign byteen_wren[i] = dirty_wren; + assign line_dirty = byteen_rdata[evict_way_idx_r]; + assign evict_byteen = '1; end end - wire bs_read = write || fill || flush; - wire bs_write = init || write || fill || flush; + wire byteen_read = fill || flush; + wire byteen_write = init || write || fill || flush; VX_sp_ram #( - .DATAW (LINE_SIZE * NUM_WAYS), - .SIZE (`CS_LINES_PER_BANK) + .DATAW (BYTEEN_DATAW * NUM_WAYS), + .WRENW (BYTEEN_DATAW * NUM_WAYS), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (1) ) byteen_store ( .clk (clk), .reset (reset), - .read (bs_read), - .write (bs_write), - .wren (1'b1), + .read (byteen_read), + .write (byteen_write), + .wren (byteen_wren), .addr (line_idx), - .wdata (bs_wdata), - .rdata (bs_rdata) + .wdata (byteen_wdata), + .rdata (byteen_rdata) ); - assign dirty_byteen = bs_rdata[way_idx_bin]; - end else begin : g_dirty_byteen_0 - assign dirty_byteen = '1; + assign evict_data = line_rdata[evict_way_idx_r]; + + end else begin : g_no_writeback + `UNUSED_VAR (init) + assign line_dirty = 0; + assign evict_data = '0; + assign evict_byteen = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store @@ -128,29 +137,26 @@ module 
VX_cache_data #( wire line_write; wire line_read; - wire way_en = (NUM_WAYS == 1) || way_idx[i]; - if (WRITE_ENABLE != 0) begin : g_line_data wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j wire word_en = (WORD_SIZE == 1) || (word_idx == j); - assign line_wdata[j] = fill ? fill_data[j] : write_data; - assign wren_w[j] = {WORD_SIZE{fill}} | (write_byteen & {WORD_SIZE{word_en}}); + assign line_wdata[j] = write ? write_data : fill_data[j]; + assign wren_w[j] = write ? (write_byteen & {WORD_SIZE{word_en}}) : {WORD_SIZE{1'b1}}; end assign line_wren = wren_w; - assign line_write = (fill || write) && way_en; - if (WRITEBACK) begin : g_line_read_wb - assign line_read = (read || fill || flush); - end else begin : g_line_read_wt - assign line_read = read; - end + assign line_write = (fill && ((NUM_WAYS == 1) || evict_way[i])) + || (write && tag_matches[i]); + assign line_read = read || ((fill || flush) && WRITEBACK); end else begin : g_line_data_ro `UNUSED_VAR (write) + `UNUSED_VAR (flush) `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_data) + `UNUSED_VAR (word_idx) assign line_wdata = fill_data; assign line_wren = 1'b1; - assign line_write = fill && way_en; + assign line_write = fill && ((NUM_WAYS == 1) || evict_way[i]); assign line_read = read; end @@ -158,8 +164,7 @@ module VX_cache_data #( .DATAW (`CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), .WRENW (BYTEENW), - .NO_RWCHECK (1), - .RW_ASSERT (1) + .OUT_REG (1) ) data_store ( .clk (clk), .reset (reset), @@ -172,9 +177,18 @@ module VX_cache_data #( ); end + wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; + VX_onehot_encoder #( + .N (NUM_WAYS) + ) hit_idx_enc ( + .data_in (tag_matches), + .data_out (hit_way_idx), + `UNUSED_PIN (valid_out) + ); + if (`CS_WORDS_PER_LINE > 1) begin : g_read_data // order the data layout to perform ways multiplexing last. - // this allows converting way index to binary in parallel with BRAM readaccess and way selection. 
+ // this allows converting way index to binary in parallel with BRAM read and word indexing. wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; VX_transpose #( .DATAW (`CS_WORD_WIDTH), @@ -184,27 +198,10 @@ module VX_cache_data #( .data_in (line_rdata), .data_out (transposed_rdata) ); - assign read_data = transposed_rdata[word_idx][way_idx_bin]; + assign read_data = transposed_rdata[word_idx][hit_way_idx]; end else begin : g_read_data_1w `UNUSED_VAR (word_idx) - assign read_data = line_rdata[way_idx_bin]; + assign read_data = line_rdata[hit_way_idx]; end -`ifdef DBG_TRACE_CACHE - always @(posedge clk) begin - if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data)) - end - if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data)) - end - if (read) begin - `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid)) - end - if (write) begin - `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid)) - end - end -`endif - endmodule diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index ff3ead64f..c8f89376a 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -24,7 +24,7 @@ // arrival and are dequeued in the same order. // Each entry has a next pointer to the next entry pending for the same cache line. 
// -// During the fill request, the MSHR will release the MSHR entry at fill_id +// During the fill request, the MSHR will dequeue the MSHR entry at the fill_id location // which represents the first request in the pending list that initiated the memory fill. // // The dequeue response directly follows the fill request and will release @@ -35,7 +35,8 @@ // the slot id of the previous entry for the same cache line. This is used to // link the new entry to the pending list. // -// The release request is used to invalidate the allocated MSHR entry if we had a cache hit. +// The finalize request is used to persist or release the currently allocated MSHR entry +// if we had a cache miss or a hit, respectively. // // Warning: This MSHR implementation is strongly coupled with the bank pipeline // and as such changes to either module requires careful evaluation. @@ -56,8 +57,6 @@ module VX_cache_mshr #( parameter DATA_WIDTH = 1, // Enable cache writeback parameter WRITEBACK = 0, - // Cache stall on read during write - RDW_STALL = 0, parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE) ) ( @@ -67,7 +66,7 @@ module VX_cache_mshr #( `IGNORE_UNUSED_BEGIN input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid, input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid, - input wire[`UP(UUID_WIDTH)-1:0] rel_req_uuid, + input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid, `IGNORE_UNUSED_END // memory fill @@ -90,11 +89,15 @@ module VX_cache_mshr #( input wire [DATA_WIDTH-1:0] allocate_data, output wire [MSHR_ADDR_WIDTH-1:0] allocate_id, output wire allocate_pending, + output wire [MSHR_ADDR_WIDTH-1:0] allocate_previd, output wire allocate_ready, - // release - input wire release_valid, - input wire [MSHR_ADDR_WIDTH-1:0] release_id + // finalize + input wire finalize_valid, + input wire finalize_is_release, + input wire finalize_is_pending, + input wire [MSHR_ADDR_WIDTH-1:0] finalize_previd, + input wire [MSHR_ADDR_WIDTH-1:0] finalize_id ); `UNUSED_PARAM (BANK_ID) @@ -112,8 +115,6 @@ module VX_cache_mshr #( reg 
[MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n; wire [MSHR_ADDR_WIDTH-1:0] prev_idx; - reg [MSHR_ADDR_WIDTH-1:0] post_alloc_id, post_alloc_previd; - reg post_alloc_val; wire allocate_fire = allocate_valid && allocate_ready; wire dequeue_fire = dequeue_valid && dequeue_ready; @@ -157,19 +158,20 @@ module VX_cache_mshr #( valid_table_n[dequeue_id] = 0; if (next_table[dequeue_id]) begin dequeue_id_n = next_index[dequeue_id]; - end else if (!RDW_STALL && post_alloc_val && (post_alloc_previd == dequeue_id)) begin - dequeue_id_n = post_alloc_id; + end else if (finalize_valid && finalize_is_pending && (finalize_previd == dequeue_id)) begin + dequeue_id_n = finalize_id; end else begin dequeue_val_n = 0; end end - if (release_valid) begin - valid_table_n[release_id] = 0; - end - - if (post_alloc_val) begin - next_table_x[post_alloc_previd] = 1; + if (finalize_valid) begin + if (finalize_is_release) begin + valid_table_n[finalize_id] = 0; + end + if (finalize_is_pending) begin + next_table_x[finalize_previd] = 1; + end end next_table_n = next_table_x; @@ -184,12 +186,10 @@ module VX_cache_mshr #( valid_table <= '0; allocate_rdy <= 0; dequeue_val <= 0; - post_alloc_val <= 0; end else begin valid_table <= valid_table_n; allocate_rdy <= allocate_rdy_n; dequeue_val <= dequeue_val_n; - post_alloc_val <= allocate_fire && allocate_pending; end if (allocate_fire) begin @@ -197,22 +197,20 @@ module VX_cache_mshr #( write_table[allocate_id] <= allocate_rw; end - if (post_alloc_val) begin - next_index[post_alloc_previd] <= post_alloc_id; + if (finalize_valid && finalize_is_pending) begin + next_index[finalize_previd] <= finalize_id; end dequeue_id_r <= dequeue_id_n; allocate_id_r <= allocate_id_n; next_table <= next_table_n; - post_alloc_id <= allocate_id; - post_alloc_previd <= prev_idx; end - `RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, + `RUNTIME_ASSERT(~(allocate_fire && 
valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid)) - `RUNTIME_ASSERT((~release_valid || valid_table[release_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[release_id], BANK_ID), release_id, rel_req_uuid)) + `RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) @@ -220,7 +218,7 @@ module VX_cache_mshr #( VX_dp_ram #( .DATAW (DATA_WIDTH), .SIZE (MSHR_SIZE), - .RADDR_REG (1) + .OUT_REG (1) ) entries ( .clk (clk), .reset (reset), @@ -236,7 +234,9 @@ module VX_cache_mshr #( assign fill_addr = addr_table[fill_id]; assign allocate_ready = allocate_rdy; - assign allocate_id = allocate_id_r; + assign allocate_id = allocate_id_r; + assign allocate_previd = prev_idx; + if (WRITEBACK) begin : g_pending_wb assign allocate_pending = |addr_matches; end else begin : g_pending_wt @@ -255,14 +255,17 @@ module VX_cache_mshr #( if (reset) begin show_table <= 0; end else begin - show_table <= allocate_fire || post_alloc_val || release_valid || fill_valid || dequeue_fire; + show_table <= allocate_fire || finalize_valid || fill_valid || dequeue_fire; end if (allocate_fire) begin `TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid)) end - if (release_valid) begin - `TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, release_id, rel_req_uuid)) + if (finalize_valid && 
finalize_is_release) begin + `TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid)) + end + if (finalize_valid && finalize_is_pending) begin + `TRACE(3, ("%t: %s finalize: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid)) end if (fill_valid) begin `TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 678f7af76..b7a1957ef 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -14,8 +14,6 @@ `include "VX_cache_define.vh" module VX_cache_tags #( - parameter `STRING INSTANCE_ID = "", - parameter BANK_ID = 0, // Size of cache in bytes parameter CACHE_SIZE = 1024, // Size of line inside a bank in bytes @@ -27,99 +25,86 @@ module VX_cache_tags #( // Size of a word in bytes parameter WORD_SIZE = 1, // Enable cache writeback - parameter WRITEBACK = 0, - // Request debug identifier - parameter UUID_WIDTH = 0 + parameter WRITEBACK = 0 ) ( input wire clk, input wire reset, + input wire stall, -`IGNORE_UNUSED_BEGIN - input wire [`UP(UUID_WIDTH)-1:0] req_uuid, -`IGNORE_UNUSED_END - - // init/fill/lookup + // inputs input wire init, input wire flush, input wire fill, - input wire write, input wire lookup, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] way_idx, - output wire [NUM_WAYS-1:0] tag_matches, + input wire [NUM_WAYS-1:0] flush_way, - // eviction - output wire evict_dirty, + // outputs + output wire [NUM_WAYS-1:0] tag_matches_r, + output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r, output wire [NUM_WAYS-1:0] evict_way, - output wire [`CS_TAG_SEL_BITS-1:0] evict_tag + output wire [NUM_WAYS-1:0] evict_way_r, + output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r ); - `UNUSED_SPARAM (INSTANCE_ID) - `UNUSED_PARAM (BANK_ID) - `UNUSED_VAR (lookup) - - // valid, dirty, tag - localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS; + // valid, tag + localparam TAG_WIDTH = 1 + 
`CS_TAG_SEL_BITS; wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0] read_valid; - wire [NUM_WAYS-1:0] read_dirty; - if (NUM_WAYS > 1) begin : g_evict_way - reg [NUM_WAYS-1:0] evict_way_r; + if (NUM_WAYS > 1) begin : g_evict_way + reg [NUM_WAYS-1:0] victim_way; // cyclic assignment of replacement way always @(posedge clk) begin if (reset) begin - evict_way_r <= 1; - end else if (lookup) begin - evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]}; + victim_way <= 1; + end else if (~stall) begin + victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; end end + assign evict_way = fill ? victim_way : flush_way; + `BUFFER_EX(evict_way_r, evict_way, ~stall, 1); + end else begin : g_evict_way_0 + `UNUSED_VAR (flush_way) + assign evict_way = 1'b1; + assign evict_way_r = 1'b1; + end - assign evict_way = fill ? 
evict_way_r : way_idx; - + if (WRITEBACK) begin : g_evict_tag_wb VX_onehot_mux #( .DATAW (`CS_TAG_SEL_BITS), .N (NUM_WAYS) ) evict_tag_sel ( .data_in (read_tag), - .sel_in (evict_way), - .data_out (evict_tag) + .sel_in (evict_way_r), + .data_out (evict_tag_r) ); - end else begin : g_evict_way_0 - assign evict_way = 1'b1; - assign evict_tag = read_tag; + end else begin : g_evict_tag_wt + assign evict_tag_r = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - wire do_fill = fill && evict_way[i]; - wire do_flush = flush && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode - wire do_write = WRITEBACK && write && tag_matches[i]; + wire do_fill = fill && evict_way[i]; + wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode - wire line_read = (WRITEBACK && (fill || flush)); - wire line_write = init || do_fill || do_flush || do_write; - wire line_valid = ~(init || flush); + wire line_read = lookup || (WRITEBACK && (fill || flush)); + wire line_write = init || do_fill || do_flush; + wire line_valid = fill; wire [TAG_WIDTH-1:0] line_wdata; wire [TAG_WIDTH-1:0] line_rdata; - if (WRITEBACK) begin : g_writeback - assign line_wdata = {line_valid, write, line_tag}; - assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata; - end else begin : g_writethrough - assign line_wdata = {line_valid, line_tag}; - assign {read_valid[i], read_tag[i]} = line_rdata; - assign read_dirty[i] = 1'b0; - end + assign line_wdata = {line_valid, line_tag}; + assign {read_valid[i], read_tag[i]} = line_rdata; VX_sp_ram #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .NO_RWCHECK (1), - .RW_ASSERT (1) + .OUT_REG (1) ) tag_store ( .clk (clk), .reset (reset), @@ -132,40 +117,10 @@ module VX_cache_tags #( ); end + `BUFFER_EX(line_tag_r, line_tag, ~stall, 1); + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches - assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); + assign 
tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]); end - assign evict_dirty = | (read_dirty & evict_way); - -`ifdef DBG_TRACE_CACHE - wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx}; - always @(posedge clk) begin - if (fill) begin - `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) - end - if (init) begin - `TRACE(3, ("%t: %s init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx)) - end - if (flush) begin - `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty)) - end - if (lookup) begin - if (tag_matches != 0) begin - if (write) begin - `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) - end else begin - `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid)) - end - end else begin - if (write) begin - `TRACE(3, ("%t: %s write-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) - end else begin - `TRACE(3, ("%t: %s read-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid)) - end - end - end - end -`endif - endmodule From 684f2e2d3d118efcc7e1b650c905f110381f3f5b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 03:42:51 -0700 Subject: [PATCH 359/488] minor update --- 
hw/rtl/core/VX_ipdom_stack.sv | 45 ++++++++++------------------------- hw/rtl/libs/VX_fifo_queue.sv | 10 ++++---- 2 files changed, 16 insertions(+), 39 deletions(-) diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index 04efd91d3..9bc39b864 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -30,22 +30,20 @@ module VX_ipdom_stack #( output wire empty, output wire full ); - reg slot_set [DEPTH-1:0]; - reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr; reg empty_r, full_r; wire [WIDTH-1:0] d0, d1; - wire d_set_n = slot_set[rd_ptr]; + wire d_set_r; always @(*) begin rd_ptr_n = rd_ptr; if (push) begin rd_ptr_n = wr_ptr; end else if (pop) begin - rd_ptr_n = rd_ptr - ADDRW'(d_set_n); + rd_ptr_n = rd_ptr - ADDRW'(d_set_r); end end @@ -64,49 +62,30 @@ module VX_ipdom_stack #( empty_r <= 0; full_r <= (ADDRW'(DEPTH-1) == wr_ptr); end else if (pop) begin - wr_ptr <= wr_ptr - ADDRW'(d_set_n); - empty_r <= (rd_ptr == 0) && (d_set_n == 1); + wr_ptr <= wr_ptr - ADDRW'(d_set_r); + empty_r <= (rd_ptr == 0) && d_set_r; full_r <= 0; end rd_ptr <= rd_ptr_n; end end + wire [WIDTH * 2:0] qout = push ? {1'b0, q1, q0} : {1'b1, d1, d0}; + VX_dp_ram #( - .DATAW (WIDTH * 2), + .DATAW (1 + WIDTH * 2), .SIZE (DEPTH), - .RADDR_REG (1) + .OUT_REG (1) ) store ( .clk (clk), .reset (reset), .read (1'b1), - .write (push), + .write (push || pop), .wren (1'b1), - .waddr (wr_ptr), - .wdata ({q1, q0}), + .waddr (push ? wr_ptr : rd_ptr), + .wdata (qout), .raddr (rd_ptr_n), - .rdata ({d1, d0}) - ); - - always @(posedge clk) begin - if (push) begin - slot_set[wr_ptr] <= 0; - end else if (pop) begin - slot_set[rd_ptr] <= 1; - end - end - - wire d_set_r; - - VX_pipe_register #( - .DATAW (1), - .DEPTH (0) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .enable (1'b1), - .data_in (d_set_n), - .data_out (d_set_r) + .rdata ({d_set_r, d1, d0}) ); assign d = d_set_r ? 
d0 : d1; diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 03521ce1a..8af35bc7b 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -110,8 +110,7 @@ module VX_fifo_queue #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM), - .RADDR_REG (1) + .LUTRAM (LUTRAM) ) dp_ram ( .clk (clk), .reset (reset), @@ -120,7 +119,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n_n), + .raddr (rd_ptr_n_r), .rdata (dout) ); @@ -158,8 +157,7 @@ module VX_fifo_queue #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM), - .RADDR_REG (1) + .LUTRAM (LUTRAM) ) dp_ram ( .clk (clk), .reset (reset), @@ -168,7 +166,7 @@ module VX_fifo_queue #( .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n), + .raddr (rd_ptr_r), .rdata (data_out) ); From 9f32e5693c012019984d7f04c1ae8f504cf1ad79 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 10:41:32 -0700 Subject: [PATCH 360/488] minor update --- hw/syn/xilinx/sandbox/Makefile | 5 +---- hw/syn/xilinx/xrt/Makefile | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile index e4def9c4e..074fcb87c 100644 --- a/hw/syn/xilinx/sandbox/Makefile +++ b/hw/syn/xilinx/sandbox/Makefile @@ -24,11 +24,8 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif -TEX_INCLUDE = -I$(RTL_DIR)/tex -RASTER_INCLUDE = -I$(RTL_DIR)/raster -OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) RTL_INCLUDE += 
-I$(SRC_DIR) # compilation flags diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index f5997352c..2517f2777 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -76,17 +76,13 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include sources RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv -RTL_PKGS += $(RTL_DIR)/tex/VX_tex_pkg.sv $(RTL_DIR)/raster/VX_raster_pkg.sv $(RTL_DIR)/om/VX_om_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif -TEX_INCLUDE = -I$(RTL_DIR)/tex -RASTER_INCLUDE = -I$(RTL_DIR)/raster -OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) # Kernel compiler global settings VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache From 37f4d053937534b9a6275a584c5d9081e6b7e496 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 10:44:04 -0700 Subject: [PATCH 361/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index d32e9423f..7f1153ede 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -693,8 +693,8 @@ module VX_cache_bank #( /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE - assign perf_read_misses = 
do_read_miss_st1; - assign perf_write_misses = do_write_miss_st1; + assign perf_read_misses = do_read_st1 && ~is_hit_st1; + assign perf_write_misses = do_write_st1 && ~is_hit_st1; assign perf_mshr_stalls = mshr_alm_full; `endif From 1d626588ef79a6862f8148cf48198b1873dde435 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 11:49:12 -0700 Subject: [PATCH 362/488] minor update --- hw/rtl/cache/VX_cache_mshr.sv | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index c8f89376a..10c2c948b 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -169,6 +169,9 @@ module VX_cache_mshr #( if (finalize_is_release) begin valid_table_n[finalize_id] = 0; end + // warning: This code allows 'finalize_is_pending' to be asserted regardless of hit/miss + // to reduce its propagation delay into the MSHR. this is safe because wrong updates + // to 'next_table_n' will be cleared during 'allocate_fire' below. 
if (finalize_is_pending) begin next_table_x[finalize_previd] = 1; end From 9e5638c9b082ca567ea7796d2e735fbcc69c4126 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 12:06:55 -0700 Subject: [PATCH 363/488] minor update --- hw/rtl/libs/VX_fifo_queue.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 8af35bc7b..99efd3d38 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -83,7 +83,7 @@ module VX_fifo_queue #( reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n; always @(*) begin - rd_ptr_n_n = rd_ptr_r; + rd_ptr_n_n = rd_ptr_n_r; if (pop) begin if (DEPTH > 2) begin rd_ptr_n_n = rd_ptr_r + ADDRW'(2); @@ -97,7 +97,7 @@ module VX_fifo_queue #( if (reset) begin wr_ptr_r <= '0; rd_ptr_r <= '0; - rd_ptr_n_r <= '0; + rd_ptr_n_r <= 1; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); if (pop) begin From f63233334e3545893fe5053da9abede3def8eb09 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 16:22:59 -0700 Subject: [PATCH 364/488] minor update --- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/libs/VX_axi_adapter.sv | 22 +++++++++++--------- sim/xrtsim/xrt_sim.cpp | 38 ++++++++++++++++++++++------------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 10c2c948b..c94cf8e65 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -215,7 +215,7 @@ module VX_cache_mshr #( `RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid)) - `RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID, + `RUNTIME_ASSERT(~(fill_valid && ~valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, 
INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id)) VX_dp_ram #( diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 255789fd7..162b0581a 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -135,7 +135,7 @@ module VX_axi_adapter #( ); end - wire tbuf_full; + wire mem_req_tag_ready; wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out; wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out; @@ -143,13 +143,14 @@ module VX_axi_adapter #( if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE); wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr; + wire tbuf_full; VX_index_buffer #( .DATAW (TAG_WIDTH_IN), .SIZE (TAG_BUFFER_SIZE) ) tag_buf ( .clk (clk), .reset (reset), - .acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready), + .acquire_en (mem_req_valid && ~mem_req_rw && mem_req_ready), .write_addr (tbuf_waddr), .write_data (mem_req_tag), .read_data (mem_rsp_tag), @@ -158,22 +159,24 @@ module VX_axi_adapter #( .full (tbuf_full), `UNUSED_PIN (empty) ); + assign mem_req_tag_ready = mem_req_rw || ~tbuf_full; assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr); assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0]; `UNUSED_VAR (mem_rsp_tag_out) end else begin : g_no_tag_buf - assign tbuf_full = 0; + assign mem_req_tag_ready = 1; assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag); assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0]; `UNUSED_VAR (mem_rsp_tag_out) end // request ack - assign mem_req_ready = (mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full; + assign mem_req_ready = mem_req_rw ? 
axi_write_ready[req_bank_sel] : + (m_axi_arready[req_bank_sel] && mem_req_tag_ready); // AXI write request address channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr - assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i]; + assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_awid[i] = mem_req_tag_out; assign m_axi_awlen[i] = 8'b00000000; @@ -188,7 +191,7 @@ module VX_axi_adapter #( // AXI write request data channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i]; + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; @@ -205,7 +208,7 @@ module VX_axi_adapter #( // AXI read request channel for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req - assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full; + assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && mem_req_tag_ready; assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); assign m_axi_arid[i] = mem_req_tag_out; assign m_axi_arlen[i] = 8'b00000000; @@ -228,9 +231,8 @@ module VX_axi_adapter #( assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)) - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)) - `UNUSED_VAR (m_axi_rlast[i]) + `RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rlast[i] == 0), ("%t: 
*** AXI response error", $time)) + `RUNTIME_ASSERT(~(m_axi_rvalid[i] && m_axi_rresp[i] != 0), ("%t: *** AXI response error", $time)) end VX_stream_arb #( diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index d572b9479..cd2e1b90c 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -333,6 +333,8 @@ private: } device_->ap_rst_n = 1; + + // this AXI device is always ready to accept new requests for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { *m_axi_mem_[i].arready = 1; *m_axi_mem_[i].awready = 1; @@ -381,53 +383,56 @@ private: } void axi_ctrl_bus_reset() { - // address read request + // read request address device_->s_axi_ctrl_arvalid = 0; device_->s_axi_ctrl_araddr = 0; - // data read response + // read response device_->s_axi_ctrl_rready = 0; - // address write request + // write request address device_->s_axi_ctrl_awvalid = 0; device_->s_axi_ctrl_awaddr = 0; - // data write request + // write request data device_->s_axi_ctrl_wvalid = 0; device_->s_axi_ctrl_wdata = 0; device_->s_axi_ctrl_wstrb = 0; - // data write response + // write response device_->s_axi_ctrl_bready = 0; } void axi_mem_bus_reset() { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { - // address read request + // read request address *m_axi_mem_[i].arready = 0; - // address write request + // write request address *m_axi_mem_[i].awready = 0; - // data write request + // write request data *m_axi_mem_[i].wready = 0; - // data read response + // read response *m_axi_mem_[i].rvalid = 0; - // data write response + // write response *m_axi_mem_[i].bvalid = 0; // states m_axi_states_[i].write_req_pending = false; + m_axi_states_[i].write_rsp_pending = false; + m_axi_states_[i].read_rsp_pending = false; } } void axi_mem_bus_eval() { for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { // handle read responses - if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) { - *m_axi_mem_[i].rvalid = 0; + if (*m_axi_mem_[i].rvalid && (*m_axi_mem_[i].rready || 
~m_axi_states_[i].read_rsp_pending)) { + *m_axi_mem_[i].rvalid = 0; + m_axi_states_[i].read_rsp_pending = false; } if (!*m_axi_mem_[i].rvalid) { if (!pending_mem_reqs_[i].empty() @@ -441,13 +446,15 @@ private: *m_axi_mem_[i].rlast = 1; memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].read_rsp_pending = !*m_axi_mem_[i].rready; delete mem_rsp; } } // handle write responses - if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) { + if (*m_axi_mem_[i].bvalid && (*m_axi_mem_[i].bready || ~m_axi_states_[i].write_rsp_pending)) { *m_axi_mem_[i].bvalid = 0; + m_axi_states_[i].write_rsp_pending = false; } if (!*m_axi_mem_[i].bvalid) { if (!pending_mem_reqs_[i].empty() @@ -459,6 +466,7 @@ private: *m_axi_mem_[i].bid = mem_rsp->tag; *m_axi_mem_[i].bresp = 0; pending_mem_reqs_[i].erase(mem_rsp_it); + m_axi_states_[i].write_rsp_pending = !*m_axi_mem_[i].bready; delete mem_rsp; } } @@ -487,7 +495,7 @@ private: *m_axi_mem_[i].wready = 0; } - // handle address write requestsls + // handle address write requestsls if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) { m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; @@ -537,6 +545,8 @@ private: uint64_t write_req_addr; uint32_t write_req_tag; bool write_req_pending; + bool write_rsp_pending; + bool read_rsp_pending; } m_axi_state_t; typedef struct { From 26df675e24e1bc05deb3610b1425f413f41364e6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 20:08:38 -0700 Subject: [PATCH 365/488] minor update --- sim/xrtsim/xrt_sim.cpp | 181 +++++++++++++++++++++-------------------- 1 file changed, 95 insertions(+), 86 deletions(-) diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index cd2e1b90c..8dd800931 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ b/sim/xrtsim/xrt_sim.cpp @@ -338,11 +338,22 @@ private: for (int i = 0; i < 
PLATFORM_MEMORY_BANKS; ++i) { *m_axi_mem_[i].arready = 1; *m_axi_mem_[i].awready = 1; + *m_axi_mem_[i].wready = 1; } } void tick() { - this->axi_mem_bus_eval(); + device_->ap_clk = 0; + this->eval(); + + this->axi_mem_bus_eval(0); + + device_->ap_clk = 1; + this->eval(); + + this->axi_mem_bus_eval(1); + + dram_sim_.tick(); for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { if (!dram_queues_[i].empty()) { @@ -360,13 +371,6 @@ private: } } - dram_sim_.tick(); - - device_->ap_clk = 0; - this->eval(); - device_->ap_clk = 1; - this->eval(); - #ifndef NDEBUG fflush(stdout); #endif @@ -404,149 +408,154 @@ private: } void axi_mem_bus_reset() { - for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { // read request address - *m_axi_mem_[i].arready = 0; + *m_axi_mem_[b].arready = 0; // write request address - *m_axi_mem_[i].awready = 0; + *m_axi_mem_[b].awready = 0; // write request data - *m_axi_mem_[i].wready = 0; + *m_axi_mem_[b].wready = 0; // read response - *m_axi_mem_[i].rvalid = 0; + *m_axi_mem_[b].rvalid = 0; // write response - *m_axi_mem_[i].bvalid = 0; + *m_axi_mem_[b].bvalid = 0; // states - m_axi_states_[i].write_req_pending = false; - m_axi_states_[i].write_rsp_pending = false; - m_axi_states_[i].read_rsp_pending = false; + m_axi_states_[b].write_req_addr_ack = false; + m_axi_states_[b].write_req_data_ack = false; } } - void axi_mem_bus_eval() { - for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { - // handle read responses - if (*m_axi_mem_[i].rvalid && (*m_axi_mem_[i].rready || ~m_axi_states_[i].read_rsp_pending)) { - *m_axi_mem_[i].rvalid = 0; - m_axi_states_[i].read_rsp_pending = false; + void axi_mem_bus_eval(bool clk) { + if (!clk) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { + m_axi_states_[b].read_rsp_ready = *m_axi_mem_[b].rready; + m_axi_states_[b].write_rsp_ready = *m_axi_mem_[b].bready; } - if (!*m_axi_mem_[i].rvalid) { - if (!pending_mem_reqs_[i].empty() - && 
(*pending_mem_reqs_[i].begin())->ready - && !(*pending_mem_reqs_[i].begin())->write) { - auto mem_rsp_it = pending_mem_reqs_[i].begin(); + return; + } + + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { + // handle read responses + if (*m_axi_mem_[b].rvalid && m_axi_states_[b].read_rsp_ready) { + *m_axi_mem_[b].rvalid = 0; + } + if (!*m_axi_mem_[b].rvalid) { + if (!pending_mem_reqs_[b].empty() + && (*pending_mem_reqs_[b].begin())->ready + && !(*pending_mem_reqs_[b].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[b].begin(); auto mem_rsp = *mem_rsp_it; - *m_axi_mem_[i].rvalid = 1; - *m_axi_mem_[i].rid = mem_rsp->tag; - *m_axi_mem_[i].rresp = 0; - *m_axi_mem_[i].rlast = 1; - memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); - pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].read_rsp_pending = !*m_axi_mem_[i].rready; + *m_axi_mem_[b].rvalid = 1; + *m_axi_mem_[b].rid = mem_rsp->tag; + *m_axi_mem_[b].rresp = 0; + *m_axi_mem_[b].rlast = 1; + memcpy(m_axi_mem_[b].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); + pending_mem_reqs_[b].erase(mem_rsp_it); delete mem_rsp; } } // handle write responses - if (*m_axi_mem_[i].bvalid && (*m_axi_mem_[i].bready || ~m_axi_states_[i].write_rsp_pending)) { - *m_axi_mem_[i].bvalid = 0; - m_axi_states_[i].write_rsp_pending = false; + if (*m_axi_mem_[b].bvalid && m_axi_states_[b].write_rsp_ready) { + *m_axi_mem_[b].bvalid = 0; } - if (!*m_axi_mem_[i].bvalid) { - if (!pending_mem_reqs_[i].empty() - && (*pending_mem_reqs_[i].begin())->ready - && (*pending_mem_reqs_[i].begin())->write) { - auto mem_rsp_it = pending_mem_reqs_[i].begin(); + if (!*m_axi_mem_[b].bvalid) { + if (!pending_mem_reqs_[b].empty() + && (*pending_mem_reqs_[b].begin())->ready + && (*pending_mem_reqs_[b].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[b].begin(); auto mem_rsp = *mem_rsp_it; - *m_axi_mem_[i].bvalid = 1; - *m_axi_mem_[i].bid = mem_rsp->tag; - *m_axi_mem_[i].bresp = 0; - 
pending_mem_reqs_[i].erase(mem_rsp_it); - m_axi_states_[i].write_rsp_pending = !*m_axi_mem_[i].bready; + *m_axi_mem_[b].bvalid = 1; + *m_axi_mem_[b].bid = mem_rsp->tag; + *m_axi_mem_[b].bresp = 0; + pending_mem_reqs_[b].erase(mem_rsp_it); delete mem_rsp; } } // handle read requests - if (*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { + if (*m_axi_mem_[b].arvalid && *m_axi_mem_[b].arready) { auto mem_req = new mem_req_t(); - mem_req->tag = *m_axi_mem_[i].arid; - mem_req->addr = uint64_t(*m_axi_mem_[i].araddr); + mem_req->tag = *m_axi_mem_[b].arid; + mem_req->addr = uint64_t(*m_axi_mem_[b].araddr); ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; - pending_mem_reqs_[i].emplace_back(mem_req); + pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); + /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, mem_req->tag); for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { - printf("%02x", mem_req->data[i]); + printf("%02x", mem_req->data[b]); } printf("\n");*/ // send dram request - dram_queues_[i].push(mem_req); + dram_queues_[b].push(mem_req); } - if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) { - *m_axi_mem_[i].wready = 0; + // handle write address requests + if (*m_axi_mem_[b].awvalid && *m_axi_mem_[b].awready && !m_axi_states_[b].write_req_addr_ack) { + m_axi_states_[b].write_req_addr = *m_axi_mem_[b].awaddr; + m_axi_states_[b].write_req_tag = *m_axi_mem_[b].awid; + m_axi_states_[b].write_req_addr_ack = true; } - // handle address write requestsls - if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) { - m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; - m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; - // activate data channel - *m_axi_mem_[i].wready = 1; - 
m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid; + // handle write data requests + if (*m_axi_mem_[b].wvalid && *m_axi_mem_[b].wready && !m_axi_states_[b].write_req_data_ack) { + m_axi_states_[b].write_req_byteen = *m_axi_mem_[b].wstrb; + auto data = (const uint8_t*)m_axi_mem_[b].wdata->data(); + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) { + m_axi_states_[b].write_req_data[i] = data[i]; + } + m_axi_states_[b].write_req_data_ack = true; } - // handle data write requests - if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) { - auto byteen = *m_axi_mem_[i].wstrb; - auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); - auto byte_addr = m_axi_states_[i].write_req_addr; - - for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { + // handle write requests + if (m_axi_states_[b].write_req_addr_ack && m_axi_states_[b].write_req_data_ack) { + auto byteen = m_axi_states_[b].write_req_byteen; + auto byte_addr = m_axi_states_[b].write_req_addr; + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; ++i) { if ((byteen >> i) & 0x1) { - (*ram_)[byte_addr + i] = data[i]; + (*ram_)[byte_addr + i] = m_axi_states_[b].write_req_data[i]; } } - auto mem_req = new mem_req_t(); - mem_req->tag = m_axi_states_[i].write_req_tag; + mem_req->tag = m_axi_states_[b].write_req_tag; mem_req->addr = byte_addr; mem_req->write = true; mem_req->ready = false; - pending_mem_reqs_[i].emplace_back(mem_req); + pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); + /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, b, mem_req->addr, byteen, mem_req->tag); for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { - printf("%02x", data[i]); + printf("%02x", m_axi_states_[b].write_req_data[i]]); } printf("\n");*/ // send dram request - dram_queues_[i].push(mem_req); + dram_queues_[b].push(mem_req); - // 
deactivate data channel - if (m_axi_states_[i].write_req_pending) { - *m_axi_mem_[i].wready = 0; - m_axi_states_[i].write_req_pending = false; - } + // clear acks + m_axi_states_[b].write_req_addr_ack = false; + m_axi_states_[b].write_req_data_ack = false; } } } typedef struct { + std::array write_req_data; + uint64_t write_req_byteen; uint64_t write_req_addr; uint32_t write_req_tag; - bool write_req_pending; - bool write_rsp_pending; - bool read_rsp_pending; + bool read_rsp_ready; + bool write_rsp_ready; + bool write_req_addr_ack; + bool write_req_data_ack; } m_axi_state_t; typedef struct { From 2a2fc2ae3934e912313a8d1b567d9457a405bde8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 23:25:41 -0700 Subject: [PATCH 366/488] minor update --- ci/regression.sh.in | 2 +- hw/rtl/VX_config.vh | 6 +++--- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/core/VX_ipdom_stack.sv | 2 +- hw/rtl/mem/VX_local_mem.sv | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index ddd4f12bd..662b40717 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -154,7 +154,7 @@ cache() CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx # test writeback - CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress + CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 4f666ce20..a4e48da5f 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -544,7 +544,7 @@ // 
Enable Cache Dirty bytes `ifndef DCACHE_DIRTYBYTES -`define DCACHE_DIRTYBYTES 0 +`define DCACHE_DIRTYBYTES 1 `endif // LMEM Configurable Knobs //////////////////////////////////////////////////// @@ -609,7 +609,7 @@ // Enable Cache Dirty bytes `ifndef L2_DIRTYBYTES -`define L2_DIRTYBYTES 0 +`define L2_DIRTYBYTES 1 `endif // L3cache Configurable Knobs ///////////////////////////////////////////////// @@ -656,7 +656,7 @@ // Enable Cache Dirty bytes `ifndef L3_DIRTYBYTES -`define L3_DIRTYBYTES 0 +`define L3_DIRTYBYTES 1 `endif `ifndef MEMORY_BANKS diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index c94cf8e65..ae6ebb7fe 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -222,7 +222,7 @@ module VX_cache_mshr #( .DATAW (DATA_WIDTH), .SIZE (MSHR_SIZE), .OUT_REG (1) - ) entries ( + ) mshr_store ( .clk (clk), .reset (reset), .read (1'b1), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index 9bc39b864..d5d000132 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -76,7 +76,7 @@ module VX_ipdom_stack #( .DATAW (1 + WIDTH * 2), .SIZE (DEPTH), .OUT_REG (1) - ) store ( + ) ipdom_store ( .clk (clk), .reset (reset), .read (1'b1), diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 7131c3f21..2ba66347e 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -169,7 +169,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .OUT_REG (1), .READ_ENABLE (0), .NO_RWCHECK (1) - ) data_store ( + ) lmem_store ( .clk (clk), .reset (reset), .read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]), From fe5442dbb3594e74136e12b7645dad87d8e905eb Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 13 Oct 2024 23:34:57 -0700 Subject: [PATCH 367/488] minor update --- hw/rtl/VX_config.vh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 
a4e48da5f..0cff1810e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -544,7 +544,7 @@ // Enable Cache Dirty bytes `ifndef DCACHE_DIRTYBYTES -`define DCACHE_DIRTYBYTES 1 +`define DCACHE_DIRTYBYTES `DCACHE_WRITEBACK `endif // LMEM Configurable Knobs //////////////////////////////////////////////////// @@ -609,7 +609,7 @@ // Enable Cache Dirty bytes `ifndef L2_DIRTYBYTES -`define L2_DIRTYBYTES 1 +`define L2_DIRTYBYTES `L2_WRITEBACK `endif // L3cache Configurable Knobs ///////////////////////////////////////////////// @@ -656,7 +656,7 @@ // Enable Cache Dirty bytes `ifndef L3_DIRTYBYTES -`define L3_DIRTYBYTES 1 +`define L3_DIRTYBYTES `L3_WRITEBACK `endif `ifndef MEMORY_BANKS From 0d044230742312de09c3ddf0dc3b9836a6cd2d7b Mon Sep 17 00:00:00 2001 From: MichaelJSr Date: Mon, 14 Oct 2024 10:12:33 -0700 Subject: [PATCH 368/488] Readded the ecall and ebreak instruction traps so that the riscv-vector tests run properly --- sim/simx/emulator.cpp | 12 ++++++++++++ sim/simx/emulator.h | 4 ++++ sim/simx/execute.cpp | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 4fc066d66..05b3497c4 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -625,3 +625,15 @@ void Emulator::update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid) { this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, tid, wid) | fflags, tid, wid); } } + +// For riscv-vector test functionality, ecall and ebreak must trap +// These instructions are used in the vector tests to stop execution of the test +// Therefore, without these instructions, undefined and incorrect behavior happens +// +// For now, we need these instructions to trap for testing the riscv-vector isa +void Emulator::trigger_ecall() { + active_warps_.reset(); +} +void Emulator::trigger_ebreak() { + active_warps_.reset(); +} \ No newline at end of file diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index d1b14daca..5f1b91d5d 100644 --- a/sim/simx/emulator.h 
+++ b/sim/simx/emulator.h @@ -122,6 +122,10 @@ private: void update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid); + void trigger_ecall(); // Re-added for riscv-vector test functionality + + void trigger_ebreak(); // Re-added for riscv-vector test functionality + const Arch& arch_; const DCRS& dcrs_; Core* core_; diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index e70d45cb2..dd8253571 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -830,7 +830,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->fetch_stall = true; switch (csr_addr) { case 0x000: // RV32I: ECALL + this->trigger_ecall(); // Re-added for riscv-vector test functionality + break; case 0x001: // RV32I: EBREAK + this->trigger_ebreak(); // Re-added for riscv-vector test functionality + break; case 0x002: // RV32I: URET case 0x102: // RV32I: SRET case 0x302: // RV32I: MRET From 37757fab8ffac71df2a8cc8a6d52de547184bdb7 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 14 Oct 2024 15:48:49 -0700 Subject: [PATCH 369/488] fixed fifo_queue support for BRAM --- hw/rtl/VX_config.vh | 12 +-- hw/rtl/cache/VX_cache.sv | 4 +- hw/rtl/core/VX_ibuffer.sv | 2 +- hw/rtl/libs/VX_fifo_queue.sv | 152 ++++++++++++++++------------------- 4 files changed, 80 insertions(+), 90 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 0cff1810e..da05fc9e9 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -480,7 +480,7 @@ // Number of Associative Ways `ifndef ICACHE_NUM_WAYS -`define ICACHE_NUM_WAYS 1 +`define ICACHE_NUM_WAYS 4 `endif // Dcache Configurable Knobs ////////////////////////////////////////////////// @@ -529,12 +529,12 @@ // Memory Response Queue Size `ifndef DCACHE_MRSQ_SIZE -`define DCACHE_MRSQ_SIZE 0 +`define DCACHE_MRSQ_SIZE 4 `endif // Number of Associative Ways `ifndef DCACHE_NUM_WAYS -`define DCACHE_NUM_WAYS 1 +`define DCACHE_NUM_WAYS 4 `endif // Enable Cache Writeback @@ -594,12 +594,12 @@ // Memory Response 
Queue Size `ifndef L2_MRSQ_SIZE -`define L2_MRSQ_SIZE 0 +`define L2_MRSQ_SIZE 4 `endif // Number of Associative Ways `ifndef L2_NUM_WAYS -`define L2_NUM_WAYS 2 +`define L2_NUM_WAYS 4 `endif // Enable Cache Writeback @@ -641,7 +641,7 @@ // Memory Response Queue Size `ifndef L3_MRSQ_SIZE -`define L3_MRSQ_SIZE 0 +`define L3_MRSQ_SIZE 4 `endif // Number of Associative Ways diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index d749e6ee9..c31699c1e 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -402,8 +402,8 @@ module VX_cache import VX_gpu_pkg::*; #( .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), - .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)), - .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) + .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : 1), + .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : 1) ) bank ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index e1a9457de..abb261b7e 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -39,7 +39,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), - .OUT_REG (2) // 2-cycle EB for area reduction + .OUT_REG (1) ) instr_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 99efd3d38..ca1185780 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -42,6 +42,9 @@ module VX_fifo_queue #( `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) + `UNUSED_PARAM (OUT_REG) + `UNUSED_PARAM (LUTRAM) + VX_pending_size #( .SIZE (DEPTH), .ALM_EMPTY (ALM_EMPTY), @@ -74,102 +77,89 @@ module VX_fifo_queue #( localparam ADDRW = `CLOG2(DEPTH); + wire [DATAW-1:0] data_out_w; + reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; + reg [ADDRW-1:0] wr_ptr_r; + + always 
@(*) begin + rd_ptr_n = rd_ptr_r + ADDRW'(pop); + end + + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= '0; + rd_ptr_r <= (OUT_REG != 0) ? 1 : 0; + end else begin + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + rd_ptr_r <= rd_ptr_n; + end + end + + wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n; + + wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); + wire bypass = push && (empty || (going_empty && pop)); + wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop; + + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM), + .OUT_REG(!LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (read), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_w), + .rdata (data_out_w) + ); + if (OUT_REG != 0) begin : g_out_reg + reg [DATAW-1:0] data_out_r, data_out_n; - wire [DATAW-1:0] dout; - reg [DATAW-1:0] dout_r; - reg [ADDRW-1:0] wr_ptr_r; - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n; - - always @(*) begin - rd_ptr_n_n = rd_ptr_n_r; - if (pop) begin - if (DEPTH > 2) begin - rd_ptr_n_n = rd_ptr_r + ADDRW'(2); - end else begin // (DEPTH == 2); - rd_ptr_n_n = ~rd_ptr_n_r; + if (LUTRAM) begin : g_lutram + assign data_out_n = data_out_w; + end else begin : g_no_lutram + reg [DATAW-1:0] data_out_p; + reg rdw_hazard_r; + wire rdw_hazard = push && (wr_ptr_r == rd_ptr_w); + always @(posedge clk) begin + if (rdw_hazard) begin + data_out_p <= data_in; end + rdw_hazard_r <= rdw_hazard; end + assign data_out_n = rdw_hazard_r ? 
data_out_p : data_out_w; end always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - rd_ptr_n_r <= 1; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - end - rd_ptr_n_r <= rd_ptr_n_n; - end - end - - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_n_r), - .rdata (dout) - ); - - wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); - - always @(posedge clk) begin - if (push && (empty || (going_empty && pop))) begin - dout_r <= data_in; + if (bypass) begin + data_out_r <= data_in; end else if (pop) begin - dout_r <= dout; + data_out_r <= data_out_n; end end - assign data_out = dout_r; + assign data_out = data_out_r; end else begin : g_no_out_reg - - reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; - reg [ADDRW-1:0] wr_ptr_r; - - always @(*) begin - rd_ptr_n = rd_ptr_r + ADDRW'(pop); - end - - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_n; + if (LUTRAM) begin : g_lutram + assign data_out = data_out_w; + end else begin : g_no_lutram + reg [DATAW-1:0] data_in_r; + reg bypass_r; + always @(posedge clk) begin + if (bypass) begin + data_in_r <= data_in; + end + bypass_r <= bypass; end + assign data_out = bypass_r ? 
data_in_r : data_out_w; end - - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_r), - .rdata (data_out) - ); - end end From 03a1e2582894ef0a291b7f80deec33a4ee48027e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 00:28:09 -0700 Subject: [PATCH 370/488] adding cache replacement policy --- hw/rtl/VX_cluster.sv | 1 + hw/rtl/VX_config.vh | 24 +++- hw/rtl/VX_platform.vh | 2 +- hw/rtl/VX_socket.sv | 2 + hw/rtl/Vortex.sv | 1 + hw/rtl/cache/VX_cache.sv | 10 +- hw/rtl/cache/VX_cache_bank.sv | 49 ++++++-- hw/rtl/cache/VX_cache_cluster.sv | 10 +- hw/rtl/cache/VX_cache_define.vh | 6 + hw/rtl/cache/VX_cache_repl.sv | 200 +++++++++++++++++++++++++++++++ hw/rtl/cache/VX_cache_tags.sv | 16 +-- hw/rtl/cache/VX_cache_wrap.sv | 10 +- 12 files changed, 292 insertions(+), 39 deletions(-) create mode 100644 hw/rtl/cache/VX_cache_repl.sv diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 366d1bbac..9aa5fe706 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -99,6 +99,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .WRITE_ENABLE (1), .WRITEBACK (`L2_WRITEBACK), .DIRTY_BYTES (`L2_DIRTYBYTES), + .REPL_POLICY (`L2_REPL_POLICY), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index da05fc9e9..48f8ca3dc 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -151,6 +151,10 @@ `define L3_LINE_SIZE `MEM_BLOCK_SIZE `endif +`ifndef MEMORY_BANKS +`define MEMORY_BANKS 2 +`endif + `ifdef XLEN_64 `ifndef STACK_BASE_ADDR @@ -483,6 +487,11 @@ `define ICACHE_NUM_WAYS 4 `endif +// Replacement Policy +`ifndef ICACHE_REPL_POLICY +`define ICACHE_REPL_POLICY 1 +`endif + // Dcache Configurable Knobs ////////////////////////////////////////////////// // Cache Enable @@ -547,6 +556,11 @@ `define 
DCACHE_DIRTYBYTES `DCACHE_WRITEBACK `endif +// Replacement Policy +`ifndef DCACHE_REPL_POLICY +`define DCACHE_REPL_POLICY 1 +`endif + // LMEM Configurable Knobs //////////////////////////////////////////////////// `ifndef LMEM_DISABLE @@ -612,6 +626,11 @@ `define L2_DIRTYBYTES `L2_WRITEBACK `endif +// Replacement Policy +`ifndef L2_REPL_POLICY +`define L2_REPL_POLICY 1 +`endif + // L3cache Configurable Knobs ///////////////////////////////////////////////// // Cache Size @@ -659,8 +678,9 @@ `define L3_DIRTYBYTES `L3_WRITEBACK `endif -`ifndef MEMORY_BANKS -`define MEMORY_BANKS 2 +// Replacement Policy +`ifndef L3_REPL_POLICY +`define L3_REPL_POLICY 1 `endif // Number of Memory Ports from LLC diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 4f78fee24..8ea849ed3 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -222,7 +222,7 @@ endgenerate `define CLAMP(x, lo, hi) (((x) > (hi)) ? (hi) : (((x) < (lo)) ? (lo) : (x))) -`define UP(x) (((x) != 0) ? (x) : 1) +`define UP(x) (((x) > 0) ? 
(x) : 1) `define CDIV(n,d) ((n + d - 1) / (d)) diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 4ce547c7e..d9a8f5bf8 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -103,6 +103,7 @@ module VX_socket import VX_gpu_pkg::*; #( .FLAGS_WIDTH (0), .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (0), + .REPL_POLICY (`ICACHE_REPL_POLICY), .NC_ENABLE (0), .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) @@ -151,6 +152,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITE_ENABLE (1), .WRITEBACK (`DCACHE_WRITEBACK), .DIRTY_BYTES (`DCACHE_DIRTYBYTES), + .REPL_POLICY (`DCACHE_REPL_POLICY), .NC_ENABLE (1), .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 40f95a81a..0fa3ce31f 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -85,6 +85,7 @@ module Vortex import VX_gpu_pkg::*; ( .WRITE_ENABLE (1), .WRITEBACK (`L3_WRITEBACK), .DIRTY_BYTES (`L3_DIRTYBYTES), + .REPL_POLICY (`L3_REPL_POLICY), .UUID_WIDTH (`UUID_WIDTH), .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .CORE_OUT_BUF (3), diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index c31699c1e..b27b2df31 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -48,6 +48,9 @@ module VX_cache import VX_gpu_pkg::*; #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -393,12 +396,13 @@ module VX_cache import VX_gpu_pkg::*; #( .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + .DIRTY_BYTES (DIRTY_BYTES), + .REPL_POLICY (REPL_POLICY), .CRSQ_SIZE (CRSQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .DIRTY_BYTES (DIRTY_BYTES), - .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 
7f1153ede..7c5ca1e40 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -47,6 +47,9 @@ module VX_cache_bank #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -324,6 +327,14 @@ module VX_cache_bank #( wire do_write_st0 = valid_st0 && is_write_st0; wire do_fill_st0 = valid_st0 && is_fill_st0; + wire is_read_st1 = is_creq_st1 && ~rw_st1; + wire is_write_st1 = is_creq_st1 && rw_st1; + + wire do_read_st1 = valid_st1 && is_read_st1; + wire do_write_st1 = valid_st1 && is_write_st1; + wire do_fill_st1 = valid_st1 && is_fill_st1; + wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; + assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; @@ -331,8 +342,32 @@ module VX_cache_bank #( wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; wire [NUM_WAYS-1:0] tag_matches_st1; + wire is_hit_st1 = (| tag_matches_st1); + wire do_lookup_st0 = do_read_st0 || do_write_st0; + reg [NUM_WAYS-1:0] victim_way_st0; + + VX_cache_repl #( + .CACHE_SIZE (CACHE_SIZE), + .LINE_SIZE (LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .NUM_WAYS (NUM_WAYS), + .REPL_POLICY (REPL_POLICY) + ) cache_repl ( + .clk (clk), + .reset (reset), + .stall (pipe_stall), + .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1), + .hit_line (line_idx_st1), + .hit_way (tag_matches_st1), + .repl_valid (do_fill_st0), + .repl_line (line_idx_st0), + .repl_way (victim_way_st0) + ); + + assign evict_way_st0 = is_fill_st0 ? 
victim_way_st0 : flush_way_st0; + VX_cache_tags #( .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), @@ -350,12 +385,11 @@ module VX_cache_bank #( .fill (do_fill_st0 && ~pipe_stall), .lookup (do_lookup_st0 && ~pipe_stall), .line_addr (addr_st0), - .flush_way (flush_way_st0), + .evict_way (evict_way_st0), // outputs .tag_matches_r(tag_matches_st1), .line_tag_r (line_tag_st1), .evict_tag_r(evict_tag_st1), - .evict_way (evict_way_st0), .evict_way_r(evict_way_st1) ); @@ -374,23 +408,12 @@ module VX_cache_bank #( .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_id_st1, mshr_pending_st1}) ); - // we have a tag hit - wire is_hit_st1 = (| tag_matches_st1); - if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; end else begin : g_req_uuid_st1_0 assign req_uuid_st1 = '0; end - wire is_read_st1 = is_creq_st1 && ~rw_st1; - wire is_write_st1 = is_creq_st1 && rw_st1; - - wire do_read_st1 = valid_st1 && is_read_st1; - wire do_write_st1 = valid_st1 && is_write_st1; - wire do_fill_st1 = valid_st1 && is_fill_st1; - wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; - assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 71a2ad00b..b4c2db979 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -52,6 +52,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -150,13 +153,14 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + 
.DIRTY_BYTES (DIRTY_BYTES), + .REPL_POLICY (REPL_POLICY), .CRSQ_SIZE (CRSQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .MRSQ_SIZE (MRSQ_SIZE), .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (ARB_TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index 342a40a1b..b75845eca 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -73,4 +73,10 @@ `PERF_COUNTER_ADD (dst, src, mem_stalls, `PERF_CTR_BITS, count, (count > 1)) \ `PERF_COUNTER_ADD (dst, src, crsp_stalls, `PERF_CTR_BITS, count, (count > 1)) +/////////////////////////////////////////////////////////////////////////////// + +`define CS_REPL_RANDOM 0 +`define CS_REPL_CYCLIC 1 +`define CS_REPL_PLRU 2 + `endif // VX_CACHE_DEFINE_VH diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv new file mode 100644 index 000000000..59c5deddb --- /dev/null +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -0,0 +1,200 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_cache_define.vh" + +// Fast PLRU encoder and decoder utility +// Adapted from BaseJump STL: http://bjump.org/data_out.html + +module plru_decoder #( + parameter NUM_WAYS = 1, + parameter WAY_IDX_BITS = $clog2(NUM_WAYS), + parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS) +) ( + input wire [WAY_IDX_WIDTH-1:0] way_idx, + input wire [`UP(NUM_WAYS-1)-1:0] lru_in, + output wire [`UP(NUM_WAYS-1)-1:0] lru_out +); + if (NUM_WAYS != 1) begin : g_plru_decoder + wire [`UP(NUM_WAYS-1)-1:0] data; + `IGNORE_UNOPTFLAT_BEGIN + wire [`UP(NUM_WAYS-1)-1:0] mask; + `IGNORE_UNOPTFLAT_END + for (genvar i = 0; i < NUM_WAYS-1; ++i) begin : g_i + if (i == 0) begin : g_i_0 + assign mask[i] = 1'b1; + end else if (i % 2 == 1) begin : g_i_odd + assign mask[i] = mask[(i-1)/2] & ~way_idx[WAY_IDX_BITS-$clog2(i+2)+1]; + end else begin : g_i_even + assign mask[i] = mask[(i-2)/2] & way_idx[WAY_IDX_BITS-$clog2(i+2)+1]; + end + assign data[i] = ~way_idx[WAY_IDX_BITS-$clog2(i+2)]; + end + assign lru_out = (data & mask) | (lru_in & ~mask); + end else begin : g_plru_decoder_1 + `UNUSED_VAR (way_idx) + `UNUSED_VAR (lru_in) + assign lru_out = '0; + end + +endmodule + +module plru_encoder #( + parameter NUM_WAYS = 1, + parameter WAY_IDX_BITS = $clog2(NUM_WAYS), + parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS) +) ( + input wire [`UP(NUM_WAYS-1)-1:0] lru_in, + output wire [WAY_IDX_WIDTH-1:0] way_idx +); + if (NUM_WAYS != 1) begin : g_plru_encoder + wire [WAY_IDX_WIDTH-1:0] tmp; + for (genvar i = 0; i < WAY_IDX_WIDTH; ++i) begin : g_i + if (i == 0) begin : g_i_0 + assign tmp[WAY_IDX_WIDTH-1] = lru_in[0]; + end else begin : g_i_n + assign tmp[WAY_IDX_WIDTH-1-i] = lru_in[((2**i)-1)+:(1 << i)][tmp[WAY_IDX_WIDTH-1-:i]]; + end + end + assign way_idx = tmp; + end else begin : g_plru_encoder_1 + `UNUSED_VAR (lru_in) + assign way_idx = '0; + end + +endmodule + +module VX_cache_repl #( + parameter CACHE_SIZE = 1024, + // Size of line inside a bank in bytes + parameter LINE_SIZE = 64, + // Number of banks + 
parameter NUM_BANKS = 1, + // Number of associative ways + parameter NUM_WAYS = 1, + // replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC +) ( + input wire clk, + input wire reset, + input wire stall, + input wire hit_valid, + input wire [`CS_LINE_SEL_BITS-1:0] hit_line, + input wire [NUM_WAYS-1:0] hit_way, + input wire repl_valid, + input wire [`CS_LINE_SEL_BITS-1:0] repl_line, + output wire [NUM_WAYS-1:0] repl_way +); + `UNUSED_VAR (stall) + + localparam WAY_IDX_BITS = $clog2(NUM_WAYS); + localparam WAY_IDX_WIDTH = `UP(WAY_IDX_BITS); + + if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru + // Pseudo Least Recently Used replacement policy + localparam LRU_WIDTH = NUM_WAYS-1; + `UNUSED_VAR (repl_valid) + + reg [`CS_LINES_PER_BANK-1:0][`UP(LRU_WIDTH)-1:0] plru_tree; + + wire [WAY_IDX_WIDTH-1:0] repl_way_idx; + wire [WAY_IDX_WIDTH-1:0] hit_way_idx; + wire [`UP(LRU_WIDTH)-1:0] plru_update; + + always @(posedge clk) begin + if (reset) begin + plru_tree <= '0; + end else begin + if (hit_valid) begin + plru_tree[hit_line] <= plru_update; + end + end + end + + VX_onehot_encoder #( + .N (NUM_WAYS) + ) hit_way_enc ( + .data_in (hit_way), + .data_out (hit_way_idx), + `UNUSED_PIN (valid_out) + ); + + plru_decoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_dec ( + .way_idx (hit_way_idx), + .lru_in (plru_tree[hit_line]), + .lru_out (plru_update) + ); + + plru_encoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_enc ( + .lru_in (plru_tree[repl_line]), + .way_idx (repl_way_idx) + ); + + VX_decoder #( + .N (WAY_IDX_BITS) + ) repl_way_dec ( + .sel_in (repl_way_idx), + .data_in (1'b1), + .data_out (repl_way) + ); + + end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic + // Cyclic replacement policy + localparam CTR_WIDTH = $clog2(NUM_WAYS); + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + reg [`CS_LINES_PER_BANK-1:0][`UP(CTR_WIDTH)-1:0] counters; + always @(posedge clk) begin + if (reset) begin + counters <= '0; + end else if (repl_valid) begin + 
counters[repl_line] <= counters[repl_line] + 1; + end + end + VX_decoder #( + .N (WAY_IDX_BITS) + ) ctr_decoder ( + .sel_in (counters[repl_line]), + .data_in (1'b1), + .data_out (repl_way) + ); + end else begin : g_random + // Random replacement policy + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) + if (NUM_WAYS != 1) begin : g_repl_way + reg [NUM_WAYS-1:0] victim_way; + always @(posedge clk) begin + if (reset) begin + victim_way <= 1; + end else if (~stall) begin + victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; + end + end + assign repl_way = victim_way; + end else begin : g_repl_way_1 + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + assign repl_way = 1'b1; + end + end + +endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index b7a1957ef..8793420e1 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -37,12 +37,11 @@ module VX_cache_tags #( input wire fill, input wire lookup, input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] flush_way, + input wire [NUM_WAYS-1:0] evict_way, // outputs output wire [NUM_WAYS-1:0] tag_matches_r, output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r, - output wire [NUM_WAYS-1:0] evict_way, output wire [NUM_WAYS-1:0] evict_way_r, output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r ); @@ -56,20 +55,9 @@ module VX_cache_tags #( wire [NUM_WAYS-1:0] read_valid; if (NUM_WAYS > 1) begin : g_evict_way - reg [NUM_WAYS-1:0] victim_way; - // cyclic assignment of replacement way - always @(posedge clk) begin - if (reset) begin - victim_way <= 1; - end else if (~stall) begin - victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; - end - end - assign evict_way = fill ? 
victim_way : flush_way; `BUFFER_EX(evict_way_r, evict_way, ~stall, 1); end else begin : g_evict_way_0 - `UNUSED_VAR (flush_way) - assign evict_way = 1'b1; + `UNUSED_VAR (evict_way) assign evict_way_r = 1'b1; end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index d958736c4..ca8c53eda 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -51,6 +51,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // Enable dirty bytes on writeback parameter DIRTY_BYTES = 0, + // Replacement policy + parameter REPL_POLICY = `CS_REPL_CYCLIC, + // Request debug identifier parameter UUID_WIDTH = 0, @@ -169,13 +172,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .NUM_WAYS (NUM_WAYS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), + .WRITE_ENABLE (WRITE_ENABLE), + .WRITEBACK (WRITEBACK), + .DIRTY_BYTES (DIRTY_BYTES), + .REPL_POLICY (REPL_POLICY), .CRSQ_SIZE (CRSQ_SIZE), .MSHR_SIZE (MSHR_SIZE), .MRSQ_SIZE (MRSQ_SIZE), .MREQ_SIZE (MREQ_SIZE), - .WRITE_ENABLE (WRITE_ENABLE), - .WRITEBACK (WRITEBACK), - .DIRTY_BYTES (DIRTY_BYTES), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), .FLAGS_WIDTH (FLAGS_WIDTH), From db98965f567e4a9ca4254d9f30598906f754ceb5 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 02:27:07 -0700 Subject: [PATCH 371/488] minor update --- hw/rtl/libs/VX_dp_ram.sv | 470 ++++++++++++--------------------------- hw/rtl/libs/VX_sp_ram.sv | 2 - 2 files changed, 147 insertions(+), 325 deletions(-) diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index c27827552..595b3a42a 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,7 +19,6 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter RADDR_REG = 0, parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, @@ -44,328 +43,174 @@ module VX_dp_ram #( localparam WSELW = DATAW / WRENW; `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) -`define 
RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin : g_init \ - if (INIT_FILE != "") begin : g_file \ - initial $readmemh(INIT_FILE, ram); \ - end else begin : g_value \ - initial begin \ - for (integer i = 0; i < SIZE; ++i) \ - ram[i] = INIT_VALUE; \ - end \ - end \ +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`define RAM_WREN_BLOCK_ALTERA(__we__) \ + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we__) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + end + +`define RAM_WREN_BLOCK_XILINX(__we__) \ + reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we__) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + end + +`define RAM_WRITE_BLOCK(__we__) \ + reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we__) begin \ + ram[waddr] <= wdata; \ + end \ + end + +`define RAM_READ_BLOCK_OUT_REG(__re__) \ + always @(posedge clk) begin \ + if (__re__) begin \ + if (RESET_OUT && reset) begin \ + rdata_r <= INIT_VALUE; \ + end else begin \ + rdata_r <= ram[raddr]; \ + end \ + end \ end `UNUSED_PARAM (RW_ASSERT) `UNUSED_VAR (read) `UNUSED_VAR (wren) - if (OUT_REG && !READ_ENABLE) begin : g_out_reg - `UNUSED_PARAM (NO_RWCHECK) + if (OUT_REG) begin : g_out_reg reg [DATAW-1:0] rdata_r; - wire cs = read || write; - if (WRENW != 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - 
`RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + if (READ_ENABLE) begin : g_readen + if (WRENW != 1) begin : g_writeen + `ifdef QUARTUS + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read || write) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read || write) end - end else begin : g_no_lutram - reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + `else + // Not Quartus + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read || write) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read || write) + end + `endif + end else begin : g_no_writeen + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read || write) + end else begin : g_no_lutram + `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read || write) end end - `else - // Not Quartus - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + end 
else begin : g_no_readen + if (WRENW != 1) begin : g_writeen + `ifdef QUARTUS + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_ALTERA(write) + `RAM_READ_BLOCK_OUT_REG(read) end - end else begin : g_no_lutram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + `else + // Not Quartus + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read) + end else begin : g_no_lutram + `RAM_WREN_BLOCK_XILINX(write) + `RAM_READ_BLOCK_OUT_REG(read) end - end - `endif - end else begin : g_no_writeen - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) - ram[waddr] <= wdata; - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end - end - - end else begin : g_no_lutram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (cs) begin - if (write) - ram[waddr] <= wdata; - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else begin - rdata_r <= ram[raddr]; - end - end + `endif + end else begin : g_no_writeen + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read) + end else begin : g_no_lutram + `RAM_WRITE_BLOCK(write) + `RAM_READ_BLOCK_OUT_REG(read) end end end assign rdata = rdata_r; end else begin : g_no_out_reg - // OUT_REG==0 || READ_ENABLE=1 - wire [DATAW-1:0] rdata_w; - reg [ADDRW-1:0] raddr_reg; `ifdef SYNTHESIS if (WRENW > 1) begin : 
g_writeen `ifdef QUARTUS if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) + assign rdata = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `NO_RW_RAM_CHECK `RAM_WREN_BLOCK_ALTERA(write) + assign rdata = ram[raddr]; end else begin : g_rwcheck - reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `RAM_WREN_BLOCK_ALTERA(write) + assign rdata = ram[raddr]; end end `else // default synthesis if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < 
WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) + assign rdata = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `NO_RW_RAM_CHECK `RAM_WREN_BLOCK_XILINX(write) + assign rdata = ram[raddr]; end else begin : g_rwcheck - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; - end - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `RAM_WREN_BLOCK_XILINX(write) + assign rdata = ram[raddr]; end end `endif end else begin : g_no_writeen // (WRENW == 1) if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : 
g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) + assign rdata = ram[raddr]; end else begin : g_no_lutram if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `NO_RW_RAM_CHECK `RAM_WRITE_BLOCK(write) + assign rdata = ram[raddr]; end else begin : g_rwcheck - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - if (read) begin - raddr_reg <= raddr; - end - end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - assign rdata_w = ram[raddr]; - `UNUSED_VAR (raddr_reg) - end + `RAM_WRITE_BLOCK(write) + assign rdata = ram[raddr]; end end end @@ -389,54 +234,33 @@ module VX_dp_ram #( ram[waddr] <= ram_n; end end - if (read) begin - raddr_reg <= raddr; - end end - if (RADDR_REG != 0) begin : g_rdata_async - assign rdata_w = ram[raddr_reg]; - end else begin : g_rdata_sync - `UNUSED_VAR (raddr_reg) - if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass - reg [DATAW-1:0] prev_data; - reg [ADDRW-1:0] prev_waddr; - reg prev_write; + if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; - always @(posedge clk) begin - if (reset) begin - prev_write <= 0; - prev_data <= '0; - prev_waddr <= '0; - end else begin - prev_write <= write; - prev_data <= ram[waddr]; - prev_waddr <= waddr; - end + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + 
prev_data <= ram[waddr]; + prev_waddr <= waddr; end - - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_assert - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) - end - end else begin : g_rdata_with_bypass - assign rdata_w = ram[raddr]; end + + assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; + if (RW_ASSERT) begin : g_rw_assert + `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) + end + end else begin : g_rdata_with_bypass + assign rdata = ram[raddr]; end `endif - if (OUT_REG != 0) begin : g_rdata_req - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (READ_ENABLE && reset) begin - rdata_r <= INIT_VALUE; - end else if (!READ_ENABLE || read) begin - rdata_r <= rdata_w; - end - end - assign rdata = rdata_r; - end else begin : g_rdata_comb - assign rdata = rdata_w; - end end endmodule diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 7974cb679..efce4b5f2 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -26,7 +26,6 @@ module VX_sp_ram #( parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, - parameter RADDR_REG = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, parameter ADDRW = `LOG2UP(SIZE) @@ -45,7 +44,6 @@ module VX_sp_ram #( .SIZE (SIZE), .WRENW (WRENW), .OUT_REG (OUT_REG), - .RADDR_REG (RADDR_REG), .LUTRAM (LUTRAM), .NO_RWCHECK (NO_RWCHECK), .RW_ASSERT (RW_ASSERT), From 68b78fc42fed53e99415945777a6c1e5f6968124 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 02:32:17 -0700 Subject: [PATCH 372/488] minor update --- hw/rtl/libs/VX_dp_ram.sv | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 595b3a42a..4220eca18 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ 
-113,28 +113,28 @@ module VX_dp_ram #( `ifdef QUARTUS if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end else begin : g_no_lutram `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end `else // Not Quartus if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end else begin : g_no_lutram `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end `endif end else begin : g_no_writeen if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end else begin : g_no_lutram `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + `RAM_READ_BLOCK_OUT_REG(read) end end end else begin : g_no_readen @@ -142,28 +142,28 @@ module VX_dp_ram #( `ifdef QUARTUS if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end else begin : g_no_lutram `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end `else // Not Quartus if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end else begin : g_no_lutram `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end `endif end else begin : g_no_writeen if (LUTRAM != 0) begin : g_lutram `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end else begin : g_no_lutram `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) + `RAM_READ_BLOCK_OUT_REG(read || write) end end end From 
1d5e4f63dd7e9fada25ecf1a9b6d7e7c86b364a8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 03:24:02 -0700 Subject: [PATCH 373/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 7c5ca1e40..9b55734e7 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -358,10 +358,10 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .stall (pipe_stall), - .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1), + .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1 && ~pipe_stall), .hit_line (line_idx_st1), .hit_way (tag_matches_st1), - .repl_valid (do_fill_st0), + .repl_valid (do_fill_st0 && ~pipe_stall), .repl_line (line_idx_st0), .repl_way (victim_way_st0) ); From e62b638d886d5df38be7871a5af59e1f800362a1 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 10:36:05 -0700 Subject: [PATCH 374/488] minor update --- hw/rtl/cache/VX_cache_repl.sv | 34 ++++++++++++++++----------------- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 59c5deddb..68f2e89d3 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -25,7 +25,7 @@ module plru_decoder #( input wire [`UP(NUM_WAYS-1)-1:0] lru_in, output wire [`UP(NUM_WAYS-1)-1:0] lru_out ); - if (NUM_WAYS != 1) begin : g_plru_decoder + if (NUM_WAYS > 1) begin : g_dec wire [`UP(NUM_WAYS-1)-1:0] data; `IGNORE_UNOPTFLAT_BEGIN wire [`UP(NUM_WAYS-1)-1:0] mask; @@ -41,7 +41,7 @@ module plru_decoder #( assign data[i] = ~way_idx[WAY_IDX_BITS-$clog2(i+2)]; end assign lru_out = (data & mask) | (lru_in & ~mask); - end else begin : g_plru_decoder_1 + end else begin : g_no_dec `UNUSED_VAR (way_idx) `UNUSED_VAR (lru_in) assign lru_out = '0; @@ -57,17 +57,19 @@ module plru_encoder #( input wire 
[`UP(NUM_WAYS-1)-1:0] lru_in, output wire [WAY_IDX_WIDTH-1:0] way_idx ); - if (NUM_WAYS != 1) begin : g_plru_encoder - wire [WAY_IDX_WIDTH-1:0] tmp; - for (genvar i = 0; i < WAY_IDX_WIDTH; ++i) begin : g_i - if (i == 0) begin : g_i_0 - assign tmp[WAY_IDX_WIDTH-1] = lru_in[0]; - end else begin : g_i_n - assign tmp[WAY_IDX_WIDTH-1-i] = lru_in[((2**i)-1)+:(1 << i)][tmp[WAY_IDX_WIDTH-1-:i]]; - end + if (NUM_WAYS > 1) begin : g_enc + wire [WAY_IDX_BITS-1:0] tmp; + for (genvar i = 0; i < WAY_IDX_BITS; ++i) begin : g_i + VX_mux #( + .N (2**i) + ) mux ( + .data_in (lru_in[((2**i)-1)+:(2**i)]), + .sel_in (tmp[WAY_IDX_BITS-1-:i]), + .data_out (tmp[WAY_IDX_BITS-1-i]) + ); end assign way_idx = tmp; - end else begin : g_plru_encoder_1 + end else begin : g_no_enc `UNUSED_VAR (lru_in) assign way_idx = '0; end @@ -105,7 +107,7 @@ module VX_cache_repl #( localparam LRU_WIDTH = NUM_WAYS-1; `UNUSED_VAR (repl_valid) - reg [`CS_LINES_PER_BANK-1:0][`UP(LRU_WIDTH)-1:0] plru_tree; + reg [`UP(LRU_WIDTH)-1:0] plru_tree [0:`CS_LINES_PER_BANK-1]; wire [WAY_IDX_WIDTH-1:0] repl_way_idx; wire [WAY_IDX_WIDTH-1:0] hit_way_idx; @@ -158,11 +160,9 @@ module VX_cache_repl #( `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) - reg [`CS_LINES_PER_BANK-1:0][`UP(CTR_WIDTH)-1:0] counters; + reg [`UP(CTR_WIDTH)-1:0] counters [0:`CS_LINES_PER_BANK-1]; always @(posedge clk) begin - if (reset) begin - counters <= '0; - end else if (repl_valid) begin + if (repl_valid) begin counters[repl_line] <= counters[repl_line] + 1; end end @@ -180,7 +180,7 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) - if (NUM_WAYS != 1) begin : g_repl_way + if (NUM_WAYS > 1) begin : g_repl_way reg [NUM_WAYS-1:0] victim_way; always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index abd68da24..2ff21655a 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -473,7 
+473,7 @@ module VX_mem_scheduler #( for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j - reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1]; wire rsp_wren = mem_rsp_fire_s && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); From 645befdce6ebe6b06438a8a0ea632da1f8860cdf Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 11:23:29 -0700 Subject: [PATCH 375/488] minor update --- hw/rtl/cache/VX_cache_repl.sv | 98 +++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 38 deletions(-) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 68f2e89d3..aac0483fd 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -21,9 +21,9 @@ module plru_decoder #( parameter WAY_IDX_BITS = $clog2(NUM_WAYS), parameter WAY_IDX_WIDTH = `UP(WAY_IDX_BITS) ) ( - input wire [WAY_IDX_WIDTH-1:0] way_idx, - input wire [`UP(NUM_WAYS-1)-1:0] lru_in, - output wire [`UP(NUM_WAYS-1)-1:0] lru_out + input wire [WAY_IDX_WIDTH-1:0] way_idx, + output wire [`UP(NUM_WAYS-1)-1:0] lru_data, + output wire [`UP(NUM_WAYS-1)-1:0] lru_mask ); if (NUM_WAYS > 1) begin : g_dec wire [`UP(NUM_WAYS-1)-1:0] data; @@ -40,11 +40,12 @@ module plru_decoder #( end assign data[i] = ~way_idx[WAY_IDX_BITS-$clog2(i+2)]; end - assign lru_out = (data & mask) | (lru_in & ~mask); + assign lru_data = data; + assign lru_mask = mask; end else begin : g_no_dec `UNUSED_VAR (way_idx) - `UNUSED_VAR (lru_in) - assign lru_out = '0; + assign lru_data = '0; + assign lru_mask = '0; end endmodule @@ -60,13 +61,17 @@ module plru_encoder #( if (NUM_WAYS > 1) begin : g_enc wire [WAY_IDX_BITS-1:0] tmp; for (genvar i = 0; i < WAY_IDX_BITS; ++i) begin : g_i - VX_mux #( - .N (2**i) - ) mux ( - .data_in (lru_in[((2**i)-1)+:(2**i)]), - .sel_in (tmp[WAY_IDX_BITS-1-:i]), - .data_out (tmp[WAY_IDX_BITS-1-i]) - ); + if 
(i == 0) begin : g_i_0 + assign tmp[WAY_IDX_WIDTH-1] = lru_in[0]; + end else begin : g_i_n + VX_mux #( + .N (2**i) + ) mux ( + .data_in (lru_in[((2**i)-1)+:(2**i)]), + .sel_in (tmp[WAY_IDX_BITS-1-:i]), + .data_out (tmp[WAY_IDX_BITS-1-i]) + ); + end end assign way_idx = tmp; end else begin : g_no_enc @@ -104,24 +109,29 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy - localparam LRU_WIDTH = NUM_WAYS-1; - `UNUSED_VAR (repl_valid) - - reg [`UP(LRU_WIDTH)-1:0] plru_tree [0:`CS_LINES_PER_BANK-1]; + localparam LRU_WIDTH = `UP(NUM_WAYS-1); wire [WAY_IDX_WIDTH-1:0] repl_way_idx; wire [WAY_IDX_WIDTH-1:0] hit_way_idx; - wire [`UP(LRU_WIDTH)-1:0] plru_update; + wire [LRU_WIDTH-1:0] plru_rdata; + wire [LRU_WIDTH-1:0] plru_wdata; + wire [LRU_WIDTH-1:0] plru_wmask; - always @(posedge clk) begin - if (reset) begin - plru_tree <= '0; - end else begin - if (hit_valid) begin - plru_tree[hit_line] <= plru_update; - end - end - end + VX_dp_ram #( + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH) + ) plru_store ( + .clk (clk), + .reset (reset), + .read (repl_valid), + .write (hit_valid), + .wren (plru_wmask), + .waddr (hit_line), + .raddr (repl_line), + .wdata (plru_wdata), + .rdata (plru_rdata) + ); VX_onehot_encoder #( .N (NUM_WAYS) @@ -134,15 +144,15 @@ module VX_cache_repl #( plru_decoder #( .NUM_WAYS (NUM_WAYS) ) plru_dec ( - .way_idx (hit_way_idx), - .lru_in (plru_tree[hit_line]), - .lru_out (plru_update) + .way_idx (hit_way_idx), + .lru_data (plru_wdata), + .lru_mask (plru_wmask) ); plru_encoder #( .NUM_WAYS (NUM_WAYS) ) plru_enc ( - .lru_in (plru_tree[repl_line]), + .lru_in (plru_rdata), .way_idx (repl_way_idx) ); @@ -160,16 +170,28 @@ module VX_cache_repl #( `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) - reg [`UP(CTR_WIDTH)-1:0] counters [0:`CS_LINES_PER_BANK-1]; - always @(posedge clk) begin - if (repl_valid) begin - counters[repl_line] <= 
counters[repl_line] + 1; - end - end + + wire [`UP(CTR_WIDTH)-1:0] ctr_rdata; + wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1; + + VX_sp_ram #( + .DATAW (`UP(CTR_WIDTH)), + .SIZE (`CS_LINES_PER_BANK) + ) ctr_store ( + .clk (clk), + .reset (reset), + .read (repl_valid), + .write (repl_valid), + .wren (1'b1), + .addr (repl_line), + .wdata (ctr_wdata), + .rdata (ctr_rdata) + ); + VX_decoder #( .N (WAY_IDX_BITS) ) ctr_decoder ( - .sel_in (counters[repl_line]), + .sel_in (ctr_rdata), .data_in (1'b1), .data_out (repl_way) ); From e06333b3c0f7670cf502d094f7dd3bcf44535f45 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 11:28:33 -0700 Subject: [PATCH 376/488] minor update --- ci/regression.sh.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 662b40717..390fd1459 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -153,6 +153,11 @@ cache() CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx + # replacement policy + CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DDCACHE_REPL_POLICY=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + CONFIGS="-DDCACHE_REPL_POLICY=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx + # test writeback CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress From f695e4d75447f9abd7820404bc3ac6a334ef0a11 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 15 Oct 2024 14:59:31 -0700 Subject: [PATCH 377/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 9b55734e7..942f35740 100644 --- 
a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -224,25 +224,26 @@ module VX_cache_bank #( wire creq_enable = creq_grant && core_req_valid; assign replay_ready = replay_grant + && ~(!WRITEBACK && replay_rw && mreq_queue_alm_full) // needed for writethrough && ~pipe_stall; assign mem_rsp_ready = fill_grant - && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions + && ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback && ~pipe_stall; assign flush_ready = flush_grant - && (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions + && ~(WRITEBACK && mreq_queue_alm_full) // needed for writeback && ~pipe_stall; assign core_req_ready = creq_grant - && ~mreq_queue_alm_full - && ~mshr_alm_full + && ~mreq_queue_alm_full // needed for fill requests + && ~mshr_alm_full // needed for mshr allocation && ~pipe_stall; wire init_fire = init_valid; wire replay_fire = replay_valid && replay_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; - wire flush_fire = flush_valid && flush_ready; + wire flush_fire = flush_valid && flush_ready; wire core_req_fire = core_req_valid && core_req_ready; wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0]; @@ -266,15 +267,14 @@ module VX_cache_bank #( assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign rw_sel = replay_valid ? replay_rw : core_req_rw; assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen; + assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : + (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) : (replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag)); assign flags_sel = core_req_valid ? 
core_req_flags : '0; - assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : - (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin : g_data_sel for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i if (i < `CS_WORD_WIDTH) begin : g_lo @@ -417,7 +417,7 @@ module VX_cache_bank #( assign addr_st1 = {line_tag_st1, line_idx_st1}; // ensure mshr replay always get a hit - `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) + `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time)) if (WRITE_ENABLE) begin : g_rdw_hazard // This implementation uses single-port BRAMs for the tags and data stores. @@ -503,6 +503,7 @@ module VX_cache_bank #( .evict_byteen(evict_byteen_st1) ); + // only allocate MSHR entries for non-replay core requests wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~is_replay_st0; wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~is_replay_st1; @@ -636,6 +637,8 @@ module VX_cache_bank #( wire has_dirty_bytes = (| evict_byteen_st1); `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end + // issue a fill request on a read/write miss + // issue a writeback on a dirty line eviction assign mreq_queue_push = (((do_read_st1 || do_write_st1) && ~is_hit_st1 && ~mshr_pending_st1) || do_writeback_st1) && ~pipe_stall; @@ -653,6 +656,8 @@ module VX_cache_bank #( .data_in (byteen_st1), .data_out (line_byteen) ); + // issue a fill request on a read miss + // issue a memory write on a write request assign mreq_queue_push = ((do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) || do_write_st1) && ~pipe_stall; @@ -667,6 +672,7 @@ module VX_cache_bank #( `UNUSED_VAR (evict_byteen_st1) end end else begin : g_mreq_queue_ro + // 
issue a fill request on a read miss assign mreq_queue_push = (do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1) && ~pipe_stall; assign mreq_queue_addr = addr_st1; From a7ba377581aaaa9fb08e228fd87db045ec084575 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 16 Oct 2024 18:04:11 -0700 Subject: [PATCH 378/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 20 ++++++++++++-------- hw/rtl/cache/VX_cache_data.sv | 5 +++-- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- sim/rtlsim/processor.cpp | 2 -- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 942f35740..3b6b3d076 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -169,7 +169,7 @@ module VX_cache_bank #( wire is_replay_st0, is_replay_st1; wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire mshr_pending_st0, mshr_pending_st1; - wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_id_st0, mshr_prev_id_st1; + wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1; wire mshr_empty; wire flush_valid; @@ -404,8 +404,8 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_id_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_id_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, 
word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 @@ -426,10 +426,13 @@ module VX_cache_bank #( // Data fill/flush can perform read and write in the same stage, since way_idx is available in st0. // A data read should happen in st0 for its result to be available in st1. // A data write should happen in st1 when the tag hit status is available. + // The r/w hazard is needed for consecutive writes since they both wonly write in st1. + // The r/w hazard is also not needed for next writethrough fill/flush to the same line. + // For reads or writeback fill/flush to the same line, we sill need the hazard + // because the data writeen in st1 cannot be read at the same time in st0 without extra forwarding logic. wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; - wire is_read_sel = is_creq_sel && !rw_sel; wire is_write_sel = is_creq_sel && rw_sel; - wire is_same_read_sel = is_read_sel && (line_idx_sel == line_idx_st0); + wire is_same_line = (line_idx_sel == line_idx_st0); always @(posedge clk) begin if (reset) begin post_hazard <= 0; @@ -437,7 +440,8 @@ module VX_cache_bank #( end else begin if (!crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || is_same_read_sel || (is_flush_sel && !WRITEBACK)); + rdw_hazard <= do_write_st0 && valid_sel + && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); end end end @@ -575,7 +579,7 @@ module VX_cache_bank #( .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_pending(mshr_pending_st0), - .allocate_previd(mshr_prev_id_st0), + .allocate_previd(mshr_previd_st0), `UNUSED_PIN (allocate_ready), // finalize @@ -583,7 +587,7 @@ module VX_cache_bank #( .finalize_is_release(mshr_release_st1), .finalize_is_pending(mshr_pending_st1), .finalize_id (mshr_id_st1), - 
.finalize_previd(mshr_prev_id_st1) + .finalize_previd(mshr_previd_st1) ); // schedule core response diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 278caccd5..ebce2109d 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -141,8 +141,9 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j wire word_en = (WORD_SIZE == 1) || (word_idx == j); - assign line_wdata[j] = write ? write_data : fill_data[j]; - assign wren_w[j] = write ? (write_byteen & {WORD_SIZE{word_en}}) : {WORD_SIZE{1'b1}}; + // warning: should prioritize the fill over write to handle the case where both are asserted + assign line_wdata[j] = fill ? fill_data[j] : write_data; + assign wren_w[j] = fill ? {WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); end assign line_wren = wren_w; assign line_write = (fill && ((NUM_WAYS == 1) || evict_way[i])) diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 84bcc0072..5da33cbba 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -52,7 +52,7 @@ module VX_issue import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (`ISSUE_WIDTH); - for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices + for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_slices VX_decode_if #( .NUM_WARPS (PER_ISSUE_WARPS) ) per_issue_decode_if(); diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 6e9e2081c..674ca2686 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -52,7 +52,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_slices VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) 
lsu_slice( diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index 32f4b4e1e..1807e5630 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -241,8 +241,6 @@ private: #ifdef VCD_OUTPUT if (sim_trace_enabled()) { tfp_->dump(timestamp); - } else { - exit(-1); } #endif ++timestamp; From 5971158f434872f59a0b28225d3255b0f4f1f528 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 16 Oct 2024 20:22:42 -0700 Subject: [PATCH 379/488] minor update --- ci/regression.sh.in | 1 + hw/rtl/cache/VX_cache_bank.sv | 61 +++++++++++---------- hw/rtl/cache/VX_cache_data.sv | 100 +++++++++++++++++++--------------- 3 files changed, 88 insertions(+), 74 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 390fd1459..c3abb43df 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -142,6 +142,7 @@ cache() CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx # test cache ways + CONFIGS="-DICACHE_NUM_WAYS=1 -DDCACHE_NUM_WAYS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 3b6b3d076..ad9ad588a 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -393,7 +393,6 @@ module VX_cache_bank #( .evict_way_r(evict_way_st1) ); - wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; assign mshr_id_st0 = is_replay_st0 ? 
replay_id_st0 : mshr_alloc_id_st0; @@ -440,8 +439,7 @@ module VX_cache_bank #( end else begin if (!crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel - && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); + rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); end end end @@ -737,19 +735,24 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) + `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, + rsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) end if (mem_rsp_fire) begin - `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) + `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) end if (replay_fire) begin - `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin if (core_req_rw) begin - `TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) + 
`TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) end else begin - `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) + `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) end end if (do_init_st0) begin @@ -764,45 +767,43 @@ module VX_cache_bank #( `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && ~pipe_stall) begin - if (is_hit_st1) begin - `TRACE(3, ("%t: %s tags-rd-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) - end else begin - `TRACE(3, ("%t: %s tags-rd-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) - end + `TRACE(3, ("%t: %s tags-read: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) end if (do_write_st1 && ~pipe_stall) begin - if (is_hit_st1) begin - `TRACE(3, ("%t: %s tags-wr-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) - end else begin - `TRACE(3, ("%t: %s tags-wr-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1)) - end 
+ `TRACE(3, ("%t: %s tags-write: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) end if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, 
data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) end if (crsp_queue_fire) begin - `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin if (!WRITEBACK && do_write_st1) begin - `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else if (WRITEBACK && do_writeback_st1) begin - `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) end else begin - `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1)) + `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1)) end end end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 
ebce2109d..dc07af1ed 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -56,7 +56,7 @@ module VX_cache_data #( `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1; + localparam BYTEENW = (WRITE_ENABLE != 0 || NUM_WAYS != 1) ? (LINE_SIZE * NUM_WAYS) : 1; wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; @@ -125,59 +125,71 @@ module VX_cache_data #( end else begin : g_no_writeback `UNUSED_VAR (init) + `UNUSED_VAR (flush) assign line_dirty = 0; assign evict_data = '0; assign evict_byteen = '0; end - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [BYTEENW-1:0] line_wren; + wire line_write; + wire line_read; - wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; - wire line_write; - wire line_read; - - if (WRITE_ENABLE != 0) begin : g_line_data - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j - wire word_en = (WORD_SIZE == 1) || (word_idx == j); - // warning: should prioritize the fill over write to handle the case where both are asserted - assign line_wdata[j] = fill ? fill_data[j] : write_data; - assign wren_w[j] = fill ? {WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); + if (BYTEENW != 1) begin : g_wdata + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren_w; + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways + wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; + if (WRITE_ENABLE != 0) begin : g_we + wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] word_wdata; + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] word_wren; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_words + wire word_en = (WORD_SIZE == 1) || (word_idx == j); + // warning: should prioritize the fill over write in case both are asserted + assign word_wdata[j] = fill ? 
fill_data[j] : write_data; + assign word_wren[j] = fill ? {WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); + end + wire way_en = fill ? fill_way_en : tag_matches[i]; + assign line_wdata[i] = word_wdata; + assign line_wren_w[i] = word_wren & {LINE_SIZE{way_en}}; + end else begin : g_ro + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_data) + `UNUSED_VAR (word_idx) + assign line_wdata[i] = fill_data; + assign line_wren_w[i] = {LINE_SIZE{fill_way_en}}; end - assign line_wren = wren_w; - assign line_write = (fill && ((NUM_WAYS == 1) || evict_way[i])) - || (write && tag_matches[i]); - assign line_read = read || ((fill || flush) && WRITEBACK); - end else begin : g_line_data_ro - `UNUSED_VAR (write) - `UNUSED_VAR (flush) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) - `UNUSED_VAR (word_idx) - assign line_wdata = fill_data; - assign line_wren = 1'b1; - assign line_write = fill && ((NUM_WAYS == 1) || evict_way[i]); - assign line_read = read; end - - VX_sp_ram #( - .DATAW (`CS_LINE_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), - .OUT_REG (1) - ) data_store ( - .clk (clk), - .reset (reset), - .read (line_read), - .write (line_write), - .wren (line_wren), - .addr (line_idx), - .wdata (line_wdata), - .rdata (line_rdata[i]) - ); + assign line_wren = line_wren_w; + end else begin : g_ro_1w_wdata + `UNUSED_VAR (write) + `UNUSED_VAR (evict_way) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_data) + assign line_wdata = fill_data; + assign line_wren = 1'b1; end + assign line_write = fill || (write && WRITE_ENABLE); + assign line_read = read || ((fill || flush) && WRITEBACK); + + VX_sp_ram #( + .DATAW (NUM_WAYS * `CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (BYTEENW), + .OUT_REG (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (line_read), + .write (line_write), + .wren (line_wren), + .addr (line_idx), + .wdata (line_wdata), + .rdata (line_rdata) + ); + wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; 
VX_onehot_encoder #( .N (NUM_WAYS) From 077b682d7d649dcd51a41a41da488c12d83d3842 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 17 Oct 2024 04:58:29 -0700 Subject: [PATCH 380/488] minor update --- hw/rtl/Vortex.sv | 6 ++-- hw/rtl/cache/VX_cache_bank.sv | 41 +++++++++++------------- hw/rtl/cache/VX_cache_data.sv | 44 ++++++++++++------------- hw/rtl/cache/VX_cache_wrap.sv | 12 +++---- hw/rtl/core/VX_alu_int.sv | 2 +- hw/rtl/core/VX_dcr_data.sv | 4 +-- hw/rtl/core/VX_lsu_slice.sv | 32 +++++++++---------- hw/rtl/core/VX_scoreboard.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 40 +++++++++++------------ hw/rtl/libs/VX_mem_scheduler.sv | 48 ++++++++++++++-------------- hw/rtl/mem/VX_gbar_unit.sv | 4 +-- hw/rtl/mem/VX_local_mem.sv | 6 ++-- tests/regression/dogfood/testcases.h | 48 ++++++++++++++-------------- 13 files changed, 143 insertions(+), 146 deletions(-) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 0fa3ce31f..bae697c65 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -204,13 +204,13 @@ module Vortex import VX_gpu_pkg::*; ( always @(posedge clk) begin if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid)) + `TRACE(2, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid)) end else begin - `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid)) + `TRACE(2, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid)) + `TRACE(2, ("%t: MEM Rd Rsp: 
tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index ad9ad588a..0e16e6c65 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -161,7 +161,7 @@ module VX_cache_bank #( wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; - wire [`CS_WORD_WIDTH-1:0] write_data_st0, write_data_st1; + wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1; wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; @@ -335,8 +335,7 @@ module VX_cache_bank #( wire do_fill_st1 = valid_st1 && is_fill_st1; wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; - assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0]; - + assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; @@ -346,6 +345,8 @@ module VX_cache_bank #( wire do_lookup_st0 = do_read_st0 || do_write_st0; + wire do_lookup_st1 = do_read_st1 || do_write_st1; + reg [NUM_WAYS-1:0] victim_way_st0; VX_cache_repl #( @@ -358,7 +359,7 @@ module VX_cache_bank #( .clk (clk), .reset (reset), .stall (pipe_stall), - .hit_valid ((do_read_st1 || do_write_st1) && is_hit_st1 && ~pipe_stall), + .hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall), .hit_line (line_idx_st1), .hit_way (tag_matches_st1), .repl_valid (do_fill_st0 && ~pipe_stall), @@ -437,9 +438,9 @@ module VX_cache_bank #( post_hazard <= 0; rdw_hazard <= 0; end else begin - if (!crsp_queue_stall) begin + if (~crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); + rdw_hazard <= do_write_st0 && valid_sel && 
~(is_write_sel || (is_same_line && !WRITEBACK && (/*is_fill_sel ||*/is_flush_sel))); end end end @@ -448,7 +449,7 @@ module VX_cache_bank #( assign post_hazard = 0; end - assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0]; + assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (data_st1) wire [`CS_LINE_WIDTH-1:0] evict_data_st1; @@ -463,7 +464,7 @@ module VX_cache_bank #( // data writes should happen in st1 when the tag hit is available, // and use line_idx_st1 to ensure the correct line is updated. // if a rdw hazard is active due to conflict, ensure we don't write twice. - assign data_write = do_write_st1 && !post_hazard && ~crsp_queue_stall; + assign data_write = do_write_st1 && ~post_hazard && ~crsp_queue_stall; assign data_line_idx = data_write ? line_idx_st1 : line_idx_st0; end else begin : g_data_ctrl_ro `UNUSED_VAR (post_hazard) @@ -495,7 +496,7 @@ module VX_cache_bank #( .tag_matches(tag_matches_st1), .line_idx (data_line_idx), .fill_data (data_st0), - .write_data (write_data_st1), + .write_word (write_word_st1), .word_idx (word_idx_st1), .write_byteen(byteen_st1), // outputs @@ -574,7 +575,7 @@ module VX_cache_bank #( .allocate_valid (mshr_allocate_st0 && ~pipe_stall), .allocate_addr (addr_st0), .allocate_rw (rw_st0), - .allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}), + .allocate_data ({word_idx_st0, byteen_st0, write_word_st0, tag_st0, req_idx_st0}), .allocate_id (mshr_alloc_id_st0), .allocate_pending(mshr_pending_st0), .allocate_previd(mshr_previd_st0), @@ -641,7 +642,7 @@ module VX_cache_bank #( end // issue a fill request on a read/write miss // issue a writeback on a dirty line eviction - assign mreq_queue_push = (((do_read_st1 || do_write_st1) && ~is_hit_st1 && ~mshr_pending_st1) + assign mreq_queue_push = ((do_lookup_st1 && ~is_hit_st1 && ~mshr_pending_st1) || do_writeback_st1) && ~pipe_stall; assign mreq_queue_addr = is_fill_or_flush_st1 ? 
evict_addr_st1 : addr_st1; @@ -665,7 +666,7 @@ module VX_cache_bank #( && ~pipe_stall; assign mreq_queue_addr = addr_st1; assign mreq_queue_rw = rw_st1; - assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}}; + assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_word_st1}}; assign mreq_queue_byteen = rw_st1 ? line_byteen : '1; `UNUSED_VAR (is_fill_or_flush_st1) `UNUSED_VAR (do_writeback_st1) @@ -735,8 +736,8 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, - rsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) + `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, + crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) end if (mem_rsp_fire) begin `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, @@ -766,13 +767,9 @@ module VX_cache_bank #( `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end - if (do_read_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-read: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) - end - if (do_write_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-write: addr=0x%0h, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) + if (do_lookup_st1 && ~pipe_stall) begin + `TRACE(3, ("%t: %s tags-Lookup: addr=0x%0h, rw=%b, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, 
tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) end if (do_fill_st0 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, @@ -788,7 +785,7 @@ module VX_cache_bank #( end if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1)) + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) end if (crsp_queue_fire) begin `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index dc07af1ed..7b5f1c552 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -44,7 +44,7 @@ module VX_cache_data #( input wire [NUM_WAYS-1:0] evict_way, input wire [NUM_WAYS-1:0] tag_matches, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, - input wire [`CS_WORD_WIDTH-1:0] write_data, + input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, // outputs @@ -79,26 +79,23 @@ module VX_cache_data #( wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata + wire evict_way_en = (NUM_WAYS == 1) || evict_way[i]; wire dirty_data = write; // only asserted on writes - wire dirty_wren = init || (write ? tag_matches[i] : evict_way[i]); - + wire dirty_wren = init || (write ? 
tag_matches[i] : evict_way_en); if (DIRTY_BYTES != 0) begin : g_dirty_bytes wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_data; wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j - wire word_sel = tag_matches[i] && ((WORD_SIZE == 1) || (word_idx == j)); - wire [WORD_SIZE-1:0] word_en = write_byteen & {WORD_SIZE{word_sel}}; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_words + wire word_en = ((`CS_WORDS_PER_LINE == 1) || (word_idx == j)); + wire [WORD_SIZE-1:0] write_mask = write_byteen & {WORD_SIZE{word_en && tag_matches[i]}}; assign bytes_data[j] = {WORD_SIZE{write}}; // only asserted on writes - assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? word_en : {WORD_SIZE{evict_way[i]}}); + assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? write_mask : {WORD_SIZE{evict_way_en}}); end assign byteen_wdata[i] = {dirty_data, bytes_data}; assign byteen_wren[i] = {dirty_wren, bytes_wren}; - assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; end else begin : g_no_dirty_bytes assign byteen_wdata[i] = dirty_data; assign byteen_wren[i] = dirty_wren; - assign line_dirty = byteen_rdata[evict_way_idx_r]; - assign evict_byteen = '1; end end @@ -121,6 +118,13 @@ module VX_cache_data #( .rdata (byteen_rdata) ); + if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen + assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; + end else begin : g_line_dirty + assign line_dirty = byteen_rdata[evict_way_idx_r]; + assign evict_byteen = '1; + end + assign evict_data = line_rdata[evict_way_idx_r]; end else begin : g_no_writeback @@ -141,21 +145,17 @@ module VX_cache_data #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; if (WRITE_ENABLE != 0) begin : g_we - wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] word_wdata; - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] word_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; 
++j) begin : g_words - wire word_en = (WORD_SIZE == 1) || (word_idx == j); - // warning: should prioritize the fill over write in case both are asserted - assign word_wdata[j] = fill ? fill_data[j] : write_data; - assign word_wren[j] = fill ? {WORD_SIZE{1'b1}} : (write_byteen & {WORD_SIZE{word_en}}); + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_wren; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_wren + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_wren[j] = write_byteen & {WORD_SIZE{word_en}}; end - wire way_en = fill ? fill_way_en : tag_matches[i]; - assign line_wdata[i] = word_wdata; - assign line_wren_w[i] = word_wren & {LINE_SIZE{way_en}}; + assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren_w[i] = fill ? {LINE_SIZE{fill_way_en}} : (write_wren & {LINE_SIZE{tag_matches[i]}}); end else begin : g_ro `UNUSED_VAR (write) `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) + `UNUSED_VAR (write_word) `UNUSED_VAR (word_idx) assign line_wdata[i] = fill_data; assign line_wren_w[i] = {LINE_SIZE{fill_way_en}}; @@ -166,7 +166,7 @@ module VX_cache_data #( `UNUSED_VAR (write) `UNUSED_VAR (evict_way) `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_data) + `UNUSED_VAR (write_word) assign line_wdata = fill_data; assign line_wren = 1'b1; end diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index ca8c53eda..a9b872dd0 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -240,13 +240,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin if (core_bus_if[i].req_data.rw) begin - `TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) + `TRACE(2, ("%t: %s 
core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) end else begin - `TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) + `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) end end if (core_rsp_fire) begin - `TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) + `TRACE(2, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) end end end @@ -268,15 +268,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin if (mem_bus_if.req_data.rw) begin - `TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) end else begin - `TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)) end end diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 53c7ae57a..8e43d8f3f 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -194,7 +194,7 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (br_enable) begin - `TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)) end end diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 042c87e55..6a13e034a 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*; ( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (dcr_bus_if.write_valid) begin - `TRACE(1, ("%t: base-dcr: state=", $time)) + `TRACE(2, ("%t: base-dcr: state=", $time)) trace_base_dcr(1, dcr_bus_if.write_addr); - `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)) + `TRACE(2, (", data=0x%h\n", dcr_bus_if.write_data)) end end `endif diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 1f39ab5a7..67fc3eaa8 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -504,30 +504,30 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID)) + `TRACE(2, ("%t: *** %s fence wait\n", $time, INSTANCE_ID)) end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) - `TRACE(1, (", 
byteen=0x%0h, data=", mem_req_byteen)) - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES) - `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) + `TRACE(2, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES) + `TRACE(2, (", byteen=0x%0h, data=", mem_req_byteen)) + `TRACE_ARRAY1D(2, "0x%0h", mem_req_data, NUM_LANES) + `TRACE(2, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end else begin - `TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) - `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) + `TRACE(2, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", full_addr, NUM_LANES) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", mem_req_flags, NUM_LANES) + `TRACE(2, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", + `TRACE(2, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)) - 
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES) - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) + `TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data, NUM_LANES) + `TRACE(2, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) end end `endif diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 1fe9a7f44..5b01cc550 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -206,7 +206,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end else begin if (staging_if[w].valid && ~staging_if[w].ready) begin `ifdef DBG_TRACE_PIPELINE - `TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", + `TRACE(4, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, operands_busy, staging_if[w].data.uuid)) `endif diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 760290a1c..19a704095 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -351,30 +351,30 @@ module VX_mem_coalescer #( always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) - `TRACE(1, (", byteen=")) - `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS) - `TRACE(1, (", data=")) - `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS) + `TRACE(2, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS) + `TRACE(2, (", byteen=")) + `TRACE_ARRAY1D(2, "0x%h", out_req_byteen, OUT_REQS) + `TRACE(2, (", data=")) + `TRACE_ARRAY1D(2, "0x%0h", out_req_data, OUT_REQS) end else begin - `TRACE(1, 
("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) - `TRACE(1, (", flags=")) - `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) + `TRACE(2, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", out_req_addr, OUT_REQS) + `TRACE(2, (", flags=")) + `TRACE_ARRAY1D(2, "%b", out_req_flags, OUT_REQS) end - `TRACE(1, (", offset=")) - `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS) - `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) + `TRACE(2, (", offset=")) + `TRACE_ARRAY1D(2, "%0d", out_req_offset, NUM_REQS) + `TRACE(2, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) end if (out_rsp_fire) begin - `TRACE(1, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) - `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS) - `TRACE(1, (", offset=")) - `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS) - `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)) + `TRACE(2, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) + `TRACE_ARRAY1D(2, "0x%0h", out_rsp_data, OUT_REQS) + `TRACE(2, (", offset=")) + `TRACE_ARRAY1D(2, "%0d", ibuf_dout_offset, NUM_REQS) + `TRACE(2, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 2ff21655a..523257eb4 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -593,41 +593,41 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, 
CORE_REQS) - `TRACE(1, (", byteen=")) - `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS) - `TRACE(1, (", data=")) - `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS) + `TRACE(2, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS) + `TRACE(2, (", byteen=")) + `TRACE_ARRAY1D(2, "0x%h", core_req_byteen, CORE_REQS) + `TRACE(2, (", data=")) + `TRACE_ARRAY1D(2, "0x%0h", core_req_data, CORE_REQS) end else begin - `TRACE(1, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) + `TRACE(2, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(2, "0x%h", core_req_addr, CORE_REQS) end - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) + `TRACE(2, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) - `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS) - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) + `TRACE(2, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) + `TRACE_ARRAY1D(2, "0x%0h", core_rsp_data, CORE_REQS) + `TRACE(2, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) - `TRACE(1, (", byteen=")) - `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS) - `TRACE(1, (", data=")) - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS) + `TRACE(2, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS) + `TRACE(2, 
(", byteen=")) + `TRACE_ARRAY1D(2, "0x%h", mem_req_byteen_s, CORE_CHANNELS) + `TRACE(2, (", data=")) + `TRACE_ARRAY1D(2, "0x%0h", mem_req_data_s, CORE_CHANNELS) end else begin - `TRACE(1, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) + `TRACE(2, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(2, "0x%h", mem_req_addr_s, CORE_CHANNELS) end - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) + `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end if (mem_rsp_fire_s) begin - `TRACE(1, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) + `TRACE(2, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) + `TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) + `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) end end `endif diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index c9707748f..ac4c09349 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -60,11 +60,11 @@ module VX_gbar_unit #( `ifdef DBG_TRACE_GBAR always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin - `TRACE(1, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", + `TRACE(2, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)) end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) + `TRACE(2, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) end end `endif diff --git 
a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 2ba66347e..03c4acdd1 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -330,15 +330,15 @@ module VX_local_mem import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])) end else begin - `TRACE(1, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", + `TRACE(2, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])) end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin - `TRACE(1, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", + `TRACE(2, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])) end end diff --git a/tests/regression/dogfood/testcases.h b/tests/regression/dogfood/testcases.h index f5760ec06..f3562bb17 100644 --- a/tests/regression/dogfood/testcases.h +++ b/tests/regression/dogfood/testcases.h @@ -141,7 +141,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] + b[i]; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -171,7 +171,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i]; 
if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -201,7 +201,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] / b[i]; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -233,7 +233,7 @@ public: auto y = a[i] * b[i]; auto ref = x + y; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -263,7 +263,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] + b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -293,7 +293,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] - b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; 
++errors; } } @@ -323,7 +323,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -353,7 +353,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i] + b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -383,7 +383,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] * b[i] - b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -413,7 +413,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = -a[i] * b[i] - b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -443,7 +443,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = -a[i] * b[i] + b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", 
b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -475,7 +475,7 @@ public: auto y = a[i] * b[i] + b[i]; auto ref = x + y; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -505,7 +505,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = a[i] / b[i]; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -537,7 +537,7 @@ public: auto y = b[i] / a[i]; auto ref = x + y; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -568,7 +568,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = sqrt(a[i] * b[i]); if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -600,7 +600,7 @@ public: auto x = a[i] + b[i]; auto ref = (int32_t)x; if (c[i] != ref) { - 
std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -632,7 +632,7 @@ public: auto x = a[i] + b[i]; auto ref = (uint32_t)x; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -663,7 +663,7 @@ public: auto x = a[i] + b[i]; auto ref = (float)x; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -694,7 +694,7 @@ public: auto x = a[i] + b[i]; auto ref = (float)x; if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -724,7 +724,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = fmin(fmax(1.0f, a[i]), b[i]); if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ 
-754,7 +754,7 @@ public: for (uint32_t i = 0; i < n; ++i) { auto ref = std::min(std::max(1, a[i]), b[i]); if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -787,7 +787,7 @@ public: ref = sinf(ref); } if (!almost_equal(c[i], ref)) { - std::cout << "error at result #" << i << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << ref << ", actual=" << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl; ++errors; } } @@ -820,7 +820,7 @@ public: for (uint32_t i = 0; i < n; ++i) { uint32_t ref = a[i] + 1; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; ++errors; } } @@ -857,7 +857,7 @@ public: for (uint32_t i = 0; i < n; ++i) { uint32_t ref = a[i] + 1; if (c[i] != ref) { - std::cout << "error at result #" << i << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; + std::cout << "error at result #" << std::dec << i << std::hex << ": expected=" << std::hex << ref << ", actual=" << c[i] << std::endl; ++errors; } } From 91fee5da1154aa891362e864dd1c79ee9cd67a32 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 17 Oct 2024 11:25:17 -0700 Subject: [PATCH 381/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 0e16e6c65..9f0575328 
100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -440,7 +440,7 @@ module VX_cache_bank #( end else begin if (~crsp_queue_stall) begin post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (/*is_fill_sel ||*/is_flush_sel))); + rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); end end end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 7b5f1c552..aeb4a11b3 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -79,18 +79,20 @@ module VX_cache_data #( wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata + wire evict = fill || flush; wire evict_way_en = (NUM_WAYS == 1) || evict_way[i]; wire dirty_data = write; // only asserted on writes - wire dirty_wren = init || (write ? tag_matches[i] : evict_way_en); + wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]); if (DIRTY_BYTES != 0) begin : g_dirty_bytes - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_data; - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_words - wire word_en = ((`CS_WORDS_PER_LINE == 1) || (word_idx == j)); - wire [WORD_SIZE-1:0] write_mask = write_byteen & {WORD_SIZE{word_en && tag_matches[i]}}; - assign bytes_data[j] = {WORD_SIZE{write}}; // only asserted on writes - assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? 
write_mask : {WORD_SIZE{evict_way_en}}); + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end + wire [LINE_SIZE-1:0] bytes_data = {LINE_SIZE{write}}; // only asserted on writes + wire [LINE_SIZE-1:0] bytes_wren = {LINE_SIZE{init}} + | {LINE_SIZE{evict && evict_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); assign byteen_wdata[i] = {dirty_data, bytes_data}; assign byteen_wren[i] = {dirty_wren, bytes_wren}; end else begin : g_no_dirty_bytes @@ -145,13 +147,14 @@ module VX_cache_data #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; if (WRITE_ENABLE != 0) begin : g_we - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_wren; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_wren + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); - assign write_wren[j] = write_byteen & {WORD_SIZE{word_en}}; + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end - assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; - assign line_wren_w[i] = fill ? {LINE_SIZE{fill_way_en}} : (write_wren & {LINE_SIZE{tag_matches[i]}}); + assign line_wdata[i] = (fill && fill_way_en) ? 
fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren_w[i] = {LINE_SIZE{fill && fill_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end else begin : g_ro `UNUSED_VAR (write) `UNUSED_VAR (write_byteen) From 6b1091e08f78d152e6fb560b350f82f60fedd002 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 17 Oct 2024 14:07:22 -0700 Subject: [PATCH 382/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 4 +++- hw/rtl/cache/VX_cache_mshr.sv | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 9f0575328..a24a07ee9 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -616,7 +616,9 @@ module VX_cache_bank #( .ready_out (core_rsp_ready) ); - assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready; + // we use 'do_read_st1' instead 'crsp_queue_valid' + // to remove costly 'is_hit_st1' signal from critical paths. + assign crsp_queue_stall = do_read_st1 && ~crsp_queue_ready; // schedule memory request diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index ae6ebb7fe..17546ba2a 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -101,8 +101,8 @@ module VX_cache_mshr #( ); `UNUSED_PARAM (BANK_ID) - reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; - reg [MSHR_ADDR_WIDTH-1:0] next_index [MSHR_SIZE-1:0]; + reg [`CS_LINE_ADDR_WIDTH-1:0] addr_table [0:MSHR_SIZE-1]; + reg [MSHR_ADDR_WIDTH-1:0] next_index [0:MSHR_SIZE-1]; reg [MSHR_SIZE-1:0] valid_table, valid_table_n; reg [MSHR_SIZE-1:0] next_table, next_table_x, next_table_n; From 8f29ad58aeaa2525b6f9ddbfbe404fb7e9ede7ab Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 18 Oct 2024 23:54:20 -0700 Subject: [PATCH 383/488] block ram redesign to support synthesizable write-first mode --- hw/rtl/VX_platform.vh | 3 + hw/rtl/cache/VX_cache.sv | 20 +- hw/rtl/cache/VX_cache_bank.sv | 4 +- hw/rtl/cache/VX_cache_bypass.sv | 2 
+- hw/rtl/cache/VX_cache_cluster.sv | 18 +- hw/rtl/cache/VX_cache_data.sv | 93 ++++----- hw/rtl/cache/VX_cache_repl.sv | 29 ++- hw/rtl/cache/VX_cache_top.sv | 12 +- hw/rtl/cache/VX_cache_wrap.sv | 18 +- hw/rtl/core/VX_operands.sv | 4 +- hw/rtl/libs/VX_dp_ram.sv | 331 +++++++++++++++---------------- hw/rtl/libs/VX_fifo_queue.sv | 43 +--- hw/rtl/libs/VX_scope_tap.sv | 8 +- hw/rtl/libs/VX_sp_ram.sv | 4 +- hw/rtl/mem/VX_local_mem.sv | 4 +- 15 files changed, 273 insertions(+), 320 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 8ea849ed3..eb58e1798 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -173,6 +173,7 @@ endgenerate `ifdef QUARTUS `define MAX_FANOUT 8 +`define MAX_LUTRAM 1024 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) @@ -182,6 +183,7 @@ endgenerate `define STRING string `elsif VIVADO `define MAX_FANOUT 8 +`define MAX_LUTRAM 1024 `define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) @@ -191,6 +193,7 @@ endgenerate `define STRING `else `define MAX_FANOUT 8 +`define MAX_LUTRAM 1024 `define IF_DATA_SIZE(x) x.DATA_WIDTH `define USE_BLOCK_BRAM `define USE_FAST_BRAM diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index b27b2df31..40f062ecc 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -20,22 +20,22 @@ module VX_cache import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 4096, + parameter CACHE_SIZE = 32768, // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 4, // Number of associative ways - parameter NUM_WAYS = 1, + parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = `XLEN/8, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter 
CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -49,7 +49,7 @@ module VX_cache import VX_gpu_pkg::*; #( parameter DIRTY_BYTES = 0, // Replacement policy - parameter REPL_POLICY = `CS_REPL_CYCLIC, + parameter REPL_POLICY = `CS_REPL_CYCLIC, // Request debug identifier parameter UUID_WIDTH = 0, @@ -61,10 +61,10 @@ module VX_cache import VX_gpu_pkg::*; #( parameter FLAGS_WIDTH = 0, // Core response output register - parameter CORE_OUT_BUF = 0, + parameter CORE_OUT_BUF = 3, // Memory request output register - parameter MEM_OUT_BUF = 0 + parameter MEM_OUT_BUF = 3 ) ( // PERF `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index a24a07ee9..20c0c0612 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -295,6 +295,8 @@ module VX_cache_bank #( assign req_uuid_sel = '0; end + wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; + wire is_init_sel = init_valid; wire is_creq_sel = creq_enable || replay_enable; wire is_fill_sel = fill_enable; @@ -364,6 +366,7 @@ module VX_cache_bank #( .hit_way (tag_matches_st1), .repl_valid (do_fill_st0 && ~pipe_stall), .repl_line (line_idx_st0), + .repl_line_n(line_idx_sel), .repl_way (victim_way_st0) ); @@ -430,7 +433,6 @@ module VX_cache_bank #( // The r/w hazard is also not needed for next writethrough fill/flush to the same line. // For reads or writeback fill/flush to the same line, we sill need the hazard // because the data writeen in st1 cannot be read at the same time in st0 without extra forwarding logic. 
- wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; wire is_write_sel = is_creq_sel && rw_sel; wire is_same_line = (line_idx_sel == line_idx_st0); always @(posedge clk) begin diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 4b3b3a59a..8f6234364 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -268,7 +268,7 @@ module VX_cache_bypass #( for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); end - + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index b4c2db979..32662e848 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -24,22 +24,22 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 16384, + parameter CACHE_SIZE = 32768, // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 4, // Number of associative ways parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -53,7 +53,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( parameter DIRTY_BYTES = 0, // Replacement policy - parameter REPL_POLICY = `CS_REPL_CYCLIC, + parameter REPL_POLICY = `CS_REPL_CYCLIC, // Request debug identifier parameter UUID_WIDTH = 0, @@ -68,10 +68,10 @@ module 
VX_cache_cluster import VX_gpu_pkg::*; #( parameter NC_ENABLE = 0, // Core response output buffer - parameter CORE_OUT_BUF = 0, + parameter CORE_OUT_BUF = 3, // Memory request output buffer - parameter MEM_OUT_BUF = 0 + parameter MEM_OUT_BUF = 3 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index aeb4a11b3..22326e63b 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -56,7 +56,7 @@ module VX_cache_data #( `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - localparam BYTEENW = (WRITE_ENABLE != 0 || NUM_WAYS != 1) ? (LINE_SIZE * NUM_WAYS) : 1; + localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1; wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; @@ -137,62 +137,51 @@ module VX_cache_data #( assign evict_byteen = '0; end - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; - wire line_write; - wire line_read; + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store + wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [BYTEENW-1:0] line_wren; - if (BYTEENW != 1) begin : g_wdata - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren_w; - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways - wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; - if (WRITE_ENABLE != 0) begin : g_we - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask - wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); - assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; - end - assign line_wdata[i] = (fill && fill_way_en) ? 
fill_data : {`CS_WORDS_PER_LINE{write_word}}; - assign line_wren_w[i] = {LINE_SIZE{fill && fill_way_en}} - | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); - end else begin : g_ro - `UNUSED_VAR (write) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_word) - `UNUSED_VAR (word_idx) - assign line_wdata[i] = fill_data; - assign line_wren_w[i] = {LINE_SIZE{fill_way_en}}; + wire fill_way_en = (NUM_WAYS == 1) || evict_way[i]; + + if (WRITE_ENABLE != 0) begin : g_wdata + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end + assign line_wdata = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren = {LINE_SIZE{fill && fill_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); + + end else begin : g_ro_wdata + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_word) + `UNUSED_VAR (word_idx) + assign line_wdata = fill_data; + assign line_wren = fill_way_en; end - assign line_wren = line_wren_w; - end else begin : g_ro_1w_wdata - `UNUSED_VAR (write) - `UNUSED_VAR (evict_way) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_word) - assign line_wdata = fill_data; - assign line_wren = 1'b1; + + wire line_write = fill || (write && WRITE_ENABLE); + wire line_read = read || ((fill || flush) && WRITEBACK); + + VX_sp_ram #( + .DATAW (`CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (BYTEENW), + .OUT_REG (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (line_read), + .write (line_write), + .wren (line_wren), + .addr (line_idx), + .wdata (line_wdata), + .rdata (line_rdata[i]) + ); end - assign line_write = fill || (write && WRITE_ENABLE); - assign line_read = read || ((fill || flush) && WRITEBACK); - - VX_sp_ram #( - .DATAW (NUM_WAYS * `CS_LINE_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW 
(BYTEENW), - .OUT_REG (1) - ) data_store ( - .clk (clk), - .reset (reset), - .read (line_read), - .write (line_write), - .wren (line_wren), - .addr (line_idx), - .wdata (line_wdata), - .rdata (line_rdata) - ); - wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; VX_onehot_encoder #( .N (NUM_WAYS) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index aac0483fd..dbd51afdd 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -99,6 +99,7 @@ module VX_cache_repl #( input wire [`CS_LINE_SEL_BITS-1:0] hit_line, input wire [NUM_WAYS-1:0] hit_way, input wire repl_valid, + input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n, input wire [`CS_LINE_SEL_BITS-1:0] repl_line, output wire [NUM_WAYS-1:0] repl_way ); @@ -110,6 +111,7 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); + localparam FORCE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= 1024; wire [WAY_IDX_WIDTH-1:0] repl_way_idx; wire [WAY_IDX_WIDTH-1:0] hit_way_idx; @@ -118,17 +120,18 @@ module VX_cache_repl #( wire [LRU_WIDTH-1:0] plru_wmask; VX_dp_ram #( - .DATAW (LRU_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (LRU_WIDTH) + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH), + .OUT_REG (FORCE_BRAM) ) plru_store ( .clk (clk), .reset (reset), - .read (repl_valid), + .read (FORCE_BRAM ? ~stall : repl_valid), .write (hit_valid), .wren (plru_wmask), .waddr (hit_line), - .raddr (repl_line), + .raddr (FORCE_BRAM ? 
repl_line_n : repl_line), .wdata (plru_wdata), .rdata (plru_rdata) ); @@ -167,23 +170,28 @@ module VX_cache_repl #( end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic // Cyclic replacement policy localparam CTR_WIDTH = $clog2(NUM_WAYS); + localparam FORCE_BRAM = (CTR_WIDTH * `CS_LINES_PER_BANK) >= 1024; + `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) wire [`UP(CTR_WIDTH)-1:0] ctr_rdata; wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1; - VX_sp_ram #( - .DATAW (`UP(CTR_WIDTH)), - .SIZE (`CS_LINES_PER_BANK) + VX_dp_ram #( + .DATAW (`UP(CTR_WIDTH)), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (FORCE_BRAM) ) ctr_store ( .clk (clk), .reset (reset), - .read (repl_valid), + .read (FORCE_BRAM ? ~stall : repl_valid), .write (repl_valid), .wren (1'b1), - .addr (repl_line), + .raddr (FORCE_BRAM ? repl_line_n : repl_line), + .waddr (repl_line), .wdata (ctr_wdata), .rdata (ctr_rdata) ); @@ -202,6 +210,7 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) + `UNUSED_VAR (repl_line_n) if (NUM_WAYS > 1) begin : g_repl_way reg [NUM_WAYS-1:0] victim_way; always @(posedge clk) begin diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index 3fa0e5d65..d6bd4aace 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -20,7 +20,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 16384, + parameter CACHE_SIZE = 32768, // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks @@ -28,14 +28,14 @@ module VX_cache_top import VX_gpu_pkg::*; #( // Number of associative ways parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob parameter MSHR_SIZE = 16, // Memory Response Queue Size - 
parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -55,10 +55,10 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter TAG_WIDTH = 16, // Core response output buffer - parameter CORE_OUT_BUF = 2, + parameter CORE_OUT_BUF = 3, // Memory request output buffer - parameter MEM_OUT_BUF = 2, + parameter MEM_OUT_BUF = 3, parameter MEM_TAG_WIDTH = `CLOG2(MSHR_SIZE) + `CLOG2(NUM_BANKS) ) ( diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index a9b872dd0..c181fb466 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -27,18 +27,18 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 4, // Number of associative ways - parameter NUM_WAYS = 1, + parameter NUM_WAYS = 4, // Size of a word in bytes - parameter WORD_SIZE = 4, + parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 2, + parameter CRSQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 0, + parameter MRSQ_SIZE = 4, // Memory Request Queue Size parameter MREQ_SIZE = 4, @@ -52,7 +52,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( parameter DIRTY_BYTES = 0, // Replacement policy - parameter REPL_POLICY = `CS_REPL_CYCLIC, + parameter REPL_POLICY = `CS_REPL_CYCLIC, // Request debug identifier parameter UUID_WIDTH = 0, @@ -70,10 +70,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( parameter PASSTHRU = 0, // Core response output buffer - parameter CORE_OUT_BUF = 0, + parameter CORE_OUT_BUF = 3, // Memory request output buffer - parameter MEM_OUT_BUF = 0 + parameter MEM_OUT_BUF = 3 ) ( input wire clk, diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 42a91e4c2..b396d1830 100644 --- a/hw/rtl/core/VX_operands.sv +++ 
b/hw/rtl/core/VX_operands.sv @@ -266,9 +266,9 @@ module VX_operands import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), - .OUT_REG (1), - .READ_ENABLE (1), .WRENW (BYTEENW), + .OUT_REG (1), + .WRITE_MODE ("U"), `ifdef GPR_RESET .RESET_RAM (1), `endif diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 4220eca18..b770cfa68 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -24,7 +24,7 @@ module VX_dp_ram #( parameter RW_ASSERT = 0, parameter RESET_RAM = 0, parameter RESET_OUT = 0, - parameter READ_ENABLE = 0, + parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -41,7 +41,10 @@ module VX_dp_ram #( output wire [DATAW-1:0] rdata ); localparam WSELW = DATAW / WRENW; + localparam USE_BRAM = !LUTRAM && ((DATAW * SIZE) >= `MAX_LUTRAM); + `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) + `UNUSED_PARAM (RW_ASSERT) `define RAM_INITIALIZATION \ if (INIT_ENABLE != 0) begin : g_init \ @@ -56,187 +59,155 @@ module VX_dp_ram #( end \ end -`define RAM_WREN_BLOCK_ALTERA(__we__) \ - reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; \ - `RAM_INITIALIZATION \ - always @(posedge clk) begin \ - if (__we__) begin \ - for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ - end \ - end \ - end \ - end - -`define RAM_WREN_BLOCK_XILINX(__we__) \ - reg [DATAW-1:0] ram [0:SIZE-1]; \ - `RAM_INITIALIZATION \ - always @(posedge clk) begin \ - if (__we__) begin \ - for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end \ - end \ - end - -`define RAM_WRITE_BLOCK(__we__) \ - reg [DATAW-1:0] ram [0:SIZE-1]; \ - `RAM_INITIALIZATION \ - always @(posedge clk) begin \ - if (__we__) begin \ - ram[waddr] <= wdata; \ - end \ - 
end - -`define RAM_READ_BLOCK_OUT_REG(__re__) \ - always @(posedge clk) begin \ - if (__re__) begin \ - if (RESET_OUT && reset) begin \ - rdata_r <= INIT_VALUE; \ - end else begin \ - rdata_r <= ram[raddr]; \ - end \ - end \ - end - - `UNUSED_PARAM (RW_ASSERT) - `UNUSED_VAR (read) - `UNUSED_VAR (wren) - +`ifdef SYNTHESIS +`ifdef QUARTUS + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : ""); + localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : ""; + `define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) +`else + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : ""); + localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? 
"no" : ""; + `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + `define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *) +`endif if (OUT_REG) begin : g_out_reg reg [DATAW-1:0] rdata_r; - if (READ_ENABLE) begin : g_readen - if (WRENW != 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read) + if (WRITE_MODE == "R") begin : g_read_first + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE end - `else - // Not Quartus - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read) - end - `endif - end else begin : g_no_writeen - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) - end else begin : g_no_lutram - `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read) + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read || write) begin + rdata_r <= ram[raddr]; end end - end else begin : g_no_readen - if (WRENW != 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_ALTERA(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + end else if (WRITE_MODE == "W") begin : g_write_first + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE end - `else - // Not Quartus - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM 
`RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) - end else begin : g_no_lutram - `RAM_WREN_BLOCK_XILINX(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read || write) begin + rdata_r = ram[raddr]; end - `endif - end else begin : g_no_writeen - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) - end else begin : g_no_lutram - `RAM_WRITE_BLOCK(write) - `RAM_READ_BLOCK_OUT_REG(read || write) + end + end else if (WRITE_MODE == "N") begin : g_no_change + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read && ~write) begin + rdata_r <= ram[raddr]; + end + end + end end else if (WRITE_MODE == "U") begin : g_undefined + `RAM_NO_RWCHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (RESET_OUT && reset) begin + rdata_r <= INIT_VALUE; + end else if (read) begin + rdata_r <= ram[raddr]; + end + end + end else begin + `STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE)) + end + else begin : g_no_out_reg + `UNUSED_VAR (read) + `RAM_NO_RWCHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[raddr]; + end +`else + // simulation + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n + assign ram_n[i * WSELW +: WSELW] = wren[i] ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; + end + + always @(posedge clk) begin + if (RESET_RAM && reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + end else begin + if (write) begin + ram[waddr] <= ram_n; + end + end + end + + if (OUT_REG && WRITE_MODE == "R") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read || write) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; + end else if (OUT_REG && WRITE_MODE == "W") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read || write) begin + if (write && (raddr == waddr)) begin + rdata_r <= ram_n; + end else begin + rdata_r <= ram[raddr]; end end end assign rdata = rdata_r; - end else begin : g_no_out_reg - `ifdef SYNTHESIS - if (WRENW > 1) begin : g_writeen - `ifdef QUARTUS - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_ALTERA(write) - assign rdata = ram[raddr]; - end else begin : g_no_lutram - if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK `RAM_WREN_BLOCK_ALTERA(write) - assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_WREN_BLOCK_ALTERA(write) - assign rdata = ram[raddr]; - end - end - `else - // default synthesis - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WREN_BLOCK_XILINX(write) - assign rdata = ram[raddr]; - end else begin : g_no_lutram - if (NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK `RAM_WREN_BLOCK_XILINX(write) - assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_WREN_BLOCK_XILINX(write) - assign rdata = ram[raddr]; - end - end - `endif - end else begin : g_no_writeen - // (WRENW == 1) - if (LUTRAM != 0) begin : g_lutram - `USE_FAST_BRAM `RAM_WRITE_BLOCK(write) - assign rdata = ram[raddr]; - end else begin : g_no_lutram - if 
(NO_RWCHECK != 0) begin : g_no_rwcheck - `NO_RW_RAM_CHECK `RAM_WRITE_BLOCK(write) - assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_WRITE_BLOCK(write) - assign rdata = ram[raddr]; - end - end - end - `else - // simulation - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n - assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; - end - + end else if (OUT_REG && WRITE_MODE == "N") begin : g_read_first + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (RESET_RAM && reset) begin - for (integer i = 0; i < SIZE; ++i) begin - ram[i] <= DATAW'(INIT_VALUE); - end - end else begin - if (write) begin - ram[waddr] <= ram_n; - end + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read && ~write) begin + rdata_r <= ram[raddr]; end end - - if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass + assign rdata = rdata_r; + end else begin : g_async_or_undef + wire [DATAW-1:0] rdata_w; + if (USE_BRAM && NO_RWCHECK) begin : g_rdata_no_bypass reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -253,15 +224,29 @@ module VX_dp_ram #( end end - assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_assert - `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) + assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; + if (RW_ASSERT) begin : g_rw_asert + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) end end else begin : g_rdata_with_bypass - assign rdata = ram[raddr]; + assign rdata_w = ram[raddr]; + end + if (OUT_REG) begin : g_out_reg + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (RESET_OUT && reset) begin + rdata_r <= DATAW'(INIT_VALUE); + end else if (read) begin + rdata_r <= rdata_w; + end + end + assign rdata = rdata_r; + end else begin : g_no_out_reg + `UNUSED_VAR (read) + assign rdata = rdata_w; end - `endif end +`endif endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index ca1185780..1410a0dd0 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -16,11 +16,11 @@ `TRACING_OFF module VX_fifo_queue #( parameter DATAW = 1, - parameter DEPTH = 2, + parameter DEPTH = 1, parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, + parameter LUTRAM = ((DATAW * DEPTH) < `MAX_LUTRAM), parameter SIZEW = `CLOG2(DEPTH+1) ) ( input wire clk, @@ -105,7 +105,8 @@ module VX_fifo_queue #( .DATAW (DATAW), .SIZE (DEPTH), .LUTRAM (LUTRAM), - .OUT_REG(!LUTRAM) + .OUT_REG(!LUTRAM), + .WRITE_MODE("W") ) dp_ram ( .clk (clk), .reset (reset), @@ -119,47 +120,17 @@ module VX_fifo_queue #( ); if (OUT_REG != 0) begin : g_out_reg - reg [DATAW-1:0] data_out_r, data_out_n; - - if (LUTRAM) begin : g_lutram - assign data_out_n = data_out_w; - end else begin : g_no_lutram - reg [DATAW-1:0] data_out_p; - reg rdw_hazard_r; - wire rdw_hazard = push && (wr_ptr_r == rd_ptr_w); - always @(posedge clk) begin - if (rdw_hazard) begin - data_out_p <= data_in; - end - rdw_hazard_r <= rdw_hazard; - end - assign data_out_n = rdw_hazard_r ? 
data_out_p : data_out_w; - end - + reg [DATAW-1:0] data_out_r; always @(posedge clk) begin if (bypass) begin data_out_r <= data_in; end else if (pop) begin - data_out_r <= data_out_n; + data_out_r <= data_out_w; end end - assign data_out = data_out_r; - end else begin : g_no_out_reg - if (LUTRAM) begin : g_lutram - assign data_out = data_out_w; - end else begin : g_no_lutram - reg [DATAW-1:0] data_in_r; - reg bypass_r; - always @(posedge clk) begin - if (bypass) begin - data_in_r <= data_in; - end - bypass_r <= bypass; - end - assign data_out = bypass_r ? data_in_r : data_out_w; - end + assign data_out = data_out_w; end end diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 6a9b70ff1..78e85e16f 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -112,9 +112,7 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), - .OUT_REG (1), - .READ_ENABLE (0), - .NO_RWCHECK (1) + .OUT_REG (1) ) delta_store ( .clk (clk), .reset (reset), @@ -135,9 +133,7 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (1), - .READ_ENABLE (0), - .NO_RWCHECK (1) + .OUT_REG (1) ) data_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index efce4b5f2..faaf0dd2f 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -24,7 +24,7 @@ module VX_sp_ram #( parameter RW_ASSERT = 0, parameter RESET_RAM = 0, parameter RESET_OUT = 0, - parameter READ_ENABLE = 0, + parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -49,7 +49,7 @@ module VX_sp_ram #( .RW_ASSERT (RW_ASSERT), .RESET_RAM (RESET_RAM), .RESET_OUT (RESET_OUT), - .READ_ENABLE(READ_ENABLE), + .WRITE_MODE (WRITE_MODE), .INIT_ENABLE(INIT_ENABLE), .INIT_FILE (INIT_FILE), .INIT_VALUE (INIT_VALUE), diff --git a/hw/rtl/mem/VX_local_mem.sv 
b/hw/rtl/mem/VX_local_mem.sv index 03c4acdd1..557f4a9f7 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -166,9 +166,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), - .OUT_REG (1), - .READ_ENABLE (0), - .NO_RWCHECK (1) + .OUT_REG (1) ) lmem_store ( .clk (clk), .reset (reset), From b6bd6467efe685c27b031e5478a21d5ec3050aed Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 19 Oct 2024 20:04:51 -0700 Subject: [PATCH 384/488] cache hit timing optimization --- hw/rtl/cache/VX_bank_flush.sv | 15 +-- hw/rtl/cache/VX_cache_bank.sv | 179 ++++++++++---------------- hw/rtl/cache/VX_cache_data.sv | 123 +++++++++--------- hw/rtl/cache/VX_cache_define.vh | 1 + hw/rtl/cache/VX_cache_repl.sv | 195 +++++++++++++---------------- hw/rtl/cache/VX_cache_tags.sv | 55 +++----- hw/rtl/libs/VX_dp_ram.sv | 21 +++- hw/rtl/libs/VX_fifo_queue.sv | 16 +-- hw/unittest/generic_queue/Makefile | 2 + 9 files changed, 257 insertions(+), 350 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index ca28d749b..68eefd363 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -33,7 +33,7 @@ module VX_bank_flush #( output wire flush_init, output wire flush_valid, output wire [`CS_LINE_SEL_BITS-1:0] flush_line, - output wire [NUM_WAYS-1:0] flush_way, + output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way, input wire flush_ready, input wire mshr_empty, input wire bank_empty @@ -113,17 +113,10 @@ module VX_bank_flush #( assign flush_valid = (state == STATE_FLUSH); assign flush_line = counter[`CS_LINE_SEL_BITS-1:0]; - if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way - VX_decoder #( - .N (`CS_WAY_SEL_BITS), - .D (NUM_WAYS) - ) ctr_decoder ( - .sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), - .data_in (1'b1), - .data_out (flush_way) - ); + if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way + assign flush_way = counter[`CS_LINE_SEL_BITS +: 
`CS_WAY_SEL_BITS]; end else begin : g_flush_way_all - assign flush_way = {NUM_WAYS{1'b1}}; + assign flush_way = '0; end endmodule diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 20c0c0612..574659d7e 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -150,19 +150,19 @@ module VX_cache_bank #( wire is_creq_st0, is_creq_st1; wire is_fill_st0, is_fill_st1; wire is_flush_st0, is_flush_st1; - wire [NUM_WAYS-1:0] flush_way_st0; - wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1; + wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0; + wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; - wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1; + wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1; + wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; + wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1; wire rw_sel, rw_st0, rw_st1; wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1; wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1; wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1; wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1; wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1; - wire [`CS_WORD_WIDTH-1:0] read_data_st1; wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; @@ -170,18 +170,18 @@ module VX_cache_bank #( wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire mshr_pending_st0, mshr_pending_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1; + wire is_hit_st0, is_hit_st1; wire mshr_empty; wire flush_valid; wire init_valid; wire [`CS_LINE_SEL_BITS-1:0] flush_sel; - wire [NUM_WAYS-1:0] flush_way; + wire [`CS_WAY_SEL_WIDTH-1:0] flush_way; wire flush_ready; // ensure we have no pending memory request in the bank 
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty; - // flush unit VX_bank_flush #( .BANK_ID (BANK_ID), .CACHE_SIZE (CACHE_SIZE), @@ -203,9 +203,7 @@ module VX_cache_bank #( .bank_empty (no_pending_req) ); - logic rdw_hazard, post_hazard; - - wire pipe_stall = crsp_queue_stall || rdw_hazard; + wire pipe_stall = crsp_queue_stall; // inputs arbitration: // mshr replay has highest priority to maximize utilization since there is no miss. @@ -295,8 +293,6 @@ module VX_cache_bank #( assign req_uuid_sel = '0; end - wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; - wire is_init_sel = init_valid; wire is_creq_sel = creq_enable || replay_enable; wire is_fill_sel = fill_enable; @@ -304,7 +300,7 @@ module VX_cache_bank #( wire is_replay_sel = replay_enable; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH), .RESETW (1) ) pipe_reg0 ( .clk (clk), @@ -334,22 +330,18 @@ module VX_cache_bank #( wire do_read_st1 = valid_st1 && is_read_st1; wire do_write_st1 = valid_st1 && is_write_st1; - wire do_fill_st1 = valid_st1 && is_fill_st1; - wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK; + + assign line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; + assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; + assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0]; - assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; - - wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1; - wire [NUM_WAYS-1:0] tag_matches_st1; - - wire is_hit_st1 = (| tag_matches_st1); wire do_lookup_st0 = do_read_st0 || do_write_st0; - wire do_lookup_st1 = do_read_st1 || do_write_st1; - reg 
[NUM_WAYS-1:0] victim_way_st0; + wire [`CS_WAY_SEL_WIDTH-1:0] victim_way_st0; + wire [NUM_WAYS-1:0] tag_matches_st0; VX_cache_repl #( .CACHE_SIZE (CACHE_SIZE), @@ -363,10 +355,10 @@ module VX_cache_bank #( .stall (pipe_stall), .hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall), .hit_line (line_idx_st1), - .hit_way (tag_matches_st1), + .hit_way (way_idx_st1), .repl_valid (do_fill_st0 && ~pipe_stall), - .repl_line (line_idx_st0), .repl_line_n(line_idx_sel), + .repl_line (line_idx_st0), .repl_way (victim_way_st0) ); @@ -388,27 +380,29 @@ module VX_cache_bank #( .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), .lookup (do_lookup_st0 && ~pipe_stall), - .line_addr (addr_st0), + .line_idx_n (line_idx_sel), + .line_idx (line_idx_st0), + .line_tag (line_tag_st0), .evict_way (evict_way_st0), // outputs - .tag_matches_r(tag_matches_st1), - .line_tag_r (line_tag_st1), - .evict_tag_r(evict_tag_st1), - .evict_way_r(evict_way_st1) + .tag_matches(tag_matches_st0), + .evict_tag (evict_tag_st0) ); + assign is_hit_st0 = (| tag_matches_st0); + wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; assign mshr_id_st0 = is_replay_st0 ? 
replay_id_st0 : mshr_alloc_id_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_hit_st0, rw_st0, flags_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_hit_st1, rw_st1, flags_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 @@ -422,58 +416,12 @@ module VX_cache_bank #( // ensure mshr replay always get a hit `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time)) - if (WRITE_ENABLE) begin : g_rdw_hazard - // This implementation uses single-port BRAMs for the tags and data stores. - // Using different stages for read and write operations requires a pipeline stall in between due to address port sharing. 
- // Tags fill/flush can perform read and write in the same stage, since no dependency between. - // Data fill/flush can perform read and write in the same stage, since way_idx is available in st0. - // A data read should happen in st0 for its result to be available in st1. - // A data write should happen in st1 when the tag hit status is available. - // The r/w hazard is needed for consecutive writes since they both wonly write in st1. - // The r/w hazard is also not needed for next writethrough fill/flush to the same line. - // For reads or writeback fill/flush to the same line, we sill need the hazard - // because the data writeen in st1 cannot be read at the same time in st0 without extra forwarding logic. - wire is_write_sel = is_creq_sel && rw_sel; - wire is_same_line = (line_idx_sel == line_idx_st0); - always @(posedge clk) begin - if (reset) begin - post_hazard <= 0; - rdw_hazard <= 0; - end else begin - if (~crsp_queue_stall) begin - post_hazard <= rdw_hazard; - rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel))); - end - end - end - end else begin : g_rdw_hazard_ro - assign rdw_hazard = 0; - assign post_hazard = 0; - end - assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0]; `UNUSED_VAR (data_st1) - wire [`CS_LINE_WIDTH-1:0] evict_data_st1; + wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1; wire [LINE_SIZE-1:0] evict_byteen_st1; - wire line_dirty_st1; - - wire data_write; - wire [`CS_LINE_SEL_BITS-1:0] data_line_idx; - - if (WRITE_ENABLE) begin : g_data_ctrl - // by default all data accesses happen in sto and use line_idx_st0. - // data writes should happen in st1 when the tag hit is available, - // and use line_idx_st1 to ensure the correct line is updated. - // if a rdw hazard is active due to conflict, ensure we don't write twice. - assign data_write = do_write_st1 && ~post_hazard && ~crsp_queue_stall; - assign data_line_idx = data_write ? 
line_idx_st1 : line_idx_st0; - end else begin : g_data_ctrl_ro - `UNUSED_VAR (post_hazard) - `UNUSED_VAR (do_write_st1) - assign data_write = 0; - assign data_line_idx = line_idx_st0; - end + wire evict_dirty_st1; VX_cache_data #( .CACHE_SIZE (CACHE_SIZE), @@ -493,18 +441,18 @@ module VX_cache_bank #( .fill (do_fill_st0 && ~pipe_stall), .flush (do_flush_st0 && ~pipe_stall), .read (do_read_st0 && ~pipe_stall), - .write (data_write), + .write (do_write_st0 && ~pipe_stall), .evict_way (evict_way_st0), - .tag_matches(tag_matches_st1), - .line_idx (data_line_idx), + .tag_matches(tag_matches_st0), + .line_idx (line_idx_st0), .fill_data (data_st0), - .write_word (write_word_st1), - .word_idx (word_idx_st1), - .write_byteen(byteen_st1), + .write_word (write_word_st0), + .word_idx (word_idx_st0), + .write_byteen(byteen_st0), // outputs + .way_idx (way_idx_st1), .read_data (read_data_st1), - .line_dirty (line_dirty_st1), - .evict_data (evict_data_st1), + .evict_dirty(evict_dirty_st1), .evict_byteen(evict_byteen_st1) ); @@ -600,7 +548,7 @@ module VX_cache_bank #( assign crsp_queue_valid = do_read_st1 && is_hit_st1; assign crsp_queue_idx = req_idx_st1; - assign crsp_queue_data = read_data_st1; + assign crsp_queue_data = read_data_st1[word_idx_st1]; assign crsp_queue_tag = tag_st1; VX_elastic_buffer #( @@ -610,7 +558,7 @@ module VX_cache_bank #( ) core_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (crsp_queue_valid && ~rdw_hazard), + .valid_in (crsp_queue_valid), .ready_in (crsp_queue_ready), .data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}), .data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}), @@ -618,9 +566,7 @@ module VX_cache_bank #( .ready_out (core_rsp_ready) ); - // we use 'do_read_st1' instead 'crsp_queue_valid' - // to remove costly 'is_hit_st1' signal from critical paths. 
- assign crsp_queue_stall = do_read_st1 && ~crsp_queue_ready; + assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready; // schedule memory request @@ -634,7 +580,7 @@ module VX_cache_bank #( wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK); wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; - wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1; + wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1}; if (WRITE_ENABLE) begin : g_mreq_queue @@ -642,7 +588,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| evict_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end // issue a fill request on a read/write miss // issue a writeback on a dirty line eviction @@ -651,8 +597,10 @@ module VX_cache_bank #( && ~pipe_stall; assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1; assign mreq_queue_rw = is_fill_or_flush_st1; - assign mreq_queue_data = evict_data_st1; + assign mreq_queue_data = read_data_st1; assign mreq_queue_byteen = is_fill_or_flush_st1 ? 
evict_byteen_st1 : '1; + `UNUSED_VAR (write_word_st1) + `UNUSED_VAR (byteen_st1) end else begin : g_wt wire [LINE_SIZE-1:0] line_byteen; VX_decoder #( @@ -675,7 +623,6 @@ module VX_cache_bank #( `UNUSED_VAR (is_fill_or_flush_st1) `UNUSED_VAR (do_writeback_st1) `UNUSED_VAR (evict_addr_st1) - `UNUSED_VAR (evict_data_st1) `UNUSED_VAR (evict_byteen_st1) end end else begin : g_mreq_queue_ro @@ -688,8 +635,9 @@ module VX_cache_bank #( assign mreq_queue_byteen = '1; `UNUSED_VAR (do_writeback_st1) `UNUSED_VAR (evict_addr_st1) - `UNUSED_VAR (evict_data_st1) `UNUSED_VAR (evict_byteen_st1) + `UNUSED_VAR (write_word_st1) + `UNUSED_VAR (byteen_st1) end if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid @@ -722,10 +670,6 @@ module VX_cache_bank #( assign mem_req_valid = ~mreq_queue_empty; - `UNUSED_VAR (do_fill_st1) - `UNUSED_VAR (do_flush_st1) - `UNUSED_VAR (evict_way_st1) - /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE @@ -740,8 +684,8 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, - crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard)) + `TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, + crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full)) end if (mem_rsp_fire) begin `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, @@ -764,32 +708,37 @@ module VX_cache_bank #( `TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0)) end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_lookup_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-Lookup: addr=0x%0h, rw=%b, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1)) + if (is_hit_st1) begin + `TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end else begin + `TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + end end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, + `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, 
INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1)) end if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin - `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) + `TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1)) end if (crsp_queue_fire) begin `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 22326e63b..65cf9e026 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -41,38 +41,23 @@ module VX_cache_data #( input wire read, input wire write, input wire [`CS_LINE_SEL_BITS-1:0] line_idx, - input wire [NUM_WAYS-1:0] evict_way, + input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, input wire [NUM_WAYS-1:0] tag_matches, input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data, input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, // outputs - output wire [`CS_WORD_WIDTH-1:0] read_data, - output wire line_dirty, - output wire [`CS_LINE_WIDTH-1:0] evict_data, + output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, + output wire [`CS_LINE_WIDTH-1:0] read_data, + output wire evict_dirty, output wire [LINE_SIZE-1:0] evict_byteen 
); `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1; - - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - if (WRITEBACK != 0) begin : g_writeback localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0); - wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r; - - VX_onehot_encoder #( - .N (NUM_WAYS) - ) fill_way_enc ( - .data_in (evict_way), - .data_out (evict_way_idx), - `UNUSED_PIN (valid_out) - ); - - `BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1); wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata; wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata; @@ -80,7 +65,7 @@ module VX_cache_data #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata wire evict = fill || flush; - wire evict_way_en = (NUM_WAYS == 1) || evict_way[i]; + wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i); wire dirty_data = write; // only asserted on writes wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]); if (DIRTY_BYTES != 0) begin : g_dirty_bytes @@ -121,54 +106,47 @@ module VX_cache_data #( ); if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen - assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r]; + assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx]; end else begin : g_line_dirty - assign line_dirty = byteen_rdata[evict_way_idx_r]; + assign evict_dirty = byteen_rdata[way_idx]; assign evict_byteen = '1; end - assign evict_data = line_rdata[evict_way_idx_r]; - end else begin : g_no_writeback `UNUSED_VAR (init) `UNUSED_VAR (flush) - assign line_dirty = 0; - assign evict_data = '0; + assign evict_dirty = 0; assign evict_byteen = '0; end - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store - wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; - wire [BYTEENW-1:0] line_wren; + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; - wire fill_way_en = (NUM_WAYS == 1) || 
evict_way[i]; + if (WRITE_ENABLE) begin : g_data_store + // create a single write-enable block ram to reduce area overhead + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren; + wire line_write; + wire line_read; - if (WRITE_ENABLE != 0) begin : g_wdata + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata + wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i); wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end - assign line_wdata = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}}; - assign line_wren = {LINE_SIZE{fill && fill_way_en}} - | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); - - end else begin : g_ro_wdata - `UNUSED_VAR (write) - `UNUSED_VAR (write_byteen) - `UNUSED_VAR (write_word) - `UNUSED_VAR (word_idx) - assign line_wdata = fill_data; - assign line_wren = fill_way_en; + assign line_wdata[i] = fill ? 
fill_data : {`CS_WORDS_PER_LINE{write_word}}; + assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end - wire line_write = fill || (write && WRITE_ENABLE); - wire line_read = read || ((fill || flush) && WRITEBACK); + assign line_write = fill || (write && WRITE_ENABLE); + assign line_read = read || ((fill || flush) && WRITEBACK); VX_sp_ram #( - .DATAW (`CS_LINE_WIDTH), + .DATAW (NUM_WAYS * `CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .WRENW (BYTEENW), + .WRENW (NUM_WAYS * LINE_SIZE), .OUT_REG (1) ) data_store ( .clk (clk), @@ -178,35 +156,46 @@ module VX_cache_data #( .wren (line_wren), .addr (line_idx), .wdata (line_wdata), - .rdata (line_rdata[i]) + .rdata (line_rdata) ); + end else begin : g_data_store + `UNUSED_VAR (write) + `UNUSED_VAR (write_byteen) + `UNUSED_VAR (write_word) + `UNUSED_VAR (word_idx) + + // we don't merge the ways into a single block ram due to WREN overhead + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways + wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i); + VX_sp_ram #( + .DATAW (`CS_LINE_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (read), + .write (fill && fill_way_en), + .wren (1'b1), + .addr (line_idx), + .wdata (fill_data), + .rdata (line_rdata[i]) + ); + end end - wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx; + wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx; + VX_onehot_encoder #( .N (NUM_WAYS) - ) hit_idx_enc ( + ) way_idx_enc ( .data_in (tag_matches), - .data_out (hit_way_idx), + .data_out (hit_idx), `UNUSED_PIN (valid_out) ); - if (`CS_WORDS_PER_LINE > 1) begin : g_read_data - // order the data layout to perform ways multiplexing last. - // this allows converting way index to binary in parallel with BRAM read and word indexing. 
- wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; - VX_transpose #( - .DATAW (`CS_WORD_WIDTH), - .N (NUM_WAYS), - .M (`CS_WORDS_PER_LINE) - ) transpose ( - .data_in (line_rdata), - .data_out (transposed_rdata) - ); - assign read_data = transposed_rdata[word_idx][hit_way_idx]; - end else begin : g_read_data_1w - `UNUSED_VAR (word_idx) - assign read_data = line_rdata[hit_way_idx]; - end + `BUFFER_EX(way_idx, (read ? hit_idx : evict_way), ~stall, 1); + + assign read_data = line_rdata[way_idx]; endmodule diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index b75845eca..65b239900 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -22,6 +22,7 @@ `define CS_LINE_WIDTH (8 * LINE_SIZE) `define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS) `define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS) +`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS) `define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS)) `define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index dbd51afdd..24425328d 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -97,135 +97,114 @@ module VX_cache_repl #( input wire stall, input wire hit_valid, input wire [`CS_LINE_SEL_BITS-1:0] hit_line, - input wire [NUM_WAYS-1:0] hit_way, + input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way, input wire repl_valid, input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n, input wire [`CS_LINE_SEL_BITS-1:0] repl_line, - output wire [NUM_WAYS-1:0] repl_way + output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way ); + localparam WAY_SEL_WIDTH = `CS_WAY_SEL_WIDTH; `UNUSED_VAR (stall) - localparam WAY_IDX_BITS = $clog2(NUM_WAYS); - localparam WAY_IDX_WIDTH = `UP(WAY_IDX_BITS); + if (NUM_WAYS > 1) begin : g_enable + if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru + // Pseudo Least Recently Used replacement policy + localparam LRU_WIDTH = `UP(NUM_WAYS-1); + localparam 
USE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; - if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru - // Pseudo Least Recently Used replacement policy - localparam LRU_WIDTH = `UP(NUM_WAYS-1); - localparam FORCE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= 1024; + wire [LRU_WIDTH-1:0] plru_rdata; + wire [LRU_WIDTH-1:0] plru_wdata; + wire [LRU_WIDTH-1:0] plru_wmask; - wire [WAY_IDX_WIDTH-1:0] repl_way_idx; - wire [WAY_IDX_WIDTH-1:0] hit_way_idx; - wire [LRU_WIDTH-1:0] plru_rdata; - wire [LRU_WIDTH-1:0] plru_wdata; - wire [LRU_WIDTH-1:0] plru_wmask; + VX_dp_ram #( + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH), + .OUT_REG (USE_BRAM) + ) plru_store ( + .clk (clk), + .reset (reset), + .read (USE_BRAM ? ~stall : repl_valid), + .write (hit_valid), + .wren (plru_wmask), + .waddr (hit_line), + .raddr (USE_BRAM ? repl_line_n : repl_line), + .wdata (plru_wdata), + .rdata (plru_rdata) + ); - VX_dp_ram #( - .DATAW (LRU_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (LRU_WIDTH), - .OUT_REG (FORCE_BRAM) - ) plru_store ( - .clk (clk), - .reset (reset), - .read (FORCE_BRAM ? ~stall : repl_valid), - .write (hit_valid), - .wren (plru_wmask), - .waddr (hit_line), - .raddr (FORCE_BRAM ? 
repl_line_n : repl_line), - .wdata (plru_wdata), - .rdata (plru_rdata) - ); + plru_decoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_dec ( + .way_idx (hit_way), + .lru_data (plru_wdata), + .lru_mask (plru_wmask) + ); - VX_onehot_encoder #( - .N (NUM_WAYS) - ) hit_way_enc ( - .data_in (hit_way), - .data_out (hit_way_idx), - `UNUSED_PIN (valid_out) - ); + plru_encoder #( + .NUM_WAYS (NUM_WAYS) + ) plru_enc ( + .lru_in (plru_rdata), + .way_idx (repl_way) + ); - plru_decoder #( - .NUM_WAYS (NUM_WAYS) - ) plru_dec ( - .way_idx (hit_way_idx), - .lru_data (plru_wdata), - .lru_mask (plru_wmask) - ); + end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic + // Cyclic replacement policy + localparam USE_BRAM = (WAY_SEL_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; - plru_encoder #( - .NUM_WAYS (NUM_WAYS) - ) plru_enc ( - .lru_in (plru_rdata), - .way_idx (repl_way_idx) - ); + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) - VX_decoder #( - .N (WAY_IDX_BITS) - ) repl_way_dec ( - .sel_in (repl_way_idx), - .data_in (1'b1), - .data_out (repl_way) - ); + wire [WAY_SEL_WIDTH-1:0] ctr_rdata; + wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1; - end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic - // Cyclic replacement policy - localparam CTR_WIDTH = $clog2(NUM_WAYS); - localparam FORCE_BRAM = (CTR_WIDTH * `CS_LINES_PER_BANK) >= 1024; + VX_dp_ram #( + .DATAW (WAY_SEL_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .OUT_REG (USE_BRAM) + ) ctr_store ( + .clk (clk), + .reset (reset), + .read (USE_BRAM ? ~stall : repl_valid), + .write (repl_valid), + .wren (1'b1), + .raddr (USE_BRAM ? 
repl_line_n : repl_line), + .waddr (repl_line), + .wdata (ctr_wdata), + .rdata (ctr_rdata) + ); - `UNUSED_VAR (hit_valid) - `UNUSED_VAR (hit_line) - `UNUSED_VAR (hit_way) - `UNUSED_VAR (repl_valid) - - wire [`UP(CTR_WIDTH)-1:0] ctr_rdata; - wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1; - - VX_dp_ram #( - .DATAW (`UP(CTR_WIDTH)), - .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (FORCE_BRAM) - ) ctr_store ( - .clk (clk), - .reset (reset), - .read (FORCE_BRAM ? ~stall : repl_valid), - .write (repl_valid), - .wren (1'b1), - .raddr (FORCE_BRAM ? repl_line_n : repl_line), - .waddr (repl_line), - .wdata (ctr_wdata), - .rdata (ctr_rdata) - ); - - VX_decoder #( - .N (WAY_IDX_BITS) - ) ctr_decoder ( - .sel_in (ctr_rdata), - .data_in (1'b1), - .data_out (repl_way) - ); - end else begin : g_random - // Random replacement policy + assign repl_way = ctr_rdata; + end else begin : g_random + // Random replacement policy + `UNUSED_VAR (hit_valid) + `UNUSED_VAR (hit_line) + `UNUSED_VAR (hit_way) + `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) + `UNUSED_VAR (repl_line_n) + reg [WAY_SEL_WIDTH-1:0] victim_idx; + always @(posedge clk) begin + if (reset) begin + victim_idx <= 0; + end else if (~stall) begin + victim_idx <= victim_idx + 1; + end + end + assign repl_way = victim_idx; + end + end else begin : g_disable + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) `UNUSED_VAR (repl_line_n) - if (NUM_WAYS > 1) begin : g_repl_way - reg [NUM_WAYS-1:0] victim_way; - always @(posedge clk) begin - if (reset) begin - victim_way <= 1; - end else if (~stall) begin - victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]}; - end - end - assign repl_way = victim_way; - end else begin : g_repl_way_1 - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign repl_way = 1'b1; - end + assign repl_way = 1'b0; end endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv 
b/hw/rtl/cache/VX_cache_tags.sv index 8793420e1..71f7809dc 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -36,50 +36,35 @@ module VX_cache_tags #( input wire flush, input wire fill, input wire lookup, - input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr, - input wire [NUM_WAYS-1:0] evict_way, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n, + input wire [`CS_LINE_SEL_BITS-1:0] line_idx, + input wire [`CS_TAG_SEL_BITS-1:0] line_tag, + input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, // outputs - output wire [NUM_WAYS-1:0] tag_matches_r, - output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r, - output wire [NUM_WAYS-1:0] evict_way_r, - output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r + output wire [NUM_WAYS-1:0] tag_matches, + output wire [`CS_TAG_SEL_BITS-1:0] evict_tag ); // valid, tag localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; - wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0]; - wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr); - wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0] read_valid; - - if (NUM_WAYS > 1) begin : g_evict_way - `BUFFER_EX(evict_way_r, evict_way, ~stall, 1); - end else begin : g_evict_way_0 - `UNUSED_VAR (evict_way) - assign evict_way_r = 1'b1; - end + `UNUSED_VAR (lookup) if (WRITEBACK) begin : g_evict_tag_wb - VX_onehot_mux #( - .DATAW (`CS_TAG_SEL_BITS), - .N (NUM_WAYS) - ) evict_tag_sel ( - .data_in (read_tag), - .sel_in (evict_way_r), - .data_out (evict_tag_r) - ); + assign evict_tag = read_tag[evict_way]; end else begin : g_evict_tag_wt - assign evict_tag_r = '0; + assign evict_tag = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - wire do_fill = fill && evict_way[i]; - wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode + wire way_en = (NUM_WAYS == 1) || (evict_way == i); + wire do_fill = fill && way_en; + wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in 
writethrough mode - wire line_read = lookup || (WRITEBACK && (fill || flush)); + //wire line_read = lookup || (WRITEBACK && (fill || flush)); wire line_write = init || do_fill || do_flush; wire line_valid = fill; @@ -89,26 +74,26 @@ module VX_cache_tags #( assign line_wdata = {line_valid, line_tag}; assign {read_valid[i], read_tag[i]} = line_rdata; - VX_sp_ram #( + VX_dp_ram #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + .OUT_REG (1), + .WRITE_MODE ("W") ) tag_store ( .clk (clk), .reset (reset), - .read (line_read), + .read (~stall), .write (line_write), .wren (1'b1), - .addr (line_idx), + .waddr (line_idx), + .raddr (line_idx_n), .wdata (line_wdata), .rdata (line_rdata) ); end - `BUFFER_EX(line_tag_r, line_tag, ~stall, 1); - for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches - assign tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]); + assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); end endmodule diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index b770cfa68..7616aa5b9 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -61,7 +61,7 @@ module VX_dp_ram #( `ifdef SYNTHESIS `ifdef QUARTUS - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : ""); + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "auto"); localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : ""; `define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -70,9 +70,9 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) -`else - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : ""); - localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? 
"no" : ""; +`elif VIVADO + localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto"); + localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "auto"; `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ if (wren[i]) begin \ @@ -80,6 +80,14 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *) +`else + `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + `define RAM_NO_RWCHECK `endif if (OUT_REG) begin : g_out_reg reg [DATAW-1:0] rdata_r; @@ -122,7 +130,7 @@ module VX_dp_ram #( rdata_r <= ram[raddr]; end end - end end else if (WRITE_MODE == "U") begin : g_undefined + end else if (WRITE_MODE == "U") begin : g_undefined `RAM_NO_RWCHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -138,7 +146,8 @@ module VX_dp_ram #( end else begin `STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE)) end - else begin : g_no_out_reg + assign rdata = rdata_r; + end else begin : g_no_out_reg `UNUSED_VAR (read) `RAM_NO_RWCHECK `RAM_ARRAY `RAM_INITIALIZATION diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 1410a0dd0..9323c4dc0 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -20,7 +20,7 @@ module VX_fifo_queue #( parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, - parameter LUTRAM = ((DATAW * DEPTH) < `MAX_LUTRAM), + parameter LUTRAM = 0, parameter SIZEW = `CLOG2(DEPTH+1) ) ( input wire clk, @@ -42,9 +42,6 @@ module VX_fifo_queue #( `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) - `UNUSED_PARAM (OUT_REG) - `UNUSED_PARAM (LUTRAM) - 
VX_pending_size #( .SIZE (DEPTH), .ALM_EMPTY (ALM_EMPTY), @@ -62,6 +59,8 @@ module VX_fifo_queue #( ); if (DEPTH == 1) begin : g_depth_1 + `UNUSED_PARAM (OUT_REG) + `UNUSED_PARAM (LUTRAM) reg [DATAW-1:0] head_r; @@ -75,6 +74,7 @@ module VX_fifo_queue #( end else begin : g_depth_n + localparam USE_BRAM = !LUTRAM && ((DATAW * DEPTH) >= `MAX_LUTRAM); localparam ADDRW = `CLOG2(DEPTH); wire [DATAW-1:0] data_out_w; @@ -95,17 +95,17 @@ module VX_fifo_queue #( end end - wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n; + wire [ADDRW-1:0] rd_ptr_w = USE_BRAM ? rd_ptr_n : rd_ptr_r; wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); wire bypass = push && (empty || (going_empty && pop)); - wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop; + wire read = ((OUT_REG != 0) || USE_BRAM) ? ~bypass : pop; VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .LUTRAM (LUTRAM), - .OUT_REG(!LUTRAM), + .LUTRAM (!USE_BRAM), + .OUT_REG(USE_BRAM), .WRITE_MODE("W") ) dp_ram ( .clk (clk), diff --git a/hw/unittest/generic_queue/Makefile b/hw/unittest/generic_queue/Makefile index 0adf78fae..ad79c6f94 100644 --- a/hw/unittest/generic_queue/Makefile +++ b/hw/unittest/generic_queue/Makefile @@ -21,4 +21,6 @@ RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs TOP := VX_fifo_queue +PARAMS := -GDATAW=32 -GDEPTH=8 + include ../common.mk \ No newline at end of file From 4206ffdb80586cc433d3eb10681410565365a5d9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 19 Oct 2024 21:39:34 -0700 Subject: [PATCH 385/488] minor update --- hw/rtl/cache/VX_cache_bank.sv | 55 +++++++++++++++++----------- hw/rtl/cache/VX_cache_data.sv | 67 ++++++++++------------------------- hw/rtl/cache/VX_cache_tags.sv | 36 ++++++++++++------- 3 files changed, 77 insertions(+), 81 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 574659d7e..7d1022378 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -151,7 
+151,7 @@ module VX_cache_bank #( wire is_fill_st0, is_fill_st1; wire is_flush_st0, is_flush_st1; wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0; - wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st1; + wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st0, way_idx_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1; @@ -166,11 +166,12 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1; wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0; + wire is_dirty_st0, is_dirty_st1; wire is_replay_st0, is_replay_st1; + wire is_hit_st0, is_hit_st1; wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1; wire mshr_pending_st0, mshr_pending_st1; wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1; - wire is_hit_st0, is_hit_st1; wire mshr_empty; wire flush_valid; @@ -379,30 +380,42 @@ module VX_cache_bank #( .init (do_init_st0), .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), - .lookup (do_lookup_st0 && ~pipe_stall), + .read (do_read_st0 && ~pipe_stall), + .write (do_write_st0 && ~pipe_stall), .line_idx_n (line_idx_sel), .line_idx (line_idx_st0), .line_tag (line_tag_st0), .evict_way (evict_way_st0), // outputs .tag_matches(tag_matches_st0), + .evict_dirty(is_dirty_st0), .evict_tag (evict_tag_st0) ); + wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx_st0; + VX_onehot_encoder #( + .N (NUM_WAYS) + ) way_idx_enc ( + .data_in (tag_matches_st0), + .data_out (hit_idx_st0), + `UNUSED_PIN (valid_out) + ); + + assign way_idx_st0 = is_creq_st0 ? hit_idx_st0 : evict_way_st0; assign is_hit_st0 = (| tag_matches_st0); wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0; assign mshr_id_st0 = is_replay_st0 ? 
replay_id_st0 : mshr_alloc_id_st0; VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~pipe_stall), - .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_hit_st0, rw_st0, flags_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), - .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_hit_st1, rw_st1, flags_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) + .data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_dirty_st0, is_hit_st0, rw_st0, flags_st0, way_idx_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}), + .data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_dirty_st1, is_hit_st1, rw_st1, flags_st1, way_idx_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1}) ); if (UUID_WIDTH != 0) begin : g_req_uuid_st1 @@ -421,7 +434,6 @@ module VX_cache_bank #( wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1; wire [LINE_SIZE-1:0] evict_byteen_st1; - wire evict_dirty_st1; VX_cache_data #( .CACHE_SIZE (CACHE_SIZE), @@ -449,10 +461,9 @@ 
module VX_cache_bank #( .write_word (write_word_st0), .word_idx (word_idx_st0), .write_byteen(byteen_st0), - // outputs .way_idx (way_idx_st1), + // outputs .read_data (read_data_st1), - .evict_dirty(evict_dirty_st1), .evict_byteen(evict_byteen_st1) ); @@ -580,7 +591,7 @@ module VX_cache_bank #( wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK); wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; - wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; + wire do_writeback_st1 = do_fill_or_flush_st1 && is_dirty_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1}; if (WRITE_ENABLE) begin : g_mreq_queue @@ -588,7 +599,7 @@ module VX_cache_bank #( if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| evict_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (is_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, is_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end // issue a fill request on a read/write miss // issue a writeback on a dirty line eviction @@ -670,6 +681,8 @@ module VX_cache_bank #( assign mem_req_valid = ~mreq_queue_empty; + `UNUSED_VAR (do_lookup_st0) + /////////////////////////////////////////////////////////////////////////////// `ifdef PERF_ENABLE @@ -708,29 +721,29 @@ module VX_cache_bank #( `TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0)) end if (do_fill_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), 
evict_way_st0, line_idx_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin - `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + `TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d, dirty=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, is_dirty_st0, req_uuid_st0)) end - if (do_lookup_st1 && ~pipe_stall) begin - if (is_hit_st1) begin + if (do_lookup_st0 && ~pipe_stall) begin + if (is_hit_st0) begin `TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0)) end else begin `TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), rw_st0, way_idx_st0, line_idx_st0, line_tag_st0, req_uuid_st0)) end end if (do_fill_st0 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, data_st0, req_uuid_st0)) end if (do_flush_st0 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_st0, 
BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0)) + `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), way_idx_st0, line_idx_st0, req_uuid_st0)) end if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin `TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 65cf9e026..75e2c7935 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -47,51 +47,40 @@ module VX_cache_data #( input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, + input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, // outputs - output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, output wire [`CS_LINE_WIDTH-1:0] read_data, - output wire evict_dirty, output wire [LINE_SIZE-1:0] evict_byteen ); `UNUSED_PARAM (WORD_SIZE) `UNUSED_VAR (stall) - if (WRITEBACK != 0) begin : g_writeback - localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? 
LINE_SIZE : 0); + if (DIRTY_BYTES != 0) begin : g_dirty_bytes - wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata; - wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata; - wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_rdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] byteen_wren; for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata wire evict = fill || flush; wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i); - wire dirty_data = write; // only asserted on writes - wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]); - if (DIRTY_BYTES != 0) begin : g_dirty_bytes - wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; - for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask - wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); - assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; - end - wire [LINE_SIZE-1:0] bytes_data = {LINE_SIZE{write}}; // only asserted on writes - wire [LINE_SIZE-1:0] bytes_wren = {LINE_SIZE{init}} - | {LINE_SIZE{evict && evict_way_en}} - | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); - assign byteen_wdata[i] = {dirty_data, bytes_data}; - assign byteen_wren[i] = {dirty_wren, bytes_wren}; - end else begin : g_no_dirty_bytes - assign byteen_wdata[i] = dirty_data; - assign byteen_wren[i] = dirty_wren; + wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask; + for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask + wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j); + assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}}; end + assign byteen_wdata[i] = {LINE_SIZE{write}}; // only asserted on writes + assign byteen_wren[i] = {LINE_SIZE{init}} + | {LINE_SIZE{evict && evict_way_en}} + | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end wire byteen_read = fill || flush; wire byteen_write = init || write || fill || flush; VX_sp_ram #( - .DATAW 
(BYTEEN_DATAW * NUM_WAYS), - .WRENW (BYTEEN_DATAW * NUM_WAYS), + .DATAW (LINE_SIZE * NUM_WAYS), + .WRENW (LINE_SIZE * NUM_WAYS), .SIZE (`CS_LINES_PER_BANK), .OUT_REG (1) ) byteen_store ( @@ -105,17 +94,10 @@ module VX_cache_data #( .rdata (byteen_rdata) ); - if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen - assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx]; - end else begin : g_line_dirty - assign evict_dirty = byteen_rdata[way_idx]; - assign evict_byteen = '1; - end - - end else begin : g_no_writeback + assign evict_byteen = byteen_rdata[way_idx]; + end else begin : g_no_dirty_bytes `UNUSED_VAR (init) `UNUSED_VAR (flush) - assign evict_dirty = 0; assign evict_byteen = '0; end @@ -140,8 +122,8 @@ module VX_cache_data #( | ({LINE_SIZE{write && tag_matches[i]}} & write_mask); end - assign line_write = fill || (write && WRITE_ENABLE); assign line_read = read || ((fill || flush) && WRITEBACK); + assign line_write = fill || (write && WRITE_ENABLE); VX_sp_ram #( .DATAW (NUM_WAYS * `CS_LINE_WIDTH), @@ -163,6 +145,7 @@ module VX_cache_data #( `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_word) `UNUSED_VAR (word_idx) + `UNUSED_VAR (tag_matches) // we don't merge the ways into a single block ram due to WREN overhead for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways @@ -184,18 +167,6 @@ module VX_cache_data #( end end - wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx; - - VX_onehot_encoder #( - .N (NUM_WAYS) - ) way_idx_enc ( - .data_in (tag_matches), - .data_out (hit_idx), - `UNUSED_PIN (valid_out) - ); - - `BUFFER_EX(way_idx, (read ? 
hit_idx : evict_way), ~stall, 1); - assign read_data = line_rdata[way_idx]; endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 71f7809dc..79afb29d4 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -35,7 +35,8 @@ module VX_cache_tags #( input wire init, input wire flush, input wire fill, - input wire lookup, + input wire read, + input wire write, input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n, input wire [`CS_LINE_SEL_BITS-1:0] line_idx, input wire [`CS_TAG_SEL_BITS-1:0] line_tag, @@ -43,36 +44,47 @@ module VX_cache_tags #( // outputs output wire [NUM_WAYS-1:0] tag_matches, + output wire evict_dirty, output wire [`CS_TAG_SEL_BITS-1:0] evict_tag ); - // valid, tag - localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS; + // valid, dirty, tag + localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS; wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag; wire [NUM_WAYS-1:0] read_valid; - `UNUSED_VAR (lookup) + wire [NUM_WAYS-1:0] read_dirty; + `UNUSED_VAR (read) if (WRITEBACK) begin : g_evict_tag_wb + assign evict_dirty = read_dirty[evict_way]; assign evict_tag = read_tag[evict_way]; end else begin : g_evict_tag_wt + `UNUSED_VAR (read_dirty) + assign evict_dirty = 1'b0; assign evict_tag = '0; end for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store - - wire way_en = (NUM_WAYS == 1) || (evict_way == i); - wire do_fill = fill && way_en; + wire way_en = (NUM_WAYS == 1) || (evict_way == i); + wire do_fill = fill && way_en; wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode + wire do_write = WRITEBACK && write && tag_matches[i]; // only write on hit - //wire line_read = lookup || (WRITEBACK && (fill || flush)); - wire line_write = init || do_fill || do_flush; - wire line_valid = fill; + //wire line_read = read || write || (WRITEBACK && (fill || flush)); + wire line_write = init || do_fill || do_flush || do_write; + wire line_valid = fill || write; wire 
[TAG_WIDTH-1:0] line_wdata; wire [TAG_WIDTH-1:0] line_rdata; - assign line_wdata = {line_valid, line_tag}; - assign {read_valid[i], read_tag[i]} = line_rdata; + if (WRITEBACK) begin : g_wdata + assign line_wdata = {line_valid, write, line_tag}; + assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata; + end else begin : g_wdata + assign line_wdata = {line_valid, line_tag}; + assign {read_valid[i], read_tag[i]} = line_rdata; + assign read_dirty[i] = 1'b0; + end VX_dp_ram #( .DATAW (TAG_WIDTH), From 2bd22253ebff32c928c28ccfd9abc99953eb65db Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 19 Oct 2024 22:14:38 -0700 Subject: [PATCH 386/488] minor update --- hw/rtl/cache/VX_bank_flush.sv | 3 ++- hw/rtl/cache/VX_cache_flush.sv | 3 ++- hw/rtl/libs/VX_dp_ram.sv | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 68eefd363..e50f8ef44 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -55,7 +55,8 @@ module VX_bank_flush #( always @(*) begin state_n = state; case (state) - STATE_IDLE: begin + //STATE_IDLE: + default : begin if (flush_begin) begin state_n = STATE_WAIT1; end diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index b318dc5af..d10cb5275 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -128,7 +128,8 @@ module VX_cache_flush #( lock_released_n = lock_released; flush_uuid_n = flush_uuid_r; case (state) - STATE_IDLE: begin + //STATE_IDLE: + default: begin if (flush_req_enable) begin state_n = (BANK_SEL_LATENCY != 0) ? 
STATE_WAIT1 : STATE_FLUSH; for (integer i = NUM_REQS-1; i >= 0; --i) begin diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 7616aa5b9..9e863f713 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -70,7 +70,7 @@ module VX_dp_ram #( end \ end `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) -`elif VIVADO +`elsif VIVADO localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto"); localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "auto"; `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; From 9373e2195004270342b409b28a3be1787e6f4242 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 07:32:32 -0700 Subject: [PATCH 387/488] minor update --- hw/rtl/cache/VX_cache_tags.sv | 2 +- hw/rtl/core/VX_operands.sv | 4 +- hw/rtl/libs/VX_dp_ram.sv | 199 +++++++++++--------------------- hw/rtl/libs/VX_fifo_queue.sv | 9 +- hw/rtl/libs/VX_index_buffer.sv | 12 +- hw/rtl/libs/VX_sp_ram.sv | 201 ++++++++++++++++++++++++++++----- 6 files changed, 248 insertions(+), 179 deletions(-) diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 79afb29d4..7afbbfff4 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -90,7 +90,7 @@ module VX_cache_tags #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), .OUT_REG (1), - .WRITE_MODE ("W") + .NEW_DATA (1) ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index b396d1830..5bfbe3aa6 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -267,12 +267,10 @@ module VX_operands import VX_gpu_pkg::*; #( .DATAW (REGS_DATAW), .SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), .WRENW (BYTEENW), - .OUT_REG (1), - .WRITE_MODE ("U"), `ifdef GPR_RESET .RESET_RAM (1), `endif - .NO_RWCHECK (1) + .OUT_REG (1) ) gpr_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_dp_ram.sv 
b/hw/rtl/libs/VX_dp_ram.sv index 9e863f713..c964c101b 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,12 +19,10 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, parameter RESET_RAM = 0, - parameter RESET_OUT = 0, - parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined + parameter NEW_DATA = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -41,9 +39,8 @@ module VX_dp_ram #( output wire [DATAW-1:0] rdata ); localparam WSELW = DATAW / WRENW; - localparam USE_BRAM = !LUTRAM && ((DATAW * SIZE) >= `MAX_LUTRAM); - `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) + `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) `UNUSED_PARAM (RW_ASSERT) `define RAM_INITIALIZATION \ @@ -61,25 +58,12 @@ module VX_dp_ram #( `ifdef SYNTHESIS `ifdef QUARTUS - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "auto"); - localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : ""; - `define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ if (wren[i]) begin \ ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ end \ end - `define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *) -`elsif VIVADO - localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto"); - localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? 
"no" : "auto"; - `define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end - `define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *) `else `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -87,136 +71,96 @@ module VX_dp_ram #( ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ end \ end - `define RAM_NO_RWCHECK `endif - if (OUT_REG) begin : g_out_reg - reg [DATAW-1:0] rdata_r; - if (WRITE_MODE == "R") begin : g_read_first - `RAM_ARRAY + if (OUT_REG) begin : g_sync + if (NEW_DATA) begin : g_new_data + (* rw_addr_collision = "yes" *) `RAM_ARRAY + `UNUSED_VAR (wren) `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin if (write) begin `RAM_WRITE end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read || write) begin + if (read) begin + addr_reg <= raddr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_old_data + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin rdata_r <= ram[raddr]; end end - end else if (WRITE_MODE == "W") begin : g_write_first - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read || write) begin - rdata_r = ram[raddr]; - end - end - end else if (WRITE_MODE == "N") begin : g_no_change - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read && ~write) begin - rdata_r <= ram[raddr]; - end - end - end else if (WRITE_MODE == "U") begin : g_undefined - `RAM_NO_RWCHECK 
`RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (RESET_OUT && reset) begin - rdata_r <= INIT_VALUE; - end else if (read) begin - rdata_r <= ram[raddr]; - end - end - end else begin - `STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE)) + assign rdata = rdata_r; end - assign rdata = rdata_r; - end else begin : g_no_out_reg - `UNUSED_VAR (read) - `RAM_NO_RWCHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + end else begin : g_async + if (NO_RWCHECK) begin : g_no_rwcehck + `NO_RW_RAM_CHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end + assign rdata = ram[raddr]; + end else begin : g_rwcheck + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end `else // simulation reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n - assign ram_n[i * WSELW +: WSELW] = wren[i] ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; - end - always @(posedge clk) begin if (RESET_RAM && reset) begin for (integer i = 0; i < SIZE; ++i) begin ram[i] <= DATAW'(INIT_VALUE); end - end else begin - if (write) begin - ram[waddr] <= ram_n; + end else if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) begin + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end end end end - if (OUT_REG && WRITE_MODE == "R") begin : g_read_first - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read || write) begin - rdata_r <= ram[raddr]; + if (OUT_REG) begin : g_sync + if (NEW_DATA) begin : g_new_data + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (read) begin + addr_reg <= raddr; + end end - end - assign rdata = rdata_r; - end else if (OUT_REG && WRITE_MODE == "W") begin : g_read_first - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read || write) begin - if (write && (raddr == waddr)) begin - rdata_r <= ram_n; - end else begin + assign rdata = ram[addr_reg]; + end else begin : g_old_data + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read) begin rdata_r <= ram[raddr]; end end + assign rdata = rdata_r; end - assign rdata = rdata_r; - end else if (OUT_REG && WRITE_MODE == "N") begin : g_read_first - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read && ~write) begin - rdata_r <= ram[raddr]; - end - end - assign rdata = rdata_r; - end else begin : g_async_or_undef - wire [DATAW-1:0] rdata_w; - if (USE_BRAM && NO_RWCHECK) begin : g_rdata_no_bypass + end else begin : g_async + if (NO_RWCHECK) begin : g_no_rwcheck reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -233,26 +177,13 @@ module VX_dp_ram #( end end - 
assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; + assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; if (RW_ASSERT) begin : g_rw_asert - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) + `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) end - end else begin : g_rdata_with_bypass - assign rdata_w = ram[raddr]; - end - if (OUT_REG) begin : g_out_reg - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (RESET_OUT && reset) begin - rdata_r <= DATAW'(INIT_VALUE); - end else if (read) begin - rdata_r <= rdata_w; - end - end - assign rdata = rdata_r; - end else begin : g_no_out_reg + end else begin : g_rwcheck `UNUSED_VAR (read) - assign rdata = rdata_w; + assign rdata = ram[raddr]; end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 9323c4dc0..7d51e618a 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -102,11 +102,10 @@ module VX_fifo_queue #( wire read = ((OUT_REG != 0) || USE_BRAM) ? 
~bypass : pop; VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (!USE_BRAM), - .OUT_REG(USE_BRAM), - .WRITE_MODE("W") + .DATAW (DATAW), + .SIZE (DEPTH), + .OUT_REG (USE_BRAM), + .NEW_DATA (1) ) dp_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 61875b7fb..8e2b7e8d8 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -15,10 +15,9 @@ `TRACING_OFF module VX_index_buffer #( - parameter DATAW = 1, - parameter SIZE = 1, - parameter LUTRAM = 0, - parameter ADDRW = `LOG2UP(SIZE) + parameter DATAW = 1, + parameter SIZE = 1, + parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, input wire reset, @@ -49,9 +48,8 @@ module VX_index_buffer #( ); VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .LUTRAM (LUTRAM) + .DATAW (DATAW), + .SIZE (SIZE) ) data_table ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index faaf0dd2f..efdd836d8 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -19,12 +19,10 @@ module VX_sp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, parameter RESET_RAM = 0, - parameter RESET_OUT = 0, - parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change, U: undefined + parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -39,32 +37,177 @@ module VX_sp_ram #( input wire [DATAW-1:0] wdata, output wire [DATAW-1:0] rdata ); - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .WRENW (WRENW), - .OUT_REG (OUT_REG), - .LUTRAM (LUTRAM), - .NO_RWCHECK (NO_RWCHECK), - .RW_ASSERT (RW_ASSERT), - .RESET_RAM (RESET_RAM), - .RESET_OUT (RESET_OUT), - .WRITE_MODE (WRITE_MODE), - .INIT_ENABLE(INIT_ENABLE), - .INIT_FILE (INIT_FILE), - .INIT_VALUE (INIT_VALUE), - .ADDRW 
(ADDRW) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (read), - .write (write), - .wren (wren), - .waddr (addr), - .wdata (wdata), - .raddr (addr), - .rdata (rdata) - ); + localparam WSELW = DATAW / WRENW; + + `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) + `UNUSED_PARAM (RW_ASSERT) + +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`ifdef SYNTHESIS +`ifdef QUARTUS + `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`else + `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`endif + if (OUT_REG) begin : g_sync + wire cs = read || write; + if (WRITE_MODE == "R") begin : g_read_first + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end else if (WRITE_MODE == "W") begin : g_write_first + `UNUSED_VAR (wren) + `RAM_ARRAY + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + addr_reg <= addr; + if (write) begin + `RAM_WRITE + end + end + end + assign rdata = ram[addr_reg]; + end else if (WRITE_MODE == "N") begin : g_no_change + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else begin : 
g_async + if (NO_RWCHECK) begin : g_no_rwcehck + `NO_RW_RAM_CHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; + end else begin : g_rwcheck + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; + end + end +`else + // simulation + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n + assign ram_n[i * WSELW +: WSELW] = wren[i] ? wdata[i * WSELW +: WSELW] : ram[addr][i * WSELW +: WSELW]; + end + + always @(posedge clk) begin + if (RESET_RAM && reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + end else if (write) begin + ram[addr] <= ram_n; + end + end + + if (OUT_REG) begin : g_sync + if (WRITE_MODE == "R") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end else if (WRITE_MODE == "W") begin : g_write_first + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (read || write) begin + addr_reg <= addr; + end + end + assign rdata = ram[addr_reg]; + end else if (WRITE_MODE == "N") begin : g_no_change + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read && ~write) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end + end else begin : g_async + if (NO_RWCHECK) begin : g_no_rwcheck + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + prev_data <= ram[addr]; + prev_waddr <= addr; + end + end + assign rdata = (prev_write && (prev_waddr == addr)) ? 
prev_data : ram[addr]; + if (RW_ASSERT) begin : g_rw_asert + `RUNTIME_ASSERT(~read || (rdata == ram[addr]), ("%t: read after write hazard", $time)) + end + end else begin : g_rwcheck + `UNUSED_VAR (read) + assign rdata = ram[addr]; + end + end +`endif endmodule `TRACING_ON From 0f380a3d78110129316984aa6c5c1673bc5680a9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 07:49:27 -0700 Subject: [PATCH 388/488] minor update --- hw/rtl/cache/VX_cache_repl.sv | 15 ++++++--------- hw/rtl/libs/VX_sp_ram.sv | 11 +++++------ 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 24425328d..f9c511e6d 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -110,7 +110,6 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); - localparam USE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; @@ -120,15 +119,15 @@ module VX_cache_repl #( .DATAW (LRU_WIDTH), .SIZE (`CS_LINES_PER_BANK), .WRENW (LRU_WIDTH), - .OUT_REG (USE_BRAM) + .OUT_REG (1) ) plru_store ( .clk (clk), .reset (reset), - .read (USE_BRAM ? ~stall : repl_valid), + .read (~stall), .write (hit_valid), .wren (plru_wmask), .waddr (hit_line), - .raddr (USE_BRAM ? 
repl_line_n : repl_line), + .raddr (repl_line_n), .wdata (plru_wdata), .rdata (plru_rdata) ); @@ -150,8 +149,6 @@ module VX_cache_repl #( end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic // Cyclic replacement policy - localparam USE_BRAM = (WAY_SEL_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM; - `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) @@ -163,14 +160,14 @@ module VX_cache_repl #( VX_dp_ram #( .DATAW (WAY_SEL_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (USE_BRAM) + .OUT_REG (1) ) ctr_store ( .clk (clk), .reset (reset), - .read (USE_BRAM ? ~stall : repl_valid), + .read (~stall), .write (repl_valid), .wren (1'b1), - .raddr (USE_BRAM ? repl_line_n : repl_line), + .raddr (repl_line_n), .waddr (repl_line), .wdata (ctr_wdata), .rdata (ctr_rdata) diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index efdd836d8..eb21144f4 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -141,18 +141,17 @@ module VX_sp_ram #( reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n - assign ram_n[i * WSELW +: WSELW] = wren[i] ? 
wdata[i * WSELW +: WSELW] : ram[addr][i * WSELW +: WSELW]; - end - always @(posedge clk) begin if (RESET_RAM && reset) begin for (integer i = 0; i < SIZE; ++i) begin ram[i] <= DATAW'(INIT_VALUE); end end else if (write) begin - ram[addr] <= ram_n; + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) begin + ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end end end From acc1e3dfd8557d9cb368a19455cee6b488566197 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 20:07:34 -0700 Subject: [PATCH 389/488] minor update --- hw/rtl/VX_cluster.sv | 2 +- hw/rtl/VX_define.vh | 6 ++-- hw/rtl/VX_socket.sv | 2 +- hw/rtl/Vortex.sv | 2 +- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_data.sv | 8 ++--- hw/rtl/cache/VX_cache_tags.sv | 7 ++-- hw/rtl/core/VX_operands.sv | 12 +++---- hw/rtl/core/VX_schedule.sv | 2 +- hw/rtl/core/VX_scoreboard.sv | 4 +-- hw/rtl/libs/VX_dp_ram.sv | 53 ++++++++++++++++--------------- hw/rtl/libs/VX_fifo_queue.sv | 2 +- hw/rtl/libs/VX_generic_arbiter.sv | 8 ++--- hw/rtl/libs/VX_sp_ram.sv | 39 ++++++++++++----------- 14 files changed, 76 insertions(+), 73 deletions(-) diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 9aa5fe706..bec4e232f 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -154,6 +154,6 @@ module VX_cluster import VX_gpu_pkg::*; #( ); end - `BUFFER_EX(busy, (| per_socket_busy), 1'b1, (`NUM_SOCKETS > 1)); + `BUFFER_EX(busy, (| per_socket_busy), 1'b1, 1, (`NUM_SOCKETS > 1)); endmodule diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 4ccb00880..6519984ad 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -335,10 +335,10 @@ .data_out (dst) \ ) -`define BUFFER_EX(dst, src, ena, latency) \ +`define BUFFER_EX(dst, src, ena, RSTW, latency) \ VX_pipe_register #( \ .DATAW ($bits(dst)), \ - .RESETW ($bits(dst)), \ + .RESETW (RSTW), \ .DEPTH (latency) \ ) __``dst``__ ( \ .clk (clk), \ @@ -348,7 +348,7 @@ .data_out (dst) \ ) -`define BUFFER(dst, src) 
`BUFFER_EX(dst, src, 1'b1, 1) +`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, 0, 1) `define POP_COUNT_EX(out, in, model) \ VX_popcount #( \ diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index d9a8f5bf8..299fb6791 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -237,6 +237,6 @@ module VX_socket import VX_gpu_pkg::*; #( ); end - `BUFFER_EX(busy, (| per_core_busy), 1'b1, (`SOCKET_SIZE > 1)); + `BUFFER_EX(busy, (| per_core_busy), 1'b1, 1, (`SOCKET_SIZE > 1)); endmodule diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index bae697c65..5df403880 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -159,7 +159,7 @@ module Vortex import VX_gpu_pkg::*; ( ); end - `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1)); + `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, 1, (`NUM_CLUSTERS > 1)); `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 7d1022378..7258e847e 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -461,7 +461,7 @@ module VX_cache_bank #( .write_word (write_word_st0), .word_idx (word_idx_st0), .write_byteen(byteen_st0), - .way_idx (way_idx_st1), + .way_idx_r (way_idx_st1), // outputs .read_data (read_data_st1), .evict_byteen(evict_byteen_st1) diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 75e2c7935..03e2629c6 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -47,7 +47,7 @@ module VX_cache_data #( input wire [`CS_WORD_WIDTH-1:0] write_word, input wire [WORD_SIZE-1:0] write_byteen, input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx, - input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx, + input wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_r, // outputs output wire [`CS_LINE_WIDTH-1:0] read_data, output wire [LINE_SIZE-1:0] evict_byteen @@ -94,11 +94,11 @@ module VX_cache_data #( .rdata (byteen_rdata) ); - assign evict_byteen = byteen_rdata[way_idx]; + assign evict_byteen = byteen_rdata[way_idx_r]; 
end else begin : g_no_dirty_bytes `UNUSED_VAR (init) `UNUSED_VAR (flush) - assign evict_byteen = '0; + assign evict_byteen = '1; // update whole line end wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata; @@ -167,6 +167,6 @@ module VX_cache_data #( end end - assign read_data = line_rdata[way_idx]; + assign read_data = line_rdata[way_idx_r]; endmodule diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 7afbbfff4..970d54d91 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -66,12 +66,13 @@ module VX_cache_tags #( for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store wire way_en = (NUM_WAYS == 1) || (evict_way == i); + wire do_init = init; // init all ways wire do_fill = fill && way_en; wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode - wire do_write = WRITEBACK && write && tag_matches[i]; // only write on hit + wire do_write = WRITEBACK && write && tag_matches[i]; // only write on tag hit //wire line_read = read || write || (WRITEBACK && (fill || flush)); - wire line_write = init || do_fill || do_flush || do_write; + wire line_write = do_init || do_fill || do_flush || do_write; wire line_valid = fill || write; wire [TAG_WIDTH-1:0] line_wdata; @@ -90,7 +91,7 @@ module VX_cache_tags #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), .OUT_REG (1), - .NEW_DATA (1) + .RDW_MODE ("W") ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 5bfbe3aa6..06d226161 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -61,7 +61,7 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; + wire 
[NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; wire pipe_ready_in; @@ -178,14 +178,14 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; VX_pipe_buffer #( - .DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH) + .DATAW (NUM_BANKS * (1 + REQ_SEL_WIDTH + REGS_DATAW) + META_DATAW) ) pipe_reg2 ( .clk (clk), .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), - .data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), - .data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), + .data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, gpr_rd_data_st1, pipe_data_st1}), + .data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, gpr_rd_data_st2, pipe_data_st2}), .valid_out(pipe_valid_st2), .ready_out(pipe_ready_st2) ); @@ -270,7 +270,7 @@ module VX_operands import VX_gpu_pkg::*; #( `ifdef GPR_RESET .RESET_RAM (1), `endif - .OUT_REG (1) + .OUT_REG (0) ) gpr_ram ( .clk (clk), .reset (reset), @@ -280,7 +280,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data_st2[b]) + .rdata (gpr_rd_data_st1[b]) ); end diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 9b49ae268..5011ccb2c 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -388,7 +388,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire no_pending_instr = (& pending_warp_empty); - `BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1); + `BUFFER_EX(busy, (active_warps != 0 || ~no_pending_instr), 1'b1, 1, 1); // export CSRs assign sched_csr_if.cycles = cycles; diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 5b01cc550..9ec9a6287 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -62,8 +62,8 @@ 
module VX_scoreboard import VX_gpu_pkg::*; #( .data_out (perf_sfu_per_cycle) ); - `BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); - `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); + `BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); + `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, 0, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); wire [PER_ISSUE_WARPS-1:0] stg_valid_in; for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index c964c101b..2adb27e2d 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,10 +19,9 @@ module VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter NO_RWCHECK = 0, - parameter RW_ASSERT = 0, + parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first + parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, - parameter NEW_DATA = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -41,7 +40,8 @@ module VX_dp_ram #( localparam WSELW = DATAW / WRENW; `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) - `UNUSED_PARAM (RW_ASSERT) + `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W"), ("invalid parameter")) + `UNUSED_PARAM (RDW_ASSERT) `define RAM_INITIALIZATION \ if (INIT_ENABLE != 0) begin : g_init \ @@ -73,16 +73,17 @@ module VX_dp_ram #( end `endif if (OUT_REG) begin : g_sync - if (NEW_DATA) begin : g_new_data + wire cs = read || write; + if (RDW_MODE == "W") begin : g_new_data (* rw_addr_collision = "yes" *) `RAM_ARRAY `UNUSED_VAR (wren) `RAM_INITIALIZATION reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (read) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end addr_reg <= raddr; end end @@ -92,18 +93,19 @@ module VX_dp_ram #( 
`RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (read) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end rdata_r <= ram[raddr]; end end assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcehck - `NO_RW_RAM_CHECK `RAM_ARRAY + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_new_data + `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -111,8 +113,8 @@ module VX_dp_ram #( end end assign rdata = ram[raddr]; - end else begin : g_rwcheck - `RAM_ARRAY + end else begin : g_old_data + `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -142,10 +144,11 @@ module VX_dp_ram #( end if (OUT_REG) begin : g_sync - if (NEW_DATA) begin : g_new_data + wire cs = read || write; + if (RDW_MODE == "W") begin : g_new_data reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (read) begin + if (cs) begin addr_reg <= raddr; end end @@ -153,14 +156,17 @@ module VX_dp_ram #( end else begin : g_old_data reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (read) begin + if (cs) begin rdata_r <= ram[raddr]; end end assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcheck + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_new_data + assign rdata = ram[raddr]; + end else begin : g_old_data reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -178,12 +184,9 @@ module VX_dp_ram #( end assign rdata = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; - if (RW_ASSERT) begin : g_rw_asert + if (RDW_ASSERT) begin : g_rw_asert `RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("%t: read after write hazard", $time)) end - end else begin : g_rwcheck - `UNUSED_VAR (read) - assign rdata = ram[raddr]; end end `endif diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 7d51e618a..7e2eba402 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -105,7 +105,7 @@ module VX_fifo_queue #( .DATAW (DATAW), .SIZE (DEPTH), .OUT_REG (USE_BRAM), - .NEW_DATA (1) + .RDW_MODE ("W") ) dp_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index 5e090ebdd..2b0d086db 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -16,7 +16,7 @@ `TRACING_OFF module VX_generic_arbiter #( parameter NUM_REQS = 1, - parameter `STRING TYPE = "P", + parameter `STRING TYPE = "P", // P: priority, R: round-robin, M: matrix, C: cyclic parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, @@ -27,6 +27,8 @@ module VX_generic_arbiter #( output wire grant_valid, input wire grant_ready ); + `STATIC_ASSERT((TYPE == "P" || TYPE == "R" || TYPE == "M" || TYPE == "C"), ("invalid parameter")) + if (TYPE == "P") begin : g_priority `UNUSED_VAR (clk) @@ -84,10 +86,6 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else begin : g_invalid - - `ERROR(("invalid parameter")); - end `RUNTIME_ASSERT (((~(| requests) != 1) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index eb21144f4..cd43e40ff 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -19,10 +19,9 @@ module VX_sp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, - parameter NO_RWCHECK = 0, - parameter RW_ASSERT = 0, + 
parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, N: no-change + parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, - parameter `STRING WRITE_MODE = "R", // R: read-first, W: write-first, N: no-change parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -40,7 +39,8 @@ module VX_sp_ram #( localparam WSELW = DATAW / WRENW; `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) - `UNUSED_PARAM (RW_ASSERT) + `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter")) + `UNUSED_PARAM (RDW_ASSERT) `define RAM_INITIALIZATION \ if (INIT_ENABLE != 0) begin : g_init \ @@ -73,7 +73,7 @@ module VX_sp_ram #( `endif if (OUT_REG) begin : g_sync wire cs = read || write; - if (WRITE_MODE == "R") begin : g_read_first + if (RDW_MODE == "R") begin : g_read_first `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; @@ -86,7 +86,7 @@ module VX_sp_ram #( end end assign rdata = rdata_r; - end else if (WRITE_MODE == "W") begin : g_write_first + end else if (RDW_MODE == "W") begin : g_write_first `UNUSED_VAR (wren) `RAM_ARRAY `RAM_INITIALIZATION @@ -100,7 +100,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr_reg]; - end else if (WRITE_MODE == "N") begin : g_no_change + end else if (RDW_MODE == "N") begin : g_no_change `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; @@ -116,8 +116,9 @@ module VX_sp_ram #( assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcehck - `NO_RW_RAM_CHECK `RAM_ARRAY + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_rwcehck + `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -125,8 +126,8 @@ module VX_sp_ram #( end end assign rdata = ram[addr]; - end else begin : g_rwcheck - `RAM_ARRAY + end else begin : g_no_rwcheck + `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -156,7 +157,7 @@ module VX_sp_ram #( end if (OUT_REG) begin : g_sync 
- if (WRITE_MODE == "R") begin : g_read_first + if (RDW_MODE == "R") begin : g_read_first reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin @@ -164,7 +165,7 @@ module VX_sp_ram #( end end assign rdata = rdata_r; - end else if (WRITE_MODE == "W") begin : g_write_first + end else if (RDW_MODE == "W") begin : g_write_first reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin if (read || write) begin @@ -172,7 +173,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr_reg]; - end else if (WRITE_MODE == "N") begin : g_no_change + end else if (RDW_MODE == "N") begin : g_no_change reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read && ~write) begin @@ -182,7 +183,10 @@ module VX_sp_ram #( assign rdata = rdata_r; end end else begin : g_async - if (NO_RWCHECK) begin : g_no_rwcheck + `UNUSED_VAR (read) + if (RDW_MODE == "W") begin : g_rwcheck + assign rdata = ram[addr]; + end else begin : g_no_rwcheck reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; @@ -198,12 +202,9 @@ module VX_sp_ram #( end end assign rdata = (prev_write && (prev_waddr == addr)) ? 
prev_data : ram[addr]; - if (RW_ASSERT) begin : g_rw_asert + if (RDW_ASSERT) begin : g_rw_asert `RUNTIME_ASSERT(~read || (rdata == ram[addr]), ("%t: read after write hazard", $time)) end - end else begin : g_rwcheck - `UNUSED_VAR (read) - assign rdata = ram[addr]; end end `endif From 22c3828bf5022905db600f5fce91512b6fa95265 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 21:12:49 -0700 Subject: [PATCH 390/488] minor update --- hw/rtl/VX_config.vh | 4 +- hw/rtl/cache/VX_cache_repl.sv | 2 +- hw/rtl/libs/VX_dp_ram.sv | 130 ++++++++++++++++------ hw/rtl/libs/VX_sp_ram.sv | 204 +++++++++++++++++++++++++--------- 4 files changed, 248 insertions(+), 92 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 48f8ca3dc..c2d16ea3a 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -613,7 +613,7 @@ // Number of Associative Ways `ifndef L2_NUM_WAYS -`define L2_NUM_WAYS 4 +`define L2_NUM_WAYS 8 `endif // Enable Cache Writeback @@ -665,7 +665,7 @@ // Number of Associative Ways `ifndef L3_NUM_WAYS -`define L3_NUM_WAYS 4 +`define L3_NUM_WAYS 8 `endif // Enable Cache Writeback diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index f9c511e6d..85f234112 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -110,6 +110,7 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); + `UNUSED_VAR (repl_valid) wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; @@ -152,7 +153,6 @@ module VX_cache_repl #( `UNUSED_VAR (hit_valid) `UNUSED_VAR (hit_line) `UNUSED_VAR (hit_way) - `UNUSED_VAR (repl_valid) wire [WAY_SEL_WIDTH-1:0] ctr_rdata; wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1; diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 2adb27e2d..fc94b99c3 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -19,6 +19,7 @@ module 
VX_dp_ram #( parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, + parameter LUTRAM = 0, parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, @@ -38,6 +39,7 @@ module VX_dp_ram #( output wire [DATAW-1:0] rdata ); localparam WSELW = DATAW / WRENW; + `UNUSED_PARAM (LUTRAM) `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W"), ("invalid parameter")) @@ -57,6 +59,7 @@ module VX_dp_ram #( end `ifdef SYNTHESIS + localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); `ifdef QUARTUS `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -74,54 +77,107 @@ module VX_dp_ram #( `endif if (OUT_REG) begin : g_sync wire cs = read || write; - if (RDW_MODE == "W") begin : g_new_data - (* rw_addr_collision = "yes" *) `RAM_ARRAY - `UNUSED_VAR (wren) - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "W") begin : g_new_data + (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY + `UNUSED_VAR (wren) + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= raddr; end - addr_reg <= raddr; end - end - assign rdata = ram[addr_reg]; - end else begin : g_old_data - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE + assign rdata = ram[addr_reg]; + end else begin : g_old_data + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[raddr]; end - rdata_r <= ram[raddr]; end + assign rdata = rdata_r; + end + end else begin : g_auto + if (RDW_MODE == 
"W") begin : g_new_data + (* rw_addr_collision = "yes" *) `RAM_ARRAY + `UNUSED_VAR (wren) + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= raddr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_old_data + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_new_data - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "W") begin : g_new_data + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end - end - assign rdata = ram[raddr]; - end else begin : g_old_data - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + assign rdata = ram[raddr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end + assign rdata = ram[raddr]; + end + end else begin : g_auto + if (RDW_MODE == "W") begin : g_new_data + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[raddr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end end `else diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index cd43e40ff..1acbf733a 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -19,6 +19,7 @@ module VX_sp_ram #( 
parameter SIZE = 1, parameter WRENW = 1, parameter OUT_REG = 0, + parameter LUTRAM = 0, parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, N: no-change parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, @@ -37,6 +38,7 @@ module VX_sp_ram #( output wire [DATAW-1:0] rdata ); localparam WSELW = DATAW / WRENW; + `UNUSED_PARAM (LUTRAM) `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter")) @@ -56,6 +58,7 @@ module VX_sp_ram #( end `ifdef SYNTHESIS + localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); `ifdef QUARTUS `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ @@ -73,68 +76,165 @@ module VX_sp_ram #( `endif if (OUT_REG) begin : g_sync wire cs = read || write; - if (RDW_MODE == "R") begin : g_read_first - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE - end - rdata_r <= ram[addr]; - end - end - assign rdata = rdata_r; - end else if (RDW_MODE == "W") begin : g_write_first - `UNUSED_VAR (wren) - `RAM_ARRAY - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (cs) begin - addr_reg <= addr; - if (write) begin - `RAM_WRITE - end - end - end - assign rdata = ram[addr_reg]; - end else if (RDW_MODE == "N") begin : g_no_change - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (cs) begin - if (write) begin - `RAM_WRITE - end else begin + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "R") begin : g_read_first + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end rdata_r <= ram[addr]; end end + assign rdata = rdata_r; + end else if (RDW_MODE == "W") begin : g_write_first + `USE_BLOCK_BRAM 
`RAM_ARRAY + `RAM_INITIALIZATION + if (WRENW > 1) begin : g_wren + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= addr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_no_wren + `UNUSED_VAR (wren) + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + ram[addr] <= wdata; + rdata_r <= wdata; + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "N") begin : g_no_change + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else begin : g_auto + if (RDW_MODE == "R") begin : g_read_first + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end else if (RDW_MODE == "W") begin : g_write_first + `RAM_ARRAY + `RAM_INITIALIZATION + if (WRENW > 1) begin : g_wren + reg [ADDRW-1:0] addr_reg; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end + addr_reg <= addr; + end + end + assign rdata = ram[addr_reg]; + end else begin : g_no_wren + `UNUSED_VAR (wren) + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + ram[addr] <= wdata; + rdata_r <= wdata; + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "N") begin : g_no_change + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (cs) begin + if (write) begin + `RAM_WRITE + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : 
g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_rwcehck - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (FORCE_BRAM) begin : g_bram + if (RDW_MODE == "W") begin : g_new_data + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end - end - assign rdata = ram[addr]; - end else begin : g_no_rwcheck - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + assign rdata = ram[addr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end end + assign rdata = ram[addr]; + end + end else begin : g_auto + if (RDW_MODE == "W") begin : g_new_data + `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; + end else begin : g_old_data + `NO_RW_RAM_CHECK `RAM_ARRAY + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end end `else From 1e4f0fa0bd465e7487a0b3ec3cb6374e788a777b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 21:42:02 -0700 Subject: [PATCH 391/488] minor update --- hw/rtl/cache/VX_cache_repl.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 85f234112..909123046 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -111,6 +111,7 @@ module VX_cache_repl #( // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); `UNUSED_VAR (repl_valid) + `UNUSED_VAR (repl_line) wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; From fccbadfe252c8a9c5abc343eed0300cef2d60d50 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 23:32:22 
-0700 Subject: [PATCH 392/488] minor update --- hw/rtl/core/VX_fetch.sv | 3 ++- hw/rtl/libs/VX_fifo_queue.sv | 11 ++++------- hw/rtl/libs/VX_index_buffer.sv | 4 +++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index eb1f3d761..6a35602e8 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -52,7 +52,8 @@ module VX_fetch import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (`PC_BITS + `NUM_THREADS), - .SIZE (`NUM_WARPS) + .SIZE (`NUM_WARPS), + .OUT_REG (0) ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 7e2eba402..6de6ddc24 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -74,7 +74,6 @@ module VX_fifo_queue #( end else begin : g_depth_n - localparam USE_BRAM = !LUTRAM && ((DATAW * DEPTH) >= `MAX_LUTRAM); localparam ADDRW = `CLOG2(DEPTH); wire [DATAW-1:0] data_out_w; @@ -95,26 +94,24 @@ module VX_fifo_queue #( end end - wire [ADDRW-1:0] rd_ptr_w = USE_BRAM ? rd_ptr_n : rd_ptr_r; - wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); wire bypass = push && (empty || (going_empty && pop)); - wire read = ((OUT_REG != 0) || USE_BRAM) ? 
~bypass : pop; VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (USE_BRAM), + .OUT_REG (1), + .LUTRAM (LUTRAM), .RDW_MODE ("W") ) dp_ram ( .clk (clk), .reset (reset), - .read (read), + .read (~bypass), .write (push), .wren (1'b1), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_w), + .raddr (rd_ptr_n), .rdata (data_out_w) ); diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 8e2b7e8d8..422c317e1 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -49,7 +49,9 @@ module VX_index_buffer #( VX_dp_ram #( .DATAW (DATAW), - .SIZE (SIZE) + .SIZE (SIZE), + .OUT_REG (0), + .RDW_MODE("W") ) data_table ( .clk (clk), .reset (reset), From 2b3d1f08600ff80864b6292cb9ef0851b80e893b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 20 Oct 2024 23:54:42 -0700 Subject: [PATCH 393/488] minor update --- hw/rtl/core/VX_operands.sv | 13 ++++--- hw/rtl/libs/VX_dp_ram.sv | 76 +++++++++++++++++++++++++++----------- hw/rtl/libs/VX_sp_ram.sv | 12 +++--- 3 files changed, 67 insertions(+), 34 deletions(-) diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index 06d226161..48b01b4c6 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -61,7 +61,7 @@ module VX_operands import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; wire pipe_ready_in; @@ -178,14 +178,14 @@ module VX_operands import VX_gpu_pkg::*; #( wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; VX_pipe_buffer #( - .DATAW (NUM_BANKS * (1 + REQ_SEL_WIDTH + REGS_DATAW) + META_DATAW) + .DATAW (NUM_BANKS * (1 + 
REQ_SEL_WIDTH) + META_DATAW) ) pipe_reg2 ( .clk (clk), .reset (reset), .valid_in (pipe_valid2_st1), .ready_in (pipe_ready_st1), - .data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, gpr_rd_data_st1, pipe_data_st1}), - .data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, gpr_rd_data_st2, pipe_data_st2}), + .data_in ({gpr_rd_valid_st1, gpr_rd_req_idx_st1, pipe_data_st1}), + .data_out ({gpr_rd_valid_st2, gpr_rd_req_idx_st2, pipe_data_st2}), .valid_out(pipe_valid_st2), .ready_out(pipe_ready_st2) ); @@ -270,7 +270,8 @@ module VX_operands import VX_gpu_pkg::*; #( `ifdef GPR_RESET .RESET_RAM (1), `endif - .OUT_REG (0) + .OUT_REG (1), + .RDW_MODE ("U") ) gpr_ram ( .clk (clk), .reset (reset), @@ -280,7 +281,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data_st1[b]) + .rdata (gpr_rd_data_st2[b]) ); end diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index fc94b99c3..b778ce88e 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -20,7 +20,7 @@ module VX_dp_ram #( parameter WRENW = 1, parameter OUT_REG = 0, parameter LUTRAM = 0, - parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first + parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, U: undefined parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, parameter INIT_ENABLE = 0, @@ -42,7 +42,7 @@ module VX_dp_ram #( `UNUSED_PARAM (LUTRAM) `STATIC_ASSERT(!(WRENW * WSELW != DATAW), ("invalid parameter")) - `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W"), ("invalid parameter")) + `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "U"), ("invalid parameter")) `UNUSED_PARAM (RDW_ASSERT) `define RAM_INITIALIZATION \ @@ -76,15 +76,14 @@ module VX_dp_ram #( end `endif if (OUT_REG) begin : g_sync - wire cs = read || write; if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first (* rw_addr_collision 
= "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY `UNUSED_VAR (wren) `RAM_INITIALIZATION reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -92,12 +91,12 @@ module VX_dp_ram #( end end assign rdata = ram[addr_reg]; - end else begin : g_old_data + end else if (RDW_MODE == "R") begin : g_read_first `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -105,15 +104,28 @@ module VX_dp_ram #( end end assign rdata = rdata_r; + end else begin : g_undefined + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end end else begin : g_auto - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first (* rw_addr_collision = "yes" *) `RAM_ARRAY `UNUSED_VAR (wren) `RAM_INITIALIZATION reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -121,12 +133,12 @@ module VX_dp_ram #( end end assign rdata = ram[addr_reg]; - end else begin : g_old_data + end else if (RDW_MODE == "R") begin : g_read_first `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -134,12 +146,25 @@ module VX_dp_ram #( end end assign rdata = rdata_r; + end else begin + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end end end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first 
`USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -148,7 +173,7 @@ module VX_dp_ram #( end end assign rdata = ram[raddr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -159,7 +184,7 @@ module VX_dp_ram #( assign rdata = ram[raddr]; end end else begin : g_auto - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -168,7 +193,7 @@ module VX_dp_ram #( end end assign rdata = ram[raddr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -200,19 +225,26 @@ module VX_dp_ram #( end if (OUT_REG) begin : g_sync - wire cs = read || write; - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin addr_reg <= raddr; end end assign rdata = ram[addr_reg]; - end else begin : g_old_data + end else if (RDW_MODE == "R") begin : g_read_first reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; + end else begin : g_undefined + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read) begin rdata_r <= ram[raddr]; end end @@ -220,9 +252,9 @@ module VX_dp_ram #( end end else begin : g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first assign rdata = ram[raddr]; - end else begin : g_old_data + end else begin : g_read_first reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 1acbf733a..ee1316271 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -196,7 +196,7 @@ 
module VX_sp_ram #( end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -205,7 +205,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -216,7 +216,7 @@ module VX_sp_ram #( assign rdata = ram[addr]; end end else begin : g_auto - if (RDW_MODE == "W") begin : g_new_data + if (RDW_MODE == "W") begin : g_write_first `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -225,7 +225,7 @@ module VX_sp_ram #( end end assign rdata = ram[addr]; - end else begin : g_old_data + end else begin : g_read_first `NO_RW_RAM_CHECK `RAM_ARRAY `RAM_INITIALIZATION always @(posedge clk) begin @@ -284,9 +284,9 @@ module VX_sp_ram #( end end else begin : g_async `UNUSED_VAR (read) - if (RDW_MODE == "W") begin : g_rwcheck + if (RDW_MODE == "W") begin : g_write_first assign rdata = ram[addr]; - end else begin : g_no_rwcheck + end else begin : g_read_first reg [DATAW-1:0] prev_data; reg [ADDRW-1:0] prev_waddr; reg prev_write; From d584e7bac182bd6be2b49bee5f0f2025fbf6d277 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Mon, 21 Oct 2024 13:28:57 -0400 Subject: [PATCH 394/488] intermediate docs update --- docs/fpga_setup.md | 56 ++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 78ed63e25..6fcf926d3 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -13,7 +13,7 @@ If you are associated with Georgia Tech (or related workshops) you can use CRNCH ## Why are the Rouges Important? 
-By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore’s Law era of “cheap transistors” ends*. +By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore’s Law era of “cheap transistors” ends*. Specifically, the Rouges Gallery contains FPGA's which can be synthesized into Vortex hardware. ## How is the Rouges Gallery Funded? @@ -32,68 +32,50 @@ You can listen to a talk about RG [here](https://mediaspace.gatech.edu/media/Jef You should use [this form](https://crnch-rg.cc.gatech.edu/request-rogues-gallery-access/) to request access to RG’s reconfigurable computing (vortex fpga) resources. You should receive an email with your ticket item being created. Once it gets processed, you should get an email confirmed your access has been granted. It might take some time to get processed. ## How to Access Rouges Gallery? +There are two methods of accessing CRNCH's Rouges Gallery +1) Web-based GUI: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/) +2) SSH: `ssh @rg-login.crnch.gatech.edu` -CRNCH resources do not require any VPN access for GT members so you can head to the web url for open on-demand: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/) -Alternatively, you can `ssh` into rg with: `ssh @rg-login.crnch.gatech.edu` - -(`ssh gburdell3@rg-login.crnch.gatech.edu`) - -## Synthesis for Xilinx Boards -First, you need to get access to the server with the Xilinx FPGAs. +## Where should I keep my files? +The CRNCH servers have a folder called `USERSCRATCH` which can be found in your home directory: `echo $HOME`. You should keep all your files in this folder since it is available across all the Rouges Gallery Nodes. 
## **What Machines are Available in the Rogues Gallery?** -Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). +Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). Furthermore, you can find detailed information about the FPGA hardware [here](https://gt-crnch-rg.readthedocs.io/en/main/reconfig/xilinx/xilinx-getting-started.html). -## Which Machine do we Need from RG? - -There are three primary nodes you might use for Xilinx FPGAs. The table below summarizes: - -| Name | Device | Description | -| --- | --- | --- | -| flubber1 | u50 | can synthesize vortex | -| flubber4 | u250 | missing HBM | -| flubber5 | u280 | can synthesize vortex | +## Allocate an FPGA Node +Once you’ve connected to the CRNCH login node, you can use the Slurm scheduler to request an interactive job using `salloc`. This [page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html) explains why we use Slurm to request resources. Documentation for `salloc` can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). And here. -*Note*: The `USERSCRATCH` folder is synchronized between all RG nodes. That means you can upload your files to `rg-login` and have them available on `flubber[1,4-5`. Changes on one node will be reflected across all nodes. - -## How to Access flubber for Synthesis? - -Now that you have the files prepared and available on the FPGA node, you can start the synthesis. To run on hardware we need a rg-xilinx-fpga-hw cluster which includes **flubber[1,4-5]**. First `ssh` into the rouges gallery, if you have not already. - +To request 16 cores and 64GB of RAM for 6 hours on flubber9, a fpga dev node: ```bash -ssh [@rg-login.crnch.gatech.edu](mailto:usubramanya3@rg-login.crnch.gatech.edu) -``` - -Once you’ve logged in, you can use Slurm to request an interactive job. 
First, view the available Slurm Partitions here [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html). Then, the example requests can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). - -In our case we might run: -```bash -salloc -p rg-fpga --nodes=1 --ntasks-per-node=1 --nodelist flubber1 --time=01:00:00 +salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 --time=06:00:00 ``` ## Environment Setup Once you are logged in, you will need to complete some first time configurations. -### Clone Repo - ### Source Configuration Scripts ``` $ source /opt/xilinx/xrt/setup.sh $ source /opt/xilinx/Vitis/2023.1/settings64.sh ``` +``` +$ source /opt/xilinx/xrt/setup.sh +$ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh +``` + ### Check Installed FPGA Platforms -`platforminfo -l` +`platforminfo -l` which tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. ### Build FPGA image The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. 
``` $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=4 make build_u50_hw_4c.log 2>&1 & + $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make build_u250_hw_1c.log 2>&1 & ``` Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" The generated bitstream will be located under /bin/vortex_afu.xclbin @@ -105,7 +87,7 @@ For long-running jobs, invocation of this makefile can be made of the following For example: ```bash -CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 nohup make > build_u280_hw_4c.log 2>&1 & +CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202310_1 nohup make > build_u250_hw_4c.log 2>&1 & ``` The build is complete when the bitstream file `vortex_afu.xclbin` exists in `hw|hw_emu/bin`. From 8fdca0e52afb71e2ba1d4f7e3cf3457bb12b2d4d Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Mon, 21 Oct 2024 15:38:53 -0400 Subject: [PATCH 395/488] correct vitis env --- docs/fpga_setup.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 6fcf926d3..5b90df0b6 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -57,11 +57,6 @@ salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 - Once you are logged in, you will need to complete some first time configurations. 
### Source Configuration Scripts -``` -$ source /opt/xilinx/xrt/setup.sh -$ source /opt/xilinx/Vitis/2023.1/settings64.sh -``` - ``` $ source /opt/xilinx/xrt/setup.sh $ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh From 519023fb2b644e1455da3b0d9da29c222c3470c6 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Mon, 21 Oct 2024 15:39:10 -0400 Subject: [PATCH 396/488] add citation for MICRO 21 paper --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index ed4c89d88..97686c641 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,27 @@ Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple *backe ## Website Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) +## Citation +``` +@inproceedings{10.1145/3466752.3480128, + author = {Tine, Blaise and Yalamarthy, Krishna Praveen and Elsabbagh, Fares and Hyesoon, Kim}, + title = {Vortex: Extending the RISC-V ISA for GPGPU and 3D-Graphics}, + year = {2021}, + isbn = {9781450385572}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3466752.3480128}, + doi = {10.1145/3466752.3480128}, + abstract = {The importance of open-source hardware and software has been increasing. However, despite GPUs being one of the more popular accelerators across various applications, there is very little open-source GPU infrastructure in the public domain. We argue that one of the reasons for the lack of open-source infrastructure for GPUs is rooted in the complexity of their ISA and software stacks. In this work, we first propose an ISA extension to RISC-V that supports GPGPUs and graphics. The main goal of the ISA extension proposal is to minimize the ISA changes so that the corresponding changes to the open-source ecosystem are also minimal, which makes for a sustainable development ecosystem. 
To demonstrate the feasibility of the minimally extended RISC-V ISA, we implemented the complete software and hardware stacks of Vortex on FPGA. Vortex is a PCIe-based soft GPU that supports OpenCL and OpenGL. Vortex can be used in a variety of applications, including machine learning, graph analytics, and graphics rendering. Vortex can scale up to 32 cores on an Altera Stratix 10 FPGA, delivering a peak performance of 25.6 GFlops at 200 Mhz.}, + booktitle = {MICRO-54: 54th Annual IEEE/ACM International Symposium on Microarchitecture}, + pages = {754–766}, + numpages = {13}, + keywords = {reconfigurable computing, memory systems., computer graphics}, + location = {Virtual Event, Greece}, + series = {MICRO '21} +} +``` + ## Specifications - Support RISC-V RV32IMAF and RV64IMAFD From ff50306833d8b287305ca5da98c8780a4362c526 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 21 Oct 2024 22:24:54 -0700 Subject: [PATCH 397/488] minor update --- hw/syn/xilinx/xrt/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 2517f2777..5d536a069 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -174,6 +174,7 @@ $(BIN_DIR)/emconfig.json: report: $(XCLBIN_CONTAINER) ifeq ($(TARGET), hw) + cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin From 3a3bb7b70a395a6f96a2bfe657bc724186565fe0 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 21 Oct 2024 22:46:04 -0700 Subject: [PATCH 398/488] cleanup deleted files --- hw/rtl/core/VX_gpr_slice.sv | 286 ----------------------- hw/rtl/core/VX_pending_instr.sv | 79 ------- hw/rtl/core/VX_trace.vh | 387 -------------------------------- 3 
files changed, 752 deletions(-) delete mode 100644 hw/rtl/core/VX_gpr_slice.sv delete mode 100644 hw/rtl/core/VX_pending_instr.sv delete mode 100644 hw/rtl/core/VX_trace.vh diff --git a/hw/rtl/core/VX_gpr_slice.sv b/hw/rtl/core/VX_gpr_slice.sv deleted file mode 100644 index b036fc555..000000000 --- a/hw/rtl/core/VX_gpr_slice.sv +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_define.vh" - -module VX_gpr_slice import VX_gpu_pkg::*; #( - parameter CORE_ID = 0, - parameter CACHE_ENABLE = 0 -) ( - input wire clk, - input wire reset, - - VX_writeback_if.slave writeback_if, - VX_scoreboard_if.slave scoreboard_if, - VX_operands_if.master operands_if -); - `UNUSED_PARAM (CORE_ID) - localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS; - localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO); - - localparam STATE_IDLE = 2'd0; - localparam STATE_FETCH1 = 2'd1; - localparam STATE_FETCH2 = 2'd2; - localparam STATE_FETCH3 = 2'd3; - localparam STATE_BITS = 2; - - wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data; - reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n; - reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n; - - reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data [ISSUE_RATIO-1:0]; - reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data_n [ISSUE_RATIO-1:0]; - reg [`NR_BITS-1:0] cache_reg [ISSUE_RATIO-1:0]; - reg [`NR_BITS-1:0] cache_reg_n 
[ISSUE_RATIO-1:0]; - reg [`NUM_THREADS-1:0] cache_tmask [ISSUE_RATIO-1:0]; - reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0]; - reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n; - - reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n; - reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n; - reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n; - - reg [STATE_BITS-1:0] state, state_n; - reg [`NR_BITS-1:0] rs2, rs2_n; - reg [`NR_BITS-1:0] rs3, rs3_n; - reg rs2_ready, rs2_ready_n; - reg rs3_ready, rs3_ready_n; - reg data_ready, data_ready_n; - - wire stg_valid_in, stg_ready_in; - - wire is_rs1_zero = (scoreboard_if.data.rs1 == 0); - wire is_rs2_zero = (scoreboard_if.data.rs2 == 0); - wire is_rs3_zero = (scoreboard_if.data.rs3 == 0); - - always @(*) begin - state_n = state; - rs2_n = rs2; - rs3_n = rs3; - rs2_ready_n = rs2_ready; - rs3_ready_n = rs3_ready; - rs1_data_n = rs1_data; - rs2_data_n = rs2_data; - rs3_data_n = rs3_data; - cache_data_n = cache_data; - cache_reg_n = cache_reg; - cache_tmask_n= cache_tmask; - cache_eop_n = cache_eop; - gpr_rd_rid_n = gpr_rd_rid; - gpr_rd_wis_n = gpr_rd_wis; - data_ready_n = data_ready; - - case (state) - STATE_IDLE: begin - if (operands_if.valid && operands_if.ready) begin - data_ready_n = 0; - end - if (scoreboard_if.valid && data_ready_n == 0) begin - data_ready_n = 1; - if (is_rs3_zero || (CACHE_ENABLE != 0 && - scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] && - (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin - rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? 
'0 : cache_data[scoreboard_if.data.wis]; - rs3_ready_n = 1; - end else begin - rs3_ready_n = 0; - gpr_rd_rid_n = scoreboard_if.data.rs3; - data_ready_n = 0; - state_n = STATE_FETCH3; - end - if (is_rs2_zero || (CACHE_ENABLE != 0 && - scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] && - (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin - rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis]; - rs2_ready_n = 1; - end else begin - rs2_ready_n = 0; - gpr_rd_rid_n = scoreboard_if.data.rs2; - data_ready_n = 0; - state_n = STATE_FETCH2; - end - if (is_rs1_zero || (CACHE_ENABLE != 0 && - scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] && - (scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin - rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis]; - end else begin - gpr_rd_rid_n = scoreboard_if.data.rs1; - data_ready_n = 0; - state_n = STATE_FETCH1; - end - end - gpr_rd_wis_n = scoreboard_if.data.wis; - rs2_n = scoreboard_if.data.rs2; - rs3_n = scoreboard_if.data.rs3; - end - STATE_FETCH1: begin - rs1_data_n = gpr_rd_data; - if (~rs2_ready) begin - gpr_rd_rid_n = rs2; - state_n = STATE_FETCH2; - end else if (~rs3_ready) begin - gpr_rd_rid_n = rs3; - state_n = STATE_FETCH3; - end else begin - data_ready_n = 1; - state_n = STATE_IDLE; - end - end - STATE_FETCH2: begin - rs2_data_n = gpr_rd_data; - if (~rs3_ready) begin - gpr_rd_rid_n = rs3; - state_n = STATE_FETCH3; - end else begin - data_ready_n = 1; - state_n = STATE_IDLE; - end - end - STATE_FETCH3: begin - rs3_data_n = gpr_rd_data; - data_ready_n = 1; - state_n = STATE_IDLE; - end - endcase - - if (CACHE_ENABLE != 0 && writeback_if.valid) begin - if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd) - || (cache_eop[writeback_if.data.wis] && writeback_if.data.sop)) begin - for (integer j = 0; j < `NUM_THREADS; ++j) begin 
- if (writeback_if.data.tmask[j]) begin - cache_data_n[writeback_if.data.wis][j] = writeback_if.data.data[j]; - end - end - cache_reg_n[writeback_if.data.wis] = writeback_if.data.rd; - cache_eop_n[writeback_if.data.wis] = writeback_if.data.eop; - cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? writeback_if.data.tmask : - (cache_tmask_n[writeback_if.data.wis] | writeback_if.data.tmask); - end - end - end - - always @(posedge clk) begin - if (reset) begin - state <= STATE_IDLE; - cache_eop <= {ISSUE_RATIO{1'b1}}; - data_ready <= 0; - end else begin - state <= state_n; - cache_eop <= cache_eop_n; - data_ready <= data_ready_n; - end - gpr_rd_rid <= gpr_rd_rid_n; - gpr_rd_wis <= gpr_rd_wis_n; - rs2_ready <= rs2_ready_n; - rs3_ready <= rs3_ready_n; - rs2 <= rs2_n; - rs3 <= rs3_n; - rs1_data <= rs1_data_n; - rs2_data <= rs2_data_n; - rs3_data <= rs3_data_n; - cache_data <= cache_data_n; - cache_reg <= cache_reg_n; - cache_tmask <= cache_tmask_n; - end - - assign stg_valid_in = scoreboard_if.valid && data_ready; - assign scoreboard_if.ready = stg_ready_in && data_ready; - - VX_toggle_buffer #( - .DATAW (DATAW) - ) toggle_buffer ( - .clk (clk), - .reset (reset), - .valid_in (stg_valid_in), - .data_in ({ - scoreboard_if.data.uuid, - scoreboard_if.data.wis, - scoreboard_if.data.tmask, - scoreboard_if.data.PC, - scoreboard_if.data.wb, - scoreboard_if.data.ex_type, - scoreboard_if.data.op_type, - scoreboard_if.data.op_args, - scoreboard_if.data.rd - }), - .ready_in (stg_ready_in), - .valid_out (operands_if.valid), - .data_out ({ - operands_if.data.uuid, - operands_if.data.wis, - operands_if.data.tmask, - operands_if.data.PC, - operands_if.data.wb, - operands_if.data.ex_type, - operands_if.data.op_type, - operands_if.data.op_args, - operands_if.data.rd - }), - .ready_out (operands_if.ready) - ); - - assign operands_if.data.rs1_data = rs1_data; - assign operands_if.data.rs2_data = rs2_data; - assign operands_if.data.rs3_data = rs3_data; - - // GPR banks - - reg 
[RAM_ADDRW-1:0] gpr_rd_addr; - wire [RAM_ADDRW-1:0] gpr_wr_addr; - if (ISSUE_WIS != 0) begin - assign gpr_wr_addr = {writeback_if.data.wis, writeback_if.data.rd}; - always @(posedge clk) begin - gpr_rd_addr <= {gpr_rd_wis_n, gpr_rd_rid_n}; - end - end else begin - assign gpr_wr_addr = writeback_if.data.rd; - always @(posedge clk) begin - gpr_rd_addr <= gpr_rd_rid_n; - end - end - -`ifdef GPR_RESET - reg wr_enabled = 0; - always @(posedge clk) begin - if (reset) begin - wr_enabled <= 1; - end - end -`endif - - for (genvar j = 0; j < `NUM_THREADS; ++j) begin - VX_dp_ram #( - .DATAW (`XLEN), - .SIZE (`NUM_REGS * ISSUE_RATIO), - `ifdef GPR_RESET - .INIT_ENABLE (1), - .INIT_VALUE (0), - `endif - .NO_RWCHECK (1) - ) gpr_ram ( - .clk (clk), - .read (1'b1), - `UNUSED_PIN (wren), - `ifdef GPR_RESET - .write (wr_enabled && writeback_if.valid && writeback_if.data.tmask[j]), - `else - .write (writeback_if.valid && writeback_if.data.tmask[j]), - `endif - .waddr (gpr_wr_addr), - .wdata (writeback_if.data.data[j]), - .raddr (gpr_rd_addr), - .rdata (gpr_rd_data[j]) - ); - end - -endmodule diff --git a/hw/rtl/core/VX_pending_instr.sv b/hw/rtl/core/VX_pending_instr.sv deleted file mode 100644 index af87b53e0..000000000 --- a/hw/rtl/core/VX_pending_instr.sv +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_define.vh" - -module VX_pending_instr #( - parameter CTR_WIDTH = 12, - parameter ALM_EMPTY = 1, - parameter DECR_COUNT = 1 -) ( - input wire clk, - input wire reset, - input wire incr, - input wire [`NW_WIDTH-1:0] incr_wid, - input wire [DECR_COUNT-1:0] decr, - input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid, - input wire [`NW_WIDTH-1:0] alm_empty_wid, - output wire empty, - output wire alm_empty -); - localparam COUNTW = `CLOG2(DECR_COUNT+1); - - reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0] pending_instrs; - reg [`NUM_WARPS-1:0][COUNTW-1:0] decr_cnt; - reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask; - reg [`NUM_WARPS-1:0] incr_cnt, incr_cnt_n; - reg [`NUM_WARPS-1:0] alm_empty_r, empty_r; - - always @(*) begin - incr_cnt_n = 0; - decr_mask = 0; - if (incr) begin - incr_cnt_n[incr_wid] = 1; - end - for (integer i = 0; i < DECR_COUNT; ++i) begin - if (decr[i]) begin - decr_mask[decr_wid[i]][i] = 1; - end - end - end - - for (genvar i = 0; i < `NUM_WARPS; ++i) begin - - wire [COUNTW-1:0] decr_cnt_n; - `POP_COUNT(decr_cnt_n, decr_mask[i]); - - wire [CTR_WIDTH-1:0] pending_instrs_n = pending_instrs[i] + CTR_WIDTH'(incr_cnt[i]) - CTR_WIDTH'(decr_cnt[i]); - - always @(posedge clk) begin - if (reset) begin - incr_cnt[i] <= '0; - decr_cnt[i] <= '0; - pending_instrs[i] <= '0; - alm_empty_r[i] <= 0; - empty_r[i] <= 1; - end else begin - incr_cnt[i] <= incr_cnt_n[i]; - decr_cnt[i] <= decr_cnt_n; - pending_instrs[i] <= pending_instrs_n; - alm_empty_r[i] <= (pending_instrs_n == ALM_EMPTY); - empty_r[i] <= (pending_instrs_n == 0); - end - end - end - - assign alm_empty = alm_empty_r[alm_empty_wid]; - assign empty = (& empty_r); - -endmodule diff --git a/hw/rtl/core/VX_trace.vh b/hw/rtl/core/VX_trace.vh deleted file mode 100644 index 5dc4bc304..000000000 --- a/hw/rtl/core/VX_trace.vh +++ /dev/null @@ -1,387 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the 
License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`ifndef VX_TRACE_VH -`define VX_TRACE_VH - -`ifdef SIMULATION - - task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); - case (ex_type) - `EX_ALU: `TRACE(level, ("ALU")); - `EX_LSU: `TRACE(level, ("LSU")); - `EX_FPU: `TRACE(level, ("FPU")); - `EX_SFU: `TRACE(level, ("SFU")); - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_ex_op(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - case (op_args.alu.xtype) - `ALU_TYPE_ARITH: begin - if (op_args.alu.is_w) begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDIW")); - `INST_ALU_SLL: `TRACE(level, ("SLLIW")); - `INST_ALU_SRL: `TRACE(level, ("SRLIW")); - `INST_ALU_SRA: `TRACE(level, ("SRAIW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDW")); - `INST_ALU_SUB: `TRACE(level, ("SUBW")); - `INST_ALU_SLL: `TRACE(level, ("SLLW")); - `INST_ALU_SRL: `TRACE(level, ("SRLW")); - `INST_ALU_SRA: `TRACE(level, ("SRAW")); - default: `TRACE(level, ("?")); - endcase - end - end else begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDI")); - `INST_ALU_SLL: `TRACE(level, ("SLLI")); - `INST_ALU_SRL: `TRACE(level, ("SRLI")); - `INST_ALU_SRA: `TRACE(level, ("SRAI")); - `INST_ALU_SLT: `TRACE(level, ("SLTI")); - `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); - `INST_ALU_XOR: 
`TRACE(level, ("XORI")); - `INST_ALU_OR: `TRACE(level, ("ORI")); - `INST_ALU_AND: `TRACE(level, ("ANDI")); - `INST_ALU_LUI: `TRACE(level, ("LUI")); - `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADD")); - `INST_ALU_SUB: `TRACE(level, ("SUB")); - `INST_ALU_SLL: `TRACE(level, ("SLL")); - `INST_ALU_SRL: `TRACE(level, ("SRL")); - `INST_ALU_SRA: `TRACE(level, ("SRA")); - `INST_ALU_SLT: `TRACE(level, ("SLT")); - `INST_ALU_SLTU: `TRACE(level, ("SLTU")); - `INST_ALU_XOR: `TRACE(level, ("XOR")); - `INST_ALU_OR: `TRACE(level, ("OR")); - `INST_ALU_AND: `TRACE(level, ("AND")); - `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); - `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); - default: `TRACE(level, ("?")); - endcase - end - end - end - `ALU_TYPE_BRANCH: begin - case (`INST_BR_BITS'(op_type)) - `INST_BR_EQ: `TRACE(level, ("BEQ")); - `INST_BR_NE: `TRACE(level, ("BNE")); - `INST_BR_LT: `TRACE(level, ("BLT")); - `INST_BR_GE: `TRACE(level, ("BGE")); - `INST_BR_LTU: `TRACE(level, ("BLTU")); - `INST_BR_GEU: `TRACE(level, ("BGEU")); - `INST_BR_JAL: `TRACE(level, ("JAL")); - `INST_BR_JALR: `TRACE(level, ("JALR")); - `INST_BR_ECALL: `TRACE(level, ("ECALL")); - `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); - `INST_BR_URET: `TRACE(level, ("URET")); - `INST_BR_SRET: `TRACE(level, ("SRET")); - `INST_BR_MRET: `TRACE(level, ("MRET")); - default: `TRACE(level, ("?")); - endcase - end - `ALU_TYPE_MULDIV: begin - if (op_args.alu.is_w) begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MULW")); - `INST_M_DIV: `TRACE(level, ("DIVW")); - `INST_M_DIVU: `TRACE(level, ("DIVUW")); - `INST_M_REM: `TRACE(level, ("REMW")); - `INST_M_REMU: `TRACE(level, ("REMUW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MUL")); - `INST_M_MULH: `TRACE(level, ("MULH")); - `INST_M_MULHSU:`TRACE(level, 
("MULHSU")); - `INST_M_MULHU: `TRACE(level, ("MULHU")); - `INST_M_DIV: `TRACE(level, ("DIV")); - `INST_M_DIVU: `TRACE(level, ("DIVU")); - `INST_M_REM: `TRACE(level, ("REM")); - `INST_M_REMU: `TRACE(level, ("REMU")); - default: `TRACE(level, ("?")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_LSU: begin - if (op_args.lsu.is_float) begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LW: `TRACE(level, ("FLW")); - `INST_LSU_LD: `TRACE(level, ("FLD")); - `INST_LSU_SW: `TRACE(level, ("FSW")); - `INST_LSU_SD: `TRACE(level, ("FSD")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LB: `TRACE(level, ("LB")); - `INST_LSU_LH: `TRACE(level, ("LH")); - `INST_LSU_LW: `TRACE(level, ("LW")); - `INST_LSU_LD: `TRACE(level, ("LD")); - `INST_LSU_LBU:`TRACE(level, ("LBU")); - `INST_LSU_LHU:`TRACE(level, ("LHU")); - `INST_LSU_LWU:`TRACE(level, ("LWU")); - `INST_LSU_SB: `TRACE(level, ("SB")); - `INST_LSU_SH: `TRACE(level, ("SH")); - `INST_LSU_SW: `TRACE(level, ("SW")); - `INST_LSU_SD: `TRACE(level, ("SD")); - `INST_LSU_FENCE:`TRACE(level,("FENCE")); - default: `TRACE(level, ("?")); - endcase - end - end - `EX_FPU: begin - case (`INST_FPU_BITS'(op_type)) - `INST_FPU_ADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); - else - `TRACE(level, ("FADD.S")); - end - `INST_FPU_SUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); - else - `TRACE(level, ("FSUB.S")); - end - `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMUL.D")); - else - `TRACE(level, ("FMUL.S")); - end - `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FDIV.D")); - else - `TRACE(level, ("FDIV.S")); - end - `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSQRT.D")); - else - `TRACE(level, ("FSQRT.S")); - end - `INST_FPU_MADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); - else - `TRACE(level, ("FMADD.S")); - end - `INST_FPU_MSUB: begin - if 
(op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); - else - `TRACE(level, ("FMSUB.S")); - end - `INST_FPU_NMADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); - else - `TRACE(level, ("FNMADD.S")); - end - `INST_FPU_NMSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); - else - `TRACE(level, ("FNMSUB.S")); - end - `INST_FPU_CMP: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.D")); - 1: `TRACE(level, ("FLT.D")); - 2: `TRACE(level, ("FEQ.D")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.S")); - 1: `TRACE(level, ("FLT.S")); - 2: `TRACE(level, ("FEQ.S")); - default: `TRACE(level, ("?")); - endcase - end - end - `INST_FPU_F2F: begin - if (op_args.fpu.fmt[0]) begin - `TRACE(level, ("FCVT.D.S")); - end else begin - `TRACE(level, ("FCVT.S.D")); - end - end - `INST_FPU_F2I: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.D")); - end else begin - `TRACE(level, ("FCVT.W.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.S")); - end else begin - `TRACE(level, ("FCVT.W.S")); - end - end - end - `INST_FPU_F2U: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.D")); - end else begin - `TRACE(level, ("FCVT.WU.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.S")); - end else begin - `TRACE(level, ("FCVT.WU.S")); - end - end - end - `INST_FPU_I2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.L")); - end else begin - `TRACE(level, ("FCVT.D.W")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.L")); - end else begin - `TRACE(level, ("FCVT.S.W")); - end - end - end - `INST_FPU_U2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.LU")); - end else 
begin - `TRACE(level, ("FCVT.D.WU")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.LU")); - end else begin - `TRACE(level, ("FCVT.S.WU")); - end - end - end - `INST_FPU_MISC: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.D")); - 1: `TRACE(level, ("FSGNJN.D")); - 2: `TRACE(level, ("FSGNJX.D")); - 3: `TRACE(level, ("FCLASS.D")); - 4: `TRACE(level, ("FMV.X.D")); - 5: `TRACE(level, ("FMV.D.X")); - 6: `TRACE(level, ("FMIN.D")); - 7: `TRACE(level, ("FMAX.D")); - endcase - end else begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.S")); - 1: `TRACE(level, ("FSGNJN.S")); - 2: `TRACE(level, ("FSGNJX.S")); - 3: `TRACE(level, ("FCLASS.S")); - 4: `TRACE(level, ("FMV.X.S")); - 5: `TRACE(level, ("FMV.S.X")); - 6: `TRACE(level, ("FMIN.S")); - 7: `TRACE(level, ("FMAX.S")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_SFU: begin - case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); - endcase - end - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_op_args(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - 
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); - end - `EX_LSU: begin - `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); - end - `EX_FPU: begin - `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); - end - `EX_SFU: begin - if (`INST_SFU_IS_CSR(op_type)) begin - `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); - end - end - default:; - endcase - endtask - - task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); - case (addr) - `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); - `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); - `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); - `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); - `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); - default: `TRACE(level, ("?")); - endcase - endtask - -`endif - -`endif // VX_TRACE_VH From 1fa4603fa2e0ef8a1da43e9df45abe323add1d5f Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 01:14:19 -0700 Subject: [PATCH 399/488] disable sformatf during synthesis --- hw/rtl/VX_cluster.sv | 6 +++--- hw/rtl/VX_platform.vh | 3 +++ hw/rtl/VX_socket.sv | 6 +++--- hw/rtl/Vortex.sv | 2 +- hw/rtl/cache/VX_cache.sv | 2 +- hw/rtl/cache/VX_cache_bank.sv | 2 +- hw/rtl/cache/VX_cache_cluster.sv | 2 +- hw/rtl/cache/VX_cache_top.sv | 2 +- hw/rtl/core/VX_alu_unit.sv | 4 ++-- hw/rtl/core/VX_core.sv | 12 ++++++------ hw/rtl/core/VX_core_top.sv | 2 +- hw/rtl/core/VX_execute.sv | 8 ++++---- hw/rtl/core/VX_issue.sv | 2 +- hw/rtl/core/VX_issue_slice.sv | 8 ++++---- hw/rtl/core/VX_lsu_slice.sv | 2 +- hw/rtl/core/VX_lsu_unit.sv | 2 +- hw/rtl/core/VX_mem_unit.sv | 4 ++-- hw/rtl/core/VX_schedule.sv | 2 +- hw/rtl/core/VX_sfu_unit.sv | 4 ++-- hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 20 files changed, 40 insertions(+), 37 deletions(-) diff --git a/hw/rtl/VX_cluster.sv 
b/hw/rtl/VX_cluster.sv index bec4e232f..853881c08 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -67,7 +67,7 @@ module VX_cluster import VX_gpu_pkg::*; #( ); VX_gbar_unit #( - .INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID)) + .INSTANCE_ID (`SFORMATF(("gbar%0d", CLUSTER_ID))) ) gbar_unit ( .clk (clk), .reset (reset), @@ -84,7 +84,7 @@ module VX_cluster import VX_gpu_pkg::*; #( `RESET_RELAY (l2_reset, reset); VX_cache_wrap #( - .INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-l2cache", INSTANCE_ID))), .CACHE_SIZE (`L2_CACHE_SIZE), .LINE_SIZE (`L2_LINE_SIZE), .NUM_BANKS (`L2_NUM_BANKS), @@ -131,7 +131,7 @@ module VX_cluster import VX_gpu_pkg::*; #( VX_socket #( .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id), - .INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id)) + .INSTANCE_ID (`SFORMATF(("%s-socket%0d", INSTANCE_ID, socket_id))) ) socket ( `SCOPE_IO_BIND (scope_socket+socket_id) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index eb58e1798..8c4effaf4 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -130,6 +130,8 @@ endgenerate end `endif +`define SFORMATF(x) $sformatf x + `else // SYNTHESIS `define STATIC_ASSERT(cond, msg) @@ -139,6 +141,7 @@ endgenerate `define DEBUG_BLOCK(x) `define TRACE(level, args) +`define SFORMATF(x) `define TRACING_ON `define TRACING_OFF diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 299fb6791..87dcbd02e 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -85,7 +85,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (icache_reset, reset); VX_cache_cluster #( - .INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-icache", INSTANCE_ID))), .NUM_UNITS (`NUM_ICACHES), .NUM_INPUTS (`SOCKET_SIZE), .TAG_SEL_IDX (0), @@ -132,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #( `RESET_RELAY (dcache_reset, reset); VX_cache_cluster #( - .INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)), + 
.INSTANCE_ID (`SFORMATF(("%s-dcache", INSTANCE_ID))), .NUM_UNITS (`NUM_DCACHES), .NUM_INPUTS (`SOCKET_SIZE), .TAG_SEL_IDX (0), @@ -212,7 +212,7 @@ module VX_socket import VX_gpu_pkg::*; #( VX_core #( .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), - .INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id)) + .INSTANCE_ID (`SFORMATF(("%s-core%0d", INSTANCE_ID, core_id))) ) core ( `SCOPE_IO_BIND (scope_core + core_id) diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 5df403880..bce771340 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -140,7 +140,7 @@ module Vortex import VX_gpu_pkg::*; ( VX_cluster #( .CLUSTER_ID (cluster_id), - .INSTANCE_ID ($sformatf("cluster%0d", cluster_id)) + .INSTANCE_ID (`SFORMATF(("cluster%0d", cluster_id))) ) cluster ( `SCOPE_IO_BIND (scope_cluster + cluster_id) diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 40f062ecc..8c3db21f4 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -389,7 +389,7 @@ module VX_cache import VX_gpu_pkg::*; #( VX_cache_bank #( .BANK_ID (bank_id), - .INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)), + .INSTANCE_ID (`SFORMATF(("%s-bank%0d", INSTANCE_ID, bank_id))), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 7258e847e..d3218c54c 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -503,7 +503,7 @@ module VX_cache_bank #( ); VX_cache_mshr #( - .INSTANCE_ID ($sformatf("%s-mshr", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-mshr", INSTANCE_ID))), .BANK_ID (BANK_ID), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 32662e848..fc4afdb0a 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -146,7 +146,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( for (genvar i = 0; i < NUM_CACHES; ++i) 
begin : g_cache_wrap VX_cache_wrap #( - .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), + .INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, i))), .CACHE_SIZE (CACHE_SIZE), .LINE_SIZE (LINE_SIZE), .NUM_BANKS (NUM_BANKS), diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index d6bd4aace..6dad5b6a8 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -20,7 +20,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter NUM_REQS = 4, // Size of cache in bytes - parameter CACHE_SIZE = 32768, + parameter CACHE_SIZE = 65536, // Size of line inside a bank in bytes parameter LINE_SIZE = 64, // Number of banks diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 951cd811b..e87221709 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -89,7 +89,7 @@ module VX_alu_unit #( ); VX_alu_int #( - .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)), + .INSTANCE_ID (`SFORMATF(("%s-int%0d", INSTANCE_ID, block_idx))), .BLOCK_IDX (block_idx), .NUM_LANES (NUM_LANES) ) alu_int ( @@ -102,7 +102,7 @@ module VX_alu_unit #( `ifdef EXT_M_ENABLE VX_alu_muldiv #( - .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)), + .INSTANCE_ID (`SFORMATF(("%s-muldiv%0d", INSTANCE_ID, block_idx))), .NUM_LANES (NUM_LANES) ) muldiv_unit ( .clk (clk), diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 260cedca3..62ed016af 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -87,7 +87,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (3); VX_schedule #( - .INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-schedule", INSTANCE_ID))), .CORE_ID (CORE_ID) ) schedule ( .clk (clk), @@ -115,7 +115,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_fetch #( - .INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-fetch", INSTANCE_ID))) ) fetch ( `SCOPE_IO_BIND (0) .clk (clk), @@ -126,7 +126,7 @@ module 
VX_core import VX_gpu_pkg::*; #( ); VX_decode #( - .INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-decode", INSTANCE_ID))) ) decode ( .clk (clk), .reset (reset), @@ -136,7 +136,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_issue #( - .INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-issue", INSTANCE_ID))) ) issue ( `SCOPE_IO_BIND (1) @@ -153,7 +153,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_execute #( - .INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-execute", INSTANCE_ID))), .CORE_ID (CORE_ID) ) execute ( `SCOPE_IO_BIND (2) @@ -181,7 +181,7 @@ module VX_core import VX_gpu_pkg::*; #( ); VX_commit #( - .INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-commit", INSTANCE_ID))) ) commit ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_core_top.sv b/hw/rtl/core/VX_core_top.sv index 9ade1c28b..e16a80259 100644 --- a/hw/rtl/core/VX_core_top.sv +++ b/hw/rtl/core/VX_core_top.sv @@ -144,7 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #( `endif VX_core #( - .INSTANCE_ID ($sformatf("core")), + .INSTANCE_ID (`SFORMATF(("core"))), .CORE_ID (CORE_ID) ) core ( `SCOPE_IO_BIND (0) diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index 4f66757f1..b737725ea 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -52,7 +52,7 @@ module VX_execute import VX_gpu_pkg::*; #( `endif VX_alu_unit #( - .INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-alu", INSTANCE_ID))) ) alu_unit ( .clk (clk), .reset (reset), @@ -64,7 +64,7 @@ module VX_execute import VX_gpu_pkg::*; #( `SCOPE_IO_SWITCH (1); VX_lsu_unit #( - .INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-lsu", INSTANCE_ID))) ) lsu_unit ( `SCOPE_IO_BIND (0) .clk (clk), @@ -76,7 +76,7 @@ module VX_execute import VX_gpu_pkg::*; #( `ifdef EXT_F_ENABLE VX_fpu_unit #( - .INSTANCE_ID 
($sformatf("%s-fpu", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-fpu", INSTANCE_ID))) ) fpu_unit ( .clk (clk), .reset (reset), @@ -87,7 +87,7 @@ module VX_execute import VX_gpu_pkg::*; #( `endif VX_sfu_unit #( - .INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-sfu", INSTANCE_ID))), .CORE_ID (CORE_ID) ) sfu_unit ( .clk (clk), diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 5da33cbba..924d1a67d 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -78,7 +78,7 @@ module VX_issue import VX_gpu_pkg::*; #( `endif VX_issue_slice #( - .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)), + .INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, issue_id))), .ISSUE_ID (issue_id) ) issue_slice ( `SCOPE_IO_BIND(issue_id) diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index f287525c7..d72937251 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -37,7 +37,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( VX_operands_if operands_if(); VX_ibuffer #( - .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID))) ) ibuffer ( .clk (clk), .reset (reset), @@ -49,7 +49,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); VX_scoreboard #( - .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID))) ) scoreboard ( .clk (clk), .reset (reset), @@ -64,7 +64,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); VX_operands #( - .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID))) ) operands ( .clk (clk), .reset (reset), @@ -77,7 +77,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( ); VX_dispatch #( - .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID))) ) dispatch ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_lsu_slice.sv 
b/hw/rtl/core/VX_lsu_slice.sv index 67fc3eaa8..0018db08d 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( wire lsu_mem_rsp_ready; VX_mem_scheduler #( - .INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-memsched", INSTANCE_ID))), .CORE_REQS (NUM_LANES), .MEM_CHANNELS(NUM_LANES), .WORD_SIZE (LSU_WORD_SIZE), diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index 674ca2686..7a64a849b 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -54,7 +54,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_slices VX_lsu_slice #( - .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) + .INSTANCE_ID (`SFORMATF(("%s%0d", INSTANCE_ID, block_idx))) ) lsu_slice( `SCOPE_IO_BIND (block_idx) .clk (clk), diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index c02e99b29..57961a24b 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -92,7 +92,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( end VX_local_mem #( - .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), + .INSTANCE_ID(`SFORMATF(("%s-lmem", INSTANCE_ID))), .SIZE (1 << `LMEM_LOG_SIZE), .NUM_REQS (LSU_NUM_REQS), .NUM_BANKS (`LMEM_NUM_BANKS), @@ -131,7 +131,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), + .INSTANCE_ID (`SFORMATF(("%s-coalescer%0d", INSTANCE_ID, i))), .NUM_REQS (`NUM_LSU_LANES), .DATA_IN_SIZE (LSU_WORD_SIZE), .DATA_OUT_SIZE (DCACHE_WORD_SIZE), diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 5011ccb2c..800b6b63f 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -290,7 +290,7 @@ module VX_schedule import VX_gpu_pkg::*; #( // split/join 
handling VX_split_join #( - .INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID)) + .INSTANCE_ID (`SFORMATF(("%s-splitjoin", INSTANCE_ID))) ) split_join ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 5af6211f6..dccfcfe46 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -99,7 +99,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( ); VX_wctl_unit #( - .INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-wctl", INSTANCE_ID))), .NUM_LANES (NUM_LANES) ) wctl_unit ( .clk (clk), @@ -110,7 +110,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( ); VX_csr_unit #( - .INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-csr", INSTANCE_ID))), .CORE_ID (CORE_ID), .NUM_LANES (NUM_LANES) ) csr_unit ( diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index 523257eb4..f89b663e9 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -223,7 +223,7 @@ module VX_mem_scheduler #( if (COALESCE_ENABLE) begin : g_coalescer VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), + .INSTANCE_ID (`SFORMATF(("%s-coalescer", INSTANCE_ID))), .NUM_REQS (CORE_REQS), .DATA_IN_SIZE (WORD_SIZE), .DATA_OUT_SIZE (LINE_SIZE), From 24d018b4c9a273f18d18e071ee0f2cad803e886b Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Wed, 23 Oct 2024 05:18:53 -0400 Subject: [PATCH 400/488] documentation updates --- README.md | 2 +- docs/fpga_setup.md | 66 +++++++++++++++++++++++---------------- docs/index.md | 11 +++---- docs/microarchitecture.md | 5 ++- docs/simulation.md | 5 +-- docs/testing.md | 4 +-- 6 files changed, 54 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 97686c641..a7228e772 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Vortex GPGPU -Vortex is a full-stack open-source RISC-V GPGPU. 
Vortex supports multiple *backend drivers*, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before completing going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. +Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple **backend drivers**, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before completing going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program. ## Website Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 5b90df0b6..e7ab0ecbb 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -52,9 +52,9 @@ To request 16 cores and 64GB of RAM for 6 hours on flubber9, a fpga dev node: ```bash salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 --time=06:00:00 ``` - -## Environment Setup -Once you are logged in, you will need to complete some first time configurations. +Synthesis for Xilinx Boards +---------------------- +Once you are logged in, you will need to complete some first time configurations. If you are interested in the Intel (Altera) synthesis steps, scroll down below. 
### Source Configuration Scripts ``` @@ -89,7 +89,7 @@ The build is complete when the bitstream file `vortex_afu.xclbin` exists in ` ### Running a Program on Xilinx FPGA -The blackbox.sh script in `ci` can be used to run a test with Vortex’s xrt driver using the following command: +The [blackbox.sh](./simulation.md) script within the build directory can be used to run a test with Vortex’s xrt driver using the following command: `FPGA_BIN_DIR= TARGET=hw|hw_emu PLATFORM= ./ci/blackbox.sh --driver=xrt --app=` @@ -97,20 +97,12 @@ For example: ```FPGA_BIN_DIR= hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo``` -### Synthesis for Intel (Altera) Boards - -To set up the environment, source the XRT setup.sh and other Xilinx scripts. For example: - -``` -source /opt/xilinx/xrt/setup.sh -source /tools/reconfig/xilinx/Vivado/2022.1/settings64.sh -source /tools/reconfig/xilinx/Vitis/2022.1/settings64.sh - -``` - -OPAE Environment Setup +Synthesis for Intel (Altera) Boards ---------------------- +### OPAE Environment Setup + + $ source /opt/inteldevstack/init_env_user.sh $ export OPAE_HOME=/opt/opae/1.1.2 $ export PATH=$OPAE_HOME/bin:$PATH @@ -118,8 +110,7 @@ OPAE Environment Setup $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH -OPAE Build ------------------- +### OPAE Build The FPGA has to following configuration options: - DEVICE_FAMILY=arria10 | stratix10 @@ -134,8 +125,7 @@ A new folder (ex: `test1_xxx_4c`) will be created and the build will start and t Setting TARGET=ase will build the project for simulation using Intel ASE. 
-OPAE Build Configuration ------------------------- +### OPAE Build Configuration The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured: - `NUM_WARPS`: Number of warps per cores @@ -146,8 +136,7 @@ You configure the syntesis build from the command line: $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make -OPAE Build Progress -------------------- +### OPAE Build Progress You could check the last 10 lines in the build log for possible errors until build completion. @@ -166,17 +155,40 @@ The file `vortex_afu.gbs` should exist when the build is done: $ ls -lsa /synth/vortex_afu.gbs -Signing the bitstream and Programming the FPGA ----------------------------------------------- +### Signing the bitstream and Programming the FPGA $ cd $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs $ fpgasupdate vortex_afu_unsigned_ssl.gbs -FPGA sample test running OpenCL sgemm kernel --------------------------------------------- +### Sample FPGA Run Test +Ensure you have the correct opae runtime for the FPGA target -Run the following from the Vortex root directory +``` +$ TARGET=FPGA make -C runtime/opae +``` + +Run the [blackbox.sh](./simulation.md) from your Vortex build directory + +``` +$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" +``` + +### FPGA sample test running OpenCL sgemm kernel + +You can use the `blackbox.sh` script to run the following from your Vortex build directory $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" +### Testing Vortex using OPAE with Intel ASE Simulation +Building ASE synthesis + +```$ TARGET=asesim make -C runtime/opae``` + +Building ASE runtime + +```$ TARGET=asesim make -C runtime/opae``` + +Running ASE simulation + +```$ ASE_LOG=0 ASE_WORKDIR=/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm 
--args="-n16"``` diff --git a/docs/index.md b/docs/index.md index a53a2fd15..351e41fbb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,9 +2,8 @@ ## Table of Contents -- [Codebase Layout](codebase.md) -- [Microarchitecture](microarchitecture.md) -- [Cache Subsystem](cache_subsystem.md) -- [Simulation](simulation.md) -- [Contributing](contributing.md) -- [Debugging](debugging.md) +- [Codebase Layout](codebase.md): Summary of repo file tree +- [Microarchitecture](microarchitecture.md): Vortex Pipeline and cache microarchitectural details and reconfigurability +- [Simulation](simulation.md): Details for building and running each simulation driver +- [Contributing](contributing.md): Process for contributing your own features including repo semantics and testing +- [Debugging](debugging.md): Debugging configurations for each Vortex driver diff --git a/docs/microarchitecture.md b/docs/microarchitecture.md index 3459abcc4..85fa52fd5 100644 --- a/docs/microarchitecture.md +++ b/docs/microarchitecture.md @@ -77,4 +77,7 @@ Vortex has a 6-stage pipeline: - Sockets - Grouping multiple cores sharing L1 cache - Clusters - - Grouping of sockets sharing L2 cache \ No newline at end of file + - Grouping of sockets sharing L2 cache + +### Vortex Cache Subsystem +More details about the cache subsystem are provided [here](./cache_subsystem.md). \ No newline at end of file diff --git a/docs/simulation.md b/docs/simulation.md index d55b3cd94..4201a64d4 100644 --- a/docs/simulation.md +++ b/docs/simulation.md @@ -15,7 +15,7 @@ SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Altera based FPGAs. -### How to Test +### How to Test (using `blackbox.sh`) Running tests under specific drivers (rtlsim,simx,fpga) is done using the script named `blackbox.sh` located in the `ci` folder. 
Running command `./ci/blackbox.sh --help` from the Vortex root directory will display the following command line arguments for `blackbox.sh`: @@ -54,7 +54,8 @@ PERF: instrs=363180, cycles=53108, IPC=6.838518 ## Additional Quick Start Scenarios -Running Vortex simulators with different configurations: +Running Vortex simulators with different configurations and drivers is supported. For example: + - Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads $ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic diff --git a/docs/testing.md b/docs/testing.md index 0ec46bda9..739193ce3 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -2,7 +2,7 @@ ## Running a Vortex application -The framework provides a utility script: blackbox.sh under the /ci/ folder for executing applications in the tests tree. +The framework provides a utility script: blackbox.sh under the /ci/ folder for executing applications in the tests tree. It gets copied into the `build` directory with all the environment variables resolved, so you should run it from the `build` directory as follows: You can query the commandline options of the tool using: $ ./ci/blackbox.sh --help @@ -49,4 +49,4 @@ Compile your test: `$ make -C tests/regression/` Run your test: `$ ./ci/blackbox.sh --driver=simx --app= --debug` ## Adding Your Tests to the CI Pipeline -If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md). \ No newline at end of file +If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. 
Furthermore, if you are contributing a new feature, it is recommended that you add the ability to enable / disable the new feature that you are adding. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md). \ No newline at end of file From 1c384c096d66a5003faef66c3d701ab1e865c69e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 12:27:44 -0700 Subject: [PATCH 401/488] minor update --- hw/rtl/core/VX_mem_unit.sv | 2 +- hw/rtl/libs/VX_mem_coalescer.sv | 1 + hw/rtl/libs/VX_mem_scheduler.sv | 173 +++++++++++++++++--------------- 3 files changed, 95 insertions(+), 81 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 57961a24b..931ad65cd 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -127,7 +127,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_WIDTH (DCACHE_TAG_WIDTH) ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled + if ((`NUM_LSU_LANES > 1) && (LSU_WORD_SIZE != DCACHE_WORD_SIZE)) begin : g_enabled for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers VX_mem_coalescer #( diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index 19a704095..1a7030b86 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -74,6 +74,7 @@ module VX_mem_coalescer #( output wire out_rsp_ready ); `UNUSED_SPARAM (INSTANCE_ID) + `STATIC_ASSERT ((NUM_REQS > 1), ("invalid parameter")) `STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f89b663e9..f77854ec1 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ 
-32,7 +32,7 @@ module VX_mem_scheduler #( parameter WORD_WIDTH = WORD_SIZE * 8, parameter LINE_WIDTH = LINE_SIZE * 8, - parameter COALESCE_ENABLE = (LINE_SIZE != WORD_SIZE), + parameter COALESCE_ENABLE = (CORE_REQS > 1) && (LINE_SIZE != WORD_SIZE), parameter PER_LINE_REQS = LINE_SIZE / WORD_SIZE, parameter MERGED_REQS = CORE_REQS / PER_LINE_REQS, parameter MEM_BATCHES = `CDIV(MERGED_REQS, MEM_CHANNELS), @@ -94,6 +94,7 @@ module VX_mem_scheduler #( localparam CORE_BATCHES = COALESCE_ENABLE ? 1 : MEM_BATCHES; localparam CORE_BATCH_BITS = `CLOG2(CORE_BATCHES); + `STATIC_ASSERT ((MEM_CHANNELS <= CORE_REQS), ("invalid parameter")) `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)) @@ -411,99 +412,113 @@ module VX_mem_scheduler #( // Handle memory responses //////////////////////////////////////////////// - reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; - wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; - wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - - if (CORE_BATCHES > 1) begin : g_rsp_batch_idx - assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; - end else begin : g_rsp_batch_idx_0 - assign rsp_batch_idx = '0; - end - - for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask - localparam i = r / CORE_CHANNELS; - localparam j = r % CORE_CHANNELS; - assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; - end - - assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask; - - wire rsp_complete = ~(| rsp_rem_mask_n); - - wire mem_rsp_fire_s = mem_rsp_valid_s && mem_rsp_ready_s; - - always @(posedge clk) begin - if (ibuf_push) begin - rsp_rem_mask[ibuf_waddr] <= core_req_mask; - end - if (mem_rsp_fire_s) begin - rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n; - end - end - - if (RSP_PARTIAL != 0 || CORE_REQS == 1) begin : g_rsp_partial 
- - reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; - - always @(posedge clk) begin - if (ibuf_push) begin - rsp_sop_r[ibuf_waddr] <= 1; - end - if (mem_rsp_fire_s) begin - rsp_sop_r[ibuf_raddr] <= 0; - end - end + if (CORE_REQS == 1) begin : g_rsp_1 assign crsp_valid = mem_rsp_valid_s; - assign crsp_mask = curr_mask; - assign crsp_sop = rsp_sop_r[ibuf_raddr]; - - for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data - localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = mem_rsp_data_s[j]; - end + assign crsp_mask = mem_rsp_mask_s; + assign crsp_sop = 1'b1; + assign crsp_eop = 1'b1; + assign crsp_data = mem_rsp_data_s; assign mem_rsp_ready_s = crsp_ready; - end else begin : g_rsp_full + end else begin : g_rsp_N - wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; - reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; + reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; + wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; + wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store - for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j - reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1]; - wire rsp_wren = mem_rsp_fire_s - && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) - && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); - always @(posedge clk) begin - if (rsp_wren) begin - rsp_store[ibuf_raddr] <= mem_rsp_data_s[i]; - end - end - assign rsp_store_n[i][j] = rsp_wren ? 
mem_rsp_data_s[i] : rsp_store[ibuf_raddr]; - end + if (CORE_BATCHES > 1) begin : g_rsp_batch_idx + assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; + end else begin : g_rsp_batch_idx_0 + assign rsp_batch_idx = '0; end + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask + localparam i = r / CORE_CHANNELS; + localparam j = r % CORE_CHANNELS; + assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; + end + + assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask; + + wire mem_rsp_fire_s = mem_rsp_valid_s && mem_rsp_ready_s; + always @(posedge clk) begin if (ibuf_push) begin - rsp_orig_mask[ibuf_waddr] <= core_req_mask; + rsp_rem_mask[ibuf_waddr] <= core_req_mask; + end + if (mem_rsp_fire_s) begin + rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n; end end - assign crsp_valid = mem_rsp_valid_s && rsp_complete; - assign crsp_mask = rsp_orig_mask[ibuf_raddr]; - assign crsp_sop = 1'b1; + wire rsp_complete = ~(| rsp_rem_mask_n) || (CORE_REQS == 1); - for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data - localparam i = r / CORE_CHANNELS; - localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[j][i]; + if (RSP_PARTIAL != 0) begin : g_rsp_partial + + reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; + + always @(posedge clk) begin + if (ibuf_push) begin + rsp_sop_r[ibuf_waddr] <= 1; + end + if (mem_rsp_fire_s) begin + rsp_sop_r[ibuf_raddr] <= 0; + end + end + + assign crsp_valid = mem_rsp_valid_s; + assign crsp_mask = curr_mask; + assign crsp_sop = rsp_sop_r[ibuf_raddr]; + + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data + localparam j = r % CORE_CHANNELS; + assign crsp_data[r] = mem_rsp_data_s[j]; + end + + assign mem_rsp_ready_s = crsp_ready; + + end else begin : g_rsp_full + + wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; + reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; + + for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store + for (genvar j = 0; j < 
CORE_BATCHES; ++j) begin : g_j + reg [WORD_WIDTH-1:0] rsp_store [0:CORE_QUEUE_SIZE-1]; + wire rsp_wren = mem_rsp_fire_s + && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) + && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); + always @(posedge clk) begin + if (rsp_wren) begin + rsp_store[ibuf_raddr] <= mem_rsp_data_s[i]; + end + end + assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr]; + end + end + + always @(posedge clk) begin + if (ibuf_push) begin + rsp_orig_mask[ibuf_waddr] <= core_req_mask; + end + end + + assign crsp_valid = mem_rsp_valid_s && rsp_complete; + assign crsp_mask = rsp_orig_mask[ibuf_raddr]; + assign crsp_sop = 1'b1; + + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data + localparam i = r / CORE_CHANNELS; + localparam j = r % CORE_CHANNELS; + assign crsp_data[r] = rsp_store_n[j][i]; + end + + assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; end - assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; - + assign crsp_eop = rsp_complete; end if (UUID_WIDTH != 0) begin : g_crsp_tag @@ -512,8 +527,6 @@ module VX_mem_scheduler #( assign crsp_tag = ibuf_dout; end - assign crsp_eop = rsp_complete; - // Send response to caller VX_elastic_buffer #( @@ -525,7 +538,7 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (crsp_valid), .ready_in (crsp_ready), - .data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}), + .data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}), .data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}), .valid_out (core_rsp_valid), .ready_out (core_rsp_ready) From 7ab58111d8b4dc61e47379d53cee2b87519046c2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 12:30:39 -0700 Subject: [PATCH 402/488] minor update --- hw/rtl/VX_platform.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 8c4effaf4..2e05ab44b 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -141,7 
+141,7 @@ endgenerate `define DEBUG_BLOCK(x) `define TRACE(level, args) -`define SFORMATF(x) +`define SFORMATF(x) "" `define TRACING_ON `define TRACING_OFF From e7d09feb4a851ff336f38cd801615e192031e00a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 13:06:45 -0700 Subject: [PATCH 403/488] decode => demux --- hw/rtl/cache/VX_cache_bank.sv | 4 ++-- hw/rtl/libs/VX_cyclic_arbiter.sv | 2 +- hw/rtl/libs/{VX_decoder.sv => VX_demux.sv} | 2 +- hw/rtl/libs/VX_mem_adapter.sv | 8 ++++---- hw/rtl/libs/VX_rr_arbiter.sv | 4 ++-- hw/rtl/libs/VX_stream_xbar.sv | 8 ++++---- 6 files changed, 14 insertions(+), 14 deletions(-) rename hw/rtl/libs/{VX_decoder.sv => VX_demux.sv} (98%) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index d3218c54c..2d6dd6a5b 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -614,10 +614,10 @@ module VX_cache_bank #( `UNUSED_VAR (byteen_st1) end else begin : g_wt wire [LINE_SIZE-1:0] line_byteen; - VX_decoder #( + VX_demux #( .N (`CS_WORD_SEL_BITS), .M (WORD_SIZE) - ) byteen_dec ( + ) byteen_demux ( .sel_in (word_idx_st1), .data_in (byteen_st1), .data_out (line_byteen) diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index 2899b55fd..9c28fcc4a 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -65,7 +65,7 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); - VX_decoder #( + VX_demux #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_demux.sv similarity index 98% rename from hw/rtl/libs/VX_decoder.sv rename to hw/rtl/libs/VX_demux.sv index ce2c509e6..b76ab42aa 100644 --- a/hw/rtl/libs/VX_decoder.sv +++ b/hw/rtl/libs/VX_demux.sv @@ -17,7 +17,7 @@ // Adapted from BaseJump STL: http://bjump.org/data_out.html `TRACING_OFF -module VX_decoder #( +module VX_demux #( parameter N = 0, parameter M = 1, parameter MODEL = 0, diff --git 
a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 2cae6fead..d5efc7d6e 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -100,19 +100,19 @@ module VX_mem_adapter #( assign mem_req_addr_out_w = mem_req_addr_in_qual; end - VX_decoder #( + VX_demux #( .N (D), .M (SRC_DATA_WIDTH/8) - ) req_be_dec ( + ) req_be_demux ( .sel_in (req_idx), .data_in (mem_req_byteen_in), .data_out (mem_req_byteen_out_w) ); - VX_decoder #( + VX_demux #( .N (D), .M (SRC_DATA_WIDTH) - ) req_data_dec ( + ) req_data_demux ( .sel_in (req_idx), .data_in (mem_req_data_in), .data_out (mem_req_data_out_w) diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index f5304b023..1d3b479bf 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -480,12 +480,12 @@ module VX_rr_arbiter #( end end - VX_decoder #( + VX_demux #( .N (LOG_NUM_REQS), .D (NUM_REQS) ) grant_decoder ( .sel_in (grant_index), - .data_in (grant_valid), + .data_in (1'b1), .data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 0c4eff2f1..68a31c4fc 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -64,10 +64,10 @@ module VX_stream_xbar #( ); for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders - VX_decoder #( + VX_demux #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) - ) sel_in_decoder ( + ) sel_in_demux ( .sel_in (sel_in[i]), .data_in (valid_in[i]), .data_out (per_output_valid_in[i]) @@ -137,10 +137,10 @@ module VX_stream_xbar #( wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; - VX_decoder #( + VX_demux #( .N (OUT_WIDTH), .D (NUM_OUTPUTS) - ) sel_in_decoder ( + ) sel_in_demux ( .sel_in (sel_in[0]), .data_in (valid_in[0]), .data_out (valid_out_w) From ec12b500074ae7d325064f12d7e6969dc981c496 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 13:09:34 -0700 Subject: [PATCH 404/488] minor udpate --- 
hw/rtl/libs/VX_mem_scheduler.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f77854ec1..f162a370e 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -637,7 +637,7 @@ module VX_mem_scheduler #( end `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end - if (mem_rsp_fire_s) begin + if (mem_rsp_valid_s && mem_rsp_ready_s) begin `TRACE(2, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) `TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) `TRACE(2, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)) From cc5ac8388b1de44f43c89a8e731795aac8458dbc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 14:03:19 -0700 Subject: [PATCH 405/488] minor update --- hw/rtl/libs/VX_mem_scheduler.sv | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index f162a370e..65a057b80 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -412,7 +412,15 @@ module VX_mem_scheduler #( // Handle memory responses //////////////////////////////////////////////// + wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; + if (CORE_BATCHES > 1) begin : g_rsp_batch_idx + assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; + end else begin : g_rsp_batch_idx_0 + assign rsp_batch_idx = '0; + end + if (CORE_REQS == 1) begin : g_rsp_1 + `UNUSED_VAR (rsp_batch_idx) assign crsp_valid = mem_rsp_valid_s; assign crsp_mask = mem_rsp_mask_s; @@ -426,13 +434,6 @@ module VX_mem_scheduler #( reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask; wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; - wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - - if (CORE_BATCHES > 1) begin : g_rsp_batch_idx - assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; 
- end else begin : g_rsp_batch_idx_0 - assign rsp_batch_idx = '0; - end for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask localparam i = r / CORE_CHANNELS; From 22ade31fd53708e1a97dd3ce9054c581244b4075 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Wed, 23 Oct 2024 15:55:11 -0700 Subject: [PATCH 406/488] minor updates --- hw/rtl/VX_platform.vh | 17 ----------------- hw/rtl/afu/xrt/VX_afu_wrap.sv | 3 +++ hw/rtl/core/VX_fetch.sv | 3 +++ hw/rtl/core/VX_issue_slice.sv | 3 +++ hw/rtl/core/VX_lsu_slice.sv | 3 +++ 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 2e05ab44b..6e0b755e2 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -44,11 +44,6 @@ endgenerate end \ end -`define __SCOPE -`define __SCOPE_X -`define __SCOPE_ON -`define __SCOPE_OFF - `ifndef TRACING_ALL `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* verilator tracing_off */ @@ -158,18 +153,6 @@ endgenerate `define UNUSED_PIN(x) . 
x () `define UNUSED_ARG(x) x -`define __SCOPE (* mark_debug="true" *) - -`define __SCOPE_X - -`define __SCOPE_ON \ - `undef __SCOPE_X \ - `define __SCOPE_X `__SCOPE - -`define __SCOPE_OFF \ - `undef __SCOPE_X \ - `define __SCOPE_X - `endif /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index 2b1bfb7c2..7d13344a4 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -373,7 +373,9 @@ module VX_afu_wrap #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_AFU ila_afu ila_afu_inst ( .clk (clk), .probe0 ({ @@ -394,6 +396,7 @@ module VX_afu_wrap #( }) ); `endif +`endif `ifdef SIMULATION `ifndef VERILATOR diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 6a35602e8..807548614 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -166,7 +166,9 @@ module VX_fetch import VX_gpu_pkg::*; #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_FETCH ila_fetch ila_fetch_inst ( .clk (clk), .probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}), @@ -174,6 +176,7 @@ module VX_fetch import VX_gpu_pkg::*; #( .probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready}) ); `endif +`endif `ifdef DBG_TRACE_MEM always @(posedge clk) begin diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index d72937251..5af5f0ef0 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -143,7 +143,9 @@ module VX_issue_slice import VX_gpu_pkg::*; #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_ISSUE ila_issue ila_issue_inst ( .clk (clk), .probe0 ({decode_if.valid, decode_if.data, decode_if.ready}), @@ -152,6 +154,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #( .probe3 ({writeback_if.valid, writeback_if.data}) ); `endif +`endif `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin diff 
--git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 0018db08d..333cbfa54 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -561,7 +561,9 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( `SCOPE_IO_UNUSED(0) `endif `endif + `ifdef CHIPSCOPE +`ifdef DBG_SCOPE_LSU ila_lsu ila_lsu_inst ( .clk (clk), .probe0 ({execute_if.valid, execute_if.data, execute_if.ready}), @@ -569,5 +571,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #( .probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready}) ); `endif +`endif endmodule From 8b172d07ec6d96232fff7820bd4db7366279ef66 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 01:44:55 -0700 Subject: [PATCH 407/488] revert xilinx's asynchronous bram workaround --- hw/rtl/cache/VX_cache_bank.sv | 6 +--- hw/rtl/cache/VX_cache_data.sv | 9 ++++-- hw/rtl/cache/VX_cache_mshr.sv | 4 +-- hw/rtl/cache/VX_cache_repl.sv | 30 ++++++++----------- hw/rtl/cache/VX_cache_tags.sv | 12 +++----- hw/rtl/core/VX_fetch.sv | 2 +- hw/rtl/core/VX_ipdom_stack.sv | 3 +- hw/rtl/libs/VX_dp_ram.sv | 2 +- hw/rtl/libs/VX_fifo_queue.sv | 11 ++----- hw/rtl/libs/VX_index_buffer.sv | 3 +- hw/rtl/libs/VX_scope_tap.sv | 6 ++-- hw/rtl/libs/VX_sp_ram.sv | 53 +++++++++++++++++++++++++++------- hw/rtl/mem/VX_local_mem.sv | 3 +- 13 files changed, 82 insertions(+), 62 deletions(-) diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index 2d6dd6a5b..fdee28bf1 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -154,7 +154,7 @@ module VX_cache_bank #( wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st0, way_idx_st1; wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1; - wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1; + wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1; wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1; wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1; wire rw_sel, rw_st0, rw_st1; @@ -332,7 +332,6 @@ 
module VX_cache_bank #( wire do_read_st1 = valid_st1 && is_read_st1; wire do_write_st1 = valid_st1 && is_write_st1; - assign line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0]; assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0]; assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0); @@ -358,7 +357,6 @@ module VX_cache_bank #( .hit_line (line_idx_st1), .hit_way (way_idx_st1), .repl_valid (do_fill_st0 && ~pipe_stall), - .repl_line_n(line_idx_sel), .repl_line (line_idx_st0), .repl_way (victim_way_st0) ); @@ -375,14 +373,12 @@ module VX_cache_bank #( ) cache_tags ( .clk (clk), .reset (reset), - .stall (pipe_stall), // inputs .init (do_init_st0), .flush (do_flush_st0 && ~pipe_stall), .fill (do_fill_st0 && ~pipe_stall), .read (do_read_st0 && ~pipe_stall), .write (do_write_st0 && ~pipe_stall), - .line_idx_n (line_idx_sel), .line_idx (line_idx_st0), .line_tag (line_tag_st0), .evict_way (evict_way_st0), diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 03e2629c6..ddc40b1bd 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -82,7 +82,8 @@ module VX_cache_data #( .DATAW (LINE_SIZE * NUM_WAYS), .WRENW (LINE_SIZE * NUM_WAYS), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) byteen_store ( .clk (clk), .reset (reset), @@ -129,7 +130,8 @@ module VX_cache_data #( .DATAW (NUM_WAYS * `CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), .WRENW (NUM_WAYS * LINE_SIZE), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) data_store ( .clk (clk), .reset (reset), @@ -153,7 +155,8 @@ module VX_cache_data #( VX_sp_ram #( .DATAW (`CS_LINE_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) data_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 17546ba2a..78557e1ce 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -221,7 +221,7 @@ module VX_cache_mshr #( VX_dp_ram #( .DATAW 
(DATA_WIDTH), .SIZE (MSHR_SIZE), - .OUT_REG (1) + .RDW_MODE ("R") ) mshr_store ( .clk (clk), .reset (reset), @@ -230,7 +230,7 @@ module VX_cache_mshr #( .wren (1'b1), .waddr (allocate_id_r), .wdata (allocate_data), - .raddr (dequeue_id_n), + .raddr (dequeue_id_r), .rdata (dequeue_data) ); diff --git a/hw/rtl/cache/VX_cache_repl.sv b/hw/rtl/cache/VX_cache_repl.sv index 909123046..578c87002 100644 --- a/hw/rtl/cache/VX_cache_repl.sv +++ b/hw/rtl/cache/VX_cache_repl.sv @@ -99,7 +99,6 @@ module VX_cache_repl #( input wire [`CS_LINE_SEL_BITS-1:0] hit_line, input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way, input wire repl_valid, - input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n, input wire [`CS_LINE_SEL_BITS-1:0] repl_line, output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way ); @@ -110,26 +109,24 @@ module VX_cache_repl #( if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru // Pseudo Least Recently Used replacement policy localparam LRU_WIDTH = `UP(NUM_WAYS-1); - `UNUSED_VAR (repl_valid) - `UNUSED_VAR (repl_line) wire [LRU_WIDTH-1:0] plru_rdata; wire [LRU_WIDTH-1:0] plru_wdata; wire [LRU_WIDTH-1:0] plru_wmask; VX_dp_ram #( - .DATAW (LRU_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .WRENW (LRU_WIDTH), - .OUT_REG (1) + .DATAW (LRU_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .WRENW (LRU_WIDTH), + .RDW_MODE ("R") ) plru_store ( .clk (clk), .reset (reset), - .read (~stall), + .read (repl_valid), .write (hit_valid), .wren (plru_wmask), .waddr (hit_line), - .raddr (repl_line_n), + .raddr (repl_line), .wdata (plru_wdata), .rdata (plru_rdata) ); @@ -158,18 +155,17 @@ module VX_cache_repl #( wire [WAY_SEL_WIDTH-1:0] ctr_rdata; wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1; - VX_dp_ram #( - .DATAW (WAY_SEL_WIDTH), - .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1) + VX_sp_ram #( + .DATAW (WAY_SEL_WIDTH), + .SIZE (`CS_LINES_PER_BANK), + .RDW_MODE ("R") ) ctr_store ( .clk (clk), .reset (reset), - .read (~stall), + .read (repl_valid), .write (repl_valid), .wren (1'b1), - .raddr (repl_line_n), - .waddr 
(repl_line), + .addr (repl_line), .wdata (ctr_wdata), .rdata (ctr_rdata) ); @@ -182,7 +178,6 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) - `UNUSED_VAR (repl_line_n) reg [WAY_SEL_WIDTH-1:0] victim_idx; always @(posedge clk) begin if (reset) begin @@ -201,7 +196,6 @@ module VX_cache_repl #( `UNUSED_VAR (hit_way) `UNUSED_VAR (repl_valid) `UNUSED_VAR (repl_line) - `UNUSED_VAR (repl_line_n) assign repl_way = 1'b0; end diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 970d54d91..e086ea94f 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -29,7 +29,6 @@ module VX_cache_tags #( ) ( input wire clk, input wire reset, - input wire stall, // inputs input wire init, @@ -37,7 +36,6 @@ module VX_cache_tags #( input wire fill, input wire read, input wire write, - input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n, input wire [`CS_LINE_SEL_BITS-1:0] line_idx, input wire [`CS_TAG_SEL_BITS-1:0] line_tag, input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way, @@ -71,7 +69,7 @@ module VX_cache_tags #( wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode wire do_write = WRITEBACK && write && tag_matches[i]; // only write on tag hit - //wire line_read = read || write || (WRITEBACK && (fill || flush)); + wire line_read = read || write || (WRITEBACK && (fill || flush)); wire line_write = do_init || do_fill || do_flush || do_write; wire line_valid = fill || write; @@ -87,19 +85,17 @@ module VX_cache_tags #( assign read_dirty[i] = 1'b0; end - VX_dp_ram #( + VX_sp_ram #( .DATAW (TAG_WIDTH), .SIZE (`CS_LINES_PER_BANK), - .OUT_REG (1), .RDW_MODE ("W") ) tag_store ( .clk (clk), .reset (reset), - .read (~stall), + .read (line_read), .write (line_write), .wren (1'b1), - .waddr (line_idx), - .raddr (line_idx_n), + .addr (line_idx), .wdata (line_wdata), .rdata (line_rdata) ); diff --git a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 
807548614..802effe07 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -53,7 +53,7 @@ module VX_fetch import VX_gpu_pkg::*; #( VX_dp_ram #( .DATAW (`PC_BITS + `NUM_THREADS), .SIZE (`NUM_WARPS), - .OUT_REG (0) + .RDW_MODE ("R") ) tag_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index d5d000132..6bec14504 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -75,7 +75,8 @@ module VX_ipdom_stack #( VX_dp_ram #( .DATAW (1 + WIDTH * 2), .SIZE (DEPTH), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) ipdom_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index b778ce88e..0ce68ea1a 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -20,7 +20,7 @@ module VX_dp_ram #( parameter WRENW = 1, parameter OUT_REG = 0, parameter LUTRAM = 0, - parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, U: undefined + parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, U: undefined parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, parameter INIT_ENABLE = 0, diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 6de6ddc24..d53903bfd 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -77,20 +77,16 @@ module VX_fifo_queue #( localparam ADDRW = `CLOG2(DEPTH); wire [DATAW-1:0] data_out_w; - reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n; + reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] wr_ptr_r; - always @(*) begin - rd_ptr_n = rd_ptr_r + ADDRW'(pop); - end - always @(posedge clk) begin if (reset) begin wr_ptr_r <= '0; rd_ptr_r <= (OUT_REG != 0) ? 
1 : 0; end else begin wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_n; + rd_ptr_r <= rd_ptr_r + ADDRW'(pop); end end @@ -100,7 +96,6 @@ module VX_fifo_queue #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (1), .LUTRAM (LUTRAM), .RDW_MODE ("W") ) dp_ram ( @@ -109,9 +104,9 @@ module VX_fifo_queue #( .read (~bypass), .write (push), .wren (1'b1), + .raddr (rd_ptr_r), .waddr (wr_ptr_r), .wdata (data_in), - .raddr (rd_ptr_n), .rdata (data_out_w) ); diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 422c317e1..4a1e05845 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -50,8 +50,7 @@ module VX_index_buffer #( VX_dp_ram #( .DATAW (DATAW), .SIZE (SIZE), - .OUT_REG (0), - .RDW_MODE("W") + .RDW_MODE ("R") ) data_table ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index 78e85e16f..6c0914b0c 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -112,7 +112,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (IDLE_CTRW), .SIZE (DEPTH), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) delta_store ( .clk (clk), .reset (reset), @@ -133,7 +134,8 @@ module VX_scope_tap #( VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) data_store ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index ee1316271..bdf41eb50 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -20,7 +20,7 @@ module VX_sp_ram #( parameter WRENW = 1, parameter OUT_REG = 0, parameter LUTRAM = 0, - parameter `STRING RDW_MODE = "R", // R: read-first, W: write-first, N: no-change + parameter `STRING RDW_MODE = "W", // W: write-first, R: read-first, N: no-change, U: undefined parameter RDW_ASSERT = 0, parameter RESET_RAM = 0, parameter INIT_ENABLE = 0, @@ -75,14 +75,13 @@ module VX_sp_ram #( end `endif if (OUT_REG) begin : g_sync - wire cs = read || write; 
if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "R") begin : g_read_first `USE_BLOCK_BRAM `RAM_ARRAY `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -96,7 +95,7 @@ module VX_sp_ram #( if (WRENW > 1) begin : g_wren reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -108,7 +107,7 @@ module VX_sp_ram #( `UNUSED_VAR (wren) reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin ram[addr] <= wdata; rdata_r <= wdata; @@ -124,7 +123,7 @@ module VX_sp_ram #( `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end else begin @@ -133,6 +132,19 @@ module VX_sp_ram #( end end assign rdata = rdata_r; + end else if (RDW_MODE == "U") begin : g_unknown + `USE_BLOCK_BRAM `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end end else begin : g_auto if (RDW_MODE == "R") begin : g_read_first @@ -140,7 +152,7 @@ module VX_sp_ram #( `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -154,7 +166,7 @@ module VX_sp_ram #( if (WRENW > 1) begin : g_wren reg [ADDRW-1:0] addr_reg; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin `RAM_WRITE end @@ -166,7 +178,7 @@ module VX_sp_ram #( `UNUSED_VAR (wren) reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin + if (read || write) begin if (write) begin ram[addr] <= wdata; rdata_r <= wdata; @@ -182,7 +194,7 @@ module VX_sp_ram #( `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin - if (cs) begin 
+ if (read || write) begin if (write) begin `RAM_WRITE end else begin @@ -191,6 +203,19 @@ module VX_sp_ram #( end end assign rdata = rdata_r; + end else if (RDW_MODE == "U") begin : g_unknown + `RAM_ARRAY + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE + end + if (read) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end end end else begin : g_async @@ -281,6 +306,14 @@ module VX_sp_ram #( end end assign rdata = rdata_r; + end else if (RDW_MODE == "U") begin : g_unknown + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end end else begin : g_async `UNUSED_VAR (read) diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 557f4a9f7..fd0694fe3 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -166,7 +166,8 @@ module VX_local_mem import VX_gpu_pkg::*; #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), - .OUT_REG (1) + .OUT_REG (1), + .RDW_MODE ("R") ) lmem_store ( .clk (clk), .reset (reset), From 98b58606e5120299d06513eb0c0c8324be6b624a Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 02:18:00 -0700 Subject: [PATCH 408/488] merge fixes --- docs/fpga_setup.md | 74 ----------------------------------- miscs/patches/ramulator.patch | 46 ---------------------- sim/rtlsim/Makefile | 7 ---- tests/regression/common.mk | 3 +- 4 files changed, 2 insertions(+), 128 deletions(-) delete mode 100644 docs/fpga_setup.md delete mode 100644 miscs/patches/ramulator.patch diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md deleted file mode 100644 index 80d71e45f..000000000 --- a/docs/fpga_setup.md +++ /dev/null @@ -1,74 +0,0 @@ -# FPGA Startup and Configuration Guide - -OPAE Environment Setup ----------------------- - - $ source /opt/inteldevstack/init_env_user.sh - $ export OPAE_HOME=/opt/opae/1.1.2 - $ export PATH=$OPAE_HOME/bin:$PATH - $ 
export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH - $ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH - $ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH - -OPAE Build ------------------- - -The FPGA has to following configuration options: -- DEVICE_FAMILY=arria10 | stratix10 -- NUM_CORES=#n - -Command line: - - $ cd hw/syn/altera/opae - $ PREFIX=test1 TARGET=fpga NUM_CORES=4 make - -A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete. -Setting TARGET=ase will build the project for simulation using Intel ASE. - - -OPAE Build Configuration ------------------------- - -The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured: -- `NUM_WARPS`: Number of warps per cores -- `NUM_THREADS`: Number of threads per warps -- `PERF_ENABLE`: enable the use of all profile counters - -You configure the syntesis build from the command line: - - $ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make - -OPAE Build Progress -------------------- - -You could check the last 10 lines in the build log for possible errors until build completion. - - $ tail -n 10 /build.log - -Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs. 
- - $ ps -u - -If the build fails and you need to restart it, clean up the build folder using the following command: - - $ make clean - -The file `vortex_afu.gbs` should exist when the build is done: - - $ ls -lsa /synth/vortex_afu.gbs - - -Signing the bitstream and Programming the FPGA ----------------------------------------------- - - $ cd - $ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs - $ fpgasupdate vortex_afu_unsigned_ssl.gbs - -FPGA sample test running OpenCL sgemm kernel --------------------------------------------- - -Run the following from the Vortex root directory - - $ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128" - diff --git a/miscs/patches/ramulator.patch b/miscs/patches/ramulator.patch deleted file mode 100644 index e24b5d230..000000000 --- a/miscs/patches/ramulator.patch +++ /dev/null @@ -1,46 +0,0 @@ -diff --git a/Makefile b/Makefile -index ea340c8..d2aac5b 100644 ---- a/Makefile -+++ b/Makefile -@@ -7,16 +7,16 @@ OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS)) - - # Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with - # g++ 4.x due to an internal compiler error when processing lambda functions. 
--CXX := clang++ -+#CXX := clang++ - # CXX := g++-5 --CXXFLAGS := -O3 -std=c++11 -g -Wall -+CXXFLAGS := -std=c++11 -O3 -g -Wall -fPIC - - .PHONY: all clean depend - - all: depend ramulator - - clean: -- rm -f ramulator -+ rm -f ramulator libramulator.a - rm -rf $(OBJDIR) - - depend: $(OBJDIR)/.depend -@@ -36,7 +36,7 @@ ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend - $(CXX) $(CXXFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS) - - libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o -- libtool -static -o $@ $(OBJS) $(OBJDIR)/Gem5Wrapper.o -+ $(AR) rcs $@ $^ - - $(OBJS): | $(OBJDIR) - -diff --git a/src/Request.h b/src/Request.h -index 57abd0d..a5ce061 100644 ---- a/src/Request.h -+++ b/src/Request.h -@@ -36,7 +36,7 @@ public: - - Request(long addr, Type type, int coreid = 0) - : is_first_command(true), addr(addr), coreid(coreid), type(type), -- callback([](Request& req){}) {} -+ callback([](Request&){}) {} - - Request(long addr, Type type, function callback, int coreid = 0) - : is_first_command(true), addr(addr), coreid(coreid), type(type), callback(callback) {} diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 50b9c5c1f..ecaee717b 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -39,13 +39,6 @@ SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/processor.cpp -ifdef AXI_BUS - TOP = Vortex_axi - CXXFLAGS += -DAXI_BUS -else - TOP = Vortex -endif - VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 2cba5ef9a..94fe840df 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -8,11 +8,12 @@ XRT_DEVICE_INDEX ?= 0 VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel -STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d +STARTUP_ADDR ?= 
0x180000000 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f +STARTUP_ADDR ?= 0x80000000 endif LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) From eecff10deac07ebb00c68805b6fe32ea3c5c4d12 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 02:51:08 -0700 Subject: [PATCH 409/488] minor update --- hw/rtl/libs/VX_index_buffer.sv | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 4a1e05845..96caec50e 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -17,6 +17,7 @@ module VX_index_buffer #( parameter DATAW = 1, parameter SIZE = 1, + parameter LUTRAM = 1, parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, @@ -50,7 +51,8 @@ module VX_index_buffer #( VX_dp_ram #( .DATAW (DATAW), .SIZE (SIZE), - .RDW_MODE ("R") + .LUTRAM (LUTRAM), + .RDW_MODE ("W") ) data_table ( .clk (clk), .reset (reset), @@ -63,5 +65,7 @@ module VX_index_buffer #( .rdata (read_data) ); + `RUNTIME_ASSERT (~(acquire_en && write_addr == read_addr), ("%t: oops!", $time)) + endmodule `TRACING_ON From ce510d78c7991fd28f3d7be99a4a34d0d6a4ab7c Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 24 Oct 2024 05:02:46 -0700 Subject: [PATCH 410/488] minor update --- hw/rtl/libs/VX_index_buffer.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/rtl/libs/VX_index_buffer.sv b/hw/rtl/libs/VX_index_buffer.sv index 96caec50e..8d0320c5d 100644 --- a/hw/rtl/libs/VX_index_buffer.sv +++ b/hw/rtl/libs/VX_index_buffer.sv @@ -17,7 +17,7 @@ module VX_index_buffer #( parameter DATAW = 1, parameter SIZE = 1, - parameter LUTRAM = 1, + parameter LUTRAM = 0, parameter ADDRW = `LOG2UP(SIZE) ) ( input wire clk, @@ -65,7 +65,5 @@ module VX_index_buffer #( .rdata (read_data) ); - `RUNTIME_ASSERT (~(acquire_en && write_addr == read_addr), ("%t: oops!", $time)) - endmodule `TRACING_ON From d475e9d201d3cb15d6c752a03acd2d362ceaf678 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: 
Fri, 25 Oct 2024 12:59:24 -0400 Subject: [PATCH 411/488] remove duplicate block --- docs/fpga_setup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index e7ab0ecbb..3fb02cfe5 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -70,7 +70,7 @@ $ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. ``` $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make build_u250_hw_1c.log 2>&1 & + $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 & ``` Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" The generated bitstream will be located under /bin/vortex_afu.xclbin From e73e1c2bb3de9d2dd7e77f213cf0ca6c6108accc Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 1 Nov 2024 13:56:01 -0400 Subject: [PATCH 412/488] update xilinx fpga steps with environment variable steps --- docs/fpga_setup.md | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/docs/fpga_setup.md b/docs/fpga_setup.md index 3fb02cfe5..d909d8687 100644 --- a/docs/fpga_setup.md +++ b/docs/fpga_setup.md @@ -50,7 +50,7 @@ Once you’ve connected to the CRNCH login node, you can use the Slurm scheduler To request 16 cores and 64GB of RAM for 6 hours on flubber9, a fpga dev node: ```bash -salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber9 --time=06:00:00 +salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber1 --time=06:00:00 ``` Synthesis for Xilinx Boards ---------------------- @@ -58,19 +58,42 @@ Once you are logged in, you will need to complete some first time configurations ### Source Configuration Scripts ``` +# From any directory $ source /opt/xilinx/xrt/setup.sh $ source 
/tools/reconfig/xilinx/Vitis/2023.1/settings64.sh ``` ### Check Installed FPGA Platforms -`platforminfo -l` which tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. +Run `platforminfo -l`, which tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. If the command reports an error, there was an issue with one of the previous two `source` commands. +### Install Vortex Toolchain +The Xilinx synthesis process requires verilator to generate the bitstream. Eventually, you will need the whole toolchain to run the bitstream on the FPGA. Therefore, the Vortex toolchain can be installed as follows. If you complete these steps properly, you should only need to complete them once and you can skip to `Activate Vortex Toolchain`. +``` +# Make a build directory from root and configure scripts for your environment +mkdir build && cd build && ../configure --tooldir=$HOME/tools + +# Install the whole prebuilt toolchain +./ci/toolchain_install.sh --all + +# Add environment variables to bashrc +echo "source /vortex/build/ci/toolchain_env.sh" >> ~/.bashrc +``` + +### Activate Vortex Toolchain +``` +# From any directory +source ~/.bashrc + +# Check environment setup +verilator --version +``` + +### Build the FPGA Bitstream +The root directory contains the path `hw/syn/xilinx/xrt` which has the makefile used to generate the Vortex bitstream. -### Build FPGA image -The directory `hw/syn/xilinx/xrt` contains the makefile used to synthesize Vortex. 
``` $ cd hw/syn/xilinx/xrt - $ PREFIX=test1 PLATFORM=xilinx_u250_gen3x16_xdma_4_1_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 & + $ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 & ``` Will run the synthesis under new build directory: BUILD_DIR := "\\_\\_\" The generated bitstream will be located under /bin/vortex_afu.xclbin From 667fa1662d316bd3d6a1d618a2883ffe16d34028 Mon Sep 17 00:00:00 2001 From: Udit Subramanya Date: Fri, 1 Nov 2024 14:46:38 -0400 Subject: [PATCH 413/488] update docker for micro apptainer --- ci/install_dependencies.sh | 2 +- miscs/docker/Dockerfile.prod | 33 ++++++++++++--------------------- miscs/docker/README.md | 29 ++++++++++++++++++++++------- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/ci/install_dependencies.sh b/ci/install_dependencies.sh index a62ed253b..4dab27786 100755 --- a/ci/install_dependencies.sh +++ b/ci/install_dependencies.sh @@ -31,7 +31,7 @@ check_gcc_version() { apt-get update -y # install system dependencies -apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache +apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache cmake # Check and install GCC 11 if necessary if check_gcc_version; then diff --git a/miscs/docker/Dockerfile.prod b/miscs/docker/Dockerfile.prod index e1a8d94b5..20c9c033b 100644 --- a/miscs/docker/Dockerfile.prod +++ b/miscs/docker/Dockerfile.prod @@ -18,41 +18,32 @@ FROM ubuntu:20.04 ARG DEBIAN_FRONTEND=noninteractive # Install necessary dependencies and upgrade installed components -RUN apt-get update -y && \ - apt-get install -y \ +# Update and install necessary dependencies +RUN apt-get update && apt-get install -y \ software-properties-common \ build-essential \ python3 \ git \ wget \ curl \ - ca-certificates \ - valgrind \ - libstdc++6 \ - binutils \ - uuid-dev \ - ccache \ - cmake && \ - apt-get upgrade -y && \ - 
gcc_version=$(gcc -dumpversion) && \ - if dpkg --compare-versions "$gcc_version" lt 11; then \ - echo "GCC version is less than 11. Installing GCC 11..." && \ - add-apt-repository -y ppa:ubuntu-toolchain-r/test && \ - apt-get update -y && \ - apt-get install -y g++-11 gcc-11 && \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 && \ - update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100; \ - else \ - echo "GCC version is 11 or greater. No need to install GCC 11."; \ - fi && \ + ca-certificates && \ rm -rf /var/lib/apt/lists/* +# upgrade installed components +RUN apt-get upgrade && apt-get update + +# temporary until remote dependency script gets updated +RUN apt-get install -y cmake + # Clone the Vortex repository RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /vortex # Set the initial working directory WORKDIR /vortex +# install system dependencies +RUN ./ci/install_dependencies.sh + # Configure the build folder RUN mkdir build && cd build && ../configure diff --git a/miscs/docker/README.md b/miscs/docker/README.md index 897f8f9fb..c077102da 100644 --- a/miscs/docker/README.md +++ b/miscs/docker/README.md @@ -4,17 +4,32 @@ You can install Docker desktop on MAC or PC or Ubuntu. - MAC: https://docs.docker.com/desktop/install/mac-install - Ubuntu: https://docs.docker.com/desktop/install/ubuntu -### 1- Create a Docker image from the Dockerfile - $ docker build -f Dockerfile.ubuntu -t vortex +### 1- Build a Docker Image from the Dockerfile + $ docker build --platform=linux/amd64 -t vortex-packaged -f Dockerfile.prod . -### 2- Build the Docker image - $ docker docker run -it vortex /bin/bash +### 2- Construct and run a Container from the Docker Image + $ docker run -it --name vortex --privileged=true --platform=linux/amd64 vortex-packaged -### 3- Build the project +### 3- Build the Project One you login the Docker terminal, you will be in the build directory. 
$ make -s -### 4- Run a simple test +### 4- Run a Simple Test +See `docs/` to learn more! - $ ./ci/blackbox.sh --cores=2 --app=vecadd \ No newline at end of file + $ ./ci/blackbox.sh --cores=2 --app=vecadd + +### 5- Exit the Container + + $ exit + $ docker stop vortex + +### 6- Restart and Re-Enter the Container +If you ran step `2` and then step `5`, you have to start and re-enter the container: + + $ docker start vortex + $ docker exec -it vortex /bin/bash + +--- +Note: Apple Silicon macs will run the container in emulation mode, so compiling and running will take a considerable amount of time -- but it still works! \ No newline at end of file From bffc6d9610fe2e4bf1445d89462bae94c41cbd5e Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 13 Nov 2024 16:20:25 -0800 Subject: [PATCH 414/488] enabling Vivado's asynchronous bram support via direct netlist transformation --- configure | 8 +- hw/rtl/VX_platform.vh | 6 +- hw/rtl/cache/VX_cache_top.sv | 12 +- hw/rtl/libs/VX_async_ram_patch.sv | 158 ++++++++ hw/rtl/libs/VX_dp_ram.sv | 377 ++++++++++++------ hw/rtl/libs/VX_fifo_queue.sv | 4 +- hw/rtl/libs/VX_placeholder.sv | 27 ++ hw/rtl/libs/VX_sp_ram.sv | 435 +++++++++++++------- hw/scripts/xilinx_async_bram_patch.tcl | 525 +++++++++++++++++++++++++ hw/scripts/xilinx_export_netlist.tcl | 71 ++++ hw/syn/xilinx/dut/common.mk | 4 +- hw/syn/xilinx/dut/project.tcl | 46 ++- hw/syn/xilinx/dut/unittest/Makefile | 2 +- hw/syn/xilinx/sandbox/project.tcl.in | 6 +- hw/syn/xilinx/xrt/Makefile | 3 + 15 files changed, 1401 insertions(+), 283 deletions(-) create mode 100644 hw/rtl/libs/VX_async_ram_patch.sv create mode 100644 hw/rtl/libs/VX_placeholder.sv create mode 100644 hw/scripts/xilinx_async_bram_patch.tcl create mode 100644 hw/scripts/xilinx_export_netlist.tcl diff --git a/configure b/configure index d2483a796..fbcd3f130 100755 --- a/configure +++ b/configure @@ -65,7 +65,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - 
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SOURCE_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@BUILDDIR@|$CURRENT_DIR|g" "$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then @@ -169,8 +169,8 @@ fi SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*") # Get the directory of the script -SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +SOURCE_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -THIRD_PARTY_DIR=$SCRIPT_DIR/third_party +THIRD_PARTY_DIR=$SOURCE_DIR/third_party -copy_files "$SCRIPT_DIR" "$CURRENT_DIR" +copy_files "$SOURCE_DIR" "$CURRENT_DIR" diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 6e0b755e2..d874b9b2b 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -160,32 +160,32 @@ endgenerate `ifdef QUARTUS `define MAX_FANOUT 8 `define MAX_LUTRAM 1024 -`define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) +`define BLACKBOX_CELL (* black_box *) `define STRING string `elsif VIVADO `define MAX_FANOUT 8 `define MAX_LUTRAM 1024 -`define IF_DATA_SIZE(x) $bits(x.data) `define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) `define DISABLE_BRAM (* ram_style = "registers" *) `define PRESERVE_NET (* keep = "true" *) +`define BLACKBOX_CELL (* black_box *) `define STRING `else `define MAX_FANOUT 
8 `define MAX_LUTRAM 1024 -`define IF_DATA_SIZE(x) x.DATA_WIDTH `define USE_BLOCK_BRAM `define USE_FAST_BRAM `define NO_RW_RAM_CHECK `define DISABLE_BRAM `define PRESERVE_NET +`define BLACKBOX_CELL `define STRING string `endif diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index 6dad5b6a8..45664af2b 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -31,28 +31,28 @@ module VX_cache_top import VX_gpu_pkg::*; #( parameter WORD_SIZE = 16, // Core Response Queue Size - parameter CRSQ_SIZE = 4, + parameter CRSQ_SIZE = 8, // Miss Reserv Queue Knob parameter MSHR_SIZE = 16, // Memory Response Queue Size - parameter MRSQ_SIZE = 4, + parameter MRSQ_SIZE = 8, // Memory Request Queue Size - parameter MREQ_SIZE = 4, + parameter MREQ_SIZE = 8, // Enable cache writeable parameter WRITE_ENABLE = 1, // Enable cache writeback - parameter WRITEBACK = 0, + parameter WRITEBACK = 1, // Enable dirty bytes on writeback - parameter DIRTY_BYTES = 0, + parameter DIRTY_BYTES = 1, // Request debug identifier parameter UUID_WIDTH = 0, // core request tag size - parameter TAG_WIDTH = 16, + parameter TAG_WIDTH = 32, // Core response output buffer parameter CORE_OUT_BUF = 3, diff --git a/hw/rtl/libs/VX_async_ram_patch.sv b/hw/rtl/libs/VX_async_ram_patch.sv new file mode 100644 index 000000000..fd29e881d --- /dev/null +++ b/hw/rtl/libs/VX_async_ram_patch.sv @@ -0,0 +1,158 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end + +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`define RAM_BYPASS(__d) \ + reg [DATAW-1:0] bypass_data_r; \ + reg bypass_valid_r; \ + always @(posedge clk) begin \ + bypass_valid_r <= read_s && write && (raddr_s == waddr); \ + bypass_data_r <= wdata; \ + end \ + assign __d = bypass_valid_r ? bypass_data_r : rdata_r + +`TRACING_OFF +module VX_async_ram_patch #( + parameter DATAW = 1, + parameter SIZE = 1, + parameter WRENW = 1, + parameter DUAL_PORT = 0, + parameter INIT_ENABLE = 0, + parameter INIT_FILE = "", + parameter [DATAW-1:0] INIT_VALUE = 0, + parameter ADDRW = `LOG2UP(SIZE) +) ( + input wire clk, + input wire reset, + input wire read, + input wire write, + input wire [WRENW-1:0] wren, + input wire [ADDRW-1:0] waddr, + input wire [DATAW-1:0] wdata, + input wire [ADDRW-1:0] raddr, + output wire [DATAW-1:0] rdata +); + localparam WSELW = DATAW / WRENW; + + `UNUSED_VAR (reset) + + (* keep = "true" *) wire [ADDRW-1:0] raddr_w, raddr_s; + (* keep = "true" *) wire read_s, is_raddr_reg; + + assign raddr_w = raddr; + + VX_placeholder #( + .I (ADDRW), + .O (ADDRW + 1 + 1) + ) placeholder ( + .in (raddr_w), + .out ({raddr_s, read_s, is_raddr_reg}) + ); + + // synchroneous ram + + wire [DATAW-1:0] rdata_s; + + if (WRENW != 1) begin : g_wren_sync_ram + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + reg [DATAW-1:0] rdata_r; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (read_s || write) begin + if 
(write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[raddr_s]; + end + end + `RAM_BYPASS(rdata_s); + end else begin : g_no_wren_sync_ram + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + reg [DATAW-1:0] rdata_r; + `RAM_INITIALIZATION + `UNUSED_VAR (wren) + always @(posedge clk) begin + if (read_s || write) begin + if (write) begin + ram[waddr] <= wdata; + end + rdata_r <= ram[raddr_s]; + end + end + `RAM_BYPASS(rdata_s); + end + + // asynchronous ram (fallback) + + wire [DATAW-1:0] rdata_a; + + if (DUAL_PORT != 0) begin : g_dp_async_ram + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + if (WRENW != 1) begin : g_wren + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + end else begin : g_no_wren + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + end + assign rdata_a = ram[raddr]; + end else begin : g_sp_async_ram + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + if (WRENW != 1) begin : g_wren + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + end + end else begin : g_no_wren + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + end + assign rdata_a = ram[waddr]; + end + + assign rdata = is_raddr_reg ? 
rdata_s : rdata_a; + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 0ce68ea1a..0cff67882 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -13,6 +13,35 @@ `include "VX_platform.vh" +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`ifdef QUARTUS + `define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`else + `define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`endif + `TRACING_OFF module VX_dp_ram #( parameter DATAW = 1, @@ -45,163 +74,289 @@ module VX_dp_ram #( `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "U"), ("invalid parameter")) `UNUSED_PARAM (RDW_ASSERT) -`define RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin : g_init \ - if (INIT_FILE != "") begin : g_file \ - initial $readmemh(INIT_FILE, ram); \ - end else begin : g_value \ - initial begin \ - for (integer i = 0; i < SIZE; ++i) begin : g_i \ - ram[i] = INIT_VALUE; \ - end \ - end \ - end \ - end - `ifdef SYNTHESIS localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); -`ifdef QUARTUS - `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`else - `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - 
if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`endif if (OUT_REG) begin : g_sync if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first - (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY - `UNUSED_VAR (wren) - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + raddr_r <= raddr; end - addr_reg <= raddr; end + assign rdata = ram[raddr_r]; + end else begin : g_no_wren + (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + raddr_r <= raddr; + end + end + assign rdata = ram[raddr_r]; end - assign rdata = ram[addr_reg]; end else if (RDW_MODE == "R") begin : g_read_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[raddr]; end - rdata_r <= ram[raddr]; end + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end else begin : g_undefined - 
`USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + if (read) begin + rdata_r <= ram[raddr]; + end end - if (read) begin - rdata_r <= ram[raddr]; + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + if (read) begin + rdata_r <= ram[raddr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first - (* rw_addr_collision = "yes" *) `RAM_ARRAY - `UNUSED_VAR (wren) - `RAM_INITIALIZATION - reg [ADDRW-1:0] addr_reg; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + (* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + raddr_r <= raddr; end - addr_reg <= raddr; end + assign rdata = ram[raddr_r]; + end else begin : g_no_wren + (* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [ADDRW-1:0] raddr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + raddr_r <= raddr; + end + end + assign rdata = ram[raddr_r]; end - assign rdata = ram[addr_reg]; end else if (RDW_MODE == "R") begin : g_read_first - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] 
rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[raddr]; end - rdata_r <= ram[raddr]; end + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[waddr] <= wdata; + end + rdata_r <= ram[raddr]; + end + end + assign rdata = rdata_r; end - assign rdata = rdata_r; end else begin - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end + if (read) begin + rdata_r <= ram[raddr]; + end end - if (read) begin - rdata_r <= ram[raddr]; + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + if (read) begin + rdata_r <= ram[raddr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (1), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), + .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (waddr), + .wdata (wdata), + .raddr (raddr), + .rdata (rdata) + ); + `else + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + 
`RAM_WRITE_WREN + end end + assign rdata = ram[raddr]; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; + `endif end else begin : g_read_first - `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[raddr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[raddr]; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end else begin : g_read_first - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[raddr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + 
if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata = ram[raddr]; end - assign rdata = ram[raddr]; end end end @@ -226,13 +381,13 @@ module VX_dp_ram #( if (OUT_REG) begin : g_sync if (RDW_MODE == "W") begin : g_write_first - reg [ADDRW-1:0] addr_reg; + reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin if (read || write) begin - addr_reg <= raddr; + raddr_r <= raddr; end end - assign rdata = ram[addr_reg]; + assign rdata = ram[raddr_r]; end else if (RDW_MODE == "R") begin : g_read_first reg [DATAW-1:0] rdata_r; always @(posedge clk) begin diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index d53903bfd..720a1a2c6 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -15,8 +15,8 @@ `TRACING_OFF module VX_fifo_queue #( - parameter DATAW = 1, - parameter DEPTH = 1, + parameter DATAW = 32, + parameter DEPTH = 32, parameter ALM_FULL = (DEPTH - 1), parameter ALM_EMPTY = 1, parameter OUT_REG = 0, diff --git a/hw/rtl/libs/VX_placeholder.sv b/hw/rtl/libs/VX_placeholder.sv new file mode 100644 index 000000000..738da615b --- /dev/null +++ b/hw/rtl/libs/VX_placeholder.sv @@ -0,0 +1,27 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +`TRACING_OFF +`BLACKBOX_CELL module VX_placeholder #( + parameter I = 0, + parameter O = 0 +) ( + input wire [`UP(I)-1:0] in, + output wire [`UP(O)-1:0] out +); + // empty module + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index bdf41eb50..88b922384 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -13,6 +13,35 @@ `include "VX_platform.vh" +`define RAM_INITIALIZATION \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ + initial $readmemh(INIT_FILE, ram); \ + end else begin : g_value \ + initial begin \ + for (integer i = 0; i < SIZE; ++i) begin : g_i \ + ram[i] = INIT_VALUE; \ + end \ + end \ + end \ + end + +`ifdef QUARTUS + `define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`else + `define RAM_ARRAY_WREN reg [DATAW-1:0] ram [0:SIZE-1]; + `define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end +`endif + `TRACING_OFF module VX_sp_ram #( parameter DATAW = 1, @@ -44,67 +73,55 @@ module VX_sp_ram #( `STATIC_ASSERT((RDW_MODE == "R" || RDW_MODE == "W" || RDW_MODE == "N"), ("invalid parameter")) `UNUSED_PARAM (RDW_ASSERT) -`define RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin : g_init \ - if (INIT_FILE != "") begin : g_file \ - initial $readmemh(INIT_FILE, ram); \ - end else begin : g_value \ - initial begin \ - for (integer i = 0; i < SIZE; ++i) begin : g_i \ - ram[i] = INIT_VALUE; \ - end \ - end \ - end \ - end - `ifdef SYNTHESIS localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); -`ifdef QUARTUS - `define RAM_ARRAY reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ 
- ram[addr][i] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`else - `define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1]; - `define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[addr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end -`endif if (OUT_REG) begin : g_sync if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "R") begin : g_read_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE - end - rdata_r <= ram[addr]; - end - end - assign rdata = rdata_r; - end else if (RDW_MODE == "W") begin : g_write_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - if (WRENW > 1) begin : g_wren - reg [ADDRW-1:0] addr_reg; + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin if (write) begin - `RAM_WRITE + `RAM_WRITE_WREN end - addr_reg <= addr; + rdata_r <= ram[addr]; end end - assign rdata = ram[addr_reg]; + assign rdata = rdata_r; end else begin : g_no_wren - `UNUSED_VAR (wren) + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "W") begin : g_write_first + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + addr_r <= addr; + end + end + assign rdata = ram[addr_r]; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin @@ -119,63 +136,110 @@ module VX_sp_ram #( 
assign rdata = rdata_r; end end else if (RDW_MODE == "N") begin : g_no_change - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "U") begin : g_unknown + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin if (write) begin - `RAM_WRITE - end else begin + `RAM_WRITE_WREN + end + if (read) begin rdata_r <= ram[addr]; end end - end - assign rdata = rdata_r; - end else if (RDW_MODE == "U") begin : g_unknown - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (read) begin - rdata_r <= ram[addr]; + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + if (read) begin + rdata_r <= ram[addr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end else begin : g_auto if (RDW_MODE == "R") begin : g_read_first - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE - end - rdata_r <= ram[addr]; 
- end - end - assign rdata = rdata_r; - end else if (RDW_MODE == "W") begin : g_write_first - `RAM_ARRAY - `RAM_INITIALIZATION - if (WRENW > 1) begin : g_wren - reg [ADDRW-1:0] addr_reg; + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin if (write) begin - `RAM_WRITE + `RAM_WRITE_WREN end - addr_reg <= addr; + rdata_r <= ram[addr]; end end - assign rdata = ram[addr_reg]; + assign rdata = rdata_r; end else begin : g_no_wren - `UNUSED_VAR (wren) + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "W") begin : g_write_first + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [ADDRW-1:0] addr_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + addr_r <= addr; + end + end + assign rdata = ram[addr_r]; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION reg [DATAW-1:0] rdata_r; always @(posedge clk) begin if (read || write) begin @@ -190,75 +254,172 @@ module VX_sp_ram #( assign rdata = rdata_r; end end else if (RDW_MODE == "N") begin : g_no_change - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] 
<= wdata; + end else begin + rdata_r <= ram[addr]; + end + end + end + assign rdata = rdata_r; + end + end else if (RDW_MODE == "U") begin : g_unknown + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin if (write) begin - `RAM_WRITE - end else begin + `RAM_WRITE_WREN + end + if (read) begin rdata_r <= ram[addr]; end end - end - assign rdata = rdata_r; - end else if (RDW_MODE == "U") begin : g_unknown - `RAM_ARRAY - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (write) begin - `RAM_WRITE - end - if (read) begin - rdata_r <= ram[addr]; + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + if (read) begin + rdata_r <= ram[addr]; + end end + assign rdata = rdata_r; end - assign rdata = rdata_r; end end end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first - `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (0), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), + .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (addr), + .wdata (wdata), + .raddr (addr), + .rdata (rdata) + ); + `else + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[addr]; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + end + assign rdata = ram[addr]; 
end - assign rdata = ram[addr]; + `endif end else begin : g_read_first - `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[addr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first - `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[addr]; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end else begin : g_read_first - `NO_RW_RAM_CHECK `RAM_ARRAY - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - `RAM_WRITE + if (WRENW != 1) begin : g_wren + `NO_RW_RAM_CHECK `RAM_ARRAY_WREN + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + `RAM_WRITE_WREN + end end + assign rdata = ram[addr]; + end else begin : g_no_wren + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[addr] <= wdata; + end + end + assign rdata = ram[addr]; end - assign rdata = ram[addr]; end end end @@ -291,13 +452,13 @@ module VX_sp_ram #( end assign rdata = rdata_r; end else if (RDW_MODE == "W") begin : g_write_first - reg [ADDRW-1:0] addr_reg; + reg 
[ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin - addr_reg <= addr; + addr_r <= addr; end end - assign rdata = ram[addr_reg]; + assign rdata = ram[addr_r]; end else if (RDW_MODE == "N") begin : g_no_change reg [DATAW-1:0] rdata_r; always @(posedge clk) begin diff --git a/hw/scripts/xilinx_async_bram_patch.tcl b/hw/scripts/xilinx_async_bram_patch.tcl new file mode 100644 index 000000000..5af7ba953 --- /dev/null +++ b/hw/scripts/xilinx_async_bram_patch.tcl @@ -0,0 +1,525 @@ +namespace eval vortex { + +variable debug 0 + +proc print_error {msg {do_exit 1}} { + if {$do_exit} { + puts "ERROR: $msg" + exit -1 + } else { + puts "WARNING: $msg" + } +} + +proc str_replace {str match repl} { + set result "" + regsub $match $str $repl result + return $result +} + +proc unique_cell_name {name} { + if {[get_cells -quiet $name] == {}} { return $name } + set index 0 + while {[get_cells -quiet ${name}_${index}] != {}} { incr index } + return ${name}_${index} +} + +proc unique_net_name {name} { + if {[get_nets -quiet $name] == {}} { return $name } + set index 0 + while {[get_nets -quiet ${name}_${index}] != {}} { incr index } + return ${name}_${index} +} + +proc find_nested_cells {parent name_match {should_exist 1}} { + set matching_cells {} + foreach cell [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"] { + set name [get_property NAME $cell] + if {[regexp $name_match $name]} { + lappend matching_cells $cell + } + } + if {[llength $matching_cells] == 0} { + print_error "No matching cell found for '$parent' matching '$name_match'." $should_exist + } + return $matching_cells +} + +proc find_nested_cell {parent name_match} { + foreach cell [get_cells -hierarchical -filter "PARENT == $parent"] { + set name [get_property NAME $cell] + if {$name == $name_match} { + return $cell + } + } + puts "ERROR: No matching cell found for '$parent' matching '$name_match'." 
+ exit -1 +} + +proc find_cell_nets {cell name_match {should_exist 1}} { + set matching_nets {} + foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] { + set name [get_property NAME $net] + if {[regexp $name_match $name]} { + lappend matching_nets $net + } + } + if {[llength $matching_nets] == 0} { + print_error "No matching net found for '$cell' matching '$name_match'." $should_exist + } + return $matching_nets +} + +proc get_cell_net {cell name_match} { + foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] { + set name [get_property NAME $net] + if {$name == $name_match} { + return $net + } + } + puts "ERROR: No matching net found for '$cell' matching '$name_match'." + exit -1 +} + +proc find_cell_pins {cell name_match {should_exist 1}} { + set matching_pins {} + foreach pin [get_pins -of_objects $cell] { + set name [get_property NAME $pin] + if {[regexp $name_match $name]} { + lappend matching_pins $pin + } + } + if {[llength $matching_pins] == 0} { + print_error "No matching pin found for '$cell' matching '$name_match'." $should_exist + } + return $matching_pins +} + +proc get_cell_pin {cell name_match} { + foreach pin [get_pins -of_objects $cell] { + set name [get_property NAME $pin] + if {$name == $name_match} { + return $pin + } + } + puts "ERROR: No matching pin found for '$cell' matching '$name_match'." + exit -1 +} + +proc replace_pin_source {pin source_pin} { + variable debug + + # Disconnect existing net from pin + set net [get_nets -of_objects $pin] + if {[llength $net] == 1} { + disconnect_net -net $net -objects $pin + if {$debug} {puts "DEBUG: Disconnected net '$net' from pin '$pin'."} + } elseif {[llength $net] > 1} { + puts "ERROR: Multiple nets connected to pin '$pin'." + exit -1 + } else { + puts "WARNING: No net connected to pin '$pin'." 
+ } + + set source_net [get_nets -quiet -of_objects $source_pin] + if {[llength $source_net] == 0} { + # Create a new net if none exists + set source_cell [get_cells -of_objects $source_pin] + set net_name [unique_net_name "${source_cell}_net"] + set source_net [create_net $net_name] + if {$debug} {puts "DEBUG: Created source_net: '$source_net'"} + # Connect the source pin to the new net + connect_net -net $source_net -objects $source_pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$source_pin'."} + } elseif {[llength $source_net] > 1} { + puts "ERROR: Multiple nets connected to pin '$source_pin'." + exit -1 + } + + # Connect pin to the new source net + connect_net -net $source_net -objects $pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."} +} + +proc create_register_next {reg_cell prefix_name} { + variable debug + + set reg_d_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/D"}] + if {[llength $reg_d_pin] == 0} { + puts "ERROR: No D pin found on register cell '$reg_cell'." + exit -1 + } elseif {[llength $reg_d_pin] > 1} { + puts "ERROR: Multiple D pins found on register cell '$reg_cell'." + exit -1 + } + + if {$debug} {puts "DEBUG: reg_d_pin: '$reg_d_pin'"} + + set reg_d_src_pin [find_pin_driver $reg_d_pin] + if {$reg_d_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_d_pin'." + exit -1 + } + + if {$debug} {puts "DEBUG: reg_d_src_pin: '$reg_d_src_pin'"} + + set reg_r_src_pin "" + + set register_type [get_property REF_NAME $reg_cell] + if {$register_type == "FDRE"} { + set reg_r_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/R"}] + if {[llength $reg_r_pin] == 0} { + puts "ERROR: No R pin found on FDRE cell '$reg_cell'." + exit -1 + } elseif {[llength $reg_r_pin] > 1} { + puts "ERROR: Multiple R pins found on FDRE cell '$reg_cell'." 
+ exit -1 + } + + if {$debug} {puts "DEBUG: reg_r_pin: '$reg_r_pin'"} + + set reg_r_src_pin [find_pin_driver $reg_r_pin] + if {$reg_r_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_r_pin'." + exit -1 + } + } elseif {$register_type == "FDSE"} { + set reg_s_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/S"}] + if {[llength $reg_s_pin] == 0} { + puts "ERROR: No S pin found on FDSE cell '$reg_cell'." + exit -1 + } elseif {[llength $reg_s_pin] > 1} { + puts "ERROR: Multiple S pins found on FDSE cell '$reg_cell'." + exit -1 + } + + if {$debug} {puts "DEBUG: reg_s_pin: '$reg_s_pin'"} + + set reg_r_src_pin [find_pin_driver $reg_s_pin] + if {$reg_r_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_s_pin'." + exit -1 + } + } else { + puts "ERROR: Unsupported register type: '$register_type'." + exit 1 + } + + if {$debug} {puts "DEBUG: reg_r_src_pin: '$reg_r_src_pin'"} + + set reg_d_src_net [get_nets -of_objects $reg_d_src_pin] + if {[llength $reg_d_src_net] == 0} { + puts "ERROR: Unable to get source nets for pins." + exit -1 + } elseif {[llength $reg_d_src_net] > 1} { + puts "ERROR: Multiple source nets found for pins." + exit -1 + } + + set reg_r_src_net [get_nets -of_objects $reg_r_src_pin] + if {[llength $reg_r_src_net] == 0} { + puts "ERROR: Unable to get source nets for pins." + exit -1 + } elseif {[llength $reg_r_src_net] > 1} { + puts "ERROR: Multiple source nets found for pins." + exit -1 + } + + # Create a MUX cell to implement register next value + # Use a 2x1 LUT to describe the logic: + # FDRE: O = I1 ? 0 : I0; where I0=D, I1=R + # FDSE: O = I1 ? 
1 : I0; where I0=D, I1=S + set lut_name [unique_cell_name $prefix_name] + set lut_cell [create_cell -reference LUT2 $lut_name] + puts "INFO: Created lut cell: '$lut_cell'" + + if {$register_type == "FDRE"} { + set_property INIT 4'b0010 $lut_cell + } elseif {$register_type == "FDSE"} { + set_property INIT 4'b1110 $lut_cell + } else { + puts "ERROR: Unsupported register type: '$register_type'." + exit 1 + } + + set lut_i0_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I0"}] + if {[llength $lut_i0_pin] == 0} { + puts "ERROR: No I0 pin found on FDSE cell '$lut_cell'." + exit -1 + } elseif {[llength $lut_i0_pin] > 1} { + puts "ERROR: Multiple I0 pins found on FDSE cell '$lut_cell'." + exit -1 + } + + set lut_i1_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I1"}] + if {[llength $lut_i1_pin] == 0} { + puts "ERROR: No I1 pin found on FDSE cell '$lut_cell'." + exit -1 + } elseif {[llength $lut_i1_pin] > 1} { + puts "ERROR: Multiple I1 pins found on FDSE cell '$lut_cell'." + exit -1 + } + + set lut_o_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/O"}] + if {[llength $lut_o_pin] == 0} { + puts "ERROR: No O pin found on FDSE cell '$lut_cell'." + exit -1 + } elseif {[llength $lut_o_pin] > 1} { + puts "ERROR: Multiple O pins found on FDSE cell '$lut_cell'." 
+ exit -1 + } + + connect_net -net $reg_d_src_net -objects $lut_i0_pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$reg_d_src_net' to pin '$lut_i0_pin'."} + + connect_net -net $reg_r_src_net -objects $lut_i1_pin -hierarchical + if {$debug} {puts "DEBUG: Connected net '$reg_r_src_net' to pin '$lut_i1_pin'."} + + return $lut_o_pin +} + +proc getOrCreateVCCPin {prefix_name} { + variable debug + + set vcc_cell "" + set vcc_cells [get_cells -quiet -filter {REF_NAME == VCC}] + if {[llength $vcc_cells] == 0} { + set cell_name [unique_cell_name $prefix_name] + set vcc_cell [create_cell -reference VCC $cell_name] + puts "INFO: Created VCC cell: '$vcc_cell'" + } else { + set vcc_cell [lindex $vcc_cells 0] + } + set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}] + if {[llength $vcc_pin] == 0} { + puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'." + exit -1 + } elseif {[llength $vcc_pin] > 1} { + puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'." + exit -1 + } + return $vcc_pin +} + +proc getOrCreateGNDPin {prefix_name} { + variable debug + + set gnd_cell "" + set gnd_cells [get_cells -quiet -filter {REF_NAME == GND}] + if {[llength $gnd_cells] == 0} { + set cell_name [unique_cell_name $prefix_name] + set gnd_cell [create_cell -reference GND $cell_name] + puts "INFO: Created GND cell: '$gnd_cell'" + } else { + set gnd_cell [lindex $gnd_cells 0] + } + set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}] + if {[llength $gnd_pin] == 0} { + puts "ERROR: No GND pin found on GND cell '$gnd_cell'." + exit -1 + } elseif {[llength $gnd_pin] > 1} { + puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'." 
+ exit -1 + } + return $gnd_pin +} + +proc find_net_sinks {input_net {should_exist 1}} { + set sink_pins {} + foreach pin [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "IN"}] { + lappend sink_pins $pin + } + foreach port [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "OUT"}] { + lappend sink_pins $port + } + if {[llength $sink_pins] == 0} { + print_error "No sink found for '$input_net'." $should_exist + } + return $sink_pins +} + +proc find_net_driver {input_net {should_exist 1}} { + set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}] + if {[llength $driverPins] == 0} { + set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}] + if {[llength $driverPorts] == 0} { + print_error "No driver found for '$input_net'." $should_exist + } elseif {[llength $driverPorts] > 1} { + puts "WARNING: Multiple driver ports found for '$input_net'." + return [lindex $driverPorts 0] + } + return $driverPorts + } elseif {[llength $driverPins] > 1} { + puts "WARNING: Multiple driver pins found for '$input_net'." + return [lindex $driverPins 0] + } + return $driverPins +} + +proc find_pin_driver {input_pin {should_exist 1}} { + set net [get_nets -quiet -of_objects $input_pin] + if {[llength $net] == 0} { + print_error "No net connected to pin '$input_pin'." $should_exist + } elseif {[llength $net] > 1} { + puts "ERROR: Multiple nets connected to pin '$input_pin'." + exit -1 + } + return [find_net_driver $net] +} + +proc find_matching_nets {cell nets match repl} { + set matching_nets {} + foreach net $nets { + set net_name [str_replace $net $match $repl] + set matching_net [get_cell_net $cell $net_name] + if {$matching_net != ""} { + lappend matching_nets $matching_net + } + } + if {[llength $matching_nets] == 0} { + puts "ERROR: No matching nets found for '$nets'." 
+ exit -1 + } elseif {[llength $matching_nets] != [llength $nets]} { + puts "ERROR: Mismatch in number of matching nets." + exit -1 + } + return $matching_nets +} + +proc replace_net_source {net source_pin} { + foreach pin [find_net_sinks $net 0] { + replace_pin_source $pin $source_pin + } +} + +proc resolve_async_bram {inst} { + variable debug + + puts "INFO: Resolving asynchronous BRAM patch: '$inst'." + + set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"] + set read_s_net [find_cell_nets $inst "read_s$"] + set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"] + + set raddr_s_nets [find_matching_nets $inst $raddr_w_nets "raddr_w(\\\[\\d+\\\])?$" "raddr_s\\1"] + + set reg_next_pins {} + set reg_ce_src_pin "" + + foreach raddr_w_net $raddr_w_nets { + if {$debug} {puts "DEBUG: Processing raddr_w net: '$raddr_w_net'"} + + # Find raddr_w_net's driver pin + set raddr_src_pin [find_net_driver $raddr_w_net] + if {$debug} {puts "DEBUG: raddr_src_pin: '$raddr_src_pin'"} + + # Get the driver cell + set raddr_src_cell [get_cells -of_objects $raddr_src_pin] + if {[llength $raddr_src_cell] == 0} { + puts "ERROR: No source cell found connected to pin '$raddr_src_pin'." + exit -1 + } elseif {[llength $raddr_src_cell] > 1} { + puts "ERROR: Multiple source cells found connected to pin '$raddr_src_pin'." + exit -1 + } + + # Check driver type + set driver_type [get_property REF_NAME $raddr_src_cell] + if {$driver_type == "FDRE" || $driver_type == "FDSE"} { + if {$debug} {puts "DEBUG: Net '$raddr_w_net' is registered, driver_type='$driver_type'"} + } else { + puts "WARNING: Net '$raddr_w_net' is not be registered, driver_type='$driver_type'" + break + } + + # Create register next cell and return output pin + set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"] + if {$reg_next_pin == ""} { + puts "ERROR: failed to create register next value for '$raddr_src_cell'." 
+ exit -1 + } + if {$debug} {puts "DEBUG: reg_next_pin: '$reg_next_pin'"} + + lappend reg_next_pins $reg_next_pin + + # Find the CE pin on raddr_src_cell + if {$reg_ce_src_pin == ""} { + set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}] + if {[llength $reg_ce_pin] == 0} { + puts "ERROR: No CE pin found on register cell '$raddr_src_cell'." + exit -1 + } elseif {[llength $reg_ce_pin] > 1} { + puts "ERROR: Multiple CE pins found on register cell '$raddr_src_cell'." + exit -1 + } + if {$debug} {puts "DEBUG: reg_ce_pin: '$reg_ce_pin'"} + + set reg_ce_src_pin [find_pin_driver $reg_ce_pin] + if {$reg_ce_src_pin == ""} { + puts "ERROR: No source pin found connected to '$reg_ce_pin'." + exit -1 + } + if {$debug} {puts "DEBUG: reg_ce_src_pin: '$reg_ce_src_pin'"} + } + } + + # do we have a fully registered read address? + if {[llength $reg_next_pins] == [llength $raddr_w_nets]} { + puts "INFO: Fully registered read address detected." + set addr_width [llength $raddr_w_nets] + for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} { + set raddr_w_net [lindex $raddr_w_nets $addr_idx] + set raddr_s_net [lindex $raddr_s_nets $addr_idx] + set reg_next_pin [lindex $reg_next_pins $addr_idx] + puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins." + # Connect reg_next_pin to all input pins attached to raddr_s_net + replace_net_source $raddr_s_net $reg_next_pin + } + + # Connect reg_ce_src_pin to all input pins attached to read_s_net + puts "INFO: Connecting pin '$reg_ce_src_pin' to '$read_s_net's pins." + replace_net_source $read_s_net $reg_ce_src_pin + + # Create Const<1>'s pin + set vcc_pin [getOrCreateVCCPin "$inst/VCC"] + + # Connect vcc_pin to all input pins attached to is_raddr_reg_net + puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins." + replace_net_source $is_raddr_reg_net $vcc_pin + } else { + puts "WARNING: Not all read addresses are registered!" 
+ + # Create Const<0>'s pin + set gnd_pin [getOrCreateGNDPin "$inst/GND"] + + # Connect gnd_pin to all input pins attached to is_raddr_reg_net + puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins." + replace_net_source $is_raddr_reg_net $gnd_pin + } + + # Remove all placeholder cells + foreach cell [find_nested_cells $inst "placeholder$"] { + remove_cell $cell + if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."} + } +} + +proc resolve_async_brams {} { + set bram_patch_cells {} + foreach cell [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}] { + puts "INFO: Found async BRAM patch cell: '$cell'." + lappend bram_patch_cells $cell + } + if {[llength $bram_patch_cells] != 0} { + foreach cell $bram_patch_cells { + resolve_async_bram $cell + } + } else { + puts "INFO: No async BRAM patch cells found in the design." + } +} + +} + +# Invoke the procedure to resolve async BRAM +vortex::resolve_async_brams diff --git a/hw/scripts/xilinx_export_netlist.tcl b/hw/scripts/xilinx_export_netlist.tcl new file mode 100644 index 000000000..25a0d17e8 --- /dev/null +++ b/hw/scripts/xilinx_export_netlist.tcl @@ -0,0 +1,71 @@ +# Function to export netlist to a Graphviz DOT file +proc export_netlist {dot_file_name} { + # Open the DOT file for writing + set dot_file [open $dot_file_name "w"] + + # Start the DOT graph definition + puts $dot_file "digraph Netlist {" + puts $dot_file "rankdir=LR;" ;# Set the graph direction from left to right + + # Extract and add cells to the graph + foreach cell [get_cells -hierarchical] { + set cell_name [get_property NAME $cell] + set cell_type [get_property REF_NAME $cell] + puts $dot_file "\"$cell_name\" \[label=\"$cell_name\\n($cell_type)\", shape=box\];" + } + + # Extract and add ports to the graph + foreach port [get_ports] { + set port_name [get_property NAME $port] + set direction [get_property DIRECTION $port] + set shape "ellipse" + + # Color code input and output ports for easier identification 
+ if {$direction == "IN"} { + set color "lightblue" + } else { + set color "lightgreen" + } + puts $dot_file "\"$port_name\" \[label=\"$port_name\", shape=$shape, style=filled, fillcolor=$color\];" + } + + # Traverse nets and create edges between ports and pins + foreach net [get_nets -hierarchical] { + set net_name [get_property NAME $net] + + # Find source and destination pins + set source_pin "" + set sink_pins {} + + foreach pin [get_pins -of_objects $net] { + set direction [get_property DIRECTION $pin] + set cell [get_cells -of_objects $pin] + set pin_name [get_property NAME $pin] + + if {$direction == "OUT"} { + # Set as source pin + set source_pin "$cell/$pin_name" + } else { + # Collect as sink pin + lappend sink_pins "$cell/$pin_name" + } + } + + # Output edges from source to all sinks + if {$source_pin != ""} { + foreach sink_pin $sink_pins { + puts $dot_file "\"$source_pin\" -> \"$sink_pin\" \[label=\"$net_name\"\];" + } + } + } + + # End the DOT graph definition + puts $dot_file "}" + + # Close the DOT file + close $dot_file + puts "Netlist exported to DOT file: $dot_file_name" +} + +# Run the export function +export_netlist "netlist.dot" \ No newline at end of file diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk index 933621bef..81946c88f 100644 --- a/hw/syn/xilinx/dut/common.mk +++ b/hw/syn/xilinx/dut/common.mk @@ -31,9 +31,9 @@ project_1/sources.txt: build: $(PROJECT).xpr $(PROJECT).xpr: project_1/sources.txt ifdef FPU_IP - MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) + MAX_JOBS=$(JOBS) FPU_IP=project_1/ip SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc else - MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) 
project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) + MAX_JOBS=$(JOBS) SCRIPT_DIR=$(SCRIPT_DIR) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc endif clean: diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl index dcaf883fa..9cb173c22 100644 --- a/hw/syn/xilinx/dut/project.tcl +++ b/hw/syn/xilinx/dut/project.tcl @@ -14,9 +14,9 @@ # Start time set start_time [clock seconds] -if { $::argc != 5 } { - puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" - puts "Usage: $::argv0 \n" +if { $::argc != 4 } { + puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n" + puts "Usage: $::argv0 \n" exit } @@ -27,13 +27,16 @@ set top_module [lindex $::argv 0] set device_part [lindex $::argv 1] set vcs_file [lindex $::argv 2] set xdc_file [lindex $::argv 3] -set tool_dir [lindex $::argv 4] + +set script_dir $::env(SCRIPT_DIR) +set source_dir [file dirname [info script]] puts "Using top_module=$top_module" puts "Using device_part=$device_part" puts "Using vcs_file=$vcs_file" puts "Using xdc_file=$xdc_file" -puts "Using tool_dir=$tool_dir" +puts "Using script_dir=$script_dir" +puts "Using source_dir=$source_dir" # Set the number of jobs based on MAX_JOBS environment variable if {[info exists ::env(MAX_JOBS)]} { @@ -48,10 +51,10 @@ if {[info exists ::env(FPU_IP)]} { set ip_dir $::env(FPU_IP) set argv [list $ip_dir $device_part] set argc 2 - source ${tool_dir}/xilinx_ip_gen.tcl + source ${script_dir}/xilinx_ip_gen.tcl } -source "${tool_dir}/parse_vcs_list.tcl" +source "${script_dir}/parse_vcs_list.tcl" set vlist [parse_vcs_list "${vcs_file}"] set vsources_list [lindex $vlist 0] @@ -84,37 +87,52 @@ if {[info exists ::env(FPU_IP)]} { update_compile_order -fileset sources_1 +# Synthesis set_property top $top_module [current_fileset] + set_property \ -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \ -value {-mode out_of_context -flatten_hierarchy "rebuilt"} \ 
-objects [get_runs synth_1] -# Synthesis +# register compilation hooks +#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1] +#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1] +set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1] +#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1] +#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1] +#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1] + if {$num_jobs != 0} { - launch_runs synth_1 -jobs $num_jobs + launch_runs synth_1 -verbose -jobs $num_jobs } else { - launch_runs synth_1 + launch_runs synth_1 -verbose } wait_on_run synth_1 open_run synth_1 write_checkpoint -force post_synth.dcp -report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages +report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages # Implementation if {$num_jobs != 0} { - launch_runs impl_1 -jobs $num_jobs + launch_runs impl_1 -verbose -jobs $num_jobs } else { - launch_runs impl_1 + launch_runs impl_1 -verbose } wait_on_run impl_1 open_run impl_1 write_checkpoint -force post_impl.dcp +report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages # Generate the synthesis report report_place_status -file place.rpt report_route_status -file route.rpt report_timing_summary -file timing.rpt + +# Generate timing report +report_timing -nworst 10 -delay_type max -sort_by group -file timing.rpt + +# Generate power and drc reports report_power -file power.rpt report_drc -file drc.rpt @@ -125,4 +143,4 @@ set elapsed_time [expr {[clock seconds] - $start_time}] set hours [format "%02d" [expr {$elapsed_time / 3600}]] set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]] set seconds [format "%02d" [expr {$elapsed_time % 60}]] -puts "Total 
elapsed time: ${hours}h ${minutes}m ${seconds}s" \ No newline at end of file +puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" diff --git a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile index 1bc66aa38..3d756562e 100644 --- a/hw/syn/xilinx/dut/unittest/Makefile +++ b/hw/syn/xilinx/dut/unittest/Makefile @@ -1,4 +1,4 @@ -PROJECT = Unittest +PROJECT = VX_fifo_queue TOP_LEVEL_ENTITY = $(PROJECT) SRC_FILE = $(PROJECT).sv diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in index 8926b43ad..bb1bf86f2 100644 --- a/hw/syn/xilinx/sandbox/project.tcl.in +++ b/hw/syn/xilinx/sandbox/project.tcl.in @@ -121,8 +121,8 @@ proc run_setup {} { # None # Set 'sim_1' fileset file properties for local files -set file "testbench.v" -set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] + set file "testbench.v" + set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] set_property -name "file_type" -value "Verilog" -objects $file_obj set_property -name "is_enabled" -value "1" -objects $file_obj set_property -name "is_global_include" -value "0" -objects $file_obj @@ -300,7 +300,7 @@ set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] CONFIG.Assume_Synchronous_Clk {true} \ CONFIG.Byte_Size {8} \ CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {@CURRENTDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \ + CONFIG.Coe_File {@BUILDDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \ CONFIG.EN_SAFETY_CKT {true} \ CONFIG.Enable_32bit_Address {true} \ CONFIG.Fill_Remaining_Memory_Locations {false} \ diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 5d536a069..643724069 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -88,6 +88,9 @@ RTL_INCLUDE += $(FPU_INCLUDE) VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache VPP_FLAGS += --vivado.synth.jobs $(JOBS) --vivado.impl.jobs $(JOBS) +# register 
compilation hooks +VPP_FLAGS += --xp "vivado_prop:run.impl_1.STEPS.OPT_DESIGN.TCL.PRE={$(SCRIPT_DIR)/xilinx_async_bram_patch.tcl}" + # load platform settings include $(SRC_DIR)/platforms.mk From dfc7b6178c57b8badae492fa644766ec6d7ce95c Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 13 Nov 2024 20:56:06 -0800 Subject: [PATCH 415/488] cleanup old cache test --- hw/rtl/cache/VX_cache.sv | 4 - hw/unittest/Makefile | 3 - hw/unittest/cache/Makefile | 26 --- hw/unittest/cache/cachesim.cpp | 354 -------------------------------- hw/unittest/cache/cachesim.h | 104 ---------- hw/unittest/cache/ram.h | 77 ------- hw/unittest/cache/testbench.cpp | 248 ---------------------- 7 files changed, 816 deletions(-) delete mode 100644 hw/unittest/cache/Makefile delete mode 100644 hw/unittest/cache/cachesim.cpp delete mode 100644 hw/unittest/cache/cachesim.h delete mode 100644 hw/unittest/cache/ram.h delete mode 100644 hw/unittest/cache/testbench.cpp diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index 8c3db21f4..d8a5dbaa2 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -82,10 +82,6 @@ module VX_cache import VX_gpu_pkg::*; #( `STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable")) `STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback")) - // In writeback mode, memory fill response may issue a new memory request to handle evicted blocks. - // We need to ensure that the memory request queue never fills up to avoid deadlock. 
- `STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE")) - localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); diff --git a/hw/unittest/Makefile b/hw/unittest/Makefile index f37d6ae1b..d3a74d794 100644 --- a/hw/unittest/Makefile +++ b/hw/unittest/Makefile @@ -1,5 +1,4 @@ all: - $(MAKE) -C cache $(MAKE) -C generic_queue $(MAKE) -C mem_streamer $(MAKE) -C cache_top @@ -9,7 +8,6 @@ all: $(MAKE) -C mem_unit_top run: - $(MAKE) -C cache run $(MAKE) -C generic_queue run $(MAKE) -C mem_streamer run $(MAKE) -C cache_top run @@ -19,7 +17,6 @@ run: $(MAKE) -C mem_unit_top run clean: - $(MAKE) -C cache clean $(MAKE) -C generic_queue clean $(MAKE) -C mem_streamer clean $(MAKE) -C cache_top clean diff --git a/hw/unittest/cache/Makefile b/hw/unittest/cache/Makefile deleted file mode 100644 index b734aaedd..000000000 --- a/hw/unittest/cache/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -ROOT_DIR := $(realpath ../../..) 
-include $(ROOT_DIR)/config.mk - -PROJECT := cache - -RTL_DIR := $(VORTEX_HOME)/hw/rtl -DPI_DIR := $(VORTEX_HOME)/hw/dpi - -SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) - -CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common -CXXFLAGS += -I$(ROOT_DIR)/hw - -SRCS := $(DPI_DIR)/util_dpi.cpp -SRCS += $(SRC_DIR)/cachesim.cpp $(SRC_DIR)/testbench.cpp - -DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE - -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv - -RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache - -TOP := VX_cache_top - -include ../common.mk \ No newline at end of file diff --git a/hw/unittest/cache/cachesim.cpp b/hw/unittest/cache/cachesim.cpp deleted file mode 100644 index acd68419b..000000000 --- a/hw/unittest/cache/cachesim.cpp +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "cachesim.h" -#include -#include -#include -#include -#include - -#ifndef TRACE_START_TIME -#define TRACE_START_TIME 0ull -#endif - -#ifndef TRACE_STOP_TIME -#define TRACE_STOP_TIME -1ull -#endif - -static uint64_t timestamp = 0; -static bool trace_enabled = false; -static uint64_t trace_start_time = TRACE_START_TIME; -static uint64_t trace_stop_time = TRACE_STOP_TIME; - -double sc_time_stamp() { - return timestamp; -} - -bool sim_trace_enabled() { - if (timestamp >= trace_start_time - && timestamp < trace_stop_time) - return true; - return trace_enabled; -} - -void sim_trace_enable(bool enable) { - trace_enabled = enable; -} - -CacheSim::CacheSim() { - // force random values for uninitialized signals - Verilated::randReset(2); - - // create RTL module instance - cache_ = new VVX_cache_top(); - -#ifdef VCD_OUTPUT - Verilated::traceEverOn(true); - tfp_ = new VerilatedVcdC; - cache_->trace(tfp_, 99); - tfp_->open("trace.vcd"); -#endif - - ram_ = nullptr; - mem_rsp_active_ = false; - snp_req_active_ = false; -} - -CacheSim::~CacheSim() { -#ifdef VCD_OUTPUT - tfp_->close(); -#endif - delete cache_; - //need to delete the req and rsp vectors -} - -void CacheSim::attach_ram(RAM* ram) { - ram_ = ram; - mem_rsp_vec_.clear(); -} - -void CacheSim::reset() { -#ifndef NDEBUG - std::cout << timestamp << ": [sim] reset()" << std::endl; -#endif - - cache_->reset = 1; - this->step(); - cache_->reset = 0; - this->step(); - - mem_rsp_vec_.clear(); - //clear req and rsp vecs - -} - -void CacheSim::step() { - //std::cout << timestamp << ": [sim] step()" << std::endl; - //toggle clock - cache_->clk = 0; - this->eval(); - - cache_->clk = 1; - this->eval(); - - //handle core and memory reqs and rsps - this->eval_reqs(); - this->eval_rsps(); - this->eval_mem_bus(); - timestamp++; -} - -void CacheSim::eval() { - cache_->eval(); -#ifdef VCD_OUTPUT - tfp_->dump(timestamp); -#endif - ++timestamp; -} - -void CacheSim::run(){ -//#ifndef NDEBUG - -//#endif - this->step(); - - int 
valid = 300; - int stalls = 20 + 10; - - while (valid > -1) { - - this->step(); - display_miss(); - if(cache_->core_rsp_valid){ - get_core_rsp(); - } - - if(!cache_->core_req_valid && !cache_->core_rsp_valid){ - valid--; - - } - stalls--; - if (stalls == 20){ - //stall_mem(); - //send_snoop_req(); - stalls--; - } - } -} - -void CacheSim::clear_req(){ - cache_->core_req_valid = 0; -} - -void CacheSim::send_req(core_req_t *req){ - core_req_vec_.push(req); - unsigned int *data = new unsigned int[4]; - core_rsp_vec_.insert(std::pair(req->tag, data)); -} - -bool CacheSim::get_core_req_ready(){ - return cache_->core_req_ready; -} - -bool CacheSim::get_core_rsp_ready(){ - return cache_->core_rsp_ready; -} - -void CacheSim::eval_reqs(){ - //check to see if cache is accepting reqs - if(!core_req_vec_.empty() && cache_->core_req_ready){ - core_req_t *req = core_req_vec_.front(); - - cache_->core_req_valid = req->valid; - cache_->core_req_rw = req->rw; - cache_->core_req_byteen = req->byteen; - - cache_->core_req_addr[0] = req->addr[0]; - cache_->core_req_addr[1] = req->addr[1]; - cache_->core_req_addr[2] = req->addr[2]; - cache_->core_req_addr[3] = req->addr[3]; - - cache_->core_req_data[0] = req->data[0]; - cache_->core_req_data[1] = req->data[1]; - cache_->core_req_data[2] = req->data[2]; - cache_->core_req_data[3] = req->data[3]; - - cache_->core_req_tag = req->tag; - - core_req_vec_.pop(); - - } else { - clear_req(); - } -} - -void CacheSim::eval_rsps(){ - //check to see if a request has been responded to - if (cache_->core_rsp_valid){ - core_rsp_vec_.at(cache_->core_rsp_tag)[0] = cache_->core_rsp_data[0]; - core_rsp_vec_.at(cache_->core_rsp_tag)[1] = cache_->core_rsp_data[1]; - core_rsp_vec_.at(cache_->core_rsp_tag)[2] = cache_->core_rsp_data[2]; - core_rsp_vec_.at(cache_->core_rsp_tag)[3] = cache_->core_rsp_data[3]; - } -} - -void CacheSim::stall_mem(){ - cache_->mem_req_ready = 0; -} - -void CacheSim::send_snoop_req(){ - /*cache_->snp_req_valid = 1; - 
cache_->snp_req_addr = 0x12222222; - cache_->snp_req_invalidate = 1; - cache_->snp_req_tag = 0xff; */ -} - -void CacheSim::eval_mem_bus() { - if (ram_ == nullptr) { - cache_->mem_req_ready = 0; - return; - } - - // schedule memory responses - int dequeue_index = -1; - for (int i = 0; i < mem_rsp_vec_.size(); i++) { - if (mem_rsp_vec_[i].cycles_left > 0) { - mem_rsp_vec_[i].cycles_left -= 1; - } - if ((dequeue_index == -1) - && (mem_rsp_vec_[i].cycles_left == 0)) { - dequeue_index = i; - } - } - - // send memory response - if (mem_rsp_active_ - && cache_->mem_rsp_valid - && cache_->mem_rsp_ready) { - mem_rsp_active_ = false; - } - if (!mem_rsp_active_) { - if (dequeue_index != -1) { //time to respond to the request - cache_->mem_rsp_valid = 1; - - //copy data from the rsp queue to the cache module - memcpy(cache_->mem_rsp_data.data(), mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE); - - cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag; - free(mem_rsp_vec_[dequeue_index].data); //take data out of the queue - mem_rsp_vec_.erase(mem_rsp_vec_.begin() + dequeue_index); - mem_rsp_active_ = true; - } else { - cache_->mem_rsp_valid = 0; - } - } - - // handle memory stalls - bool mem_stalled = false; -#ifdef ENABLE_MEM_STALLS - if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { - mem_stalled = true; - } else - if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) { - mem_stalled = true; - } -#endif - - // process memory requests - if (!mem_stalled) { - if (cache_->mem_req_valid) { - if (cache_->mem_req_rw) { //write = 1 - uint64_t byteen = cache_->mem_req_byteen; - uint64_t base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE); - uint8_t* data = reinterpret_cast(cache_->mem_req_data.data()); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - } else { - mem_req_t mem_req; - mem_req.cycles_left = MEM_LATENCY; - mem_req.data = (uint8_t*)malloc(MEM_BLOCK_SIZE); - mem_req.tag = cache_->mem_req_tag; - 
ram_->read(cache_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data); - mem_rsp_vec_.push_back(mem_req); - } - } - } - - cache_->mem_req_ready = ~mem_stalled; -} - -bool CacheSim::assert_equal(unsigned int* data, unsigned int tag){ - int check = 0; - unsigned int *rsp = core_rsp_vec_.at(tag); - for (int i = 0; i < 4; ++i){ - for (int j = 0; j < 4; ++j){ - if (data[i] == rsp[j]){ - check++; - } - } - } - - return check; - -} - -//DEBUG - -void CacheSim::display_miss(){ - //int i = (unsigned int)cache_->miss_vec; - //std::bitset<8> x(i); - //if (i) std::cout << "Miss Vec " << x << std::endl; - //std::cout << "Miss Vec 0" << cache_->miss_vec[0] << std::endl; -} - -void CacheSim::get_core_req(unsigned int (&rsp)[4]){ - rsp[0] = cache_->core_rsp_data[0]; - rsp[1] = cache_->core_rsp_data[1]; - rsp[2] = cache_->core_rsp_data[2]; - rsp[3] = cache_->core_rsp_data[3]; - - //std::cout << std::hex << "core_rsp_valid: " << cache_->core_rsp_valid << std::endl; - //std::cout << std::hex << "core_rsp_data: " << cache_->core_rsp_data << std::endl; - //std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; -} - -void CacheSim::get_core_rsp(){ - //std::cout << cache_->genblk5_BRA_0_KET_->bank->is_fill_in_pipe<< std::endl; - char check = cache_->core_rsp_valid; - std::cout << std::hex << "core_rsp_valid: " << (unsigned int) check << std::endl; - std::cout << std::hex << "core_rsp_data[0]: " << cache_->core_rsp_data[0] << std::endl; - std::cout << std::hex << "core_rsp_data[1]: " << cache_->core_rsp_data[1] << std::endl; - std::cout << std::hex << "core_rsp_data[2]: " << cache_->core_rsp_data[2] << std::endl; - std::cout << std::hex << "core_rsp_data[3]: " << cache_->core_rsp_data[3] << std::endl; - std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl; -} - -void CacheSim::get_mem_req(){ - std::cout << std::hex << "mem_req_valid: " << cache_->mem_req_valid << std::endl; - std::cout << std::hex << "mem_req_rw: " << 
cache_->mem_req_rw << std::endl; - std::cout << std::hex << "mem_req_byteen: " << cache_->mem_req_byteen << std::endl; - std::cout << std::hex << "mem_req_addr: " << cache_->mem_req_addr << std::endl; - std::cout << std::hex << "mem_req_data: " << cache_->mem_req_data << std::endl; - std::cout << std::hex << "mem_req_tag: " << cache_->mem_req_tag << std::endl; -} - -void CacheSim::get_mem_rsp(){ - std::cout << std::hex << "mem_rsp_valid: " << cache_->mem_rsp_valid << std::endl; - std::cout << std::hex << "mem_rsp_data: " << cache_->mem_rsp_data << std::endl; - std::cout << std::hex << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl; - std::cout << std::hex << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl; -} diff --git a/hw/unittest/cache/cachesim.h b/hw/unittest/cache/cachesim.h deleted file mode 100644 index 5235735d6..000000000 --- a/hw/unittest/cache/cachesim.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "VVX_cache_top.h" -#include "VVX_cache_top__Syms.h" -#include "verilated.h" - -#ifdef VCD_OUTPUT -#include -#endif - -#include -#include "ram.h" -#include -#include -#include - -#define ENABLE_MEM_STALLS -#define MEM_LATENCY 100 -#define MEM_RQ_SIZE 16 -#define MEM_STALLS_MODULO 16 - -typedef struct { - int cycles_left; - uint8_t *data; - unsigned tag; -} mem_req_t; - -typedef struct { - char valid; - char rw; - unsigned byteen; - unsigned *addr; - unsigned *data; - unsigned int tag; -} core_req_t; - -class CacheSim { -public: - - CacheSim(); - virtual ~CacheSim(); - - bool busy(); - - void reset(); - void step(); - void wait(uint32_t cycles); - void attach_ram(RAM* ram); - void run(); //run until all reqs are empty - - //req/rsp - void send_req(core_req_t *req); - void clear_req(); - void stall_mem(); - void send_snoop_req(); - void send_snp_fwd_in(); - - //assert funcs - bool assert_equal(unsigned int* data, unsigned int tag); - - //debug funcs - void get_mem_req(); - void get_core_req(unsigned int (&rsp)[4]); - void get_core_rsp(); - bool get_core_req_ready(); - bool get_core_rsp_ready(); - void get_mem_rsp(); - void display_miss(); - -private: - - void eval(); - void eval_reqs(); - void eval_rsps(); - void eval_mem_bus(); - - std::queue core_req_vec_; - std::vector mem_rsp_vec_; - std::map core_rsp_vec_; - int mem_rsp_active_; - - uint32_t snp_req_active_; - uint32_t snp_req_size_; - uint32_t pending_snp_reqs_; - - VVX_cache_top* cache_; - RAM* ram_; -#ifdef VCD_OUTPUT - VerilatedVcdC* tfp_; -#endif -}; diff --git a/hw/unittest/cache/ram.h b/hw/unittest/cache/ram.h deleted file mode 100644 index d01934a52..000000000 --- a/hw/unittest/cache/ram.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -class RAM { -private: - - mutable uint8_t *mem_[(1 << 12)]; - - uint8_t *get(uint32_t address) const { - uint32_t block_addr = address >> 20; - uint32_t block_offset = address & 0x000FFFFF; - if (mem_[block_addr] == NULL) { - mem_[block_addr] = new uint8_t[(1 << 20)]; - } - return mem_[block_addr] + block_offset; - } - -public: - - RAM() { - for (uint32_t i = 0; i < (1 << 12); i++) { - mem_[i] = NULL; - } - } - - ~RAM() { - this->clear(); - } - - size_t size() const { - return (1ull << 32); - } - - void clear() { - for (uint32_t i = 0; i < (1 << 12); i++) { - if (mem_[i]) { - delete [] mem_[i]; - mem_[i] = NULL; - } - } - } - - void read(uint32_t address, uint32_t length, uint8_t *data) const { - for (unsigned i = 0; i < length; i++) { - data[i] = *this->get(address + i); - } - } - - void write(uint32_t address, uint32_t length, const uint8_t *data) { - for (unsigned i = 0; i < length; i++) { - *this->get(address + i) = data[i]; - } - } - - uint8_t& operator[](uint32_t address) { - return *get(address); - } - - const uint8_t& operator[](uint32_t address) const { - return *get(address); - } -}; \ No newline at end of file diff --git a/hw/unittest/cache/testbench.cpp b/hw/unittest/cache/testbench.cpp deleted file mode 100644 index bf9dfb340..000000000 --- a/hw/unittest/cache/testbench.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "cachesim.h" -#include -#include -#include - -#define VCD_OUTPUT 1 - - -int REQ_RSP(CacheSim *sim){ //verified - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - sim->send_req(write); - sim->send_req(read); - - sim->run(); - - int check = sim->assert_equal(data, write->tag); - - if (check == 4) return 1; - - return 0; -} - -int HIT_1(CacheSim *sim){ - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0x11; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0x22; - - // reset the device - sim->reset(); - - //queue reqs - 
sim->send_req(write); - sim->send_req(read); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} - -int MISS_1(CacheSim *sim){ - unsigned int addr1[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int addr2[4] = {0x12229222, 0xabbbb4bb, 0xcddd47dd, 0xe4423544}; - unsigned int addr3[4] = {0x12223332, 0xabb454bb, 0xcdddeefd, 0xe4447744}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr1; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read1 = new core_req_t; - read1->valid = 0xf; - read1->rw = 0; - read1->byteen = 0xffff; - read1->addr = addr1; - read1->data = data; - read1->tag = 0xff; - - core_req_t* read2 = new core_req_t; - read2->valid = 0xf; - read2->rw = 0; - read2->byteen = 0xffff; - read2->addr = addr2; - read2->data = data; - read2->tag = 0xff; - - core_req_t* read3 = new core_req_t; - read3->valid = 0xf; - read3->rw = 0; - read3->byteen = 0xffff; - read3->addr = addr3; - read3->data = data; - read3->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - sim->send_req(write); - sim->send_req(read1); - sim->send_req(read2); - sim->send_req(read3); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} -int FLUSH(CacheSim *sim){ - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - 
read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - sim->send_req(write); - sim->send_req(read); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} - - -int BACK_PRESSURE(CacheSim *sim){ - //happens whenever the core is stalled or memory is stalled - unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444}; - unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333}; - unsigned int rsp[4] = {0,0,0,0}; - char responded = 0; - - //write req - core_req_t* write = new core_req_t; - write->valid = 0xf; - write->rw = 0xf; - write->byteen = 0xffff; - write->addr = addr; - write->data = data; - write->tag = 0xff; - - //read req - core_req_t* read = new core_req_t; - read->valid = 0xf; - read->rw = 0; - read->byteen = 0xffff; - read->addr = addr; - read->data = addr; - read->tag = 0xff; - - // reset the device - sim->reset(); - - //queue reqs - for (int i = 0; i < 10; i++){ - sim->send_req(write); - } - sim->send_req(read); - - sim->run(); - - bool check = sim->assert_equal(data, write->tag); - - return check; -} - - -int main(int argc, char **argv) -{ - //init - RAM ram; - CacheSim cachesim; - cachesim.attach_ram(&ram); - int check = REQ_RSP(&cachesim); - if(check){ - std::cout << "PASSED" << std::endl; - } else { - std::cout << "FAILED" << std::endl; - } - - return 0; -} From 8230b37411dfe28fe1b59a25a5de4c7de276cf90 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 14 Nov 2024 11:42:21 -0800 Subject: [PATCH 416/488] fixed opae build bug --- hw/rtl/afu/opae/vortex_afu.sv | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index f21f851c0..fc4301de7 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -46,7 +46,11 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; 
import VX_ ); localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; - localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH)); + localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); + + localparam LMEM_BYTE_ADDR_WIDTH = LMEM_ADDR_WIDTH + $clog2(LMEM_DATA_SIZE); + localparam CCI_VX_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH)); + localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt); localparam CCI_DATA_WIDTH = $bits(t_ccip_clData); @@ -103,8 +107,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [127:0] afu_id = `AFU_ACCEL_UUID; wire [63:0] dev_caps = {8'b0, - 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), - 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), + 5'(LMEM_BYTE_ADDR_WIDTH-20), + 3'(`CLOG2(NUM_LOCAL_MEM_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -480,7 +484,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_mem_bus_if #( .DATA_SIZE (LMEM_DATA_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .TAG_WIDTH (AVS_REQ_TAGW) ) cci_vx_mem_bus_if[2](); @@ -488,7 +492,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .SRC_DATA_WIDTH (CCI_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), .SRC_ADDR_WIDTH (CCI_ADDR_WIDTH), - .DST_ADDR_WIDTH (LMEM_ADDR_WIDTH), + .DST_ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .SRC_TAG_WIDTH (CCI_ADDR_WIDTH), .DST_TAG_WIDTH (AVS_REQ_TAGW), .REQ_OUT_BUF (0), @@ -538,7 +542,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), .SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), - .DST_ADDR_WIDTH (LMEM_ADDR_WIDTH), + .DST_ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH), .DST_TAG_WIDTH (AVS_REQ_TAGW), .REQ_OUT_BUF (0), @@ -579,14 
+583,14 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ //-- VX_mem_bus_if #( .DATA_SIZE (LMEM_DATA_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .TAG_WIDTH (AVS_REQ_TAGW+1) ) mem_bus_if[1](); VX_mem_arb #( .NUM_INPUTS (2), .DATA_SIZE (LMEM_DATA_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH (CCI_VX_ADDR_WIDTH), .TAG_WIDTH (AVS_REQ_TAGW), .ARBITER ("P"), // prioritize VX requests .REQ_OUT_BUF (0), @@ -602,8 +606,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ VX_avs_adapter #( .DATA_WIDTH (LMEM_DATA_WIDTH), - .ADDR_WIDTH_IN (LMEM_ADDR_WIDTH), - .ADDR_WIDTH_OUT($bits(t_local_mem_addr)), + .ADDR_WIDTH_IN (CCI_VX_ADDR_WIDTH), + .ADDR_WIDTH_OUT(LMEM_ADDR_WIDTH), .BURST_WIDTH (LMEM_BURST_CTRW), .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .TAG_WIDTH (AVS_REQ_TAGW + 1), From b48b605b51eaddac879d4642021ccbe1de7656a5 Mon Sep 17 00:00:00 2001 From: tinebp Date: Fri, 15 Nov 2024 03:42:06 -0800 Subject: [PATCH 417/488] remove deprecared yosys link --- hw/syn/yosys/synth.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index b44f16e6b..76559b8d3 100755 --- a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -28,7 +28,7 @@ dir_list=() inc_args="" macro_args="" no_warnings=1 -process="elaborate,netlist,techmap,verilog,link" +process="elaborate,netlist,techmap,verilog" declare -a excluded_warnings=("Resizing cell port") @@ -135,11 +135,6 @@ done echo "synth -top $top_level" fi - # link design - if echo "$process" | grep -q "link"; then - echo "link_design -top $top_level" - fi - # convert to netlist if echo "$process" | grep -q "netlist"; then echo "proc; opt" From 320c090613ab4a17be410e3c1860cf689c0b3da5 Mon Sep 17 00:00:00 2001 From: tinebp Date: Tue, 19 Nov 2024 01:57:33 -0800 Subject: [PATCH 418/488] xilinx asynchronous bram patch fixes --- hw/rtl/VX_platform.vh | 3 + hw/rtl/libs/VX_async_ram_patch.sv | 
244 ++++++++++++++------ hw/rtl/libs/VX_dp_ram.sv | 64 +++--- hw/rtl/libs/VX_rr_arbiter.sv | 2 +- hw/rtl/libs/VX_sp_ram.sv | 194 ++++++++-------- hw/scripts/xilinx_async_bram_patch.tcl | 301 +++++++++++++++++-------- hw/scripts/xilinx_export_netlist.tcl | 13 ++ hw/syn/xilinx/README | 3 + hw/syn/xilinx/xrt/Makefile | 1 + 9 files changed, 529 insertions(+), 296 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index d874b9b2b..08a2f6ca5 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -163,6 +163,7 @@ endgenerate `define USE_BLOCK_BRAM (* ramstyle = "block" *) `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) +`define RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams on" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) `define BLACKBOX_CELL (* black_box *) @@ -173,6 +174,7 @@ endgenerate `define USE_BLOCK_BRAM (* ram_style = "block" *) `define USE_FAST_BRAM (* ram_style = "distributed" *) `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) +`define RW_RAM_CHECK (* rw_addr_collision = "yes" *) `define DISABLE_BRAM (* ram_style = "registers" *) `define PRESERVE_NET (* keep = "true" *) `define BLACKBOX_CELL (* black_box *) @@ -183,6 +185,7 @@ endgenerate `define USE_BLOCK_BRAM `define USE_FAST_BRAM `define NO_RW_RAM_CHECK +`define RW_RAM_CHECK `define DISABLE_BRAM `define PRESERVE_NET `define BLACKBOX_CELL diff --git a/hw/rtl/libs/VX_async_ram_patch.sv b/hw/rtl/libs/VX_async_ram_patch.sv index fd29e881d..43e8139e6 100644 --- a/hw/rtl/libs/VX_async_ram_patch.sv +++ b/hw/rtl/libs/VX_async_ram_patch.sv @@ -13,12 +13,6 @@ `include "VX_platform.vh" -`define RAM_WRITE_WREN for (integer i = 0; i < WRENW; ++i) begin \ - if (wren[i]) begin \ - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ - end \ - end - `define RAM_INITIALIZATION \ if 
(INIT_ENABLE != 0) begin : g_init \ if (INIT_FILE != "") begin : g_file \ @@ -32,14 +26,93 @@ end \ end -`define RAM_BYPASS(__d) \ - reg [DATAW-1:0] bypass_data_r; \ - reg bypass_valid_r; \ +`define SYNC_RAM_WF_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [ADDRW-1:0] raddr_r; \ always @(posedge clk) begin \ - bypass_valid_r <= read_s && write && (raddr_s == waddr); \ - bypass_data_r <= wdata; \ + if (__re || __we) begin \ + if (__we) begin \ + ram[__wa] <= wdata; \ + end \ + raddr_r <= __ra; \ + end \ end \ - assign __d = bypass_valid_r ? bypass_data_r : rdata_r + assign __d = ram[raddr_r] + +`define SYNC_RAM_WF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [ADDRW-1:0] raddr_r; \ + always @(posedge clk) begin \ + if (__re || __we) begin \ + if (__we) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + raddr_r <= __ra; \ + end \ + end \ + assign __d = ram[raddr_r] + +`define SYNC_RAM_RF_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [DATAW-1:0] rdata_r; \ + always @(posedge clk) begin \ + if (__re || __we) begin \ + if (__we) begin \ + ram[__wa] <= wdata; \ + end \ + rdata_r <= ram[__ra]; \ + end \ + end \ + assign __d = rdata_r + +`define SYNC_RAM_RF_WREN_BLOCK(__d, __re, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + reg [DATAW-1:0] rdata_r; \ + always @(posedge clk) begin \ + if (__re || __we) begin \ + if (__we) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + rdata_r <= ram[__ra]; \ + end \ + end \ + assign __d = rdata_r + +`define 
ASYNC_RAM_BLOCK(__d, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we) begin \ + ram[__wa] <= wdata; \ + end \ + end \ + assign __d = ram[__ra] + +`define ASYNC_RAM_BLOCK_WREN(__d, __we, __ra, __wa) \ + `RAM_ATTRIBUTES reg [DATAW-1:0] ram [0:SIZE-1]; \ + `RAM_INITIALIZATION \ + always @(posedge clk) begin \ + if (__we) begin \ + for (integer i = 0; i < WRENW; ++i) begin \ + if (wren[i]) begin \ + ram[__wa][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \ + end \ + end \ + end \ + end \ + assign __d = ram[__ra] `TRACING_OFF module VX_async_ram_patch #( @@ -47,6 +120,8 @@ module VX_async_ram_patch #( parameter SIZE = 1, parameter WRENW = 1, parameter DUAL_PORT = 0, + parameter FORCE_BRAM = 0, + parameter WRITE_FIRST = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -79,77 +154,102 @@ module VX_async_ram_patch #( .out ({raddr_s, read_s, is_raddr_reg}) ); - // synchroneous ram + wire [DATAW-1:0] rdata_s, rdata_a; - wire [DATAW-1:0] rdata_s; - - if (WRENW != 1) begin : g_wren_sync_ram - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - reg [DATAW-1:0] rdata_r; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (read_s || write) begin - if (write) begin - `RAM_WRITE_WREN + if (1) begin : g_sync_ram + if (WRENW != 1) begin : g_wren + if (FORCE_BRAM) begin : g_bram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end + end else begin : g_lutram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES + `SYNC_RAM_WF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define 
RAM_ATTRIBUTES + `SYNC_RAM_RF_WREN_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end + end + end else begin : g_no_wren + if (FORCE_BRAM) begin : g_bram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `USE_BLOCK_BRAM + `SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end + end else begin : g_lutram + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES + `SYNC_RAM_WF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES + `SYNC_RAM_RF_BLOCK(rdata_s, read_s, write, raddr_s, waddr); + `undef RAM_ATTRIBUTES end - rdata_r <= ram[raddr_s]; end end - `RAM_BYPASS(rdata_s); - end else begin : g_no_wren_sync_ram - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - reg [DATAW-1:0] rdata_r; - `RAM_INITIALIZATION - `UNUSED_VAR (wren) - always @(posedge clk) begin - if (read_s || write) begin - if (write) begin - ram[waddr] <= wdata; - end - rdata_r <= ram[raddr_s]; - end - end - `RAM_BYPASS(rdata_s); end - // asynchronous ram (fallback) - - wire [DATAW-1:0] rdata_a; - - if (DUAL_PORT != 0) begin : g_dp_async_ram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - if (WRENW != 1) begin : g_wren - always @(posedge clk) begin - if (write) begin - `RAM_WRITE_WREN + if (1) begin : g_async_ram + if (DUAL_PORT != 0) begin : g_dp + if (WRENW != 1) begin : g_wren + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES + end + end else begin : g_no_wren + if (WRITE_FIRST) begin : g_write_first + `define 
RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, raddr, waddr); + `undef RAM_ATTRIBUTES end end - end else begin : g_no_wren - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; + end else begin : g_sp + if (WRENW != 1) begin : g_wren + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK_WREN(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES + end + end else begin : g_no_wren + if (WRITE_FIRST) begin : g_write_first + `define RAM_ATTRIBUTES `RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES + end else begin : g_read_first + `define RAM_ATTRIBUTES `NO_RW_RAM_CHECK + `ASYNC_RAM_BLOCK(rdata_a, write, waddr, waddr); + `undef RAM_ATTRIBUTES end end end - assign rdata_a = ram[raddr]; - end else begin : g_sp_async_ram - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - if (WRENW != 1) begin : g_wren - always @(posedge clk) begin - if (write) begin - `RAM_WRITE_WREN - end - end - end else begin : g_no_wren - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; - end - end - end - assign rdata_a = ram[waddr]; end assign rdata = is_raddr_reg ? 
rdata_s : rdata_a; diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 0cff67882..2cb88efe5 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -80,7 +80,7 @@ module VX_dp_ram #( if (FORCE_BRAM) begin : g_bram if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -93,7 +93,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr_r]; end else begin : g_no_wren - (* rw_addr_collision = "yes" *) `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -166,7 +166,7 @@ module VX_dp_ram #( end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - (* rw_addr_collision = "yes" *) `RAM_ARRAY_WREN + `RW_RAM_CHECK `RAM_ARRAY_WREN `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -179,7 +179,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr_r]; end else begin : g_no_wren - (* rw_addr_collision = "yes" *) reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION reg [ADDRW-1:0] raddr_r; always @(posedge clk) begin @@ -220,7 +220,7 @@ module VX_dp_ram #( end assign rdata = rdata_r; end - end else begin + end else begin : g_undefined if (WRENW != 1) begin : g_wren `RAM_ARRAY_WREN `RAM_INITIALIZATION @@ -253,30 +253,32 @@ module VX_dp_ram #( end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (1), + .FORCE_BRAM (FORCE_BRAM), + .WRITE_FIRST(RDW_MODE == "W"), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), 
+ .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (waddr), + .wdata (wdata), + .raddr (raddr), + .rdata (rdata) + ); + `else if (RDW_MODE == "W") begin : g_write_first - `ifdef VIVADO - VX_async_ram_patch #( - .DATAW (DATAW), - .SIZE (SIZE), - .WRENW (WRENW), - .DUAL_PORT (1), - .INIT_ENABLE(INIT_ENABLE), - .INIT_FILE (INIT_FILE), - .INIT_VALUE (INIT_VALUE) - ) async_ram_patch ( - .clk (clk), - .reset (reset), - .read (read), - .write (write), - .wren (wren), - .waddr (waddr), - .wdata (wdata), - .raddr (raddr), - .rdata (rdata) - ); - `else if (WRENW != 1) begin : g_wren - `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -285,7 +287,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end else begin : g_no_wren - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -294,7 +296,6 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end - `endif end else begin : g_read_first if (WRENW != 1) begin : g_wren `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN @@ -316,10 +317,11 @@ module VX_dp_ram #( assign rdata = ram[raddr]; end end + `endif end else begin : g_auto if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - `RAM_ARRAY_WREN + `RW_RAM_CHECK `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -328,7 +330,7 @@ module VX_dp_ram #( end assign rdata = ram[raddr]; end else begin : g_no_wren - reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 1d3b479bf..c86da584a 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -485,7 +485,7 @@ module VX_rr_arbiter #( .D 
(NUM_REQS) ) grant_decoder ( .sel_in (grant_index), - .data_in (1'b1), + .data_in (grant_valid), .data_out (grant_onehot) ); diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 88b922384..3c673e462 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -77,37 +77,9 @@ module VX_sp_ram #( localparam FORCE_BRAM = !LUTRAM && (SIZE * DATAW >= `MAX_LUTRAM); if (OUT_REG) begin : g_sync if (FORCE_BRAM) begin : g_bram - if (RDW_MODE == "R") begin : g_read_first + if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - `USE_BLOCK_BRAM `RAM_ARRAY_WREN - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE_WREN - end - rdata_r <= ram[addr]; - end - end - assign rdata = rdata_r; - end else begin : g_no_wren - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - ram[addr] <= wdata; - end - rdata_r <= ram[addr]; - end - end - assign rdata = rdata_r; - end - end else if (RDW_MODE == "W") begin : g_write_first - if (WRENW != 1) begin : g_wren - `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION reg [ADDRW-1:0] addr_r; always @(posedge clk) begin @@ -135,6 +107,34 @@ module VX_sp_ram #( end assign rdata = rdata_r; end + end else if (RDW_MODE == "R") begin : g_read_first + if (WRENW != 1) begin : g_wren + `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end else begin : g_no_wren + `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end + 
rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end end else if (RDW_MODE == "N") begin : g_no_change if (WRENW != 1) begin : g_wren `USE_BLOCK_BRAM `RAM_ARRAY_WREN @@ -165,7 +165,7 @@ module VX_sp_ram #( end assign rdata = rdata_r; end - end else if (RDW_MODE == "U") begin : g_unknown + end else if (RDW_MODE == "U") begin : g_undefined if (WRENW != 1) begin : g_wren `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION @@ -195,35 +195,7 @@ module VX_sp_ram #( end end end else begin : g_auto - if (RDW_MODE == "R") begin : g_read_first - if (WRENW != 1) begin : g_wren - `RAM_ARRAY_WREN - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - `RAM_WRITE_WREN - end - rdata_r <= ram[addr]; - end - end - assign rdata = rdata_r; - end else begin : g_no_wren - reg [DATAW-1:0] ram [0:SIZE-1]; - `RAM_INITIALIZATION - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - if (write) begin - ram[addr] <= wdata; - end - rdata_r <= ram[addr]; - end - end - assign rdata = rdata_r; - end - end else if (RDW_MODE == "W") begin : g_write_first + if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren `RAM_ARRAY_WREN `RAM_INITIALIZATION @@ -253,6 +225,34 @@ module VX_sp_ram #( end assign rdata = rdata_r; end + end else if (RDW_MODE == "R") begin : g_read_first + if (WRENW != 1) begin : g_wren + `RAM_ARRAY_WREN + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + `RAM_WRITE_WREN + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end else begin : g_no_wren + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + if (write) begin + ram[addr] <= wdata; + end + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; + end end else if (RDW_MODE == "N") begin : g_no_change if (WRENW != 
1) begin : g_wren `RAM_ARRAY_WREN @@ -283,7 +283,7 @@ module VX_sp_ram #( end assign rdata = rdata_r; end - end else if (RDW_MODE == "U") begin : g_unknown + end else if (RDW_MODE == "U") begin : g_undefined if (WRENW != 1) begin : g_wren `RAM_ARRAY_WREN `RAM_INITIALIZATION @@ -316,30 +316,32 @@ module VX_sp_ram #( end else begin : g_async `UNUSED_VAR (read) if (FORCE_BRAM) begin : g_bram + `ifdef VIVADO + VX_async_ram_patch #( + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .DUAL_PORT (0), + .FORCE_BRAM (FORCE_BRAM), + .WRITE_FIRST(RDW_MODE == "W"), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), + .INIT_VALUE (INIT_VALUE) + ) async_ram_patch ( + .clk (clk), + .reset (reset), + .read (read), + .write (write), + .wren (wren), + .waddr (addr), + .wdata (wdata), + .raddr (addr), + .rdata (rdata) + ); + `else if (RDW_MODE == "W") begin : g_write_first - `ifdef VIVADO - VX_async_ram_patch #( - .DATAW (DATAW), - .SIZE (SIZE), - .WRENW (WRENW), - .DUAL_PORT (0), - .INIT_ENABLE(INIT_ENABLE), - .INIT_FILE (INIT_FILE), - .INIT_VALUE (INIT_VALUE) - ) async_ram_patch ( - .clk (clk), - .reset (reset), - .read (read), - .write (write), - .wren (wren), - .waddr (addr), - .wdata (wdata), - .raddr (addr), - .rdata (rdata) - ); - `else if (WRENW != 1) begin : g_wren - `USE_BLOCK_BRAM `RAM_ARRAY_WREN + `RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -348,7 +350,7 @@ module VX_sp_ram #( end assign rdata = ram[addr]; end else begin : g_no_wren - `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK `USE_BLOCK_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -357,7 +359,6 @@ module VX_sp_ram #( end assign rdata = ram[addr]; end - `endif end else begin : g_read_first if (WRENW != 1) begin : g_wren `NO_RW_RAM_CHECK `USE_BLOCK_BRAM `RAM_ARRAY_WREN @@ -379,10 +380,11 @@ module VX_sp_ram #( assign rdata = ram[addr]; end end + `endif end else begin 
: g_auto if (RDW_MODE == "W") begin : g_write_first if (WRENW != 1) begin : g_wren - `RAM_ARRAY_WREN + `RW_RAM_CHECK `RAM_ARRAY_WREN `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -391,7 +393,7 @@ module VX_sp_ram #( end assign rdata = ram[addr]; end else begin : g_no_wren - reg [DATAW-1:0] ram [0:SIZE-1]; + `RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -443,15 +445,7 @@ module VX_sp_ram #( end if (OUT_REG) begin : g_sync - if (RDW_MODE == "R") begin : g_read_first - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (read || write) begin - rdata_r <= ram[addr]; - end - end - assign rdata = rdata_r; - end else if (RDW_MODE == "W") begin : g_write_first + if (RDW_MODE == "W") begin : g_write_first reg [ADDRW-1:0] addr_r; always @(posedge clk) begin if (read || write) begin @@ -459,6 +453,14 @@ module VX_sp_ram #( end end assign rdata = ram[addr_r]; + end else if (RDW_MODE == "R") begin : g_read_first + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (read || write) begin + rdata_r <= ram[addr]; + end + end + assign rdata = rdata_r; end else if (RDW_MODE == "N") begin : g_no_change reg [DATAW-1:0] rdata_r; always @(posedge clk) begin diff --git a/hw/scripts/xilinx_async_bram_patch.tcl b/hw/scripts/xilinx_async_bram_patch.tcl index 5af7ba953..f0a49ecd6 100644 --- a/hw/scripts/xilinx_async_bram_patch.tcl +++ b/hw/scripts/xilinx_async_bram_patch.tcl @@ -1,3 +1,16 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + namespace eval vortex { variable debug 0 @@ -17,6 +30,25 @@ proc str_replace {str match repl} { return $result } +proc regex_escape {str} { + return [string map { + \\ \\\\ + ^ \\^ + . \\. + \[ \\\[ + \] \\\] + \$ \\\$ + \( \\\( + \) \\\) + | \\| + * \\* + + \\+ + ? \\? + \{ \\\{ + \} \\\} + } $str] +} + proc unique_cell_name {name} { if {[get_cells -quiet $name] == {}} { return $name } set index 0 @@ -31,31 +63,60 @@ proc unique_net_name {name} { return ${name}_${index} } -proc find_nested_cells {parent name_match {should_exist 1}} { +proc build_parent_child_map {all_cells} { + set parent_child_map {} + foreach cell $all_cells { + set parent [get_property PARENT $cell] + if {$parent ne ""} { + if {[dict exists $parent_child_map $parent]} { + dict lappend parent_child_map $parent $cell + } else { + dict set parent_child_map $parent [list $cell] + } + } + } + return $parent_child_map +} + +proc find_cell_descendants_recursive {parent_cell parent_child_map} { + set descendants {} + if {[dict exists $parent_child_map $parent_cell]} { + set children [dict get $parent_child_map $parent_cell] + foreach child $children { + # Add the child to the list + lappend descendants $child + # Recursively add its descendants + set sub_descendants [find_cell_descendants_recursive $child $parent_child_map] + lappend descendants {*}$sub_descendants + } + } + return $descendants +} + +proc find_cell_descendants {parent_cell} { + set all_cells [get_cells -hierarchical] + set parent_child_map [build_parent_child_map $all_cells] + return [find_cell_descendants_recursive $parent_cell $parent_child_map] +} + +proc find_nested_cells {parent_cell name_match {should_exist 1}} { + set hier_sep [get_hierarchy_separator] set matching_cells {} - foreach cell [get_cells -hierarchical -include_replicated_objects -filter "PARENT == $parent"] { - set name [get_property NAME $cell] - if {[regexp 
$name_match $name]} { + foreach cell [find_cell_descendants $parent_cell] { + set parent_name [get_property PARENT $cell] + set cell_name [get_property NAME $cell] + set name_prefix [regex_escape "${parent_name}${hier_sep}"] + set pattern "${name_prefix}${name_match}" + if {[regexp $pattern $cell_name]} { lappend matching_cells $cell } } if {[llength $matching_cells] == 0} { - print_error "No matching cell found for '$parent' matching '$name_match'." $should_exist + print_error "No matching cell found for '$parent_cell' matching '$name_match'." $should_exist } return $matching_cells } -proc find_nested_cell {parent name_match} { - foreach cell [get_cells -hierarchical -filter "PARENT == $parent"] { - set name [get_property NAME $cell] - if {$name == $name_match} { - return $cell - } - } - puts "ERROR: No matching cell found for '$parent' matching '$name_match'." - exit -1 -} - proc find_cell_nets {cell name_match {should_exist 1}} { set matching_nets {} foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] { @@ -70,22 +131,23 @@ proc find_cell_nets {cell name_match {should_exist 1}} { return $matching_nets } -proc get_cell_net {cell name_match} { - foreach net [get_nets -hierarchical -filter "PARENT_CELL == $cell"] { - set name [get_property NAME $net] - if {$name == $name_match} { - return $net - } +proc get_cell_net {cell name} { + set net [get_nets -hierarchical -filter "PARENT_CELL == $cell && NAME == $name"] + if {[llength $net] == 0} { + puts "ERROR: No matching net found for '$cell' matching '$name'." + exit -1 } - puts "ERROR: No matching net found for '$cell' matching '$name_match'." 
- exit -1 + return $net; } proc find_cell_pins {cell name_match {should_exist 1}} { + set hier_sep [get_hierarchy_separator] set matching_pins {} foreach pin [get_pins -of_objects $cell] { set name [get_property NAME $pin] - if {[regexp $name_match $name]} { + set name_prefix [regex_escape "${cell}${hier_sep}"] + set pattern "${name_prefix}${name_match}" + if {[regexp $pattern $name]} { lappend matching_pins $pin } } @@ -95,15 +157,31 @@ proc find_cell_pins {cell name_match {should_exist 1}} { return $matching_pins } -proc get_cell_pin {cell name_match} { - foreach pin [get_pins -of_objects $cell] { - set name [get_property NAME $pin] - if {$name == $name_match} { - return $pin - } +proc get_cell_pin {cell name} { + set pin [get_pins -of_objects $cell -filter "NAME == $name"] + if {[llength $pin] == 0} { + puts "ERROR: No matching pin found for '$cell' matching '$name'." + exit -1 } - puts "ERROR: No matching pin found for '$cell' matching '$name_match'." - exit -1 + return $pin +} + +proc remove_cell_from_netlist {cell} { + variable debug + + puts "INFO: Removing cell '$cell' from the netlist." 
+ + # Disconnect all pins of the cell + #foreach pin [get_pins -quiet -of_objects $cell] { + # foreach net [get_nets -quiet -of_objects $pin] { + # disconnect_net -net $net -objects $pin + # if {$debug} {puts "DEBUG: Disconnected net '$net' from pin '$pin'."} + # } + #} + + # Remove the cell + remove_cell $cell + if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."} } proc replace_pin_source {pin source_pin} { @@ -141,10 +219,42 @@ proc replace_pin_source {pin source_pin} { if {$debug} {puts "DEBUG: Connected net '$source_net' to pin '$pin'."} } -proc create_register_next {reg_cell prefix_name} { +proc find_net_driver {input_net {should_exist 1}} { + set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}] + if {[llength $driverPins] == 0} { + set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}] + if {[llength $driverPorts] == 0} { + print_error "No driver found for '$input_net'." $should_exist + } elseif {[llength $driverPorts] > 1} { + puts "WARNING: Multiple driver ports found for '$input_net'." + return [lindex $driverPorts 0] + } + return $driverPorts + } elseif {[llength $driverPins] > 1} { + puts "WARNING: Multiple driver pins found for '$input_net'." + return [lindex $driverPins 0] + } + return $driverPins +} + +proc find_pin_driver {input_pin {should_exist 1}} { + set net [get_nets -quiet -of_objects $input_pin] + if {[llength $net] == 0} { + print_error "No net connected to pin '$input_pin'." $should_exist + return "" + } elseif {[llength $net] > 1} { + puts "ERROR: Multiple nets connected to pin '$input_pin'." 
+ exit -1 + } + return [find_net_driver $net] +} + +proc create_register_next {parent reg_cell} { variable debug - set reg_d_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/D"}] + set hier_sep [get_hierarchy_separator] + + set reg_d_pin [get_pins "${reg_cell}${hier_sep}D"] if {[llength $reg_d_pin] == 0} { puts "ERROR: No D pin found on register cell '$reg_cell'." exit -1 @@ -167,7 +277,7 @@ proc create_register_next {reg_cell prefix_name} { set register_type [get_property REF_NAME $reg_cell] if {$register_type == "FDRE"} { - set reg_r_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/R"}] + set reg_r_pin [get_pins "${reg_cell}${hier_sep}R"] if {[llength $reg_r_pin] == 0} { puts "ERROR: No R pin found on FDRE cell '$reg_cell'." exit -1 @@ -184,7 +294,7 @@ proc create_register_next {reg_cell prefix_name} { exit -1 } } elseif {$register_type == "FDSE"} { - set reg_s_pin [get_pins -of_objects $reg_cell -filter {NAME =~ "*/S"}] + set reg_s_pin [get_pins "${reg_cell}${hier_sep}S"] if {[llength $reg_s_pin] == 0} { puts "ERROR: No S pin found on FDSE cell '$reg_cell'." exit -1 @@ -229,7 +339,7 @@ proc create_register_next {reg_cell prefix_name} { # Use a 2x1 LUT to describe the logic: # FDRE: O = I1 ? 0 : I0; where I0=D, I1=R # FDSE: O = I1 ? 1 : I0; where I0=D, I1=S - set lut_name [unique_cell_name $prefix_name] + set lut_name [unique_cell_name "${parent}${hier_sep}raddr_next"] set lut_cell [create_cell -reference LUT2 $lut_name] puts "INFO: Created lut cell: '$lut_cell'" @@ -242,7 +352,7 @@ proc create_register_next {reg_cell prefix_name} { exit 1 } - set lut_i0_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I0"}] + set lut_i0_pin [get_pins "${lut_cell}${hier_sep}I0"] if {[llength $lut_i0_pin] == 0} { puts "ERROR: No I0 pin found on FDSE cell '$lut_cell'." 
exit -1 @@ -251,7 +361,7 @@ proc create_register_next {reg_cell prefix_name} { exit -1 } - set lut_i1_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/I1"}] + set lut_i1_pin [get_pins "${lut_cell}${hier_sep}I1"] if {[llength $lut_i1_pin] == 0} { puts "ERROR: No I1 pin found on FDSE cell '$lut_cell'." exit -1 @@ -260,7 +370,7 @@ proc create_register_next {reg_cell prefix_name} { exit -1 } - set lut_o_pin [get_pins -of_objects $lut_cell -filter {NAME =~ "*/O"}] + set lut_o_pin [get_pins "${lut_cell}${hier_sep}O"] if {[llength $lut_o_pin] == 0} { puts "ERROR: No O pin found on FDSE cell '$lut_cell'." exit -1 @@ -278,19 +388,22 @@ proc create_register_next {reg_cell prefix_name} { return $lut_o_pin } -proc getOrCreateVCCPin {prefix_name} { +proc getOrCreateVCCPin {parent} { variable debug - set vcc_cell "" - set vcc_cells [get_cells -quiet -filter {REF_NAME == VCC}] - if {[llength $vcc_cells] == 0} { - set cell_name [unique_cell_name $prefix_name] + set hier_sep [get_hierarchy_separator] + set cell_name "${parent}${hier_sep}VCC" + + set vcc_cell [get_cells -quiet $cell_name] + if {[llength $vcc_cell] == 0} { set vcc_cell [create_cell -reference VCC $cell_name] puts "INFO: Created VCC cell: '$vcc_cell'" - } else { - set vcc_cell [lindex $vcc_cells 0] + } elseif {[llength $vcc_cell] > 1} { + puts "ERROR: Multiple VCC cells found with name '$cell_name'." + exit -1 } - set vcc_pin [get_pins -of_objects $vcc_cell -filter {NAME =~ "*/P"}] + + set vcc_pin [get_pins "${vcc_cell}${hier_sep}P"] if {[llength $vcc_pin] == 0} { puts "ERROR: No VCC pin found on VCC cell '$vcc_cell'." exit -1 @@ -298,22 +411,26 @@ proc getOrCreateVCCPin {prefix_name} { puts "ERROR: Multiple VCC pins found on VCC cell '$vcc_cell'." 
exit -1 } + return $vcc_pin } -proc getOrCreateGNDPin {prefix_name} { +proc getOrCreateGNDPin {parent} { variable debug - set gnd_cell "" - set gnd_cells [get_cells -quiet -filter {REF_NAME == GND}] - if {[llength $gnd_cells] == 0} { - set cell_name [unique_cell_name $prefix_name] + set hier_sep [get_hierarchy_separator] + set cell_name "${parent}${hier_sep}GND" + + set gnd_cell [get_cells -quiet $cell_name] + if {[llength $gnd_cell] == 0} { set gnd_cell [create_cell -reference GND $cell_name] puts "INFO: Created GND cell: '$gnd_cell'" - } else { - set gnd_cell [lindex $gnd_cells 0] + } elseif {[llength $gnd_cell] > 1} { + puts "ERROR: Multiple GND cells found with name '$cell_name'." + exit -1 } - set gnd_pin [get_pins -of_objects $gnd_cell -filter {NAME =~ "*/G"}] + + set gnd_pin [get_pins "${gnd_cell}${hier_sep}G"] if {[llength $gnd_pin] == 0} { puts "ERROR: No GND pin found on GND cell '$gnd_cell'." exit -1 @@ -321,6 +438,7 @@ proc getOrCreateGNDPin {prefix_name} { puts "ERROR: Multiple GND pins found on GND cell '$gnd_cell'." exit -1 } + return $gnd_pin } @@ -338,35 +456,6 @@ proc find_net_sinks {input_net {should_exist 1}} { return $sink_pins } -proc find_net_driver {input_net {should_exist 1}} { - set driverPins [get_pins -quiet -leaf -of_objects $input_net -filter {DIRECTION == "OUT"}] - if {[llength $driverPins] == 0} { - set driverPorts [get_ports -quiet -of_objects $input_net -filter {DIRECTION == "IN"}] - if {[llength $driverPorts] == 0} { - print_error "No driver found for '$input_net'." $should_exist - } elseif {[llength $driverPorts] > 1} { - puts "WARNING: Multiple driver ports found for '$input_net'." - return [lindex $driverPorts 0] - } - return $driverPorts - } elseif {[llength $driverPins] > 1} { - puts "WARNING: Multiple driver pins found for '$input_net'." 
- return [lindex $driverPins 0] - } - return $driverPins -} - -proc find_pin_driver {input_pin {should_exist 1}} { - set net [get_nets -quiet -of_objects $input_pin] - if {[llength $net] == 0} { - print_error "No net connected to pin '$input_pin'." $should_exist - } elseif {[llength $net] > 1} { - puts "ERROR: Multiple nets connected to pin '$input_pin'." - exit -1 - } - return [find_net_driver $net] -} - proc find_matching_nets {cell nets match repl} { set matching_nets {} foreach net $nets { @@ -386,6 +475,25 @@ proc find_matching_nets {cell nets match repl} { return $matching_nets } +proc find_matching_pins {cell pins match repl} { + set matching_pins {} + foreach pin $pins { + set pin_name [str_replace $pin $match $repl] + set matching_pin [get_cell_pin $cell $pin_name] + if {$matching_pin != ""} { + lappend matching_pins $matching_pin + } + } + if {[llength $matching_pins] == 0} { + puts "ERROR: No matching pins found for '$pins'." + exit -1 + } elseif {[llength $matching_pins] != [llength $pins]} { + puts "ERROR: Mismatch in number of matching pins." + exit -1 + } + return $matching_pins +} + proc replace_net_source {net source_pin} { foreach pin [find_net_sinks $net 0] { replace_pin_source $pin $source_pin @@ -397,6 +505,8 @@ proc resolve_async_bram {inst} { puts "INFO: Resolving asynchronous BRAM patch: '$inst'." + set hier_sep [get_hierarchy_separator] + set raddr_w_nets [find_cell_nets $inst "raddr_w(\\\[\\d+\\\])?$"] set read_s_net [find_cell_nets $inst "read_s$"] set is_raddr_reg_net [find_cell_nets $inst "is_raddr_reg$"] @@ -433,7 +543,7 @@ proc resolve_async_bram {inst} { } # Create register next cell and return output pin - set reg_next_pin [create_register_next $raddr_src_cell "$inst/raddr_next"] + set reg_next_pin [create_register_next $inst $raddr_src_cell] if {$reg_next_pin == ""} { puts "ERROR: failed to create register next value for '$raddr_src_cell'." 
exit -1 @@ -444,7 +554,7 @@ proc resolve_async_bram {inst} { # Find the CE pin on raddr_src_cell if {$reg_ce_src_pin == ""} { - set reg_ce_pin [get_pins -of_objects $raddr_src_cell -filter {NAME =~ "*/CE"}] + set reg_ce_pin [get_pins "${raddr_src_cell}${hier_sep}CE"] if {[llength $reg_ce_pin] == 0} { puts "ERROR: No CE pin found on register cell '$raddr_src_cell'." exit -1 @@ -466,9 +576,10 @@ proc resolve_async_bram {inst} { # do we have a fully registered read address? if {[llength $reg_next_pins] == [llength $raddr_w_nets]} { puts "INFO: Fully registered read address detected." + + # Connect all reg_next_pins to all input pins attached to raddr_s_nets set addr_width [llength $raddr_w_nets] for {set addr_idx 0} {$addr_idx < $addr_width} {incr addr_idx} { - set raddr_w_net [lindex $raddr_w_nets $addr_idx] set raddr_s_net [lindex $raddr_s_nets $addr_idx] set reg_next_pin [lindex $reg_next_pins $addr_idx] puts "INFO: Connecting pin '$reg_next_pin' to '$raddr_s_net's pins." @@ -481,7 +592,7 @@ proc resolve_async_bram {inst} { replace_net_source $read_s_net $reg_ce_src_pin # Create Const<1>'s pin - set vcc_pin [getOrCreateVCCPin "$inst/VCC"] + set vcc_pin [getOrCreateVCCPin $inst] # Connect vcc_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins." @@ -490,18 +601,16 @@ proc resolve_async_bram {inst} { puts "WARNING: Not all read addresses are registered!" # Create Const<0>'s pin - set gnd_pin [getOrCreateGNDPin "$inst/GND"] + set gnd_pin [getOrCreateGNDPin $inst] # Connect gnd_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins." 
replace_net_source $is_raddr_reg_net $gnd_pin } - # Remove all placeholder cells - foreach cell [find_nested_cells $inst "placeholder$"] { - remove_cell $cell - if {$debug} {puts "DEBUG: Cell '$cell' was removed successfully."} - } + # Remove placeholder cell + set placeholder [get_cells "${inst}${hier_sep}placeholder"] + remove_cell_from_netlist $placeholder } proc resolve_async_brams {} { diff --git a/hw/scripts/xilinx_export_netlist.tcl b/hw/scripts/xilinx_export_netlist.tcl index 25a0d17e8..a6ff22ff5 100644 --- a/hw/scripts/xilinx_export_netlist.tcl +++ b/hw/scripts/xilinx_export_netlist.tcl @@ -1,3 +1,16 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # Function to export netlist to a Graphviz DOT file proc export_netlist {dot_file_name} { # Open the DOT file for writing diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index 0fb83e71b..a1ca231fe 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -47,6 +47,9 @@ TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope # analyze build report vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary +# resuming build for routing +TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.route_design" make > build.log 2>&1 & + # running test FPGA_BIN_DIR= TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo FPGA_BIN_DIR= TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 643724069..288031e2e 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -180,6 +180,7 @@ ifeq ($(TARGET), hw) cp $(BUILD_DIR)/_x/logs/link/vivado.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_utilization_placed.rpt $(BUILD_DIR)/bin cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin endif From b0c48e7a46dbd5169c500c4e51f6949587184c67 Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 20 Nov 2024 18:27:52 -0800 Subject: [PATCH 419/488] stream buffer area optimization --- hw/rtl/libs/VX_stream_buffer.sv | 39 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 4b77df83d..2cf08c0f4 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -86,38 +86,47 @@ module VX_stream_buffer 
#( end else begin : g_no_out_reg - reg [1:0][DATAW-1:0] shift_reg; - reg [1:0] fifo_state, fifo_state_n; + reg [DATAW-1:0] data_out_r, buffer; + reg valid_in_r, valid_out_r; wire fire_in = valid_in && ready_in; wire fire_out = valid_out && ready_out; - always @(*) begin - case ({fire_in, fire_out}) - 2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 - 2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 - default: fifo_state_n = fifo_state; - endcase + always @(posedge clk) begin + if (reset) begin + valid_in_r <= 1'b1; + end else begin + if (fire_in ^ fire_out) begin + valid_in_r <= valid_out_r ^ fire_in; + end + end end always @(posedge clk) begin if (reset) begin - fifo_state <= 2'b00; + valid_out_r <= 1'b0; end else begin - fifo_state <= fifo_state_n; + if (fire_in ^ fire_out) begin + valid_out_r <= valid_in_r ^ fire_out; + end end end always @(posedge clk) begin if (fire_in) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; + data_out_r <= data_in; end end - assign ready_in = ~fifo_state[1]; - assign valid_out = fifo_state[0]; - assign data_out = shift_reg[fifo_state[1]]; + always @(posedge clk) begin + if (fire_in) begin + buffer <= data_out_r; + end + end + + assign ready_in = valid_in_r; + assign valid_out = valid_out_r; + assign data_out = valid_in_r ? data_out_r : buffer; end From 8d8769c7100b9abcad3d1c1ff0eb011d2cfbb5dc Mon Sep 17 00:00:00 2001 From: tinebp Date: Wed, 20 Nov 2024 19:15:51 -0800 Subject: [PATCH 420/488] stream_buffer area optimization --- hw/rtl/libs/VX_stream_buffer.sv | 100 +++++++++++++------------------- 1 file changed, 39 insertions(+), 61 deletions(-) diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index 2cf08c0f4..ea4467cb3 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously +// A stream elastic buffer_r operates at full-bandwidth where fire_in and fire_out can happen simultaneously // It has the following benefits: // + full-bandwidth throughput // + ready_in and ready_out are decoupled @@ -45,88 +45,66 @@ module VX_stream_buffer #( assign valid_out = valid_in; assign data_out = data_in; - end else if (OUT_REG != 0) begin : g_out_reg + end else begin : g_buffer - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - reg valid_out_r; - reg no_buffer; + reg [DATAW-1:0] data_out_r, buffer_r; + reg valid_out_r, valid_in_r; wire fire_in = valid_in && ready_in; wire flow_out = ready_out || ~valid_out; always @(posedge clk) begin if (reset) begin - valid_out_r <= 0; - no_buffer <= 1; - end else begin - if (flow_out) begin - no_buffer <= 1; - end else if (valid_in) begin - no_buffer <= 0; - end - if (flow_out) begin - valid_out_r <= valid_in || ~no_buffer; - end - end - end - - always @(posedge clk) begin - if (fire_in) begin - buffer <= data_in; - end - if (flow_out) begin - data_out_r <= no_buffer ? 
data_in : buffer; - end - end - - assign ready_in = no_buffer; - assign valid_out = valid_out_r; - assign data_out = data_out_r; - - end else begin : g_no_out_reg - - reg [DATAW-1:0] data_out_r, buffer; - reg valid_in_r, valid_out_r; - - wire fire_in = valid_in && ready_in; - wire fire_out = valid_out && ready_out; - - always @(posedge clk) begin - if (reset) begin - valid_in_r <= 1'b1; - end else begin - if (fire_in ^ fire_out) begin - valid_in_r <= valid_out_r ^ fire_in; - end + valid_in_r <= 1'b1; + end else if (valid_in || flow_out) begin + valid_in_r <= flow_out; end end always @(posedge clk) begin if (reset) begin valid_out_r <= 1'b0; - end else begin - if (fire_in ^ fire_out) begin - valid_out_r <= valid_in_r ^ fire_out; + end else if (flow_out) begin + valid_out_r <= valid_in || ~valid_in_r; + end + end + + if (OUT_REG != 0) begin : g_out_reg + + always @(posedge clk) begin + if (fire_in) begin + buffer_r <= data_in; end end - end - always @(posedge clk) begin - if (fire_in) begin - data_out_r <= data_in; + always @(posedge clk) begin + if (flow_out) begin + data_out_r <= valid_in_r ? data_in : buffer_r; + end end - end - always @(posedge clk) begin - if (fire_in) begin - buffer <= data_out_r; + assign data_out = data_out_r; + + end else begin : g_no_out_reg + + always @(posedge clk) begin + if (fire_in) begin + data_out_r <= data_in; + end end + + always @(posedge clk) begin + if (fire_in) begin + buffer_r <= data_out_r; + end + end + + assign data_out = valid_in_r ? data_out_r : buffer_r; + end - assign ready_in = valid_in_r; assign valid_out = valid_out_r; - assign data_out = valid_in_r ? 
data_out_r : buffer; + assign ready_in = valid_in_r; end From 180735c531df8f4dafcc484814ea2600ce9cb711 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 21 Nov 2024 16:47:00 -0800 Subject: [PATCH 421/488] fifoqueue area optimization --- hw/rtl/libs/VX_fifo_queue.sv | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index 720a1a2c6..f3cc65b7b 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -90,9 +90,6 @@ module VX_fifo_queue #( end end - wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); - wire bypass = push && (empty || (going_empty && pop)); - VX_dp_ram #( .DATAW (DATAW), .SIZE (DEPTH), @@ -101,7 +98,7 @@ module VX_fifo_queue #( ) dp_ram ( .clk (clk), .reset (reset), - .read (~bypass), + .read (1'b1), .write (push), .wren (1'b1), .raddr (rd_ptr_r), @@ -112,11 +109,10 @@ module VX_fifo_queue #( if (OUT_REG != 0) begin : g_out_reg reg [DATAW-1:0] data_out_r; + wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); always @(posedge clk) begin - if (bypass) begin - data_out_r <= data_in; - end else if (pop) begin - data_out_r <= data_out_w; + if (pop || (push && empty)) begin + data_out_r <= (empty || going_empty) ? 
data_in : data_out_w; end end assign data_out = data_out_r; From 18bf49d1e0254e4236a51355edc5c11e1116d624 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 21 Nov 2024 16:48:18 -0800 Subject: [PATCH 422/488] minor update --- hw/scripts/xilinx_async_bram_patch.tcl | 34 ++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/hw/scripts/xilinx_async_bram_patch.tcl b/hw/scripts/xilinx_async_bram_patch.tcl index f0a49ecd6..e4a684e3b 100644 --- a/hw/scripts/xilinx_async_bram_patch.tcl +++ b/hw/scripts/xilinx_async_bram_patch.tcl @@ -597,6 +597,11 @@ proc resolve_async_bram {inst} { # Connect vcc_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$vcc_pin' to '$is_raddr_reg_net's pins." replace_net_source $is_raddr_reg_net $vcc_pin + + # Remove all async_ram cells + foreach cell [find_nested_cells $inst "g_async_ram.*" 0] { + remove_cell_from_netlist $cell + } } else { puts "WARNING: Not all read addresses are registered!" @@ -606,11 +611,17 @@ proc resolve_async_bram {inst} { # Connect gnd_pin to all input pins attached to is_raddr_reg_net puts "INFO: Connecting pin '$gnd_pin' to '$is_raddr_reg_net's pins." replace_net_source $is_raddr_reg_net $gnd_pin + + # Remove all sync_ram cells + foreach cell [find_nested_cells $inst "g_sync_ram.*" 0] { + remove_cell_from_netlist $cell + } } # Remove placeholder cell - set placeholder [get_cells "${inst}${hier_sep}placeholder"] - remove_cell_from_netlist $placeholder + foreach cell [find_nested_cells $inst "placeholder$"] { + remove_cell_from_netlist $cell + } } proc resolve_async_brams {} { @@ -628,7 +639,26 @@ proc resolve_async_brams {} { } } +proc dump_async_bram_cells {} { + set bram_patch_cells [get_cells -hierarchical -filter {REF_NAME =~ "*VX_async_ram_patch*"}] + if {[llength $bram_patch_cells] != 0} { + foreach cell $bram_patch_cells { + puts "INFO: Found async BRAM patch cell: '$cell'." 
+ set child_cells [find_cell_descendants $cell] + foreach child $child_cells { + set type [get_property REF_NAME $child] + puts "INFO: child cell: '$child', type: '$type'" + } + } + } else { + puts "INFO: No async BRAM patch cells found in the design." + } +} + } # Invoke the procedure to resolve async BRAM vortex::resolve_async_brams + +# dump async bram cells +#vortex::dump_async_bram_cells From 7c4ce748011e33f8f9e1ce0e2c65744d3f5dd187 Mon Sep 17 00:00:00 2001 From: tinebp Date: Thu, 21 Nov 2024 16:48:41 -0800 Subject: [PATCH 423/488] memory unit timing optimization --- hw/rtl/core/VX_mem_unit.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv index 931ad65cd..98491e73d 100644 --- a/hw/rtl/core/VX_mem_unit.sv +++ b/hw/rtl/core/VX_mem_unit.sv @@ -47,7 +47,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches VX_lmem_switch #( - .REQ0_OUT_BUF (3), + .REQ0_OUT_BUF (1), .REQ1_OUT_BUF (0), .RSP_OUT_BUF (1), .ARBITER ("P") @@ -78,7 +78,7 @@ module VX_mem_unit import VX_gpu_pkg::*; #( .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), .ARBITER ("P"), .REQ_OUT_BUF (3), - .RSP_OUT_BUF (0) + .RSP_OUT_BUF (2) ) lmem_adapter ( .clk (clk), .reset (reset), From 3e4bbfc9f04d29e67bb23b4d25497744ebf85aaa Mon Sep 17 00:00:00 2001 From: tinebp Date: Fri, 22 Nov 2024 11:12:17 -0800 Subject: [PATCH 424/488] minor update --- hw/rtl/libs/VX_fifo_queue.sv | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index f3cc65b7b..c7a4aab6d 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -110,9 +110,12 @@ module VX_fifo_queue #( if (OUT_REG != 0) begin : g_out_reg reg [DATAW-1:0] data_out_r; wire going_empty = (ALM_EMPTY == 1) ? 
alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); + wire bypass = push && (empty || (going_empty && pop)); always @(posedge clk) begin - if (pop || (push && empty)) begin - data_out_r <= (empty || going_empty) ? data_in : data_out_w; + if (bypass) begin + data_out_r <= data_in; + end else if (pop) begin + data_out_r <= data_out_w; end end assign data_out = data_out_r; From 1e4583ac17cb600b74a6d104395759eed1dbb601 Mon Sep 17 00:00:00 2001 From: MichaelJSr Date: Tue, 26 Nov 2024 18:41:01 -0800 Subject: [PATCH 425/488] Adds the riscv vector extension into simx --- ci/regression.sh.in | 16 +- hw/rtl/VX_config.vh | 4 + hw/rtl/VX_types.vh | 13 + perf/cache/cache_perf.log | 2 +- sim/common/rvfloats.cpp | 34 + sim/common/rvfloats.h | 5 + sim/common/softfloat_ext.cpp | 486 ++ sim/common/softfloat_ext.h | 14 + sim/opaesim/Makefile | 2 +- sim/rtlsim/Makefile | 2 +- sim/simx/Makefile | 4 +- sim/simx/arch.h | 6 + sim/simx/decode.cpp | 184 +- sim/simx/emulator.cpp | 75 + sim/simx/emulator.h | 90 +- sim/simx/execute.cpp | 141 +- sim/simx/execute_vector.cpp | 4493 +++++++++++++++++ sim/simx/instr.h | 89 +- sim/simx/types.h | 4 +- sim/xrtsim/Makefile | 2 +- tests/riscv/riscv-vector-tests/README | 39 + tests/riscv/riscv-vector-tests/run-test.sh.in | 117 + 22 files changed, 5717 insertions(+), 105 deletions(-) create mode 100644 sim/common/softfloat_ext.cpp create mode 100644 sim/common/softfloat_ext.h create mode 100644 sim/simx/execute_vector.cpp create mode 100644 tests/riscv/riscv-vector-tests/README create mode 100755 tests/riscv/riscv-vector-tests/run-test.sh.in diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 849a8769f..53819490f 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -386,10 +386,20 @@ synthesis() echo "synthesis tests done!" } +vector() +{ + echo "begin vector tests..." + + make -C sim/simx + TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 ./tests/riscv/riscv-vector-tests/run-test.sh + + echo "vector tests done!" 
+} + show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--all] [--h|--help]" } declare -a tests=() @@ -439,6 +449,9 @@ while [ "$1" != "" ]; do --synthesis ) tests+=("synthesis") ;; + --vector ) + tests+=("vector") + ;; --all ) tests=() tests+=("unittest") @@ -454,6 +467,7 @@ while [ "$1" != "" ]; do tests+=("scope") tests+=("stress") tests+=("synthesis") + tests+=("vector") ;; -h | --help ) show_usage diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 29eb5c9d8..3badaa3d3 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -87,6 +87,10 @@ `endif `endif +`ifndef VLEN +`define VLEN 256 +`endif + `ifndef NUM_CLUSTERS `define NUM_CLUSTERS 1 `endif diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 048ba0a5c..4c8505e5e 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -188,6 +188,19 @@ `define VX_CSR_MIMPID 12'hF13 `define VX_CSR_MHARTID 12'hF14 +// Vector CSRs + +`define VX_CSR_VSTART 12'h008 +`define VX_CSR_VXSAT 12'h009 +`define VX_CSR_VXRM 12'h00A +`define VX_CSR_VCSR 12'h00F +`define VX_CSR_VL 12'hC20 +`define VX_CSR_VTYPE 12'hC21 +`define VX_CSR_VLENB 12'hC22 +`define VX_CSR_VCYCLE 12'hC00 +`define VX_CSR_VTIME 12'hC01 +`define VX_CSR_VINSTRET 12'hC02 + // GPGU CSRs `define VX_CSR_THREAD_ID 12'hCC0 diff --git a/perf/cache/cache_perf.log b/perf/cache/cache_perf.log index 21a446d25..0a4a55cc8 100644 --- a/perf/cache/cache_perf.log +++ b/perf/cache/cache_perf.log @@ -1,3 +1,3 @@ CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE 
-DICACHE_NUM_WAYS=1 make -C ./ci/../driver/rtlsim -verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so +verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 
-DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/softfloat_ext.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so diff --git a/sim/common/rvfloats.cpp b/sim/common/rvfloats.cpp index 3e577f7f9..2b252010c 100644 --- a/sim/common/rvfloats.cpp +++ b/sim/common/rvfloats.cpp @@ -12,6 +12,7 @@ // limitations under the License. #include "rvfloats.h" +#include "softfloat_ext.h" #include extern "C" { @@ -158,6 +159,34 @@ uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) { return from_float64_t(r); } +uint32_t rv_frecip7_s(uint32_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f32_recip7(to_float32_t(a)); + if (fflags) { *fflags = softfloat_exceptionFlags; } + return from_float32_t(r); +} + +uint64_t rv_frecip7_d(uint64_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f64_recip7(to_float64_t(a)); + if (fflags) { *fflags = softfloat_exceptionFlags; } + return from_float64_t(r); +} + +uint32_t rv_frsqrt7_s(uint32_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f32_rsqrte7(to_float32_t(a)); + if (fflags) { *fflags =softfloat_exceptionFlags; } + return from_float32_t(r); +} + +uint64_t rv_frsqrt7_d(uint64_t a, uint32_t frm, uint32_t* fflags) { + softfloat_roundingMode = frm; + auto r = f64_rsqrte7(to_float64_t(a)); + if 
(fflags) { *fflags = softfloat_exceptionFlags; } + return from_float64_t(r); +} + uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags) { rv_init(frm); auto r = f32_sqrt(to_float32_t(a)); @@ -486,6 +515,11 @@ uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) { return r; } +uint32_t rv_dtof_r(uint64_t a, uint32_t frm) { + rv_init(frm); + return rv_dtof(a); +} + uint32_t rv_dtof(uint64_t a) { auto r = f64_to_f32(to_float64_t(a)); return from_float32_t(r); diff --git a/sim/common/rvfloats.h b/sim/common/rvfloats.h index d921846dd..86b60e8ee 100644 --- a/sim/common/rvfloats.h +++ b/sim/common/rvfloats.h @@ -28,6 +28,8 @@ uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags); uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags); uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags); +uint32_t rv_frecip7_s(uint32_t a, uint32_t frm, uint32_t* fflags); +uint32_t rv_frsqrt7_s(uint32_t a, uint32_t frm, uint32_t* fflags); uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags); uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags); @@ -58,6 +60,8 @@ uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags); uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags); uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags); uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_frecip7_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_frsqrt7_d(uint64_t a, uint32_t frm, uint32_t* fflags); uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags); uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags); @@ -85,6 +89,7 @@ uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags); uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags); uint32_t rv_dtof(uint64_t 
a); +uint32_t rv_dtof_r(uint64_t a, uint32_t frm); uint64_t rv_ftod(uint32_t a); #ifdef __cplusplus diff --git a/sim/common/softfloat_ext.cpp b/sim/common/softfloat_ext.cpp new file mode 100644 index 000000000..877bdc8ac --- /dev/null +++ b/sim/common/softfloat_ext.cpp @@ -0,0 +1,486 @@ +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3e, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include +#include +#include <../RISCV/specialize.h> +#include +#include "softfloat_ext.h" + +uint_fast16_t f16_classify( float16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F; + uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0; + bool sign = signF16UI( uiA ); + bool fracZero = fracF16UI( uiA ) == 0; + bool isNaN = isNaNF16UI( uiA ); + bool isSNaN = softfloat_isSigNaNF16UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + +uint_fast16_t f32_classify( float32_t a ) +{ + union ui32_f32 uA; + uint_fast32_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF32UI( uiA ) == 0xFF; + uint_fast16_t subnormalOrZero = expF32UI( uiA ) == 0; + bool sign = signF32UI( uiA ); + bool fracZero = fracF32UI( uiA ) == 0; + bool isNaN = isNaNF32UI( uiA ); + bool isSNaN = softfloat_isSigNaNF32UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && 
!infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + +uint_fast16_t f64_classify( float64_t a ) +{ + union ui64_f64 uA; + uint_fast64_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF64UI( uiA ) == 0x7FF; + uint_fast16_t subnormalOrZero = expF64UI( uiA ) == 0; + bool sign = signF64UI( uiA ); + bool fracZero = fracF64UI( uiA ) == 0; + bool isNaN = isNaNF64UI( uiA ); + bool isSNaN = softfloat_isSigNaNF64UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + +static inline uint64_t extract64(uint64_t val, int pos, int len) +{ + assert(pos >= 0 && len > 0 && len <= 64 - pos); + return (val >> pos) & (~UINT64_C(0) >> (64 - len)); +} + +static inline uint64_t make_mask64(int pos, int len) +{ + assert(pos >= 0 && len > 0 && pos < 64 && len <= 64); + return (UINT64_MAX >> (64 - len)) << pos; +} + +//user needs to truncate output to required length +static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) { + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 52, 51, 50, 48, 47, 46, 44, 43, + 42, 41, 40, 39, 38, 36, 35, 34, + 33, 32, 31, 30, 30, 29, 28, 27, + 26, 25, 24, 
23, 23, 22, 21, 20, + 19, 19, 18, 17, 16, 16, 15, 14, + 14, 13, 12, 12, 11, 10, 10, 9, + 9, 8, 7, 7, 6, 6, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0, + 127, 125, 123, 121, 119, 118, 116, 114, + 113, 111, 109, 108, 106, 105, 103, 102, + 100, 99, 97, 96, 95, 93, 92, 91, + 90, 88, 87, 86, 85, 84, 83, 82, + 80, 79, 78, 77, 76, 75, 74, 73, + 72, 71, 70, 70, 69, 68, 67, 66, + 65, 64, 63, 63, 62, 61, 60, 59, + 59, 58, 57, 56, 56, 55, 54, 53}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + } + + int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1)); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2; + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_rsqrte7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +num + uA.ui = rsqrte7(uA.ui, 5, 10, sub); + break; + } + + return uA.f; +} + +float32_t f32_rsqrte7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + 
softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +num + uA.ui = rsqrte7(uA.ui, 8, 23, sub); + break; + } + + return uA.f; +} + +float64_t f64_rsqrte7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +num + uA.ui = rsqrte7(uA.ui, 11, 52, sub); + break; + } + + return uA.f; +} + +//user needs to truncate output to required length +static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub, + bool *round_abnormal) +{ + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 127, 125, 123, 121, 119, 117, 116, 114, + 112, 110, 109, 107, 105, 104, 102, 100, + 99, 97, 96, 94, 93, 91, 90, 88, + 87, 85, 84, 83, 81, 80, 79, 77, + 76, 75, 74, 72, 71, 70, 69, 68, + 66, 65, 64, 63, 62, 61, 60, 59, + 58, 57, 56, 55, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 41, 40, 40, 39, 38, 37, 36, + 35, 35, 34, 33, 32, 31, 31, 30, + 29, 28, 28, 27, 26, 25, 25, 24, + 23, 23, 22, 21, 21, 20, 19, 19, + 18, 17, 17, 16, 15, 15, 14, 14, + 13, 12, 12, 11, 11, 10, 9, 9, + 8, 8, 7, 
7, 6, 5, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + + if (exp != 0 && exp != UINT64_MAX) { + *round_abnormal = true; + if (rm == 1 || + (rm == 2 && !sign) || + (rm == 3 && sign)) + return ((sign << (s+e)) | make_mask64(s, e)) - 1; + else + return (sign << (s+e)) | make_mask64(s, e); + } + } + + int idx = sig >> (s-p); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp; + if (out_exp == 0 || out_exp == UINT64_MAX) { + out_sig = (out_sig >> 1) | make_mask64(s - 1, 1); + if (out_exp == UINT64_MAX) { + out_sig >>= 1; + out_exp = 0; + } + } + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_recip7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +- normal + uA.ui = recip7(uA.ui, 5, 10, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float32_t f32_recip7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x80000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + 
break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +- normal + uA.ui = recip7(uA.ui, 8, 23, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float64_t f64_recip7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000000000000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + [[fallthrough]]; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + [[fallthrough]]; + default: // +- normal + uA.ui = recip7(uA.ui, 11, 52, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} \ No newline at end of file diff --git a/sim/common/softfloat_ext.h b/sim/common/softfloat_ext.h new file mode 100644 index 000000000..7a18af9f7 --- /dev/null +++ b/sim/common/softfloat_ext.h @@ -0,0 +1,14 @@ +#include +#include + +uint_fast16_t f16_classify( float16_t ); +float16_t f16_rsqrte7( float16_t ); 
+float16_t f16_recip7( float16_t ); + +uint_fast16_t f32_classify( float32_t ); +float32_t f32_rsqrte7( float32_t ); +float32_t f32_recip7( float32_t ); + +uint_fast16_t f64_classify( float64_t ); +float64_t f64_rsqrte7( float64_t ); +float64_t f64_recip7( float64_t ); \ No newline at end of file diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index b04f8ddb4..49b0f4ab8 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -51,7 +51,7 @@ endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index ecaee717b..3903bbd85 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -35,7 +35,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/processor.cpp diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 31fde7023..b97e9c00f 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -17,8 +17,8 @@ CXXFLAGS += $(CONFIGS) LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator -SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp 
$(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp -SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp +SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp +SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/execute_vector.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp # Debugging ifdef DEBUG diff --git a/sim/simx/arch.h b/sim/simx/arch.h index 6becf5c91..d68345db6 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -29,6 +29,7 @@ private: uint16_t num_cores_; uint16_t num_clusters_; uint16_t socket_size_; + uint16_t vsize_; uint16_t num_barriers_; uint64_t local_mem_base_; @@ -39,6 +40,7 @@ public: , num_cores_(num_cores) , num_clusters_(NUM_CLUSTERS) , socket_size_(SOCKET_SIZE) + , vsize_(VLEN / 8) , num_barriers_(NUM_BARRIERS) , local_mem_base_(LMEM_BASE_ADDR) {} @@ -71,6 +73,10 @@ public: return socket_size_; } + uint16_t vsize() const { + return vsize_; + } + }; } \ No newline at end of file diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index 7a37e79e2..3c184879d 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -47,6 +47,7 @@ static const std::unordered_map sc_instTable = { {Opcode::FMSUB, InstType::R4}, {Opcode::FMNMADD, InstType::R4}, {Opcode::FMNMSUB, InstType::R4}, + {Opcode::VSET, InstType::V}, {Opcode::EXT1, InstType::R}, {Opcode::EXT2, InstType::R4}, {Opcode::R_W, InstType::R}, @@ 
-54,33 +55,6 @@ static const std::unordered_map sc_instTable = { {Opcode::TCU, InstType::I}, }; -enum Constants { - width_opcode= 7, - width_reg = 5, - width_func2 = 2, - width_func3 = 3, - width_func7 = 7, - width_i_imm = 12, - width_j_imm = 20, - - shift_opcode= 0, - shift_rd = width_opcode, - shift_func3 = shift_rd + width_reg, - shift_rs1 = shift_func3 + width_func3, - shift_rs2 = shift_rs1 + width_reg, - shift_func2 = shift_rs2 + width_reg, - shift_func7 = shift_rs2 + width_reg, - shift_rs3 = shift_func7 + width_func2, - - mask_opcode = (1 << width_opcode) - 1, - mask_reg = (1 << width_reg) - 1, - mask_func2 = (1 << width_func2) - 1, - mask_func3 = (1 << width_func3) - 1, - mask_func7 = (1 << width_func7) - 1, - mask_i_imm = (1 << width_i_imm) - 1, - mask_j_imm = (1 << width_j_imm) - 1, -}; - static const char* op_string(const Instr &instr) { auto opcode = instr.getOpcode(); auto func2 = instr.getFunc2(); @@ -230,10 +204,14 @@ static const char* op_string(const Instr &instr) { case Opcode::FENCE: return "FENCE"; case Opcode::FL: switch (func3) { - case 0x1: return "VL"; case 0x2: return "FLW"; case 0x3: return "FLD"; + case 0x0: return "VL8"; + case 0x5: return "VL16"; + case 0x6: return "VL32"; + case 0x7: return "VL64"; default: + std::cout << "Could not decode float/vector load with func3: " << func3 << std::endl; std::abort(); } case Opcode::FS: @@ -241,7 +219,12 @@ static const char* op_string(const Instr &instr) { case 0x1: return "VS"; case 0x2: return "FSW"; case 0x3: return "FSD"; + case 0x0: return "VS8"; + case 0x5: return "VS16"; + case 0x6: return "VS32"; + case 0x7: return "VS64"; default: + std::cout << "Could not decode float/vector store with func3: " << func3 << std::endl; std::abort(); } case Opcode::AMO: { @@ -390,6 +373,7 @@ static const char* op_string(const Instr &instr) { case Opcode::FMSUB: return func2 ? "FMSUB.D" : "FMSUB.S"; case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S"; case Opcode::FMNMSUB: return func2 ? 
"FNMSUB.D" : "FNMSUB.S"; + case Opcode::VSET: return "VSET"; case Opcode::EXT1: switch (func7) { case 0: @@ -421,6 +405,39 @@ static const char* op_string(const Instr &instr) { } } +inline void vec_log(std::ostream &os, const Instr &instr) { + if (instr.getVUseMask() & set_func3) + os << ", func3:" << instr.getFunc3(); + if (instr.getVUseMask() & set_func6) + os << ", func6:" << instr.getFunc6(); + if (instr.getVUseMask() & set_imm) + os << ", imm:" << instr.getImm(); + if (instr.getVUseMask() & set_vlswidth) + os << ", width:" << instr.getVlsWidth(); + if (instr.getVUseMask() & set_vmop) + os << ", mop:" << instr.getVmop(); + if (instr.getVUseMask() & set_vumop) + os << ", umop:" << instr.getVumop(); + if (instr.getVUseMask() & set_vnf) + os << ", nf:" << instr.getVnf(); + if (instr.getVUseMask() & set_vmask) + os << ", vmask:" << instr.getVmask(); + if (instr.getVUseMask() & set_vs3) + os << ", vs3:" << instr.getVs3(); + if (instr.getVUseMask() & set_zimm) + os << ", zimm:" << ((instr.hasZimm()) ? 
"true" : "false"); + if (instr.getVUseMask() & set_vlmul) + os << ", lmul:" << instr.getVlmul(); + if (instr.getVUseMask() & set_vsew) + os << ", sew:" << instr.getVsew(); + if (instr.getVUseMask() & set_vta) + os << ", ta:" << instr.getVta(); + if (instr.getVUseMask() & set_vma) + os << ", ma:" << instr.getVma(); + if (instr.getVUseMask() & set_vediv) + os << ", ediv:" << instr.getVediv(); +} + namespace vortex { std::ostream &operator<<(std::ostream &os, const Instr &instr) { os << op_string(instr); @@ -441,6 +458,13 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) { if (sep++ != 0) { os << ", "; } else { os << " "; } os << "0x" << std::hex << instr.getImm() << std::dec; } + if (instr.getOpcode() == Opcode::SYS && instr.getFunc3() >= 5) { + // CSRs with immediate values + if (sep++ != 0) { os << ", "; } else { os << " "; } + os << "0x" << std::hex << instr.getRSrc(0); + } + // Log vector-specific vtype and vreg info + if (instr.isVec()) vec_log(os, instr); return os; } } @@ -452,6 +476,7 @@ std::shared_ptr Emulator::decode(uint32_t code) const { auto func2 = (code >> shift_func2) & mask_func2; auto func3 = (code >> shift_func3) & mask_func3; + auto func6 = (code >> shift_func6) & mask_func6; auto func7 = (code >> shift_func7) & mask_func7; auto rd = (code >> shift_rd) & mask_reg; @@ -466,6 +491,12 @@ std::shared_ptr Emulator::decode(uint32_t code) const { } auto iType = op_it->second; + if (op == Opcode::FL || op == Opcode::FS) { + if (func3 != 0x2 && func3 != 0x3) { + iType = InstType::V; + } + } + switch (iType) { case InstType::R: switch (op) { @@ -659,7 +690,104 @@ std::shared_ptr Emulator::decode(uint32_t code) const { auto imm = (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20); instr->setImm(sext(imm, width_j_imm+1)); } break; + + case InstType::V: + instr->setVec(true); + switch (op) { + case Opcode::VSET: { + instr->setDestReg(rd, RegType::Integer); + instr->setFunc3(func3); + switch (func3) { + case 7: { + if (code 
>> (shift_vset - 1) == 0b10) { // vsetvl + instr->addSrcReg(rs1, RegType::Integer); + instr->addSrcReg(rs2, RegType::Integer); + } else { + auto zimm = (code >> shift_rs2) & mask_v_zimm; + instr->setZimm(true); + instr->setVlmul(zimm & mask_v_lmul); + instr->setVsew((zimm >> shift_v_sew) & mask_v_sew); + instr->setVta((zimm >> shift_v_ta) & mask_v_ta); + instr->setVma((zimm >> shift_v_ma) & mask_v_ma); + if ((code >> shift_vset)) { // vsetivli + instr->setImm(rs1); + } else { // vsetvli + instr->addSrcReg(rs1, RegType::Integer); + } + } + } break; + case 3: { // Vector - immediate arithmetic instructions + instr->setDestReg(rd, RegType::Vector); + instr->addSrcReg(rs2, RegType::Vector); + instr->setImm(rs1); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setFunc6(func6); + } break; + default: { // Vector - vector/scalar arithmetic instructions + if (func3 == 1 && func6 == 16) { + instr->setDestReg(rd, RegType::Float); + } else if (func3 == 2 && func6 == 16) { + instr->setDestReg(rd, RegType::Integer); + } else { + instr->setDestReg(rd, RegType::Vector); + } + instr->addSrcReg(rs1, RegType::Vector); + instr->addSrcReg(rs2, RegType::Vector); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setFunc6(func6); + } + } + } break; + case Opcode::FL: + instr->addSrcReg(rs1, RegType::Integer); + instr->setVmop((code >> shift_vmop) & 0b11); + switch (instr->getVmop()) { + case 0b00: + instr->setVumop(rs2); + break; + case 0b10: + instr->addSrcReg(rs2, RegType::Integer); + break; + case 0b01: + case 0b11: + instr->addSrcReg(rs2, RegType::Vector); + break; + } + instr->setVsew(func3 & 0x3); + instr->setDestReg(rd, RegType::Vector); + instr->setVlsWidth(func3); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setVnf((code >> shift_vnf) & mask_func3); + break; + + case Opcode::FS: + instr->addSrcReg(rs1, RegType::Integer); + instr->setVmop((code >> shift_vmop) & 0b11); + switch (instr->getVmop()) { + case 0b00: + instr->setVumop(rs2); + break; + case 
0b10: + instr->addSrcReg(rs2, RegType::Integer); + break; + case 0b01: + case 0b11: + instr->addSrcReg(rs2, RegType::Vector); + break; + } + instr->setVsew(func3 & 0x3); + instr->addSrcReg(rd, RegType::Vector); + instr->setVlsWidth(func3); + instr->setVmask((code >> shift_func7) & 0x1); + instr->setVmop((code >> shift_vmop) & 0b11); + instr->setVnf((code >> shift_vnf) & mask_func3); + break; + + default: + std::abort(); + } + break; case InstType::R4: instr->setDestReg(rd, RegType::Float); instr->addSrcReg(rs1, RegType::Float); diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 05b3497c4..14cb979d4 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -33,6 +33,7 @@ using namespace vortex; Emulator::warp_t::warp_t(const Arch& arch) : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) + , vreg_file(MAX_NUM_REGS, std::vector(arch.vsize())) , uuid(0) {} @@ -64,6 +65,26 @@ void Emulator::warp_t::clear(uint64_t startup_addr) { #endif } } + + for (auto& reg_file : this->vreg_file) { + for (auto& reg : reg_file) { + #ifndef NDEBUG + reg = 0; + #else + reg = std::rand(); + #endif + } + } + + for (auto& reg_file : this->vreg_file) { + for (auto& reg : reg_file) { + #ifndef NDEBUG + reg = 0; + #else + reg = std::rand(); + #endif + } + } } /////////////////////////////////////////////////////////////////////////////// @@ -79,7 +100,12 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) // considered to be big enough to hold input tiles for one output tile. // In future versions, scratchpad size should be fixed to an appropriate value. 
, scratchpad(std::vector(32 * 32 * 32768)) + , csrs_(arch.num_warps()) { + for (uint32_t i = 0; i < arch_.num_warps(); ++i) { + csrs_.at(i).resize(arch.num_threads()); + } + this->clear(); } @@ -463,6 +489,32 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case VX_CSR_FFLAGS: return warps_.at(wid).fcsr & 0x1F; case VX_CSR_FRM: return (warps_.at(wid).fcsr >> 5); case VX_CSR_FCSR: return warps_.at(wid).fcsr; + + // Vector CRSs + case VX_CSR_VSTART: + return csrs_.at(wid).at(tid)[VX_CSR_VSTART]; + case VX_CSR_VXSAT: + return csrs_.at(wid).at(tid)[VX_CSR_VXSAT]; + case VX_CSR_VXRM: + return csrs_.at(wid).at(tid)[VX_CSR_VXRM]; + case VX_CSR_VCSR: { + Word vxsat = csrs_.at(wid).at(tid)[VX_CSR_VXSAT]; + Word vxrm = csrs_.at(wid).at(tid)[VX_CSR_VXRM]; + return (vxrm << 1) | vxsat; + } + case VX_CSR_VL: + return csrs_.at(wid).at(tid)[VX_CSR_VL]; + case VX_CSR_VTYPE: + return csrs_.at(wid).at(tid)[VX_CSR_VTYPE]; + case VX_CSR_VLENB: + return VLEN / 8; + case VX_CSR_VCYCLE: + return csrs_.at(wid).at(tid)[VX_CSR_VCYCLE]; + case VX_CSR_VTIME: + return csrs_.at(wid).at(tid)[VX_CSR_VTIME]; + case VX_CSR_VINSTRET: + return csrs_.at(wid).at(tid)[VX_CSR_VINSTRET]; + case VX_CSR_MHARTID: return (core_->id() * arch_.num_warps() + wid) * arch_.num_threads() + tid; case VX_CSR_THREAD_ID: return tid; case VX_CSR_WARP_ID: return wid; @@ -578,6 +630,29 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { case VX_CSR_MSCRATCH: csr_mscratch_ = value; break; + + // Vector CRSs + case VX_CSR_VSTART: + csrs_.at(wid).at(tid)[VX_CSR_VSTART] = value; + break; + case VX_CSR_VXSAT: + csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1; + break; + case VX_CSR_VXRM: + csrs_.at(wid).at(tid)[VX_CSR_VXRM] = value & 0b11; + break; + case VX_CSR_VCSR: + csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1; + csrs_.at(wid).at(tid)[VX_CSR_VXRM] = (value >> 1) & 0b11; + break; + case VX_CSR_VL: // read only, written by vset(i)vl(i) + 
csrs_.at(wid).at(tid)[VX_CSR_VL] = value; + break; + case VX_CSR_VTYPE: // read only, written by vset(i)vl(i) + csrs_.at(wid).at(tid)[VX_CSR_VTYPE] = value; + break; + case VX_CSR_VLENB: // read only, set to VLEN / 8 + case VX_CSR_SATP: #ifdef VM_ENABLE // warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 5f1b91d5d..ffe630c3d 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -28,6 +28,76 @@ class Core; class Instr; class instr_trace_t; +enum Constants { + width_opcode= 7, + width_reg = 5, + width_func2 = 2, + width_func3 = 3, + width_func6 = 6, + width_func7 = 7, + width_mop = 3, + width_vmask = 1, + width_i_imm = 12, + width_j_imm = 20, + width_v_zimm = 11, + width_v_ma = 1, + width_v_ta = 1, + width_v_sew = 3, + width_v_lmul = 3, + width_aq = 1, + width_rl = 1, + + shift_opcode= 0, + shift_rd = width_opcode, + shift_func3 = shift_rd + width_reg, + shift_rs1 = shift_func3 + width_func3, + shift_rs2 = shift_rs1 + width_reg, + shift_func2 = shift_rs2 + width_reg, + shift_func7 = shift_rs2 + width_reg, + shift_rs3 = shift_func7 + width_func2, + shift_vmop = shift_func7 + width_vmask, + shift_vnf = shift_vmop + width_mop, + shift_func6 = shift_func7 + width_vmask, + shift_vset = shift_func7 + width_func6, + shift_v_sew = width_v_lmul, + shift_v_ta = shift_v_sew + width_v_sew, + shift_v_ma = shift_v_ta + width_v_ta, + + mask_opcode = (1 << width_opcode) - 1, + mask_reg = (1 << width_reg) - 1, + mask_func2 = (1 << width_func2) - 1, + mask_func3 = (1 << width_func3) - 1, + mask_func6 = (1 << width_func6) - 1, + mask_func7 = (1 << width_func7) - 1, + mask_i_imm = (1 << width_i_imm) - 1, + mask_j_imm = (1 << width_j_imm) - 1, + mask_v_zimm = (1 << width_v_zimm) - 1, + mask_v_ma = (1 << width_v_ma) - 1, + mask_v_ta = (1 << width_v_ta) - 1, + mask_v_sew = (1 << width_v_sew) - 1, + mask_v_lmul = (1 << width_v_lmul) - 1, +}; + +struct vtype { + uint32_t vill; + uint32_t vma; + uint32_t 
vta; + uint32_t vsew; + uint32_t vlmul; +}; + +union reg_data_t { + Word u; + WordI i; + WordF f; + float f32; + double f64; + uint32_t u32; + uint64_t u64; + int32_t i32; + int64_t i64; +}; + class Emulator { public: Emulator(const Arch &arch, @@ -61,6 +131,10 @@ public: Word get_tc_size(); Word get_tc_num(); + void dcache_read(void* data, uint64_t addr, uint32_t size); + + void dcache_write(const void* data, uint64_t addr, uint32_t size); + private: struct ipdom_entry_t { @@ -85,9 +159,14 @@ private: ThreadMask tmask; std::vector> ireg_file; std::vector>freg_file; + std::vector> vreg_file; std::stack ipdom_stack; Byte fcsr; uint32_t uuid; + + struct vtype vtype; + uint32_t vl; + Word VLMAX; }; struct wspawn_t { @@ -100,12 +179,14 @@ private: void execute(const Instr &instr, uint32_t wid, instr_trace_t *trace); + void executeVector(const Instr &instr, uint32_t wid, std::vector &rsdata, std::vector &rddata); + + void loadVector(const Instr &instr, uint32_t wid, std::vector &rsdata); + + void storeVector(const Instr &instr, uint32_t wid, std::vector &rsdata); + void icache_read(void* data, uint64_t addr, uint32_t size); - void dcache_read(void* data, uint64_t addr, uint32_t size); - - void dcache_write(const void* data, uint64_t addr, uint32_t size); - void dcache_amo_reserve(uint64_t addr); bool dcache_amo_check(uint64_t addr); @@ -142,6 +223,7 @@ private: uint32_t mat_size; uint32_t tc_size; uint32_t tc_num; + std::vector>> csrs_; }; } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index dd8253571..d477a1d45 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -25,22 +25,11 @@ #include "emulator.h" #include "instr.h" #include "core.h" +#include "processor_impl.h" #include "VX_types.h" using namespace vortex; -union reg_data_t { - Word u; - WordI i; - WordF f; - float f32; - double f64; - uint32_t u32; - uint64_t u64; - int32_t i32; - int64_t i64; -}; - inline uint64_t nan_box(uint32_t value) { return value | 0xffffffff00000000; } @@ -128,6 
+117,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } DPN(2, "}" << std::endl); break; + case RegType::Vector: + break; default: break; } @@ -678,41 +669,47 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->src_regs[0] = {RegType::Integer, rsrc0}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; - uint32_t data_bytes = 1 << (func3 & 0x3); - uint32_t data_width = 8 * data_bytes; - for (uint32_t t = thread_start; t < num_threads; ++t) { - if (!warp.tmask.test(t)) - continue; - uint64_t mem_addr = rsdata[t][0].i + immsrc; - uint64_t read_data = 0; - this->dcache_read(&read_data, mem_addr, data_bytes); - trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; - switch (func3) { - case 0: // RV32I: LB - case 1: // RV32I: LH - rddata[t].i = sext((Word)read_data, data_width); - break; - case 2: - if (opcode == Opcode::L) { - // RV32I: LW + if ((opcode == Opcode::L ) + || (opcode == Opcode::FL && func3 == 2) + || (opcode == Opcode::FL && func3 == 3)) { + uint32_t data_bytes = 1 << (func3 & 0x3); + uint32_t data_width = 8 * data_bytes; + for (uint32_t t = thread_start; t < num_threads; ++t) { + if (!warp.tmask.test(t)) + continue; + uint64_t mem_addr = rsdata[t][0].i + immsrc; + uint64_t read_data = 0; + this->dcache_read(&read_data, mem_addr, data_bytes); + trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; + switch (func3) { + case 0: // RV32I: LB + case 1: // RV32I: LH rddata[t].i = sext((Word)read_data, data_width); - } else { - // RV32F: FLW - rddata[t].u64 = nan_box((uint32_t)read_data); + break; + case 2: + if (opcode == Opcode::L) { + // RV32I: LW + rddata[t].i = sext((Word)read_data, data_width); + } else { + // RV32F: FLW + rddata[t].u64 = nan_box((uint32_t)read_data); + } + break; + case 3: // RV64I: LD + // RV32D: FLD + case 4: // RV32I: LBU + case 5: // RV32I: LHU + case 6: // RV64I: LWU + rddata[t].u64 = read_data; + break; + default: + std::abort(); } - 
break; - case 3: // RV64I: LD - // RV32D: FLD - case 4: // RV32I: LBU - case 5: // RV32I: LHU - case 6: // RV64I: LWU - rddata[t].u64 = read_data; - break; - default: - std::abort(); } + rd_write = true; + } else { + loadVector(instr, wid, rsdata); } - rd_write = true; break; } case Opcode::S: @@ -724,23 +721,29 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->src_regs[1] = {data_type, rsrc1}; auto trace_data = std::make_shared(num_threads); trace->data = trace_data; - uint32_t data_bytes = 1 << (func3 & 0x3); - for (uint32_t t = thread_start; t < num_threads; ++t) { - if (!warp.tmask.test(t)) - continue; - uint64_t mem_addr = rsdata[t][0].i + immsrc; - uint64_t write_data = rsdata[t][1].u64; - trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; - switch (func3) { - case 0: - case 1: - case 2: - case 3: - this->dcache_write(&write_data, mem_addr, data_bytes); - break; - default: - std::abort(); + if ((opcode == Opcode::S) + || (opcode == Opcode::FS && func3 == 2) + || (opcode == Opcode::FS && func3 == 3)) { + uint32_t data_bytes = 1 << (func3 & 0x3); + for (uint32_t t = thread_start; t < num_threads; ++t) { + if (!warp.tmask.test(t)) + continue; + uint64_t mem_addr = rsdata[t][0].i + immsrc; + uint64_t write_data = rsdata[t][1].u64; + trace_data->mem_addrs.at(t) = {mem_addr, data_bytes}; + switch (func3) { + case 0: + case 1: + case 2: + case 3: + this->dcache_write(&write_data, mem_addr, data_bytes); + break; + default: + std::abort(); + } } + } else { + storeVector(instr, wid, rsdata); } break; } @@ -925,7 +928,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; - uint32_t frm = this->get_fpu_rm(func3, t, wid); + uint32_t frm = (func3 == 0x7) ? 
this->get_csr(VX_CSR_FRM, t, wid) : func3; uint32_t fflags = 0; switch (func7) { case 0x00: { // RV32F: FADD.S @@ -1240,7 +1243,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } } - this->update_fcrs(fflags, t, wid); + if (fflags) { + this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); + this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); + } } rd_write = true; break; @@ -1294,7 +1300,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { default: break; } - this->update_fcrs(fflags, t, wid); + if (fflags) { + this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); + this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); + } } rd_write = true; break; @@ -1586,6 +1595,13 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::abort(); } } break; + case Opcode::VSET: { + auto func6 = instr.getFunc6(); + if ((func3 == 0x7) || (func3 == 0x2 && func6 == 16) || (func3 == 0x1 && func6 == 16)) { + rd_write = true; + } + executeVector(instr, wid, rsdata, rddata); + } break; default: std::abort(); } @@ -1629,6 +1645,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->dst_reg = {type, rdest}; break; default: + std::cout << "Unrecognized register write back type: " << type << std::endl; std::abort(); break; } diff --git a/sim/simx/execute_vector.cpp b/sim/simx/execute_vector.cpp new file mode 100644 index 000000000..3b2d585db --- /dev/null +++ b/sim/simx/execute_vector.cpp @@ -0,0 +1,4493 @@ +// This is a fork of https://github.com/troibe/vortex/tree/simx-v2-vector +// The purpose of this fork is to make the simx-v2-vector up to date with master +// Thanks to Troibe for his amazing work + +#include +#include +#include +#include +#include +#include "emulator.h" +#include "instr.h" +#include "processor_impl.h" + 
+using namespace vortex; + +template +class Add { + public: + static R apply(T first, T second, R) { + return (R)first + (R)second; + } + static std::string name() {return "Add";} +}; + +template +class Sub { + public: + static R apply(T first, T second, R) { + return (R)second - (R)first; + } + static std::string name() {return "Sub";} +}; + +template +class Adc { + public: + static R apply(T first, T second, R third) { + return (R)first + (R)second + third; + } + static std::string name() {return "Adc";} +}; + +template +class Madc { + public: + static R apply(T first, T second, R third) { + return (R)first + (R)second + third > (R)std::numeric_limits::max(); + } + static std::string name() {return "Madc";} +}; + +template +class Sbc { + public: + static R apply(T first, T second, R third) { + return (R)second - (R)first - third; + } + static std::string name() {return "Sbc";} +}; + +template +class Msbc { + public: + static R apply(T first, T second, R third) { + return (R)second < (R)first + third; + } + static std::string name() {return "Msbc";} +}; + +template +class Ssub { + public: + static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { + // rounding mode is not relevant for this operation + T unclippedResult = second - first; + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Ssub";} +}; + +template +class Ssubu { + public: + static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { + // rounding mode is not relevant for this operation + if (first > second) { + vxsat_ = true; + return 0; + } else { + vxsat_ = false; + return second - first; + } + } + static std::string name() {return "Ssubu";} +}; + +template +class Sadd { + public: + static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { + // rounding mode is not relevant for this operation + T unclippedResult = 
second + first; + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Sadd";} +}; + +template +class Rsub { + public: + static R apply(T first, T second, R) { + return first - second; + } + static std::string name() {return "Rsub";} +}; + +template +class Div { + public: + static R apply(T first, T second, R) { + // logic taken from scalar div + if (first == 0) { + return -1; + } else if (second == std::numeric_limits::min() && first == T(-1)) { + return second; + } else { + return (R)second / (R)first; + } + } + static std::string name() {return "Div";} +}; + +template +class Rem { + public: + static R apply(T first, T second, R) { + // logic taken from scalar rem + if (first == 0) { + return second; + } else if (second == std::numeric_limits::min() && first == T(-1)) { + return 0; + } else { + return (R)second % (R)first; + } + } + static std::string name() {return "Rem";} +}; + +template +class Mul { + public: + static R apply(T first, T second, R) { + return (R)first * (R)second; + } + static std::string name() {return "Mul";} +}; + +template +class Mulsu { + public: + static R apply(T first, T second, R) { + R first_ext = zext((R)first, (sizeof(T) * 8)); + return first_ext * (R)second; + } + static std::string name() {return "Mulsu";} +}; + +template +class Mulh { + public: + static R apply(T first, T second, R) { + __int128_t first_ext = sext((__int128_t)first, (sizeof(T) * 8)); + __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); + return (first_ext * second_ext) >> (sizeof(T) * 8); + } + static std::string name() {return "Mulh";} +}; + +template +class Mulhsu { + public: + static R apply(T first, T second, R) { + __int128_t first_ext = zext((__int128_t)first, (sizeof(T) * 8)); + __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); + return (first_ext * 
second_ext) >> (sizeof(T) * 8); + } + static std::string name() {return "Mulhsu";} +}; + +template +class Mulhu { + public: + static R apply(T first, T second, R) { + return ((__uint128_t)first * (__uint128_t)second) >> (sizeof(T) * 8); + } + static std::string name() {return "Mulhu";} +}; + +template +class Madd { + public: + static R apply(T first, T second, R third) { + return ((R)first * third) + (R)second; + } + static std::string name() {return "Madd";} +}; + +template +class Nmsac { + public: + static R apply(T first, T second, R third) { + return -((R)first * (R)second) + third; + } + static std::string name() {return "Nmsac";} +}; + +template +class Macc { + public: + static R apply(T first, T second, R third) { + return ((R)first * (R)second) + third; + } + static std::string name() {return "Macc";} +}; + +template +class Maccsu { + public: + static R apply(T first, T second, R third) { + R first_ext = sext((R)first, (sizeof(T) * 8)); + R second_ext = zext((R)second, (sizeof(T) * 8)); + return (first_ext * second_ext) + third; + } + static std::string name() {return "Maccsu";} +}; + +template +class Maccus { + public: + static R apply(T first, T second, R third) { + R first_ext = zext((R)first, (sizeof(T) * 8)); + R second_ext = sext((R)second, (sizeof(T) * 8)); + return (first_ext * second_ext) + third; + } + static std::string name() {return "Maccus";} +}; + +template +class Nmsub { + public: + static R apply(T first, T second, R third) { + return -((R)first * third) + (R)second; + } + static std::string name() {return "Nmsub";} +}; + +template +class Min { + public: + static R apply(T first, T second, R) { + return std::min(first, second); + } + static std::string name() {return "Min";} +}; + +template +class Max { + public: + static R apply(T first, T second, R) { + return std::max(first, second); + } + static std::string name() {return "Max";} +}; + +template +class And { + public: + static R apply(T first, T second, R) { + return first & second; + } 
+ static std::string name() {return "And";} +}; + +template +class Or { + public: + static R apply(T first, T second, R) { + return first | second; + } + static std::string name() {return "Or";} +}; + +template +class Xor { + public: + static R apply(T first, T second, R) { + return first ^ second; + } + static std::string name() {return "Xor";} +}; + +template +class Sll { + public: + static R apply(T first, T second, R) { + // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. + return second << (first & (sizeof(T) * 8 - 1)); + } + static std::string name() {return "Sll";} +}; + +template +bool bitAt(T value, R pos, R negOffset) { + R offsetPos = pos - negOffset; + return pos >= negOffset && ((value >> offsetPos) & 0x1); +} + +template +bool anyBitUpTo(T value, R to, R negOffset) { + R offsetTo = to - negOffset; + return to >= negOffset && (value & (((R)1 << (offsetTo + 1)) - 1)); +} + +template +bool roundBit(T value, R shiftDown, uint32_t vxrm) { + switch (vxrm){ + case 0: // round-to-nearest-up + return bitAt(value, shiftDown, (R)1); + case 1: // round-to-nearest-even + return bitAt(value, shiftDown, (R)1) && (anyBitUpTo(value, shiftDown, (R)2) || bitAt(value, shiftDown, (R)0)); + case 2: // round-down (truncate) + return 0; + case 3: // round-to-odd + return !bitAt(value, shiftDown, (R)0) && anyBitUpTo(value, shiftDown, (R)1); + default: + std::cout << "Roundoff - invalid value for vxrm: " << vxrm << std::endl; + std::abort(); + } +} + +template +class SrlSra { + public: + static R apply(T first, T second, R) { + // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. + return second >> (first & (sizeof(T) * 8 - 1)); + } + static R apply(T first, T second, uint32_t vxrm, uint32_t) { + // Saturation is not relevant for this operation + // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. 
+ T firstValid = first & (sizeof(T) * 8 - 1); + return apply(firstValid, second, 0) + roundBit(second, firstValid, vxrm); + } + static std::string name() {return "SrlSra";} +}; + +template +class Aadd { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t) { + // Saturation is not relevant for this operation + T sum = second + first; + return (sum >> 1) + roundBit(sum, 1, vxrm); + } + static std::string name() {return "Aadd";} +}; + +template +class Asub { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t) { + // Saturation is not relevant for this operation + T difference = second - first; + return (difference >> 1) + roundBit(difference, 1, vxrm); + } + static std::string name() {return "Asub";} +}; + +template +class Eq { + public: + static R apply(T first, T second, R) { + return first == second; + } + static std::string name() {return "Eq";} +}; + +template +class Ne { + public: + static R apply(T first, T second, R) { + return first != second; + } + static std::string name() {return "Ne";} +}; + +template +class Lt { + public: + static R apply(T first, T second, R) { + return first > second; + } + static std::string name() {return "Lt";} +}; + +template +class Le { + public: + static R apply(T first, T second, R) { + return first >= second; + } + static std::string name() {return "Le";} +}; + +template +class Gt { + public: + static R apply(T first, T second, R) { + return first < second; + } + static std::string name() {return "Gt";} +}; + +template +class AndNot { + public: + static R apply(T first, T second, R) { + return second & ~first; + } + static std::string name() {return "AndNot";} +}; + +template +class OrNot { + public: + static R apply(T first, T second, R) { + return second | ~first; + } + static std::string name() {return "OrNot";} +}; + +template +class Nand { + public: + static R apply(T first, T second, R) { + return ~(second & first); + } + static std::string name() {return "Nand";} +}; + +template 
+class Mv { + public: + static R apply(T first, T, R) { + return first; + } + static std::string name() {return "Mv";} +}; + +template +class Nor { + public: + static R apply(T first, T second, R) { + return ~(second | first); + } + static std::string name() {return "Nor";} +}; + +template +class Xnor { + public: + static R apply(T first, T second, R) { + return ~(second ^ first); + } + static std::string name() {return "Xnor";} +}; + +template +class Fadd { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fadd_s(first, second, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fadd_d(first_d, second_d, frm, &fflags); + } else { + std::cout << "Fadd only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fadd";} +}; + +template +class Fsub { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fsub_s(second, first, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? 
second : rv_ftod(second); + return rv_fsub_d(second_d, first_d, frm, &fflags); + } else { + std::cout << "Fsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsub";} +}; + +template +class Fmacc { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fmadd_s(first, second, third, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fmadd_d(first_d, second_d, third, frm, &fflags); + } else { + std::cout << "Fmacc only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmacc";} +}; + +template +class Fnmacc { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fnmadd_s(first, second, third, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fnmadd_d(first_d, second_d, third, frm, &fflags); + } else { + std::cout << "Fnmacc only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmacc";} +}; + +template +class Fmsac { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? 
second : rv_ftod(second); + return rv_fmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); + } else { + std::cout << "Fmsac only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmsac";} +}; + +template +class Fnmsac { + public: + static R apply(T first, T second, R third) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fnmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fnmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); + } else { + std::cout << "Fnmsac only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmsac";} +}; + +template +class Fmadd { + public: + static R apply(T first, T second, R third) { + if (sizeof(T) == 4 || sizeof(T) == 8) { + return Fmacc::apply(first, third, second); + } else { + std::cout << "Fmadd only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmadd";} +}; + +template +class Fnmadd { + public: + static R apply(T first, T second, R third) { + if (sizeof(T) == 4 || sizeof(T) == 8) { + return Fnmacc::apply(first, third, second); + } else { + std::cout << "Fnmadd only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmadd";} +}; + +template +class Fmsub { + public: + static R apply(T first, T second, R third) { + if (sizeof(T) == 4 || sizeof(T) == 8) { + return Fmsac::apply(first, third, second); + } else { + std::cout << "Fmsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmsub";} +}; + +template +class Fnmsub { + public: + static R apply(T first, T second, R third) { + if 
(sizeof(T) == 4 || sizeof(T) == 8) { + return Fnmsac::apply(first, third, second); + } else { + std::cout << "Fnmsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fnmsub";} +}; + +template +class Fmin { + public: + static R apply(T first, T second, R) { + // ignoring rounding modes for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fmin_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_fmin_d(first, second, &fflags); + } else { + std::cout << "Fmin only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmin";} +}; + +template +class Fmax { + public: + static R apply(T first, T second, R) { + // ignoring rounding modes for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fmax_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_fmax_d(first, second, &fflags); + } else { + std::cout << "Fmax only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmax";} +}; + +template +class Fsgnj { + public: + static R apply(T first, T second, R) { + if (sizeof(T) == 4) { + return rv_fsgnj_s(second, first); + } else if (sizeof(T) == 8) { + return rv_fsgnj_d(second, first); + } else { + std::cout << "Fsgnj only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsgnj";} +}; + +template +class Fsgnjn { + public: + static R apply(T first, T second, R) { + if (sizeof(T) == 4) { + return rv_fsgnjn_s(second, first); + } else if (sizeof(T) == 8) { + return rv_fsgnjn_d(second, first); + } else { + std::cout << "Fsgnjn only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsgnjn";} +}; + +template +class Fsgnjx { + public: + static R apply(T first, T second, R) { + if (sizeof(T) == 4) { + return rv_fsgnjx_s(second, first); + } else if (sizeof(T) == 8) { + return rv_fsgnjx_d(second, 
first); + } else { + std::cout << "Fsgnjx only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fsgnjx";} +}; + +template +class Fcvt { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + switch (first) { + case 0b00000: // vfcvt.xu.f.v + return rv_ftou_s(second, frm, &fflags); + case 0b00001: // vfcvt.x.f.v + return rv_ftoi_s(second, frm, &fflags); + case 0b00010: // vfcvt.f.xu.v + return rv_utof_s(second, frm, &fflags); + case 0b00011: // vfcvt.f.x.v + return rv_itof_s(second, frm, &fflags); + case 0b00110: // vfcvt.rtz.xu.f.v + return rv_ftou_s(second, 1, &fflags); + case 0b00111: // vfcvt.rtz.x.f.v + return rv_ftoi_s(second, 1, &fflags); + case 0b01000: // vfwcvt.xu.f.v + return rv_ftolu_s(second, frm, &fflags); + case 0b01001: // vfwcvt.x.f.v + return rv_ftol_s(second, frm, &fflags); + case 0b01010: // vfwcvt.f.xu.v + return rv_utof_d(second, frm, &fflags); + case 0b01011: // vfwcvt.f.x.v + return rv_itof_d(second, frm, &fflags); + case 0b01100: // vfwcvt.f.f.v + return rv_ftod(second); + case 0b01110: // vfwcvt.rtz.xu.f.v + return rv_ftolu_s(second, 1, &fflags); + case 0b01111: // vfwcvt.rtz.x.f.v + return rv_ftol_s(second, 1, &fflags); + default: + std::cout << "Fcvt has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else if (sizeof(T) == 8) { + switch (first) { + case 0b00000: // vfcvt.xu.f.v + return rv_ftolu_d(second, frm, &fflags); + case 0b00001: // vfcvt.x.f.v + return rv_ftol_d(second, frm, &fflags); + case 0b00010: // vfcvt.f.xu.v + return rv_lutof_d(second, frm, &fflags); + case 0b00011: // vfcvt.f.x.v + return rv_ltof_d(second, frm, &fflags); + case 0b00110: // vfcvt.rtz.xu.f.v + return rv_ftolu_d(second, 1, &fflags); + case 0b00111: // vfcvt.rtz.x.f.v + return rv_ftol_d(second, 1, &fflags); + case 0b01000: // vfwcvt.xu.f.v + case 0b01001: 
// vfwcvt.x.f.v + case 0b01010: // vfwcvt.f.xu.v + case 0b01011: // vfwcvt.f.x.v + case 0b01100: // vfwcvt.f.f.v + case 0b01110: // vfwcvt.rtz.xu.f.v + case 0b01111: // vfwcvt.rtz.x.f.v + std::cout << "Fwcvt only supports f32" << std::endl; + std::abort(); + default: + std::cout << "Fcvt has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else { + std::cout << "Fcvt only supports f32 and f64" << std::endl; + std::abort(); + } + } + static R apply(T first, T second, uint32_t vxrm, uint32_t &) { // saturation argument is unused + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 8) { + switch (first) { + case 0b10000: // vfncvt.xu.f.w + return rv_ftou_d(second, vxrm, &fflags); + case 0b10001: // vfncvt.x.f.w + return rv_ftoi_d(second, vxrm, &fflags); + case 0b10010: // vfncvt.f.xu.w + return rv_lutof_s(second, vxrm, &fflags); + case 0b10011: // vfncvt.f.x.w + return rv_ltof_s(second, vxrm, &fflags); + case 0b10100: // vfncvt.f.f.w + return rv_dtof_r(second, vxrm); + case 0b10101: // vfncvt.rod.f.f.w + return rv_dtof_r(second, 6); + case 0b10110: // vfncvt.rtz.xu.f.w + return rv_ftou_d(second, 1, &fflags); + case 0b10111: // vfncvt.rtz.x.f.w + return rv_ftoi_d(second, 1, &fflags); + default: + std::cout << "Fncvt has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else { + std::cout << "Fncvt only supports f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fcvt";} +}; + +template +class Funary1 { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + switch (first) { + case 0b00000: // vfsqrt.v + return rv_fsqrt_s(second, frm, &fflags); + case 0b00100: // vfrsqrt7.v + return rv_frsqrt7_s(second, frm, &fflags); + case 0b00101: // vfrec7.v + return rv_frecip7_s(second, frm, &fflags); + case 0b10000: // vfclass.v + return 
rv_fclss_s(second); + default: + std::cout << "Funary1 has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else if (sizeof(T) == 8) { + switch (first) { + case 0b00000: // vfsqrt.v + return rv_fsqrt_d(second, frm, &fflags); + case 0b00100: // vfrsqrt7.v + return rv_frsqrt7_d(second, frm, &fflags); + case 0b00101: // vfrec7.v + return rv_frecip7_d(second, frm, &fflags); + case 0b10000: // vfclass.v + return rv_fclss_d(second); + default: + std::cout << "Funary1 has unsupported value for first: " << first << std::endl; + std::abort(); + } + } else { + std::cout << "Funary1 only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Funary1";} +}; + +template +class Xunary0 { + public: + static R apply(T, T second, T) { + return second; + } + static std::string name() {return "Xunary0";} +}; + +template +class Feq { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_feq_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return rv_feq_d(second, first, &fflags); + } else { + std::cout << "Feq only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Feq";} +}; + +template +class Fle { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fle_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return rv_fle_d(second, first, &fflags); + } else { + std::cout << "Fle only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fle";} +}; + +template +class Flt { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_flt_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return rv_flt_d(second, first, &fflags); + } else { + std::cout << "Flt only 
supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Flt";} +}; + +template +class Fne { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return !rv_feq_s(second, first, &fflags); + } else if (sizeof(T) == 8) { + return !rv_feq_d(second, first, &fflags); + } else { + std::cout << "Fne only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fne";} +}; + +template +class Fgt { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_flt_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_flt_d(first, second, &fflags); + } else { + std::cout << "Fgt only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fgt";} +}; + +template +class Fge { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + if (sizeof(T) == 4) { + return rv_fle_s(first, second, &fflags); + } else if (sizeof(T) == 8) { + return rv_fle_d(first, second, &fflags); + } else { + std::cout << "Fge only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fge";} +}; + +template +class Fdiv { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + return rv_fdiv_s(second, first, frm, &fflags); + } else if (sizeof(T) == 8) { + return rv_fdiv_d(second, first, frm, &fflags); + } else { + std::cout << "Fdiv only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fdiv";} +}; + +template +class Frdiv { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + 
uint32_t frm = 0; + if (sizeof(T) == 4) { + return rv_fdiv_s(first, second, frm, &fflags); + } else if (sizeof(T) == 8) { + return rv_fdiv_d(first, second, frm, &fflags); + } else { + std::cout << "Frdiv only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Frdiv";} +}; + +template +class Fmul { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(R) == 4) { + return rv_fmul_s(first, second, frm, &fflags); + } else if (sizeof(R) == 8) { + uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); + uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); + return rv_fmul_d(first_d, second_d, frm, &fflags); + } else { + std::cout << "Fmul only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Fmul";} +}; + +template +class Frsub { + public: + static R apply(T first, T second, R) { + // ignoring flags for now + uint32_t fflags = 0; + // ignoring rounding mode for now + uint32_t frm = 0; + if (sizeof(T) == 4) { + return rv_fsub_s(first, second, frm, &fflags); + } else if (sizeof(T) == 8) { + return rv_fsub_d(first, second, frm, &fflags); + } else { + std::cout << "Frsub only supports f32 and f64" << std::endl; + std::abort(); + } + } + static std::string name() {return "Frsub";} +}; + +template +class Clip { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { + // The low lg2(2*SEW) bits of the vector or scalar shift-amount value (e.g., the low 6 bits for a SEW=64-bit to + // SEW=32-bit narrowing operation) are used to control the right shift amount, which provides the scaling. 
+ R firstValid = first & (sizeof(T) * 8 - 1); + T unclippedResult = (second >> firstValid) + roundBit(second, firstValid, vxrm); + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Clip";} +}; + +template +class Smul { + public: + static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { + R shift = sizeof(R) * 8 - 1; + T unshiftedResult = first * second; + T unclippedResult = (unshiftedResult >> shift) + roundBit(unshiftedResult, shift, vxrm); + R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); + vxsat_ |= clippedResult != unclippedResult; + return clippedResult; + } + static std::string name() {return "Smul";} +}; + +bool isMasked(std::vector> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) { + auto& mask = vreg_file.at(maskVreg); + uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8); + uint8_t value = (emask >> (byteI % 8)) & 0x1; + DP(1, "Masking enabled: " << +!vmask << " mask element: " << +value); + return !vmask && value == 0; +} + +template +uint32_t getVreg(uint32_t baseVreg, uint32_t byteI) { + uint32_t vsew = sizeof(DT) * 8; + return (baseVreg + (byteI / (VLEN / vsew))) % 32; +} + +template +DT &getVregData(std::vector &baseVregVec, uint32_t byteI) { + uint32_t vsew = sizeof(DT) * 8; + return *(DT *)(baseVregVec.data() + (byteI % (VLEN / vsew)) * vsew / 8); +} + +template +DT &getVregData(std::vector> &vreg_file, uint32_t baseVreg, uint32_t byteI) { + auto& vr1 = vreg_file.at(getVreg
(baseVreg, byteI)); + return getVregData
(vr1, byteI); +} + +template +void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); + if (nfields * emul > 8) { + std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl; + std::abort(); + } + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + uint32_t nfields_strided = strided ? nfields : 1; + Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); + Word mem_data = 0; + emul_->dcache_read(&mem_data, mem_addr, vsew / 8); + DP(1, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); + DP(1, "Previous data: " << +result); + result = (DT) mem_data; + } +} + +void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + case 16: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + case 32: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + case 64: + vector_op_vix_load(vreg_file, emul_, rsdata, rdest, vl, strided, stride, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VLE for vsew: " << vsew << std::endl; + std::abort(); + } +} + +template +void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 
1 : 1 << (lmul & 0b11); + if (nfields * emul > 8) { + std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl; + std::abort(); + } + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + Word offset = 0; + switch (iSew) { + case 8: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 16: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 32: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 64: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + default: + std::cout << "Unsupported iSew: " << iSew << std::endl; + std::abort(); + } + + Word mem_addr = ((rsdata[0][0].i) & 0xFFFFFFFC) + offset + (i % nfields) * sizeof(DT); + Word mem_data = 0; + emul_->dcache_read(&mem_data, mem_addr, vsew / 8); + DP(1, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); + DP(1, "Previous data: " << +result); + result = (DT) mem_data; + } +} + +void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + case 16: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + case 32: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + case 64: + vector_op_vv_load(vreg_file, emul_, rsdata, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VLUX/VLOX for vsew: " << vsew << std::endl; + std::abort(); + } +} + +void Emulator::loadVector(const Instr &instr, uint32_t wid, std::vector &rsdata) { + auto &warp = warps_.at(wid); + auto vmask = instr.getVmask(); + auto rdest = instr.getRDest(); + auto mop = instr.getVmop(); + switch (mop) { + case 0b00: { // unit-stride + auto lumop = instr.getVumop(); + switch (lumop) { + case 0b10000: // vle8ff.v, vle16ff.v, vle32ff.v, vle64ff.v - we do not support exceptions -> treat like regular unit stride + // vlseg2e8ff.v, vlseg2e16ff.v, vlseg2e32ff.v, vlseg2e64ff.v + // vlseg3e8ff.v, vlseg3e16ff.v, vlseg3e32ff.v, vlseg3e64ff.v + // vlseg4e8ff.v, vlseg4e16ff.v, vlseg4e32ff.v, vlseg4e64ff.v + // vlseg5e8ff.v, vlseg5e16ff.v, vlseg5e32ff.v, vlseg5e64ff.v + // vlseg6e8ff.v, vlseg6e16ff.v, vlseg6e32ff.v, vlseg6e64ff.v + // vlseg7e8ff.v, vlseg7e16ff.v, vlseg7e32ff.v, vlseg7e64ff.v + // vlseg8e8ff.v, vlseg8e16ff.v, vlseg8e32ff.v, vlseg8e64ff.v + case 0b0000: { // vle8.v, vle16.v, vle32.v, vle64.v + // vlseg2e8.v, vlseg2e16.v, vlseg2e32.v, vlseg2e64.v + // vlseg3e8.v, vlseg3e16.v, vlseg3e32.v, vlseg3e64.v + // vlseg4e8.v, 
vlseg4e16.v, vlseg4e32.v, vlseg4e64.v + // vlseg5e8.v, vlseg5e16.v, vlseg5e32.v, vlseg5e64.v + // vlseg6e8.v, vlseg6e16.v, vlseg6e32.v, vlseg6e64.v + // vlseg7e8.v, vlseg7e16.v, vlseg7e32.v, vlseg7e64.v + // vlseg8e8.v, vlseg8e16.v, vlseg8e32.v, vlseg8e64.v + WordI stride = warp.vtype.vsew / 8; + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b1000: { // vl1r.v, vl2r.v, vl4r.v, vl8r.v + uint32_t nreg = instr.getVnf() + 1; + if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) { + std::cout << "Whole vector register load - reserved value for nreg: " << nreg << std::endl; + std::abort(); + } + DP(1, "Whole vector register load with nreg: " << nreg); + uint32_t vl = nreg * VLEN / instr.getVsew(); + WordI stride = instr.getVsew() / 8; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, instr.getVsew(), vl, false, stride, 1, 0, vmask); + break; + } + case 0b1011: { // vlm.v + if (warp.vtype.vsew != 8) { + std::cout << "vlm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl; + std::abort(); + } + WordI stride = warp.vtype.vsew / 8; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true); + break; + } + default: + std::cout << "Load vector - unsupported lumop: " << lumop << std::endl; + std::abort(); + } + break; + } + case 0b10: { // strided: vlse8.v, vlse16.v, vlse32.v, vlse64.v + // vlsseg2e8.v, vlsseg2e16.v, vlsseg2e32.v, vlsseg2e64.v + // vlsseg3e8.v, vlsseg3e16.v, vlsseg3e32.v, vlsseg3e64.v + // vlsseg4e8.v, vlsseg4e16.v, vlsseg4e32.v, vlsseg4e64.v + // vlsseg5e8.v, vlsseg5e16.v, vlsseg5e32.v, vlsseg5e64.v + // vlsseg6e8.v, vlsseg6e16.v, vlsseg6e32.v, vlsseg6e64.v + // vlsseg7e8.v, vlsseg7e16.v, vlsseg7e32.v, vlsseg7e64.v + // vlsseg8e8.v, vlsseg8e16.v, vlsseg8e32.v, vlsseg8e64.v + auto rsrc1 = instr.getRSrc(1); + auto rdest = 
instr.getRDest(); + WordI stride = warp.ireg_file.at(0).at(rsrc1); + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_load(warp.vreg_file, this, rsdata, rdest, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b01: // indexed - unordered, vluxei8.v, vluxei16.v, vluxei32.v, vluxei64.v + // vluxseg2e8.v, vluxseg2e16.v, vluxseg2e32.v, vluxseg2e64.v + // vluxseg3e8.v, vluxseg3e16.v, vluxseg3e32.v, vluxseg3e64.v + // vluxseg4e8.v, vluxseg4e16.v, vluxseg4e32.v, vluxseg4e64.v + // vluxseg5e8.v, vluxseg5e16.v, vluxseg5e32.v, vluxseg5e64.v + // vluxseg6e8.v, vluxseg6e16.v, vluxseg6e32.v, vluxseg6e64.v + // vluxseg7e8.v, vluxseg7e16.v, vluxseg7e32.v, vluxseg7e64.v + // vluxseg8e8.v, vluxseg8e16.v, vluxseg8e32.v, vluxseg8e64.v + case 0b11: { // indexed - ordered, vloxei8.v, vloxei16.v, vloxei32.v, vloxei64.v + // vloxseg2e8.v, vloxseg2e16.v, vloxseg2e32.v, vloxseg2e64.v + // vloxseg3e8.v, vloxseg3e16.v, vloxseg3e32.v, vloxseg3e64.v + // vloxseg4e8.v, vloxseg4e16.v, vloxseg4e32.v, vloxseg4e64.v + // vloxseg5e8.v, vloxseg5e16.v, vloxseg5e32.v, vloxseg5e64.v + // vloxseg6e8.v, vloxseg6e16.v, vloxseg6e32.v, vloxseg6e64.v + // vloxseg7e8.v, vloxseg7e16.v, vloxseg7e32.v, vloxseg7e64.v + // vloxseg8e8.v, vloxseg8e16.v, vloxseg8e32.v, vloxseg8e64.v + uint32_t nfields = instr.getVnf() + 1; + vector_op_vv_load(warp.vreg_file, this, rsdata, instr.getRSrc(1), rdest, warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask); + break; + } + default: + std::cout << "Load vector - unsupported mop: " << mop << std::endl; + std::abort(); + } +} + +template +void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 
1 : 1 << (lmul & 0b11); + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + uint32_t nfields_strided = strided ? nfields : 1; + Word mem_addr = rsdata[0][0].i + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); + Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); + DP(1, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + emul_->dcache_write(&mem_data, mem_addr, vsew / 8); + } +} + +void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + case 16: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + case 32: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + case 64: + vector_op_vix_store(vreg_file, emul_, rsdata, rsrc3, vl, strided, stride, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VSE for vsew: " << vsew << std::endl; + std::abort(); + } +} + +template +void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + uint32_t vsew = sizeof(DT) * 8; + uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); + for (uint32_t i = 0; i < vl * nfields; i++) { + if (isMasked(vreg_file, 0, i / nfields, vmask)) continue; + + Word offset = 0; + switch (iSew) { + case 8: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 16: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 32: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + case 64: + offset = getVregData(vreg_file, rsrc1, i / nfields); + break; + default: + std::cout << "Unsupported iSew: " << iSew << std::endl; + std::abort(); + } + + Word mem_addr = rsdata[0][0].i + offset + (i % nfields) * sizeof(DT); + Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); + DP(1, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); + emul_->dcache_write(&mem_data, mem_addr, vsew / 8); + } +} + +void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, std::vector &rsdata, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { + switch (vsew) { + case 8: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + case 16: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + case 32: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + case 64: + vector_op_vv_store(vreg_file, emul_, rsdata, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); + break; + default: + std::cout << "Failed to execute VSUX/VSOX for vsew: " << vsew << std::endl; + std::abort(); + } +} + +void Emulator::storeVector(const Instr &instr, uint32_t wid, std::vector &rsdata) { + auto &warp = warps_.at(wid); + auto vmask = instr.getVmask(); + auto mop = instr.getVmop(); + switch (mop) { + case 0b00: { // unit-stride + auto vs3 = instr.getRSrc(1); + auto sumop = instr.getVumop(); + WordI stride = warp.vtype.vsew / 8; + switch (sumop) { + case 0b0000: { // vse8.v, vse16.v, vse32.v, vse64.v + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, false, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b1000: { // vs1r.v, vs2r.v, vs4r.v, vs8r.v + uint32_t nreg = instr.getVnf() + 1; + if (nreg != 1 && nreg != 2 && nreg != 4 && nreg != 8) { + std::cout << "Whole vector register store - reserved value for nreg: " << nreg << std::endl; + std::abort(); + } + DP(1, "Whole vector register store with nreg: " << nreg); + uint32_t vl = nreg * VLEN / 8; + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, vl, false, stride, 1, 0, vmask); + break; + } + case 
0b1011: { // vsm.v + if (warp.vtype.vsew != 8) { + std::cout << "vsm.v only supports EEW=8, but EEW was: " << warp.vtype.vsew << std::endl; + std::abort(); + } + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, (warp.vl + 7) / 8, false, stride, 1, 0, true); + break; + } + default: + std::cout << "Store vector - unsupported sumop: " << sumop << std::endl; + std::abort(); + } + break; + } + case 0b10: { // strided: vsse8.v, vsse16.v, vsse32.v, vsse64.v + // vssseg2e8.v, vssseg2e16.v, vssseg2e32.v, vssseg2e64.v + // vssseg3e8.v, vssseg3e16.v, vssseg3e32.v, vssseg3e64.v + // vssseg4e8.v, vssseg4e16.v, vssseg4e32.v, vssseg4e64.v + // vssseg5e8.v, vssseg5e16.v, vssseg5e32.v, vssseg5e64.v + // vssseg6e8.v, vssseg6e16.v, vssseg6e32.v, vssseg6e64.v + // vssseg7e8.v, vssseg7e16.v, vssseg7e32.v, vssseg7e64.v + // vssseg8e8.v, vssseg8e16.v, vssseg8e32.v, vssseg8e64.v + auto rsrc1 = instr.getRSrc(1); + auto vs3 = instr.getRSrc(2); + WordI stride = warp.ireg_file.at(0).at(rsrc1); + uint32_t nfields = instr.getVnf() + 1; + vector_op_vix_store(warp.vreg_file, this, rsdata, vs3, warp.vtype.vsew, warp.vl, true, stride, nfields, warp.vtype.vlmul, vmask); + break; + } + case 0b01: // indexed - unordered, vsuxei8.v, vsuxei16.v, vsuxei32.v, vsuxei64.v + // vsuxseg2ei8.v, vsuxseg2ei16.v, vsuxseg2ei32.v, vsuxseg2ei64.v + // vsuxseg3ei8.v, vsuxseg3ei16.v, vsuxseg3ei32.v, vsuxseg3ei64.v + // vsuxseg4ei8.v, vsuxseg4ei16.v, vsuxseg4ei32.v, vsuxseg4ei64.v + // vsuxseg5ei8.v, vsuxseg5ei16.v, vsuxseg5ei32.v, vsuxseg5ei64.v + // vsuxseg6ei8.v, vsuxseg6ei16.v, vsuxseg6ei32.v, vsuxseg6ei64.v + // vsuxseg7ei8.v, vsuxseg7ei16.v, vsuxseg7ei32.v, vsuxseg7ei64.v + // vsuxseg8ei8.v, vsuxseg8ei16.v, vsuxseg8ei32.v, vsuxseg8ei64.v + case 0b11: { // indexed - ordered, vsoxei8.v, vsoxei16.v, vsoxei32.v, vsoxei64.v + // vsoxseg2ei8.v, vsoxseg2ei16.v, vsoxseg2ei32.v, vsoxseg2ei64.v + // vsoxseg3ei8.v, vsoxseg3ei16.v, vsoxseg3ei32.v, vsoxseg3ei64.v + // vsoxseg4ei8.v, vsoxseg4ei16.v, 
vsoxseg4ei32.v, vsoxseg4ei64.v + // vsoxseg5ei8.v, vsoxseg5ei16.v, vsoxseg5ei32.v, vsoxseg5ei64.v + // vsoxseg6ei8.v, vsoxseg6ei16.v, vsoxseg6ei32.v, vsoxseg6ei64.v + // vsoxseg7ei8.v, vsoxseg7ei16.v, vsoxseg7ei32.v, vsoxseg7ei64.v + // vsoxseg8ei8.v, vsoxseg8ei16.v, vsoxseg8ei32.v, vsoxseg8ei64.v + uint32_t nfields = instr.getVnf() + 1; + vector_op_vv_store(warp.vreg_file, this, rsdata, instr.getRSrc(1), instr.getRSrc(2), warp.vtype.vsew, instr.getVsew(), warp.vl, nfields, warp.vtype.vlmul, vmask); + break; + } + default: + std::cout << "Store vector - unsupported mop: " << mop << std::endl; + std::abort(); + } +} + +template