mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' into stencil3d
This commit is contained in:
commit
e42c7c6a82
24 changed files with 845 additions and 375 deletions
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
|
@ -102,7 +102,8 @@ jobs:
|
|||
cd build${{ matrix.xlen }}
|
||||
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
|
||||
source ci/toolchain_env.sh
|
||||
make build -s > /dev/null
|
||||
make software -s > /dev/null
|
||||
make tests -s > /dev/null
|
||||
|
||||
- name: Upload Build Artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
|
|
10
Makefile.in
10
Makefile.in
|
@ -1,5 +1,7 @@
|
|||
include config.mk
|
||||
|
||||
.PHONY: build software tests
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
|
@ -15,6 +17,14 @@ build:
|
|||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
software:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime/stub
|
||||
|
||||
tests:
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
|
|
|
@ -56,7 +56,6 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||
$ git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ cd Vortex
|
||||
### Configure your build folder
|
||||
# By default, the toolchain default install location is the /opt folder and can be overridden by setting --tooldir.
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
|
|
|
@ -23,37 +23,6 @@ XLEN=${XLEN:=@XLEN@}
|
|||
|
||||
echo "Vortex Regression Test: XLEN=$XLEN"
|
||||
|
||||
split_file() {
|
||||
if [[ $# -ne 2 ]]; then
|
||||
echo "Usage: $0 <filename> <start_with>"
|
||||
return 1
|
||||
fi
|
||||
input_file="$1"
|
||||
start_with="$2"
|
||||
if [[ ! -r "$input_file" ]]; then
|
||||
echo "Error: File '$input_file' is not readable or does not exist."
|
||||
return 1
|
||||
fi
|
||||
count=0
|
||||
output_file=""
|
||||
while IFS= read -r line; do
|
||||
if [[ $line == $start_with* ]]; then
|
||||
count=$((count + 1))
|
||||
output_file="$input_file.part$count"
|
||||
> "$output_file" # ensure empty
|
||||
fi
|
||||
if [[ -n "$output_file" ]]; then
|
||||
echo "$line" >> "$output_file"
|
||||
fi
|
||||
done < "$input_file"
|
||||
|
||||
if [[ $count -eq 0 ]]; then
|
||||
echo "No lines starting with '$start_with' were found in '$input_file'."
|
||||
fi
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
unittest()
|
||||
{
|
||||
make -C tests/unittest run
|
||||
|
@ -64,6 +33,9 @@ isa()
|
|||
{
|
||||
echo "begin isa tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
|
@ -94,8 +66,8 @@ isa()
|
|||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
# clean build
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
echo "isa tests done!"
|
||||
}
|
||||
|
@ -104,6 +76,9 @@ kernel()
|
|||
{
|
||||
echo "begin kernel tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C sim/rtlsim
|
||||
|
||||
make -C tests/kernel run-simx
|
||||
make -C tests/kernel run-rtlsim
|
||||
|
||||
|
@ -114,6 +89,9 @@ regression()
|
|||
{
|
||||
echo "begin regression tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/regression run-simx
|
||||
make -C tests/regression run-rtlsim
|
||||
|
||||
|
@ -132,6 +110,9 @@ opencl()
|
|||
{
|
||||
echo "begin opencl tests..."
|
||||
|
||||
make -C runtime/simx
|
||||
make -C runtime/rtlsim
|
||||
|
||||
make -C tests/opencl run-simx
|
||||
make -C tests/opencl run-rtlsim
|
||||
|
||||
|
@ -148,29 +129,20 @@ test_csv_trace()
|
|||
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-simx-32im > run_simx.log
|
||||
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
|
||||
split_file run_simx.log "Running "
|
||||
split_file run_rtlsim.log "Running "
|
||||
for file in ./run_simx.log.part*; do
|
||||
if [[ -f "$file" ]]; then
|
||||
file2="${file//simx/rtlsim}"
|
||||
if [[ -f "$file2" ]]; then
|
||||
./ci/trace_csv.py -tsimx $file -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim $file2 -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
else
|
||||
echo "File $file2 not found."
|
||||
fi
|
||||
fi
|
||||
done
|
||||
# restore default prebuilt configuration
|
||||
make -C sim/simx clean && make -C sim/simx > /dev/null
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
diff trace_rtlsim.csv trace_simx.csv
|
||||
# clean build
|
||||
make -C sim/simx clean
|
||||
make -C sim/rtlsim clean
|
||||
}
|
||||
|
||||
debug()
|
||||
{
|
||||
echo "begin debugging tests..."
|
||||
|
||||
test_csv_trace
|
||||
|
||||
./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
|
||||
./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1"
|
||||
|
@ -250,11 +222,12 @@ config2()
|
|||
STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
|
||||
# disabling M & F extensions
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean && make -C sim/rtlsim > /dev/null
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
# disabling ZICOND extension
|
||||
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
|
||||
|
|
4
configure
vendored
4
configure
vendored
|
@ -111,7 +111,7 @@ copy_files() {
|
|||
|
||||
# default configuration parameters
|
||||
default_xlen=32
|
||||
default_tooldir=/opt
|
||||
default_tooldir=$HOME/tools
|
||||
default_osversion=$(detect_osversion)
|
||||
default_prefix=$CURRENT_DIR
|
||||
|
||||
|
@ -140,7 +140,7 @@ PREFIX=${PREFIX:=$default_prefix}
|
|||
usage() {
|
||||
echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>]"
|
||||
echo " --xlen=<value> Set the XLEN value (default: 32)"
|
||||
echo " --tooldir=<path> Set the TOOLDIR path (default: /opt)"
|
||||
echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
|
||||
echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
|
||||
echo " --prefix=<path> Set installation directory"
|
||||
exit 1
|
||||
|
|
|
@ -374,7 +374,7 @@ module VX_mem_coalescer #(
|
|||
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);
|
||||
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
|
||||
if ($countones(out_req_pmask) > 1) begin
|
||||
`TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid));
|
||||
`TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid));
|
||||
end
|
||||
end
|
||||
if (out_rsp_fire) begin
|
||||
|
|
|
@ -38,11 +38,9 @@ module VX_stream_pack #(
|
|||
output wire [TAG_WIDTH-1:0] tag_out,
|
||||
input wire ready_out
|
||||
);
|
||||
localparam LOG_NUM_REQS = `CLOG2(NUM_REQS);
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [LOG_NUM_REQS-1:0] grant_index;
|
||||
wire [NUM_REQS-1:0] grant_onehot;
|
||||
wire grant_valid;
|
||||
wire grant_ready;
|
||||
|
||||
|
@ -54,29 +52,33 @@ module VX_stream_pack #(
|
|||
.reset (reset),
|
||||
.requests (valid_in),
|
||||
.grant_valid (grant_valid),
|
||||
.grant_index (grant_index),
|
||||
`UNUSED_PIN (grant_onehot),
|
||||
`UNUSED_PIN (grant_index),
|
||||
.grant_onehot(grant_onehot),
|
||||
.grant_ready (grant_ready)
|
||||
);
|
||||
|
||||
reg [NUM_REQS-1:0] valid_sel;
|
||||
reg [NUM_REQS-1:0] ready_sel;
|
||||
wire ready_unqual;
|
||||
wire [TAG_WIDTH-1:0] tag_sel;
|
||||
|
||||
wire [TAG_WIDTH-1:0] tag_sel = tag_in[grant_index];
|
||||
VX_onehot_mux #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.N (NUM_REQS)
|
||||
) onehot_mux (
|
||||
.data_in (tag_in),
|
||||
.sel_in (grant_onehot),
|
||||
.data_out (tag_sel)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
valid_sel = '0;
|
||||
ready_sel = '0;
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]) begin
|
||||
valid_sel[i] = valid_in[i];
|
||||
ready_sel[i] = ready_unqual;
|
||||
end
|
||||
end
|
||||
wire [NUM_REQS-1:0] tag_matches;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]);
|
||||
end
|
||||
|
||||
assign grant_ready = ready_unqual;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign ready_in[i] = grant_ready & tag_matches[i];
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] mask_sel = valid_in & tag_matches;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (NUM_REQS + TAG_WIDTH + (NUM_REQS * DATA_WIDTH)),
|
||||
|
@ -86,15 +88,13 @@ module VX_stream_pack #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (grant_valid),
|
||||
.data_in ({valid_sel, tag_sel, data_in}),
|
||||
.ready_in (ready_unqual),
|
||||
.data_in ({mask_sel, tag_sel, data_in}),
|
||||
.ready_in (grant_ready),
|
||||
.valid_out (valid_out),
|
||||
.data_out ({mask_out, tag_out, data_out}),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
assign ready_in = ready_sel;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
Frontend:
|
||||
impl: GEM5
|
||||
|
||||
MemorySystem:
|
||||
impl: GenericDRAM
|
||||
clock_ratio: 1
|
||||
|
||||
DRAM:
|
||||
impl: HBM2
|
||||
org:
|
||||
preset: HBM2_8Gb
|
||||
density: 8192
|
||||
timing:
|
||||
preset: HBM2_2Gbps
|
||||
|
||||
Controller:
|
||||
impl: Generic
|
||||
Scheduler:
|
||||
impl: FRFCFS
|
||||
RefreshManager:
|
||||
impl: AllBank
|
||||
RowPolicy:
|
||||
impl: OpenRowPolicy
|
||||
cap: 1
|
||||
plugins:
|
||||
|
||||
AddrMapper:
|
||||
impl: RoBaRaCoCh
|
314
sim/common/bitvector.h
Normal file
314
sim/common/bitvector.h
Normal file
|
@ -0,0 +1,314 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <algorithm>
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Dynamically sized bit set stored as a vector of T words. Bit `pos` lives in
// word `pos / BITS_PER_WORD` at bit offset `pos % BITS_PER_WORD`, so bit 0 is
// the least significant bit of word 0.
//
// Invariants maintained by every mutating member:
//   * bits_.size() == ceil(size_ / BITS_PER_WORD)
//   * every storage bit at a position >= size_ (the padding of the last word)
//     is zero. count(), none()/any() and operator== work on whole words and
//     are only correct while this holds.
//   * all_zero_ caches "no bit is set" so none()/any() are O(1).
template <typename T = uint32_t>
class BitVector {
private:
  static constexpr size_t BITS_PER_WORD = sizeof(T) * 8;

  std::vector<T> bits_;  // packed storage, least significant word first
  size_t size_;          // number of valid bits
  bool all_zero_;        // cached: true when no bit is set

  // Index of the storage word that holds bit `pos`.
  size_t wordIndex(size_t pos) const {
    return pos / BITS_PER_WORD;
  }

  // Single-bit mask selecting bit `pos` inside its storage word.
  T bitMask(size_t pos) const {
    return T(1) << (pos % BITS_PER_WORD);
  }

  // Recomputes the cached all-zero flag by scanning every storage word.
  void updateAllZero() {
    all_zero_ = std::all_of(bits_.begin(), bits_.end(), [](T word) { return word == 0; });
  }

  // Zeroes the unused high bits of the last storage word. Must be called after
  // any operation that can write above size_ (inversion, left shift, shrink).
  void clearPadding() {
    size_t used_bits = size_ % BITS_PER_WORD;
    if (used_bits != 0) {
      // used_bits != 0 implies size_ > 0, hence bits_ is not empty
      bits_.back() &= T((T(1) << used_bits) - 1);
    }
  }

public:
  // Creates a vector of `size` bits, all cleared.
  explicit BitVector(size_t size = 0)
    : bits_((size + (BITS_PER_WORD - 1)) / BITS_PER_WORD)
    , size_(size)
    , all_zero_(true)
  {}

  // Sets bit `pos`. Throws std::out_of_range if pos >= size().
  void set(size_t pos) {
    if (pos >= size_) throw std::out_of_range("Index out of range");
    bits_[this->wordIndex(pos)] |= this->bitMask(pos);
    all_zero_ = false;
  }

  // Sets or clears bit `pos` according to `value`.
  // Throws std::out_of_range if pos >= size().
  void set(size_t pos, bool value) {
    if (value) {
      this->set(pos);
    } else {
      this->reset(pos);
    }
  }

  // Clears every bit.
  void reset() {
    std::fill(bits_.begin(), bits_.end(), 0);
    all_zero_ = true;
  }

  // Clears bit `pos`. Throws std::out_of_range if pos >= size().
  void reset(size_t pos) {
    if (pos >= size_) throw std::out_of_range("Index out of range");
    bits_[this->wordIndex(pos)] &= ~this->bitMask(pos);
    this->updateAllZero();
  }

  // Returns the value of bit `pos`. Throws std::out_of_range if pos >= size().
  bool test(size_t pos) const {
    if (pos >= size_) throw std::out_of_range("Index out of range");
    return bits_[this->wordIndex(pos)] & this->bitMask(pos);
  }

  // Number of valid bits.
  size_t size() const {
    return size_;
  }

  // Changes the number of valid bits. Bits below min(old, new) size keep their
  // value; bits gained by growing read as zero.
  void resize(size_t new_size) {
    size_ = new_size;
    bits_.resize((new_size + (BITS_PER_WORD - 1)) / BITS_PER_WORD, 0);
    // when shrinking inside a word, drop the bits that fell out of range so
    // they cannot reappear on a later grow or be seen by count()/none()
    this->clearPadding();
    this->updateAllZero();
  }

  // Two vectors are equal when they have the same size and the same bits.
  bool operator==(const BitVector& other) const {
    return (size_ == other.size_) && (bits_ == other.bits_);
  }

  bool operator!=(const BitVector& other) const {
    return !(*this == other);
  }

  // Read-only element access; same bounds checking as test().
  bool operator[](size_t pos) const {
    return test(pos);
  }

  // Bitwise AND with `other`. Throws std::invalid_argument on size mismatch.
  BitVector& operator&=(const BitVector& other) {
    if (size_ != other.size_) throw std::invalid_argument("Bit sizes must match");
    for (size_t i = 0; i < bits_.size(); ++i) {
      bits_[i] &= other.bits_[i];
    }
    this->updateAllZero();
    return *this;
  }

  // Bitwise OR with `other`. Throws std::invalid_argument on size mismatch.
  BitVector& operator|=(const BitVector& other) {
    if (size_ != other.size_) throw std::invalid_argument("Bit sizes must match");
    for (size_t i = 0; i < bits_.size(); ++i) {
      bits_[i] |= other.bits_[i];
    }
    this->updateAllZero();
    return *this;
  }

  // Bitwise XOR with `other`. Throws std::invalid_argument on size mismatch.
  BitVector& operator^=(const BitVector& other) {
    if (size_ != other.size_) throw std::invalid_argument("Bit sizes must match");
    for (size_t i = 0; i < bits_.size(); ++i) {
      bits_[i] ^= other.bits_[i];
    }
    this->updateAllZero();
    return *this;
  }

  // Returns a copy with every valid bit inverted.
  BitVector operator~() const {
    BitVector result(size_);
    for (size_t i = 0; i < bits_.size(); ++i) {
      result.bits_[i] = T(~bits_[i]);
    }
    // word-wise inversion also sets the padding bits; clear them again
    result.clearPadding();
    result.updateAllZero();
    return result;
  }

  // Inverts every valid bit in place.
  void flip() {
    for (auto &word : bits_) {
      word = T(~word);
    }
    // word-wise inversion also sets the padding bits; clear them again
    this->clearPadding();
    this->updateAllZero();
  }

  // Number of bits that are set.
  size_t count() const {
    size_t count = 0;
    for (const auto &word : bits_) {
      count += std::bitset<BITS_PER_WORD>(word).count();
    }
    return count;
  }

  // True when no bit is set.
  bool none() const {
    return all_zero_;
  }

  // True when at least one bit is set.
  bool any() const {
    return !all_zero_;
  }

  // True when every valid bit is set (also true for an empty vector).
  bool all() const {
    size_t full_bits = size_ / BITS_PER_WORD;
    size_t remaining_bits = size_ % BITS_PER_WORD;
    T full_mask = ~T(0);
    for (size_t i = 0; i < full_bits; ++i) {
      if (bits_[i] != full_mask)
        return false;
    }
    if (remaining_bits > 0) {
      T partial_mask = (T(1) << remaining_bits) - 1;
      if ((bits_[full_bits] & partial_mask) != partial_mask)
        return false;
    }
    return true;
  }

  // Shifts towards higher bit positions by `pos`; bits shifted past size()
  // are discarded and vacated low bits become zero.
  BitVector& operator<<=(size_t pos) {
    if (pos >= size_) {
      reset();
      return *this;
    }

    size_t word_shift = pos / BITS_PER_WORD;
    size_t bit_shift = pos % BITS_PER_WORD;

    if (word_shift > 0) {
      // word_shift > 0 guarantees the unsigned index stops before wrapping
      for (size_t i = bits_.size() - 1; i >= word_shift; --i) {
        bits_[i] = bits_[i - word_shift];
      }
      std::fill(bits_.begin(), bits_.begin() + word_shift, 0);
    }

    if (bit_shift > 0) {
      for (size_t i = bits_.size() - 1; i > 0; --i) {
        bits_[i] = (bits_[i] << bit_shift) | (bits_[i - 1] >> (BITS_PER_WORD - bit_shift));
      }
      bits_[0] <<= bit_shift;
    }

    // bits pushed above size_ landed in the padding of the last word
    this->clearPadding();
    this->updateAllZero();
    return *this;
  }

  // Shifts towards lower bit positions by `pos`; bits shifted below position 0
  // are discarded and vacated high bits become zero.
  BitVector& operator>>=(size_t pos) {
    if (pos >= size_) {
      reset();
      return *this;
    }

    size_t word_shift = pos / BITS_PER_WORD;
    size_t bit_shift = pos % BITS_PER_WORD;

    if (word_shift > 0) {
      for (size_t i = 0; i < bits_.size() - word_shift; ++i) {
        bits_[i] = bits_[i + word_shift];
      }
      std::fill(bits_.end() - word_shift, bits_.end(), 0);
    }

    if (bit_shift > 0) {
      for (size_t i = 0; i < bits_.size() - 1; ++i) {
        bits_[i] = (bits_[i] >> bit_shift) | (bits_[i + 1] << (BITS_PER_WORD - bit_shift));
      }
      bits_.back() >>= bit_shift;
    }

    this->updateAllZero();
    return *this;
  }

  // Returns the bits as '0'/'1' characters, bit 0 first.
  std::string to_string() const {
    std::string result;
    for (size_t i = 0; i < size_; ++i) {
      result.push_back(test(i) ? '1' : '0');
    }
    return result;
  }

  // Packs the bits into an unsigned long (bit i -> value bit i).
  // Throws std::overflow_error if size() exceeds the width of unsigned long.
  unsigned long to_ulong() const {
    if (size_ > sizeof(unsigned long) * 8) {
      throw std::overflow_error("BitVector size exceeds unsigned long capacity");
    }

    unsigned long result = 0;
    for (size_t i = 0; i < size_; ++i) {
      if (test(i)) {
        result |= (1UL << i);
      }
    }
    return result;
  }

  // Packs the bits into an unsigned long long (bit i -> value bit i).
  // Throws std::overflow_error if size() exceeds the width of unsigned long long.
  unsigned long long to_ullong() const {
    if (size_ > sizeof(unsigned long long) * 8) {
      throw std::overflow_error("BitVector size exceeds unsigned long long capacity");
    }

    unsigned long long result = 0;
    for (size_t i = 0; i < size_; ++i) {
      if (test(i)) {
        result |= (1ULL << i);
      }
    }
    return result;
  }

  // Streams each bit as a bool, bit 0 first.
  friend std::ostream& operator<<(std::ostream& os, const BitVector& bv) {
    for (size_t i = 0; i < bv.size_; ++i) {
      os << bv.test(i);
    }
    return os;
  }

  // Non-mutating forms of the compound operators above; each copies lhs and
  // applies the matching compound operator (same exceptions apply).
  friend BitVector operator&(const BitVector& lhs, const BitVector& rhs) {
    BitVector result(lhs);
    result &= rhs;
    return result;
  }

  friend BitVector operator|(const BitVector& lhs, const BitVector& rhs) {
    BitVector result(lhs);
    result |= rhs;
    return result;
  }

  friend BitVector operator^(const BitVector& lhs, const BitVector& rhs) {
    BitVector result(lhs);
    result ^= rhs;
    return result;
  }

  friend BitVector operator<<(const BitVector& lhs, size_t pos) {
    BitVector result(lhs);
    result <<= pos;
    return result;
  }

  friend BitVector operator>>(const BitVector& lhs, size_t pos) {
    BitVector result(lhs);
    result >>= pos;
    return result;
  }
};
|
||||
|
||||
}
|
||||
|
||||
// std::hash specialization for BitVector
|
||||
namespace std {
|
||||
|
||||
template <typename T>
|
||||
struct hash<vortex::BitVector<T>> {
|
||||
size_t operator()(const vortex::BitVector<T>& bv) const {
|
||||
return hash<std::string>()(bv.to_string());
|
||||
}
|
||||
};
|
||||
|
||||
}
|
|
@ -89,7 +89,9 @@ int main(int argc, char **argv) {
|
|||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
std::cout << "[VXDRV] START: program=" << program << std::endl;
|
||||
#endif
|
||||
// run simulation
|
||||
processor.run();
|
||||
|
||||
|
|
|
@ -539,7 +539,7 @@ private:
|
|||
continue;
|
||||
MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
|
||||
simobject_->CoreRspPorts.at(info.req_id).push(core_rsp, config_.latency);
|
||||
DT(3, simobject_->name() << "-core-" << core_rsp);
|
||||
DT(3, simobject_->name() << "-replay-" << core_rsp);
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
@ -583,7 +583,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
DT(3, simobject_->name() << "-writethrough-" << mem_req);
|
||||
} else {
|
||||
// mark line as dirty
|
||||
hit_line.dirty = true;
|
||||
|
@ -615,7 +615,7 @@ private:
|
|||
mem_req.write = true;
|
||||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
DT(3, simobject_->name() << "-writeback-" << mem_req);
|
||||
++perf_stats_.evictions;
|
||||
}
|
||||
}
|
||||
|
@ -629,7 +629,7 @@ private:
|
|||
mem_req.cid = pipeline_req.cid;
|
||||
mem_req.uuid = pipeline_req.uuid;
|
||||
mem_req_ports_.at(bank_id).push(mem_req, 1);
|
||||
DT(3, simobject_->name() << "-dram-" << mem_req);
|
||||
DT(3, simobject_->name() << "-writethrough-" << mem_req);
|
||||
}
|
||||
// send core response
|
||||
if (config_.write_reponse) {
|
||||
|
|
|
@ -44,8 +44,10 @@ Core::Core(const SimContext& ctx,
|
|||
, operands_(ISSUE_WIDTH)
|
||||
, dispatchers_((uint32_t)FUType::Count)
|
||||
, func_units_((uint32_t)FUType::Count)
|
||||
, lsu_demux_(LSU_NUM_REQS)
|
||||
, lsu_demux_(NUM_LSU_BLOCKS)
|
||||
, mem_coalescers_(NUM_LSU_BLOCKS)
|
||||
, lsu_dcache_adapter_(NUM_LSU_BLOCKS)
|
||||
, lsu_lmem_adapter_(NUM_LSU_BLOCKS)
|
||||
, pending_icache_(arch_.num_warps())
|
||||
, commit_arbs_(ISSUE_WIDTH)
|
||||
{
|
||||
|
@ -72,31 +74,53 @@ Core::Core(const SimContext& ctx,
|
|||
});
|
||||
|
||||
// create lsu demux
|
||||
for (uint32_t i = 0; i < LSU_NUM_REQS; ++i) {
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_demux%d", core_id, i);
|
||||
lsu_demux_.at(i) = LocalMemDemux::Create(sname, 1);
|
||||
}
|
||||
|
||||
// connect dcache-coalescer
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
for (uint32_t c = 0; c < DCACHE_CHANNELS; ++c) {
|
||||
uint32_t i = b * DCACHE_CHANNELS + c;
|
||||
mem_coalescers_.at(b)->ReqOut.at(c).bind(&dcache_req_ports.at(i));
|
||||
dcache_rsp_ports.at(i).bind(&mem_coalescers_.at(b)->RspOut.at(c));
|
||||
}
|
||||
// create lsu dcache adapter
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_dcache_adapter%d", core_id, i);
|
||||
lsu_dcache_adapter_.at(i) = LsuMemAdapter::Create(sname, DCACHE_CHANNELS, 1);
|
||||
}
|
||||
|
||||
// create lsu lmem adapter
|
||||
for (uint32_t i = 0; i < NUM_LSU_BLOCKS; ++i) {
|
||||
snprintf(sname, 100, "core%d-lsu_lmem_adapter%d", core_id, i);
|
||||
lsu_lmem_adapter_.at(i) = LsuMemAdapter::Create(sname, LSU_CHANNELS, 1);
|
||||
}
|
||||
|
||||
// connect lsu demux
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
lsu_demux_.at(b)->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn);
|
||||
mem_coalescers_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspDC);
|
||||
|
||||
lsu_demux_.at(b)->ReqLmem.bind(&lsu_lmem_adapter_.at(b)->ReqIn);
|
||||
lsu_lmem_adapter_.at(b)->RspIn.bind(&lsu_demux_.at(b)->RspLmem);
|
||||
}
|
||||
|
||||
// connect coalescer-adapter
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
mem_coalescers_.at(b)->ReqOut.bind(&lsu_dcache_adapter_.at(b)->ReqIn);
|
||||
lsu_dcache_adapter_.at(b)->RspIn.bind(&mem_coalescers_.at(b)->RspOut);
|
||||
}
|
||||
|
||||
// connect adapter-dcache
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
for (uint32_t c = 0; c < DCACHE_CHANNELS; ++c) {
|
||||
uint32_t i = b * DCACHE_CHANNELS + c;
|
||||
lsu_dcache_adapter_.at(b)->ReqOut.at(c).bind(&dcache_req_ports.at(i));
|
||||
dcache_rsp_ports.at(i).bind(&lsu_dcache_adapter_.at(b)->RspOut.at(c));
|
||||
}
|
||||
}
|
||||
|
||||
// connect adapter-lmem
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
for (uint32_t c = 0; c < LSU_CHANNELS; ++c) {
|
||||
uint32_t i = b * LSU_CHANNELS + c;
|
||||
auto lmem_demux = lsu_demux_.at(i);
|
||||
|
||||
lmem_demux->ReqDC.bind(&mem_coalescers_.at(b)->ReqIn.at(c));
|
||||
mem_coalescers_.at(b)->RspIn.at(c).bind(&lmem_demux->RspDC);
|
||||
|
||||
lmem_demux->ReqSM.bind(&local_mem_->Inputs.at(i));
|
||||
local_mem_->Outputs.at(i).bind(&lmem_demux->RspSM);
|
||||
lsu_lmem_adapter_.at(b)->ReqOut.at(c).bind(&local_mem_->Inputs.at(i));
|
||||
local_mem_->Outputs.at(i).bind(&lsu_lmem_adapter_.at(b)->RspOut.at(c));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -152,6 +152,8 @@ private:
|
|||
LocalMem::Ptr local_mem_;
|
||||
std::vector<LocalMemDemux::Ptr> lsu_demux_;
|
||||
std::vector<MemCoalescer::Ptr> mem_coalescers_;
|
||||
std::vector<LsuMemAdapter::Ptr> lsu_dcache_adapter_;
|
||||
std::vector<LsuMemAdapter::Ptr> lsu_lmem_adapter_;
|
||||
|
||||
PipelineLatch fetch_latch_;
|
||||
PipelineLatch decode_latch_;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace vortex;
|
||||
|
||||
AluUnit::AluUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "ALU") {}
|
||||
AluUnit::AluUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "alu-unit") {}
|
||||
|
||||
void AluUnit::tick() {
|
||||
for (uint32_t iw = 0; iw < ISSUE_WIDTH; ++iw) {
|
||||
|
@ -49,7 +49,7 @@ void AluUnit::tick() {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->alu_type << ", " << *trace);
|
||||
DT(3, this->name() << ": op" << trace->alu_type << ", " << *trace);
|
||||
if (trace->eop && trace->fetch_stall) {
|
||||
core_->resume(trace->wid);
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ void AluUnit::tick() {
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
FpuUnit::FpuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "FPU") {}
|
||||
FpuUnit::FpuUnit(const SimContext& ctx, Core* core) : FuncUnit(ctx, core, "fpu-unit") {}
|
||||
|
||||
void FpuUnit::tick() {
|
||||
for (uint32_t iw = 0; iw < ISSUE_WIDTH; ++iw) {
|
||||
|
@ -88,7 +88,7 @@ void FpuUnit::tick() {
|
|||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->fpu_type << ", " << *trace);
|
||||
DT(3,this->name() << ": op=" << trace->fpu_type << ", " << *trace);
|
||||
input.pop();
|
||||
}
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ void FpuUnit::tick() {
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
LsuUnit::LsuUnit(const SimContext& ctx, Core* core)
|
||||
: FuncUnit(ctx, core, "LSU")
|
||||
: FuncUnit(ctx, core, "lsu-unit")
|
||||
, pending_loads_(0)
|
||||
{}
|
||||
|
||||
|
@ -114,25 +114,25 @@ void LsuUnit::tick() {
|
|||
core_->perf_stats_.load_latency += pending_loads_;
|
||||
|
||||
// handle memory responses
|
||||
for (uint32_t r = 0; r < LSU_NUM_REQS; ++r) {
|
||||
auto& dcache_rsp_port = core_->lsu_demux_.at(r)->RspIn;
|
||||
if (dcache_rsp_port.empty())
|
||||
for (uint32_t b = 0; b < NUM_LSU_BLOCKS; ++b) {
|
||||
auto& lsu_rsp_port = core_->lsu_demux_.at(b)->RspIn;
|
||||
if (lsu_rsp_port.empty())
|
||||
continue;
|
||||
uint32_t block_idx = r / LSU_CHANNELS;
|
||||
auto& state = states_.at(block_idx);
|
||||
auto& mem_rsp = dcache_rsp_port.front();
|
||||
auto& entry = state.pending_rd_reqs.at(mem_rsp.tag);
|
||||
auto& state = states_.at(b);
|
||||
auto& lsu_rsp = lsu_rsp_port.front();
|
||||
DT(3, this->name() << "-" << lsu_rsp);
|
||||
auto& entry = state.pending_rd_reqs.at(lsu_rsp.tag);
|
||||
auto trace = entry.trace;
|
||||
DT(3, "mem-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu_type << ", rid=" << r << ", " << *trace);
|
||||
assert(entry.count);
|
||||
--entry.count; // track remaining addresses
|
||||
if (0 == entry.count) {
|
||||
assert(!entry.mask.none());
|
||||
entry.mask &= ~lsu_rsp.mask; // track remaining
|
||||
if (entry.mask.none()) {
|
||||
// whole response received, release trace
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
Outputs.at(iw).push(trace, 1);
|
||||
state.pending_rd_reqs.release(mem_rsp.tag);
|
||||
state.pending_rd_reqs.release(lsu_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
--pending_loads_;
|
||||
pending_loads_ -= lsu_rsp.mask.count();
|
||||
lsu_rsp_port.pop();
|
||||
}
|
||||
|
||||
// handle LSU requests
|
||||
|
@ -145,7 +145,7 @@ void LsuUnit::tick() {
|
|||
continue;
|
||||
Outputs.at(iw).push(state.fence_trace, 1);
|
||||
state.fence_lock = false;
|
||||
DT(3, "fence-unlock: " << state.fence_trace);
|
||||
DT(3, this->name() << "-fence-unlock: " << state.fence_trace);
|
||||
}
|
||||
|
||||
// check input queue
|
||||
|
@ -153,14 +153,13 @@ void LsuUnit::tick() {
|
|||
if (input.empty())
|
||||
continue;
|
||||
|
||||
auto& output = Outputs.at(iw);
|
||||
auto trace = input.front();
|
||||
|
||||
if (trace->lsu_type == LsuType::FENCE) {
|
||||
// schedule fence lock
|
||||
state.fence_trace = trace;
|
||||
state.fence_lock = true;
|
||||
DT(3, "fence-lock: " << *trace);
|
||||
DT(3, this->name() << "-fence-lock: " << *trace);
|
||||
// remove input
|
||||
input.pop();
|
||||
continue;
|
||||
|
@ -178,21 +177,43 @@ void LsuUnit::tick() {
|
|||
trace->log_once(false);
|
||||
}
|
||||
|
||||
// build memory request
|
||||
LsuReq lsu_req(NUM_LSU_LANES);
|
||||
lsu_req.write = is_write;
|
||||
{
|
||||
auto trace_data = std::dynamic_pointer_cast<LsuTraceData>(trace->data);
|
||||
auto t0 = trace->pid * NUM_LSU_LANES;
|
||||
for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) {
|
||||
if (trace->tmask.test(t0 + i)) {
|
||||
lsu_req.mask.set(i);
|
||||
lsu_req.addrs.at(i) = trace_data->mem_addrs.at(t0 + i).addr;
|
||||
}
|
||||
}
|
||||
}
|
||||
uint32_t tag = 0;
|
||||
if (!is_write) {
|
||||
tag = state.pending_rd_reqs.allocate({trace, 0});
|
||||
tag = state.pending_rd_reqs.allocate({trace, lsu_req.mask});
|
||||
}
|
||||
lsu_req.tag = tag;
|
||||
lsu_req.cid = trace->cid;
|
||||
lsu_req.uuid = trace->uuid;
|
||||
|
||||
// send memory request
|
||||
auto num_reqs = this->send_requests(trace, block_idx, tag);
|
||||
core_->lsu_demux_.at(block_idx)->ReqIn.push(lsu_req);
|
||||
DT(3, this->name() << "-" << lsu_req);
|
||||
|
||||
if (!is_write) {
|
||||
state.pending_rd_reqs.at(tag).count = num_reqs;
|
||||
// update stats
|
||||
auto num_addrs = lsu_req.mask.count();
|
||||
if (is_write) {
|
||||
core_->perf_stats_.stores += num_addrs;
|
||||
} else {
|
||||
core_->perf_stats_.loads += num_addrs;
|
||||
pending_loads_ += num_addrs;
|
||||
}
|
||||
|
||||
// do not wait on writes
|
||||
if (is_write) {
|
||||
output.push(trace, 1);
|
||||
Outputs.at(iw).push(trace, 1);
|
||||
}
|
||||
|
||||
// remove input
|
||||
|
@ -200,52 +221,10 @@ void LsuUnit::tick() {
|
|||
}
|
||||
}
|
||||
|
||||
int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) {
|
||||
int count = 0;
|
||||
|
||||
auto trace_data = std::dynamic_pointer_cast<LsuTraceData>(trace->data);
|
||||
bool is_write = (trace->lsu_type == LsuType::STORE);
|
||||
auto t0 = trace->pid * NUM_LSU_LANES;
|
||||
|
||||
for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) {
|
||||
uint32_t t = t0 + i;
|
||||
if (!trace->tmask.test(t))
|
||||
continue;
|
||||
|
||||
int req_idx = block_idx * LSU_CHANNELS + (i % LSU_CHANNELS);
|
||||
auto& dcache_req_port = core_->lsu_demux_.at(req_idx)->ReqIn;
|
||||
|
||||
auto mem_addr = trace_data->mem_addrs.at(t);
|
||||
auto type = get_addr_type(mem_addr.addr);
|
||||
|
||||
MemReq mem_req;
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = is_write;
|
||||
mem_req.type = type;
|
||||
mem_req.tag = tag;
|
||||
mem_req.cid = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
dcache_req_port.push(mem_req, 1);
|
||||
DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
|
||||
<< ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace);
|
||||
|
||||
if (is_write) {
|
||||
++core_->perf_stats_.stores;
|
||||
} else {
|
||||
++core_->perf_stats_.loads;
|
||||
++pending_loads_;
|
||||
}
|
||||
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
SfuUnit::SfuUnit(const SimContext& ctx, Core* core)
|
||||
: FuncUnit(ctx, core, "SFU")
|
||||
: FuncUnit(ctx, core, "sfu-unit")
|
||||
{}
|
||||
|
||||
void SfuUnit::tick() {
|
||||
|
@ -287,7 +266,7 @@ void SfuUnit::tick() {
|
|||
std::abort();
|
||||
}
|
||||
|
||||
DT(3, "pipeline-execute: op=" << trace->sfu_type << ", " << *trace);
|
||||
DT(3, this->name() << ": op=" << trace->sfu_type << ", " << *trace);
|
||||
if (trace->eop && release_warp) {
|
||||
core_->resume(trace->wid);
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -26,13 +26,13 @@ public:
|
|||
std::vector<SimPort<instr_trace_t*>> Inputs;
|
||||
std::vector<SimPort<instr_trace_t*>> Outputs;
|
||||
|
||||
FuncUnit(const SimContext& ctx, Core* core, const char* name)
|
||||
: SimObject<FuncUnit>(ctx, name)
|
||||
FuncUnit(const SimContext& ctx, Core* core, const char* name)
|
||||
: SimObject<FuncUnit>(ctx, name)
|
||||
, Inputs(ISSUE_WIDTH, this)
|
||||
, Outputs(ISSUE_WIDTH, this)
|
||||
, core_(core)
|
||||
{}
|
||||
|
||||
|
||||
virtual ~FuncUnit() {}
|
||||
|
||||
virtual void reset() {}
|
||||
|
@ -73,28 +73,26 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
int send_requests(instr_trace_t* trace, int block_idx, int tag);
|
||||
|
||||
struct pending_req_t {
|
||||
struct pending_req_t {
|
||||
instr_trace_t* trace;
|
||||
uint32_t count;
|
||||
BitVector<> mask;
|
||||
};
|
||||
|
||||
struct lsu_state_t {
|
||||
struct lsu_state_t {
|
||||
HashTable<pending_req_t> pending_rd_reqs;
|
||||
instr_trace_t* fence_trace;
|
||||
instr_trace_t* fence_trace;
|
||||
bool fence_lock;
|
||||
|
||||
lsu_state_t() : pending_rd_reqs(LSUQ_IN_SIZE) {}
|
||||
|
||||
|
||||
void clear() {
|
||||
this->pending_rd_reqs.clear();
|
||||
this->fence_trace = nullptr;
|
||||
this->fence_lock = false;
|
||||
}
|
||||
};
|
||||
|
||||
std::array<lsu_state_t, NUM_LSU_BLOCKS> states_;
|
||||
|
||||
std::array<lsu_state_t, NUM_LSU_BLOCKS> states_;
|
||||
uint64_t pending_loads_;
|
||||
};
|
||||
|
||||
|
@ -103,7 +101,7 @@ private:
|
|||
class SfuUnit : public FuncUnit {
|
||||
public:
|
||||
SfuUnit(const SimContext& ctx, Core*);
|
||||
|
||||
|
||||
void tick();
|
||||
};
|
||||
|
||||
|
|
|
@ -82,11 +82,13 @@ public:
|
|||
continue;
|
||||
}
|
||||
|
||||
DT(4, simobject_->name() << "-" << core_req);
|
||||
|
||||
in_used_banks.at(bank_id) = true;
|
||||
|
||||
if (!core_req.write || config_.write_reponse) {
|
||||
// send response
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid};
|
||||
MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
|
||||
simobject_->Outputs.at(req_id).push(core_rsp, 1);
|
||||
}
|
||||
|
||||
|
|
|
@ -112,7 +112,9 @@ int main(int argc, char **argv) {
|
|||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
std::cout << "[VXDRV] START: program=" << program << std::endl;
|
||||
#endif
|
||||
// run simulation
|
||||
processor.run();
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,100 +16,141 @@
|
|||
using namespace vortex;
|
||||
|
||||
MemCoalescer::MemCoalescer(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t input_size,
|
||||
uint32_t output_size,
|
||||
uint32_t line_size,
|
||||
uint32_t queue_size,
|
||||
uint32_t delay
|
||||
) : SimObject<MemCoalescer>(ctx, name)
|
||||
, ReqIn(input_size, this)
|
||||
, RspIn(input_size, this)
|
||||
, ReqOut(output_size, this)
|
||||
, RspOut(output_size, this)
|
||||
) : SimObject<MemCoalescer>(ctx, name)
|
||||
, ReqIn(this)
|
||||
, RspIn(this)
|
||||
, ReqOut(this)
|
||||
, RspOut(this)
|
||||
, input_size_(input_size)
|
||||
, output_size_(output_size)
|
||||
, output_ratio_(input_size / output_size)
|
||||
, pending_rd_reqs_(queue_size)
|
||||
, sent_mask_(input_size)
|
||||
, line_size_(line_size)
|
||||
, delay_(delay)
|
||||
{}
|
||||
|
||||
void MemCoalescer::reset() {
|
||||
last_index_ = 0;
|
||||
sent_mask_.reset();
|
||||
}
|
||||
|
||||
void MemCoalescer::tick() {
|
||||
uint32_t I = ReqIn.size();
|
||||
uint32_t O = ReqOut.size();
|
||||
|
||||
void MemCoalescer::tick() {
|
||||
// process incoming responses
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
if (RspOut.at(o).empty())
|
||||
continue;
|
||||
auto& mem_rsp = RspOut.at(o).front();
|
||||
DT(3, this->name() << "-" << mem_rsp);
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
for (uint32_t i = 0; i < I; ++i) {
|
||||
if (entry.mask.test(i)) {
|
||||
MemRsp rsp(mem_rsp);
|
||||
rsp.tag = entry.tag;
|
||||
RspIn.at(i).push(rsp, 1);
|
||||
if (!RspOut.empty()) {
|
||||
auto& out_rsp = RspOut.front();
|
||||
DT(4, this->name() << "-" << out_rsp);
|
||||
auto& entry = pending_rd_reqs_.at(out_rsp.tag);
|
||||
|
||||
BitVector<> rsp_mask(input_size_);
|
||||
for (uint32_t o = 0; o < output_size_; ++o) {
|
||||
if (!out_rsp.mask.test(o))
|
||||
continue;
|
||||
for (uint32_t r = 0; r < output_ratio_; ++r) {
|
||||
uint32_t i = o * output_ratio_ + r;
|
||||
if (entry.mask.test(i))
|
||||
rsp_mask.set(i);
|
||||
}
|
||||
}
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
RspOut.at(o).pop();
|
||||
|
||||
// build memory response
|
||||
LsuRsp in_rsp(input_size_);
|
||||
in_rsp.mask = rsp_mask;
|
||||
in_rsp.tag = entry.tag;
|
||||
in_rsp.cid = out_rsp.cid;
|
||||
in_rsp.uuid = out_rsp.uuid;
|
||||
|
||||
// send memory response
|
||||
RspIn.push(in_rsp, 1);
|
||||
|
||||
// track remaining responses
|
||||
assert(!entry.mask.none());
|
||||
entry.mask &= ~rsp_mask;
|
||||
if (entry.mask.none()) {
|
||||
// whole response received, release tag
|
||||
pending_rd_reqs_.release(out_rsp.tag);
|
||||
}
|
||||
RspOut.pop();
|
||||
}
|
||||
|
||||
// process incoming requests
|
||||
uint64_t addr_mask = ~uint64_t(line_size_-1);
|
||||
bool completed = true;
|
||||
for (uint32_t i = last_index_; i < I; ++i) {
|
||||
if (sent_mask_.test(i) || ReqIn.at(i).empty())
|
||||
continue;
|
||||
if (ReqIn.empty())
|
||||
return;
|
||||
|
||||
auto& seed = ReqIn.at(i).front();
|
||||
auto& in_req = ReqIn.front();
|
||||
assert(in_req.mask.size() == input_size_);
|
||||
assert(!in_req.mask.none());
|
||||
|
||||
// ensure we can allocate a response tag
|
||||
if (!seed.write && pending_rd_reqs_.full()) {
|
||||
DT(4, "*** " << this->name() << "-queue-full: " << seed);
|
||||
last_index_ = i;
|
||||
completed = false;
|
||||
break;
|
||||
}
|
||||
|
||||
std::bitset<64> mask(0);
|
||||
mask.set(i);
|
||||
|
||||
// coalesce matching requests
|
||||
uint64_t seed_addr = seed.addr & addr_mask;
|
||||
for (uint32_t j = i + 1; j < I; ++j) {
|
||||
if (sent_mask_.test(j) || ReqIn.at(j).empty())
|
||||
continue;
|
||||
auto& match = ReqIn.at(j).front();
|
||||
uint64_t match_addr = match.addr & addr_mask;
|
||||
if (match_addr == seed_addr) {
|
||||
mask.set(j);
|
||||
ReqIn.at(j).pop();
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t tag = 0;
|
||||
if (!seed.write) {
|
||||
tag = pending_rd_reqs_.allocate(pending_req_t{seed.tag, mask});
|
||||
}
|
||||
|
||||
MemReq mem_req{seed};
|
||||
mem_req.tag = tag;
|
||||
DT(3, this->name() << "-" << mem_req << ", coalesced=" << mask.count());
|
||||
uint32_t c = i % O;
|
||||
ReqOut.at(c).push(mem_req, delay_);
|
||||
ReqIn.at(i).pop();
|
||||
|
||||
sent_mask_ |= mask;
|
||||
// ensure we can allocate a response tag
|
||||
if (pending_rd_reqs_.full()) {
|
||||
DT(4, "*** " << this->name() << "-queue-full: " << in_req);
|
||||
return;
|
||||
}
|
||||
|
||||
if (completed) {
|
||||
last_index_ = 0;
|
||||
uint64_t addr_mask = ~uint64_t(line_size_-1);
|
||||
|
||||
BitVector<> out_mask(output_size_);
|
||||
std::vector<uint64_t> out_addrs(output_size_);
|
||||
|
||||
BitVector<> cur_mask(input_size_);
|
||||
|
||||
for (uint32_t o = 0; o < output_size_; ++o) {
|
||||
for (uint32_t r = 0; r < output_ratio_; ++r) {
|
||||
uint32_t i = o * output_ratio_ + r;
|
||||
if (sent_mask_.test(i) || !in_req.mask.test(i))
|
||||
continue;
|
||||
|
||||
uint64_t seed_addr = in_req.addrs.at(i) & addr_mask;
|
||||
cur_mask.set(i);
|
||||
|
||||
// coalesce matching requests
|
||||
for (uint32_t s = r + 1; s < output_ratio_; ++s) {
|
||||
uint32_t j = o * output_ratio_ + s;
|
||||
if (sent_mask_.test(j) || !in_req.mask.test(j))
|
||||
continue;
|
||||
uint64_t match_addr = in_req.addrs.at(j) & addr_mask;
|
||||
if (match_addr == seed_addr) {
|
||||
cur_mask.set(j);
|
||||
}
|
||||
}
|
||||
|
||||
out_mask.set(o);
|
||||
out_addrs.at(o) = seed_addr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(!out_mask.none());
|
||||
|
||||
uint32_t tag = 0;
|
||||
if (!in_req.write) {
|
||||
// allocate a response tag for read requests
|
||||
tag = pending_rd_reqs_.allocate(pending_req_t{in_req.tag, cur_mask});
|
||||
}
|
||||
|
||||
// build memory request
|
||||
LsuReq out_req{output_size_};
|
||||
out_req.mask = out_mask;
|
||||
out_req.tag = tag;
|
||||
out_req.write = in_req.write;
|
||||
out_req.addrs = out_addrs;
|
||||
out_req.cid = in_req.cid;
|
||||
out_req.uuid = in_req.uuid;
|
||||
|
||||
// send memory request
|
||||
ReqOut.push(out_req, delay_);
|
||||
DT(4, this->name() << "-" << out_req << ", coalesced=" << cur_mask.count());
|
||||
|
||||
// update sent mask
|
||||
sent_mask_ |= cur_mask;
|
||||
if (sent_mask_ == in_req.mask) {
|
||||
ReqIn.pop();
|
||||
sent_mask_.reset();
|
||||
}
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -17,15 +17,15 @@ namespace vortex {
|
|||
|
||||
class MemCoalescer : public SimObject<MemCoalescer> {
|
||||
public:
|
||||
std::vector<SimPort<MemReq>> ReqIn;
|
||||
std::vector<SimPort<MemRsp>> RspIn;
|
||||
SimPort<LsuReq> ReqIn;
|
||||
SimPort<LsuRsp> RspIn;
|
||||
|
||||
std::vector<SimPort<MemReq>> ReqOut;
|
||||
std::vector<SimPort<MemRsp>> RspOut;
|
||||
SimPort<LsuReq> ReqOut;
|
||||
SimPort<LsuRsp> RspOut;
|
||||
|
||||
MemCoalescer(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t input_size,
|
||||
uint32_t output_size,
|
||||
uint32_t line_size,
|
||||
|
@ -41,14 +41,17 @@ private:
|
|||
|
||||
struct pending_req_t {
|
||||
uint32_t tag;
|
||||
std::bitset<64> mask;
|
||||
BitVector<> mask;
|
||||
};
|
||||
|
||||
uint32_t input_size_;
|
||||
uint32_t output_size_;
|
||||
uint32_t output_ratio_;
|
||||
|
||||
HashTable<pending_req_t> pending_rd_reqs_;
|
||||
BitVector<> sent_mask_;
|
||||
uint32_t line_size_;
|
||||
uint32_t delay_;
|
||||
uint32_t last_index_;
|
||||
std::bitset<64> sent_mask_;
|
||||
};
|
||||
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,14 +16,14 @@
|
|||
using namespace vortex;
|
||||
|
||||
LocalMemDemux::LocalMemDemux(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t delay
|
||||
) : SimObject<LocalMemDemux>(ctx, name)
|
||||
) : SimObject<LocalMemDemux>(ctx, name)
|
||||
, ReqIn(this)
|
||||
, RspIn(this)
|
||||
, ReqSM(this)
|
||||
, RspSM(this)
|
||||
, ReqLmem(this)
|
||||
, RspLmem(this)
|
||||
, ReqDC(this)
|
||||
, RspDC(this)
|
||||
, delay_(delay)
|
||||
|
@ -31,30 +31,133 @@ LocalMemDemux::LocalMemDemux(
|
|||
|
||||
void LocalMemDemux::reset() {}
|
||||
|
||||
void LocalMemDemux::tick() {
|
||||
void LocalMemDemux::tick() {
|
||||
// process incoming responses
|
||||
if (!RspSM.empty()) {
|
||||
auto& rsp = RspSM.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.push(rsp, 1);
|
||||
RspSM.pop();
|
||||
if (!RspLmem.empty()) {
|
||||
auto& out_rsp = RspLmem.front();
|
||||
DT(4, this->name() << "-" << out_rsp);
|
||||
RspIn.push(out_rsp, 1);
|
||||
RspLmem.pop();
|
||||
}
|
||||
if (!RspDC.empty()) {
|
||||
auto& rsp = RspDC.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.push(rsp, 1);
|
||||
RspDC
|
||||
.pop();
|
||||
auto& out_rsp = RspDC.front();
|
||||
DT(4, this->name() << "-" << out_rsp);
|
||||
RspIn.push(out_rsp, 1);
|
||||
RspDC.pop();
|
||||
}
|
||||
// process incoming requests
|
||||
|
||||
// process incoming requests
|
||||
if (!ReqIn.empty()) {
|
||||
auto& req = ReqIn.front();
|
||||
DT(4, this->name() << "-" << req);
|
||||
if (req.type == AddrType::Shared) {
|
||||
ReqSM.push(req, delay_);
|
||||
} else {
|
||||
ReqDC.push(req, delay_);
|
||||
auto& in_req = ReqIn.front();
|
||||
|
||||
LsuReq out_dc_req(in_req.mask.size());
|
||||
out_dc_req.write = in_req.write;
|
||||
out_dc_req.tag = in_req.tag;
|
||||
out_dc_req.cid = in_req.cid;
|
||||
out_dc_req.uuid = in_req.uuid;
|
||||
|
||||
LsuReq out_lmem_req(out_dc_req);
|
||||
|
||||
for (uint32_t i = 0; i < in_req.mask.size(); ++i) {
|
||||
if (in_req.mask.test(i)) {
|
||||
auto type = get_addr_type(in_req.addrs.at(i));
|
||||
if (type == AddrType::Shared) {
|
||||
out_lmem_req.mask.set(i);
|
||||
out_lmem_req.addrs.at(i) = in_req.addrs.at(i);
|
||||
} else {
|
||||
out_dc_req.mask.set(i);
|
||||
out_dc_req.addrs.at(i) = in_req.addrs.at(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!out_dc_req.mask.none()) {
|
||||
ReqDC.push(out_dc_req, delay_);
|
||||
DT(4, this->name() << "-" << out_dc_req);
|
||||
}
|
||||
|
||||
if (!out_lmem_req.mask.none()) {
|
||||
ReqLmem.push(out_lmem_req, delay_);
|
||||
DT(4, this->name() << "-" << out_lmem_req);
|
||||
}
|
||||
ReqIn.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
LsuMemAdapter::LsuMemAdapter(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t num_inputs,
|
||||
uint32_t delay
|
||||
) : SimObject<LsuMemAdapter>(ctx, name)
|
||||
, ReqIn(this)
|
||||
, RspIn(this)
|
||||
, ReqOut(num_inputs, this)
|
||||
, RspOut(num_inputs, this)
|
||||
, delay_(delay)
|
||||
{}
|
||||
|
||||
void LsuMemAdapter::reset() {}
|
||||
|
||||
void LsuMemAdapter::tick() {
|
||||
uint32_t input_size = ReqOut.size();
|
||||
|
||||
// process incoming responses
|
||||
for (uint32_t i = 0; i < input_size; ++i) {
|
||||
if (RspOut.at(i).empty())
|
||||
continue;
|
||||
auto& out_rsp = RspOut.at(i).front();
|
||||
DT(4, this->name() << "-" << out_rsp);
|
||||
|
||||
// build memory response
|
||||
LsuRsp in_rsp(input_size);
|
||||
in_rsp.mask.set(i);
|
||||
in_rsp.tag = out_rsp.tag;
|
||||
in_rsp.cid = out_rsp.cid;
|
||||
in_rsp.uuid = out_rsp.uuid;
|
||||
|
||||
// include other responses with the same tag
|
||||
for (uint32_t j = i + 1; j < input_size; ++j) {
|
||||
if (RspOut.at(j).empty())
|
||||
continue;
|
||||
auto& other_rsp = RspOut.at(j).front();
|
||||
if (out_rsp.tag == other_rsp.tag) {
|
||||
in_rsp.mask.set(j);
|
||||
RspOut.at(j).pop();
|
||||
}
|
||||
}
|
||||
|
||||
// send memory response
|
||||
RspIn.push(in_rsp, 1);
|
||||
|
||||
// remove input
|
||||
RspOut.at(i).pop();
|
||||
break;
|
||||
}
|
||||
|
||||
// process incoming requests
|
||||
if (!ReqIn.empty()) {
|
||||
auto& in_req = ReqIn.front();
|
||||
assert(in_req.mask.size() == input_size);
|
||||
|
||||
for (uint32_t i = 0; i < input_size; ++i) {
|
||||
if (in_req.mask.test(i)) {
|
||||
// build memory request
|
||||
MemReq out_req;
|
||||
out_req.write = in_req.write;
|
||||
out_req.addr = in_req.addrs.at(i);
|
||||
out_req.type = get_addr_type(in_req.addrs.at(i));
|
||||
out_req.tag = in_req.tag;
|
||||
out_req.cid = in_req.cid;
|
||||
out_req.uuid = in_req.uuid;
|
||||
|
||||
// send memory request
|
||||
ReqOut.at(i).push(out_req, delay_);
|
||||
DT(4, this->name() << "-" << out_req);
|
||||
}
|
||||
}
|
||||
ReqIn.pop();
|
||||
}
|
||||
}
|
|
@ -23,6 +23,7 @@
|
|||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
#include <simobject.h>
|
||||
#include <bitvector.h>
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
@ -238,6 +239,62 @@ inline std::ostream &operator<<(std::ostream &os, const ArbiterType& type) {
|
|||
default: assert(false);
|
||||
}
|
||||
return os;
|
||||
}///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct LsuReq {
|
||||
BitVector<> mask;
|
||||
std::vector<uint64_t> addrs;
|
||||
bool write;
|
||||
uint32_t tag;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
|
||||
LsuReq(uint32_t size)
|
||||
: mask(size)
|
||||
, addrs(size, 0)
|
||||
, write(false)
|
||||
, tag(0)
|
||||
, cid(0)
|
||||
, uuid(0)
|
||||
{}
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const LsuReq& req) {
|
||||
os << "lsu-req: rw=" << req.write << ", mask=" << req.mask << ", ";
|
||||
for (size_t i = 0; i < req.mask.size(); ++i) {
|
||||
os << "addr" << i << "=";
|
||||
if (req.mask.test(i)) {
|
||||
os << "0x" << std::hex << req.addrs.at(i);
|
||||
} else {
|
||||
os << "-";
|
||||
}
|
||||
os << ", ";
|
||||
}
|
||||
os << std::dec << "tag=" << req.tag << ", cid=" << req.cid;
|
||||
os << " (#" << std::dec << req.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct LsuRsp {
|
||||
BitVector<> mask;
|
||||
uint64_t tag;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
|
||||
LsuRsp(uint32_t size)
|
||||
: mask(size)
|
||||
, tag (0)
|
||||
, cid(0)
|
||||
, uuid(0)
|
||||
{}
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const LsuRsp& rsp) {
|
||||
os << "lsu-rsp: mask=" << rsp.mask << ", tag=" << rsp.tag << ", cid=" << rsp.cid;
|
||||
os << " (#" << std::dec << rsp.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -266,7 +323,7 @@ struct MemReq {
|
|||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const MemReq& req) {
|
||||
os << "mem-" << (req.write ? "wr" : "rd") << ": ";
|
||||
os << "mem-req: rw=" << req.write << ", ";
|
||||
os << "addr=0x" << std::hex << req.addr << ", type=" << req.type;
|
||||
os << std::dec << ", tag=" << req.tag << ", cid=" << req.cid;
|
||||
os << " (#" << std::dec << req.uuid << ")";
|
||||
|
@ -427,7 +484,6 @@ public:
|
|||
auto& req_in = Inputs.at(j);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
DT(4, this->name() << "-" << req);
|
||||
Outputs.at(o).push(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
|
@ -566,14 +622,14 @@ using MemSwitch = Switch<MemReq, MemRsp>;
|
|||
|
||||
class LocalMemDemux : public SimObject<LocalMemDemux> {
|
||||
public:
|
||||
SimPort<MemReq> ReqIn;
|
||||
SimPort<MemRsp> RspIn;
|
||||
SimPort<LsuReq> ReqIn;
|
||||
SimPort<LsuRsp> RspIn;
|
||||
|
||||
SimPort<MemReq> ReqSM;
|
||||
SimPort<MemRsp> RspSM;
|
||||
SimPort<LsuReq> ReqLmem;
|
||||
SimPort<LsuRsp> RspLmem;
|
||||
|
||||
SimPort<MemReq> ReqDC;
|
||||
SimPort<MemRsp> RspDC;
|
||||
SimPort<LsuReq> ReqDC;
|
||||
SimPort<LsuRsp> RspDC;
|
||||
|
||||
LocalMemDemux(
|
||||
const SimContext& ctx,
|
||||
|
@ -589,4 +645,29 @@ private:
|
|||
uint32_t delay_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LsuMemAdapter : public SimObject<LsuMemAdapter> {
|
||||
public:
|
||||
SimPort<LsuReq> ReqIn;
|
||||
SimPort<LsuRsp> RspIn;
|
||||
|
||||
std::vector<SimPort<MemReq>> ReqOut;
|
||||
std::vector<SimPort<MemRsp>> RspOut;
|
||||
|
||||
LsuMemAdapter(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t num_inputs,
|
||||
uint32_t delay
|
||||
);
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
private:
|
||||
uint32_t delay_;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -65,27 +65,6 @@ run-rtlsim:
|
|||
$(MAKE) -C sgemm3 run-rtlsim
|
||||
$(MAKE) -C psum run-rtlsim
|
||||
|
||||
run-opae:
|
||||
$(MAKE) -C vecadd run-opae
|
||||
$(MAKE) -C sgemm run-opae
|
||||
$(MAKE) -C conv3 run-opae
|
||||
$(MAKE) -C psort run-opae
|
||||
$(MAKE) -C saxpy run-opae
|
||||
$(MAKE) -C sfilter run-opae
|
||||
$(MAKE) -C oclprintf run-opae
|
||||
$(MAKE) -C dotproduct run-opae
|
||||
$(MAKE) -C transpose run-opae
|
||||
$(MAKE) -C spmv run-opae
|
||||
$(MAKE) -C stencil run-opae
|
||||
$(MAKE) -C nearn run-opae
|
||||
$(MAKE) -C guassian run-opae
|
||||
$(MAKE) -C kmeans run-opae
|
||||
$(MAKE) -C blackscholes run-opae
|
||||
$(MAKE) -C bfs run-opae
|
||||
$(MAKE) -C sgemm2 run-opae
|
||||
$(MAKE) -C sgemm3 run-opae
|
||||
$(MAKE) -C psum run-opae
|
||||
|
||||
clean:
|
||||
$(MAKE) -C vecadd clean
|
||||
$(MAKE) -C sgemm clean
|
||||
|
|
|
@ -46,7 +46,7 @@ static float* read_output_file(const char* filename, int size) {
|
|||
return NULL;
|
||||
}
|
||||
// Read the float data
|
||||
if (fread(floats, sizeof(float), size, file) != size) {
|
||||
if (fread(floats, sizeof(float), size, file) != (size_t)size) {
|
||||
fclose(file);
|
||||
free(floats);
|
||||
perror("Error reading floats from file");
|
||||
|
@ -128,6 +128,7 @@ int main(int nArgs, char *arg[]) {
|
|||
MAIN_initialize(&param, &prm);
|
||||
|
||||
for (t = 1; t <= param.nTimeSteps; t++) {
|
||||
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
|
||||
OpenCL_LBM_performStreamCollide(&prm, OpenCL_srcGrid, OpenCL_dstGrid);
|
||||
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
|
||||
|
@ -198,9 +199,9 @@ void MAIN_printInfo(const MAIN_Param *param) {
|
|||
"\tsimulation type: %s\n"
|
||||
"\tobstacle file : %s\n\n",
|
||||
SIZE_X, SIZE_Y, SIZE_Z, 1e-6 * SIZE_X * SIZE_Y * SIZE_Z,
|
||||
param->nTimeSteps, param->resultFilename, "store", "lid-driven cavity",
|
||||
(param->obstacleFilename == NULL) ? "<none>"
|
||||
: param->obstacleFilename);
|
||||
param->nTimeSteps, ((param->resultFilename == NULL) ? "<none>" : param->resultFilename), "store", "lid-driven cavity",
|
||||
((param->obstacleFilename == NULL) ? "<none>" : param->obstacleFilename)
|
||||
);
|
||||
}
|
||||
|
||||
/*############################################################################*/
|
||||
|
@ -316,7 +317,7 @@ void OpenCL_initialize(struct pb_Parameters *p, OpenCL_Param *prm) {
|
|||
// read kernel binary from file
|
||||
uint8_t *kernel_bin = NULL;
|
||||
size_t kernel_size;
|
||||
cl_int binary_status = 0;
|
||||
//cl_int binary_status = 0;
|
||||
|
||||
clStatus = read_kernel_file("kernel.cl", &kernel_bin, &kernel_size);
|
||||
CHECK_ERROR("read_kernel_file")
|
||||
|
|
|
@ -49,22 +49,6 @@ run-rtlsim:
|
|||
$(MAKE) -C sgemm2x run-rtlsim
|
||||
$(MAKE) -C stencil3d run-rtlsim
|
||||
|
||||
run-opae:
|
||||
$(MAKE) -C basic run-opae
|
||||
$(MAKE) -C demo run-opae
|
||||
$(MAKE) -C dogfood run-opae
|
||||
$(MAKE) -C mstress run-opae
|
||||
$(MAKE) -C io_addr run-opae
|
||||
$(MAKE) -C printf run-opae
|
||||
$(MAKE) -C diverge run-opae
|
||||
$(MAKE) -C sort run-opae
|
||||
$(MAKE) -C fence run-opae
|
||||
$(MAKE) -C vecaddx run-opae
|
||||
$(MAKE) -C sgemmx run-opae
|
||||
$(MAKE) -C conv3x run-opae
|
||||
$(MAKE) -C sgemm2x run-opae
|
||||
$(MAKE) -C stencil3d run-opae
|
||||
|
||||
clean:
|
||||
$(MAKE) -C basic clean
|
||||
$(MAKE) -C demo clean
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue