snooping response handling fix

This commit is contained in:
Blaise Tine 2020-05-12 13:35:18 -04:00
parent c49f01b769
commit fcf3800d5d
11 changed files with 92 additions and 134 deletions

View file

@ -3,8 +3,7 @@
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <thread>
#include <mutex>
#include <future>
#include <chrono>
#include <vortex.h>
@ -59,20 +58,14 @@ private:
class vx_device {
public:
vx_device()
: is_done_(false) {
thread_ = new std::thread(__thread_proc__, this);
vx_device() {
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
simulator_.attach_ram(&ram_);
}
~vx_device() {
if (thread_) {
mutex_.lock();
is_done_ = true;
mutex_.unlock();
thread_->join();
delete thread_;
~vx_device() {
if (future_.valid()) {
future_.wait();
}
}
@ -115,79 +108,48 @@ public:
return 0;
}
int flush_caches(size_t dev_maddr, size_t size) {
mutex_.lock();
simulator_.attach_ram(&ram_);
simulator_.flush_caches(dev_maddr, size);
simulator_.attach_ram(nullptr);
mutex_.unlock();
return 0;
}
int start() {
mutex_.lock();
simulator_.reset();
simulator_.attach_ram(&ram_);
mutex_.unlock();
if (future_.valid()) {
future_.wait(); // ensure prior run completed
}
future_ = std::async(std::launch::async, [&]{
simulator_.reset();
while (simulator_.is_busy()) {
simulator_.step();
}
});
return 0;
}
int wait(long long timeout) {
if (!future_.valid())
return 0;
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
std::chrono::seconds wait_time(1);
for (;;) {
mutex_.lock();
bool is_busy = simulator_.is_busy();
mutex_.unlock();
if (!is_busy || 0 == timeout_sec--) {
if (!is_busy) {
mutex_.lock();
simulator_.attach_ram(nullptr);
mutex_.unlock();
}
auto status = future_.wait_for(wait_time); // wait for 1 sec and check status
if (status == std::future_status::ready
|| 0 == timeout_sec--)
break;
}
std::this_thread::sleep_for(std::chrono::seconds(1));
}
return 0;
}
// Flushes the simulator caches for the region [dev_maddr, dev_maddr + size).
// If an asynchronous run launched by start() is still tracked in future_,
// this call blocks until that run has finished before touching simulator_.
// Always returns 0; the result of simulator_.flush_caches is not inspected.
int flush_caches(size_t dev_maddr, size_t size) {
if (future_.valid()) {
future_.wait(); // ensure prior run completed
}
// Forward the request unchanged to the simulator.
simulator_.flush_caches(dev_maddr, size);
return 0;
}
private:
void thread_proc() {
std::cout << "Device ready..." << std::endl;
for (;;) {
mutex_.lock();
bool is_done = is_done_;
mutex_.unlock();
if (is_done)
break;
mutex_.lock();
simulator_.step();
mutex_.unlock();
}
std::cout << "Device shutdown..." << std::endl;
}
static void __thread_proc__(vx_device* device) {
device->thread_proc();
}
bool is_done_;
size_t mem_allocation_;
RAM ram_;
Simulator simulator_;
std::thread* thread_;
std::mutex mutex_;
std::future<void> future_;
};
///////////////////////////////////////////////////////////////////////////////

View file

@ -29,11 +29,6 @@
if (!(cond)) $error(msg); \
endgenerate
`define UNUSED(x) \
`IGNORE_WARNINGS_BEGIN \
if (x != 0) begin end \
`IGNORE_WARNINGS_END
`define CLOG2(x) $clog2(x)
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > x) ? 1 : 0))
`define LOG2UP(x) ((x > 1) ? $clog2(x) : 1)

View file

@ -64,10 +64,10 @@ module VX_icache_stage #(
/*always_comb begin
if (1'($time & 1) && icache_req_if.core_req_ready && icache_req_if.core_req_valid) begin
$display("*** %t: I%01d$ req: pc=%0h, warp=%0d", $time, CORE_ID, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
$display("*** %t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, icache_req_if.core_req_tag, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
end
if (1'($time & 1) && icache_rsp_if.core_rsp_ready && icache_rsp_if.core_rsp_valid) begin
$display("*** %t: I%01d$ rsp: pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
$display("*** %t: I%01d$ rsp: tag=%0h, pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, icache_rsp_if.core_rsp_tag, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
end
end*/

View file

@ -64,10 +64,10 @@ module VX_lsu_unit #(
/*always_comb begin
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
$display("*** %t: D%01d$ req: valid=%b, addr=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
$display("*** %t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, use_valid, use_address, dcache_req_if.core_req_tag, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
end
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
$display("*** %t: D%01d$ rsp: valid=%b, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
$display("*** %t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, dcache_rsp_if.core_rsp_tag, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
end
end*/

View file

@ -330,7 +330,7 @@ module Vortex_Socket (
/*always_comb begin
if (1'($time & 1) && (dram_req_read || dram_req_write) && dram_req_ready) begin
$display("*** %t: DRAM req: w=%b addr=%0h, tag=%0h, data=%0h", $time, dram_req_write, dram_req_addr, dram_req_tag, dram_req_data);
$display("*** %t: DRAM req: w=%b addr=%0h, tag=%0h, data=%0h", $time, dram_req_write, {dram_req_addr, `CLOG2(`GLOBAL_BLOCK_SIZE)'(0)}, dram_req_tag, dram_req_data);
end
if (1'($time & 1) && dram_rsp_valid && dram_rsp_ready) begin
$display("*** %t: DRAM rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data);

View file

@ -407,10 +407,9 @@ module VX_bank #(
.out ({is_snp_st2 , snrq_tag_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
);
wire should_flush;
wire dwbq_push;
wire cwbq_full;
wire dwbq_push;
wire dwbq_empty;
wire dwbq_full;
wire srpq_full;
wire invalidate_fill;
@ -420,7 +419,6 @@ module VX_bank #(
&& !is_snp_st2
&& miss_st2
&& !mrvq_full
&& !(should_flush && dwbq_push)
&& !((is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
@ -443,7 +441,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
// Enqueue
.miss_add (miss_add), // Need to do all
.miss_add (miss_add),
.miss_add_addr (miss_add_addr),
.miss_add_wsel (miss_add_wsel),
.miss_add_data (miss_add_data),
@ -505,30 +503,16 @@ module VX_bank #(
.full (cwbq_full)
);
assign should_flush = valid_st2
&& (miss_add_mem_write != `BYTE_EN_NO)
&& !is_snp_st2
&& !is_fill_st2;
// Enqueue to DWB Queue
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush)
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2)
&& !dwbq_full
&& !((is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr;
wire[`BANK_LINE_WIDTH-1:0] dwbq_req_data;
wire dwbq_empty;
if (SNOOP_FORWARDING) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
assign dwbq_req_addr = (should_flush && dwbq_push) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
end else begin
assign dwbq_req_data = readdata_st2;
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
end
wire [`BANK_LINE_WIDTH-1:0] dwbq_req_data = readdata_st2;
wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2;
wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
@ -544,7 +528,7 @@ module VX_bank #(
.success_fill (is_fill_st2),
.fill_addr (fill_invalidator_addr),
.invalidate_fill (invalidate_fill)
);
);
// Enqueue in dram_fill_req
assign dram_fill_req_valid = possible_fill && !invalidate_fill;

View file

@ -72,6 +72,9 @@ module VX_tag_data_access #(
&& ((valid_req_st1e && !use_read_valid_st1e)
|| (valid_req_st1e && use_read_valid_st1e && !tags_match));
wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
VX_tag_data_structure #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
@ -91,8 +94,8 @@ module VX_tag_data_access #(
.invalidate (invalidate_line),
.write_enable(use_write_enable),
.write_fill (real_writefill),
.write_addr (writeaddr_st1e[`LINE_SELECT_BITS-1:0]),
.tag_index (writeaddr_st1e[`TAG_LINE_ADDR_RNG]),
.write_addr (writeladdr_st1e),
.tag_index (writetag_st1e),
.write_data (use_write_data),
.fill_sent (fill_sent)
);
@ -125,30 +128,28 @@ module VX_tag_data_access #(
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || ~DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // Tag is always the same in SM
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH];
end
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];
wire force_write = real_writefill;
wire should_write;
wire [`BANK_LINE_WORDS-1:0][3:0] we;
wire [`BANK_LINE_WIDTH-1:0] data_write;
if (WORD_SIZE == BANK_LINE_SIZE) begin
wire should_write = ((mem_write_st1e != `BYTE_EN_NO))
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
assign should_write = ((mem_write_st1e != `BYTE_EN_NO))
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000;
end
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
assign readword_st1e = use_read_data_st1e;
assign data_write = force_write ? writedata_st1e : writeword_st1e;
end else begin
@ -174,13 +175,13 @@ module VX_tag_data_access #(
wire [3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000)));
wire [3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
wire should_write = (sw || sb || sh)
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
assign should_write = (sw || sb || sh)
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
wire[`WORD_WIDTH-1:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset * 32 +: 32];
wire[`WORD_WIDTH-1:0] data_unmod = use_read_data_st1e[block_offset * 32 +: 32];
wire[`WORD_WIDTH-1:0] data_unQual = (b0 || lw) ? (data_unmod) :
b1 ? (data_unmod >> 8) :
b2 ? (data_unmod >> 16) :
@ -200,7 +201,7 @@ module VX_tag_data_access #(
assign readword_st1e = data_Qual;
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
wire normal_write = (block_offset == i[`WORD_SELECT_BITS-1:0]) && should_write && !real_writefill;
wire normal_write = (block_offset == `WORD_SELECT_BITS'(i)) && should_write && !real_writefill;
assign we[i] = (force_write) ? 4'b1111 :
(normal_write && sw) ? 4'b1111 :
@ -226,7 +227,7 @@ module VX_tag_data_access #(
assign use_write_data = data_write;
// use "case equality" to handle uninitialized tag when block entry is not valid
assign tags_match = ((writeaddr_st1e[`TAG_LINE_ADDR_RNG] == use_read_tag_st1e) === 1'b1);
assign tags_match = ((writetag_st1e == use_read_tag_st1e) === 1'b1);
wire snoop_hit = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e;
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;

View file

@ -39,7 +39,7 @@ module VX_tag_data_structure #(
assign read_tag = tag [read_addr];
assign read_data = data [read_addr];
wire going_to_write = (| write_enable);
wire going_to_write = (| write_enable);
integer i;
always @(posedge clk) begin

View file

@ -2,13 +2,13 @@
#include <iostream>
#include <iomanip>
uint64_t time_stamp = 0;
uint64_t timestamp = 0;
double sc_time_stamp() {
return time_stamp;
return timestamp;
}
Simulator::Simulator() {
Simulator::Simulator() {
ram_ = nullptr;
vortex_ = new VVortex_Socket();
@ -28,18 +28,23 @@ Simulator::~Simulator() {
}
// Sets the RAM instance stored in ram_ and clears dram_rsp_vec_.
// Passing nullptr leaves the simulator without RAM; dbus_driver() checks
// ram_ against nullptr and returns early in that case.
void Simulator::attach_ram(RAM* ram) {
#ifndef NDEBUG
// Debug builds only: trace the call together with the current timestamp.
std::cout << timestamp << ": [sim] attach_ram" << std::endl;
#endif
ram_ = ram;
// Drop whatever is queued in dram_rsp_vec_ (presumably DRAM responses that
// were pending against the previously attached RAM -- confirm).
dram_rsp_vec_.clear();
}
void Simulator::print_stats(std::ostream& out) {
out << std::left;
out << std::setw(24) << "# of total cycles:" << std::dec << time_stamp/2 << std::endl;
out << std::setw(24) << "# of total cycles:" << std::dec << timestamp/2 << std::endl;
}
void Simulator::dbus_driver() {
if (ram_ == nullptr)
if (ram_ == nullptr) {
vortex_->dram_req_ready = false;
return;
}
// handle DRAM response cycle
int dequeue_index = -1;
@ -70,7 +75,7 @@ void Simulator::dbus_driver() {
// handle DRAM stalls
bool dram_stalled = false;
#ifdef ENABLE_DRAM_STALLS
if (0 == ((time_stamp/2) % DRAM_STALLS_MODULO)) {
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
dram_stalled = true;
} else
if (dram_rsp_vec_.size() >= DRAM_RQ_SIZE) {
@ -114,12 +119,15 @@ void Simulator::io_driver() {
&& vortex_->io_req_addr == IO_BUS_ADDR_COUT) {
uint32_t data_write = (uint32_t)vortex_->io_req_data;
char c = (char)data_write;
std::cerr << c;
std::cout << c;
}
vortex_->io_req_ready = true;
}
void Simulator::reset() {
void Simulator::reset() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif
vortex_->reset = 1;
this->step();
vortex_->reset = 0;
@ -141,9 +149,9 @@ void Simulator::step() {
void Simulator::eval() {
vortex_->eval();
#ifdef VCD_OUTPUT
trace_->dump(time_stamp);
trace_->dump(timestamp);
#endif
++time_stamp;
++timestamp;
}
void Simulator::wait(uint32_t cycles) {
@ -157,6 +165,9 @@ bool Simulator::is_busy() {
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] flush_caches()" << std::endl;
#endif
// align address to LLC block boundaries
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
@ -186,6 +197,10 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
}
bool Simulator::run() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] run()" << std::endl;
#endif
// reset the device
this->reset();

View file

@ -52,6 +52,7 @@ private:
RAM *ram_;
VVortex_Socket *vortex_;
bool enable_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
#endif