vortex runtime console out implementation

This commit is contained in:
Blaise Tine 2021-06-15 04:01:44 -04:00
parent ee06337553
commit 6213b1a910
18 changed files with 78144 additions and 78036 deletions

View file

@ -6,6 +6,8 @@
#include <unistd.h>
#include <assert.h>
#include <cmath>
#include <sstream>
#include <unordered_map>
#if defined(USE_FPGA) || defined(USE_ASE)
#include <opae/fpga.h>
@ -334,6 +336,8 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
if (nullptr == hdevice)
return -1;
std::unordered_map<int, std::stringstream> print_bufs;
vx_device_t *device = ((vx_device_t*)hdevice);
@ -351,14 +355,40 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
for (;;) {
uint64_t data;
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
if (0 == data || 0 == timeout) {
if (data != 0) {
fprintf(stdout, "[VXDRV] ready-wait timed out: status=%ld\n", data);
uint64_t status;
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
uint16_t cout_data = (status >> 8) & 0xffff;
if (cout_data & 0x0001) {
do {
char cout_char = (cout_data >> 1) & 0xff;
int cout_tid = (cout_data >> 9) & 0xff;
auto& ss_buf = print_bufs[cout_tid];
ss_buf << cout_char;
if (cout_char == '\n') {
std::cout << std::dec << "#" << cout_tid << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
cout_data = (status >> 8) & 0xffff;
} while (cout_data & 0x0001);
}
uint8_t state = status & 0xff;
if (0 == state || 0 == timeout) {
for (auto& buf : print_bufs) {
auto str = buf.second.str();
if (!str.empty()) {
std::cout << "#" << buf.first << ": " << str << std::endl;
}
}
if (state != 0) {
fprintf(stdout, "[VXDRV] ready-wait timed out: state=%d\n", state);
}
break;
}
nanosleep(&sleep_time, nullptr);
timeout -= sleep_time_ms;
};

View file

@ -54,11 +54,11 @@
`endif
`ifndef IO_COUT_ADDR
`define IO_COUT_ADDR 32'hFFFFFFC0
`define IO_COUT_ADDR (32'hFFFFFFFF - `MEM_BLOCK_SIZE + 1)
`endif
`ifndef IO_COUT_SIZE
`define IO_COUT_SIZE 64
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
`endif
`ifndef IO_CSR_ADDR

View file

@ -45,14 +45,13 @@ localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
localparam CCI_LINE_WIDTH = $bits(t_ccip_clData);
localparam CCI_LINE_SIZE = CCI_LINE_WIDTH / 8;
localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_LINE_WIDTH / 8);
localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_LINE_SIZE);
localparam AVS_RD_QUEUE_SIZE = 16;
localparam AVS_REQ_TAGW_VX = `MAX(`VX_MEM_TAG_WIDTH, `VX_MEM_TAG_WIDTH + $clog2(LMEM_LINE_WIDTH) - $clog2(`VX_MEM_LINE_WIDTH));
localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, CCI_ADDR_WIDTH + $clog2(LMEM_LINE_WIDTH) - $clog2(CCI_LINE_WIDTH));
localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI);
localparam CCI_RD_WINDOW_SIZE = 8;
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
localparam CCI_RW_PENDING_SIZE= 256;
@ -70,6 +69,10 @@ localparam MMIO_MEM_ADDR = `AFU_IMAGE_MMIO_MEM_ADDR;
localparam MMIO_DATA_SIZE = `AFU_IMAGE_MMIO_DATA_SIZE;
localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS;
localparam COUT_TID_WIDTH = $clog2(`IO_COUT_SIZE);
localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8;
localparam COUT_QUEUE_SIZE = 256;
localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ;
localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
@ -147,6 +150,9 @@ assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmi
assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address);
`endif
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout;
wire cout_q_full, cout_q_empty;
/*
`DEBUG_BEGIN
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
@ -189,7 +195,7 @@ always @(posedge clk) begin
mmio_tx.hdr <= 0;
end else begin
mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
mmio_tx.hdr.tid <= mmio_hdr.tid;
mmio_tx.hdr.tid <= mmio_hdr.tid;
end
// serve MMIO write request
@ -252,7 +258,7 @@ always @(posedge clk) begin
16'h0006: mmio_tx.data <= 64'h0; // next AFU
16'h0008: mmio_tx.data <= 64'h0; // reserved
MMIO_STATUS: begin
mmio_tx.data <= 64'(state);
mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)});
`ifdef DBG_PRINT_OPAE
if (state != STATE_WIDTH'(mmio_tx.data)) begin
$display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state);
@ -462,6 +468,16 @@ t_local_mem_data vx_mem_rsp_arb_data;
wire [AVS_REQ_TAGW-1:0] vx_mem_rsp_arb_tag;
wire vx_mem_rsp_arb_ready;
wire vx_mem_is_cout;
wire vx_mem_req_valid_qual;
wire vx_mem_req_ready_qual;
assign vx_mem_req_valid_qual = vx_mem_req_valid
&& vx_mem_en
&& ~vx_mem_is_cout;
assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual;
VX_to_mem #(
.SRC_DATA_WIDTH (`VX_MEM_LINE_WIDTH),
.DST_DATA_WIDTH (LMEM_LINE_WIDTH),
@ -473,13 +489,13 @@ VX_to_mem #(
.clk (clk),
.reset (reset),
.mem_req_valid_in (vx_mem_req_valid && vx_mem_en),
.mem_req_valid_in (vx_mem_req_valid_qual),
.mem_req_addr_in (vx_mem_req_addr),
.mem_req_rw_in (vx_mem_req_rw),
.mem_req_byteen_in (vx_mem_req_byteen),
.mem_req_data_in (vx_mem_req_data),
.mem_req_tag_in (vx_mem_req_tag),
.mem_req_ready_in (vx_mem_req_ready),
.mem_req_ready_in (vx_mem_req_ready_qual),
.mem_req_valid_out (vx_mem_req_arb_valid),
.mem_req_addr_out (vx_mem_req_arb_addr),
@ -885,6 +901,50 @@ Vortex #() vortex (
.busy (vx_busy)
);
// COUT HANDLING //////////////////////////////////////////////////////////////
wire [COUT_TID_WIDTH-1:0] cout_tid;
wire [7:0] cout_char;
VX_onehot_encoder #(
.N (`VX_MEM_BYTEEN_WIDTH)
) cout_tid_enc (
.data_in (vx_mem_req_byteen),
.data_out (cout_tid),
`UNUSED_PIN (valid)
);
wire [`VX_MEM_BYTEEN_WIDTH-1:0][7:0] vx_mem_req_data_ar = vx_mem_req_data;
assign cout_char = vx_mem_req_data_ar[cout_tid];
assign vx_mem_is_cout = (vx_mem_req_addr == `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> (32 - `VX_MEM_ADDR_WIDTH)));
wire cout_q_push = vx_mem_req_valid
&& vx_mem_en
&& vx_mem_is_cout
&& ~cout_q_full;
wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
&& (mmio_hdr.address == MMIO_STATUS)
&& ~cout_q_empty;
VX_fifo_queue #(
.DATAW (COUT_QUEUE_DATAW),
.SIZE (COUT_QUEUE_SIZE)
) cout_queue (
.clk (clk),
.reset (reset),
.push (cout_q_push),
.pop (cout_q_pop),
.data_in ({cout_tid, cout_char}),
.data_out (cout_q_dout),
.empty (cout_q_empty),
.full (cout_q_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
// SCOPE //////////////////////////////////////////////////////////////////////
`ifdef SCOPE

View file

@ -57,8 +57,8 @@ Simulator::Simulator() {
Simulator::~Simulator() {
for (auto& buf : print_bufs_) {
auto str = buf.second.str();
if (str.size()) {
std::cout << "#" << buf.first << ": " << buf.second.str() << std::endl;
if (!str.empty()) {
std::cout << "#" << buf.first << ": " << str << std::endl;
}
}
#ifdef VCD_OUTPUT
@ -171,16 +171,31 @@ void Simulator::eval_mem_bus() {
// process memory requests
if (!mem_stalled) {
if (vortex_->mem_req_valid) {
if (vortex_->mem_req_rw) {
if (vortex_->mem_req_rw) {
uint64_t byteen = vortex_->mem_req_byteen;
unsigned base_addr = (vortex_->mem_req_addr * MEM_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(vortex_->mem_req_data);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
if (base_addr >= IO_COUT_ADDR
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
}
} else {
} else {
mem_req_t mem_req;
mem_req.tag = vortex_->mem_req_tag;
mem_req.addr = vortex_->mem_req_addr;

View file

@ -3,15 +3,9 @@
.type vx_putchar, @function
.global vx_putchar
vx_putchar:
la t0, print_addr
lw t0, 0(t0)
csrr t1, CSR_GTID
slli t1, t1, 16
or t1, t1, a0
sw t1, 0(t0)
ret
.section .data
print_addr:
.word IO_COUT_ADDR
csrr t0, CSR_GTID
andi t0, t0, %lo(IO_COUT_SIZE-1)
li t1, IO_COUT_ADDR
add t0, t0, t1
sb a0, 0(t0)
ret

View file

@ -64,7 +64,7 @@ static const char* skip_modifier(const char* format) {
break;
case 'j':
case 'z':
case 't':
case 't':
case 'L':
++format;
break;
@ -123,10 +123,10 @@ int vx_vprintf(const char* format, va_list va) {
int vx_printf(const char * format, ...) {
va_list va;
va_start(va, format);
va_start(va, format);
int ret = vx_vprintf(format, va);
va_end(va);
return ret;
va_end(va);
return ret;
}
static const char hextoa[] = "0123456789abcdef";

View file

@ -42,6 +42,15 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
this->clear();
}
// Destructor: emits whatever console text is still sitting in the per-thread
// print buffers (i.e. output that was never terminated by a newline), one
// "#<id>: <text>" line per non-empty buffer.
Core::~Core() {
  for (auto it = print_bufs_.begin(); it != print_bufs_.end(); ++it) {
    const auto pending = it->second.str();
    if (pending.empty())
      continue; // nothing left over for this buffer
    std::cout << "#" << it->first << ": " << pending << std::endl;
  }
}
void Core::clear() {
for (int w = 0; w < arch_.num_warps(); ++w) {
in_use_iregs_[w].reset();
@ -73,6 +82,7 @@ void Core::clear() {
inst_in_issue_.clear();
inst_in_execute_.clear();
inst_in_writeback_.clear();
print_bufs_.clear();
steps_ = 0;
insts_ = 0;
@ -340,6 +350,11 @@ void Core::dcache_write(Addr addr, Word data, Size size) {
return;
}
#endif
if (addr >= IO_COUT_ADDR
&& addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
this->writeToStdOut(addr, data);
return;
}
mem_.write(addr, &data, size, 0);
}
@ -356,4 +371,15 @@ void Core::printStats() const {
<< "Insts : " << insts_ << std::endl
<< "Loads : " << loads_ << std::endl
<< "Stores: " << stores_ << std::endl;
}
// Handles a store that landed in the console-out address window.
//   addr: target address; its offset from IO_COUT_ADDR, masked with
//         IO_COUT_SIZE-1, selects which print buffer receives the character.
//   data: stored word; only its value converted to char is used.
// Characters accumulate per buffer and are written to std::cout, prefixed
// with "#<tid>: ", once a newline arrives; the buffer is then emptied.
void Core::writeToStdOut(Addr addr, Word data) {
  const uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
  const char ch = static_cast<char>(data);

  auto& line = print_bufs_[tid];
  line << ch;

  if (ch != '\n')
    return; // keep buffering until the line is complete

  std::cout << std::dec << "#" << tid << ": " << line.str() << std::flush;
  line.str("");
}

View file

@ -22,6 +22,8 @@ class Core {
public:
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id);
~Core();
void clear();
bool running() const;
@ -78,7 +80,8 @@ private:
void issue();
void execute();
void writeback();
void writeToStdOut(Addr addr, Word data);
std::vector<RegMask> in_use_iregs_;
std::vector<RegMask> in_use_fregs_;
@ -88,6 +91,7 @@ private:
std::vector<WarpMask> barriers_;
std::vector<Word> csrs_;
std::vector<Byte> fcsrs_;
std::unordered_map<int, std::stringstream> print_bufs_;
Word id_;
const ArchDef &arch_;

View file

@ -5,6 +5,7 @@ all:
$(MAKE) -C sfilter
$(MAKE) -C nearn
$(MAKE) -C guassian
$(MAKE) -C printf
run:
$(MAKE) -C vecadd run-vlsim
@ -13,6 +14,7 @@ run:
$(MAKE) -C sfilter run-vlsim
$(MAKE) -C nearn run-vlsim
$(MAKE) -C guassian run-vlsim
$(MAKE) -C printf run-vlsim
clean:
$(MAKE) -C vecadd clean
@ -21,6 +23,7 @@ clean:
$(MAKE) -C sfilter clean
$(MAKE) -C nearn clean
$(MAKE) -C guassian clean
$(MAKE) -C printf clean
clean-all:
$(MAKE) -C vecadd clean-all
@ -28,4 +31,5 @@ clean-all:
$(MAKE) -C saxpy clean-all
$(MAKE) -C sfilter clean-all
$(MAKE) -C nearn clean-all
$(MAKE) -C guassian clean-all
$(MAKE) -C guassian clean-all
$(MAKE) -C printf clean-all

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff