mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
vortex runtime console out implementation
This commit is contained in:
parent
ee06337553
commit
6213b1a910
18 changed files with 78144 additions and 78036 deletions
|
@ -6,6 +6,8 @@
|
|||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <cmath>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
#if defined(USE_FPGA) || defined(USE_ASE)
|
||||
#include <opae/fpga.h>
|
||||
|
@ -334,6 +336,8 @@ extern int vx_buf_release(vx_buffer_h hbuffer) {
|
|||
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
|
@ -351,14 +355,40 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
|
||||
|
||||
for (;;) {
|
||||
uint64_t data;
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
|
||||
if (0 == data || 0 == timeout) {
|
||||
if (data != 0) {
|
||||
fprintf(stdout, "[VXDRV] ready-wait timed out: status=%ld\n", data);
|
||||
uint64_t status;
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
|
||||
|
||||
uint16_t cout_data = (status >> 8) & 0xffff;
|
||||
if (cout_data & 0x0001) {
|
||||
do {
|
||||
char cout_char = (cout_data >> 1) & 0xff;
|
||||
int cout_tid = (cout_data >> 9) & 0xff;
|
||||
auto& ss_buf = print_bufs[cout_tid];
|
||||
ss_buf << cout_char;
|
||||
if (cout_char == '\n') {
|
||||
std::cout << std::dec << "#" << cout_tid << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
|
||||
cout_data = (status >> 8) & 0xffff;
|
||||
} while (cout_data & 0x0001);
|
||||
}
|
||||
|
||||
uint8_t state = status & 0xff;
|
||||
|
||||
if (0 == state || 0 == timeout) {
|
||||
for (auto& buf : print_bufs) {
|
||||
auto str = buf.second.str();
|
||||
if (!str.empty()) {
|
||||
std::cout << "#" << buf.first << ": " << str << std::endl;
|
||||
}
|
||||
}
|
||||
if (state != 0) {
|
||||
fprintf(stdout, "[VXDRV] ready-wait timed out: state=%d\n", state);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
nanosleep(&sleep_time, nullptr);
|
||||
timeout -= sleep_time_ms;
|
||||
};
|
||||
|
|
|
@ -54,11 +54,11 @@
|
|||
`endif
|
||||
|
||||
`ifndef IO_COUT_ADDR
|
||||
`define IO_COUT_ADDR 32'hFFFFFFC0
|
||||
`define IO_COUT_ADDR (32'hFFFFFFFF - `MEM_BLOCK_SIZE + 1)
|
||||
`endif
|
||||
|
||||
`ifndef IO_COUT_SIZE
|
||||
`define IO_COUT_SIZE 64
|
||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef IO_CSR_ADDR
|
||||
|
|
|
@ -45,14 +45,13 @@ localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
|
|||
|
||||
localparam CCI_LINE_WIDTH = $bits(t_ccip_clData);
|
||||
localparam CCI_LINE_SIZE = CCI_LINE_WIDTH / 8;
|
||||
localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_LINE_WIDTH / 8);
|
||||
localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_LINE_SIZE);
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||
localparam AVS_REQ_TAGW_VX = `MAX(`VX_MEM_TAG_WIDTH, `VX_MEM_TAG_WIDTH + $clog2(LMEM_LINE_WIDTH) - $clog2(`VX_MEM_LINE_WIDTH));
|
||||
localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, CCI_ADDR_WIDTH + $clog2(LMEM_LINE_WIDTH) - $clog2(CCI_LINE_WIDTH));
|
||||
localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI);
|
||||
|
||||
|
||||
localparam CCI_RD_WINDOW_SIZE = 8;
|
||||
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
|
||||
localparam CCI_RW_PENDING_SIZE= 256;
|
||||
|
@ -70,6 +69,10 @@ localparam MMIO_MEM_ADDR = `AFU_IMAGE_MMIO_MEM_ADDR;
|
|||
localparam MMIO_DATA_SIZE = `AFU_IMAGE_MMIO_DATA_SIZE;
|
||||
localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS;
|
||||
|
||||
localparam COUT_TID_WIDTH = $clog2(`IO_COUT_SIZE);
|
||||
localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8;
|
||||
localparam COUT_QUEUE_SIZE = 256;
|
||||
|
||||
localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ;
|
||||
localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
|
||||
|
||||
|
@ -147,6 +150,9 @@ assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmi
|
|||
assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address);
|
||||
`endif
|
||||
|
||||
wire [COUT_QUEUE_DATAW-1:0] cout_q_dout;
|
||||
wire cout_q_full, cout_q_empty;
|
||||
|
||||
/*
|
||||
`DEBUG_BEGIN
|
||||
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
|
||||
|
@ -189,7 +195,7 @@ always @(posedge clk) begin
|
|||
mmio_tx.hdr <= 0;
|
||||
end else begin
|
||||
mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid;
|
||||
mmio_tx.hdr.tid <= mmio_hdr.tid;
|
||||
mmio_tx.hdr.tid <= mmio_hdr.tid;
|
||||
end
|
||||
|
||||
// serve MMIO write request
|
||||
|
@ -252,7 +258,7 @@ always @(posedge clk) begin
|
|||
16'h0006: mmio_tx.data <= 64'h0; // next AFU
|
||||
16'h0008: mmio_tx.data <= 64'h0; // reserved
|
||||
MMIO_STATUS: begin
|
||||
mmio_tx.data <= 64'(state);
|
||||
mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)});
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
if (state != STATE_WIDTH'(mmio_tx.data)) begin
|
||||
$display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state);
|
||||
|
@ -462,6 +468,16 @@ t_local_mem_data vx_mem_rsp_arb_data;
|
|||
wire [AVS_REQ_TAGW-1:0] vx_mem_rsp_arb_tag;
|
||||
wire vx_mem_rsp_arb_ready;
|
||||
|
||||
wire vx_mem_is_cout;
|
||||
wire vx_mem_req_valid_qual;
|
||||
wire vx_mem_req_ready_qual;
|
||||
|
||||
assign vx_mem_req_valid_qual = vx_mem_req_valid
|
||||
&& vx_mem_en
|
||||
&& ~vx_mem_is_cout;
|
||||
|
||||
assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual;
|
||||
|
||||
VX_to_mem #(
|
||||
.SRC_DATA_WIDTH (`VX_MEM_LINE_WIDTH),
|
||||
.DST_DATA_WIDTH (LMEM_LINE_WIDTH),
|
||||
|
@ -473,13 +489,13 @@ VX_to_mem #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.mem_req_valid_in (vx_mem_req_valid && vx_mem_en),
|
||||
.mem_req_valid_in (vx_mem_req_valid_qual),
|
||||
.mem_req_addr_in (vx_mem_req_addr),
|
||||
.mem_req_rw_in (vx_mem_req_rw),
|
||||
.mem_req_byteen_in (vx_mem_req_byteen),
|
||||
.mem_req_data_in (vx_mem_req_data),
|
||||
.mem_req_tag_in (vx_mem_req_tag),
|
||||
.mem_req_ready_in (vx_mem_req_ready),
|
||||
.mem_req_ready_in (vx_mem_req_ready_qual),
|
||||
|
||||
.mem_req_valid_out (vx_mem_req_arb_valid),
|
||||
.mem_req_addr_out (vx_mem_req_arb_addr),
|
||||
|
@ -885,6 +901,50 @@ Vortex #() vortex (
|
|||
.busy (vx_busy)
|
||||
);
|
||||
|
||||
// COUT HANDLING //////////////////////////////////////////////////////////////
|
||||
|
||||
wire [COUT_TID_WIDTH-1:0] cout_tid;
|
||||
wire [7:0] cout_char;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (`VX_MEM_BYTEEN_WIDTH)
|
||||
) cout_tid_enc (
|
||||
.data_in (vx_mem_req_byteen),
|
||||
.data_out (cout_tid),
|
||||
`UNUSED_PIN (valid)
|
||||
);
|
||||
|
||||
wire [`VX_MEM_BYTEEN_WIDTH-1:0][7:0] vx_mem_req_data_ar = vx_mem_req_data;
|
||||
assign cout_char = vx_mem_req_data_ar[cout_tid];
|
||||
|
||||
assign vx_mem_is_cout = (vx_mem_req_addr == `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> (32 - `VX_MEM_ADDR_WIDTH)));
|
||||
|
||||
wire cout_q_push = vx_mem_req_valid
|
||||
&& vx_mem_en
|
||||
&& vx_mem_is_cout
|
||||
&& ~cout_q_full;
|
||||
|
||||
wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
|
||||
&& (mmio_hdr.address == MMIO_STATUS)
|
||||
&& ~cout_q_empty;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (COUT_QUEUE_DATAW),
|
||||
.SIZE (COUT_QUEUE_SIZE)
|
||||
) cout_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (cout_q_push),
|
||||
.pop (cout_q_pop),
|
||||
.data_in ({cout_tid, cout_char}),
|
||||
.data_out (cout_q_dout),
|
||||
.empty (cout_q_empty),
|
||||
.full (cout_q_full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
// SCOPE //////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef SCOPE
|
||||
|
|
|
@ -57,8 +57,8 @@ Simulator::Simulator() {
|
|||
Simulator::~Simulator() {
|
||||
for (auto& buf : print_bufs_) {
|
||||
auto str = buf.second.str();
|
||||
if (str.size()) {
|
||||
std::cout << "#" << buf.first << ": " << buf.second.str() << std::endl;
|
||||
if (!str.empty()) {
|
||||
std::cout << "#" << buf.first << ": " << str << std::endl;
|
||||
}
|
||||
}
|
||||
#ifdef VCD_OUTPUT
|
||||
|
@ -171,16 +171,31 @@ void Simulator::eval_mem_bus() {
|
|||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
if (vortex_->mem_req_valid) {
|
||||
if (vortex_->mem_req_rw) {
|
||||
if (vortex_->mem_req_rw) {
|
||||
uint64_t byteen = vortex_->mem_req_byteen;
|
||||
unsigned base_addr = (vortex_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
uint8_t* data = (uint8_t*)(vortex_->mem_req_data);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
if (base_addr >= IO_COUT_ADDR
|
||||
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
} else {
|
||||
mem_req_t mem_req;
|
||||
mem_req.tag = vortex_->mem_req_tag;
|
||||
mem_req.addr = vortex_->mem_req_addr;
|
||||
|
|
|
@ -3,15 +3,9 @@
|
|||
/*
 * vx_putchar: write one character to the console output window.
 *
 * The byte in a0 is stored at
 *     IO_COUT_ADDR + (CSR_GTID & (IO_COUT_SIZE - 1))
 * so the offset inside the window identifies the writer. The mask only
 * wraps correctly if IO_COUT_SIZE is a power of two (assumption, confirm
 * against the definition of IO_COUT_SIZE).
 *
 * Clobbers t0 and t1.
 */
.type vx_putchar, @function
.global vx_putchar
vx_putchar:
    csrr t0, CSR_GTID                   /* id read from the GTID CSR        */
    andi t0, t0, %lo(IO_COUT_SIZE-1)    /* wrap the id into the window      */
    li   t1, IO_COUT_ADDR               /* base address of the window       */
    add  t0, t0, t1                     /* slot address = base + offset     */
    sb   a0, 0(t0)                      /* single-byte store of the char    */
    ret
|
|
@ -64,7 +64,7 @@ static const char* skip_modifier(const char* format) {
|
|||
break;
|
||||
case 'j':
|
||||
case 'z':
|
||||
case 't':
|
||||
case 't':
|
||||
case 'L':
|
||||
++format;
|
||||
break;
|
||||
|
@ -123,10 +123,10 @@ int vx_vprintf(const char* format, va_list va) {
|
|||
|
||||
/*
 * printf-style front end: packages the variadic arguments into a va_list
 * and forwards them to vx_vprintf.
 *
 * format: format string passed through unchanged to vx_vprintf.
 * Returns whatever vx_vprintf returns.
 */
int vx_printf(const char * format, ...) {
  va_list va;
  va_start(va, format);
  int ret = vx_vprintf(format, va);
  /* every va_start must be paired with exactly one va_end before returning */
  va_end(va);
  return ret;
}
|
||||
|
||||
static const char hextoa[] = "0123456789abcdef";
|
||||
|
|
|
@ -42,6 +42,15 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
|||
this->clear();
|
||||
}
|
||||
|
||||
// Destructor: writes out whatever text is still held in print_bufs_.
// Each non-empty buffer is printed to stdout as "#<key>: <text>" and the
// line is terminated (and flushed) with std::endl; empty buffers are skipped.
Core::~Core() {
  for (auto it = print_bufs_.begin(); it != print_bufs_.end(); ++it) {
    const auto pending = it->second.str();
    if (pending.empty())
      continue;
    std::cout << "#" << it->first << ": " << pending << std::endl;
  }
}
|
||||
|
||||
void Core::clear() {
|
||||
for (int w = 0; w < arch_.num_warps(); ++w) {
|
||||
in_use_iregs_[w].reset();
|
||||
|
@ -73,6 +82,7 @@ void Core::clear() {
|
|||
inst_in_issue_.clear();
|
||||
inst_in_execute_.clear();
|
||||
inst_in_writeback_.clear();
|
||||
print_bufs_.clear();
|
||||
|
||||
steps_ = 0;
|
||||
insts_ = 0;
|
||||
|
@ -340,6 +350,11 @@ void Core::dcache_write(Addr addr, Word data, Size size) {
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
if (addr >= IO_COUT_ADDR
|
||||
&& addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
this->writeToStdOut(addr, data);
|
||||
return;
|
||||
}
|
||||
mem_.write(addr, &data, size, 0);
|
||||
}
|
||||
|
||||
|
@ -356,4 +371,15 @@ void Core::printStats() const {
|
|||
<< "Insts : " << insts_ << std::endl
|
||||
<< "Loads : " << loads_ << std::endl
|
||||
<< "Stores: " << stores_ << std::endl;
|
||||
}
|
||||
|
||||
// Buffers one console character per call and prints complete lines.
//
// addr: address of the store; its offset from IO_COUT_ADDR, masked with
//       (IO_COUT_SIZE - 1), selects which buffer in print_bufs_ is used.
//       NOTE(review): the mask presumes IO_COUT_SIZE is a power of two.
// data: value being stored; it is converted to a char and appended.
//
// When the appended character is '\n', the buffered text is written to
// stdout as "#<id>: <text>" (the text already ends with the newline), stdout
// is flushed, and the buffer is reset to empty.
void Core::writeToStdOut(Addr addr, Word data) {
  const uint32_t slot = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
  const char ch = (char)data;

  auto& line = print_bufs_[slot];
  line << ch;

  if (ch != '\n')
    return;

  std::cout << std::dec << "#" << slot << ": " << line.str() << std::flush;
  line.str("");
}
|
|
@ -22,6 +22,8 @@ class Core {
|
|||
public:
|
||||
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id);
|
||||
|
||||
~Core();
|
||||
|
||||
void clear();
|
||||
|
||||
bool running() const;
|
||||
|
@ -78,7 +80,8 @@ private:
|
|||
void issue();
|
||||
void execute();
|
||||
void writeback();
|
||||
|
||||
|
||||
void writeToStdOut(Addr addr, Word data);
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
|
@ -88,6 +91,7 @@ private:
|
|||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Word> csrs_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
Word id_;
|
||||
const ArchDef &arch_;
|
||||
|
|
|
@ -5,6 +5,7 @@ all:
|
|||
$(MAKE) -C sfilter
|
||||
$(MAKE) -C nearn
|
||||
$(MAKE) -C guassian
|
||||
$(MAKE) -C printf
|
||||
|
||||
run:
|
||||
$(MAKE) -C vecadd run-vlsim
|
||||
|
@ -13,6 +14,7 @@ run:
|
|||
$(MAKE) -C sfilter run-vlsim
|
||||
$(MAKE) -C nearn run-vlsim
|
||||
$(MAKE) -C guassian run-vlsim
|
||||
$(MAKE) -C printf run-vlsim
|
||||
|
||||
clean:
|
||||
$(MAKE) -C vecadd clean
|
||||
|
@ -21,6 +23,7 @@ clean:
|
|||
$(MAKE) -C sfilter clean
|
||||
$(MAKE) -C nearn clean
|
||||
$(MAKE) -C guassian clean
|
||||
$(MAKE) -C printf clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C vecadd clean-all
|
||||
|
@ -28,4 +31,5 @@ clean-all:
|
|||
$(MAKE) -C saxpy clean-all
|
||||
$(MAKE) -C sfilter clean-all
|
||||
$(MAKE) -C nearn clean-all
|
||||
$(MAKE) -C guassian clean-all
|
||||
$(MAKE) -C guassian clean-all
|
||||
$(MAKE) -C printf clean-all
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue