AXI memory bus support

This commit is contained in:
Blaise Tine 2021-09-10 01:36:01 -07:00
parent ca46b0a0be
commit 18172fa611
6 changed files with 425 additions and 10 deletions

View file

@ -72,6 +72,9 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# adjust l1 block size to match l2
CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"

View file

@ -28,7 +28,12 @@ CFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
TOP = Vortex
# Select the top-level RTL module name for this build.
# When AXI_BUS is defined (e.g. `AXI_BUS=1 make`), the Vortex_axi wrapper is
# used and -DAXI_BUS is added to CFLAGS so the C++ sources compile their
# AXI-specific code paths; otherwise the plain Vortex module is used.
# NOTE(review): TOP is presumably the module handed to Verilator as the
# top level -- confirm against the rule that consumes it.
ifdef AXI_BUS
TOP = Vortex_axi
CFLAGS += -DAXI_BUS
else
TOP = Vortex
endif
RTL_DIR = ../../hw/rtl
DPI_DIR = ../../hw/dpi

124
hw/rtl/Vortex_axi.v Normal file
View file

@ -0,0 +1,124 @@
`include "VX_define.vh"
// Vortex_axi: top-level wrapper that exposes the Vortex memory interface as
// an AXI master.
//
// It instantiates the Vortex processor and a VX_axi_adapter, and connects the
// processor's mem_req_* / mem_rsp_* signals to the adapter, whose m_axi_*
// ports are forwarded unchanged to this module's ports.
//
// Parameters:
//   AXI_DATA_WIDTH - width of the AXI data buses (defaults to `VX_MEM_DATA_WIDTH)
//   AXI_ADDR_WIDTH - width of the AXI address buses (defaults to 32)
//   AXI_TID_WIDTH  - width of the AXI ID signals (defaults to `VX_MEM_TAG_WIDTH)
//   AXI_STROBE_WIDTH (localparam) - one write-strobe bit per data byte
//
// NOTE(review): the port list contains the AW, W, AR and R channels only.
// There is no write-response (B) channel and no WLAST / RLAST / RRESP signal,
// so connecting this to a generic AXI interconnect will likely need extra
// glue -- confirm against the intended slave.
module Vortex_axi #(
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = 32,
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
)(
// Clock
input wire clk,
input wire reset,
// AXI write request
output wire m_axi_wvalid,
output wire m_axi_awvalid,
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
output wire [7:0] m_axi_awlen,
output wire [2:0] m_axi_awsize,
output wire [1:0] m_axi_awburst,
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
input wire m_axi_wready,
input wire m_axi_awready,
// AXI read request
output wire m_axi_arvalid,
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
output wire [7:0] m_axi_arlen,
output wire [2:0] m_axi_arsize,
output wire [1:0] m_axi_arburst,
input wire m_axi_arready,
// AXI read response
input wire m_axi_rvalid,
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
output wire m_axi_rready,
// Status
output wire busy
);
// Vortex-native memory bus between the processor and the adapter.
// Request side (driven by Vortex, mem_req_ready driven by the adapter):
wire mem_req_valid;
wire mem_req_rw;
wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen;
wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr;
wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data;
wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
// Response side (driven by the adapter, mem_rsp_ready driven by Vortex):
wire mem_rsp_valid;
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data;
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready;
// Bridge between the Vortex memory bus and the AXI ports.
// The Vortex-side widths always come from the VX_MEM_* macros; the AXI-side
// widths come from this module's parameters. VX_axi_adapter statically
// asserts that the data and tag/ID widths on both sides are equal.
VX_axi_adapter #(
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
.AXI_TID_WIDTH (AXI_TID_WIDTH)
) axi_adapter (
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awid (m_axi_awid),
.m_axi_awaddr (m_axi_awaddr),
.m_axi_awlen (m_axi_awlen),
.m_axi_awsize (m_axi_awsize),
.m_axi_awburst (m_axi_awburst),
.m_axi_wdata (m_axi_wdata),
.m_axi_wstrb (m_axi_wstrb),
.m_axi_wready (m_axi_wready),
.m_axi_awready (m_axi_awready),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arid (m_axi_arid),
.m_axi_araddr (m_axi_araddr),
.m_axi_arlen (m_axi_arlen),
.m_axi_arsize (m_axi_arsize),
.m_axi_arburst (m_axi_arburst),
.m_axi_arready (m_axi_arready),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rid (m_axi_rid),
.m_axi_rdata (m_axi_rdata),
.m_axi_rready (m_axi_rready)
);
// The processor itself; clk and reset are only consumed here because the
// adapter instantiated above has no clock or reset port.
Vortex vortex (
.clk (clk),
.reset (reset),
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
.busy (busy)
);
endmodule

View file

@ -0,0 +1,88 @@
`include "VX_define.vh"
// VX_axi_adapter: stateless bridge from the Vortex memory request/response
// interface to the AXI AW/W/AR/R channels.
//
// The module has no clock, reset or registers: every output is a continuous
// assignment of the inputs. Each Vortex request is presented as a single AXI
// transfer of one full data word.
//
// Parameters:
//   VX_DATA_WIDTH  - Vortex memory data width in bits
//   VX_ADDR_WIDTH  - Vortex memory address width; the default expression
//                    (32 - log2(bytes per word)) corresponds to a word address
//   VX_TAG_WIDTH   - Vortex request/response tag width
//   AXI_DATA_WIDTH - AXI data width; must equal VX_DATA_WIDTH (asserted below)
//   AXI_ADDR_WIDTH - AXI (byte) address width
//   AXI_TID_WIDTH  - AXI ID width; must equal VX_TAG_WIDTH (asserted below)
//
// NOTE(review): no write-response (B) channel and no WLAST / RLAST / RRESP
// signals are present on this interface.
module VX_axi_adapter #(
parameter VX_DATA_WIDTH = 512,
parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)),
parameter VX_TAG_WIDTH = 8,
parameter AXI_DATA_WIDTH = VX_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = 32,
parameter AXI_TID_WIDTH = VX_TAG_WIDTH,
localparam VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8),
localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
) (
// Vortex request
input wire mem_req_valid,
input wire mem_req_rw,
input wire [VX_BYTEEN_WIDTH-1:0] mem_req_byteen,
input wire [VX_ADDR_WIDTH-1:0] mem_req_addr,
input wire [VX_DATA_WIDTH-1:0] mem_req_data,
input wire [VX_TAG_WIDTH-1:0] mem_req_tag,
// Vortex response
input wire mem_rsp_ready,
output wire mem_rsp_valid,
output wire [VX_DATA_WIDTH-1:0] mem_rsp_data,
output wire [VX_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_req_ready,
// AXI write request
output wire m_axi_wvalid,
output wire m_axi_awvalid,
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
output wire [7:0] m_axi_awlen,
output wire [2:0] m_axi_awsize,
output wire [1:0] m_axi_awburst,
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
input wire m_axi_wready,
input wire m_axi_awready,
// AXI read request
output wire m_axi_arvalid,
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
output wire [7:0] m_axi_arlen,
output wire [2:0] m_axi_arsize,
output wire [1:0] m_axi_arburst,
input wire m_axi_arready,
// AXI read response
input wire m_axi_rvalid,
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
output wire m_axi_rready
);
// log2 of the number of bytes in one Vortex data word. Used both as the
// shift that turns the Vortex address into an AXI address and as the value
// driven on AWSIZE / ARSIZE.
localparam AXSIZE = $clog2(VX_DATA_WIDTH/8);
// No width conversion is implemented: data and tag/ID are wired straight
// through, so both sides must have identical widths.
`STATIC_ASSERT((AXI_DATA_WIDTH == VX_DATA_WIDTH), ("invalid parameter"))
`STATIC_ASSERT((AXI_TID_WIDTH == VX_TAG_WIDTH), ("invalid parameter"))
// AXI write channel
// Address (AW) and data (W) are offered in the same cycle for a write request.
assign m_axi_wvalid = mem_req_valid & mem_req_rw;
assign m_axi_awvalid = mem_req_valid & mem_req_rw;
assign m_axi_awid = mem_req_tag;
// widen the Vortex address to the AXI width, then scale it by the word size
assign m_axi_awaddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE;
// AWLEN = 0: one transfer per request (AXI encodes burst length as AxLEN + 1)
assign m_axi_awlen = 8'b00000000;
assign m_axi_awsize = 3'(AXSIZE);
// AWBURST = 2'b00 is the FIXED burst type in the AXI encoding
assign m_axi_awburst = 2'b00;
assign m_axi_wdata = mem_req_data;
assign m_axi_wstrb = mem_req_byteen;
// AXI read channel
// Mirrors the write address channel for read requests (mem_req_rw low).
assign m_axi_arvalid = mem_req_valid & ~mem_req_rw;
assign m_axi_arid = mem_req_tag;
assign m_axi_araddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE;
assign m_axi_arlen = 8'b00000000;
assign m_axi_arsize = 3'(AXSIZE);
assign m_axi_arburst = 2'b00;
// back-pressure from Vortex on responses goes straight to the R channel
assign m_axi_rready = mem_rsp_ready;
// Vortex inputs
// Read responses are forwarded unmodified; the AXI ID is returned as the tag.
assign mem_rsp_valid = m_axi_rvalid;
assign mem_rsp_tag = m_axi_rid;
assign mem_rsp_data = m_axi_rdata;
// A write is accepted only when both AW and W are ready in the same cycle;
// a read is accepted when AR is ready.
// NOTE(review): if a slave raises only one of awready / wready, that channel
// completes its handshake on the AXI side while mem_req_ready stays low, so
// the same request is offered again on the next cycle. The simulator in this
// commit always drives both ready signals with the same value -- confirm any
// other slave does too, or add per-channel acceptance tracking.
assign mem_req_ready = mem_req_rw ? (m_axi_awready && m_axi_wready) : m_axi_arready;
endmodule

View file

@ -66,7 +66,12 @@ Simulator::Simulator() {
Verilated::assertOn(false);
ram_ = nullptr;
#ifdef AXI_BUS
vortex_ = new VVortex_axi();
#else
vortex_ = new VVortex();
#endif
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
@ -103,15 +108,18 @@ void Simulator::attach_ram(RAM* ram) {
void Simulator::reset() {
print_bufs_.clear();
for (int b = 0; b < MEMORY_BANKS; ++b) {
mem_rsp_vec_[b].clear();
}
last_mem_rsp_bank_ = 0;
mem_rsp_active_ = false;
vortex_->mem_rsp_valid = 0;
vortex_->mem_req_ready = 0;
#ifdef AXI_BUS
this->reset_axi_bus();
#else
this->reset_mem_bus();
#endif
vortex_->reset = 1;
@ -133,12 +141,20 @@ void Simulator::step() {
vortex_->clk = 0;
this->eval();
mem_rsp_ready_ = vortex_->mem_rsp_ready;
#ifdef AXI_BUS
this->eval_axi_bus(0);
#else
this->eval_mem_bus(0);
#endif
vortex_->clk = 1;
this->eval();
this->eval_mem_bus();
#ifdef AXI_BUS
this->eval_axi_bus(1);
#else
this->eval_mem_bus(1);
#endif
#ifndef NDEBUG
fflush(stdout);
@ -155,7 +171,158 @@ void Simulator::eval() {
++timestamp;
}
void Simulator::eval_mem_bus() {
#ifdef AXI_BUS
// Puts the simulator-driven AXI handshake inputs of the model into their
// idle state: no read response is offered and no request channel is ready.
void Simulator::reset_axi_bus() {
  // read-response channel: nothing to return yet
  vortex_->m_axi_rvalid = 0;

  // request channels: refuse read and write requests
  vortex_->m_axi_arready = 0;
  vortex_->m_axi_awready = 0;
  vortex_->m_axi_wready = 0;
}
// Models the AXI memory slave attached to the design for one clock phase.
//
// clk == false: only samples the design's m_axi_rready into mem_rsp_ready_
//               so the high phase can tell whether a pending read response
//               was accepted.
// clk == true:  ages the queued read responses, drives the R channel,
//               services the request currently presented on AW/W or AR
//               against ram_, and finally updates the three *ready inputs.
//
// NOTE(review): a write is recognised through m_axi_wvalid alone;
// m_axi_awvalid is never read here. VX_axi_adapter drives both from the same
// expression, so they are equal in this design.
void Simulator::eval_axi_bus(bool clk) {
if (!clk) {
mem_rsp_ready_ = vortex_->m_axi_rready;
return;
}
// no RAM attached: refuse every request (m_axi_rvalid is left untouched)
if (ram_ == nullptr) {
vortex_->m_axi_wready = 0;
vortex_->m_axi_awready = 0;
vortex_->m_axi_arready = 0;
return;
}
// update memory responses schedule
for (int b = 0; b < MEMORY_BANKS; ++b) {
for (auto& rsp : mem_rsp_vec_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
}
bool has_response = false;
// schedule memory responses that are ready
// Banks are scanned round-robin starting after the last bank served; only
// the oldest entry of each bank is considered. last_mem_rsp_bank_ is updated
// as soon as a candidate is found, even if it is not sent this cycle.
for (int i = 0; i < MEMORY_BANKS; ++i) {
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
if (!mem_rsp_vec_[b].empty()
&& (mem_rsp_vec_[b].begin()->cycles_left) <= 0) {
has_response = true;
last_mem_rsp_bank_ = b;
break;
}
}
// send memory response
// The response currently on the R channel is retired once rvalid was high
// and rready (sampled during the low phase) was asserted.
if (mem_rsp_active_
&& vortex_->m_axi_rvalid && mem_rsp_ready_) {
mem_rsp_active_ = false;
}
// R channel free: present the selected response or drop rvalid. The entry
// is removed from its queue immediately; the values stay on m_axi_rdata /
// m_axi_rid until the branch above retires them.
if (!mem_rsp_active_) {
if (has_response) {
vortex_->m_axi_rvalid = 1;
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
/*
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");
*/
memcpy((uint8_t*)vortex_->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
vortex_->m_axi_rid = mem_rsp_it->tag;
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
mem_rsp_active_ = true;
} else {
vortex_->m_axi_rvalid = 0;
}
}
// select the memory bank
// The write address is used whenever wvalid is high, otherwise the read
// address; consecutive blocks map to consecutive banks.
uint32_t req_addr = vortex_->m_axi_wvalid ? vortex_->m_axi_awaddr : vortex_->m_axi_araddr;
uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0;
// handle memory stalls
// Without ENABLE_MEM_STALLS, mem_stalled stays false. With it, the bus
// stalls whenever (timestamp/2) is a multiple of MEM_STALLS_MODULO or the
// selected bank already holds MEM_RQ_SIZE pending responses.
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
if (vortex_->m_axi_wvalid || vortex_->m_axi_arvalid) {
if (vortex_->m_axi_wvalid) {
// write request: one strobe bit per byte of the block
uint64_t byteen = vortex_->m_axi_wstrb;
unsigned base_addr = vortex_->m_axi_awaddr;
uint8_t* data = (uint8_t*)(vortex_->m_axi_wdata);
if (base_addr >= IO_COUT_ADDR
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
// Console output window: each enabled byte is appended to the print
// buffer keyed by its byte offset i within the block; a buffer is
// written to std::cout (prefixed with "#i: ") and cleared when a
// newline is appended. Nothing is stored to ram_ on this path.
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
/*
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");
*/
// regular write: store only the bytes whose strobe bit is set
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
}
} else {
// read request: the block contents are captured from ram_ now and
// queued on the selected bank with a delay of MEM_LATENCY cycles
mem_req_t mem_req;
mem_req.tag = vortex_->m_axi_arid;
mem_req.addr = vortex_->m_axi_araddr;
ram_->read(vortex_->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_rsp_vec_[req_bank]) {
if (mem_req.addr == rsp.addr) {
// duplicate requests receive the same cycle delay
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_rsp_vec_[req_bank].emplace_back(mem_req);
}
}
}
// All three request channels share one ready value: accept unless stalled.
vortex_->m_axi_wready = !mem_stalled;
vortex_->m_axi_awready = !mem_stalled;
vortex_->m_axi_arready = !mem_stalled;
}
#else
// Puts the simulator-driven inputs of the native Vortex memory bus into
// their idle state: no response is offered and no request is accepted.
void Simulator::reset_mem_bus() {
  vortex_->mem_rsp_valid = 0;  // response side idle
  vortex_->mem_req_ready = 0;  // request side not accepting
}
void Simulator::eval_mem_bus(bool clk) {
if (!clk) {
mem_rsp_ready_ = vortex_->mem_rsp_ready;
return;
}
if (ram_ == nullptr) {
vortex_->mem_req_ready = 0;
return;
@ -276,6 +443,8 @@ void Simulator::eval_mem_bus() {
vortex_->mem_req_ready = !mem_stalled;
}
#endif
void Simulator::wait(uint32_t cycles) {
for (int i = 0; i < cycles; ++i) {
this->step();
@ -309,11 +478,19 @@ int Simulator::run() {
}
// Reads the ebreak signal of the execute stage of core 0 in cluster 0.
// Only the root of the model hierarchy differs between the two builds: with
// AXI_BUS the processor sits one level down, as instance `vortex` of the
// Vortex_axi wrapper.
bool Simulator::get_ebreak() const {
#ifdef AXI_BUS
  auto processor = vortex_->Vortex_axi->vortex;
#else
  auto processor = vortex_->Vortex;
#endif
  auto core0 = processor->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core;
  return (int)core0->pipeline->execute->ebreak;
}
// Returns entry `reg` of the last_wb_value array exposed by the writeback
// unit of core 0 in cluster 0. `reg` is used as the index without any range
// check. Only the root of the model hierarchy differs between the two
// builds: with AXI_BUS the processor is instance `vortex` of the Vortex_axi
// wrapper.
int Simulator::get_last_wb_value(int reg) const {
#ifdef AXI_BUS
  auto processor = vortex_->Vortex_axi->vortex;
#else
  auto processor = vortex_->Vortex;
#endif
  auto core0 = processor->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core;
  return (int)core0->pipeline->commit->writeback->last_wb_value[reg];
}
void Simulator::load_bin(const char* program_file) {

View file

@ -1,8 +1,14 @@
#pragma once
#include <verilated.h>
#ifdef AXI_BUS
#include "VVortex_axi.h"
#include "VVortex_axi__Syms.h"
#else
#include "VVortex.h"
#include "VVortex__Syms.h"
#endif
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
@ -58,8 +64,14 @@ private:
std::unordered_map<int, std::stringstream> print_bufs_;
void eval();
void eval_mem_bus();
#ifdef AXI_BUS
void reset_axi_bus();
void eval_axi_bus(bool clk);
#else
void reset_mem_bus();
void eval_mem_bus(bool clk);
#endif
int get_last_wb_value(int reg) const;
@ -73,7 +85,13 @@ private:
bool mem_rsp_ready_;
RAM *ram_;
#ifdef AXI_BUS
VVortex_axi *vortex_;
#else
VVortex *vortex_;
#endif
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
#endif