Add support for a multi-bank memory bus

This commit is contained in:
Blaise Tine 2021-05-04 07:32:03 -07:00
parent bdbf99c5b0
commit bde6a69ea0
11 changed files with 276 additions and 477 deletions

View file

@ -137,16 +137,19 @@ void opae_sim::flush() {
void opae_sim::reset() {
host_buffers_.clear();
mem_reads_.clear();
host_buffers_.clear();
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
vortex_afu_->avs_readdatavalid = 0;
vortex_afu_->avs_waitrequest = 0;
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
mem_reads_[b].clear();
vortex_afu_->avs_readdatavalid[b] = 0;
vortex_afu_->avs_waitrequest[b] = 0;
}
vortex_afu_->reset = 1;
@ -268,79 +271,29 @@ void opae_sim::sTxPort_bus() {
}
void opae_sim::avs_bus() {
// update memory responses schedule
for (auto& rsp : mem_reads_) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
// schedule memory responses in FIFO order
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_.end());
if (!mem_reads_.empty()
&& (0 == mem_reads_.begin()->cycles_left)) {
mem_rd_it = mem_reads_.begin();
}
// send memory response
vortex_afu_->avs_readdatavalid = 0;
if (mem_rd_it != mem_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, mem_rd_it->data.data(), MEM_BLOCK_SIZE);
uint32_t addr = mem_rd_it->addr;
mem_reads_.erase(mem_rd_it);
/*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
// update memory responses schedule
for (auto& rsp : mem_reads_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
printf("}\n");*/
}
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_reads_.size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
assert(!vortex_afu_->avs_read || !vortex_afu_->avs_write);
if (vortex_afu_->avs_write) {
uint64_t byteen = vortex_afu_->avs_byteenable;
unsigned base_addr = vortex_afu_->avs_address * MEM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
// schedule memory responses in FIFO order
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
if (!mem_reads_[b].empty()
&& (0 == mem_reads_[b].begin()->cycles_left)) {
mem_rd_it = mem_reads_[b].begin();
}
if (vortex_afu_->avs_read) {
mem_rd_req_t mem_req;
mem_req.addr = vortex_afu_->avs_address;
ram_.read(vortex_afu_->avs_address * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_reads_) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_reads_.emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_) {
// send memory response
vortex_afu_->avs_readdatavalid[b] = 0;
if (mem_rd_it != mem_reads_[b].end()) {
vortex_afu_->avs_readdatavalid[b] = 1;
memcpy(vortex_afu_->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
uint32_t addr = mem_rd_it->addr;
mem_reads_[b].erase(mem_rd_it);
/*printf("%0ld: [sim] MEM Rd Rsp: addr=%x, pending={", timestamp, addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
@ -348,7 +301,59 @@ void opae_sim::avs_bus() {
}
printf("}\n");*/
}
}
vortex_afu_->avs_waitrequest = mem_stalled;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
assert(!vortex_afu_->avs_read[b] || !vortex_afu_->avs_write[b]);
if (vortex_afu_->avs_write[b]) {
uint64_t byteen = vortex_afu_->avs_byteenable[b];
unsigned base_addr = vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata[b]);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] MEM Wr Req: addr=%x, data=", timestamp, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
}
if (vortex_afu_->avs_read[b]) {
mem_rd_req_t mem_req;
mem_req.addr = vortex_afu_->avs_address[b];
ram_.read(vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_reads_[b]) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_reads_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: addr=%x, pending={", timestamp, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
}
vortex_afu_->avs_waitrequest[b] = mem_stalled;
}
}

View file

@ -1,8 +1,7 @@
#pragma once
#include "verilated.h"
#include "verilated_stub.h"
//#include "verilated_stub.h"
#include "Vvortex_afu_shim.h"
#include "Vvortex_afu_shim__Syms.h"
@ -20,7 +19,7 @@
#include <unordered_map>
#undef MEM_BLOCK_SIZE
#define MEM_BLOCK_SIZE (Vvortex_afu_shim::VL_BITS_avs_writedata / 8)
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
#define CACHE_BLOCK_SIZE 64
@ -83,7 +82,7 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
std::list<mem_rd_req_t> mem_reads_;
std::list<mem_rd_req_t> mem_reads_ [PLATFORM_PARAM_LOCAL_MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;

View file

@ -1,126 +0,0 @@
#pragma once
// Overrides for the signal-declaration macros. Each macro is #undef'd before
// being redefined, so an earlier definition is presumably supplied by the
// Verilator runtime header -- TODO confirm against the including file.
//
// Every override still declares the signal itself, using the storage type
// shown in its last line (CData, SData, IData, QData, or a WData array of
// `words` elements). In addition it emits an anonymous enum in the same scope
// with three compile-time constants:
//   VL_MSB_<name>  - the msb argument
//   VL_LSB_<name>  - the lsb argument
//   VL_BITS_<name> - the signal width in bits, (msb) - (lsb) + 1
// This lets C++ code query a signal's bit range by name, e.g.
// SomeModel::VL_BITS_<name>.
//
// The msb/lsb arguments are parenthesized wherever they are used so that an
// argument written as an expression (for example `1+1`) is evaluated as a
// unit; without the parentheses `msb - lsb + 1` would associate incorrectly.

// --- VL_ST_SIG*: signal declarations ---------------------------------------
#undef VL_ST_SIG8
#define VL_ST_SIG8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_ST_SIG16
#define VL_ST_SIG16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_ST_SIG64
#define VL_ST_SIG64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_ST_SIG
#define VL_ST_SIG(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_ST_SIGW
#define VL_ST_SIGW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// --- VL_SIG*: signal declarations ------------------------------------------
#undef VL_SIG8
#define VL_SIG8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_SIG16
#define VL_SIG16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_SIG64
#define VL_SIG64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_SIG
#define VL_SIG(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_SIGW
#define VL_SIGW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// --- VL_IN*: input port declarations ---------------------------------------
#undef VL_IN8
#define VL_IN8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_IN16
#define VL_IN16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_IN64
#define VL_IN64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_IN
#define VL_IN(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_INW
#define VL_INW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// --- VL_INOUT*: bidirectional port declarations ----------------------------
#undef VL_INOUT8
#define VL_INOUT8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_INOUT16
#define VL_INOUT16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_INOUT64
#define VL_INOUT64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_INOUT
#define VL_INOUT(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_INOUTW
#define VL_INOUTW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

// --- VL_OUT*: output port declarations -------------------------------------
#undef VL_OUT8
#define VL_OUT8(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    CData name
#undef VL_OUT16
#define VL_OUT16(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    SData name
#undef VL_OUT64
#define VL_OUT64(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    QData name
#undef VL_OUT
#define VL_OUT(name, msb, lsb) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    IData name
#undef VL_OUTW
#define VL_OUTW(name, msb, lsb, words) \
    enum { VL_MSB_##name = (msb), VL_LSB_##name = (lsb), VL_BITS_##name = ((msb) - (lsb) + 1) }; \
    WData name[words]

View file

@ -72,17 +72,15 @@ module vortex_afu_shim (
output t_ccip_mmioData af2cp_sTxPort_c2_data,
// Avalon signals for local memory access
output t_local_mem_data avs_writedata,
input t_local_mem_data avs_readdata,
output t_local_mem_addr avs_address,
input logic avs_waitrequest,
output logic avs_write,
output logic avs_read,
output t_local_mem_byte_mask avs_byteenable,
output t_local_mem_burst_cnt avs_burstcount,
input avs_readdatavalid,
output logic [$clog2(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS)-1:0] mem_bank_select
output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS],
input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS]
);
t_if_ccip_Rx cp2af_sRxPort;
@ -103,8 +101,7 @@ vortex_afu #(
.avs_read(avs_read),
.avs_byteenable(avs_byteenable),
.avs_burstcount(avs_burstcount),
.avs_readdatavalid(avs_readdatavalid),
.mem_bank_select(mem_bank_select)
.avs_readdatavalid(avs_readdatavalid)
);
t_if_ccip_c0_RxHdr c0_RxHdr;

View file

@ -1,6 +1,7 @@
`include "VX_define.vh"
module VX_avs_wrapper #(
parameter NUM_BANKS = 1,
parameter AVS_DATA_WIDTH = 1,
parameter AVS_ADDR_WIDTH = 1,
parameter AVS_BURST_WIDTH = 1,
@ -31,103 +32,141 @@ module VX_avs_wrapper #(
input wire mem_rsp_ready,
// AVS bus
output wire [AVS_DATA_WIDTH-1:0] avs_writedata,
input wire [AVS_DATA_WIDTH-1:0] avs_readdata,
output wire [AVS_ADDR_WIDTH-1:0] avs_address,
input wire avs_waitrequest,
output wire avs_write,
output wire avs_read,
output wire [AVS_BYTEENW-1:0] avs_byteenable,
output wire [AVS_BURST_WIDTH-1:0] avs_burstcount,
input avs_readdatavalid,
output wire [AVS_BANKS_BITS-1:0] avs_bankselect
output wire [AVS_DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
input wire [AVS_DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
input wire avs_waitrequest [NUM_BANKS],
output wire avs_write [NUM_BANKS],
output wire avs_read [NUM_BANKS],
output wire [AVS_BYTEENW-1:0] avs_byteenable [NUM_BANKS],
output wire [AVS_BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
input avs_readdatavalid [NUM_BANKS]
);
reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
reg [AVS_BURST_WIDTH-1:0] avs_burstcount_r;
wire avs_reqq_push = mem_req_valid && mem_req_ready && !mem_req_rw;
wire avs_reqq_pop = mem_rsp_valid && mem_rsp_ready;
localparam BANK_ADDRW = $clog2(NUM_BANKS);
wire avs_rspq_push = avs_readdatavalid;
wire avs_rspq_pop = avs_reqq_pop;
wire avs_rspq_empty;
// Requests handling
wire rsp_queue_going_full;
wire [RD_QUEUE_ADDR_WIDTH-1:0] rsp_queue_size;
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.push (avs_reqq_push),
.pop (avs_rspq_pop),
`UNUSED_PIN (empty),
.full (rsp_queue_going_full),
.size (rsp_queue_size)
);
`UNUSED_VAR (rsp_queue_size)
always @(posedge clk) begin
avs_burstcount_r <= 1;
avs_bankselect_r <= 0;
end
reg [AVS_BURST_WIDTH-1:0] avs_burstcount_r;
VX_fifo_queue #(
.DATAW (REQ_TAG_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_reqq_push),
.pop (avs_reqq_pop),
.data_in (mem_req_tag),
.data_out (mem_rsp_tag),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
wire [NUM_BANKS-1:0] avs_reqq_pop;
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_data_out;
wire [BANK_ADDRW-1:0] req_bank_sel = mem_req_addr [BANK_ADDRW-1:0];
wire avs_reqq_push = mem_req_valid && !mem_req_rw && mem_req_ready;
for (genvar i = 0; i < NUM_BANKS; i++) begin
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.push (avs_reqq_push && (req_bank_sel == i)),
.pop (avs_reqq_pop[i]),
`UNUSED_PIN (empty),
.full (req_queue_going_full[i]),
.size (req_queue_size[i])
);
`UNUSED_VAR (req_queue_size)
always @(posedge clk) begin
avs_burstcount_r <= 1;
end
VX_fifo_queue #(
.DATAW (REQ_TAG_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_reqq_push && (req_bank_sel == i)),
.pop (avs_reqq_pop[i]),
.data_in (mem_req_tag),
.data_out (avs_reqq_data_out[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign avs_read[i] = mem_req_valid && !mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
assign avs_write[i] = mem_req_valid && mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
assign avs_address[i] = mem_req_addr;
assign avs_byteenable[i] = mem_req_byteen;
assign avs_writedata[i] = mem_req_data;
assign avs_burstcount[i] = avs_burstcount_r;
end
assign mem_req_ready = !(avs_waitrequest[req_bank_sel] || req_queue_going_full[req_bank_sel]);
// Responses handling
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out;
wire [NUM_BANKS-1:0] avs_rspq_empty;
for (genvar i = 0; i < NUM_BANKS; i++) begin
VX_fifo_queue #(
.DATAW (AVS_DATA_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_readdatavalid[i]),
.pop (avs_reqq_pop[i]),
.data_in (avs_readdata[i]),
.data_out (avs_rspq_data_out[i]),
.empty (avs_rspq_empty[i]),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign rsp_arb_valid_in[i] = !avs_rspq_empty[i];
assign rsp_arb_data_in[i] = {avs_rspq_data_out[i], avs_reqq_data_out[i]};
assign avs_reqq_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (AVS_DATA_WIDTH+REQ_TAG_WIDTH),
.BUFFERED (0)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_arb_valid_in),
.data_in (rsp_arb_data_in),
.ready_in (rsp_arb_ready_in),
.valid_out (mem_rsp_valid),
.data_out ({mem_rsp_data, mem_rsp_tag}),
.ready_out (mem_rsp_ready)
);
VX_fifo_queue #(
.DATAW (AVS_DATA_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_rspq_push),
.pop (avs_rspq_pop),
.data_in (avs_readdata),
.data_out (mem_rsp_data),
.empty (avs_rspq_empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
assign avs_read = mem_req_valid && !mem_req_rw && !rsp_queue_going_full;
assign avs_write = mem_req_valid && mem_req_rw && !rsp_queue_going_full;
assign avs_address = mem_req_addr;
assign avs_byteenable = mem_req_byteen;
assign avs_writedata = mem_req_data;
assign avs_burstcount = avs_burstcount_r;
assign avs_bankselect = avs_bankselect_r;
assign mem_req_ready = !avs_waitrequest && !rsp_queue_going_full;
assign mem_rsp_valid = !avs_rspq_empty;
`ifdef DBG_PRINT_AVS
always @(posedge clk) begin
if (mem_req_valid && mem_req_ready) begin
if (mem_req_rw)
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data);
else
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, rsp_queue_size);
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size);
end
if (mem_rsp_valid && mem_rsp_ready) begin
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, rsp_queue_size);
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, req_queue_size);
end
end
`endif

View file

@ -77,30 +77,28 @@ module ccip_std_afu #(
// User AFU goes here
// ====================================================================
//
// vortex_afu depends on CCI-P and local memory being in the same
// clock domain. This is accomplished by choosing a common clock
// in the AFU's JSON description. The platform instantiates clock-
// crossing shims automatically, as needed.
//
t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS];
logic avs_waitrequest [NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS];
logic avs_readdatavalid [NUM_LOCAL_MEM_BANKS];
t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
logic avs_write [NUM_LOCAL_MEM_BANKS];
logic avs_read [NUM_LOCAL_MEM_BANKS];
//
// Memory banks are used very simply here. Only one bank is active at
// a time, selected by mem_bank_select. mem_bank_select is set
// by a CSR from the host.
//
t_local_mem_byte_mask avs_byteenable;
logic avs_waitrequest;
t_local_mem_data avs_readdata;
logic avs_readdatavalid;
t_local_mem_burst_cnt avs_burstcount;
t_local_mem_data avs_writedata;
t_local_mem_addr avs_address;
logic avs_write;
logic avs_read;
// choose which memory bank to test
logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
assign local_mem[b].burstcount = avs_burstcount[b];
assign local_mem[b].writedata = avs_writedata[b];
assign local_mem[b].address = avs_address[b];
assign local_mem[b].byteenable = avs_byteenable[b];
assign local_mem[b].write = avs_write[b];
assign local_mem[b].read = avs_read[b];
assign avs_waitrequest[b] = local_mem[b].waitrequest;
assign avs_readdata[b] = local_mem[b].readdata;
assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
end
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
@ -108,6 +106,9 @@ module ccip_std_afu #(
.clk (clk),
.reset (reset_T1),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0),
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
@ -116,52 +117,7 @@ module ccip_std_afu #(
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.mem_bank_select (mem_bank_select),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0)
);
//
// Export the local memory interface signals as vectors so that bank
// selection can use array syntax.
//
logic avs_waitrequest_v[NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_readdata_v[NUM_LOCAL_MEM_BANKS];
logic avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS];
genvar b;
generate
for (b = 0; b < NUM_LOCAL_MEM_BANKS; b = b + 1)
begin : lmb
always_comb
begin
// Local memory to AFU signals
avs_waitrequest_v[b] = local_mem[b].waitrequest;
avs_readdata_v[b] = local_mem[b].readdata;
avs_readdatavalid_v[b] = local_mem[b].readdatavalid;
// Replicate address and write data to all banks. Only
// the request signals have to be bank-specific.
local_mem[b].burstcount = avs_burstcount;
local_mem[b].writedata = avs_writedata;
local_mem[b].address = avs_address;
local_mem[b].byteenable = avs_byteenable;
// Request a write to this bank?
local_mem[b].write = avs_write &&
($bits(mem_bank_select)'(b) == mem_bank_select);
// Request a read from this bank?
local_mem[b].read = avs_read &&
($bits(mem_bank_select)'(b) == mem_bank_select);
end
end
endgenerate
assign avs_waitrequest = avs_waitrequest_v[mem_bank_select];
assign avs_readdata = avs_readdata_v[mem_bank_select];
assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select];
.avs_readdatavalid (avs_readdatavalid)
);
endmodule

View file

@ -26,17 +26,15 @@ module vortex_afu #(
output t_if_ccip_Tx af2cp_sTxPort,
// Avalon signals for local memory access
output t_local_mem_data avs_writedata,
input t_local_mem_data avs_readdata,
output t_local_mem_addr avs_address,
input logic avs_waitrequest,
output logic avs_write,
output logic avs_read,
output t_local_mem_byte_mask avs_byteenable,
output t_local_mem_burst_cnt avs_burstcount,
input avs_readdatavalid,
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
output t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS],
input t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS],
output t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS],
input logic avs_waitrequest [NUM_LOCAL_MEM_BANKS],
output logic avs_write [NUM_LOCAL_MEM_BANKS],
output logic avs_read [NUM_LOCAL_MEM_BANKS],
output t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS],
output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS],
input avs_readdatavalid [NUM_LOCAL_MEM_BANKS]
);
localparam RESET_DELAY = 3;
@ -636,6 +634,7 @@ VX_mem_arb #(
//--
VX_avs_wrapper #(
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.AVS_DATA_WIDTH (LMEM_LINE_WIDTH),
.AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.AVS_BURST_WIDTH (LMEM_BURST_CTRW),
@ -670,8 +669,7 @@ VX_avs_wrapper #(
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid(avs_readdatavalid),
.avs_bankselect (mem_bank_select)
.avs_readdatavalid(avs_readdatavalid)
);
// CCI-P Read Request ///////////////////////////////////////////////////////////

View file

@ -1,19 +0,0 @@
`include "VX_platform.vh"
// Stub module: it declares only a clock and a reset input and contains no
// logic (the body below is empty), so instantiating it has no functional
// effect. Presumably a placeholder for a texture manager, judging by the
// name -- TODO confirm.
module VX_tex_mgr (
input wire clk,
input wire reset
);
//--
endmodule

View file

@ -1,50 +0,0 @@
`include "VX_platform.vh"
// Interface-only stub: the module declares its parameters and ports but has
// an empty body, so none of the outputs below are driven.
//
// Parameters (as used by the port declarations):
//   TADDRW  - width of the tex_req_u / tex_req_v coordinates
//   MADDRW  - width of tex_req_addr and of each cache request/response address
//   DATAW   - width of tex_rsp_data and cache_rsp_data
//   MAXWTW  - width of tex_req_width
//   MAXHTW  - width of tex_req_height
//   MAXFTW  - width of tex_req_format
//   MAXFMW  - width of tex_req_filter
//   MAXAMW  - width of tex_req_clamp
//   TAGW    - width of the request/response tag
//   NUMCRQS - number of cache request lanes (one valid bit and one address each)
module VX_tex_unit #(
parameter TADDRW = 32,
parameter MADDRW = 32,
parameter DATAW = 32,
parameter MAXWTW = 8,
parameter MAXHTW = 8,
parameter MAXFTW = 2,
parameter MAXFMW = 1,
parameter MAXAMW = 2,
parameter TAGW = 16,
parameter NUMCRQS = 32
) (
input wire clk,
input wire reset,
// Texture Request (valid/ready handshake; tex_req_ready is driven by this module)
input wire tex_req_valid,
input wire [TADDRW-1:0] tex_req_u,
input wire [TADDRW-1:0] tex_req_v,
input wire [MADDRW-1:0] tex_req_addr,
input wire [MAXWTW-1:0] tex_req_width,
input wire [MAXHTW-1:0] tex_req_height,
input wire [MAXFTW-1:0] tex_req_format,
input wire [MAXFMW-1:0] tex_req_filter,
input wire [MAXAMW-1:0] tex_req_clamp,
input wire [TAGW-1:0] tex_req_tag,
output wire tex_req_ready,
// Texture Response (valid/ready handshake; tex_rsp_ready comes from the consumer)
output wire tex_rsp_valid,
output wire [TAGW-1:0] tex_rsp_tag,
// NOTE(review): tex_rsp_data is declared as an input although tex_rsp_valid
// and tex_rsp_tag are outputs -- confirm whether it was meant to be an output.
input wire [DATAW-1:0] tex_rsp_data,
input wire tex_rsp_ready,
// Cache Request (NUMCRQS lanes sharing a single cache_req_ready input)
output wire [NUMCRQS-1:0] cache_req_valids,
output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
input wire cache_req_ready,
// Cache Response (cache_rsp_ready is driven by this module)
input wire cache_rsp_valid,
input wire [MADDRW-1:0] cache_rsp_addr,
input wire [DATAW-1:0] cache_rsp_data,
output wire cache_rsp_ready
);
endmodule

View file

@ -1,6 +1,6 @@
ASE_BUILD_DIR ?= build_ase
FPGA_BUILD_DIR ?= build_fpga
DEVICE_FAMILY ?= arria10
ASE_BUILD_DIR ?= build_ase_$(DEVICE_FAMILY)
FPGA_BUILD_DIR ?= build_fpga_$(DEVICE_FAMILY)
RTL_DIR=../../rtl
ifeq ($(shell which qsub-synth),)

View file

@ -3,18 +3,18 @@ BUILDIR ?= build
.PHONY: unittest pipeline cache core vortex top1 top2 top4 top8 top16 top32 top64
# Prepare the build directories, copy in core/Makefile, then run a clean build
# of the unittest configuration in the background. All build output goes to
# unittest/$(BUILDIR)/build.log (the path previously had a misplaced slash,
# which put the log at unittest/$(BUILDIR)build.log instead).
unittest:
	mkdir -p core/$(BUILDIR)
	cp core/Makefile core/$(BUILDIR)
	mkdir -p unittest/$(BUILDIR)
	cp core/Makefile unittest/$(BUILDIR)
	$(MAKE) -C unittest/$(BUILDIR) clean && $(MAKE) -C unittest/$(BUILDIR) > unittest/$(BUILDIR)/build.log 2>&1 &
pipeline:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p pipeline/$(BUILDIR)
cp core/Makefile pipeline/$(BUILDIR)
$(MAKE) -C pipeline/$(BUILDIR) clean && $(MAKE) -C pipeline/$(BUILDIR) > pipeline/$(BUILDIR)/build.log 2>&1 &
cache:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p cache/$(BUILDIR)
cp core/Makefile cache/$(BUILDIR)
$(MAKE) -C cache/$(BUILDIR) clean && $(MAKE) -C cache/$(BUILDIR) > cache/$(BUILDIR)/build.log 2>&1 &
core:
@ -23,41 +23,41 @@ core:
$(MAKE) -C core/$(BUILDIR) clean && $(MAKE) -C core/$(BUILDIR) > core/$(BUILDIR)/build.log 2>&1 &
vortex:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p vortex/$(BUILDIR)
cp core/Makefile vortex/$(BUILDIR)
$(MAKE) -C vortex/$(BUILDIR) clean && $(MAKE) -C vortex/$(BUILDIR) > vortex/$(BUILDIR)/build.log 2>&1 &
top1:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top1/$(BUILDIR)
cp core/Makefile top1/$(BUILDIR)
$(MAKE) -C top1/$(BUILDIR) clean && $(MAKE) -C top1/$(BUILDIR) > top1/$(BUILDIR)/build.log 2>&1 &
top2:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top2/$(BUILDIR)
cp core/Makefile top2/$(BUILDIR)
$(MAKE) -C top2/$(BUILDIR) clean && $(MAKE) -C top2/$(BUILDIR) > top2/$(BUILDIR)/build.log 2>&1 &
top4:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top4/$(BUILDIR)
cp core/Makefile top4/$(BUILDIR)
$(MAKE) -C top4/$(BUILDIR) clean && $(MAKE) -C top4/$(BUILDIR) > top4/$(BUILDIR)/build.log 2>&1 &
top8:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top8/$(BUILDIR)
cp core/Makefile top8/$(BUILDIR)
$(MAKE) -C top8/$(BUILDIR) clean && $(MAKE) -C top8/$(BUILDIR) > top8/$(BUILDIR)/build.log 2>&1 &
# Prepare the build directories, copy in core/Makefile, then run a clean build
# of the top16 configuration in the background. All build output goes to
# top16/$(BUILDIR)/build.log (the path previously lacked the slash before
# build.log, which put the log at top16/$(BUILDIR)build.log instead).
top16:
	mkdir -p core/$(BUILDIR)
	cp core/Makefile core/$(BUILDIR)
	mkdir -p top16/$(BUILDIR)
	cp core/Makefile top16/$(BUILDIR)
	$(MAKE) -C top16/$(BUILDIR) clean && $(MAKE) -C top16/$(BUILDIR) > top16/$(BUILDIR)/build.log 2>&1 &
top32:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top32/$(BUILDIR)
cp core/Makefile top32/$(BUILDIR)
$(MAKE) -C top32/$(BUILDIR) clean && $(MAKE) -C top32/$(BUILDIR) > top32/$(BUILDIR)/build.log 2>&1 &
top64:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top64/$(BUILDIR)
cp core/Makefile top64/$(BUILDIR)
$(MAKE) -C top64/$(BUILDIR) clean && $(MAKE) -C top64/$(BUILDIR) > top64/$(BUILDIR)/build.log 2>&1 &