pulled master and made initial changes

This commit is contained in:
sij814 2024-08-22 18:37:34 +02:00
parent 6c607d32fe
commit 7ae7ffa007
4 changed files with 650 additions and 111 deletions

229
hw/rtl/Vortex_hbm.sv Normal file
View file

@ -0,0 +1,229 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// Top-level wrapper that exposes `NUM_MEM_PORTS independent memory
// request/response channels.
//
// Structure (all visible in this module):
//   - `NUM_CLUSTERS instances of VX_cluster, each with its own relayed reset
//     and (optionally buffered) copy of the DCR write bus;
//   - one VX_cache_wrap_l3 instance that takes the per-cluster memory buses on
//     its core side and drives `NUM_MEM_PORTS memory buses on its memory side;
//   - a flattening stage that maps each memory bus interface onto the
//     unpacked-array module ports.
//
// `busy` is the OR-reduction of the per-cluster busy flags, passed through
// `BUFFER_EX.
module Vortex_hbm import VX_gpu_pkg::*; (
    `SCOPE_IO_DECL

    // Clock
    input wire                              clk,
    input wire                              reset,

    // Memory request (one channel per memory port)
    output wire                             mem_req_valid [`NUM_MEM_PORTS],
    output wire                             mem_req_rw [`NUM_MEM_PORTS],
    output wire [`VX_MEM_BYTEEN_WIDTH-1:0]  mem_req_byteen [`NUM_MEM_PORTS],
    output wire [`VX_MEM_ADDR_WIDTH-1:0]    mem_req_addr [`NUM_MEM_PORTS],
    output wire [`VX_MEM_DATA_WIDTH-1:0]    mem_req_data [`NUM_MEM_PORTS],
    output wire [`VX_MEM_TAG_WIDTH-1:0]     mem_req_tag [`NUM_MEM_PORTS],
    input  wire                             mem_req_ready [`NUM_MEM_PORTS],

    // Memory response (one channel per memory port)
    input wire                              mem_rsp_valid [`NUM_MEM_PORTS],
    input wire [`VX_MEM_DATA_WIDTH-1:0]     mem_rsp_data [`NUM_MEM_PORTS],
    input wire [`VX_MEM_TAG_WIDTH-1:0]      mem_rsp_tag [`NUM_MEM_PORTS],
    output wire                             mem_rsp_ready [`NUM_MEM_PORTS],

    // DCR write request
    input  wire                             dcr_wr_valid,
    input  wire [`VX_DCR_ADDR_WIDTH-1:0]    dcr_wr_addr,
    input  wire [`VX_DCR_DATA_WIDTH-1:0]    dcr_wr_data,

    // Status
    output wire                             busy
);

`ifdef SCOPE
    localparam scope_cluster = 0;
    `SCOPE_IO_SWITCH (`NUM_CLUSTERS);
`endif

`ifdef PERF_ENABLE
    VX_mem_perf_if mem_perf_if();
    // These counters are not produced at this level; they are left as 'x here.
    assign mem_perf_if.icache  = 'x;
    assign mem_perf_if.dcache  = 'x;
    assign mem_perf_if.l2cache = 'x;
    assign mem_perf_if.lmem    = 'x;
`endif

    // Core side of the L3: one memory bus per cluster.
    VX_mem_bus_if #(
        .DATA_SIZE (`L2_LINE_SIZE),
        .TAG_WIDTH (L2_MEM_TAG_WIDTH)
    ) per_cluster_mem_bus_if[`NUM_CLUSTERS]();

    // Memory side of the L3: one memory bus per external memory port.
    VX_mem_bus_if #(
        .DATA_SIZE (`L3_LINE_SIZE),
        .TAG_WIDTH (L3_MEM_TAG_WIDTH)
    ) mem_bus_if[`NUM_MEM_PORTS]();

    `RESET_RELAY (l3_reset, reset);

    VX_cache_wrap_l3 #(
        .INSTANCE_ID    ("l3cache"),
        .CACHE_SIZE     (`L3_CACHE_SIZE),
        .LINE_SIZE      (`L3_LINE_SIZE),
        .NUM_BANKS      (`L3_NUM_BANKS),
        .NUM_WAYS       (`L3_NUM_WAYS),
        .WORD_SIZE      (L3_WORD_SIZE),
        .NUM_MEM_PORTS  (`NUM_MEM_PORTS),
        .NUM_REQS       (L3_NUM_REQS),
        .CRSQ_SIZE      (`L3_CRSQ_SIZE),
        .MSHR_SIZE      (`L3_MSHR_SIZE),
        .MRSQ_SIZE      (`L3_MRSQ_SIZE),
        .MREQ_SIZE      (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
        .TAG_WIDTH      (L2_MEM_TAG_WIDTH),
        .WRITE_ENABLE   (1),
        .WRITEBACK      (`L3_WRITEBACK),
        .DIRTY_BYTES    (`L3_WRITEBACK),
        .UUID_WIDTH     (`UUID_WIDTH),
        .CORE_OUT_BUF   (2),
        .MEM_OUT_BUF    (2),
        .NC_ENABLE      (1),
        .PASSTHRU       (!`L3_ENABLED)
    ) l3cache (
        .clk            (clk),
        .reset          (l3_reset),

    `ifdef PERF_ENABLE
        .cache_perf     (mem_perf_if.l3cache),
    `endif

        .core_bus_if    (per_cluster_mem_bus_if),
        .mem_bus_if     (mem_bus_if)
    );

    // Per-port handshake completion flags (valid && ready).
    wire mem_req_fire[`NUM_MEM_PORTS-1:0];
    wire mem_rsp_fire[`NUM_MEM_PORTS-1:0];

    // Flatten each memory bus interface onto the module's array ports.
    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_req_valid[i] = mem_bus_if[i].req_valid;
        assign mem_req_rw[i]    = mem_bus_if[i].req_data.rw;
        assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen;
        assign mem_req_addr[i]  = mem_bus_if[i].req_data.addr;
        assign mem_req_data[i]  = mem_bus_if[i].req_data.data;
        assign mem_req_tag[i]   = mem_bus_if[i].req_data.tag;
        assign mem_bus_if[i].req_ready = mem_req_ready[i];
        `UNUSED_VAR (mem_bus_if[i].req_data.atype)

        assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
        assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
        assign mem_bus_if[i].rsp_data.tag  = mem_rsp_tag[i];
        assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;

        assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i];
        assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i];
        `UNUSED_VAR (mem_req_fire[i])
        `UNUSED_VAR (mem_rsp_fire[i])
    end

    // DCR write bus shared by all clusters.
    VX_dcr_bus_if dcr_bus_if();
    assign dcr_bus_if.write_valid = dcr_wr_valid;
    assign dcr_bus_if.write_addr  = dcr_wr_addr;
    assign dcr_bus_if.write_data  = dcr_wr_data;

    wire [`NUM_CLUSTERS-1:0] per_cluster_busy;

    // Generate all clusters
    for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters

        `RESET_RELAY (cluster_reset, reset);

        VX_dcr_bus_if cluster_dcr_bus_if();
        `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));

        VX_cluster #(
            .CLUSTER_ID (cluster_id),
            .INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
        ) cluster (
            `SCOPE_IO_BIND (scope_cluster + cluster_id)

            .clk                (clk),
            .reset              (cluster_reset),

        `ifdef PERF_ENABLE
            .mem_perf_if        (mem_perf_if),
        `endif

            .dcr_bus_if         (cluster_dcr_bus_if),

            .mem_bus_if         (per_cluster_mem_bus_if[cluster_id]),

            .busy               (per_cluster_busy[cluster_id])
        );
    end

    `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1));

`ifdef PERF_ENABLE

    // Split fired requests into reads and writes, per port.
    wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0];
    wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0];

    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw;
        assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw;
    end

    // Number of read requests, write requests and responses that fire in the
    // current cycle, summed over all memory ports. Doing the reduction
    // combinationally lets every counter below be updated from one place:
    //  - one always block per port would put multiple drivers on the counter;
    //  - repeating "ctr <= ctr + x[i]" in a procedural loop keeps only the
    //    last iteration, because every non-blocking assignment reads the same
    //    pre-update value of ctr.
    reg [`PERF_CTR_BITS-1:0] mem_rd_req_cnt;
    reg [`PERF_CTR_BITS-1:0] mem_wr_req_cnt;
    reg [`PERF_CTR_BITS-1:0] mem_rsp_cnt;

    always @(*) begin
        mem_rd_req_cnt = '0;
        mem_wr_req_cnt = '0;
        mem_rsp_cnt    = '0;
        for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
            mem_rd_req_cnt = mem_rd_req_cnt + `PERF_CTR_BITS'(mem_rd_req_fire[i]);
            mem_wr_req_cnt = mem_wr_req_cnt + `PERF_CTR_BITS'(mem_wr_req_fire[i]);
            mem_rsp_cnt    = mem_rsp_cnt    + `PERF_CTR_BITS'(mem_rsp_fire[i]);
        end
    end

    // Read requests issued minus responses received, across all ports.
    // Modular add/subtract gives the same result as the signed +1/0/-1 delta
    // used for a single port.
    reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
    mem_perf_t mem_perf;

    always @(posedge clk) begin
        if (reset) begin
            perf_mem_pending_reads <= '0;
        end else begin
            perf_mem_pending_reads <= perf_mem_pending_reads + mem_rd_req_cnt - mem_rsp_cnt;
        end
    end

    always @(posedge clk) begin
        if (reset) begin
            mem_perf <= '0;
        end else begin
            mem_perf.reads   <= mem_perf.reads   + mem_rd_req_cnt;
            mem_perf.writes  <= mem_perf.writes  + mem_wr_req_cnt;
            // Accumulates the outstanding-read count every cycle.
            mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads;
        end
    end
    assign mem_perf_if.mem = mem_perf;

`endif

`ifdef DBG_TRACE_MEM
    // Log every fired memory request/response on every port.
    always @(posedge clk) begin
        for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
            if (mem_req_fire[i]) begin
                if (mem_req_rw[i])
                    `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i]));
                else
                    `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i]));
            end
            if (mem_rsp_fire[i]) begin
                `TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i]));
            end
        end
    end
`endif

`ifdef SIMULATION
    always @(posedge clk) begin
        $fflush(); // flush stdout buffer
    end
`endif

endmodule

286
hw/rtl/cache/VX_cache_wrap_l3.sv vendored Normal file
View file

@ -0,0 +1,286 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_cache_define.vh"
// Cache wrapper with NUM_REQS core-side buses and NUM_MEM_PORTS memory-side
// buses. Depending on the parameters it instantiates:
//   - a VX_cache_bypass stage when NC_ENABLE or PASSTHRU is set;
//   - a VX_cache when PASSTHRU is 0 (otherwise the cache-side buses are tied off).
//
// NOTE(review): VX_cache and VX_cache_bypass are each bound to a single
// memory-side bus here, so all traffic is routed through memory port 0 and
// ports 1..NUM_MEM_PORTS-1 are held idle. Spreading traffic across the ports
// needs a request distributor that is not part of this module yet.
module VX_cache_wrap_l3 import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID    = "",

    parameter TAG_SEL_IDX           = 0,

    // Number of Word requests per cycle
    parameter NUM_REQS              = 4,

    // Size of cache in bytes
    parameter CACHE_SIZE            = 4096,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE             = 64,
    // Number of banks
    parameter NUM_BANKS             = 1,
    // Number of associative ways
    parameter NUM_WAYS              = 1,
    // Size of a word in bytes
    parameter WORD_SIZE             = 4,
    // Number of memory ports
    parameter NUM_MEM_PORTS         = 4,

    // Core Response Queue Size
    parameter CRSQ_SIZE             = 2,
    // Miss Reserv Queue Knob
    parameter MSHR_SIZE             = 8,
    // Memory Response Queue Size
    parameter MRSQ_SIZE             = 0,
    // Memory Request Queue Size
    parameter MREQ_SIZE             = 4,

    // Enable cache writeable
    parameter WRITE_ENABLE          = 1,

    // Enable cache writeback
    parameter WRITEBACK             = 0,

    // Enable dirty bytes on writeback
    parameter DIRTY_BYTES           = 0,

    // Request debug identifier
    parameter UUID_WIDTH            = 0,

    // core request tag size
    parameter TAG_WIDTH             = UUID_WIDTH + 1,

    // enable bypass for non-cacheable addresses
    parameter NC_ENABLE             = 0,

    // Force bypass for all requests
    parameter PASSTHRU              = 0,

    // Core response output buffer
    parameter CORE_OUT_BUF          = 0,

    // Memory request output buffer
    parameter MEM_OUT_BUF           = 0
 ) (

    input wire clk,
    input wire reset,

    // PERF
`ifdef PERF_ENABLE
    output cache_perf_t     cache_perf,
`endif

    VX_mem_bus_if.slave     core_bus_if [NUM_REQS],
    VX_mem_bus_if.master    mem_bus_if [NUM_MEM_PORTS]
);

    `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))

    localparam MSHR_ADDR_WIDTH      = `LOG2UP(MSHR_SIZE);
    localparam CACHE_MEM_TAG_WIDTH  = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;

    // Tag width seen on mem_bus_if: depends on whether requests may bypass the cache.
    localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
                                          (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
                                                       `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));

    localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);

    // Buses between the bypass stage (when present) and the cache proper.
    VX_mem_bus_if #(
        .DATA_SIZE (WORD_SIZE),
        .TAG_WIDTH (TAG_WIDTH)
    ) core_bus_cache_if[NUM_REQS]();

    VX_mem_bus_if #(
        .DATA_SIZE (LINE_SIZE),
        .TAG_WIDTH (CACHE_MEM_TAG_WIDTH)
    ) mem_bus_cache_if[NUM_MEM_PORTS]();

    if (NC_OR_BYPASS) begin

        `RESET_RELAY (nc_bypass_reset, reset);

        // A single bypass instance owns the whole core-side bus array.
        // Instantiating one per memory port would bind every instance to the
        // same core_bus_if / core_bus_cache_if signals (multiple drivers on
        // the handshake) and replicate each request onto every port.
        VX_cache_bypass #(
            .NUM_REQS          (NUM_REQS),
            .TAG_SEL_IDX       (TAG_SEL_IDX),

            .PASSTHRU          (PASSTHRU),
            .NC_ENABLE         (PASSTHRU ? 0 : NC_ENABLE),

            .WORD_SIZE         (WORD_SIZE),
            .LINE_SIZE         (LINE_SIZE),

            .CORE_ADDR_WIDTH   (`CS_WORD_ADDR_WIDTH),
            .CORE_TAG_WIDTH    (TAG_WIDTH),

            .MEM_ADDR_WIDTH    (`CS_MEM_ADDR_WIDTH),
            .MEM_TAG_IN_WIDTH  (CACHE_MEM_TAG_WIDTH),
            .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),

            .UUID_WIDTH        (UUID_WIDTH),

            .CORE_OUT_BUF      (CORE_OUT_BUF),
            .MEM_OUT_BUF       (MEM_OUT_BUF)
        ) cache_bypass (
            .clk            (clk),
            .reset          (nc_bypass_reset),

            .core_bus_in_if (core_bus_if),
            .core_bus_out_if(core_bus_cache_if),

            .mem_bus_in_if  (mem_bus_cache_if[0]),
            .mem_bus_out_if (mem_bus_if[0])
        );

        // Remaining memory ports carry no traffic: hold both sides idle so
        // that no signal is left undriven.
        for (genvar i = 1; i < NUM_MEM_PORTS; ++i) begin
            assign mem_bus_if[i].req_valid = 0;
            assign mem_bus_if[i].req_data  = '0;
            assign mem_bus_if[i].rsp_ready = 0;
            `UNUSED_VAR (mem_bus_if[i].req_ready)
            `UNUSED_VAR (mem_bus_if[i].rsp_valid)
            `UNUSED_VAR (mem_bus_if[i].rsp_data)

            assign mem_bus_cache_if[i].req_ready = 0;
            assign mem_bus_cache_if[i].rsp_valid = 0;
            assign mem_bus_cache_if[i].rsp_data  = '0;
        end

    end else begin

        // No bypass stage: connect the external buses straight to the cache-side buses.
        for (genvar i = 0; i < NUM_REQS; ++i) begin
            `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
        end

        for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
            `ASSIGN_VX_MEM_BUS_IF (mem_bus_if[i], mem_bus_cache_if[i]);
        end
    end

    if (PASSTHRU != 0) begin

        // No cache is instantiated: tie off the cache-side buses.
        for (genvar i = 0; i < NUM_REQS; ++i) begin
            `UNUSED_VAR (core_bus_cache_if[i].req_valid)
            `UNUSED_VAR (core_bus_cache_if[i].req_data)
            assign core_bus_cache_if[i].req_ready = 0;

            assign core_bus_cache_if[i].rsp_valid = 0;
            assign core_bus_cache_if[i].rsp_data  = '0;
            `UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
        end

        for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
            assign mem_bus_cache_if[i].req_valid = 0;
            assign mem_bus_cache_if[i].req_data = '0;
            `UNUSED_VAR (mem_bus_cache_if[i].req_ready)

            `UNUSED_VAR (mem_bus_cache_if[i].rsp_valid)
            `UNUSED_VAR (mem_bus_cache_if[i].rsp_data)
            assign mem_bus_cache_if[i].rsp_ready = 0;
        end

    `ifdef PERF_ENABLE
        assign cache_perf = '0;
    `endif

    end else begin

        `RESET_RELAY (cache_reset, reset);

        VX_cache #(
            .INSTANCE_ID  (INSTANCE_ID),
            .CACHE_SIZE   (CACHE_SIZE),
            .LINE_SIZE    (LINE_SIZE),
            .NUM_BANKS    (NUM_BANKS),
            .NUM_WAYS     (NUM_WAYS),
            .WORD_SIZE    (WORD_SIZE),
            .NUM_REQS     (NUM_REQS),
            .CRSQ_SIZE    (CRSQ_SIZE),
            .MSHR_SIZE    (MSHR_SIZE),
            .MRSQ_SIZE    (MRSQ_SIZE),
            .MREQ_SIZE    (MREQ_SIZE),
            .WRITE_ENABLE (WRITE_ENABLE),
            .WRITEBACK    (WRITEBACK),
            .DIRTY_BYTES  (DIRTY_BYTES),
            .UUID_WIDTH   (UUID_WIDTH),
            .TAG_WIDTH    (TAG_WIDTH),
            .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
            .MEM_OUT_BUF  (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
        ) cache (
            .clk            (clk),
            .reset          (cache_reset),
        `ifdef PERF_ENABLE
            .cache_perf     (cache_perf),
        `endif
            .core_bus_if    (core_bus_cache_if),
            .mem_bus_if     (mem_bus_cache_if[0])
        );

        // The cache only drives memory-side bus 0; keep the request/ready
        // outputs of the other cache-side buses at a defined idle value.
        for (genvar i = 1; i < NUM_MEM_PORTS; ++i) begin
            assign mem_bus_cache_if[i].req_valid = 0;
            assign mem_bus_cache_if[i].req_data  = '0;
            assign mem_bus_cache_if[i].rsp_ready = 0;
            `UNUSED_VAR (mem_bus_cache_if[i].req_ready)
            `UNUSED_VAR (mem_bus_cache_if[i].rsp_valid)
            `UNUSED_VAR (mem_bus_cache_if[i].rsp_data)
        end

    end

`ifdef DBG_TRACE_CACHE

    // Core-side tracing: one logger per request lane.
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
        wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;

        // The UUID occupies the top UUID_WIDTH bits of the core tag.
        if (UUID_WIDTH != 0) begin
            assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
            assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
        end else begin
            assign core_req_uuid = 0;
            assign core_rsp_uuid = 0;
        end

        wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready;
        wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready;

        always @(posedge clk) begin
            if (core_req_fire) begin
                if (core_bus_if[i].req_data.rw)
                    `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
                else
                    `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
            end
            if (core_rsp_fire) begin
                `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
            end
        end
    end

    // Memory-side tracing: one logger per memory port.
    wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_req_uuid;
    wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;

    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
        // A UUID is only extracted from the memory tag when a bypass stage exists.
        if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin
            assign mem_req_uuid[i] = mem_bus_if[i].req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
            assign mem_rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
        end else begin
            assign mem_req_uuid[i] = 0;
            assign mem_rsp_uuid[i] = 0;
        end
    end

    wire mem_req_fire [NUM_MEM_PORTS-1:0];
    wire mem_rsp_fire [NUM_MEM_PORTS-1:0];

    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
        assign mem_req_fire[i] = mem_bus_if[i].req_valid && mem_bus_if[i].req_ready;
        assign mem_rsp_fire[i] = mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready;
    end

    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
        always @(posedge clk) begin
            if (mem_req_fire[i]) begin
                if (mem_bus_if[i].req_data.rw)
                    `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d) bank=%d\n",
                        $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_req_uuid[i], i));
                else
                    `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d) bank=%d\n",
                        $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_req_uuid[i], i));
            end
            if (mem_rsp_fire[i]) begin
                `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
                    $time, INSTANCE_ID, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data, mem_rsp_uuid[i]));
            end
        end
    end
`endif

endmodule

View file

@ -43,7 +43,7 @@ ifdef AXI_BUS
TOP = Vortex_axi
CXXFLAGS += -DAXI_BUS
else
TOP = Vortex
TOP = Vortex_hbm
endif
VL_FLAGS = --exe

View file

@ -17,8 +17,8 @@
#include "VVortex_axi.h"
typedef VVortex_axi Device;
#else
#include "VVortex.h"
typedef VVortex Device;
#include "VVortex_hbm.h"
typedef VVortex_hbm Device;
#endif
#ifdef VCD_OUTPUT
@ -123,6 +123,15 @@ public:
tfp_->open("trace.vcd");
#endif
pending_mem_reqs_.resize(NUM_MEM_PORTS);
dram_queue_.resize(NUM_MEM_PORTS);
mem_rd_rsp_active_.resize(NUM_MEM_PORTS);
mem_rd_rsp_ready_.resize(NUM_MEM_PORTS);
mem_wr_rsp_active_.resize(NUM_MEM_PORTS);
mem_wr_rsp_ready_.resize(NUM_MEM_PORTS);
ram_ = nullptr;
#ifndef NDEBUG
@ -210,16 +219,19 @@ private:
print_bufs_.clear();
pending_mem_reqs_.clear();
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
{
std::queue<mem_req_t*> empty;
std::swap(dram_queue_, empty);
pending_mem_reqs_.at(i).clear();
{
std::queue<mem_req_t*> empty;
std::swap(dram_queue_.at(i), empty);
}
mem_rd_rsp_active_.at(i) = false;
mem_wr_rsp_active_.at(i) = false;
}
mem_rd_rsp_active_ = false;
mem_wr_rsp_active_ = false;
this->mem_bus_reset();
this->dcr_bus_reset();
@ -250,17 +262,19 @@ private:
dram_sim_.tick();
if (!dram_queue_.empty()) {
auto mem_req = dram_queue_.front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
if (!dram_queue_.at(i).empty()) {
auto mem_req = dram_queue_.at(i).front();
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
if (orig_req->ready) {
delete orig_req;
} else {
orig_req->ready = true;
}
}, mem_req)) {
dram_queue_.at(i).pop();
}
}, mem_req)) {
dram_queue_.pop();
}
}
@ -437,116 +451,126 @@ private:
#else
void mem_bus_reset() {
device_->mem_req_ready = 0;
device_->mem_rsp_valid = 0;
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
device_->mem_req_ready[i] = 0;
device_->mem_rsp_valid[i] = 0;
}
}
void mem_bus_eval(bool clk) {
if (!clk) {
mem_rd_rsp_ready_ = device_->mem_rsp_ready;
return;
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
if (!clk) {
mem_rd_rsp_ready_.at(i) = device_->mem_rsp_ready[i];
return;
}
}
if (ram_ == nullptr) {
device_->mem_req_ready = 0;
return;
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
if (ram_ == nullptr) {
device_->mem_req_ready[i] = 0;
return;
}
}
// process memory read responses
if (mem_rd_rsp_active_
&& device_->mem_rsp_valid && mem_rd_rsp_ready_) {
mem_rd_rsp_active_ = false;
}
if (!mem_rd_rsp_active_) {
if (!pending_mem_reqs_.empty()
&& (*pending_mem_reqs_.begin())->ready) {
device_->mem_rsp_valid = 1;
auto mem_rsp_it = pending_mem_reqs_.begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->block[i]);
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
if (mem_rd_rsp_active_.at(i)
&& device_->mem_rsp_valid[i] && mem_rd_rsp_ready_.at(i)) {
mem_rd_rsp_active_.at(i) = false;
}
if (!mem_rd_rsp_active_.at(i)) {
if (!pending_mem_reqs_.at(i).empty()
&& (*pending_mem_reqs_.at(i).begin())->ready) {
device_->mem_rsp_valid[i] = 1;
auto mem_rsp_it = pending_mem_reqs_.at(i).begin();
auto mem_rsp = *mem_rsp_it;
/*
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%02x", mem_rsp->block[i]);
}
printf("\n");
*/
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data[i]), mem_rsp->block.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_tag[i] = mem_rsp->tag;
pending_mem_reqs_.at(i).erase(mem_rsp_it);
mem_rd_rsp_active_.at(i) = true;
delete mem_rsp;
} else {
device_->mem_rsp_valid[i] = 0;
}
printf("\n");
*/
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_tag = mem_rsp->tag;
pending_mem_reqs_.erase(mem_rsp_it);
mem_rd_rsp_active_ = true;
delete mem_rsp;
} else {
device_->mem_rsp_valid = 0;
}
}
// process memory requests
if (device_->mem_req_valid && running_) {
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
if (device_->mem_req_rw) {
auto byteen = device_->mem_req_byteen;
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
for (int j = 0; j < NUM_MEM_PORTS; ++j) {
if (device_->mem_req_valid[j] && running_) {
uint64_t byte_addr = (device_->mem_req_addr[j] * MEM_BLOCK_SIZE);
if (device_->mem_req_rw[j]) {
auto byteen = device_->mem_req_byteen[j];
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data[j]);
if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
for (int i = 0; i < IO_COUT_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
for (int i = 0; i < IO_COUT_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
// process writes
/*
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
}
printf(", data=0x");
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%d=%02x,", i, data[i]);
}
printf("\n");
*/
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
}
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag[j];
mem_req->addr = byte_addr;
mem_req->write = true;
mem_req->ready = true;
// send dram request
dram_queue_.at(j).push(mem_req);
}
} else {
// process writes
/*
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
}
printf(", data=0x");
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
printf("%d=%02x,", i, data[i]);
}
printf("\n");
*/
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[byte_addr + i] = data[i];
}
}
// process reads
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag;
mem_req->tag = device_->mem_req_tag[j];
mem_req->addr = byte_addr;
mem_req->write = true;
mem_req->ready = true;
mem_req->write = false;
mem_req->ready = false;
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
pending_mem_reqs_.at(j).emplace_back(mem_req);
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
// send dram request
dram_queue_.push(mem_req);
dram_queue_.at(j).push(mem_req);
}
} else {
// process reads
auto mem_req = new mem_req_t();
mem_req->tag = device_->mem_req_tag;
mem_req->addr = byte_addr;
mem_req->write = false;
mem_req->ready = false;
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
pending_mem_reqs_.emplace_back(mem_req);
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
// send dram request
dram_queue_.push(mem_req);
}
}
device_->mem_req_ready = running_;
device_->mem_req_ready[j] = running_;
}
}
#endif
@ -583,9 +607,9 @@ private:
std::unordered_map<int, std::stringstream> print_bufs_;
std::list<mem_req_t*> pending_mem_reqs_;
std::vector<std::list<mem_req_t*>> pending_mem_reqs_;
std::queue<mem_req_t*> dram_queue_;
std::vector<std::queue<mem_req_t*>> dram_queue_;
DramSim dram_sim_;
@ -597,11 +621,11 @@ private:
RAM* ram_;
bool mem_rd_rsp_active_;
bool mem_rd_rsp_ready_;
std::vector<bool> mem_rd_rsp_active_;
std::vector<bool> mem_rd_rsp_ready_;
bool mem_wr_rsp_active_;
bool mem_wr_rsp_ready_;
std::vector<bool> mem_wr_rsp_active_;
std::vector<bool> mem_wr_rsp_ready_;
bool running_;
};