mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
pulled master and made initial changes
This commit is contained in:
parent
6c607d32fe
commit
7ae7ffa007
4 changed files with 650 additions and 111 deletions
229
hw/rtl/Vortex_hbm.sv
Normal file
229
hw/rtl/Vortex_hbm.sv
Normal file
|
@ -0,0 +1,229 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Top-level Vortex wrapper with `NUM_MEM_PORTS memory ports.
//
// Instantiates `NUM_CLUSTERS VX_cluster instances, connects their memory
// buses to a VX_cache_wrap_l3 instance, and exposes the L3 memory side as
// flat per-port request/response signals. DCR writes are forwarded to every
// cluster, and `busy` is derived from the OR of the per-cluster busy flags.
module Vortex_hbm import VX_gpu_pkg::*; (
    `SCOPE_IO_DECL

    // Clock
    input wire                              clk,
    input wire                              reset,

    // Memory request
    output wire                             mem_req_valid [`NUM_MEM_PORTS],
    output wire                             mem_req_rw [`NUM_MEM_PORTS],
    output wire [`VX_MEM_BYTEEN_WIDTH-1:0]  mem_req_byteen [`NUM_MEM_PORTS],
    output wire [`VX_MEM_ADDR_WIDTH-1:0]    mem_req_addr [`NUM_MEM_PORTS],
    output wire [`VX_MEM_DATA_WIDTH-1:0]    mem_req_data [`NUM_MEM_PORTS],
    output wire [`VX_MEM_TAG_WIDTH-1:0]     mem_req_tag [`NUM_MEM_PORTS],
    input wire                              mem_req_ready [`NUM_MEM_PORTS],

    // Memory response
    input wire                              mem_rsp_valid [`NUM_MEM_PORTS],
    input wire [`VX_MEM_DATA_WIDTH-1:0]     mem_rsp_data [`NUM_MEM_PORTS],
    input wire [`VX_MEM_TAG_WIDTH-1:0]      mem_rsp_tag [`NUM_MEM_PORTS],
    output wire                             mem_rsp_ready [`NUM_MEM_PORTS],

    // DCR write request
    input wire                              dcr_wr_valid,
    input wire [`VX_DCR_ADDR_WIDTH-1:0]     dcr_wr_addr,
    input wire [`VX_DCR_DATA_WIDTH-1:0]     dcr_wr_data,

    // Status
    output wire                             busy
);

`ifdef SCOPE
    localparam scope_cluster = 0;
    `SCOPE_IO_SWITCH (`NUM_CLUSTERS);
`endif

`ifdef PERF_ENABLE
    // Only the l3cache and mem fields are produced in this module; the
    // remaining fields are left as don't-care.
    VX_mem_perf_if mem_perf_if();
    assign mem_perf_if.icache  = 'x;
    assign mem_perf_if.dcache  = 'x;
    assign mem_perf_if.l2cache = 'x;
    assign mem_perf_if.lmem    = 'x;
`endif

    // One memory bus per cluster (core side of the L3).
    VX_mem_bus_if #(
        .DATA_SIZE (`L2_LINE_SIZE),
        .TAG_WIDTH (L2_MEM_TAG_WIDTH)
    ) per_cluster_mem_bus_if[`NUM_CLUSTERS]();

    // One memory bus per external memory port (memory side of the L3).
    VX_mem_bus_if #(
        .DATA_SIZE (`L3_LINE_SIZE),
        .TAG_WIDTH (L3_MEM_TAG_WIDTH)
    ) mem_bus_if[`NUM_MEM_PORTS]();

    `RESET_RELAY (l3_reset, reset);

    VX_cache_wrap_l3 #(
        .INSTANCE_ID    ("l3cache"),
        .CACHE_SIZE     (`L3_CACHE_SIZE),
        .LINE_SIZE      (`L3_LINE_SIZE),
        .NUM_BANKS      (`L3_NUM_BANKS),
        .NUM_WAYS       (`L3_NUM_WAYS),
        .WORD_SIZE      (L3_WORD_SIZE),
        .NUM_MEM_PORTS  (`NUM_MEM_PORTS),
        .NUM_REQS       (L3_NUM_REQS),
        .CRSQ_SIZE      (`L3_CRSQ_SIZE),
        .MSHR_SIZE      (`L3_MSHR_SIZE),
        .MRSQ_SIZE      (`L3_MRSQ_SIZE),
        .MREQ_SIZE      (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
        .TAG_WIDTH      (L2_MEM_TAG_WIDTH),
        .WRITE_ENABLE   (1),
        .WRITEBACK      (`L3_WRITEBACK),
        .DIRTY_BYTES    (`L3_WRITEBACK),
        .UUID_WIDTH     (`UUID_WIDTH),
        .CORE_OUT_BUF   (2),
        .MEM_OUT_BUF    (2),
        .NC_ENABLE      (1),
        .PASSTHRU       (!`L3_ENABLED)
    ) l3cache (
        .clk            (clk),
        .reset          (l3_reset),

    `ifdef PERF_ENABLE
        .cache_perf     (mem_perf_if.l3cache),
    `endif

        .core_bus_if    (per_cluster_mem_bus_if),
        .mem_bus_if     (mem_bus_if)
    );

    // Per-port handshake-complete flags (valid && ready).
    wire mem_req_fire[`NUM_MEM_PORTS-1:0];
    wire mem_rsp_fire[`NUM_MEM_PORTS-1:0];

    // Flatten each memory bus interface onto the module's per-port signals.
    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_req_valid[i] = mem_bus_if[i].req_valid;
        assign mem_req_rw[i]    = mem_bus_if[i].req_data.rw;
        assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen;
        assign mem_req_addr[i]  = mem_bus_if[i].req_data.addr;
        assign mem_req_data[i]  = mem_bus_if[i].req_data.data;
        assign mem_req_tag[i]   = mem_bus_if[i].req_data.tag;
        assign mem_bus_if[i].req_ready = mem_req_ready[i];
        `UNUSED_VAR (mem_bus_if[i].req_data.atype)

        assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
        assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
        assign mem_bus_if[i].rsp_data.tag  = mem_rsp_tag[i];
        assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;

        assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i];
        assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i];
        `UNUSED_VAR (mem_req_fire[i])
        `UNUSED_VAR (mem_rsp_fire[i])
    end

    VX_dcr_bus_if dcr_bus_if();
    assign dcr_bus_if.write_valid = dcr_wr_valid;
    assign dcr_bus_if.write_addr  = dcr_wr_addr;
    assign dcr_bus_if.write_data  = dcr_wr_data;

    wire [`NUM_CLUSTERS-1:0] per_cluster_busy;

    // Generate all clusters
    for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters

        `RESET_RELAY (cluster_reset, reset);

        VX_dcr_bus_if cluster_dcr_bus_if();
        `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));

        VX_cluster #(
            .CLUSTER_ID  (cluster_id),
            .INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
        ) cluster (
            `SCOPE_IO_BIND (scope_cluster + cluster_id)

            .clk         (clk),
            .reset       (cluster_reset),

        `ifdef PERF_ENABLE
            .mem_perf_if (mem_perf_if),
        `endif

            .dcr_bus_if  (cluster_dcr_bus_if),

            .mem_bus_if  (per_cluster_mem_bus_if[cluster_id]),

            .busy        (per_cluster_busy[cluster_id])
        );
    end

    `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1));

`ifdef PERF_ENABLE

    reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
    mem_perf_t mem_perf;

    // Per-port events packed into vectors so that all ports can be counted
    // in the same cycle with $countones.
    wire [`NUM_MEM_PORTS-1:0] mem_rd_req_fire;
    wire [`NUM_MEM_PORTS-1:0] mem_wr_req_fire;
    wire [`NUM_MEM_PORTS-1:0] mem_rd_rsp_fire;

    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw;
        assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw;
        assign mem_rd_rsp_fire[i] = mem_rsp_fire[i];
    end

    wire [`PERF_CTR_BITS-1:0] perf_rd_reqs = `PERF_CTR_BITS'($countones(mem_rd_req_fire));
    wire [`PERF_CTR_BITS-1:0] perf_wr_reqs = `PERF_CTR_BITS'($countones(mem_wr_req_fire));
    wire [`PERF_CTR_BITS-1:0] perf_rd_rsps = `PERF_CTR_BITS'($countones(mem_rd_rsp_fire));

    // Outstanding reads across all ports. A single always block drives the
    // register: one always block per port would make it multi-driven.
    always @(posedge clk) begin
        if (reset) begin
            perf_mem_pending_reads <= '0;
        end else begin
            perf_mem_pending_reads <= perf_mem_pending_reads + perf_rd_reqs - perf_rd_rsps;
        end
    end

    // Accumulate the per-cycle totals. Adding inside a procedural loop with
    // non-blocking assignments would only keep the last port's contribution.
    always @(posedge clk) begin
        if (reset) begin
            mem_perf <= '0;
        end else begin
            mem_perf.reads   <= mem_perf.reads   + perf_rd_reqs;
            mem_perf.writes  <= mem_perf.writes  + perf_wr_reqs;
            mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads;
        end
    end
    assign mem_perf_if.mem = mem_perf;

`endif

`ifdef DBG_TRACE_MEM
    // Log every completed request/response handshake on every port.
    always @(posedge clk) begin
        for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
            if (mem_req_fire[i]) begin
                if (mem_req_rw[i])
                    `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i]));
                else
                    `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i]));
            end
            if (mem_rsp_fire[i]) begin
                `TRACE(1, ("%d: MEM Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i]));
            end
        end
    end
`endif

`ifdef SIMULATION
    always @(posedge clk) begin
        $fflush(); // flush stdout buffer
    end
`endif

endmodule
|
286
hw/rtl/cache/VX_cache_wrap_l3.sv
vendored
Normal file
286
hw/rtl/cache/VX_cache_wrap_l3.sv
vendored
Normal file
|
@ -0,0 +1,286 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// L3 cache wrapper with an array of NUM_MEM_PORTS memory-side interfaces.
//
// Depending on PASSTHRU / NC_ENABLE the core requests go through a
// VX_cache_bypass, a VX_cache, or both. VX_cache and VX_cache_bypass are
// connected here through a single memory-side bus, so only mem_bus_if[0]
// carries traffic; ports 1..NUM_MEM_PORTS-1 are explicitly tied off.
// TODO(review): distribute requests across all memory ports once the cache
// and bypass support more than one memory-side interface.
module VX_cache_wrap_l3 import VX_gpu_pkg::*; #(
    parameter `STRING INSTANCE_ID    = "",

    parameter TAG_SEL_IDX           = 0,

    // Number of Word requests per cycle
    parameter NUM_REQS              = 4,

    // Size of cache in bytes
    parameter CACHE_SIZE            = 4096,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE             = 64,
    // Number of banks
    parameter NUM_BANKS             = 1,
    // Number of associative ways
    parameter NUM_WAYS              = 1,
    // Size of a word in bytes
    parameter WORD_SIZE             = 4,
    // Number of memory ports
    parameter NUM_MEM_PORTS         = 4,

    // Core Response Queue Size
    parameter CRSQ_SIZE             = 2,
    // Miss Reserv Queue Knob
    parameter MSHR_SIZE             = 8,
    // Memory Response Queue Size
    parameter MRSQ_SIZE             = 0,
    // Memory Request Queue Size
    parameter MREQ_SIZE             = 4,

    // Enable cache writeable
    parameter WRITE_ENABLE          = 1,

    // Enable cache writeback
    parameter WRITEBACK             = 0,

    // Enable dirty bytes on writeback
    parameter DIRTY_BYTES           = 0,

    // Request debug identifier
    parameter UUID_WIDTH            = 0,

    // core request tag size
    parameter TAG_WIDTH             = UUID_WIDTH + 1,

    // enable bypass for non-cacheable addresses
    parameter NC_ENABLE             = 0,

    // Force bypass for all requests
    parameter PASSTHRU              = 0,

    // Core response output buffer
    parameter CORE_OUT_BUF          = 0,

    // Memory request output buffer
    parameter MEM_OUT_BUF           = 0
 ) (

    input wire clk,
    input wire reset,

    // PERF
`ifdef PERF_ENABLE
    output cache_perf_t     cache_perf,
`endif

    VX_mem_bus_if.slave     core_bus_if [NUM_REQS],
    VX_mem_bus_if.master    mem_bus_if [NUM_MEM_PORTS]
);

    `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
    `STATIC_ASSERT(NUM_MEM_PORTS >= 1, ("invalid parameter"))

    localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
    localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;

    localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
                                          (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
                                                       `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));

    localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);

    // Core-side buses between the bypass and the cache.
    VX_mem_bus_if #(
        .DATA_SIZE (WORD_SIZE),
        .TAG_WIDTH (TAG_WIDTH)
    ) core_bus_cache_if[NUM_REQS]();

    // Memory-side bus of the cache. VX_cache is connected through a single
    // memory interface below, so this is one bus rather than one per port.
    VX_mem_bus_if #(
        .DATA_SIZE (LINE_SIZE),
        .TAG_WIDTH (CACHE_MEM_TAG_WIDTH)
    ) mem_bus_cache_if();

    if (NC_OR_BYPASS) begin

        `RESET_RELAY (nc_bypass_reset, reset);

        // A single bypass instance: core_bus_if and core_bus_cache_if are
        // whole arrays, so instantiating one bypass per memory port would
        // put several drivers on the same core-side signals.
        VX_cache_bypass #(
            .NUM_REQS          (NUM_REQS),
            .TAG_SEL_IDX       (TAG_SEL_IDX),

            .PASSTHRU          (PASSTHRU),
            .NC_ENABLE         (PASSTHRU ? 0 : NC_ENABLE),

            .WORD_SIZE         (WORD_SIZE),
            .LINE_SIZE         (LINE_SIZE),

            .CORE_ADDR_WIDTH   (`CS_WORD_ADDR_WIDTH),
            .CORE_TAG_WIDTH    (TAG_WIDTH),

            .MEM_ADDR_WIDTH    (`CS_MEM_ADDR_WIDTH),
            .MEM_TAG_IN_WIDTH  (CACHE_MEM_TAG_WIDTH),
            .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),

            .UUID_WIDTH        (UUID_WIDTH),

            .CORE_OUT_BUF      (CORE_OUT_BUF),
            .MEM_OUT_BUF       (MEM_OUT_BUF)
        ) cache_bypass (
            .clk            (clk),
            .reset          (nc_bypass_reset),

            .core_bus_in_if (core_bus_if),
            .core_bus_out_if(core_bus_cache_if),

            .mem_bus_in_if  (mem_bus_cache_if),
            .mem_bus_out_if (mem_bus_if[0])
        );

    end else begin

        for (genvar i = 0; i < NUM_REQS; ++i) begin
            `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
        end

        `ASSIGN_VX_MEM_BUS_IF (mem_bus_if[0], mem_bus_cache_if);
    end

    // Memory ports beyond port 0 carry no traffic: never request, never
    // accept a response.
    for (genvar i = 1; i < NUM_MEM_PORTS; ++i) begin : g_mem_port_unused
        assign mem_bus_if[i].req_valid = 0;
        assign mem_bus_if[i].req_data  = '0;
        `UNUSED_VAR (mem_bus_if[i].req_ready)

        `UNUSED_VAR (mem_bus_if[i].rsp_valid)
        `UNUSED_VAR (mem_bus_if[i].rsp_data)
        assign mem_bus_if[i].rsp_ready = 0;
    end

    if (PASSTHRU != 0) begin

        // No cache is instantiated: tie off the cache-side buses.
        for (genvar i = 0; i < NUM_REQS; ++i) begin
            `UNUSED_VAR (core_bus_cache_if[i].req_valid)
            `UNUSED_VAR (core_bus_cache_if[i].req_data)
            assign core_bus_cache_if[i].req_ready = 0;

            assign core_bus_cache_if[i].rsp_valid = 0;
            assign core_bus_cache_if[i].rsp_data  = '0;
            `UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
        end

        assign mem_bus_cache_if.req_valid = 0;
        assign mem_bus_cache_if.req_data  = '0;
        `UNUSED_VAR (mem_bus_cache_if.req_ready)

        `UNUSED_VAR (mem_bus_cache_if.rsp_valid)
        `UNUSED_VAR (mem_bus_cache_if.rsp_data)
        assign mem_bus_cache_if.rsp_ready = 0;

    `ifdef PERF_ENABLE
        assign cache_perf = '0;
    `endif

    end else begin

        `RESET_RELAY (cache_reset, reset);

        VX_cache #(
            .INSTANCE_ID  (INSTANCE_ID),
            .CACHE_SIZE   (CACHE_SIZE),
            .LINE_SIZE    (LINE_SIZE),
            .NUM_BANKS    (NUM_BANKS),
            .NUM_WAYS     (NUM_WAYS),
            .WORD_SIZE    (WORD_SIZE),
            .NUM_REQS     (NUM_REQS),
            .CRSQ_SIZE    (CRSQ_SIZE),
            .MSHR_SIZE    (MSHR_SIZE),
            .MRSQ_SIZE    (MRSQ_SIZE),
            .MREQ_SIZE    (MREQ_SIZE),
            .WRITE_ENABLE (WRITE_ENABLE),
            .WRITEBACK    (WRITEBACK),
            .DIRTY_BYTES  (DIRTY_BYTES),
            .UUID_WIDTH   (UUID_WIDTH),
            .TAG_WIDTH    (TAG_WIDTH),
            .CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
            .MEM_OUT_BUF  (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
        ) cache (
            .clk            (clk),
            .reset          (cache_reset),
        `ifdef PERF_ENABLE
            .cache_perf     (cache_perf),
        `endif
            .core_bus_if    (core_bus_cache_if),
            .mem_bus_if     (mem_bus_cache_if)
        );

    end

`ifdef DBG_TRACE_CACHE

    // Trace completed core-side handshakes, one always block per request lane.
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
        wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;

        if (UUID_WIDTH != 0) begin
            assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
            assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
        end else begin
            assign core_req_uuid = 0;
            assign core_rsp_uuid = 0;
        end

        wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready;
        wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready;

        always @(posedge clk) begin
            if (core_req_fire) begin
                if (core_bus_if[i].req_data.rw)
                    `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
                else
                    `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
            end
            if (core_rsp_fire) begin
                `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
            end
        end
    end

    wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_req_uuid;
    wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;

    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
        if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin
            assign mem_req_uuid[i] = mem_bus_if[i].req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
            assign mem_rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
        end else begin
            assign mem_req_uuid[i] = 0;
            assign mem_rsp_uuid[i] = 0;
        end
    end

    wire mem_req_fire [NUM_MEM_PORTS-1:0];
    wire mem_rsp_fire [NUM_MEM_PORTS-1:0];

    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
        assign mem_req_fire[i] = mem_bus_if[i].req_valid && mem_bus_if[i].req_ready;
        assign mem_rsp_fire[i] = mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready;
    end

    // Trace completed memory-side handshakes, one always block per port.
    for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
        always @(posedge clk) begin
            if (mem_req_fire[i]) begin
                if (mem_bus_if[i].req_data.rw)
                    `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d) bank=%d\n",
                        $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_req_uuid[i], i));
                else
                    `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d) bank=%d\n",
                        $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_req_uuid[i], i));
            end
            if (mem_rsp_fire[i]) begin
                `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
                    $time, INSTANCE_ID, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data, mem_rsp_uuid[i]));
            end
        end
    end
`endif

endmodule
|
|
@ -43,7 +43,7 @@ ifdef AXI_BUS
|
|||
TOP = Vortex_axi
|
||||
CXXFLAGS += -DAXI_BUS
|
||||
else
|
||||
TOP = Vortex
|
||||
TOP = Vortex_hbm
|
||||
endif
|
||||
|
||||
VL_FLAGS = --exe
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#include "VVortex_axi.h"
|
||||
typedef VVortex_axi Device;
|
||||
#else
|
||||
#include "VVortex.h"
|
||||
typedef VVortex Device;
|
||||
#include "VVortex_hbm.h"
|
||||
typedef VVortex_hbm Device;
|
||||
#endif
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
|
@ -123,6 +123,15 @@ public:
|
|||
tfp_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
pending_mem_reqs_.resize(NUM_MEM_PORTS);
|
||||
dram_queue_.resize(NUM_MEM_PORTS);
|
||||
|
||||
mem_rd_rsp_active_.resize(NUM_MEM_PORTS);
|
||||
mem_rd_rsp_ready_.resize(NUM_MEM_PORTS);
|
||||
|
||||
mem_wr_rsp_active_.resize(NUM_MEM_PORTS);
|
||||
mem_wr_rsp_ready_.resize(NUM_MEM_PORTS);
|
||||
|
||||
ram_ = nullptr;
|
||||
|
||||
#ifndef NDEBUG
|
||||
|
@ -210,16 +219,19 @@ private:
|
|||
|
||||
print_bufs_.clear();
|
||||
|
||||
pending_mem_reqs_.clear();
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
|
||||
{
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_, empty);
|
||||
pending_mem_reqs_.at(i).clear();
|
||||
|
||||
{
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_.at(i), empty);
|
||||
}
|
||||
|
||||
mem_rd_rsp_active_.at(i) = false;
|
||||
mem_wr_rsp_active_.at(i) = false;
|
||||
}
|
||||
|
||||
mem_rd_rsp_active_ = false;
|
||||
mem_wr_rsp_active_ = false;
|
||||
|
||||
this->mem_bus_reset();
|
||||
|
||||
this->dcr_bus_reset();
|
||||
|
@ -250,17 +262,19 @@ private:
|
|||
|
||||
dram_sim_.tick();
|
||||
|
||||
if (!dram_queue_.empty()) {
|
||||
auto mem_req = dram_queue_.front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
if (!dram_queue_.at(i).empty()) {
|
||||
auto mem_req = dram_queue_.at(i).front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.at(i).pop();
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.pop();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -437,116 +451,126 @@ private:
|
|||
#else
|
||||
|
||||
// Deasserts request-ready and response-valid on every memory port of the device.
void mem_bus_reset() {
  for (int port = 0; port < NUM_MEM_PORTS; ++port) {
    device_->mem_rsp_valid[port] = 0;
    device_->mem_req_ready[port] = 0;
  }
}
|
||||
|
||||
void mem_bus_eval(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_ = device_->mem_rsp_ready;
|
||||
return;
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_.at(i) = device_->mem_rsp_ready[i];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
device_->mem_req_ready = 0;
|
||||
return;
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
if (ram_ == nullptr) {
|
||||
device_->mem_req_ready[i] = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory read responses
|
||||
if (mem_rd_rsp_active_
|
||||
&& device_->mem_rsp_valid && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_) {
|
||||
if (!pending_mem_reqs_.empty()
|
||||
&& (*pending_mem_reqs_.begin())->ready) {
|
||||
device_->mem_rsp_valid = 1;
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_rsp->block[i]);
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
if (mem_rd_rsp_active_.at(i)
|
||||
&& device_->mem_rsp_valid[i] && mem_rd_rsp_ready_.at(i)) {
|
||||
mem_rd_rsp_active_.at(i) = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_.at(i)) {
|
||||
if (!pending_mem_reqs_.at(i).empty()
|
||||
&& (*pending_mem_reqs_.at(i).begin())->ready) {
|
||||
device_->mem_rsp_valid[i] = 1;
|
||||
auto mem_rsp_it = pending_mem_reqs_.at(i).begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_rsp->block[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data[i]), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag[i] = mem_rsp->tag;
|
||||
pending_mem_reqs_.at(i).erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_.at(i) = true;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->mem_rsp_valid[i] = 0;
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag = mem_rsp->tag;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->mem_rsp_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
if (device_->mem_req_valid && running_) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw) {
|
||||
auto byteen = device_->mem_req_byteen;
|
||||
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
|
||||
for (int j = 0; j < NUM_MEM_PORTS; ++j) {
|
||||
if (device_->mem_req_valid[j] && running_) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr[j] * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw[j]) {
|
||||
auto byteen = device_->mem_req_byteen[j];
|
||||
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data[j]);
|
||||
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
// process console output
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
// process console output
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%d=%02x,", i, data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag[j];
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
|
||||
// send dram request
|
||||
dram_queue_.at(j).push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%d=%02x,", i, data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag;
|
||||
mem_req->tag = device_->mem_req_tag[j];
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.at(j).emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
dram_queue_.at(j).push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag;
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
}
|
||||
|
||||
device_->mem_req_ready = running_;
|
||||
device_->mem_req_ready[j] = running_;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -583,9 +607,9 @@ private:
|
|||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
std::list<mem_req_t*> pending_mem_reqs_;
|
||||
std::vector<std::list<mem_req_t*>> pending_mem_reqs_;
|
||||
|
||||
std::queue<mem_req_t*> dram_queue_;
|
||||
std::vector<std::queue<mem_req_t*>> dram_queue_;
|
||||
|
||||
DramSim dram_sim_;
|
||||
|
||||
|
@ -597,11 +621,11 @@ private:
|
|||
|
||||
RAM* ram_;
|
||||
|
||||
bool mem_rd_rsp_active_;
|
||||
bool mem_rd_rsp_ready_;
|
||||
std::vector<bool> mem_rd_rsp_active_;
|
||||
std::vector<bool> mem_rd_rsp_ready_;
|
||||
|
||||
bool mem_wr_rsp_active_;
|
||||
bool mem_wr_rsp_ready_;
|
||||
std::vector<bool> mem_wr_rsp_active_;
|
||||
std::vector<bool> mem_wr_rsp_ready_;
|
||||
|
||||
bool running_;
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue