mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' into tensor-core
This commit is contained in:
commit
119805a959
8 changed files with 2222 additions and 2 deletions
229
hw/rtl/Vortex_hbm.sv
Normal file
229
hw/rtl/Vortex_hbm.sv
Normal file
|
@ -0,0 +1,229 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module Vortex_hbm import VX_gpu_pkg::*; (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid [`NUM_MEM_PORTS],
|
||||
output wire mem_req_rw [`NUM_MEM_PORTS],
|
||||
output wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen [`NUM_MEM_PORTS],
|
||||
output wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr [`NUM_MEM_PORTS],
|
||||
output wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data [`NUM_MEM_PORTS],
|
||||
output wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag [`NUM_MEM_PORTS],
|
||||
input wire mem_req_ready [`NUM_MEM_PORTS],
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid [`NUM_MEM_PORTS],
|
||||
input wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data [`NUM_MEM_PORTS],
|
||||
input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag [`NUM_MEM_PORTS],
|
||||
output wire mem_rsp_ready [`NUM_MEM_PORTS],
|
||||
|
||||
// DCR write request
|
||||
input wire dcr_wr_valid,
|
||||
input wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr,
|
||||
input wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_cluster = 0;
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
assign mem_perf_if.icache = 'x;
|
||||
assign mem_perf_if.dcache = 'x;
|
||||
assign mem_perf_if.l2cache = 'x;
|
||||
assign mem_perf_if.lmem = 'x;
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L2_LINE_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH)
|
||||
) per_cluster_mem_bus_if[`NUM_CLUSTERS]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L3_LINE_SIZE),
|
||||
.TAG_WIDTH (L3_MEM_TAG_WIDTH)
|
||||
) mem_bus_if[`NUM_MEM_PORTS]();
|
||||
|
||||
`RESET_RELAY (l3_reset, reset);
|
||||
|
||||
VX_cache_wrap_l3 #(
|
||||
.INSTANCE_ID ("l3cache"),
|
||||
.CACHE_SIZE (`L3_CACHE_SIZE),
|
||||
.LINE_SIZE (`L3_LINE_SIZE),
|
||||
.NUM_BANKS (`L3_NUM_BANKS),
|
||||
.NUM_WAYS (`L3_NUM_WAYS),
|
||||
.WORD_SIZE (L3_WORD_SIZE),
|
||||
.NUM_MEM_PORTS (`NUM_MEM_PORTS),
|
||||
.NUM_REQS (L3_NUM_REQS),
|
||||
.CRSQ_SIZE (`L3_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L3_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L3_ENABLED)
|
||||
) l3cache (
|
||||
.clk (clk),
|
||||
.reset (l3_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (mem_perf_if.l3cache),
|
||||
`endif
|
||||
|
||||
.core_bus_if (per_cluster_mem_bus_if),
|
||||
.mem_bus_if (mem_bus_if)
|
||||
);
|
||||
|
||||
wire mem_req_fire[`NUM_MEM_PORTS-1:0];
|
||||
wire mem_rsp_fire[`NUM_MEM_PORTS-1:0];
|
||||
|
||||
for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
|
||||
assign mem_req_valid[i] = mem_bus_if[i].req_valid;
|
||||
assign mem_req_rw[i] = mem_bus_if[i].req_data.rw;
|
||||
assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen;
|
||||
assign mem_req_addr[i] = mem_bus_if[i].req_data.addr;
|
||||
assign mem_req_data[i] = mem_bus_if[i].req_data.data;
|
||||
assign mem_req_tag[i] = mem_bus_if[i].req_data.tag;
|
||||
assign mem_bus_if[i].req_ready = mem_req_ready[i];
|
||||
`UNUSED_VAR (mem_bus_if[i].req_data.atype)
|
||||
|
||||
assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
|
||||
assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
|
||||
assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i];
|
||||
assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;
|
||||
|
||||
assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i];
|
||||
assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i];
|
||||
`UNUSED_VAR (mem_req_fire[i])
|
||||
`UNUSED_VAR (mem_rsp_fire[i])
|
||||
end
|
||||
|
||||
VX_dcr_bus_if dcr_bus_if();
|
||||
assign dcr_bus_if.write_valid = dcr_wr_valid;
|
||||
assign dcr_bus_if.write_addr = dcr_wr_addr;
|
||||
assign dcr_bus_if.write_data = dcr_wr_data;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
VX_dcr_bus_if cluster_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (cluster_id),
|
||||
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (scope_cluster + cluster_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
`endif
|
||||
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
|
||||
|
||||
.busy (per_cluster_busy[cluster_id])
|
||||
);
|
||||
end
|
||||
|
||||
`BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1));
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
mem_perf_t mem_perf;
|
||||
|
||||
for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_pending_reads <= '0;
|
||||
end else begin
|
||||
perf_mem_pending_reads <= $signed(perf_mem_pending_reads) +
|
||||
`PERF_CTR_BITS'($signed(2'(mem_req_fire[i] && ~mem_bus_if[i].req_data.rw) - 2'(mem_rsp_fire[i])));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0];
|
||||
wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0];
|
||||
|
||||
for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
|
||||
assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw;
|
||||
assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
mem_perf <= '0;
|
||||
end else begin
|
||||
for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
|
||||
mem_perf.reads <= mem_perf.reads + `PERF_CTR_BITS'(mem_rd_req_fire[i]);
|
||||
mem_perf.writes <= mem_perf.writes + `PERF_CTR_BITS'(mem_wr_req_fire[i]);
|
||||
end
|
||||
mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads;
|
||||
end
|
||||
end
|
||||
assign mem_perf_if.mem = mem_perf;
|
||||
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
|
||||
if (mem_req_fire[i]) begin
|
||||
if (mem_req_rw[i])
|
||||
`TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i], i));
|
||||
else
|
||||
`TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], i));
|
||||
end
|
||||
if (mem_rsp_fire[i]) begin
|
||||
`TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i]));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef SIMULATION
|
||||
always @(posedge clk) begin
|
||||
$fflush(); // flush stdout buffer
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
2
hw/rtl/cache/VX_cache_bypass.sv
vendored
2
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -247,7 +247,9 @@ module VX_cache_bypass #(
|
|||
assign is_mem_rsp_nc = 1'b0;
|
||||
end
|
||||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_OUT_WIDTH),
|
||||
|
|
355
hw/rtl/cache/VX_cache_bypass_l3.sv
vendored
Normal file
355
hw/rtl/cache/VX_cache_bypass_l3.sv
vendored
Normal file
|
@ -0,0 +1,355 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_bypass_l3 #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter NUM_OUTPUTS = 1,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
parameter PASSTHRU = 0,
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter LINE_SIZE = 1,
|
||||
|
||||
parameter CORE_ADDR_WIDTH = 1,
|
||||
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
parameter MEM_ADDR_WIDTH = 1,
|
||||
parameter MEM_TAG_IN_WIDTH = 1,
|
||||
parameter MEM_TAG_OUT_WIDTH = 1,
|
||||
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
parameter MEM_OUT_BUF = 0,
|
||||
|
||||
parameter CORE_DATA_WIDTH = WORD_SIZE * 8
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Core request in
|
||||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||
|
||||
// Core request out
|
||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
|
||||
// Memory request in
|
||||
VX_mem_bus_if.slave mem_bus_in_if,
|
||||
|
||||
// Memory request out
|
||||
VX_mem_bus_if.master mem_bus_out_if
|
||||
);
|
||||
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
|
||||
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
||||
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
|
||||
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
||||
localparam CORE_TAG_ID_BITS = CORE_TAG_WIDTH - UUID_WIDTH;
|
||||
localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS;
|
||||
localparam MEM_TAG_BYPASS_BITS = UUID_WIDTH + MEM_TAG_ID_BITS;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
// handle core requests ///////////////////////////////////////////////////
|
||||
|
||||
wire core_req_nc_valid;
|
||||
wire [NUM_REQS-1:0] core_req_nc_valids;
|
||||
wire [NUM_REQS-1:0] core_req_nc_idxs;
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire [NUM_REQS-1:0] core_req_nc_ready;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU != 0) begin
|
||||
assign core_req_nc_idxs[i] = 1'b1;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
|
||||
end else begin
|
||||
assign core_req_nc_idxs[i] = 1'b0;
|
||||
end
|
||||
assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i];
|
||||
end
|
||||
|
||||
/*
|
||||
|
||||
VX_generic_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TYPE (PASSTHRU ? "R" : "P")
|
||||
) core_req_nc_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (core_req_nc_valids),
|
||||
.grant_index (core_req_nc_idx),
|
||||
.grant_onehot (core_req_nc_sel),
|
||||
.grant_valid (core_req_nc_valid),
|
||||
.grant_ready (core_req_nc_ready)
|
||||
);
|
||||
*/
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i];
|
||||
assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data;
|
||||
assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? (core_req_nc_ready && core_req_nc_sel[i])
|
||||
: core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
// handle memory requests /////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_OUTPUTS-1:0] mem_req_out_valid;
|
||||
wire [NUM_OUTPUTS-1:0] mem_req_out_rw;
|
||||
wire [NUM_OUTPUTS-1:0][LINE_SIZE-1:0] mem_req_out_byteen;
|
||||
wire [NUM_OUTPUTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr;
|
||||
wire [NUM_OUTPUTS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype;
|
||||
wire [NUM_OUTPUTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_out_data;
|
||||
wire [NUM_OUTPUTS-1:0][MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
|
||||
wire [NUM_OUTPUTS-1:0] mem_req_out_ready;
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_nc_sel_byteen;
|
||||
wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
|
||||
wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype;
|
||||
wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
|
||||
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {
|
||||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.addr,
|
||||
core_bus_in_if[i].req_data.atype,
|
||||
core_bus_in_if[i].req_data.data,
|
||||
core_bus_in_if[i].req_data.tag
|
||||
};
|
||||
end
|
||||
|
||||
assign {
|
||||
core_req_nc_sel_rw,
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_addr,
|
||||
core_req_nc_sel_atype,
|
||||
core_req_nc_sel_data,
|
||||
core_req_nc_sel_tag
|
||||
} = core_req_nc_mux_in;
|
||||
|
||||
assign core_req_nc_ready = ~mem_bus_in_if.req_valid && mem_req_out_ready;
|
||||
|
||||
assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid;
|
||||
assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw;
|
||||
assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH];
|
||||
assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype;
|
||||
|
||||
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
||||
|
||||
wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0];
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r;
|
||||
reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
|
||||
|
||||
wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0];
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = '0;
|
||||
mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen;
|
||||
|
||||
mem_req_data_in_r = 'x;
|
||||
mem_req_data_in_r[req_wsel] = core_req_nc_sel_data;
|
||||
end
|
||||
|
||||
assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r;
|
||||
assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id});
|
||||
end else begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id});
|
||||
end
|
||||
end else begin
|
||||
assign mem_req_out_byteen = mem_bus_in_if[0].req_valid ? mem_bus_in_if[0].req_data.byteen : core_req_nc_sel_byteen;
|
||||
assign mem_req_out_data = mem_bus_in_if[0].req_valid ? mem_bus_in_if[0].req_data.data : core_req_nc_sel_data;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id});
|
||||
end else begin
|
||||
assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id});
|
||||
end
|
||||
end
|
||||
|
||||
wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass};
|
||||
end else begin
|
||||
assign mem_req_tag_bypass = mem_req_tag_id_bypass;
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
assign mem_req_out_tag = mem_req_tag_bypass;
|
||||
`UNUSED_VAR (mem_bus_in_if[0].req_data.tag)
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
VX_bits_insert #(
|
||||
.N (MEM_TAG_OUT_WIDTH-1),
|
||||
.S (1),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_req_tag_in_nc_insert (
|
||||
.data_in (mem_bus_in_if[0].req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if[0].req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)),
|
||||
.ins_in (~mem_bus_in_if[0].req_valid),
|
||||
.data_out (mem_req_out_tag)
|
||||
);
|
||||
end else begin
|
||||
assign mem_req_out_tag = mem_bus_in_if[0].req_data.tag;
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_bus_in_if[0].req_ready = mem_req_out_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if[0].req_data.rw, mem_bus_out_if[0].req_data.byteen, mem_bus_out_if[0].req_data.addr, mem_bus_out_if[0].req_data.atype, mem_bus_out_if[0].req_data.data, mem_bus_out_if[0].req_data.tag}),
|
||||
.valid_out (mem_bus_out_if[0].req_valid),
|
||||
.ready_out (mem_bus_out_if[0].req_ready)
|
||||
);
|
||||
|
||||
// handle core responses //////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_in_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_in_data;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_in_tag;
|
||||
wire [NUM_REQS-1:0] core_rsp_in_ready;
|
||||
|
||||
wire is_mem_rsp_nc;
|
||||
if (PASSTHRU != 0) begin
|
||||
assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid;
|
||||
end else begin
|
||||
if (NC_ENABLE) begin
|
||||
assign is_mem_rsp_nc = mem_bus_out_if[0].rsp_valid && mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX];
|
||||
end else begin
|
||||
assign is_mem_rsp_nc = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
|
||||
|
||||
VX_bits_remove #(
|
||||
.N (MEM_TAG_OUT_WIDTH),
|
||||
.S (NC_ENABLE),
|
||||
.POS (TAG_SEL_IDX)
|
||||
) mem_rsp_tag_in_nc_remove (
|
||||
.data_in (mem_bus_out_if[0].rsp_data.tag),
|
||||
.data_out (mem_rsp_tag_id_nc)
|
||||
);
|
||||
|
||||
wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx;
|
||||
if (NUM_REQS > 1) begin
|
||||
assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS];
|
||||
end else begin
|
||||
assign rsp_idx = 1'b0;
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0] rsp_nc_valid_r;
|
||||
always @(*) begin
|
||||
rsp_nc_valid_r = '0;
|
||||
rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i];
|
||||
assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i];
|
||||
end
|
||||
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ?
|
||||
core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if[0].rsp_data.data;
|
||||
end
|
||||
end
|
||||
|
||||
wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2;
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]};
|
||||
end else begin
|
||||
assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (PASSTHRU) begin
|
||||
assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2;
|
||||
end else begin
|
||||
assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_rsp_in_valid[i]),
|
||||
.ready_in (core_rsp_in_ready[i]),
|
||||
.data_in ({core_rsp_in_data[i], core_rsp_in_tag[i]}),
|
||||
.data_out ({core_bus_in_if[i].rsp_data.data, core_bus_in_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_in_if[i].rsp_valid),
|
||||
.ready_out (core_bus_in_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
// handle memory responses ////////////////////////////////////////////////
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
assign mem_bus_in_if[0].rsp_valid = 1'b0;
|
||||
assign mem_bus_in_if[0].rsp_data.data = '0;
|
||||
assign mem_bus_in_if[0].rsp_data.tag = '0;
|
||||
end else if (NC_ENABLE) begin
|
||||
assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid && ~mem_bus_out_if[0].rsp_data.tag[TAG_SEL_IDX];
|
||||
assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data;
|
||||
assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0];
|
||||
end else begin
|
||||
assign mem_bus_in_if[0].rsp_valid = mem_bus_out_if[0].rsp_valid;
|
||||
assign mem_bus_in_if[0].rsp_data.data = mem_bus_out_if[0].rsp_data.data;
|
||||
assign mem_bus_in_if[0].rsp_data.tag = mem_rsp_tag_id_nc;
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_out_valid;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid;
|
||||
end
|
||||
|
||||
assign mem_bus_out_if[0].rsp_ready = is_mem_rsp_nc ? (~core_rsp_out_valid[rsp_idx] && core_rsp_in_ready[rsp_idx]) : mem_bus_in_if[0].rsp_ready;
|
||||
|
||||
endmodule
|
640
hw/rtl/cache/VX_cache_l3.sv
vendored
Normal file
640
hw/rtl/cache/VX_cache_l3.sv
vendored
Normal file
|
@ -0,0 +1,640 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_l3 import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of memory ports
|
||||
parameter NUM_MEM_PORTS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = `XLEN/8,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
|
||||
// Memory request output register
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_mem_bus_if.slave core_bus_if [NUM_REQS],
|
||||
VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS]
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter: number of banks must be power of 2"))
|
||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter: writeback requires write enable"))
|
||||
`STATIC_ASSERT(WRITEBACK || !DIRTY_BYTES, ("invalid parameter: dirty bytes require writeback"))
|
||||
|
||||
// In writeback mode, memory fill response may issue a new memory request to handle evicted blocks.
|
||||
// We need to ensure that the memory request queue never fills up to avoid deadlock.
|
||||
`STATIC_ASSERT(!WRITEBACK || (MREQ_SIZE >= MSHR_SIZE), ("invalid parameter: writeback requires MREQ_SIZE >= MSHR_SIZE"))
|
||||
|
||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WORD_WIDTH = WORD_SIZE * 8;
|
||||
localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
|
||||
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
|
||||
|
||||
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core response buffering
|
||||
wire [NUM_REQS-1:0] core_rsp_valid_s;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_rsp_data_s;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s;
|
||||
wire [NUM_REQS-1:0] core_rsp_ready_s;
|
||||
|
||||
`RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
|
||||
) core_rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (core_rsp_reset[i]),
|
||||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
.data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus2_if[i].rsp_valid),
|
||||
.ready_out (core_bus2_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory request buffering
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_valid_s;
|
||||
wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_rw_s;
|
||||
wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_flush_s;
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_ready_s;
|
||||
|
||||
wire [NUM_MEM_PORTS-1:0] mem_bus_if_flush;
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s[i]),
|
||||
.ready_in (mem_req_ready_s[i]),
|
||||
.data_in ({mem_req_rw_s[i], mem_req_byteen_s[i], mem_req_addr_s[i], mem_req_data_s[i], mem_req_tag_s[i], mem_req_flush_s[i]}),
|
||||
.data_out ({mem_bus_if[i].req_data.rw, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.data, mem_bus_if[i].req_data.tag, mem_bus_if_flush[i]}),
|
||||
.valid_out (mem_bus_if[i].req_valid),
|
||||
.ready_out (mem_bus_if[i].req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_if[i].req_data.atype = mem_bus_if_flush[i] ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
|
||||
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Memory response buffering
|
||||
wire [NUM_MEM_PORTS-1:0] mem_rsp_valid_s;
|
||||
wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire [NUM_MEM_PORTS-1:0] mem_rsp_ready_s;
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.OUT_REG (MRSQ_SIZE > 2)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mem_bus_if[i].rsp_valid),
|
||||
.ready_in (mem_bus_if[i].rsp_ready),
|
||||
.data_in ({mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data}),
|
||||
.data_out ({mem_rsp_tag_s[i], mem_rsp_data_s[i]}),
|
||||
.valid_out (mem_rsp_valid_s[i]),
|
||||
.ready_out (mem_rsp_ready_s[i])
|
||||
);
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
|
||||
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_core_req_wsel;
|
||||
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
assign mem_rsp_ready_s[i] = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[i])];
|
||||
end
|
||||
end
|
||||
|
||||
// Bank requests dispatch
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_flush;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
|
||||
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus2_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
|
||||
end else begin
|
||||
assign core_req_wsel[i] = '0;
|
||||
end
|
||||
assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH];
|
||||
end
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS];
|
||||
end
|
||||
end else begin
|
||||
assign core_req_bid = '0;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_data_in[i] = {
|
||||
core_req_line_addr[i],
|
||||
core_req_rw[i],
|
||||
core_req_wsel[i],
|
||||
core_req_byteen[i],
|
||||
core_req_data[i],
|
||||
core_req_tag[i],
|
||||
core_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [`PERF_CTR_BITS-1:0] perf_collisions;
|
||||
`endif
|
||||
|
||||
`RESET_RELAY (req_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (req_xbar_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.collisions(perf_collisions),
|
||||
`else
|
||||
`UNUSED_PIN(collisions),
|
||||
`endif
|
||||
.valid_in (core_req_valid),
|
||||
.data_in (core_req_data_in),
|
||||
.sel_in (core_req_bid),
|
||||
.ready_in (core_req_ready),
|
||||
.valid_out (per_bank_core_req_valid),
|
||||
.data_out (core_req_data_out),
|
||||
.sel_out (per_bank_core_req_idx),
|
||||
.ready_out (per_bank_core_req_ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign {
|
||||
per_bank_core_req_addr[i],
|
||||
per_bank_core_req_rw[i],
|
||||
per_bank_core_req_wsel[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_data[i],
|
||||
per_bank_core_req_tag[i],
|
||||
per_bank_core_req_flush[i]
|
||||
} = core_req_data_out[i];
|
||||
end
|
||||
|
||||
// Banks access
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s[bank_id] && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s[bank_id]) == bank_id);
|
||||
end
|
||||
|
||||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (bank_id),
|
||||
.INSTANCE_ID ($sformatf("%s-bank%0d", INSTANCE_ID, bank_id)),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
|
||||
) bank (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_read_misses (perf_read_miss_per_bank[bank_id]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[bank_id]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[bank_id]),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
.core_req_valid (per_bank_core_req_valid[bank_id]),
|
||||
.core_req_addr (per_bank_core_req_addr[bank_id]),
|
||||
.core_req_rw (per_bank_core_req_rw[bank_id]),
|
||||
.core_req_wsel (per_bank_core_req_wsel[bank_id]),
|
||||
.core_req_byteen (per_bank_core_req_byteen[bank_id]),
|
||||
.core_req_data (per_bank_core_req_data[bank_id]),
|
||||
.core_req_tag (per_bank_core_req_tag[bank_id]),
|
||||
.core_req_idx (per_bank_core_req_idx[bank_id]),
|
||||
.core_req_flush (per_bank_core_req_flush[bank_id]),
|
||||
.core_req_ready (per_bank_core_req_ready[bank_id]),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_bank_core_rsp_valid[bank_id]),
|
||||
.core_rsp_data (per_bank_core_rsp_data[bank_id]),
|
||||
.core_rsp_tag (per_bank_core_rsp_tag[bank_id]),
|
||||
.core_rsp_idx (per_bank_core_rsp_idx[bank_id]),
|
||||
.core_rsp_ready (per_bank_core_rsp_ready[bank_id]),
|
||||
|
||||
// Memory request
|
||||
.mem_req_valid (per_bank_mem_req_valid[bank_id]),
|
||||
.mem_req_addr (curr_bank_mem_req_addr),
|
||||
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_id (per_bank_mem_req_id[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data_s[bank_id]),
|
||||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s[bank_id])),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr;
|
||||
end else begin
|
||||
assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id);
|
||||
end
|
||||
end
|
||||
|
||||
// Bank responses gather
|
||||
|
||||
wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in;
|
||||
wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]};
|
||||
end
|
||||
|
||||
`RESET_RELAY (rsp_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (CORE_RSP_DATAW),
|
||||
.ARBITER ("F")
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (rsp_xbar_reset),
|
||||
`UNUSED_PIN (collisions),
|
||||
.valid_in (per_bank_core_rsp_valid),
|
||||
.data_in (core_rsp_data_in),
|
||||
.sel_in (per_bank_core_rsp_idx),
|
||||
.ready_in (per_bank_core_rsp_ready),
|
||||
.valid_out (core_rsp_valid_s),
|
||||
.data_out (core_rsp_data_out),
|
||||
.ready_out (core_rsp_ready_s),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_valid_p;
|
||||
wire [NUM_MEM_PORTS-1:0][`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_rw_p;
|
||||
wire [NUM_MEM_PORTS-1:0][LINE_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [NUM_MEM_PORTS-1:0][`CS_LINE_WIDTH-1:0] mem_req_data_p;
|
||||
wire [NUM_MEM_PORTS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag_p;
|
||||
wire [NUM_MEM_PORTS-1:0][MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_flush_p;
|
||||
wire [NUM_MEM_PORTS-1:0] mem_req_ready_p;
|
||||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
|
||||
wire [NUM_MEM_PORTS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {
|
||||
per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_MEM_PORTS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.ARBITER ("F")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out (data_out),
|
||||
.valid_out (mem_req_valid_p),
|
||||
.ready_out (mem_req_ready_p),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
assign {
|
||||
mem_req_addr_p[i],
|
||||
mem_req_rw_p[i],
|
||||
mem_req_byteen_p[i],
|
||||
mem_req_data_p[i],
|
||||
mem_req_id_p[i],
|
||||
mem_req_flush_p[i]
|
||||
} = data_out[i];
|
||||
end
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p[i]);
|
||||
assign mem_req_tag_p[i] = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p[i]});
|
||||
end
|
||||
end else begin
|
||||
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
|
||||
end
|
||||
|
||||
// Memory request multi-port handling
|
||||
|
||||
assign mem_req_valid_s = mem_req_valid_p;
|
||||
assign mem_req_addr_s = mem_req_addr_p;
|
||||
assign mem_req_tag_s = mem_req_tag_p;
|
||||
assign mem_req_flush_s = mem_req_flush_p;
|
||||
assign mem_req_ready_p = mem_req_ready_s;
|
||||
|
||||
if (WRITE_ENABLE != 0) begin
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
assign mem_req_rw_s = 0;
|
||||
assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
|
||||
assign mem_req_data_s = '0;
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_req;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_req;
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
|
||||
wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
|
||||
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
|
||||
|
||||
wire perf_mem_stall_per_cycle = mem_bus_if[0].req_valid && ~mem_bus_if[0].req_ready;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_read_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_core_reads <= '0;
|
||||
perf_core_writes <= '0;
|
||||
perf_read_misses <= '0;
|
||||
perf_write_misses <= '0;
|
||||
perf_mshr_stalls <= '0;
|
||||
perf_mem_stalls <= '0;
|
||||
perf_crsp_stalls <= '0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
|
||||
perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'(perf_mem_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
assign cache_perf.reads = perf_core_reads;
|
||||
assign cache_perf.writes = perf_core_writes;
|
||||
assign cache_perf.read_misses = perf_read_misses;
|
||||
assign cache_perf.write_misses = perf_write_misses;
|
||||
assign cache_perf.bank_stalls = perf_collisions;
|
||||
assign cache_perf.mshr_stalls = perf_mshr_stalls;
|
||||
assign cache_perf.mem_stalls = perf_mem_stalls;
|
||||
assign cache_perf.crsp_stalls = perf_crsp_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
331
hw/rtl/cache/VX_cache_wrap_l3.sv
vendored
Normal file
331
hw/rtl/cache/VX_cache_wrap_l3.sv
vendored
Normal file
|
@ -0,0 +1,331 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
module VX_cache_wrap_l3 import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
parameter NUM_REQS = 4,
|
||||
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
// Number of memory ports
|
||||
parameter NUM_MEM_PORTS = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
parameter MREQ_SIZE = 4,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Enable dirty bytes on writeback
|
||||
parameter DIRTY_BYTES = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
// Force bypass for all requests
|
||||
parameter PASSTHRU = 0,
|
||||
|
||||
// Core response output buffer
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
|
||||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if.slave core_bus_if [NUM_REQS],
|
||||
VX_mem_bus_if.master mem_bus_if [NUM_MEM_PORTS]
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
||||
localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);
|
||||
|
||||
localparam NUM_REQS_P = NUM_REQS / NUM_MEM_PORTS;
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus_cache_if[NUM_REQS]();
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LINE_SIZE),
|
||||
.TAG_WIDTH (CACHE_MEM_TAG_WIDTH)
|
||||
) mem_bus_cache_if[NUM_MEM_PORTS]();
|
||||
|
||||
if (NC_OR_BYPASS) begin
|
||||
`RESET_RELAY (nc_bypass_reset, reset);
|
||||
|
||||
// Slicing version
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
|
||||
localparam SLICE_BEGIN = i * NUM_REQS_P;
|
||||
localparam SLICE_END = SLICE_BEGIN + NUM_REQS_P;
|
||||
|
||||
VX_cache_bypass #(
|
||||
.NUM_REQS (NUM_REQS_P),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
|
||||
.PASSTHRU (PASSTHRU),
|
||||
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
|
||||
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_TAG_WIDTH (TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
|
||||
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
|
||||
.CORE_OUT_BUF (CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache_bypass (
|
||||
.clk (clk),
|
||||
.reset (nc_bypass_reset),
|
||||
|
||||
.core_bus_in_if (core_bus_if[SLICE_END-1:SLICE_BEGIN]),
|
||||
.core_bus_out_if(core_bus_cache_if[SLICE_END-1:SLICE_BEGIN]),
|
||||
|
||||
.mem_bus_in_if (mem_bus_cache_if[i]),
|
||||
.mem_bus_out_if (mem_bus_if[i])
|
||||
);
|
||||
end
|
||||
|
||||
// Connect everything
|
||||
/*
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
VX_cache_bypass #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
|
||||
.PASSTHRU (PASSTHRU),
|
||||
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
|
||||
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_TAG_WIDTH (TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
|
||||
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
|
||||
.CORE_OUT_BUF (CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (MEM_OUT_BUF)
|
||||
) cache_bypass (
|
||||
.clk (clk),
|
||||
.reset (nc_bypass_reset),
|
||||
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if(core_bus_cache_if),
|
||||
|
||||
.mem_bus_in_if (mem_bus_cache_if[i]),
|
||||
.mem_bus_out_if (mem_bus_if[i])
|
||||
);
|
||||
end
|
||||
*/
|
||||
|
||||
end else begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if[i], mem_bus_cache_if[i]);
|
||||
end
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_valid)
|
||||
`UNUSED_VAR (core_bus_cache_if[i].req_data)
|
||||
assign core_bus_cache_if[i].req_ready = 0;
|
||||
|
||||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
assign mem_bus_cache_if[i].req_valid = 0;
|
||||
assign mem_bus_cache_if[i].req_data = '0;
|
||||
`UNUSED_VAR (mem_bus_cache_if[i].req_ready)
|
||||
|
||||
`UNUSED_VAR (mem_bus_cache_if[i].rsp_valid)
|
||||
`UNUSED_VAR (mem_bus_cache_if[i].rsp_data)
|
||||
assign mem_bus_cache_if[i].rsp_ready = 0;
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
assign cache_perf = '0;
|
||||
`endif
|
||||
|
||||
end else begin
|
||||
|
||||
`RESET_RELAY (cache_reset, reset);
|
||||
|
||||
VX_cache_l3 #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_MEM_PORTS (NUM_MEM_PORTS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
|
||||
) cache (
|
||||
.clk (clk),
|
||||
.reset (cache_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (cache_perf),
|
||||
`endif
|
||||
.core_bus_if (core_bus_cache_if),
|
||||
.mem_bus_if (mem_bus_cache_if)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign core_req_uuid = 0;
|
||||
assign core_rsp_uuid = 0;
|
||||
end
|
||||
|
||||
wire core_req_fire = core_bus_if[i].req_valid && core_bus_if[i].req_ready;
|
||||
wire core_rsp_fire = core_bus_if[i].rsp_valid && core_bus_if[i].rsp_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (core_req_fire) begin
|
||||
if (core_bus_if[i].req_data.rw)
|
||||
`TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid));
|
||||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_req_uuid;
|
||||
wire [NUM_MEM_PORTS-1:0][`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin
|
||||
assign mem_req_uuid[i] = mem_bus_if[i].req_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign mem_rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign mem_req_uuid[i] = 0;
|
||||
assign mem_rsp_uuid[i] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
wire mem_req_fire [NUM_MEM_PORTS-1:0];
|
||||
wire mem_rsp_fire [NUM_MEM_PORTS-1:0];
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
assign mem_req_fire[i] = mem_bus_if[i].req_valid && mem_bus_if[i].req_ready;
|
||||
assign mem_rsp_fire[i] = mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_MEM_PORTS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire[i]) begin
|
||||
if (mem_bus_if[i].req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d) bank=%d\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, mem_req_uuid[i], i));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d) bank=%d\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if[i].req_data.addr), mem_bus_if[i].req_data.tag, mem_req_uuid[i], i));
|
||||
end
|
||||
if (mem_rsp_fire[i]) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data, mem_rsp_uuid[i]));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -37,7 +37,14 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface
|
|||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp
|
||||
SRCS += $(SRC_DIR)/processor_hbm.cpp
|
||||
|
||||
ifdef AXI_BUS
|
||||
TOP = Vortex_axi
|
||||
CXXFLAGS += -DAXI_BUS
|
||||
else
|
||||
TOP = Vortex_hbm
|
||||
endif
|
||||
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
|
|
656
sim/rtlsim/processor_hbm.cpp
Normal file
656
sim/rtlsim/processor_hbm.cpp
Normal file
|
@ -0,0 +1,656 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "processor.h"
|
||||
|
||||
#ifdef AXI_BUS
|
||||
#include "VVortex_axi.h"
|
||||
typedef VVortex_axi Device;
|
||||
#else
|
||||
#include "VVortex_hbm.h"
|
||||
typedef VVortex_hbm Device;
|
||||
#endif
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <mem.h>
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <ostream>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <dram_sim.h>
|
||||
#include <util.h>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#else
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MEM_CLOCK_RATIO
|
||||
#define MEM_CLOCK_RATIO 1
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_START_TIME
|
||||
#define TRACE_START_TIME 0ull
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_STOP_TIME
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
#ifndef VERILATOR_RESET_VALUE
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
#if (XLEN == 32)
|
||||
typedef uint32_t Word;
|
||||
#elif (XLEN == 64)
|
||||
typedef uint64_t Word;
|
||||
#else
|
||||
#error unsupported XLEN
|
||||
#endif
|
||||
|
||||
#define VL_WDATA_GETW(lwp, i, n, w) \
|
||||
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
void sim_trace_enable(bool enable) {
|
||||
trace_enabled = enable;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class Processor::Impl {
|
||||
public:
|
||||
Impl() : dram_sim_(MEM_CLOCK_RATIO) {
|
||||
// force random values for unitialized signals
|
||||
Verilated::randReset(VERILATOR_RESET_VALUE);
|
||||
Verilated::randSeed(50);
|
||||
|
||||
// turn off assertion before reset
|
||||
Verilated::assertOn(false);
|
||||
|
||||
// create RTL module instance
|
||||
device_ = new Device();
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
tfp_ = new VerilatedVcdC();
|
||||
device_->trace(tfp_, 99);
|
||||
tfp_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
pending_mem_reqs_.resize(NUM_MEM_PORTS);
|
||||
dram_queue_.resize(NUM_MEM_PORTS);
|
||||
|
||||
mem_rd_rsp_active_.resize(NUM_MEM_PORTS);
|
||||
mem_rd_rsp_ready_.resize(NUM_MEM_PORTS);
|
||||
|
||||
mem_wr_rsp_active_.resize(NUM_MEM_PORTS);
|
||||
mem_wr_rsp_ready_.resize(NUM_MEM_PORTS);
|
||||
|
||||
ram_ = nullptr;
|
||||
|
||||
#ifndef NDEBUG
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << NUM_THREADS
|
||||
<< ", num_warps=" << NUM_WARPS
|
||||
<< ", num_cores=" << NUM_CORES
|
||||
<< ", num_clusters=" << NUM_CLUSTERS
|
||||
<< ", socket_size=" << SOCKET_SIZE
|
||||
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
|
||||
<< ", num_barriers=" << NUM_BARRIERS
|
||||
<< std::endl;
|
||||
#endif
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
this->cout_flush();
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
tfp_->close();
|
||||
delete tfp_;
|
||||
#endif
|
||||
|
||||
delete device_;
|
||||
}
|
||||
|
||||
void cout_flush() {
|
||||
for (auto& buf : print_bufs_) {
|
||||
auto str = buf.second.str();
|
||||
if (!str.empty()) {
|
||||
std::cout << "#" << buf.first << ": " << str << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void attach_ram(RAM* ram) {
|
||||
ram_ = ram;
|
||||
}
|
||||
|
||||
void run() {
|
||||
|
||||
#ifndef NDEBUG
|
||||
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
|
||||
#endif
|
||||
|
||||
// start execution
|
||||
running_ = true;
|
||||
device_->reset = 0;
|
||||
|
||||
/*
|
||||
device_->mem_req_valid[1] = 0;
|
||||
device_->mem_req_ready[1] = 0;
|
||||
device_->mem_rsp_valid[1] = 0;
|
||||
device_->mem_rsp_ready[1] = 0;
|
||||
*/
|
||||
|
||||
// wait on device to go busy
|
||||
while (!device_->busy) {
|
||||
this->tick();
|
||||
}
|
||||
|
||||
// wait on device to go idle
|
||||
while (device_->busy) {
|
||||
this->tick();
|
||||
}
|
||||
|
||||
// reset device
|
||||
this->reset();
|
||||
|
||||
this->cout_flush();
|
||||
}
|
||||
|
||||
void dcr_write(uint32_t addr, uint32_t value) {
|
||||
device_->dcr_wr_valid = 1;
|
||||
device_->dcr_wr_addr = addr;
|
||||
device_->dcr_wr_data = value;
|
||||
while (device_->dcr_wr_valid) {
|
||||
this->tick();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void reset() {
|
||||
running_ = false;
|
||||
|
||||
print_bufs_.clear();
|
||||
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
|
||||
pending_mem_reqs_.at(i).clear();
|
||||
|
||||
{
|
||||
std::queue<mem_req_t*> empty;
|
||||
std::swap(dram_queue_.at(i), empty);
|
||||
}
|
||||
|
||||
mem_rd_rsp_active_.at(i) = false;
|
||||
mem_wr_rsp_active_.at(i) = false;
|
||||
}
|
||||
|
||||
this->mem_bus_reset();
|
||||
|
||||
this->dcr_bus_reset();
|
||||
|
||||
device_->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
this->mem_bus_eval(0, i);
|
||||
}
|
||||
this->dcr_bus_eval(0);
|
||||
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
this->mem_bus_eval(1, i);
|
||||
}
|
||||
this->dcr_bus_eval(1);
|
||||
|
||||
dram_sim_.tick();
|
||||
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
if (!dram_queue_.at(i).empty()) {
|
||||
auto mem_req = dram_queue_.at(i).front();
|
||||
if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) {
|
||||
auto orig_req = reinterpret_cast<mem_req_t*>(arg);
|
||||
if (orig_req->ready) {
|
||||
delete orig_req;
|
||||
} else {
|
||||
orig_req->ready = true;
|
||||
}
|
||||
}, mem_req)) {
|
||||
dram_queue_.at(i).pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
|
||||
void eval() {
|
||||
device_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
tfp_->dump(timestamp);
|
||||
} else {
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
++timestamp;
|
||||
}
|
||||
|
||||
#ifdef AXI_BUS
|
||||
|
||||
void mem_bus_reset() {
|
||||
device_->m_axi_wready[0] = 0;
|
||||
device_->m_axi_awready[0] = 0;
|
||||
device_->m_axi_arready[0] = 0;
|
||||
device_->m_axi_rvalid[0] = 0;
|
||||
device_->m_axi_bvalid[0] = 0;
|
||||
}
|
||||
|
||||
void mem_bus_eval(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_ = device_->m_axi_rready[0];
|
||||
mem_wr_rsp_ready_ = device_->m_axi_bready[0];
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
device_->m_axi_wready[0] = 0;
|
||||
device_->m_axi_awready[0] = 0;
|
||||
device_->m_axi_arready[0] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// process memory read responses
|
||||
if (mem_rd_rsp_active_
|
||||
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_) {
|
||||
if (!pending_mem_reqs_.empty()
|
||||
&& (*pending_mem_reqs_.begin())->ready
|
||||
&& !(*pending_mem_reqs_.begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_rsp->block[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
device_->m_axi_rvalid[0] = 1;
|
||||
device_->m_axi_rid[0] = mem_rsp->tag;
|
||||
device_->m_axi_rresp[0] = 0;
|
||||
device_->m_axi_rlast[0] = 1;
|
||||
memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->m_axi_rvalid[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory write responses
|
||||
if (mem_wr_rsp_active_
|
||||
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
|
||||
mem_wr_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_wr_rsp_active_) {
|
||||
if (!pending_mem_reqs_.empty()
|
||||
&& (*pending_mem_reqs_.begin())->ready
|
||||
&& (*pending_mem_reqs_.begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr);
|
||||
*/
|
||||
device_->m_axi_bvalid[0] = 1;
|
||||
device_->m_axi_bid[0] = mem_rsp->tag;
|
||||
device_->m_axi_bresp[0] = 0;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_wr_rsp_active_ = true;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->m_axi_bvalid[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// select the memory bank
|
||||
uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0];
|
||||
|
||||
// process memory requests
|
||||
if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) {
|
||||
if (device_->m_axi_wvalid[0]) {
|
||||
auto byteen = device_->m_axi_wstrb[0];
|
||||
auto base_addr = device_->m_axi_awaddr[0];
|
||||
auto data = (uint8_t*)device_->m_axi_wdata[0].data();
|
||||
|
||||
if (base_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
// process console output
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->m_axi_awid[0];
|
||||
mem_req->addr = device_->m_axi_awaddr[0];
|
||||
mem_req->write = true;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->m_axi_arid[0];
|
||||
mem_req->addr = device_->m_axi_araddr[0];
|
||||
ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE);
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.push(mem_req);
|
||||
}
|
||||
}
|
||||
|
||||
device_->m_axi_wready[0] = running_;
|
||||
device_->m_axi_awready[0] = running_;
|
||||
device_->m_axi_arready[0] = running_;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void mem_bus_reset() {
|
||||
for (int i = 0; i < NUM_MEM_PORTS; ++i) {
|
||||
device_->mem_req_ready[i] = 0;
|
||||
device_->mem_rsp_valid[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void mem_bus_eval(bool clk, int n) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_.at(n) = device_->mem_rsp_ready[n];
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
device_->mem_req_ready[n] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// process memory read responses
|
||||
if (mem_rd_rsp_active_.at(n)
|
||||
&& device_->mem_rsp_valid[n] && mem_rd_rsp_ready_.at(n)) {
|
||||
mem_rd_rsp_active_.at(n) = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_.at(n)) {
|
||||
if (!pending_mem_reqs_.at(n).empty()
|
||||
&& (*pending_mem_reqs_.at(n).begin())->ready) {
|
||||
device_->mem_rsp_valid[n] = 1;
|
||||
auto mem_rsp_it = pending_mem_reqs_.at(n).begin();
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%02x", mem_rsp->block[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data[n]), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag[n] = mem_rsp->tag;
|
||||
pending_mem_reqs_.at(n).erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_.at(n) = true;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->mem_rsp_valid[n] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
if (device_->mem_req_valid[n] && running_) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr[n] * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw[n]) {
|
||||
auto byteen = device_->mem_req_byteen[n];
|
||||
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data[n]);
|
||||
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
// process console output
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
char c = data[i];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// process writes
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Req: tag=0x%0lx, addr=0x%0lx, byteen=0x", timestamp, device_->mem_req_tag, byte_addr);
|
||||
for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) {
|
||||
printf("%x", (int)((byteen >> (4 * i)) & 0xf));
|
||||
}
|
||||
printf(", data=0x");
|
||||
for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) {
|
||||
printf("%d=%02x,", i, data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag[n];
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
|
||||
// send dram request
|
||||
dram_queue_.at(n).push(mem_req);
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->mem_req_tag[n];
|
||||
mem_req->addr = byte_addr;
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.at(n).emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
dram_queue_.at(n).push(mem_req);
|
||||
}
|
||||
}
|
||||
|
||||
device_->mem_req_ready[n] = running_;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void dcr_bus_reset() {
|
||||
device_->dcr_wr_valid = 0;
|
||||
}
|
||||
|
||||
void dcr_bus_eval(bool clk) {
|
||||
if (!clk) {
|
||||
return;
|
||||
}
|
||||
if (device_->dcr_wr_valid) {
|
||||
device_->dcr_wr_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void wait(uint32_t cycles) {
|
||||
for (int i = 0; i < cycles; ++i) {
|
||||
this->tick();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
Device* device;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> block;
|
||||
uint64_t addr;
|
||||
uint64_t tag;
|
||||
bool write;
|
||||
bool ready;
|
||||
} mem_req_t;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
std::vector<std::list<mem_req_t*>> pending_mem_reqs_;
|
||||
|
||||
std::vector<std::queue<mem_req_t*>> dram_queue_;
|
||||
|
||||
DramSim dram_sim_;
|
||||
|
||||
Device* device_;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *tfp_;
|
||||
#endif
|
||||
|
||||
RAM* ram_;
|
||||
|
||||
std::vector<bool> mem_rd_rsp_active_;
|
||||
std::vector<bool> mem_rd_rsp_ready_;
|
||||
|
||||
std::vector<bool> mem_wr_rsp_active_;
|
||||
std::vector<bool> mem_wr_rsp_ready_;
|
||||
|
||||
bool running_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Processor::Processor()
|
||||
: impl_(new Impl())
|
||||
{}
|
||||
|
||||
Processor::~Processor() {
|
||||
delete impl_;
|
||||
}
|
||||
|
||||
void Processor::attach_ram(RAM* mem) {
|
||||
impl_->attach_ram(mem);
|
||||
}
|
||||
|
||||
void Processor::run() {
|
||||
impl_->run();
|
||||
}
|
||||
|
||||
void Processor::dcr_write(uint32_t addr, uint32_t value) {
|
||||
return impl_->dcr_write(addr, value);
|
||||
}
|
2
third_party/softfloat
vendored
2
third_party/softfloat
vendored
|
@ -1 +1 @@
|
|||
Subproject commit b51ef8f3201669b2288104c28546fc72532a1ea4
|
||||
Subproject commit 3b70b5d8147675932c38b36cd09af6df4eedd919
|
Loading…
Add table
Add a link
Reference in a new issue