core memory unit refactoring

This commit is contained in:
Blaise Tine 2024-08-15 21:12:28 -07:00
parent 65bd9afabb
commit f4983cb380
4 changed files with 356 additions and 318 deletions

View file

@ -202,132 +202,20 @@ module VX_core import VX_gpu_pkg::*; #(
.commit_sched_if(commit_sched_if)
);
VX_lsu_mem_if #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lsu_dcache_if[`NUM_LSU_BLOCKS]();
`ifdef LMEM_ENABLE
`RESET_RELAY (lmem_unit_reset, reset);
VX_lmem_unit #(
VX_mem_unit #(
.INSTANCE_ID (INSTANCE_ID)
) lmem_unit (
.clk (clk),
.reset (lmem_unit_reset),
) mem_unit (
.clk (clk),
.reset (lmem_unit_reset),
`ifdef PERF_ENABLE
.cache_perf (mem_perf_tmp_if.lmem),
.cache_perf (mem_perf_tmp_if.lmem),
`endif
.lsu_mem_in_if (lsu_mem_if),
.lsu_mem_out_if (lsu_dcache_if)
.lsu_mem_in_if (lsu_mem_if),
.dcache_bus_if (dcache_bus_if)
);
`else
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
`ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]);
end
`endif
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks
VX_lsu_mem_if #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_coalesced_if();
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if
`RESET_RELAY (mem_coalescer_reset, reset);
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
.NUM_REQS (`NUM_LSU_LANES),
.DATA_IN_SIZE (LSU_WORD_SIZE),
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
.ADDR_WIDTH (LSU_ADDR_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
) mem_coalescer (
.clk (clk),
.reset (mem_coalescer_reset),
// Input request
.in_req_valid (lsu_dcache_if[i].req_valid),
.in_req_mask (lsu_dcache_if[i].req_data.mask),
.in_req_rw (lsu_dcache_if[i].req_data.rw),
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
.in_req_addr (lsu_dcache_if[i].req_data.addr),
.in_req_flags (lsu_dcache_if[i].req_data.flags),
.in_req_data (lsu_dcache_if[i].req_data.data),
.in_req_tag (lsu_dcache_if[i].req_data.tag),
.in_req_ready (lsu_dcache_if[i].req_ready),
// Input response
.in_rsp_valid (lsu_dcache_if[i].rsp_valid),
.in_rsp_mask (lsu_dcache_if[i].rsp_data.mask),
.in_rsp_data (lsu_dcache_if[i].rsp_data.data),
.in_rsp_tag (lsu_dcache_if[i].rsp_data.tag),
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
// Output request
.out_req_valid (dcache_coalesced_if.req_valid),
.out_req_mask (dcache_coalesced_if.req_data.mask),
.out_req_rw (dcache_coalesced_if.req_data.rw),
.out_req_byteen (dcache_coalesced_if.req_data.byteen),
.out_req_addr (dcache_coalesced_if.req_data.addr),
.out_req_flags (dcache_coalesced_if.req_data.flags),
.out_req_data (dcache_coalesced_if.req_data.data),
.out_req_tag (dcache_coalesced_if.req_data.tag),
.out_req_ready (dcache_coalesced_if.req_ready),
// Output response
.out_rsp_valid (dcache_coalesced_if.rsp_valid),
.out_rsp_mask (dcache_coalesced_if.rsp_data.mask),
.out_rsp_data (dcache_coalesced_if.rsp_data.data),
.out_rsp_tag (dcache_coalesced_if.rsp_data.tag),
.out_rsp_ready (dcache_coalesced_if.rsp_ready)
);
end else begin
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]);
end
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
VX_lsu_adapter #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH),
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
.ARBITER ("P"),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) lsu_adapter (
.clk (clk),
.reset (reset),
.lsu_mem_if (dcache_coalesced_if),
.mem_bus_if (dcache_bus_tmp_if)
);
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
end
end
`ifdef PERF_ENABLE
wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle;

View file

@ -1,199 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_lmem_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS],
VX_lsu_mem_if.master lsu_mem_out_if [`NUM_LSU_BLOCKS]
);
`STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);
VX_lsu_mem_if #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lsu_lmem_if[`NUM_LSU_BLOCKS]();
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAG_LOCAL];
end
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
wire req_global_ready;
wire req_local_ready;
VX_elastic_buffer #(
.DATAW (REQ_DATAW),
.SIZE (2),
.OUT_REG (3)
) req_global_buf (
.clk (clk),
.reset (reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
.data_in ({
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw,
lsu_mem_in_if[i].req_data.addr,
lsu_mem_in_if[i].req_data.data,
lsu_mem_in_if[i].req_data.byteen,
lsu_mem_in_if[i].req_data.flags,
lsu_mem_in_if[i].req_data.tag
}),
.ready_in (req_global_ready),
.valid_out (lsu_mem_out_if[i].req_valid),
.data_out ({
lsu_mem_out_if[i].req_data.mask,
lsu_mem_out_if[i].req_data.rw,
lsu_mem_out_if[i].req_data.addr,
lsu_mem_out_if[i].req_data.data,
lsu_mem_out_if[i].req_data.byteen,
lsu_mem_out_if[i].req_data.flags,
lsu_mem_out_if[i].req_data.tag
}),
.ready_out (lsu_mem_out_if[i].req_ready)
);
VX_elastic_buffer #(
.DATAW (REQ_DATAW),
.SIZE (0),
.OUT_REG (0)
) req_local_buf (
.clk (clk),
.reset (reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
.data_in ({
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw,
lsu_mem_in_if[i].req_data.addr,
lsu_mem_in_if[i].req_data.data,
lsu_mem_in_if[i].req_data.byteen,
lsu_mem_in_if[i].req_data.flags,
lsu_mem_in_if[i].req_data.tag
}),
.ready_in (req_local_ready),
.valid_out (lsu_lmem_if[i].req_valid),
.data_out ({
lsu_lmem_if[i].req_data.mask,
lsu_lmem_if[i].req_data.rw,
lsu_lmem_if[i].req_data.addr,
lsu_lmem_if[i].req_data.data,
lsu_lmem_if[i].req_data.byteen,
lsu_lmem_if[i].req_data.flags,
lsu_lmem_if[i].req_data.tag
}),
.ready_out (lsu_lmem_if[i].req_ready)
);
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|| (req_local_ready && is_addr_local);
VX_stream_arb #(
.NUM_INPUTS (2),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.OUT_BUF (1)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in ({
lsu_lmem_if[i].rsp_valid,
lsu_mem_out_if[i].rsp_valid
}),
.ready_in ({
lsu_lmem_if[i].rsp_ready,
lsu_mem_out_if[i].rsp_ready
}),
.data_in ({
lsu_lmem_if[i].rsp_data,
lsu_mem_out_if[i].rsp_data
}),
.data_out (lsu_mem_in_if[i].rsp_data),
.valid_out (lsu_mem_in_if[i].rsp_valid),
.ready_out (lsu_mem_in_if[i].rsp_ready),
`UNUSED_PIN (sel_out)
);
end
VX_mem_bus_if #(
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_if[LSU_NUM_REQS]();
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices
VX_mem_bus_if #(
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
VX_lsu_adapter #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH),
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
.ARBITER ("P"),
.REQ_OUT_BUF (3),
.RSP_OUT_BUF (0)
) lsu_adapter (
.clk (clk),
.reset (reset),
.lsu_mem_if (lsu_lmem_if[i]),
.mem_bus_if (lmem_bus_tmp_if)
);
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]);
end
end
`RESET_RELAY (lmem_reset, reset);
VX_local_mem #(
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
.SIZE (1 << `LMEM_LOG_SIZE),
.NUM_REQS (LSU_NUM_REQS),
.NUM_BANKS (`LMEM_NUM_BANKS),
.WORD_SIZE (LSU_WORD_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH),
.OUT_BUF (3)
) local_mem (
.clk (clk),
.reset (lmem_reset),
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
.mem_bus_if (lmem_bus_if)
);
endmodule

217
hw/rtl/core/VX_mem_unit.sv Normal file
View file

@ -0,0 +1,217 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_mem_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS],
VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS]
);
VX_lsu_mem_if #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lsu_dcache_if[`NUM_LSU_BLOCKS]();
`ifdef LMEM_ENABLE
`STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);
VX_lsu_mem_if #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lsu_lmem_if[`NUM_LSU_BLOCKS]();
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : demux_slices
VX_lmem_demux #(
.REQ0_OUT_BUF (3),
.REQ1_OUT_BUF (0),
.RSP_OUT_BUF (1)
) lmem_demux (
.clk (clk),
.reset (reset),
.lsu_in_if (lsu_mem_in_if[i]),
.cache_out_if (lsu_dcache_if[i]),
.lmem_out_if (lsu_lmem_if[i])
);
end
VX_mem_bus_if #(
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_if[LSU_NUM_REQS]();
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : adapter_slices
VX_mem_bus_if #(
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH)
) lmem_bus_tmp_if[`NUM_LSU_LANES]();
VX_lsu_adapter #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH),
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
.ARBITER ("P"),
.REQ_OUT_BUF (3),
.RSP_OUT_BUF (0)
) lmem_adapter (
.clk (clk),
.reset (reset),
.lsu_mem_if (lsu_lmem_if[i]),
.mem_bus_if (lmem_bus_tmp_if)
);
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]);
end
end
`RESET_RELAY (lmem_reset, reset);
VX_local_mem #(
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
.SIZE (1 << `LMEM_LOG_SIZE),
.NUM_REQS (LSU_NUM_REQS),
.NUM_BANKS (`LMEM_NUM_BANKS),
.WORD_SIZE (LSU_WORD_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH),
.OUT_BUF (3)
) local_mem (
.clk (clk),
.reset (lmem_reset),
`ifdef PERF_ENABLE
.cache_perf (cache_perf),
`endif
.mem_bus_if (lmem_bus_if)
);
`else
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
`ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]);
end
`endif
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : coalescer_blocks
VX_lsu_mem_if #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_coalesced_if();
if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : coalescer_if
`RESET_RELAY (mem_coalescer_reset, reset);
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
.NUM_REQS (`NUM_LSU_LANES),
.DATA_IN_SIZE (LSU_WORD_SIZE),
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
.ADDR_WIDTH (LSU_ADDR_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
) mem_coalescer (
.clk (clk),
.reset (mem_coalescer_reset),
// Input request
.in_req_valid (lsu_dcache_if[i].req_valid),
.in_req_mask (lsu_dcache_if[i].req_data.mask),
.in_req_rw (lsu_dcache_if[i].req_data.rw),
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
.in_req_addr (lsu_dcache_if[i].req_data.addr),
.in_req_flags (lsu_dcache_if[i].req_data.flags),
.in_req_data (lsu_dcache_if[i].req_data.data),
.in_req_tag (lsu_dcache_if[i].req_data.tag),
.in_req_ready (lsu_dcache_if[i].req_ready),
// Input response
.in_rsp_valid (lsu_dcache_if[i].rsp_valid),
.in_rsp_mask (lsu_dcache_if[i].rsp_data.mask),
.in_rsp_data (lsu_dcache_if[i].rsp_data.data),
.in_rsp_tag (lsu_dcache_if[i].rsp_data.tag),
.in_rsp_ready (lsu_dcache_if[i].rsp_ready),
// Output request
.out_req_valid (dcache_coalesced_if.req_valid),
.out_req_mask (dcache_coalesced_if.req_data.mask),
.out_req_rw (dcache_coalesced_if.req_data.rw),
.out_req_byteen (dcache_coalesced_if.req_data.byteen),
.out_req_addr (dcache_coalesced_if.req_data.addr),
.out_req_flags (dcache_coalesced_if.req_data.flags),
.out_req_data (dcache_coalesced_if.req_data.data),
.out_req_tag (dcache_coalesced_if.req_data.tag),
.out_req_ready (dcache_coalesced_if.req_ready),
// Output response
.out_rsp_valid (dcache_coalesced_if.rsp_valid),
.out_rsp_mask (dcache_coalesced_if.rsp_data.mask),
.out_rsp_data (dcache_coalesced_if.rsp_data.data),
.out_rsp_tag (dcache_coalesced_if.rsp_data.tag),
.out_rsp_ready (dcache_coalesced_if.rsp_ready)
);
end else begin
`ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]);
end
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH)
) dcache_bus_tmp_if[DCACHE_CHANNELS]();
VX_lsu_adapter #(
.NUM_LANES (DCACHE_CHANNELS),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH),
.TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH),
.ARBITER ("P"),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) lsu_adapter (
.clk (clk),
.reset (reset),
.lsu_mem_if (dcache_coalesced_if),
.mem_bus_if (dcache_bus_tmp_if)
);
for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin
`ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]);
end
end
endmodule

132
hw/rtl/mem/VX_lmem_demux.sv Normal file
View file

@ -0,0 +1,132 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_lmem_demux import VX_gpu_pkg::*; #(
parameter REQ0_OUT_BUF = 0,
parameter REQ1_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0
) (
input wire clk,
input wire reset,
VX_lsu_mem_if.slave lsu_in_if,
VX_lsu_mem_if.master cache_out_if,
VX_lsu_mem_if.master lmem_out_if
);
localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin
assign is_addr_local_mask[i] = lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL];
end
wire is_addr_global = | (lsu_in_if.req_data.mask & ~is_addr_local_mask);
wire is_addr_local = | (lsu_in_if.req_data.mask & is_addr_local_mask);
wire req_global_ready;
wire req_local_ready;
VX_elastic_buffer #(
.DATAW (REQ_DATAW),
.SIZE (2),
.OUT_REG (REQ0_OUT_BUF)
) req_global_buf (
.clk (clk),
.reset (reset),
.valid_in (lsu_in_if.req_valid && is_addr_global),
.data_in ({
lsu_in_if.req_data.mask & ~is_addr_local_mask,
lsu_in_if.req_data.rw,
lsu_in_if.req_data.addr,
lsu_in_if.req_data.data,
lsu_in_if.req_data.byteen,
lsu_in_if.req_data.flags,
lsu_in_if.req_data.tag
}),
.ready_in (req_global_ready),
.valid_out (cache_out_if.req_valid),
.data_out ({
cache_out_if.req_data.mask,
cache_out_if.req_data.rw,
cache_out_if.req_data.addr,
cache_out_if.req_data.data,
cache_out_if.req_data.byteen,
cache_out_if.req_data.flags,
cache_out_if.req_data.tag
}),
.ready_out (cache_out_if.req_ready)
);
VX_elastic_buffer #(
.DATAW (REQ_DATAW),
.SIZE (0),
.OUT_REG (REQ1_OUT_BUF)
) req_local_buf (
.clk (clk),
.reset (reset),
.valid_in (lsu_in_if.req_valid && is_addr_local),
.data_in ({
lsu_in_if.req_data.mask & is_addr_local_mask,
lsu_in_if.req_data.rw,
lsu_in_if.req_data.addr,
lsu_in_if.req_data.data,
lsu_in_if.req_data.byteen,
lsu_in_if.req_data.flags,
lsu_in_if.req_data.tag
}),
.ready_in (req_local_ready),
.valid_out (lmem_out_if.req_valid),
.data_out ({
lmem_out_if.req_data.mask,
lmem_out_if.req_data.rw,
lmem_out_if.req_data.addr,
lmem_out_if.req_data.data,
lmem_out_if.req_data.byteen,
lmem_out_if.req_data.flags,
lmem_out_if.req_data.tag
}),
.ready_out (lmem_out_if.req_ready)
);
assign lsu_in_if.req_ready = (req_global_ready && is_addr_global)
|| (req_local_ready && is_addr_local);
VX_stream_arb #(
.NUM_INPUTS (2),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.OUT_BUF (RSP_OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in ({
lmem_out_if.rsp_valid,
cache_out_if.rsp_valid
}),
.ready_in ({
lmem_out_if.rsp_ready,
cache_out_if.rsp_ready
}),
.data_in ({
lmem_out_if.rsp_data,
cache_out_if.rsp_data
}),
.data_out (lsu_in_if.rsp_data),
.valid_out (lsu_in_if.rsp_valid),
.ready_out (lsu_in_if.rsp_ready),
`UNUSED_PIN (sel_out)
);
endmodule