texture lerp DSP optimization + axi adapter multibank support

This commit is contained in:
Blaise Tine 2023-03-23 00:27:05 -04:00
parent fbddcf34ab
commit 57081f8440
9 changed files with 320 additions and 233 deletions

View file

@ -565,7 +565,7 @@ VX_avs_adapter #(
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.BURST_WIDTH (LMEM_BURST_CTRW),
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.REQ_TAG_WIDTH (AVS_REQ_TAGW + 1),
.TAG_WIDTH (AVS_REQ_TAGW + 1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.BUFFERED_REQ (2),
.BUFFERED_RSP (0)

View file

@ -33,7 +33,7 @@ module vortex_afu #(
parameter C_S_AXI_CTRL_ADDR_WIDTH = 6,
parameter C_S_AXI_CTRL_DATA_WIDTH = 32,
`ifdef NDEBUG
parameter C_M_AXI_MEM_ID_WIDTH = 16,
parameter C_M_AXI_MEM_ID_WIDTH = 20,
`else
parameter C_M_AXI_MEM_ID_WIDTH = 32,
`endif

View file

@ -8,60 +8,61 @@ import VX_gpu_types::*;
module Vortex_axi #(
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = `XLEN,
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
parameter AXI_NUM_BANKS = 1
)(
// Clock
input wire clk,
input wire reset,
// AXI write request address channel
output wire m_axi_awvalid,
input wire m_axi_awready,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
output wire [7:0] m_axi_awlen,
output wire [2:0] m_axi_awsize,
output wire [1:0] m_axi_awburst,
output wire [1:0] m_axi_awlock,
output wire [3:0] m_axi_awcache,
output wire [2:0] m_axi_awprot,
output wire [3:0] m_axi_awqos,
output wire [3:0] m_axi_awregion,
output wire m_axi_awvalid [AXI_NUM_BANKS],
input wire m_axi_awready [AXI_NUM_BANKS],
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr [AXI_NUM_BANKS],
output wire [AXI_TID_WIDTH-1:0] m_axi_awid [AXI_NUM_BANKS],
output wire [7:0] m_axi_awlen [AXI_NUM_BANKS],
output wire [2:0] m_axi_awsize [AXI_NUM_BANKS],
output wire [1:0] m_axi_awburst [AXI_NUM_BANKS],
output wire [1:0] m_axi_awlock [AXI_NUM_BANKS],
output wire [3:0] m_axi_awcache [AXI_NUM_BANKS],
output wire [2:0] m_axi_awprot [AXI_NUM_BANKS],
output wire [3:0] m_axi_awqos [AXI_NUM_BANKS],
output wire [3:0] m_axi_awregion [AXI_NUM_BANKS],
// AXI write request data channel
output wire m_axi_wvalid,
input wire m_axi_wready,
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
output wire [AXI_DATA_WIDTH/8-1:0] m_axi_wstrb,
output wire m_axi_wlast,
output wire m_axi_wvalid [AXI_NUM_BANKS],
input wire m_axi_wready [AXI_NUM_BANKS],
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata [AXI_NUM_BANKS],
output wire [AXI_DATA_WIDTH/8-1:0] m_axi_wstrb [AXI_NUM_BANKS],
output wire m_axi_wlast [AXI_NUM_BANKS],
// AXI write response channel
input wire m_axi_bvalid,
output wire m_axi_bready,
input wire [AXI_TID_WIDTH-1:0] m_axi_bid,
input wire [1:0] m_axi_bresp,
input wire m_axi_bvalid [AXI_NUM_BANKS],
output wire m_axi_bready [AXI_NUM_BANKS],
input wire [AXI_TID_WIDTH-1:0] m_axi_bid [AXI_NUM_BANKS],
input wire [1:0] m_axi_bresp [AXI_NUM_BANKS],
// AXI read request channel
output wire m_axi_arvalid,
input wire m_axi_arready,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
output wire [7:0] m_axi_arlen,
output wire [2:0] m_axi_arsize,
output wire [1:0] m_axi_arburst,
output wire [1:0] m_axi_arlock,
output wire [3:0] m_axi_arcache,
output wire [2:0] m_axi_arprot,
output wire [3:0] m_axi_arqos,
output wire [3:0] m_axi_arregion,
output wire m_axi_arvalid [AXI_NUM_BANKS],
input wire m_axi_arready [AXI_NUM_BANKS],
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr [AXI_NUM_BANKS],
output wire [AXI_TID_WIDTH-1:0] m_axi_arid [AXI_NUM_BANKS],
output wire [7:0] m_axi_arlen [AXI_NUM_BANKS],
output wire [2:0] m_axi_arsize [AXI_NUM_BANKS],
output wire [1:0] m_axi_arburst [AXI_NUM_BANKS],
output wire [1:0] m_axi_arlock [AXI_NUM_BANKS],
output wire [3:0] m_axi_arcache [AXI_NUM_BANKS],
output wire [2:0] m_axi_arprot [AXI_NUM_BANKS],
output wire [3:0] m_axi_arqos [AXI_NUM_BANKS],
output wire [3:0] m_axi_arregion [AXI_NUM_BANKS],
// AXI read response channel
input wire m_axi_rvalid,
output wire m_axi_rready,
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
input wire m_axi_rlast,
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
input wire [1:0] m_axi_rresp,
input wire m_axi_rvalid [AXI_NUM_BANKS],
output wire m_axi_rready [AXI_NUM_BANKS],
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata [AXI_NUM_BANKS],
input wire m_axi_rlast [AXI_NUM_BANKS],
input wire [AXI_TID_WIDTH-1:0] m_axi_rid [AXI_NUM_BANKS],
input wire [1:0] m_axi_rresp [AXI_NUM_BANKS],
// DCR write request
input wire dcr_wr_valid,
@ -87,29 +88,33 @@ module Vortex_axi #(
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data;
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready;
wire [`XLEN-1:0] m_axi_awaddr_unqual;
wire [`XLEN-1:0] m_axi_araddr_unqual;
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual;
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual;
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual;
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual;
wire [`XLEN-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS];
wire [`XLEN-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS];
assign m_axi_awaddr = `XLEN'(m_axi_awaddr_unqual);
assign m_axi_araddr = `XLEN'(m_axi_araddr_unqual);
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS];
assign m_axi_awid = AXI_TID_WIDTH'(m_axi_awid_unqual);
assign m_axi_arid = AXI_TID_WIDTH'(m_axi_arid_unqual);
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS];
wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS];
assign m_axi_rid_unqual = `VX_MEM_TAG_WIDTH'(m_axi_rid);
assign m_axi_bid_unqual = `VX_MEM_TAG_WIDTH'(m_axi_bid);
// Per-bank width adaptation between the module's AXI port arrays and the
// *_unqual wires declared above (one element per bank, AXI_NUM_BANKS in total).
// Outgoing address and ID values are taken from the *_unqual wires; incoming
// response IDs are cast to VX_MEM_TAG_WIDTH bits before landing on *_unqual.
// NOTE(review): the address casts target `XLEN, which is already the declared
// width of m_axi_awaddr_unqual / m_axi_araddr_unqual, while the ports are
// AXI_ADDR_WIDTH bits wide. The two match only while AXI_ADDR_WIDTH keeps its
// default of `XLEN -- confirm whether AXI_ADDR_WIDTH'(...) was intended.
for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin
// request side: *_unqual wires -> external port
assign m_axi_awaddr[i] = `XLEN'(m_axi_awaddr_unqual[i]);
assign m_axi_araddr[i] = `XLEN'(m_axi_araddr_unqual[i]);
assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]);
assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]);
// response side: external port -> *_unqual wires
assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]);
assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]);
end
VX_axi_adapter #(
.DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.ADDR_WIDTH (`XLEN),
.TAG_WIDTH (`VX_MEM_TAG_WIDTH)
.TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.NUM_BANKS (AXI_NUM_BANKS),
.BUFFERED_RSP ((AXI_NUM_BANKS > 1) ? 1 : 0)
) axi_adapter (
.clk (clk),
.reset (reset),
@ -196,4 +201,4 @@ module Vortex_axi #(
.busy (busy)
);
endmodule
endmodule

View file

@ -6,7 +6,7 @@ module VX_avs_adapter #(
parameter ADDR_WIDTH = 1,
parameter BURST_WIDTH = 1,
parameter NUM_BANKS = 1,
parameter REQ_TAG_WIDTH = 1,
parameter TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
parameter BUFFERED_REQ = 0,
parameter BUFFERED_RSP = 0
@ -20,13 +20,13 @@ module VX_avs_adapter #(
input wire [DATA_WIDTH/8-1:0] mem_req_byteen,
input wire [ADDR_WIDTH-1:0] mem_req_addr,
input wire [DATA_WIDTH-1:0] mem_req_data,
input wire [REQ_TAG_WIDTH-1:0] mem_req_tag,
input wire [TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready,
// Memory response
output wire mem_rsp_valid,
output wire [DATA_WIDTH-1:0] mem_rsp_data,
output wire [REQ_TAG_WIDTH-1:0] mem_rsp_tag,
output wire [TAG_WIDTH-1:0] mem_rsp_tag,
input wire mem_rsp_ready,
// AVS bus
@ -47,7 +47,7 @@ module VX_avs_adapter #(
// Requests handling //////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop;
wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out;
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
wire [BANK_ADDRW-1:0] req_bank_sel;
@ -78,7 +78,7 @@ module VX_avs_adapter #(
`UNUSED_VAR (req_queue_size)
VX_fifo_queue #(
.DATAW (REQ_TAG_WIDTH),
.DATAW (TAG_WIDTH),
.DEPTH (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
@ -141,7 +141,7 @@ module VX_avs_adapter #(
// Responses handling /////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out;
@ -174,7 +174,7 @@ module VX_avs_adapter #(
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + REQ_TAG_WIDTH),
.DATAW (DATA_WIDTH + TAG_WIDTH),
.ARBITER ("R"),
.BUFFERED (BUFFERED_RSP)
) rsp_arb (

View file

@ -5,7 +5,9 @@ module VX_axi_adapter #(
parameter DATA_WIDTH = 512,
parameter ADDR_WIDTH = 32,
parameter TAG_WIDTH = 8,
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - $clog2(DATA_WIDTH/8))
parameter NUM_BANKS = 1,
parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - $clog2(DATA_WIDTH/8)),
parameter BUFFERED_RSP = 0
) (
input wire clk,
input wire reset,
@ -26,131 +28,183 @@ module VX_axi_adapter #(
output wire mem_req_ready,
// AXI write request address channel
output wire m_axi_awvalid,
input wire m_axi_awready,
output wire [ADDR_WIDTH-1:0] m_axi_awaddr,
output wire [TAG_WIDTH-1:0] m_axi_awid,
output wire [7:0] m_axi_awlen,
output wire [2:0] m_axi_awsize,
output wire [1:0] m_axi_awburst,
output wire [1:0] m_axi_awlock,
output wire [3:0] m_axi_awcache,
output wire [2:0] m_axi_awprot,
output wire [3:0] m_axi_awqos,
output wire [3:0] m_axi_awregion,
output wire m_axi_awvalid [NUM_BANKS],
input wire m_axi_awready [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS],
output wire [7:0] m_axi_awlen [NUM_BANKS],
output wire [2:0] m_axi_awsize [NUM_BANKS],
output wire [1:0] m_axi_awburst [NUM_BANKS],
output wire [1:0] m_axi_awlock [NUM_BANKS],
output wire [3:0] m_axi_awcache [NUM_BANKS],
output wire [2:0] m_axi_awprot [NUM_BANKS],
output wire [3:0] m_axi_awqos [NUM_BANKS],
output wire [3:0] m_axi_awregion [NUM_BANKS],
// AXI write request data channel
output wire m_axi_wvalid,
input wire m_axi_wready,
output wire [DATA_WIDTH-1:0] m_axi_wdata,
output wire [DATA_WIDTH/8-1:0] m_axi_wstrb,
output wire m_axi_wlast,
output wire m_axi_wvalid [NUM_BANKS],
input wire m_axi_wready [NUM_BANKS],
output wire [DATA_WIDTH-1:0] m_axi_wdata [NUM_BANKS],
output wire [DATA_WIDTH/8-1:0] m_axi_wstrb [NUM_BANKS],
output wire m_axi_wlast [NUM_BANKS],
// AXI write response channel
input wire m_axi_bvalid,
output wire m_axi_bready,
input wire [TAG_WIDTH-1:0] m_axi_bid,
input wire [1:0] m_axi_bresp,
input wire m_axi_bvalid [NUM_BANKS],
output wire m_axi_bready [NUM_BANKS],
input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS],
input wire [1:0] m_axi_bresp [NUM_BANKS],
// AXI read address channel
output wire m_axi_arvalid,
input wire m_axi_arready,
output wire [ADDR_WIDTH-1:0] m_axi_araddr,
output wire [TAG_WIDTH-1:0] m_axi_arid,
output wire [7:0] m_axi_arlen,
output wire [2:0] m_axi_arsize,
output wire [1:0] m_axi_arburst,
output wire [1:0] m_axi_arlock,
output wire [3:0] m_axi_arcache,
output wire [2:0] m_axi_arprot,
output wire [3:0] m_axi_arqos,
output wire [3:0] m_axi_arregion,
output wire m_axi_arvalid [NUM_BANKS],
input wire m_axi_arready [NUM_BANKS],
output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS],
output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS],
output wire [7:0] m_axi_arlen [NUM_BANKS],
output wire [2:0] m_axi_arsize [NUM_BANKS],
output wire [1:0] m_axi_arburst [NUM_BANKS],
output wire [1:0] m_axi_arlock [NUM_BANKS],
output wire [3:0] m_axi_arcache [NUM_BANKS],
output wire [2:0] m_axi_arprot [NUM_BANKS],
output wire [3:0] m_axi_arqos [NUM_BANKS],
output wire [3:0] m_axi_arregion [NUM_BANKS],
// AXI read response channel
input wire m_axi_rvalid,
output wire m_axi_rready,
input wire [DATA_WIDTH-1:0] m_axi_rdata,
input wire m_axi_rlast,
input wire [TAG_WIDTH-1:0] m_axi_rid,
input wire [1:0] m_axi_rresp
input wire m_axi_rvalid [NUM_BANKS],
output wire m_axi_rready [NUM_BANKS],
input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_BANKS],
input wire m_axi_rlast [NUM_BANKS],
input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS],
input wire [1:0] m_axi_rresp [NUM_BANKS]
);
localparam AXSIZE = $clog2(DATA_WIDTH/8);
localparam AXSIZE = $clog2(DATA_WIDTH/8);
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
wire mem_req_fire = mem_req_valid && mem_req_ready;
wire m_axi_aw_fire = m_axi_awvalid && m_axi_awready;
wire m_axi_w_fire = m_axi_wvalid && m_axi_wready;
wire [BANK_ADDRW-1:0] req_bank_sel;
reg m_axi_aw_ack;
reg m_axi_w_ack;
always @(posedge clk) begin
if (reset) begin
m_axi_aw_ack <= 0;
m_axi_w_ack <= 0;
end else begin
if (mem_req_fire) begin
m_axi_aw_ack <= 0;
m_axi_w_ack <= 0;
end else begin
if (m_axi_aw_fire)
m_axi_aw_ack <= 1;
if (m_axi_w_fire)
m_axi_w_ack <= 1;
if (NUM_BANKS > 1) begin
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin
assign req_bank_sel = '0;
end
wire mem_req_fire = mem_req_valid && mem_req_ready;
reg m_axi_aw_ack [NUM_BANKS];
reg m_axi_w_ack [NUM_BANKS];
for (genvar i = 0; i < NUM_BANKS; ++i) begin
wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i];
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
always @(posedge clk) begin
if (reset) begin
m_axi_aw_ack[i] <= 0;
m_axi_w_ack[i] <= 0;
end else begin
if (mem_req_fire && (req_bank_sel == i)) begin
m_axi_aw_ack[i] <= 0;
m_axi_w_ack[i] <= 0;
end else begin
if (m_axi_aw_fire)
m_axi_aw_ack[i] <= 1;
if (m_axi_w_fire)
m_axi_w_ack[i] <= 1;
end
end
end
end
end
end
// Vortex request ack
wire axi_write_ready = (m_axi_awready || m_axi_aw_ack) && (m_axi_wready || m_axi_w_ack);
assign mem_req_ready = mem_req_rw ? axi_write_ready : m_axi_arready;
wire axi_write_ready [NUM_BANKS];
// AXI write request address channel
assign m_axi_awvalid = mem_req_valid && mem_req_rw && ~m_axi_aw_ack;
assign m_axi_awaddr = ADDR_WIDTH'(mem_req_addr) << AXSIZE;
assign m_axi_awid = mem_req_tag;
assign m_axi_awlen = 8'b00000000;
assign m_axi_awsize = 3'(AXSIZE);
assign m_axi_awburst = 2'b00;
assign m_axi_awlock = 2'b00;
assign m_axi_awcache = 4'b0000;
assign m_axi_awprot = 3'b000;
assign m_axi_awqos = 4'b0000;
assign m_axi_awregion = 4'b0000;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
&& (m_axi_wready[i] || m_axi_w_ack[i]);
end
// AXI write request data channel
assign m_axi_wvalid = mem_req_valid && mem_req_rw && ~m_axi_w_ack;
assign m_axi_wdata = mem_req_data;
assign m_axi_wstrb = mem_req_byteen;
assign m_axi_wlast = 1'b1;
// Vortex request ack
if (NUM_BANKS > 1) begin
assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel];
end else begin
assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0];
end
// AXI write request address channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i];
assign m_axi_awaddr[i] = ADDR_WIDTH'(mem_req_addr) << AXSIZE;
assign m_axi_awid[i] = mem_req_tag;
assign m_axi_awlen[i] = 8'b00000000;
assign m_axi_awsize[i] = 3'(AXSIZE);
assign m_axi_awburst[i] = 2'b00;
assign m_axi_awlock[i] = 2'b00;
assign m_axi_awcache[i] = 4'b0000;
assign m_axi_awprot[i] = 3'b000;
assign m_axi_awqos[i] = 4'b0000;
assign m_axi_awregion[i]= 4'b0000;
end
// AXI write request data channel
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i];
assign m_axi_wdata[i] = mem_req_data;
assign m_axi_wstrb[i] = mem_req_byteen;
assign m_axi_wlast[i] = 1'b1;
end
// AXI write response channel (ignore)
`UNUSED_VAR (m_axi_bvalid)
`UNUSED_VAR (m_axi_bid)
`UNUSED_VAR (m_axi_bresp)
assign m_axi_bready = 1'b1;
`RUNTIME_ASSERT(~m_axi_bvalid || m_axi_bresp == 0, ("%t: *** AXI response error", $time));
for (genvar i = 0; i < NUM_BANKS; ++i) begin
`UNUSED_VAR (m_axi_bvalid[i])
`UNUSED_VAR (m_axi_bid[i])
`UNUSED_VAR (m_axi_bresp[i])
assign m_axi_bready[i] = 1'b1;
`RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time));
end
// AXI read request channel
assign m_axi_arvalid = mem_req_valid && ~mem_req_rw;
assign m_axi_araddr = ADDR_WIDTH'(mem_req_addr) << AXSIZE;
assign m_axi_arid = mem_req_tag;
assign m_axi_arlen = 8'b00000000;
assign m_axi_arsize = 3'(AXSIZE);
assign m_axi_arburst = 2'b00;
assign m_axi_arlock = 2'b00;
assign m_axi_arcache = 4'b0000;
assign m_axi_arprot = 3'b000;
assign m_axi_arqos = 4'b0000;
assign m_axi_arregion = 4'b0000;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i);
assign m_axi_araddr[i] = ADDR_WIDTH'(mem_req_addr) << AXSIZE;
assign m_axi_arid[i] = mem_req_tag;
assign m_axi_arlen[i] = 8'b00000000;
assign m_axi_arsize[i] = 3'(AXSIZE);
assign m_axi_arburst[i] = 2'b00;
assign m_axi_arlock[i] = 2'b00;
assign m_axi_arcache[i] = 4'b0000;
assign m_axi_arprot[i] = 3'b000;
assign m_axi_arqos[i] = 4'b0000;
assign m_axi_arregion[i]= 4'b0000;
end
// AXI read response channel
assign mem_rsp_valid = m_axi_rvalid;
assign mem_rsp_tag = m_axi_rid;
assign mem_rsp_data = m_axi_rdata;
`UNUSED_VAR (m_axi_rlast)
assign m_axi_rready = mem_rsp_ready;
`RUNTIME_ASSERT(~m_axi_rvalid || m_axi_rlast == 1, ("%t: *** AXI response error", $time));
`RUNTIME_ASSERT(~m_axi_rvalid || m_axi_rresp == 0, ("%t: *** AXI response error", $time));
// AXI read response channel
// Each bank's read-response signals become one input of a stream arbiter;
// the arbiter's single output drives mem_rsp_valid / mem_rsp_data / mem_rsp_tag
// and is back-pressured by mem_rsp_ready.
// Per-bank arbiter inputs: valid flag, packed payload, and returned ready.
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
`UNUSED_VAR (m_axi_rlast)
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign rsp_arb_valid_in[i] = m_axi_rvalid[i];
// payload layout is {data, tag}; the arbiter output below is unpacked in the same order
assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]};
// a bank's rready is whatever the arbiter returns for that input
assign m_axi_rready[i] = rsp_arb_ready_in[i];
// simulation-time checks on every valid beat: rlast must be 1 and rresp must be 0
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time));
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time));
end
// NOTE(review): ARBITER "R" presumably selects a round-robin policy and
// BUFFERED presumably controls output buffering inside VX_stream_arb -- both
// are defined outside this listing; confirm against that module.
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + TAG_WIDTH),
.ARBITER ("R"),
.BUFFERED (BUFFERED_RSP)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_arb_valid_in),
.data_in (rsp_arb_data_in),
.ready_in (rsp_arb_ready_in),
.valid_out (mem_rsp_valid),
.data_out ({mem_rsp_data, mem_rsp_tag}),
.ready_out (mem_rsp_ready)
);
endmodule
`TRACING_ON

View file

@ -415,7 +415,7 @@ module VX_mem_scheduler #(
if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= req_mask;
end
if (mem_rsp_fire_s) begin
if (mem_rsp_valid_s) begin
rsp_store[ibuf_raddr] <= rsp_store_n;
end
end

View file

@ -2,14 +2,32 @@
`TRACING_OFF
// 8-bit linear interpolation between in1 and in2, weighted by frac.
// Ports:
//   clk, enable : the registers below load on a rising clk edge only while enable is high
//   reset       : not used by this module (marked with UNUSED_VAR)
//   in1, in2    : 8-bit operands
//   frac        : 8-bit weight; in2 is weighted by frac and in1 by (8'hff - frac)
//   out         : 8-bit result
// NOTE(review): this listing appears to be a rendered diff with the +/- markers
// stripped, so it carries two drivers for `out`: a combinational one (via `tmp`)
// and a registered one (via `res`). Presumably the combinational pair is the
// removed version and the registered pipeline is its replacement -- confirm
// against the actual file before relying on either.
module VX_tex_lerp (
input wire clk,
input wire reset,
input wire enable,
input wire [7:0] in1,
input wire [7:0] in2,
input wire [7:0] frac,
output wire [7:0] out
);
`UNUSED_VAR (reset)
// Pipeline registers: the two products, their biased sum, and the final byte.
reg [15:0] p1, p2;
reg [16:0] sum;
reg [7:0] res;
// Complementary weight applied to in1.
wire [7:0] sub = (8'hff - frac);
// Combinational form: weighted sum plus a 0x80 bias, then (x + (x >> 8)) >> 8,
// which approximates a rounded divide by 255.
wire [16:0] tmp = in1 * sub + in2 * frac + 16'h80;
assign out = 8'((tmp + (tmp >> 8)) >> 8);
// Registered form of the same arithmetic, split over three register levels
// (products -> biased sum -> final byte). All levels load in the same clocked
// block, so a new input needs three enabled clock edges to reach res.
always @(posedge clk) begin
if (enable) begin
p1 <= in1 * sub;
p2 <= in2 * frac;
sum <= p1 + p2 + 17'h80;
res <= 8'((sum + (sum >> 8)) >> 8);
end
end
assign out = res;
endmodule
`TRACING_ON

View file

@ -27,10 +27,8 @@ module VX_tex_sampler #(
wire valid_s0, valid_s1;
wire [REQ_INFOW-1:0] req_info_s0, req_info_s1;
wire [NUM_LANES-1:0][31:0] texel_ul, texel_uh;
wire [NUM_LANES-1:0][31:0] texel_ul_s1, texel_uh_s1;
wire [NUM_LANES-1:0][1:0][`TEX_BLEND_FRAC-1:0] req_blends_s0;
wire [NUM_LANES-1:0][`TEX_BLEND_FRAC-1:0] blend_v, blend_v_s1;
wire [NUM_LANES-1:0][31:0] texel_v;
wire [NUM_LANES-1:0][`TEX_BLEND_FRAC-1:0] blend_v_s0, blend_v_s1;
wire [NUM_LANES-1:0][3:0][31:0] fmt_texels, fmt_texels_s0;
wire stall_out;
@ -47,6 +45,7 @@ module VX_tex_sampler #(
VX_pipe_register #(
.DATAW (1 + REQ_INFOW + (NUM_LANES * 2 * `TEX_BLEND_FRAC) + (NUM_LANES * 4 * 32)),
.DEPTH (1),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
@ -59,57 +58,68 @@ module VX_tex_sampler #(
for (genvar i = 0; i < NUM_LANES; ++i) begin
for (genvar j = 0; j < 4; ++j) begin
VX_tex_lerp tex_lerp_ul (
.clk (clk),
.reset(reset),
.enable(~stall_out),
.in1 (fmt_texels_s0[i][0][j*8 +: 8]),
.in2 (fmt_texels_s0[i][1][j*8 +: 8]),
.frac (req_blends_s0[i][0]),
.out (texel_ul[i][j*8 +: 8])
);
);
VX_tex_lerp tex_lerp_uh (
.clk (clk),
.reset(reset),
.enable(~stall_out),
.in1 (fmt_texels_s0[i][2][j*8 +: 8]),
.in2 (fmt_texels_s0[i][3][j*8 +: 8]),
.frac (req_blends_s0[i][0]),
.out (texel_uh[i][j*8 +: 8])
);
end
assign blend_v[i] = req_blends_s0[i][1];
end
end
VX_pipe_register #(
.DATAW (1 + REQ_INFOW + (NUM_LANES * `TEX_BLEND_FRAC) + (2 * NUM_LANES * 32)),
for (genvar i = 0; i < NUM_LANES; ++i) begin
assign blend_v_s0[i] = req_blends_s0[i][1];
end
VX_shift_register #(
.DATAW (1 + REQ_INFOW + (NUM_LANES * `TEX_BLEND_FRAC)),
.DEPTH (3),
.RESETW (1)
) pipe_reg1 (
) shift_reg1 (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({valid_s0, req_info_s0, blend_v, texel_ul, texel_uh}),
.data_out ({valid_s1, req_info_s1, blend_v_s1, texel_ul_s1, texel_uh_s1})
.data_in ({valid_s0, req_info_s0, blend_v_s0}),
.data_out ({valid_s1, req_info_s1, blend_v_s1})
);
for (genvar i = 0; i < NUM_LANES; ++i) begin
for (genvar j = 0; j < 4; ++j) begin
VX_tex_lerp tex_lerp_v (
.in1 (texel_ul_s1[i][j*8 +: 8]),
.in2 (texel_uh_s1[i][j*8 +: 8]),
.clk (clk),
.reset(reset),
.enable(~stall_out),
.in1 (texel_ul[i][j*8 +: 8]),
.in2 (texel_uh[i][j*8 +: 8]),
.frac (blend_v_s1[i]),
.out (texel_v[i][j*8 +: 8])
.out (rsp_data[i][j*8 +: 8])
);
end
end
assign stall_out = rsp_valid && ~rsp_ready;
VX_pipe_register #(
.DATAW (1 + REQ_INFOW + (NUM_LANES * 32)),
.DEPTH (2),
VX_shift_register #(
.DATAW (1 + REQ_INFOW),
.DEPTH (3),
.RESETW (1)
) pipe_reg2 (
) shift_reg2 (
.clk (clk),
.reset (reset),
.enable (~stall_out),
.data_in ({valid_s1, req_info_s1, texel_v}),
.data_out ({rsp_valid, rsp_info, rsp_data})
.data_in ({valid_s1, req_info_s1}),
.data_out ({rsp_valid, rsp_info})
);
// can accept new request?

View file

@ -287,30 +287,30 @@ private:
#ifdef AXI_BUS
// Clears the simulator-driven AXI handshake signals on the device model:
// the three request-channel ready flags (w/aw/ar) and the two
// response-channel valid flags (r/b) are all set to 0.
// NOTE(review): this listing appears to be a rendered diff with the +/- markers
// stripped, so both the scalar assignments and the [0]-indexed assignments are
// shown; presumably only the indexed form (element 0 of each per-bank array)
// is live -- confirm against the actual file.
void reset_axi_bus() {
device_->m_axi_wready = 0;
device_->m_axi_awready = 0;
device_->m_axi_arready = 0;
device_->m_axi_rvalid = 0;
device_->m_axi_bvalid = 0;
// indexed form: only element 0 is written here
device_->m_axi_wready[0] = 0;
device_->m_axi_awready[0] = 0;
device_->m_axi_arready[0] = 0;
device_->m_axi_rvalid[0] = 0;
device_->m_axi_bvalid[0] = 0;
}
void eval_axi_bus(bool clk) {
if (!clk) {
mem_rd_rsp_ready_ = device_->m_axi_rready;
mem_wr_rsp_ready_ = device_->m_axi_bready;
mem_rd_rsp_ready_ = device_->m_axi_rready[0];
mem_wr_rsp_ready_ = device_->m_axi_bready[0];
return;
}
if (ram_ == nullptr) {
device_->m_axi_wready = 0;
device_->m_axi_awready = 0;
device_->m_axi_arready = 0;
device_->m_axi_wready[0] = 0;
device_->m_axi_awready[0] = 0;
device_->m_axi_arready[0] = 0;
return;
}
// process memory responses
if (mem_rd_rsp_active_
&& device_->m_axi_rvalid && mem_rd_rsp_ready_) {
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
mem_rd_rsp_active_ = false;
}
if (!mem_rd_rsp_active_) {
@ -326,22 +326,22 @@ private:
}
printf("\n");
*/
device_->m_axi_rvalid = 1;
device_->m_axi_rid = mem_rsp->tag;
device_->m_axi_rresp = 0;
device_->m_axi_rlast = 1;
memcpy((uint8_t*)device_->m_axi_rdata, mem_rsp->block.data(), MEM_BLOCK_SIZE);
device_->m_axi_rvalid[0] = 1;
device_->m_axi_rid[0] = mem_rsp->tag;
device_->m_axi_rresp[0] = 0;
device_->m_axi_rlast[0] = 1;
memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
pending_mem_reqs_.erase(mem_rsp_it);
mem_rd_rsp_active_ = true;
delete mem_rsp;
} else {
device_->m_axi_rvalid = 0;
device_->m_axi_rvalid[0] = 0;
}
}
// send memory write response
if (mem_wr_rsp_active_
&& device_->m_axi_bvalid && mem_wr_rsp_ready_) {
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
mem_wr_rsp_active_ = false;
}
if (!mem_wr_rsp_active_) {
@ -353,26 +353,26 @@ private:
/*
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
*/
device_->m_axi_bvalid = 1;
device_->m_axi_bid = mem_rsp->tag;
device_->m_axi_bresp = 0;
device_->m_axi_bvalid[0] = 1;
device_->m_axi_bid[0] = mem_rsp->tag;
device_->m_axi_bresp[0] = 0;
pending_mem_reqs_.erase(mem_rsp_it);
mem_wr_rsp_active_ = true;
delete mem_rsp;
} else {
device_->m_axi_bvalid = 0;
device_->m_axi_bvalid[0] = 0;
}
}
// select the memory bank
uint32_t req_addr = device_->m_axi_wvalid ? device_->m_axi_awaddr : device_->m_axi_araddr;
uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0];
// process memory requests
if ((device_->m_axi_wvalid || device_->m_axi_arvalid) && running_) {
if (device_->m_axi_wvalid) {
uint64_t byteen = device_->m_axi_wstrb;
unsigned base_addr = device_->m_axi_awaddr;
uint8_t* data = (uint8_t*)(device_->m_axi_wdata);
if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) {
if (device_->m_axi_wvalid[0]) {
uint64_t byteen = device_->m_axi_wstrb[0];
unsigned base_addr = device_->m_axi_awaddr[0];
uint8_t* data = (uint8_t*)device_->m_axi_wdata[0].data();
// check console output
if (base_addr >= IO_COUT_ADDR
@ -403,15 +403,15 @@ private:
}
auto mem_req = new mem_req_t();
mem_req->tag = device_->m_axi_awid;
mem_req->addr = device_->m_axi_awaddr;
mem_req->tag = device_->m_axi_awid[0];
mem_req->addr = device_->m_axi_awaddr[0];
mem_req->write = true;
mem_req->ready = true;
pending_mem_reqs_.emplace_back(mem_req);
// send dram request
ramulator::Request dram_req(
device_->m_axi_awaddr,
device_->m_axi_awaddr[0],
ramulator::Request::Type::WRITE,
0
);
@ -420,16 +420,16 @@ private:
} else {
// process reads
auto mem_req = new mem_req_t();
mem_req->tag = device_->m_axi_arid;
mem_req->addr = device_->m_axi_araddr;
ram_->read(mem_req->block.data(), device_->m_axi_araddr, MEM_BLOCK_SIZE);
mem_req->tag = device_->m_axi_arid[0];
mem_req->addr = device_->m_axi_araddr[0];
ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE);
mem_req->write = false;
mem_req->ready = false;
pending_mem_reqs_.emplace_back(mem_req);
// send dram request
ramulator::Request dram_req(
device_->m_axi_araddr,
device_->m_axi_araddr[0],
ramulator::Request::Type::READ,
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
mem_req->ready = true;
@ -440,9 +440,9 @@ private:
}
}
device_->m_axi_wready = running_;
device_->m_axi_awready = running_;
device_->m_axi_arready = running_;
device_->m_axi_wready[0] = running_;
device_->m_axi_awready[0] = running_;
device_->m_axi_arready[0] = running_;
}
#else