This commit is contained in:
Blaise Tine 2021-09-10 06:03:32 -04:00
commit 0dfdf6cd4d
24 changed files with 571 additions and 157 deletions

View file

@ -72,6 +72,9 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# adjust l1 block size to match l2
CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"

View file

@ -28,7 +28,12 @@ CFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
TOP = Vortex
ifdef AXI_BUS
TOP = Vortex_axi
CFLAGS += -DAXI_BUS
else
TOP = Vortex
endif
RTL_DIR = ../../hw/rtl
DPI_DIR = ../../hw/dpi

View file

@ -38,9 +38,9 @@ module VX_ibuffer #(
wire going_empty = empty_r[i] || (alm_empty_r[i] && reading);
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`IBUF_SIZE),
.OUTPUT_REG (1)
.DATAW (DATAW),
.SIZE (`IBUF_SIZE),
.OUT_REG (1)
) queue (
.clk (clk),
.reset (reset),

View file

@ -22,7 +22,7 @@ module VX_icache_stage #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
localparam OUTPUT_REG = 0;
localparam OUT_REG = 0;
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
@ -64,12 +64,12 @@ module VX_icache_stage #(
wire [`NW_BITS-1:0] rsp_wid = rsp_tag;
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUTPUT_REG && ifetch_rsp_if.valid);
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid);
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32),
.RESETW (1),
.DEPTH (OUTPUT_REG)
.DEPTH (OUT_REG)
) pipe_reg (
.clk (clk),
.reset (reset),

View file

@ -42,7 +42,8 @@ module VX_instr_demux (
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.OUT_REG (1)
) alu_buffer (
.clk (clk),
.reset (reset),
@ -61,7 +62,8 @@ module VX_instr_demux (
wire lsu_is_fence = `INST_LSU_IS_FENCE(ibuffer_if.op_mod);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.OUT_REG (1)
) lsu_buffer (
.clk (clk),
.reset (reset),
@ -82,7 +84,8 @@ module VX_instr_demux (
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid];
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
.OUT_REG (1)
) csr_buffer (
.clk (clk),
.reset (reset),
@ -101,7 +104,8 @@ module VX_instr_demux (
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.OUT_REG (1)
) fpu_buffer (
.clk (clk),
.reset (reset),
@ -123,7 +127,8 @@ module VX_instr_demux (
wire [31:0] gpu_rs2_data = gpr_rsp_if.rs2_data[tid];
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32))
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
.OUT_REG (1)
) gpu_buffer (
.clk (clk),
.reset (reset),

View file

@ -207,7 +207,7 @@ module VX_mem_unit # (
.DATA_SIZE (4),
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
.TYPE ("P"),
.BUFFERED_REQ (1),
.BUFFERED_REQ (2),
.BUFFERED_RSP (1)
) smem_arb (
.clk (clk),
@ -319,7 +319,7 @@ module VX_mem_unit # (
.TYPE ("R"),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
.BUFFERED_RSP (2)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),

124
hw/rtl/Vortex_axi.v Normal file
View file

@ -0,0 +1,124 @@
`include "VX_define.vh"
// Top-level wrapper that exposes the Vortex processor through AXI-style
// master ports. It instantiates the `Vortex` core and a `VX_axi_adapter`,
// and wires the core's mem_req_* / mem_rsp_* interface to the adapter,
// which in turn drives the m_axi_* ports of this module.
//
// Parameters:
//   AXI_DATA_WIDTH - width of the AXI data buses (defaults to the core's
//                    memory data width)
//   AXI_ADDR_WIDTH - width of the AXI address buses
//   AXI_TID_WIDTH  - width of the AXI transaction IDs (defaults to the
//                    core's memory tag width)
//
// NOTE: this port list declares no write-response channel and no
// wlast/rlast/rresp signals; only the signals listed below are exposed.
module Vortex_axi #(
parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH,
parameter AXI_ADDR_WIDTH = 32,
parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH,
// one strobe bit per data byte
localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
)(
// Clock
input wire clk,
input wire reset,
// AXI write request
output wire m_axi_wvalid,
output wire m_axi_awvalid,
output wire [AXI_TID_WIDTH-1:0] m_axi_awid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr,
output wire [7:0] m_axi_awlen,
output wire [2:0] m_axi_awsize,
output wire [1:0] m_axi_awburst,
output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata,
output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb,
input wire m_axi_wready,
input wire m_axi_awready,
// AXI read request
output wire m_axi_arvalid,
output wire [AXI_TID_WIDTH-1:0] m_axi_arid,
output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr,
output wire [7:0] m_axi_arlen,
output wire [2:0] m_axi_arsize,
output wire [1:0] m_axi_arburst,
input wire m_axi_arready,
// AXI read response
input wire m_axi_rvalid,
input wire [AXI_TID_WIDTH-1:0] m_axi_rid,
input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata,
output wire m_axi_rready,
// Status
output wire busy
);
// Core-side memory request bus (driven by the Vortex instance, consumed by the adapter)
wire mem_req_valid;
wire mem_req_rw;
wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen;
wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr;
wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data;
wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
// Core-side memory response bus (driven by the adapter, consumed by the Vortex instance)
wire mem_rsp_valid;
wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data;
wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_ready;
// Bridge between the core memory bus and the AXI ports of this module.
// The core-side widths come from the VX_MEM_* macros; the AXI-side widths
// are forwarded from this module's parameters.
VX_axi_adapter #(
.VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH),
.VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.AXI_DATA_WIDTH (AXI_DATA_WIDTH),
.AXI_ADDR_WIDTH (AXI_ADDR_WIDTH),
.AXI_TID_WIDTH (AXI_TID_WIDTH)
) axi_adapter (
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
.m_axi_wvalid (m_axi_wvalid),
.m_axi_awvalid (m_axi_awvalid),
.m_axi_awid (m_axi_awid),
.m_axi_awaddr (m_axi_awaddr),
.m_axi_awlen (m_axi_awlen),
.m_axi_awsize (m_axi_awsize),
.m_axi_awburst (m_axi_awburst),
.m_axi_wdata (m_axi_wdata),
.m_axi_wstrb (m_axi_wstrb),
.m_axi_wready (m_axi_wready),
.m_axi_awready (m_axi_awready),
.m_axi_arvalid (m_axi_arvalid),
.m_axi_arid (m_axi_arid),
.m_axi_araddr (m_axi_araddr),
.m_axi_arlen (m_axi_arlen),
.m_axi_arsize (m_axi_arsize),
.m_axi_arburst (m_axi_arburst),
.m_axi_arready (m_axi_arready),
.m_axi_rvalid (m_axi_rvalid),
.m_axi_rid (m_axi_rid),
.m_axi_rdata (m_axi_rdata),
.m_axi_rready (m_axi_rready)
);
// The processor itself. The rtlsim simulator reaches into this instance by
// its hierarchical name (`Vortex_axi->vortex->...`), so the instance name
// `vortex` must not be changed without updating the simulator.
Vortex vortex (
.clk (clk),
.reset (reset),
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
.busy (busy)
);
endmodule

View file

@ -42,7 +42,7 @@ module VX_avs_wrapper #(
);
localparam BANK_ADDRW = `LOG2UP(AVS_BANKS);
localparam OUTPUT_REG = (AVS_BANKS > 2);
localparam OUT_REG = (AVS_BANKS > 2);
// Requests handling
@ -78,9 +78,9 @@ module VX_avs_wrapper #(
`UNUSED_VAR (req_queue_size)
VX_fifo_queue #(
.DATAW (REQ_TAG_WIDTH),
.SIZE (RD_QUEUE_SIZE),
.OUTPUT_REG (!OUTPUT_REG)
.DATAW (REQ_TAG_WIDTH),
.SIZE (RD_QUEUE_SIZE),
.OUT_REG (!OUT_REG)
) rd_req_queue (
.clk (clk),
.reset (reset),
@ -122,9 +122,9 @@ module VX_avs_wrapper #(
for (genvar i = 0; i < AVS_BANKS; i++) begin
VX_fifo_queue #(
.DATAW (AVS_DATA_WIDTH),
.SIZE (RD_QUEUE_SIZE),
.OUTPUT_REG (!OUTPUT_REG)
.DATAW (AVS_DATA_WIDTH),
.SIZE (RD_QUEUE_SIZE),
.OUT_REG (!OUT_REG)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
@ -150,7 +150,7 @@ module VX_avs_wrapper #(
.NUM_REQS (AVS_BANKS),
.DATAW (AVS_DATA_WIDTH + REQ_TAG_WIDTH),
.TYPE ("R"),
.BUFFERED (OUTPUT_REG ? 1 : 0)
.BUFFERED (OUT_REG ? 1 : 0)
) rsp_arb (
.clk (clk),
.reset (reset),

View file

@ -520,8 +520,8 @@ VX_mem_arb #(
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.TYPE ("P"),
.BUFFERED_REQ (0),
.BUFFERED_RSP (0)
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
@ -731,9 +731,9 @@ end
`RESET_RELAY (cci_rdq_reset);
VX_fifo_queue #(
.DATAW (CCI_RD_QUEUE_DATAW),
.SIZE (CCI_RD_QUEUE_SIZE),
.OUTPUT_REG (1)
.DATAW (CCI_RD_QUEUE_DATAW),
.SIZE (CCI_RD_QUEUE_SIZE),
.OUT_REG (1)
) cci_rd_req_queue (
.clk (clk),
.reset (cci_rdq_reset),
@ -880,7 +880,7 @@ assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_
assign cmd_run_done = !vx_busy;
Vortex #() vortex (
Vortex vortex (
`SCOPE_BIND_afu_vortex
.clk (clk),

View file

@ -148,7 +148,7 @@ module VX_bank #(
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
wire [NUM_PORTS-1:0][CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire [`CACHE_LINE_WIDTH-1:0] rdata_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire valid_st0, valid_st1;
@ -305,46 +305,15 @@ module VX_bank #(
wire mreq_push_st1 = (read_st1 && miss_st1 && !mshr_pending_st1)
|| write_st1;
wire [`CACHE_LINE_WIDTH-1:0] line_wdata_st1;
wire [CACHE_LINE_SIZE-1:0] line_byteen_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data_st1 = wdata_st1[0 +: NUM_PORTS * `WORD_WIDTH];
if (`WORDS_PER_LINE > 1) begin
reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r;
reg [CACHE_LINE_SIZE-1:0] line_byteen_r;
if (NUM_PORTS > 1) begin
always @(*) begin
line_wdata_r = 'x;
line_byteen_r = 0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask_st1[i]) begin
line_wdata_r[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[i];
line_byteen_r[wsel_st1[i] * WORD_SIZE +: WORD_SIZE] = byteen_st1[i];
end
end
end
end else begin
always @(*) begin
line_wdata_r = {`WORDS_PER_LINE{creq_data_st1}};
line_byteen_r = 0;
line_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
end
end
assign line_wdata_st1 = line_wdata_r;
assign line_byteen_st1 = line_byteen_r;
end else begin
`UNUSED_VAR (wsel_st1)
assign line_wdata_st1 = creq_data_st1;
assign line_byteen_st1 = byteen_st1;
end
VX_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE(CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE)
) data_access (
@ -359,6 +328,8 @@ module VX_bank #(
.stall (crsq_stall),
.addr (addr_st1),
.wsel (wsel_st1),
.pmask (pmask_st1),
// reading
.readen (valid_st1 && read_st1),
@ -367,8 +338,8 @@ module VX_bank #(
// writing
.writeen (valid_st1 && writeen_st1),
.is_fill (is_fill_st1),
.byteen (line_byteen_st1),
.write_data (line_wdata_st1),
.byteen (byteen_st1),
.write_data (creq_data_st1),
.fill_data (wdata_st1)
);
@ -454,20 +425,13 @@ module VX_bank #(
assign crsq_pmask = pmask_st1;
assign crsq_tid = req_tid_st1;
assign crsq_data = rdata_st1;
assign crsq_tag = tag_st1;
if (`WORDS_PER_LINE > 1) begin
for (genvar i = 0; i < NUM_PORTS; ++i) begin
assign crsq_data[i] = rdata_st1[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH];
end
end else begin
assign crsq_data = rdata_st1;
end
VX_elastic_buffer #(
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
.SIZE (CRSQ_SIZE),
.OUTPUT_REG (1 == NUM_BANKS)
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
.SIZE (CRSQ_SIZE),
.OUT_REG (1 == NUM_BANKS)
) core_rsp_req (
.clk (clk),
.reset (reset),

View file

@ -314,9 +314,9 @@ module VX_cache #(
`RESET_RELAY (mrsq_reset);
VX_elastic_buffer #(
.DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.OUTPUT_REG (MRSQ_SIZE > 2)
.DATAW (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue (
.clk (clk),
.reset (mrsq_reset),

View file

@ -9,10 +9,14 @@ module VX_data_access #(
parameter CACHE_LINE_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
// Number of ports per banks
parameter NUM_PORTS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1
parameter WRITE_ENABLE = 1,
localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS)
) (
input wire clk,
input wire reset,
@ -30,15 +34,18 @@ module VX_data_access #(
input wire[`LINE_ADDR_WIDTH-1:0] addr,
`IGNORE_UNUSED_END
input wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] wsel,
input wire [NUM_PORTS-1:0] pmask,
// reading
input wire readen,
output wire [`CACHE_LINE_WIDTH-1:0] read_data,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] read_data,
// writing
input wire writeen,
input wire is_fill,
input wire [CACHE_LINE_SIZE-1:0] byteen,
input wire [`CACHE_LINE_WIDTH-1:0] write_data,
// Per-port byte enables: one WORD_SIZE-bit mask for each port. The write
// merge logic below reads `byteen[i]` as the full mask of port i, so the
// port must be a packed array of NUM_PORTS masks (a flat WORD_SIZE-bit
// vector would make `byteen[i]` a single bit). Identical to the flat form
// when NUM_PORTS == 1.
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] write_data,
input wire [`CACHE_LINE_WIDTH-1:0] fill_data
);
@ -50,25 +57,58 @@ module VX_data_access #(
localparam BYTEENW = WRITE_ENABLE ? CACHE_LINE_SIZE : 1;
wire [`LINE_SELECT_BITS-1:0] line_addr;
wire [`CACHE_LINE_WIDTH-1:0] rdata;
wire [`CACHE_LINE_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
assign line_addr = addr[`LINE_SELECT_BITS-1:0];
wire [`LINE_SELECT_BITS-1:0] line_addr = addr[`LINE_SELECT_BITS-1:0];
if (WRITE_ENABLE) begin
assign wren = is_fill ? {BYTEENW{writeen}} : (byteen & {BYTEENW{writeen}});
assign wdata = is_fill ? fill_data : write_data;
end else begin
wire [`CACHE_LINE_WIDTH-1:0] line_wdata;
wire [CACHE_LINE_SIZE-1:0] line_byteen;
if (`WORDS_PER_LINE > 1) begin
reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r;
reg [CACHE_LINE_SIZE-1:0] line_byteen_r;
if (NUM_PORTS > 1) begin
always @(*) begin
line_wdata_r = 'x;
line_byteen_r = 0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask[i]) begin
line_wdata_r[wsel[i] * `WORD_WIDTH +: `WORD_WIDTH] = write_data[i];
line_byteen_r[wsel[i] * WORD_SIZE +: WORD_SIZE] = byteen[i];
end
end
end
end else begin
`UNUSED_VAR (pmask)
always @(*) begin
line_wdata_r = {`WORDS_PER_LINE{write_data}};
line_byteen_r = 0;
line_byteen_r[wsel * WORD_SIZE +: WORD_SIZE] = byteen;
end
end
assign line_wdata = line_wdata_r;
assign line_byteen = line_byteen_r;
end else begin
`UNUSED_VAR (wsel)
`UNUSED_VAR (pmask)
assign line_wdata = write_data;
assign line_byteen = byteen;
end
assign wren = is_fill ? {BYTEENW{writeen}} : ({BYTEENW{writeen}} & line_byteen);
assign wdata = is_fill ? fill_data : line_wdata;
end else begin
`UNUSED_VAR (is_fill)
`UNUSED_VAR (byteen)
`UNUSED_VAR (byteen)
`UNUSED_VAR (pmask)
`UNUSED_VAR (write_data)
assign wren = writeen;
assign wdata = fill_data;
end
VX_sp_ram #(
.DATAW (CACHE_LINE_SIZE * 8),
.DATAW (`CACHE_LINE_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (BYTEENW),
.NO_RWCHECK (1)
@ -78,9 +118,17 @@ module VX_data_access #(
.wren (wren),
.wdata (wdata),
.rden (1'b1),
.rdata (read_data)
.rdata (rdata)
);
// Read-side word selection: each port returns the word of the fetched
// line addressed by its own word-select value.
if (`WORDS_PER_LINE > 1) begin
    for (genvar i = 0; i < NUM_PORTS; ++i) begin
        // Drive only this port's slice. Assigning the whole `read_data`
        // bus here would create one driver per port and a width mismatch
        // as soon as NUM_PORTS > 1.
        assign read_data[i] = rdata[wsel[i] * `WORD_WIDTH +: `WORD_WIDTH];
    end
end else begin
    // A line holds a single word, so the line is the read result.
    assign read_data = rdata;
end
`UNUSED_VAR (stall)
`ifdef DBG_PRINT_CACHE_DATA

View file

@ -127,9 +127,9 @@ module VX_shared_mem #(
assign core_req_writeonly_unqual = ~(| core_req_read_mask_unqual);
VX_elastic_buffer #(
.DATAW (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1),
.SIZE (CREQ_SIZE),
.OUTPUT_REG (1) // output should be registered for the data_store addr port
.DATAW (NUM_BANKS * (1 + 1 + `LINE_ADDR_WIDTH + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS) + NUM_BANKS + 1),
.SIZE (CREQ_SIZE),
.OUT_REG (1) // output should be registered for the data_store addr port
) core_req_queue (
.clk (clk),
.reset (reset),

View file

@ -100,7 +100,7 @@ module VX_fp_ncomp #(
VX_pipe_register #(
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fp_class_t) + 1 + 1)),
.RESETW (1),
.DEPTH (1)
.DEPTH (0)
) pipe_reg0 (
.clk (clk),
.reset (reset),

View file

@ -0,0 +1,88 @@
`include "VX_define.vh"
// Combinational bridge from the Vortex memory request/response interface to
// AXI-style master signals.
//
// Every Vortex request becomes a single-beat transfer (len = 0) of the full
// data width: writes drive the write-address and write-data signals
// together, reads drive the read-address signals. Read responses are passed
// straight back to the core. The block-granular Vortex address is converted
// to a byte address by shifting left by log2(bytes per block).
//
// Parameters:
//   VX_DATA_WIDTH / VX_ADDR_WIDTH / VX_TAG_WIDTH    - core-side bus widths
//   AXI_DATA_WIDTH / AXI_ADDR_WIDTH / AXI_TID_WIDTH - AXI-side bus widths
// The data and tag/ID widths must match on both sides (checked below).
module VX_axi_adapter #(
    parameter VX_DATA_WIDTH    = 512,
    parameter VX_ADDR_WIDTH    = (32 - $clog2(VX_DATA_WIDTH/8)),
    parameter VX_TAG_WIDTH     = 8,
    parameter AXI_DATA_WIDTH   = VX_DATA_WIDTH,
    parameter AXI_ADDR_WIDTH   = 32,
    parameter AXI_TID_WIDTH    = VX_TAG_WIDTH,
    localparam VX_BYTEEN_WIDTH  = (VX_DATA_WIDTH / 8),
    localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8)
) (
    // Vortex request
    input wire                          mem_req_valid,
    input wire                          mem_req_rw,
    input wire [VX_BYTEEN_WIDTH-1:0]    mem_req_byteen,
    input wire [VX_ADDR_WIDTH-1:0]      mem_req_addr,
    input wire [VX_DATA_WIDTH-1:0]      mem_req_data,
    input wire [VX_TAG_WIDTH-1:0]       mem_req_tag,

    // Vortex response
    input wire                          mem_rsp_ready,
    output wire                         mem_rsp_valid,
    output wire [VX_DATA_WIDTH-1:0]     mem_rsp_data,
    output wire [VX_TAG_WIDTH-1:0]      mem_rsp_tag,
    output wire                         mem_req_ready,

    // AXI write request
    output wire                         m_axi_wvalid,
    output wire                         m_axi_awvalid,
    output wire [AXI_TID_WIDTH-1:0]     m_axi_awid,
    output wire [AXI_ADDR_WIDTH-1:0]    m_axi_awaddr,
    output wire [7:0]                   m_axi_awlen,
    output wire [2:0]                   m_axi_awsize,
    output wire [1:0]                   m_axi_awburst,
    output wire [AXI_DATA_WIDTH-1:0]    m_axi_wdata,
    output wire [AXI_STROBE_WIDTH-1:0]  m_axi_wstrb,
    input wire                          m_axi_wready,
    input wire                          m_axi_awready,

    // AXI read request
    output wire                         m_axi_arvalid,
    output wire [AXI_TID_WIDTH-1:0]     m_axi_arid,
    output wire [AXI_ADDR_WIDTH-1:0]    m_axi_araddr,
    output wire [7:0]                   m_axi_arlen,
    output wire [2:0]                   m_axi_arsize,
    output wire [1:0]                   m_axi_arburst,
    input wire                          m_axi_arready,

    // AXI read response
    input wire                          m_axi_rvalid,
    input wire [AXI_TID_WIDTH-1:0]      m_axi_rid,
    input wire [AXI_DATA_WIDTH-1:0]     m_axi_rdata,
    output wire                         m_axi_rready
);
    // log2 of the number of bytes moved per request
    localparam AXSIZE = $clog2(VX_DATA_WIDTH/8);

    // The adapter performs no width conversion.
    `STATIC_ASSERT((AXI_DATA_WIDTH == VX_DATA_WIDTH), ("invalid parameter"))
    `STATIC_ASSERT((AXI_TID_WIDTH == VX_TAG_WIDTH), ("invalid parameter"))

    // Request qualifiers and the byte address shared by both address channels
    wire                      write_req_valid = mem_req_valid & mem_req_rw;
    wire                      read_req_valid  = mem_req_valid & ~mem_req_rw;
    wire [AXI_ADDR_WIDTH-1:0] req_byte_addr   = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE;

    // AXI write address channel
    assign m_axi_awvalid = write_req_valid;
    assign m_axi_awid    = mem_req_tag;
    assign m_axi_awaddr  = req_byte_addr;
    assign m_axi_awlen   = 8'b00000000;
    assign m_axi_awsize  = 3'(AXSIZE);
    assign m_axi_awburst = 2'b00;

    // AXI write data channel (presented in the same cycle as the address)
    assign m_axi_wvalid  = write_req_valid;
    assign m_axi_wdata   = mem_req_data;
    assign m_axi_wstrb   = mem_req_byteen;

    // AXI read address channel
    assign m_axi_arvalid = read_req_valid;
    assign m_axi_arid    = mem_req_tag;
    assign m_axi_araddr  = req_byte_addr;
    assign m_axi_arlen   = 8'b00000000;
    assign m_axi_arsize  = 3'(AXSIZE);
    assign m_axi_arburst = 2'b00;

    // AXI read response channel -> Vortex response
    assign m_axi_rready  = mem_rsp_ready;
    assign mem_rsp_valid = m_axi_rvalid;
    assign mem_rsp_tag   = m_axi_rid;
    assign mem_rsp_data  = m_axi_rdata;

    // A write is accepted only when both write channels are ready in the
    // same cycle; a read only needs the read address channel.
    assign mem_req_ready = mem_req_rw ? (m_axi_awready && m_axi_wready) : m_axi_arready;

endmodule

View file

@ -5,7 +5,7 @@ module VX_dp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
parameter OUTPUT_REG = 0,
parameter OUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter ADDRW = $clog2(SIZE),
parameter LUTRAM = 0,
@ -35,7 +35,7 @@ module VX_dp_ram #(
`ifdef SYNTHESIS
if (LUTRAM) begin
if (OUTPUT_REG) begin
if (OUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
`USE_FAST_BRAM reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
@ -90,7 +90,7 @@ module VX_dp_ram #(
end
end
end else begin
if (OUTPUT_REG) begin
if (OUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
@ -173,7 +173,7 @@ module VX_dp_ram #(
end
end
`else
if (OUTPUT_REG) begin
if (OUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];

View file

@ -4,7 +4,7 @@
module VX_elastic_buffer #(
parameter DATAW = 1,
parameter SIZE = 2,
parameter OUTPUT_REG = 0,
parameter OUT_REG = 0,
parameter LUTRAM = 0
) (
input wire clk,
@ -32,8 +32,8 @@ module VX_elastic_buffer #(
end else if (SIZE == 2) begin
VX_skid_buffer #(
.DATAW (DATAW),
.OUTPUT_REG (OUTPUT_REG)
.DATAW (DATAW),
.OUT_REG (OUT_REG)
) queue (
.clk (clk),
.reset (reset),
@ -53,10 +53,10 @@ module VX_elastic_buffer #(
wire pop = valid_out && ready_out;
VX_fifo_queue #(
.DATAW (DATAW),
.SIZE (SIZE),
.OUTPUT_REG (OUTPUT_REG),
.LUTRAM (LUTRAM)
.DATAW (DATAW),
.SIZE (SIZE),
.OUT_REG (OUT_REG),
.LUTRAM (LUTRAM)
) queue (
.clk (clk),
.reset (reset),

View file

@ -8,7 +8,7 @@ module VX_fifo_queue #(
parameter ALM_EMPTY = 1,
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1),
parameter OUTPUT_REG = 0,
parameter OUT_REG = 0,
parameter LUTRAM = 1
) (
input wire clk,
@ -103,7 +103,7 @@ module VX_fifo_queue #(
if (SIZE == 2) begin
if (0 == OUTPUT_REG) begin
if (0 == OUT_REG) begin
reg [DATAW-1:0] shift_reg [1:0];
@ -138,7 +138,7 @@ module VX_fifo_queue #(
end else begin
if (0 == OUTPUT_REG) begin
if (0 == OUT_REG) begin
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] wr_ptr_r;
@ -154,10 +154,10 @@ module VX_fifo_queue #(
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (SIZE),
.OUTPUT_REG (0),
.LUTRAM (LUTRAM)
.DATAW (DATAW),
.SIZE (SIZE),
.OUT_REG (0),
.LUTRAM (LUTRAM)
) dp_ram (
.clk(clk),
.wren (push),
@ -197,10 +197,10 @@ module VX_fifo_queue #(
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (SIZE),
.OUTPUT_REG (0),
.LUTRAM (LUTRAM)
.DATAW (DATAW),
.SIZE (SIZE),
.OUT_REG (0),
.LUTRAM (LUTRAM)
) dp_ram (
.clk (clk),
.wren (push),

View file

@ -5,7 +5,7 @@ module VX_skid_buffer #(
parameter DATAW = 1,
parameter PASSTHRU = 0,
parameter NOBACKPRESSURE = 0,
parameter OUTPUT_REG = 0
parameter OUT_REG = 0
) (
input wire clk,
input wire reset,
@ -51,7 +51,7 @@ module VX_skid_buffer #(
end else begin
if (OUTPUT_REG) begin
if (OUT_REG) begin
reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;

View file

@ -5,7 +5,7 @@ module VX_sp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
parameter OUTPUT_REG = 0,
parameter OUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter ADDRW = $clog2(SIZE),
parameter LUTRAM = 0,
@ -34,7 +34,7 @@ module VX_sp_ram #(
`ifdef SYNTHESIS
if (LUTRAM) begin
if (OUTPUT_REG) begin
if (OUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
@ -90,7 +90,7 @@ module VX_sp_ram #(
end
end
end else begin
if (OUTPUT_REG) begin
if (OUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
@ -173,7 +173,7 @@ module VX_sp_ram #(
end
end
`else
if (OUTPUT_REG) begin
if (OUT_REG) begin
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];

View file

@ -98,31 +98,13 @@ module VX_stream_arbiter #(
if (LANES > 1) begin
wire [NUM_REQS-1:0][(LANES * (1 + DATAW))-1:0] valid_data_in;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valid_data_in[i] = {valid_in[i], data_in[i]};
end
VX_mux #(
.DATAW (LANES * (1 + DATAW)),
.N (NUM_REQS)
) data_in_mux (
.data_in (valid_data_in),
.sel_in (sel_index),
.data_out ({valid_in_sel, data_in_sel})
);
assign {valid_in_sel, data_in_sel} = valid_data_in[sel_index];
`UNUSED_VAR (sel_valid)
end else begin
VX_mux #(
.DATAW (DATAW),
.N (NUM_REQS)
) data_in_mux (
.data_in (data_in),
.sel_in (sel_index),
.data_out (data_in_sel)
);
assign data_in_sel = data_in[sel_index];
assign valid_in_sel = sel_valid;
end
@ -132,9 +114,9 @@ module VX_stream_arbiter #(
for (genvar i = 0; i < LANES; ++i) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUTPUT_REG (2 == BUFFERED)
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUT_REG (2 == BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),

View file

@ -37,9 +37,9 @@ module VX_stream_demux #(
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUTPUT_REG (2 == BUFFERED)
.DATAW (DATAW),
.PASSTHRU (0 == BUFFERED),
.OUT_REG (2 == BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),

View file

@ -66,7 +66,12 @@ Simulator::Simulator() {
Verilated::assertOn(false);
ram_ = nullptr;
#ifdef AXI_BUS
vortex_ = new VVortex_axi();
#else
vortex_ = new VVortex();
#endif
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
@ -103,15 +108,18 @@ void Simulator::attach_ram(RAM* ram) {
void Simulator::reset() {
print_bufs_.clear();
for (int b = 0; b < MEMORY_BANKS; ++b) {
mem_rsp_vec_[b].clear();
}
last_mem_rsp_bank_ = 0;
mem_rsp_active_ = false;
vortex_->mem_rsp_valid = 0;
vortex_->mem_req_ready = 0;
#ifdef AXI_BUS
this->reset_axi_bus();
#else
this->reset_mem_bus();
#endif
vortex_->reset = 1;
@ -133,12 +141,20 @@ void Simulator::step() {
vortex_->clk = 0;
this->eval();
mem_rsp_ready_ = vortex_->mem_rsp_ready;
#ifdef AXI_BUS
this->eval_axi_bus(0);
#else
this->eval_mem_bus(0);
#endif
vortex_->clk = 1;
this->eval();
this->eval_mem_bus();
#ifdef AXI_BUS
this->eval_axi_bus(1);
#else
this->eval_mem_bus(1);
#endif
#ifndef NDEBUG
fflush(stdout);
@ -155,7 +171,158 @@ void Simulator::eval() {
++timestamp;
}
void Simulator::eval_mem_bus() {
#ifdef AXI_BUS
/// Drives every simulator-owned AXI handshake input of the design to its
/// idle value: no request channel is ready and no read response is valid.
void Simulator::reset_axi_bus() {
  // read response channel: nothing to deliver
  vortex_->m_axi_rvalid  = 0;
  // request channels: not accepting reads or writes
  vortex_->m_axi_arready = 0;
  vortex_->m_axi_awready = 0;
  vortex_->m_axi_wready  = 0;
}
void Simulator::eval_axi_bus(bool clk) {
if (!clk) {
mem_rsp_ready_ = vortex_->m_axi_rready;
return;
}
if (ram_ == nullptr) {
vortex_->m_axi_wready = 0;
vortex_->m_axi_awready = 0;
vortex_->m_axi_arready = 0;
return;
}
// update memory responses schedule
for (int b = 0; b < MEMORY_BANKS; ++b) {
for (auto& rsp : mem_rsp_vec_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
}
bool has_response = false;
// schedule memory responses that are ready
for (int i = 0; i < MEMORY_BANKS; ++i) {
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
if (!mem_rsp_vec_[b].empty()
&& (mem_rsp_vec_[b].begin()->cycles_left) <= 0) {
has_response = true;
last_mem_rsp_bank_ = b;
break;
}
}
// send memory response
if (mem_rsp_active_
&& vortex_->m_axi_rvalid && mem_rsp_ready_) {
mem_rsp_active_ = false;
}
if (!mem_rsp_active_) {
if (has_response) {
vortex_->m_axi_rvalid = 1;
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
/*
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");
*/
memcpy((uint8_t*)vortex_->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
vortex_->m_axi_rid = mem_rsp_it->tag;
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
mem_rsp_active_ = true;
} else {
vortex_->m_axi_rvalid = 0;
}
}
// select the memory bank
uint32_t req_addr = vortex_->m_axi_wvalid ? vortex_->m_axi_awaddr : vortex_->m_axi_araddr;
uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
if (vortex_->m_axi_wvalid || vortex_->m_axi_arvalid) {
if (vortex_->m_axi_wvalid) {
uint64_t byteen = vortex_->m_axi_wstrb;
unsigned base_addr = vortex_->m_axi_awaddr;
uint8_t* data = (uint8_t*)(vortex_->m_axi_wdata);
if (base_addr >= IO_COUT_ADDR
&& base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
auto& ss_buf = print_bufs_[i];
char c = data[i];
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
}
}
} else {
/*
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");
*/
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
}
} else {
mem_req_t mem_req;
mem_req.tag = vortex_->m_axi_arid;
mem_req.addr = vortex_->m_axi_araddr;
ram_->read(vortex_->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_rsp_vec_[req_bank]) {
if (mem_req.addr == rsp.addr) {
// duplicate requests receive the same cycle delay
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_rsp_vec_[req_bank].emplace_back(mem_req);
}
}
}
vortex_->m_axi_wready = !mem_stalled;
vortex_->m_axi_awready = !mem_stalled;
vortex_->m_axi_arready = !mem_stalled;
}
#else
/// Drives the simulator-owned handshake inputs of the native memory bus to
/// idle: no response is valid and no request is accepted.
void Simulator::reset_mem_bus() {
  // response side: nothing to deliver
  vortex_->mem_rsp_valid = 0;
  // request side: not accepting requests
  vortex_->mem_req_ready = 0;
}
void Simulator::eval_mem_bus(bool clk) {
if (!clk) {
mem_rsp_ready_ = vortex_->mem_rsp_ready;
return;
}
if (ram_ == nullptr) {
vortex_->mem_req_ready = 0;
return;
@ -276,6 +443,8 @@ void Simulator::eval_mem_bus() {
vortex_->mem_req_ready = !mem_stalled;
}
#endif
void Simulator::wait(uint32_t cycles) {
for (int i = 0; i < cycles; ++i) {
this->step();
@ -309,11 +478,19 @@ int Simulator::run() {
}
/// Reads the `ebreak` signal of the execute stage of core 0 in cluster 0
/// through the Verilator-generated hierarchy.
///
/// @return true when the signal is non-zero.
bool Simulator::get_ebreak() const {
  // The processor sits one level deeper when it is wrapped by Vortex_axi.
#ifdef AXI_BUS
  const auto& processor = vortex_->Vortex_axi->vortex;
#else
  const auto& processor = vortex_->Vortex;
#endif
  const auto& execute = processor->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute;
  return static_cast<int>(execute->ebreak) != 0;
}
/// Reads entry `reg` of the writeback unit's `last_wb_value` array for
/// core 0 in cluster 0 through the Verilator-generated hierarchy.
///
/// @param reg  index into `last_wb_value` (not range-checked here)
/// @return the stored value converted to int
int Simulator::get_last_wb_value(int reg) const {
  // The processor sits one level deeper when it is wrapped by Vortex_axi.
#ifdef AXI_BUS
  const auto& processor = vortex_->Vortex_axi->vortex;
#else
  const auto& processor = vortex_->Vortex;
#endif
  const auto& writeback = processor->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback;
  return static_cast<int>(writeback->last_wb_value[reg]);
}
void Simulator::load_bin(const char* program_file) {

View file

@ -1,8 +1,14 @@
#pragma once
#include <verilated.h>
#ifdef AXI_BUS
#include "VVortex_axi.h"
#include "VVortex_axi__Syms.h"
#else
#include "VVortex.h"
#include "VVortex__Syms.h"
#endif
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
@ -58,8 +64,14 @@ private:
std::unordered_map<int, std::stringstream> print_bufs_;
void eval();
void eval_mem_bus();
#ifdef AXI_BUS
void reset_axi_bus();
void eval_axi_bus(bool clk);
#else
void reset_mem_bus();
void eval_mem_bus(bool clk);
#endif
int get_last_wb_value(int reg) const;
@ -73,7 +85,13 @@ private:
bool mem_rsp_ready_;
RAM *ram_;
#ifdef AXI_BUS
VVortex_axi *vortex_;
#else
VVortex *vortex_;
#endif
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
#endif