rtl refactoring

This commit is contained in:
Blaise Tine 2020-05-04 20:12:05 -04:00
parent 69f607b73e
commit f142afac80
39 changed files with 31067 additions and 31607 deletions

View file

@ -1,9 +1,9 @@
CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
# CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
#DEBUG = 1
CFLAGS += -fPIC

View file

@ -1,4 +1,4 @@
all: singlecore
all: build-s
CF += -std=c++11 -fms-extensions

View file

@ -10,11 +10,9 @@
import local_mem_cfg_pkg::*;
module ccip_std_afu
#(
module ccip_std_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2
)
(
) (
// CCI-P Clocks and Resets
input logic pClk, // Primary CCI-P interface clock.
input logic pClkDiv2, // Aligned, pClk divided by 2.
@ -104,12 +102,9 @@ module ccip_std_afu
// choose which memory bank to test
logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;
vortex_afu
#(
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
)
vortex_afu_inst
(
) vortex_afu_inst (
.clk (clk),
.SoftReset (reset_T1),

View file

@ -1,11 +1,12 @@
`include "VX_define.vh"
module VX_back_end #(
module VX_back_end #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire schedule_delay,
input wire schedule_delay,
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if,
@ -22,104 +23,102 @@ module VX_back_end #(
VX_warp_ctl_if warp_ctl_if
);
VX_wb_if writeback_temp_if();
assign writeback_if.wb = writeback_temp_if.wb;
assign writeback_if.rd = writeback_temp_if.rd;
assign writeback_if.data = writeback_temp_if.data;
assign writeback_if.valid = writeback_temp_if.valid;
assign writeback_if.warp_num = writeback_temp_if.warp_num;
assign writeback_if.pc = writeback_temp_if.pc;
VX_wb_if wb_temp_if();
assign writeback_if.wb = wb_temp_if.wb;
assign writeback_if.rd = wb_temp_if.rd;
assign writeback_if.data = wb_temp_if.data;
assign writeback_if.valid = wb_temp_if.valid;
assign writeback_if.warp_num = wb_temp_if.warp_num;
assign writeback_if.pc = wb_temp_if.pc;
// assign VX_writeback_if(writeback_temp_if);
wire no_slot_mem;
wire no_slot_exec;
wire no_slot_mem;
wire no_slot_exec;
// LSU input + output
VX_lsu_req_if lsu_req_if();
VX_wb_if mem_wb_if();
// LSU input + output
VX_lsu_req_if lsu_req_if();
VX_wb_if mem_wb_if();
// Exec unit input + output
VX_exec_unit_req_if exec_unit_req_if();
VX_wb_if inst_exec_wb_if();
// Exec unit input + output
VX_exec_unit_req_if exec_unit_req_if();
VX_wb_if inst_exec_wb_if();
// GPU unit input
VX_gpu_inst_req_if gpu_inst_req_if();
// GPU unit input
VX_gpu_inst_req_if gpu_inst_req_if();
// CSR unit inputs
VX_csr_req_if csr_req_if();
VX_wb_if csr_wb_if();
wire no_slot_csr;
wire stall_gpr_csr;
// CSR unit inputs
VX_csr_req_if csr_req_if();
VX_wb_if csr_wb_if();
wire no_slot_csr;
wire stall_gpr_csr;
VX_gpr_stage gpr_stage (
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.writeback_if (wb_temp_if),
.bckE_req_if (bckE_req_if),
// New
.exec_unit_req_if (exec_unit_req_if),
.lsu_req_if (lsu_req_if),
.gpu_inst_req_if (gpu_inst_req_if),
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (mem_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_gpr_stage gpr_stage (
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.writeback_if (writeback_temp_if),
.bckE_req_if (bckE_req_if),
// New
.exec_unit_req_if (exec_unit_req_if),
.lsu_req_if (lsu_req_if),
.gpu_inst_req_if (gpu_inst_req_if),
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (mem_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
VX_lsu_unit lsu_unit (
.clk (clk),
.reset (reset),
.lsu_req_if (lsu_req_if),
.mem_wb_if (mem_wb_if),
.dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if),
.delay (mem_delay),
.no_slot_mem (no_slot_mem)
);
VX_lsu_unit lsu_unit (
.clk (clk),
.reset (reset),
.lsu_req_if (lsu_req_if),
.mem_wb_if (mem_wb_if),
.dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if),
.delay (mem_delay),
.no_slot_mem (no_slot_mem)
);
VX_exec_unit exec_unit (
.clk (clk),
.reset (reset),
.exec_unit_req_if(exec_unit_req_if),
.inst_exec_wb_if (inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.delay (exec_delay),
.no_slot_exec (no_slot_exec)
);
VX_exec_unit exec_unit (
.clk (clk),
.reset (reset),
.exec_unit_req_if(exec_unit_req_if),
.inst_exec_wb_if (inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.delay (exec_delay),
.no_slot_exec (no_slot_exec)
);
VX_gpu_inst gpu_inst (
.gpu_inst_req_if(gpu_inst_req_if),
.warp_ctl_if (warp_ctl_if)
);
VX_gpu_inst gpu_inst (
.gpu_inst_req_if(gpu_inst_req_if),
.warp_ctl_if (warp_ctl_if)
);
VX_csr_pipe #(
.CORE_ID(CORE_ID)
) csr_pipe (
.clk (clk),
.reset (reset),
.no_slot_csr (no_slot_csr),
.csr_req_if (csr_req_if),
.writeback_if (wb_temp_if),
.csr_wb_if (csr_wb_if),
.stall_gpr_csr (stall_gpr_csr)
);
VX_csr_pipe #(
.CORE_ID(CORE_ID)
) csr_pipe (
.clk (clk),
.reset (reset),
.no_slot_csr (no_slot_csr),
.csr_req_if (csr_req_if),
.writeback_if(writeback_temp_if),
.csr_wb_if (csr_wb_if),
.stall_gpr_csr(stall_gpr_csr)
);
VX_writeback writeback (
.clk (clk),
.reset (reset),
.mem_wb_if (mem_wb_if),
.inst_exec_wb_if(inst_exec_wb_if),
.csr_wb_if (csr_wb_if),
VX_writeback writeback (
.clk (clk),
.reset (reset),
.mem_wb_if (mem_wb_if),
.inst_exec_wb_if (inst_exec_wb_if),
.csr_wb_if (csr_wb_if),
.writeback_if (writeback_temp_if),
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)
);
.writeback_if (wb_temp_if),
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)
);
endmodule

View file

@ -36,15 +36,19 @@
`endif
`ifndef SHARED_MEM_TOP_ADDR
`define SHARED_MEM_TOP_ADDR 8'hFF
`endif
`ifndef IO_BUS_ADDR
`define IO_BUS_ADDR 32'h00010000
`define SHARED_MEM_TOP_ADDR 8'hFE
`endif
`ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 20'h6ffff
`define STACK_BASE_ADDR 20'h6FFFF
`endif
`ifndef IO_BUS_BASE_ADDR
`define IO_BUS_BASE_ADDR 32'hFFFFFF00
`endif
`ifndef IO_BUS_ADDR_COUT
`define IO_BUS_ADDR_COUT 32'hFFFFFFFC
`endif
`ifndef L2_ENABLE

View file

@ -1,6 +1,6 @@
`include "VX_define.vh"
module VX_csr_data #(
module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk, // Clock

48
hw/rtl/VX_dcache_io_arb.v Normal file
View file

@ -0,0 +1,48 @@
`include "VX_define.vh"
module VX_dcache_io_arb (
input wire io_select,
// Core request
VX_cache_core_req_if core_req_if,
// Dcache request
VX_cache_core_req_if dcache_core_req_if,
// I/O request
VX_cache_core_req_if io_core_req_if,
// Dcache response
VX_cache_core_rsp_if dcache_core_rsp_if,
// I/O response
VX_cache_core_rsp_if io_core_rsp_if,
// Core response
VX_cache_core_rsp_if core_rsp_if
);
assign dcache_core_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{~io_select}};
assign dcache_core_req_if.core_req_read = core_req_if.core_req_read;
assign dcache_core_req_if.core_req_write = core_req_if.core_req_write;
assign dcache_core_req_if.core_req_addr = core_req_if.core_req_addr;
assign dcache_core_req_if.core_req_data = core_req_if.core_req_data;
assign dcache_core_req_if.core_req_tag = core_req_if.core_req_tag;
assign io_core_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{io_select}};
assign io_core_req_if.core_req_read = core_req_if.core_req_read;
assign io_core_req_if.core_req_write = core_req_if.core_req_write;
assign io_core_req_if.core_req_addr = core_req_if.core_req_addr;
assign io_core_req_if.core_req_data = core_req_if.core_req_data;
assign io_core_req_if.core_req_tag = core_req_if.core_req_tag;
assign core_req_if.core_req_ready = io_select ? io_core_req_if.core_req_ready : dcache_core_req_if.core_req_ready;
wire dcache_rsp_valid = (|dcache_core_rsp_if.core_rsp_valid);
assign core_rsp_if.core_rsp_valid = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_valid : io_core_rsp_if.core_rsp_valid;
assign core_rsp_if.core_rsp_data = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_data : io_core_rsp_if.core_rsp_data;
assign core_rsp_if.core_rsp_tag = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_tag : io_core_rsp_if.core_rsp_tag;
assign dcache_core_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready;
assign io_core_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready && ~dcache_rsp_valid;
endmodule

View file

@ -140,8 +140,8 @@ module VX_decode(
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
// MEM signals
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `WORD_SEL_NO;
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `WORD_SEL_NO;
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `BYTE_EN_NO;
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `BYTE_EN_NO;
// UPPER IMMEDIATE
always @(*) begin

View file

@ -127,75 +127,68 @@
`define ZERO_REG 5'h0
///////////////////////////////////////////////////////////////////////////////
// Core request tag width pc, wb, rd, warp_num
`define CORE_REQ_TAG_WIDTH (32 + 2 + 5 + `NW_BITS)
// TAG sharing enable rd, warp_num
`define CORE_TAG_ID_BITS (5 + `NW_BITS)
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Function ID
`define DFUNC_ID 0
// DRAM request data bits
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
// DRAM request address bits
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DBANK_LINE_SIZE))
// DRAM request tag bits
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
////////////////////////// Icache Configurable Knobs //////////////////////////
// Function ID
`define IFUNC_ID 1
// DRAM request data bits
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
// DRAM request address bits
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
`define IDRAM_ADDR_WIDTH (32 - `CLOG2(`IBANK_LINE_SIZE))
// DRAM request tag bits
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
////////////////////////// SM Configurable Knobs //////////////////////////////
// Function ID
`define SFUNC_ID 2
// DRAM request data bits
`define SDRAM_LINE_WIDTH (`SBANK_LINE_SIZE * 8)
`define SDRAM_LINE_WIDTH (`SBANK_LINE_SIZE * 8)
// DRAM request address bits
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
`define SDRAM_ADDR_WIDTH (32 - `CLOG2(`SBANK_LINE_SIZE))
// DRAM request tag bits
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Function ID
`define L2FUNC_ID 3
// DRAM request data bits
`define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8)
`define L2DRAM_LINE_WIDTH (`L2BANK_LINE_SIZE * 8)
// DRAM request address bits
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
// DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
////////////////////////// L3cache Configurable Knobs /////////////////////////
// Function ID
`define L3FUNC_ID 3
// DRAM request data bits
`define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8)
`define L3DRAM_LINE_WIDTH (`L3BANK_LINE_SIZE * 8)
// DRAM request address bits
`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE))
`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3BANK_LINE_SIZE))
// DRAM request tag bits
`define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
`define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
// VX_DEFINE
`endif

View file

@ -4,86 +4,46 @@ module VX_dmem_ctrl (
input wire clk,
input wire reset,
// Core <-> Dcache
VX_cache_core_req_if dcache_core_req_if,
VX_cache_core_rsp_if dcache_core_rsp_if,
// Dram <-> Dcache
VX_cache_dram_req_if dcache_dram_req_if,
VX_cache_dram_rsp_if dcache_dram_rsp_if,
VX_cache_snp_req_if dcache_snp_req_if,
// Core <-> Icache
VX_cache_core_req_if icache_core_req_if,
VX_cache_core_rsp_if icache_core_rsp_if,
// Dram <-> Icache
VX_cache_dram_req_if icache_dram_req_if,
VX_cache_dram_rsp_if icache_dram_rsp_if,
VX_cache_snp_req_if icache_snp_req_if,
// Core <-> Dcache
VX_cache_core_rsp_if dcache_core_rsp_if,
VX_cache_core_req_if dcache_core_req_if,
// Core <-> Icache
VX_cache_core_rsp_if icache_core_rsp_if,
VX_cache_core_req_if icache_core_req_if
VX_cache_dram_rsp_if icache_dram_rsp_if
);
VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_req_smem_if();
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_req_qual_if(), smem_core_req_if();
VX_cache_core_rsp_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_rsp_smem_if();
VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_req_dcache_if();
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_rsp_qual_if(), smem_core_rsp_if();
VX_cache_core_rsp_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_rsp_dcache_if();
wire to_shm = (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR);
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid);
// Dcache Request
assign dcache_req_dcache_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~to_shm}};
assign dcache_req_dcache_if.core_req_read = dcache_core_req_if.core_req_read;
assign dcache_req_dcache_if.core_req_write = dcache_core_req_if.core_req_write;
assign dcache_req_dcache_if.core_req_addr = dcache_core_req_if.core_req_addr;
assign dcache_req_dcache_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_req_dcache_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_rsp_dcache_if.core_rsp_ready = dcache_core_rsp_if.core_rsp_ready;
// Shared Memory Request
assign dcache_req_smem_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{to_shm}};
assign dcache_req_smem_if.core_req_addr = dcache_core_req_if.core_req_addr;
assign dcache_req_smem_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_req_smem_if.core_req_read = dcache_core_req_if.core_req_read;
assign dcache_req_smem_if.core_req_write = dcache_core_req_if.core_req_write;
assign dcache_req_smem_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_core_req_if.core_req_ready = to_shm ? dcache_req_smem_if.core_req_ready : dcache_req_dcache_if.core_req_ready;
// Dcache Response
assign dcache_core_rsp_if.core_rsp_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_valid : dcache_rsp_smem_if.core_rsp_valid;
assign dcache_core_rsp_if.core_rsp_data = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_data : dcache_rsp_smem_if.core_rsp_data;
assign dcache_core_rsp_if.core_rsp_tag = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_tag : dcache_rsp_smem_if.core_rsp_tag;
assign dcache_rsp_smem_if.core_rsp_ready = dcache_core_rsp_if.core_rsp_ready && ~dcache_wants_wb;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) smem_dram_req_if();
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) smem_dram_rsp_if();
VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR),
.core_req_if (dcache_core_req_if),
.dcache_core_req_if (dcache_core_req_qual_if),
.io_core_req_if (smem_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_qual_if),
.io_core_rsp_if (smem_core_rsp_if),
.core_rsp_if (dcache_core_rsp_if)
);
VX_cache #(
.CACHE_SIZE (`SCACHE_SIZE),
@ -92,7 +52,6 @@ module VX_dmem_ctrl (
.WORD_SIZE (`SWORD_SIZE),
.NUM_REQUESTS (`SNUM_REQUESTS),
.STAGE_1_CYCLES (`SSTAGE_1_CYCLES),
.FUNC_ID (`SFUNC_ID),
.REQQ_SIZE (`SREQQ_SIZE),
.MRVQ_SIZE (`SMRVQ_SIZE),
.DFPQ_SIZE (`SDFPQ_SIZE),
@ -105,54 +64,56 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`SPRFQ_SIZE),
.PRFQ_STRIDE (`SPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.DRAM_ENABLE (0),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
) gpu_smem (
.clk (clk),
.reset (reset),
// Core request
.core_req_valid (dcache_req_smem_if.core_req_valid),
.core_req_read (dcache_req_smem_if.core_req_read),
.core_req_write (dcache_req_smem_if.core_req_write),
.core_req_addr (dcache_req_smem_if.core_req_addr),
.core_req_data (dcache_req_smem_if.core_req_data),
.core_req_tag (dcache_req_smem_if.core_req_tag),
.core_req_ready (dcache_req_smem_if.core_req_ready),
.core_req_valid (smem_core_req_if.core_req_valid),
.core_req_read (smem_core_req_if.core_req_read),
.core_req_write (smem_core_req_if.core_req_write),
.core_req_addr (smem_core_req_if.core_req_addr),
.core_req_data (smem_core_req_if.core_req_data),
.core_req_tag (smem_core_req_if.core_req_tag),
.core_req_ready (smem_core_req_if.core_req_ready),
// Core response
.core_rsp_valid (dcache_rsp_smem_if.core_rsp_valid),
.core_rsp_data (dcache_rsp_smem_if.core_rsp_data),
.core_rsp_tag (dcache_rsp_smem_if.core_rsp_tag),
.core_rsp_ready (dcache_rsp_smem_if.core_rsp_ready),
.core_rsp_valid (smem_core_rsp_if.core_rsp_valid),
.core_rsp_data (smem_core_rsp_if.core_rsp_data),
.core_rsp_tag (smem_core_rsp_if.core_rsp_tag),
.core_rsp_ready (smem_core_rsp_if.core_rsp_ready),
`IGNORE_WARNINGS_BEGIN
// DRAM request
.dram_req_read (smem_dram_req_if.dram_req_read),
.dram_req_write (smem_dram_req_if.dram_req_write),
.dram_req_addr (smem_dram_req_if.dram_req_addr),
.dram_req_data (smem_dram_req_if.dram_req_data),
.dram_req_tag (smem_dram_req_if.dram_req_tag),
.dram_req_ready (smem_dram_req_if.dram_req_ready),
.dram_req_read (),
.dram_req_write (),
.dram_req_addr (),
.dram_req_data (),
.dram_req_tag (),
.dram_req_ready (0),
// DRAM response
.dram_rsp_valid (smem_dram_rsp_if.dram_rsp_valid),
.dram_rsp_data (smem_dram_rsp_if.dram_rsp_data),
.dram_rsp_tag (smem_dram_rsp_if.dram_rsp_tag),
.dram_rsp_ready (smem_dram_rsp_if.dram_rsp_ready),
.dram_rsp_valid (0),
.dram_rsp_data (0),
.dram_rsp_tag (0),
.dram_rsp_ready (),
// Snoop Request
// Snoop request
.snp_req_valid (0),
.snp_req_addr (0),
`IGNORE_WARNINGS_BEGIN
.snp_req_ready (),
`IGNORE_WARNINGS_END
// Snoop Forward
`IGNORE_WARNINGS_BEGIN
// Snoop forwarding
.snp_fwd_valid (),
.snp_fwd_addr (),
`IGNORE_WARNINGS_END
.snp_fwd_ready (0)
`IGNORE_WARNINGS_END
);
VX_cache #(
@ -162,7 +123,6 @@ module VX_dmem_ctrl (
.WORD_SIZE (`DWORD_SIZE),
.NUM_REQUESTS (`DNUM_REQUESTS),
.STAGE_1_CYCLES (`DSTAGE_1_CYCLES),
.FUNC_ID (`DFUNC_ID),
.REQQ_SIZE (`DREQQ_SIZE),
.MRVQ_SIZE (`DMRVQ_SIZE),
.DFPQ_SIZE (`DDFPQ_SIZE),
@ -175,26 +135,30 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`DPRFQ_SIZE),
.PRFQ_STRIDE (`DPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
) gpu_dcache (
.clk (clk),
.reset (reset),
// Core req
.core_req_valid (dcache_req_dcache_if.core_req_valid),
.core_req_read (dcache_req_dcache_if.core_req_read),
.core_req_write (dcache_req_dcache_if.core_req_write),
.core_req_addr (dcache_req_dcache_if.core_req_addr),
.core_req_data (dcache_req_dcache_if.core_req_data),
.core_req_tag (dcache_req_dcache_if.core_req_tag),
.core_req_ready (dcache_req_dcache_if.core_req_ready),
.core_req_valid (dcache_core_req_qual_if.core_req_valid),
.core_req_read (dcache_core_req_qual_if.core_req_read),
.core_req_write (dcache_core_req_qual_if.core_req_write),
.core_req_addr (dcache_core_req_qual_if.core_req_addr),
.core_req_data (dcache_core_req_qual_if.core_req_data),
.core_req_tag (dcache_core_req_qual_if.core_req_tag),
.core_req_ready (dcache_core_req_qual_if.core_req_ready),
// Core response
.core_rsp_valid (dcache_rsp_dcache_if.core_rsp_valid),
.core_rsp_data (dcache_rsp_dcache_if.core_rsp_data),
.core_rsp_tag (dcache_rsp_dcache_if.core_rsp_tag),
.core_rsp_ready (dcache_rsp_dcache_if.core_rsp_ready),
.core_rsp_valid (dcache_core_rsp_qual_if.core_rsp_valid),
.core_rsp_data (dcache_core_rsp_qual_if.core_rsp_data),
.core_rsp_tag (dcache_core_rsp_qual_if.core_rsp_tag),
.core_rsp_ready (dcache_core_rsp_qual_if.core_rsp_ready),
// DRAM request
.dram_req_read (dcache_dram_req_if.dram_req_read),
@ -214,13 +178,13 @@ module VX_dmem_ctrl (
.snp_req_valid (dcache_snp_req_if.snp_req_valid),
.snp_req_addr (dcache_snp_req_if.snp_req_addr),
.snp_req_ready (dcache_snp_req_if.snp_req_ready),
// Snoop Forward
`IGNORE_WARNINGS_BEGIN
// Snoop Forward
.snp_fwd_valid (),
.snp_fwd_addr (),
`IGNORE_WARNINGS_END
.snp_fwd_ready (0)
`IGNORE_WARNINGS_END
);
VX_cache #(
@ -230,7 +194,6 @@ module VX_dmem_ctrl (
.WORD_SIZE (`IWORD_SIZE),
.NUM_REQUESTS (`INUM_REQUESTS),
.STAGE_1_CYCLES (`ISTAGE_1_CYCLES),
.FUNC_ID (`IFUNC_ID),
.REQQ_SIZE (`IREQQ_SIZE),
.MRVQ_SIZE (`IMRVQ_SIZE),
.DFPQ_SIZE (`IDFPQ_SIZE),
@ -243,7 +206,11 @@ module VX_dmem_ctrl (
.PRFQ_SIZE (`IPRFQ_SIZE),
.PRFQ_STRIDE (`IPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
.SNOOP_FORWARDING_ENABLE(0),
.DRAM_ENABLE (1),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) gpu_icache (
.clk (clk),
@ -278,17 +245,17 @@ module VX_dmem_ctrl (
.dram_rsp_tag (icache_dram_rsp_if.dram_rsp_tag),
.dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready),
`IGNORE_WARNINGS_BEGIN
// Snoop Request
.snp_req_valid (icache_snp_req_if.snp_req_valid),
.snp_req_addr (icache_snp_req_if.snp_req_addr),
.snp_req_ready (icache_snp_req_if.snp_req_ready),
.snp_req_valid (0),
.snp_req_addr (0),
.snp_req_ready (),
// Snoop Forward
`IGNORE_WARNINGS_BEGIN
.snp_fwd_valid (),
.snp_fwd_addr (),
`IGNORE_WARNINGS_END
.snp_fwd_addr (),
.snp_fwd_ready (0)
`IGNORE_WARNINGS_END
);
endmodule

View file

@ -26,8 +26,8 @@ module VX_gpr_stage (
`DEBUG_BEGIN
wire[31:0] curr_PC = bckE_req_if.curr_PC;
wire[2:0] branchType = bckE_req_if.branch_type;
wire is_store = (bckE_req_if.mem_write != `WORD_SEL_NO);
wire is_load = (bckE_req_if.mem_read != `WORD_SEL_NO);
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
wire jalQual = bckE_req_if.jalQual;
`DEBUG_END

View file

@ -16,14 +16,14 @@ module VX_icache_stage (
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
wire valid_inst = (|fe_inst_meta_fi.valid);
wire valid_inst = (|fe_inst_meta_fi.valid);
// Icache Request
assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
assign icache_req_if.core_req_data = 32'b0;
assign icache_req_if.core_req_read = `WORD_SEL_LW;
assign icache_req_if.core_req_write = `WORD_SEL_NO;
assign icache_req_if.core_req_read = `BYTE_EN_LW;
assign icache_req_if.core_req_write = `BYTE_EN_NO;
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num};
`IGNORE_WARNINGS_BEGIN

View file

@ -16,7 +16,7 @@ module VX_inst_multiplex (
wire[`NUM_THREADS-1:0] is_gpu_mask;
wire[`NUM_THREADS-1:0] is_csr_mask;
wire is_mem = (bckE_req_if.mem_write != `WORD_SEL_NO) || (bckE_req_if.mem_read != `WORD_SEL_NO);
wire is_mem = (bckE_req_if.mem_write != `BYTE_EN_NO) || (bckE_req_if.mem_read != `BYTE_EN_NO);
wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
wire is_csr = bckE_req_if.is_csr;
// wire is_gpu = 0;

View file

@ -24,18 +24,15 @@ module VX_lsu_unit (
.address (address)
);
wire[`NUM_THREADS-1:0][31:0] use_address;
wire[`NUM_THREADS-1:0][31:0] use_store_data;
wire[`NUM_THREADS-1:0] use_valid;
wire[`WORD_SEL_BITS-1:0] use_mem_read;
wire[`WORD_SEL_BITS-1:0] use_mem_write;
wire[4:0] use_rd;
wire[`NW_BITS-1:0] use_warp_num;
wire[1:0] use_wb;
wire[31:0] use_pc;
wire[(`LOG2UP(`NUM_THREADS))-1:0] tag_index;
wire zero = 0;
wire[`NUM_THREADS-1:0][31:0] use_address;
wire[`NUM_THREADS-1:0][31:0] use_store_data;
wire[`NUM_THREADS-1:0] use_valid;
wire[`BYTE_EN_BITS-1:0] use_mem_read;
wire[`BYTE_EN_BITS-1:0] use_mem_write;
wire[4:0] use_rd;
wire[`NW_BITS-1:0] use_warp_num;
wire[1:0] use_wb;
wire[31:0] use_pc;
VX_generic_register #(
.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)
@ -43,7 +40,7 @@ module VX_lsu_unit (
.clk (clk),
.reset(reset),
.stall(delay),
.flush(zero),
.flush(0),
.in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}),
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
);
@ -54,36 +51,25 @@ module VX_lsu_unit (
assign dcache_req_if.core_req_write = {`NUM_THREADS{use_mem_write}};
assign dcache_req_if.core_req_addr = use_address;
assign dcache_req_if.core_req_data = use_store_data;
assign dcache_req_if.core_req_tag = {`NUM_THREADS{use_pc, use_wb, use_rd, use_warp_num}};
assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num};
assign delay = ~dcache_req_if.core_req_ready;
// Core Response
assign mem_wb_if.valid = dcache_rsp_if.core_rsp_valid;
assign mem_wb_if.data = dcache_rsp_if.core_rsp_data;
assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem;
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag[tag_index];
// select first valid entry in tag array
VX_generic_priority_encoder #(
.N(`NUM_THREADS)
) tag_select (
.valids(dcache_rsp_if.core_rsp_valid),
.index (tag_index),
`IGNORE_WARNINGS_BEGIN
.found ()
`IGNORE_WARNINGS_END
);
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
/*always_comb begin
if (1'($time & 1) && dcache_req_if.core_req_ready && |dcache_req_if.core_req_valid) begin
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_rd, use_warp_num, use_store_data);
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
end
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && |dcache_rsp_if.core_rsp_valid) begin
$display("*** %t: D$ rsp: valid=%b, rd=%d, warp=%d, data=%0h", $time, dcache_rsp_if.core_rsp_valid, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
$display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
end
end*/
endmodule // Memory
endmodule

View file

@ -25,8 +25,8 @@ module VX_scheduler (
wire rs2_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0;
wire rd_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0;
wire is_store = (bckE_req_if.mem_write != `WORD_SEL_NO);
wire is_load = (bckE_req_if.mem_read != `WORD_SEL_NO);
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
// classify our next instruction.
wire is_mem = is_store || is_load;

View file

@ -148,7 +148,7 @@ module VX_warp_sched (
end
if (is_barrier) begin
warp_stalled[barrier_warp_num] <= 0;
warp_stalled[barrier_warp_num] <= 0;
if (reached_barrier_limit) begin
barrier_stall_mask[barrier_id] <= 0;
end else begin

View file

@ -41,10 +41,20 @@ module Vortex #(
input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// I/O
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// I/O request
output wire io_req_read,
output wire io_req_write,
output wire[31:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`BYTE_EN_BITS-1:0] io_req_byteen,
output wire[`CORE_REQ_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire[31:0] io_rsp_data,
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug
output wire ebreak
@ -62,20 +72,16 @@ module Vortex #(
VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_core_req_if();
VX_cache_core_req_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_core_req_qual_if();
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_req_if(), io_core_req_if(), dcache_io_core_req_if();
VX_cache_core_rsp_if #(
.NUM_REQUESTS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_core_rsp_if();
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_rsp_if(), io_core_rsp_if(), dcache_io_core_rsp_if();
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
@ -100,29 +106,32 @@ module Vortex #(
assign dcache_dram_rsp_if.dram_rsp_tag = D_dram_rsp_tag;
assign D_dram_rsp_ready = dcache_dram_rsp_if.dram_rsp_ready;
wire to_io_bus = (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR);
assign io_valid = |dcache_core_req_if.core_req_valid && to_io_bus;
assign io_data = dcache_core_req_if.core_req_data[0];
assign io_req_read = (io_core_req_if.core_req_read[0] != `BYTE_EN_NO);
assign io_req_write = (io_core_req_if.core_req_write[0] != `BYTE_EN_NO);
assign io_req_addr = io_core_req_if.core_req_addr[0];
assign io_req_data = io_core_req_if.core_req_data[0];
assign io_req_byteen = io_req_read ? io_core_req_if.core_req_read[0] : io_core_req_if.core_req_write[0];
assign io_req_tag = io_core_req_if.core_req_tag[0];
assign io_core_req_if.core_req_ready = io_req_ready;
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~to_io_bus}};
assign dcache_core_req_qual_if.core_req_read = dcache_core_req_if.core_req_read;
assign dcache_core_req_qual_if.core_req_write = dcache_core_req_if.core_req_write;
assign dcache_core_req_qual_if.core_req_addr = dcache_core_req_if.core_req_addr;
assign dcache_core_req_qual_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_core_req_qual_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_core_req_if.core_req_ready = to_io_bus ? io_ready : dcache_core_req_qual_if.core_req_ready;
assign io_core_rsp_if.core_rsp_valid[0] = io_rsp_valid;
assign io_core_rsp_if.core_rsp_data[0] = io_rsp_data;
assign io_core_rsp_if.core_rsp_tag = io_rsp_tag;
assign io_rsp_ready = io_core_rsp_if.core_rsp_ready;
// Icache interfaces
VX_cache_core_req_if #(
.NUM_REQUESTS(`INUM_REQUESTS),
.WORD_SIZE(`IWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) icache_core_req_if();
VX_cache_core_rsp_if #(
.NUM_REQUESTS(`INUM_REQUESTS),
.WORD_SIZE(`IWORD_SIZE),
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) icache_core_rsp_if();
VX_cache_dram_req_if #(
@ -162,7 +171,6 @@ VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
VX_warp_ctl_if warp_ctl_if();
// Cache snooping
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`IDRAM_ADDR_WIDTH)) icache_snp_req_if();
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
@ -203,9 +211,9 @@ VX_back_end #(
.warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.dcache_rsp_if (dcache_core_rsp_if),
.dcache_req_if (dcache_core_req_if),
.branch_rsp_if (branch_rsp_if),
.dcache_req_if (dcache_io_core_req_if),
.dcache_rsp_if (dcache_io_core_rsp_if),
.writeback_if (writeback_if),
.mem_delay (memory_delay),
.exec_delay (exec_delay),
@ -216,23 +224,32 @@ VX_dmem_ctrl dmem_ctrl (
.clk (clk),
.reset (reset),
// Core <-> Dcache
.dcache_core_req_if (dcache_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
// Dram <-> Dcache
.dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if),
.dcache_snp_req_if (dcache_snp_req_if),
// Dram <-> Icache
.icache_dram_req_if (icache_dram_req_if),
.icache_dram_rsp_if (icache_dram_rsp_if),
.icache_snp_req_if (icache_snp_req_if),
// Core <-> Icache
.icache_core_req_if (icache_core_req_if),
.icache_core_rsp_if (icache_core_rsp_if),
// Core <-> Dcache
.dcache_core_req_if (dcache_core_req_qual_if),
.dcache_core_rsp_if (dcache_core_rsp_if)
// Dram <-> Icache
.icache_dram_req_if (icache_dram_req_if),
.icache_dram_rsp_if (icache_dram_rsp_if)
);
VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
.core_req_if (dcache_io_core_req_if),
.dcache_core_req_if (dcache_core_req_if),
.io_core_req_if (io_core_req_if),
.dcache_core_rsp_if (dcache_core_rsp_if),
.io_core_rsp_if (io_core_rsp_if),
.core_rsp_if (dcache_io_core_rsp_if)
);
endmodule // Vortex

View file

@ -27,15 +27,24 @@ module Vortex_Cluster #(
input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// IO
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// I/O request
output wire io_req_read,
output wire io_req_write,
output wire[31:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`BYTE_EN_BITS-1:0] io_req_byteen,
output wire[`CORE_REQ_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire[31:0] io_rsp_data,
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug
output wire ebreak
);
// DRAM Dcache Req
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
@ -43,42 +52,39 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_req_ready;
// DRAM Dcache Rsp
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
// DRAM Icache Req
wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_req_ready;
// DRAM Icache Rsp
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
// Snooping
wire snp_fwd_valid;
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CORES-1:0] per_core_io_valid;
wire[`NUM_CORES-1:0][31:0] per_core_io_data;
wire[`NUM_CORES-1:0] per_core_io_req_read;
wire[`NUM_CORES-1:0] per_core_io_req_write;
wire[`NUM_CORES-1:0][31:0] per_core_io_req_addr;
wire[`NUM_CORES-1:0][31:0] per_core_io_req_data;
wire[`NUM_CORES-1:0][`BYTE_EN_BITS-1:0] per_core_io_req_byteen;
wire[`NUM_CORES-1:0][`CORE_REQ_TAG_WIDTH-1:0] per_core_io_req_tag;
wire[`NUM_CORES-1:0] per_core_io_rsp_ready;
`IGNORE_WARNINGS_END
// ebreak
wire[`NUM_CORES-1:0] per_core_ebreak;
assign io_valid = per_core_io_valid[0];
assign io_data = per_core_io_data[0];
assign ebreak = (& per_core_ebreak);
genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin
Vortex #(
@ -97,6 +103,7 @@ module Vortex_Cluster #(
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]),
`IGNORE_WARNINGS_BEGIN
.I_dram_req_write (),
`IGNORE_WARNINGS_END
@ -107,24 +114,47 @@ module Vortex_Cluster #(
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.io_valid (per_core_io_valid [i]),
.io_data (per_core_io_data [i]),
.io_ready (io_ready),
.io_req_read (per_core_io_req_read [i]),
.io_req_write (per_core_io_req_write [i]),
.io_req_addr (per_core_io_req_addr [i]),
.io_req_data (per_core_io_req_data [i]),
.io_req_byteen (per_core_io_req_byteen [i]),
.io_req_tag (per_core_io_req_tag [i]),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (per_core_io_rsp_ready [i]),
.ebreak (per_core_ebreak [i])
);
end
end
assign io_req_read = per_core_io_req_read[0];
assign io_req_write = per_core_io_req_write[0];
assign io_req_addr = per_core_io_req_addr[0];
assign io_req_data = per_core_io_req_data[0];
assign io_req_byteen = per_core_io_req_byteen[0];
assign io_req_tag = per_core_io_req_tag[0];
assign io_rsp_ready = per_core_io_rsp_ready[0];
assign ebreak = (& per_core_ebreak);
if (`L2_ENABLE) begin
// L2 Cache ///////////////////////////////////////////////////////////
wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_read;
wire[`L2NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l2_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l2_core_req_read;
wire[`L2NUM_REQUESTS-1:0][31:0] l2_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data;
@ -139,11 +169,11 @@ module Vortex_Cluster #(
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
assign l2_core_req_valid [i+1] = per_core_I_dram_req_read[(i/2)];
assign l2_core_req_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_read [i] = per_core_D_dram_req_read[(i/2)] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l2_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l2_core_req_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_write [i+1] = `WORD_SEL_NO;
assign l2_core_req_write [i] = per_core_D_dram_req_write[(i/2)] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l2_core_req_write [i+1] = `BYTE_EN_NO;
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
@ -177,7 +207,6 @@ module Vortex_Cluster #(
.WORD_SIZE (`L2WORD_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS),
.STAGE_1_CYCLES (`L2STAGE_1_CYCLES),
.FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`L2REQQ_SIZE),
.MRVQ_SIZE (`L2MRVQ_SIZE),
.DFPQ_SIZE (`L2DFPQ_SIZE),
@ -189,8 +218,12 @@ module Vortex_Cluster #(
.FFSQ_SIZE (`L2FFSQ_SIZE),
.PRFQ_SIZE (`L2PRFQ_SIZE),
.PRFQ_STRIDE (`L2PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING_ENABLE(1),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
) gpu_l2cache (
.clk (clk),
@ -266,8 +299,8 @@ module Vortex_Cluster #(
assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign per_core_D_dram_req_ready[(i/2)] = per_core_req_ready[i];
assign per_core_I_dram_req_ready[(i/2)] = per_core_req_ready[i+1];
assign per_core_D_dram_req_ready [(i/2)] = per_core_req_ready[i];
assign per_core_I_dram_req_ready [(i/2)] = per_core_req_ready[i+1];
assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1];

View file

@ -6,7 +6,7 @@ module Vortex_Socket (
input wire clk,
input wire reset,
// DRAM Req
// DRAM request
output wire dram_req_read,
output wire dram_req_write,
output wire[`L3DRAM_ADDR_WIDTH-1:0] dram_req_addr,
@ -14,21 +14,31 @@ module Vortex_Socket (
output wire[`L3DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready,
// DRAM Rsp
// DRAM response
input wire dram_rsp_valid,
input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// Cache Snooping
// Cache snooping
input wire llc_snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// I/O
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// I/O request
output wire io_req_read,
output wire io_req_write,
output wire[31:0] io_req_addr,
output wire[31:0] io_req_data,
output wire[`BYTE_EN_BITS-1:0] io_req_byteen,
output wire[`CORE_REQ_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire[31:0] io_rsp_data,
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// Debug
output wire ebreak
@ -57,24 +67,31 @@ module Vortex_Socket (
.llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_ready (llc_snp_req_ready),
.io_valid (io_valid),
.io_data (io_data),
.io_ready (io_ready),
.io_req_read (io_req_read),
.io_req_write (io_req_write),
.io_req_addr (io_req_addr),
.io_req_data (io_req_data),
.io_req_byteen (io_req_byteen),
.io_req_tag (io_req_tag),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (io_rsp_ready),
.ebreak (ebreak)
);
end else begin
// DRAM Dcache Req
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire l3_core_req_ready;
// DRAM Dcache Rsp
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
@ -85,16 +102,17 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CLUSTERS-1:0] per_cluster_io_valid;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_data;
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_read;
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_write;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_addr;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
wire[`NUM_CLUSTERS-1:0][`BYTE_EN_BITS-1:0] per_cluster_io_req_byteen;
wire[`NUM_CLUSTERS-1:0][`CORE_REQ_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
`IGNORE_WARNINGS_END
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
assign io_valid = per_cluster_io_valid[0];
assign io_data = per_cluster_io_data[0];
assign ebreak = (& per_cluster_ebreak);
genvar i;
for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin
@ -120,19 +138,39 @@ module Vortex_Socket (
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
.io_valid (per_cluster_io_valid [i]),
.io_data (per_cluster_io_data [i]),
.io_ready (io_ready),
.io_req_read (per_cluster_io_req_read [i]),
.io_req_write (per_cluster_io_req_write [i]),
.io_req_addr (per_cluster_io_req_addr [i]),
.io_req_data (per_cluster_io_req_data [i]),
.io_req_byteen (per_cluster_io_req_byteen [i]),
.io_req_tag (per_cluster_io_req_tag [i]),
.io_req_ready (io_req_ready),
.io_rsp_valid (io_rsp_valid),
.io_rsp_data (io_rsp_data),
.io_rsp_tag (io_rsp_tag),
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.ebreak (per_cluster_ebreak [i])
);
end
end
assign io_req_read = per_cluster_io_req_read[0];
assign io_req_write = per_cluster_io_req_write[0];
assign io_req_addr = per_cluster_io_req_addr[0];
assign io_req_data = per_cluster_io_req_data[0];
assign io_req_byteen = per_cluster_io_req_byteen[0];
assign io_req_tag = per_cluster_io_req_tag[0];
assign io_rsp_ready = per_cluster_io_rsp_ready[0];
assign ebreak = (& per_cluster_ebreak);
// L3 Cache ///////////////////////////////////////////////////////////
wire[`L3NUM_REQUESTS-1:0] l3_core_req_valid;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_read;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_write;
wire[`L3NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l3_core_req_read;
wire[`L3NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] l3_core_req_write;
wire[`L3NUM_REQUESTS-1:0][31:0] l3_core_req_addr;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
@ -145,8 +183,8 @@ module Vortex_Socket (
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin
// Core Request
assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]);
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
@ -167,7 +205,6 @@ module Vortex_Socket (
.WORD_SIZE (`L3WORD_SIZE),
.NUM_REQUESTS (`L3NUM_REQUESTS),
.STAGE_1_CYCLES (`L3STAGE_1_CYCLES),
.FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`L3REQQ_SIZE),
.MRVQ_SIZE (`L3MRVQ_SIZE),
.DFPQ_SIZE (`L3DFPQ_SIZE),
@ -180,7 +217,11 @@ module Vortex_Socket (
.PRFQ_SIZE (`L3PRFQ_SIZE),
.PRFQ_STRIDE (`L3PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.SNOOP_FORWARDING_ENABLE(1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)
) gpu_l3cache (
.clk (clk),

117
hw/rtl/cache/VX_bank.v vendored
View file

@ -12,9 +12,7 @@ module VX_bank #(
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
@ -38,11 +36,23 @@ module VX_bank #(
// Fill Forward SNP Queue
parameter FFSQ_SIZE = 8,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// caceh requests tag size
parameter CORE_TAG_WIDTH = 1
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING_ENABLE = 0,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (
input wire clk,
input wire reset,
@ -50,11 +60,11 @@ module VX_bank #(
// Core Request
input wire core_req_ready,
input wire [NUM_REQUESTS-1:0] core_req_valids,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_full,
// Core Response
@ -98,7 +108,7 @@ module VX_bank #(
if (reset) begin
snoop_state <= 0;
end else begin
snoop_state <= (snoop_state | snp_req_valid) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID));
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING_ENABLE;
end
end
@ -156,12 +166,12 @@ module VX_bank #(
`IGNORE_WARNINGS_END
wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0;
wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0;
wire [`WORD_SEL_BITS-1:0] reqq_req_mem_read_st0;
wire [`WORD_SEL_BITS-1:0] reqq_req_mem_write_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
assign reqq_push = core_req_ready && (|core_req_valids);
VX_cache_req_queue #(
VX_cache_req_queue #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@ -177,7 +187,8 @@ module VX_bank #(
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) req_queue (
.clk (clk),
.reset (reset),
@ -212,17 +223,17 @@ module VX_bank #(
wire [`BASE_ADDR_BITS-1:0] mrvq_wsel_st0;
wire [`WORD_WIDTH-1:0] mrvq_writeword_st0;
wire [CORE_TAG_WIDTH-1:0] mrvq_tag_st0;
wire [`WORD_SEL_BITS-1:0] mrvq_mem_read_st0;
wire [`WORD_SEL_BITS-1:0] mrvq_mem_write_st0;
wire [`BYTE_EN_BITS-1:0] mrvq_mem_read_st0;
wire [`BYTE_EN_BITS-1:0] mrvq_mem_write_st0;
wire miss_add;
wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr;
wire[`BASE_ADDR_BITS-1:0] miss_add_wsel;
wire[`WORD_WIDTH-1:0] miss_add_data;
wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid;
wire[CORE_TAG_WIDTH-1:0] miss_add_tag;
wire[`WORD_SEL_BITS-1:0] miss_add_mem_read;
wire[`WORD_SEL_BITS-1:0] miss_add_mem_write;
wire[`BYTE_EN_BITS-1:0] miss_add_mem_read;
wire[`BYTE_EN_BITS-1:0] miss_add_mem_write;
wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
wire is_fill_st2;
@ -294,8 +305,8 @@ module VX_bank #(
0;
assign qual_going_to_write_st0 = dfpq_pop ? 1 :
(mrvq_pop && (mrvq_mem_write_st0 != `WORD_SEL_NO)) ? 1 :
(reqq_pop && (reqq_req_mem_write_st0 != `WORD_SEL_NO)) ? 1 :
(mrvq_pop && (mrvq_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
(reqq_pop && (reqq_req_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
(snrq_pop) ? 1 :
0;
@ -339,8 +350,8 @@ module VX_bank #(
wire [CORE_TAG_WIDTH-1:0] tag_st1e;
wire [`LOG2UP(NUM_REQUESTS)-1:0] tid_st1e;
`DEBUG_END
wire [`WORD_SEL_BITS-1:0] mem_read_st1e;
wire [`WORD_SEL_BITS-1:0] mem_write_st1e;
wire [`BYTE_EN_BITS-1:0] mem_read_st1e;
wire [`BYTE_EN_BITS-1:0] mem_write_st1e;
wire fill_saw_dirty_st1e;
wire is_snp_st1e;
@ -354,7 +365,6 @@ module VX_bank #(
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
@ -363,7 +373,9 @@ module VX_bank #(
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE)
) tag_data_access (
.clk (clk),
.reset (reset),
@ -429,7 +441,17 @@ module VX_bank #(
wire invalidate_fill;
// Enqueue to miss reserv if it's a valid miss
assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !(should_flush && dwbq_push) && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
assign miss_add = valid_st2
&& !is_snp_st2
&& miss_st2
&& !mrvq_full
&& !(should_flush && dwbq_push)
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
assign miss_add_addr = addr_st2;
assign miss_add_wsel = wsel_st2;
assign miss_add_data = writeword_st2;
@ -484,7 +506,14 @@ module VX_bank #(
);
// Enqueue to CWB Queue
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `L2FUNC_ID) && (miss_add_mem_write == `WORD_SEL_NO)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
wire cwbq_push = (valid_st2 && !miss_st2)
&& !cwbq_full
&& !(SNOOP_FORWARDING_ENABLE && (miss_add_mem_write == `BYTE_EN_NO))
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2;
wire [`LOG2UP(NUM_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag;
@ -507,18 +536,27 @@ module VX_bank #(
.full (cwbq_full)
);
assign should_flush = snoop_state && valid_st2 && (miss_add_mem_write != `WORD_SEL_NO) && !is_snp_st2 && !is_fill_st2;
assign should_flush = snoop_state
&& valid_st2
&& (miss_add_mem_write != `BYTE_EN_NO)
&& !is_snp_st2 && !is_fill_st2;
// Enqueue to DWB Queue
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush)
&& !dwbq_full
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr;
wire dwbq_empty;
wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data;
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
if (SNOOP_FORWARDING_ENABLE) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
assign dwbq_req_addr = (should_flush && dwbq_push) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
end else begin
assign dwbq_req_data = readdata_st2;
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
@ -579,7 +617,14 @@ module VX_bank #(
wire snp_fwd_push;
wire ffsq_empty;
assign snp_fwd_push = is_snp_st2 && valid_st2 && !ffsq_full && !(((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
assign snp_fwd_push = is_snp_st2
&& valid_st2
&& !ffsq_full
&& !(((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
assign snp_fwd_valid = !ffsq_empty;
VX_generic_queue #(
@ -596,6 +641,10 @@ module VX_bank #(
.full (ffsq_full)
);
assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full) || ((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full);
assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full);
endmodule : VX_bank

View file

@ -12,9 +12,7 @@ module VX_cache #(
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 3,
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
@ -40,14 +38,28 @@ module VX_cache #(
parameter FFSQ_SIZE = 8,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
parameter FILL_INVALIDAOR_SIZE = 16,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable snoop forwarding
parameter SNOOP_FORWARDING_ENABLE = 0,
// Prefetcher
parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 0,
// caceh requests tag size
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = 1
) (
input wire clk,
@ -55,17 +67,17 @@ module VX_cache #(
// Core request
input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
// Core response
output wire [NUM_REQUESTS-1:0] core_rsp_valid,
output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
// DRAM request
@ -127,7 +139,7 @@ module VX_cache #(
assign snp_req_ready = ~(|per_bank_snp_req_full);
assign dram_rsp_ready = (|per_bank_dram_fill_rsp_ready);
VX_cache_core_req_bank_sel #(
VX_cache_core_req_bank_sel #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@ -154,10 +166,10 @@ module VX_cache #(
for (i = 0; i < NUM_BANKS; i = i + 1) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valids;
wire [NUM_REQUESTS-1:0][31:0] curr_bank_core_req_addr;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] curr_bank_core_req_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] curr_bank_core_req_write;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_read;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_write;
wire curr_bank_core_rsp_pop;
wire curr_bank_core_rsp_valid;
@ -241,7 +253,6 @@ module VX_cache #(
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
@ -252,7 +263,11 @@ module VX_cache #(
.LLVQ_SIZE (LLVQ_SIZE),
.FFSQ_SIZE (FFSQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING_ENABLE(SNOOP_FORWARDING_ENABLE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) bank (
.clk (clk),
.reset (reset),
@ -304,14 +319,13 @@ module VX_cache #(
end
endgenerate
VX_cache_core_rsp_merge #(
VX_cache_core_rsp_merge #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
@ -321,7 +335,8 @@ module VX_cache #(
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_rsp_merge (
.per_bank_core_rsp_tid (per_bank_core_rsp_tid),
.per_bank_core_rsp_valid (per_bank_core_rsp_valid),

View file

@ -3,19 +3,19 @@
`include "VX_define.vh"
`define WORD_SEL_NO 3'h7
`define WORD_SEL_LB 3'h0
`define WORD_SEL_LH 3'h1
`define WORD_SEL_LW 3'h2
`define WORD_SEL_HB 3'h4
`define WORD_SEL_HH 3'h5
`define WORD_SEL_BITS 3
`define BYTE_EN_NO 3'h7
`define BYTE_EN_LB 3'h0
`define BYTE_EN_LH 3'h1
`define BYTE_EN_LW 3'h2
`define BYTE_EN_HB 3'h4
`define BYTE_EN_HH 3'h5
`define BYTE_EN_BITS 3
// data tid tag read write base addr
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `WORD_SEL_BITS + `WORD_SEL_BITS + `BASE_ADDR_BITS)
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)
// tag read write reqs
`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `WORD_SEL_BITS + `WORD_SEL_BITS + `LOG2UP(NUM_REQUESTS))
`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `LOG2UP(NUM_REQUESTS))
`define WORD_WIDTH (8 * WORD_SIZE)
`define BYTE_WIDTH (`WORD_WIDTH / 4)
@ -66,13 +66,8 @@
///////////////////////////////////////////////////////////////////////////////
// Core request tag width pc, wb, rd, warp_num
`define CORE_REQ_TAG_WIDTH (32 + 2 + 5 + `NW_BITS)
`define CORE_REQ_TAG_COUNT ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQUESTS)
// Core request tag info rd + warp_num
`define CORE_REQ_TAG_WARP(x) x[(5 + `NW_BITS)-1:0]
// DRAM response tag bank info
`define DRAM_ADDR_BANK(x) x[`BANK_SELECT_BITS-1:0]
`define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1:`BANK_SELECT_BITS]

View file

@ -13,8 +13,6 @@ module VX_cache_core_rsp_merge #(
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
@ -39,8 +37,12 @@ module VX_cache_core_rsp_merge #(
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// caceh requests tag size
parameter CORE_TAG_WIDTH = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = 1
) (
// Per Bank WB
@ -53,7 +55,7 @@ module VX_cache_core_rsp_merge #(
// Core Writeback
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
output reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output reg [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready
);
@ -73,43 +75,48 @@ module VX_cache_core_rsp_merge #(
);
integer i;
generate
if (CORE_TAG_ID_BITS != 0) begin
assign core_rsp_tag = per_bank_core_rsp_tag[main_bank_index];
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
core_rsp_tag = 0;
for (i = 0; i < NUM_BANKS; i = i + 1) begin
if ((FUNC_ID == `L2FUNC_ID)
|| (FUNC_ID == `L3FUNC_ID)) begin
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
per_bank_core_rsp_pop_unqual[i] = 1;
end else begin
per_bank_core_rsp_pop_unqual[i] = 0;
end
for (i = 0; i < NUM_BANKS; i = i + 1) begin
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
per_bank_core_rsp_pop_unqual[i] = 1;
end else begin
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))
&& (`CORE_REQ_TAG_WARP(per_bank_core_rsp_tag[i]) == `CORE_REQ_TAG_WARP(per_bank_core_rsp_tag[main_bank_index]))) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
per_bank_core_rsp_pop_unqual[i] = 1;
end else begin
per_bank_core_rsp_pop_unqual[i] = 0;
end
per_bank_core_rsp_pop_unqual[i] = 0;
end
end
end
end
endgenerate
end else begin
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
core_rsp_tag = 0;
for (i = 0; i < NUM_BANKS; i = i + 1) begin
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
per_bank_core_rsp_pop_unqual[i] = 1;
end else begin
per_bank_core_rsp_pop_unqual[i] = 0;
end
end
end
end
endmodule

View file

@ -87,7 +87,7 @@ module VX_cache_dfq_queue #(
assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr;
wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index;
wire qual_has_request;
wire qual_has_request;
VX_generic_priority_encoder #(
.N(NUM_BANKS)
@ -105,12 +105,12 @@ module VX_cache_dfq_queue #(
always @(posedge clk) begin
if (reset) begin
use_per_bank_dram_fill_req_valid <= 0;
use_per_bank_dram_fill_req_addr <= 0;
use_per_bank_dram_fill_req_valid <= 0;
use_per_bank_dram_fill_req_addr <= 0;
end else begin
if (dfqq_pop && qual_has_request) begin
use_per_bank_dram_fill_req_valid <= updated_bank_dram_fill_req_valid;
use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr;
use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr;
end
end
end

View file

@ -79,7 +79,7 @@ module VX_cache_dram_req_arb #(
.PRFQ_STRIDE (PRFQ_STRIDE),
.BANK_LINE_SIZE(BANK_LINE_SIZE),
.WORD_SIZE (WORD_SIZE)
) prfqq (
) prfqq (
.clk (clk),
.reset (reset),

View file

@ -1,4 +1,3 @@
`include "VX_cache_config.vh"
module VX_cache_miss_resrv #(
@ -51,8 +50,8 @@ module VX_cache_miss_resrv #(
input wire[`WORD_WIDTH-1:0] miss_add_data,
input wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid,
input wire[CORE_TAG_WIDTH-1:0] miss_add_tag,
input wire[`WORD_SEL_BITS-1:0] miss_add_mem_read,
input wire[`WORD_SEL_BITS-1:0] miss_add_mem_write,
input wire[`BYTE_EN_BITS-1:0] miss_add_mem_read,
input wire[`BYTE_EN_BITS-1:0] miss_add_mem_write,
output wire miss_resrv_full,
output wire miss_resrv_stop,
@ -72,8 +71,8 @@ module VX_cache_miss_resrv #(
output wire[`WORD_WIDTH-1:0] miss_resrv_data_st0,
output wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_resrv_tid_st0,
output wire[CORE_TAG_WIDTH-1:0] miss_resrv_tag_st0,
output wire[`WORD_SEL_BITS-1:0] miss_resrv_mem_read_st0,
output wire[`WORD_SEL_BITS-1:0] miss_resrv_mem_write_st0
output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_read_st0,
output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_write_st0
);
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;

View file

@ -37,8 +37,11 @@ module VX_cache_req_queue #(
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// caceh requests tag size
parameter CORE_TAG_WIDTH = 1
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (
input wire clk,
input wire reset,
@ -46,18 +49,18 @@ module VX_cache_req_queue #(
// Enqueue Data
input wire reqq_push,
input wire [NUM_REQUESTS-1:0] bank_valids,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] bank_mem_read,
input wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] bank_mem_write,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] bank_mem_read,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] bank_mem_write,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] bank_writedata,
input wire [NUM_REQUESTS-1:0][31:0] bank_addr,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] bank_tag,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] bank_tag,
// Dequeue Data
input wire reqq_pop,
output wire reqq_req_st0,
output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0,
output wire [`WORD_SEL_BITS-1:0] reqq_req_mem_read_st0,
output wire [`WORD_SEL_BITS-1:0] reqq_req_mem_write_st0,
output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0,
output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0,
output wire [`WORD_WIDTH-1:0] reqq_req_writedata_st0,
output wire [31:0] reqq_req_addr_st0,
output wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0,
@ -70,23 +73,23 @@ module VX_cache_req_queue #(
wire [NUM_REQUESTS-1:0] out_per_valids;
wire [NUM_REQUESTS-1:0][31:0] out_per_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] out_per_writedata;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] out_per_mem_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] out_per_mem_write;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] out_per_tag;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] out_per_mem_read;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] out_per_mem_write;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] out_per_tag;
reg [NUM_REQUESTS-1:0] use_per_valids;
reg [NUM_REQUESTS-1:0][31:0] use_per_addr;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] use_per_writedata;
reg [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] use_per_mem_read;
reg [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] use_per_mem_write;
reg [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] use_per_tag;
reg [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] use_per_mem_read;
reg [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] use_per_mem_write;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] use_per_tag;
wire [NUM_REQUESTS-1:0] qual_valids;
wire [NUM_REQUESTS-1:0][31:0] qual_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] qual_writedata;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] qual_mem_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] qual_mem_write;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] qual_tag;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] qual_mem_read;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] qual_mem_write;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] qual_tag;
`DEBUG_BEGIN
reg [NUM_REQUESTS-1:0] updated_valids;
@ -123,8 +126,8 @@ module VX_cache_req_queue #(
assign qual_mem_read = use_per_mem_read;
assign qual_mem_write = use_per_mem_write;
wire[`LOG2UP(NUM_REQUESTS)-1:0]qual_request_index;
wire qual_has_request;
wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index;
wire qual_has_request;
VX_generic_priority_encoder #(
.N(NUM_REQUESTS)
@ -139,9 +142,15 @@ module VX_cache_req_queue #(
assign reqq_req_tid_st0 = qual_request_index;
assign reqq_req_addr_st0 = qual_addr[qual_request_index];
assign reqq_req_writedata_st0 = qual_writedata[qual_request_index];
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
assign reqq_req_mem_read_st0 = qual_mem_read [qual_request_index];
assign reqq_req_mem_write_st0 = qual_mem_write[qual_request_index];
if (CORE_TAG_ID_BITS != 0) begin
assign reqq_req_tag_st0 = qual_tag;
end else begin
assign reqq_req_tag_st0 = qual_tag[qual_request_index];
end
assign reqq_req_mem_read_st0 = qual_mem_read [qual_request_index];
assign reqq_req_mem_write_st0 = qual_mem_write[qual_request_index];
always @(*) begin
updated_valids = qual_valids;

View file

@ -13,8 +13,6 @@ module VX_tag_data_access #(
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
@ -37,7 +35,13 @@ module VX_tag_data_access #(
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16
parameter FILL_INVALIDAOR_SIZE = 16,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1
) (
input wire clk,
input wire reset,
@ -55,8 +59,8 @@ module VX_tag_data_access #(
`IGNORE_WARNINGS_BEGIN
input wire[`WORD_SELECT_ADDR_END:0] writewsel_st1e,
input wire[`WORD_SEL_BITS-1:0] mem_write_st1e,
input wire[`WORD_SEL_BITS-1:0] mem_read_st1e,
input wire[`BYTE_EN_BITS-1:0] mem_write_st1e,
input wire[`BYTE_EN_BITS-1:0] mem_read_st1e,
`IGNORE_WARNINGS_END
output wire[`WORD_WIDTH-1:0] readword_st1e,
@ -101,7 +105,6 @@ module VX_tag_data_access #(
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.FUNC_ID (FUNC_ID),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
@ -139,8 +142,8 @@ module VX_tag_data_access #(
.reset(reset),
.stall(stall),
.flush(0),
.in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_tag_st1, qual_read_data_st1}),
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
.in({qual_read_valid_st1, qual_read_dirty_st1, qual_read_tag_st1, qual_read_data_st1}),
.out({read_valid_st1c[0], read_dirty_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
);
genvar i;
@ -152,14 +155,14 @@ module VX_tag_data_access #(
.reset(reset),
.stall(stall),
.flush(0),
.in ({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}),
.out ({read_valid_st1c[i], read_dirty_st1c[i], read_tag_st1c[i], read_data_st1c[i]})
.in({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}),
.out({read_valid_st1c[i], read_dirty_st1c[i], read_tag_st1c[i], read_data_st1c[i]})
);
end
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && (FUNC_ID != `SFUNC_ID); // Dirty only applies in Dcache
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_LINE_ADDR_RNG] : read_tag_st1c[STAGE_1_CYCLES-1]; // Tag is always the same in SM
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || ~DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // Tag is always the same in SM
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH];
@ -170,30 +173,49 @@ module VX_tag_data_access #(
wire [`BANK_LINE_WORDS-1:0][3:0] we;
wire [`BANK_LINE_WIDTH-1:0] data_write;
if (WORD_SIZE == 4) begin
if (WORD_SIZE == BANK_LINE_SIZE) begin
wire should_write = ((mem_write_st1e != `BYTE_EN_NO))
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000;
end
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
assign data_write = force_write ? writedata_st1e : writeword_st1e;
end else begin
wire[`OFFSET_ADDR_BITS-1:0] byte_select = writewsel_st1e[`OFFSET_ADDR_RNG];
wire[`WORD_SELECT_BITS-1:0] block_offset = writewsel_st1e[`WORD_SELECT_ADDR_RNG];
wire lb = valid_req_st1e && (mem_read_st1e == `WORD_SEL_LB);
wire lh = valid_req_st1e && (mem_read_st1e == `WORD_SEL_LH);
wire lbu = valid_req_st1e && (mem_read_st1e == `WORD_SEL_HB);
wire lhu = valid_req_st1e && (mem_read_st1e == `WORD_SEL_HH);
wire lw = valid_req_st1e && (mem_read_st1e == `WORD_SEL_LW);
wire lb = valid_req_st1e && (mem_read_st1e == `BYTE_EN_LB);
wire lh = valid_req_st1e && (mem_read_st1e == `BYTE_EN_LH);
wire lbu = valid_req_st1e && (mem_read_st1e == `BYTE_EN_HB);
wire lhu = valid_req_st1e && (mem_read_st1e == `BYTE_EN_HH);
wire lw = valid_req_st1e && (mem_read_st1e == `BYTE_EN_LW);
wire b0 = (byte_select == 0);
wire b1 = (byte_select == 1);
wire b2 = (byte_select == 2);
wire b3 = (byte_select == 3);
wire sb = valid_req_st1e && (mem_write_st1e == `WORD_SEL_LB);
wire sh = valid_req_st1e && (mem_write_st1e == `WORD_SEL_LH);
wire sw = valid_req_st1e && (mem_write_st1e == `WORD_SEL_LW);
wire sb = valid_req_st1e && (mem_write_st1e == `BYTE_EN_LB);
wire sh = valid_req_st1e && (mem_write_st1e == `BYTE_EN_LH);
wire sw = valid_req_st1e && (mem_write_st1e == `BYTE_EN_LW);
wire [3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000)));
wire [3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100);
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e;
wire should_write = (sw || sb || sh)
&& valid_req_st1e
&& use_read_valid_st1e
&& !miss_st1e
&& !is_snp_st1e;
wire[`WORD_WIDTH-1:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset * 32 +: 32];
wire[`WORD_WIDTH-1:0] data_unQual = (b0 || lw) ? (data_unmod) :
@ -234,13 +256,6 @@ module VX_tag_data_access #(
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = force_write ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : use_write_dat;
end
end else begin
wire should_write = ((mem_write_st1e != `WORD_SEL_NO)) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000;
end
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
assign data_write = force_write ? writedata_st1e : writeword_st1e;
end
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;

View file

@ -4,17 +4,18 @@
`include "../cache/VX_cache_config.vh"
interface VX_cache_core_req_if #(
parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1
parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1,
parameter CORE_TAG_ID_BITS = 0
) ();
wire [NUM_REQUESTS-1:0] core_req_valid;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_read;
wire [NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] core_req_write;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write;
wire [NUM_REQUESTS-1:0][31:0] core_req_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag;
wire core_req_ready;
endinterface

View file

@ -4,14 +4,15 @@
`include "../cache/VX_cache_config.vh"
interface VX_cache_core_rsp_if #(
parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1
parameter NUM_REQUESTS = 1,
parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1,
parameter CORE_TAG_ID_BITS = 0
) ();
wire [NUM_REQUESTS-1:0] core_rsp_valid;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data;
wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag;
wire core_rsp_ready;
endinterface

View file

@ -16,8 +16,8 @@ interface VX_frE_to_bckE_req_if ();
wire [1:0] wb;
wire rs2_src;
wire [31:0] itype_immed;
wire [`WORD_SEL_BITS-1:0] mem_read;
wire [`WORD_SEL_BITS-1:0] mem_write;
wire [`BYTE_EN_BITS-1:0] mem_read;
wire [`BYTE_EN_BITS-1:0] mem_write;
wire [2:0] branch_type;
wire [19:0] upper_immed;
wire [31:0] curr_PC;

View file

@ -12,8 +12,8 @@ interface VX_lsu_req_if ();
wire [`NUM_THREADS-1:0][31:0] store_data;
wire [`NUM_THREADS-1:0][31:0] base_address; // A reg data
wire [31:0] offset; // itype_immed
wire [`WORD_SEL_BITS-1:0] mem_read;
wire [`WORD_SEL_BITS-1:0] mem_write;
wire [`BYTE_EN_BITS-1:0] mem_read;
wire [`BYTE_EN_BITS-1:0] mem_write;
wire [4:0] rd; // dest register
wire [1:0] wb; //

View file

@ -1,5 +1,5 @@
module VX_generic_stack #(
module VX_generic_stack #(
parameter WIDTH = 40,
parameter DEPTH = 2
) (

View file

@ -99,12 +99,13 @@ void Simulator::dbus_driver() {
}
void Simulator::io_driver() {
if (vortex_->io_valid) {
uint32_t data_write = (uint32_t)vortex_->io_data;
if (vortex_->io_req_write
&& vortex_->io_req_addr == IO_BUS_ADDR_COUT) {
uint32_t data_write = (uint32_t)vortex_->io_req_data;
char c = (char)data_write;
std::cerr << c;
}
vortex_->io_ready = true;
vortex_->io_req_ready = true;
}
void Simulator::reset() {

View file

@ -29,5 +29,5 @@ vx_printc:
.section .data
print_addr:
.word IO_BUS_ADDR
.word IO_BUS_ADDR_COUT

View file

@ -24,7 +24,7 @@ unsigned y[] = {1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1};
F
unsigned z[] = {0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff