scope refactoring + snoop invalidate

This commit is contained in:
Blaise Tine 2020-06-12 00:04:31 -07:00
parent 19f263c772
commit d6b0ef2b3c
44 changed files with 652 additions and 589 deletions

View file

@ -67,7 +67,60 @@ inline bool is_aligned(size_t addr, size_t alignment) {
///////////////////////////////////////////////////////////////////////////////
static int vx_scope_start(vx_device_h hdevice) {
// Describes one signal captured by the hardware scope.
struct scope_signal_t {
// Signal width in bits.
int width;
// Identifier written into the VCD "$var" declaration for this signal.
const char* name;
};
// Table of every signal present in one scope frame. vx_scope_start() emits one
// "$var" line per entry and sums the widths so the total can be checked against
// the frame width reported by the device.
// The widths below add up to 389 bits (355 for the first group, 34 for the second).
// NOTE(review): the entry order appears to mirror the RTL capture lists
// (SCOPE_SIGNALS_DATA_LIST followed by SCOPE_SIGNALS_UPD_LIST) - confirm both
// sides stay in sync whenever a signal is added, removed or resized.
static const scope_signal_t scope_signals[] = {
// Multi-bit data signals (addresses, tags, payloads, register ids).
{ 32, "icache_req_addr" },
{ 2 , "icache_req_tag" },
{ 32, "icache_rsp_data" },
{ 2 , "icache_rsp_tag" },
{ 32, "dcache_req_addr" },
{ 2 , "dcache_req_tag" },
{ 32, "dcache_rsp_data" },
{ 2 , "dcache_rsp_tag" },
{ 29, "dram_req_tag" },
{ 29, "dram_rsp_tag" },
{ 2 , "icache_req_warp_num" },
{ 2 , "dcache_req_warp_num" },
{ 32, "decode_curr_PC" },
{ 5 , "execute_rd" },
{ 2 , "execute_warp_num" },
{ 32, "execute_a" },
{ 32, "execute_b" },
{ 5 , "writeback_rd" },
{ 2 , "writeback_warp_num" },
{ 32, "writeback_data" },
{ 2 , "decode_warp_num" },
{ 1 , "decode_is_jal" },
{ 5 , "decode_rs1" },
{ 5 , "decode_rs2" },
{ 2 , "writeback_wb" },
// Valid / ready / delay flags.
{ 1, "icache_req_valid" },
{ 1, "icache_req_ready" },
{ 1, "icache_rsp_valid" },
{ 1, "icache_rsp_ready" },
{ 4, "dcache_req_valid" },
{ 1, "dcache_req_ready" },
{ 4, "dcache_rsp_valid" },
{ 1, "dcache_rsp_ready" },
{ 1, "dram_req_valid" },
{ 1, "dram_req_ready" },
{ 1, "dram_rsp_valid" },
{ 1, "dram_rsp_ready" },
{ 4, "decode_valid" },
{ 4, "execute_valid" },
{ 4, "writeback_valid" },
{ 1, "schedule_delay" },
{ 1, "memory_delay" },
{ 1, "exec_delay" },
{ 1, "gpr_stage_delay" },
};
static int vx_scope_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
@ -80,48 +133,19 @@ static int vx_scope_start(vx_device_h hdevice) {
// start execution
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN));
const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
std::ofstream ofs("vx_scope.vcd");
ofs << "$timescale 1 ns $end" << std::endl;
int fwidth = 0;
ofs << "$var reg 1 0 clk $end" << std::endl;
fwidth += 1;
ofs << "$var reg 1 1 icache_req_valid $end" << std::endl;
ofs << "$var reg 1 2 icache_req_ready $end" << std::endl;
ofs << "$var reg 1 3 icache_rsp_valid $end" << std::endl;
ofs << "$var reg 1 4 icache_rsp_ready $end" << std::endl;
ofs << "$var reg 4 5 dcache_req_valid $end" << std::endl;
ofs << "$var reg 1 6 dcache_req_ready $end" << std::endl;
ofs << "$var reg 4 7 dcache_rsp_valid $end" << std::endl;
ofs << "$var reg 1 8 dcache_rsp_ready $end" << std::endl;
ofs << "$var reg 1 9 dram_req_valid $end" << std::endl;
ofs << "$var reg 1 10 dram_req_ready $end" << std::endl;
ofs << "$var reg 1 11 dram_rsp_valid $end" << std::endl;
ofs << "$var reg 1 12 dram_rsp_ready $end" << std::endl;
ofs << "$var reg 1 13 schedule_delay $end" << std::endl;
fwidth += 19;
ofs << "$var reg 32 14 icache_req_addr $end" << std::endl;
ofs << "$var reg 2 15 icache_req_tag $end" << std::endl;
ofs << "$var reg 32 16 icache_rsp_data $end" << std::endl;
ofs << "$var reg 2 17 icache_rsp_tag $end" << std::endl;
ofs << "$var reg 32 18 dcache_req_addr $end" << std::endl;
ofs << "$var reg 2 19 dcache_req_tag $end" << std::endl;
ofs << "$var reg 32 20 dcache_rsp_data $end" << std::endl;
ofs << "$var reg 2 21 dcache_rsp_tag $end" << std::endl;
ofs << "$var reg 29 22 dram_req_tag $end" << std::endl;
ofs << "$var reg 29 23 dram_rsp_tag $end" << std::endl;
ofs << "$var reg 2 24 icache_req_warp $end" << std::endl;
ofs << "$var reg 2 25 dcache_req_warp $end" << std::endl;
fwidth += 198;
const int num_signals = 26;
int fwidth = 0;
for (int i = 0; i < num_signals; ++i) {
ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl;
fwidth += scope_signals[i].width;
}
uint64_t frame_width, max_frames, data_valid;
@ -141,7 +165,7 @@ static int vx_scope_start(vx_device_h hdevice) {
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width));
std::cout << "scope::frame_width=" << frame_width << std::endl;
assert((fwidth-1)== (int)frame_width);
assert(fwidth == (int)frame_width);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 3));
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames));
@ -149,7 +173,7 @@ static int vx_scope_start(vx_device_h hdevice) {
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 1));
std::vector<char> signa_data(frame_width+1);
std::vector<char> signal_data(frame_width+1);
uint64_t frame_offset = 0, frame_no = 0, timestamp = 0;
@ -174,34 +198,7 @@ static int vx_scope_start(vx_device_h hdevice) {
--delta;
}
signal_id = 1;
};
auto print_signal = [&] (uint64_t word, int signal_width) {
int word_offset = frame_offset % 64;
signa_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
++signal_offset;
++frame_offset;
if (signal_offset == signal_width) {
signa_data[signal_width] = 0; // string null termination
ofs << 'b' << signa_data.data() << ' ' << (num_signals - signal_id) << std::endl;
signal_offset = 0;
++signal_id;
}
if (frame_offset == frame_width) {
assert(0 == signal_offset);
signal_id = 0;
frame_offset = 0;
++frame_no;
if (frame_no != max_frames) {
print_header();
}
}
signal_id = num_signals;
};
print_header();
@ -218,34 +215,30 @@ static int vx_scope_start(vx_device_h hdevice) {
uint64_t word;
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &word));
do {
switch (num_signals - signal_id) {
default:
print_signal(word, 1);
break;
case 15:
case 17:
case 19:
case 21:
case 24:
case 25:
print_signal(word, 2);
break;
case 5:
case 7:
print_signal(word, 4);
break;
case 22:
case 23:
print_signal(word, 29);
break;
case 14:
case 16:
case 18:
case 20:
print_signal(word, 32);
break;
}
do {
int signal_width = scope_signals[signal_id-1].width;
int word_offset = frame_offset % 64;
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
++signal_offset;
++frame_offset;
if (signal_offset == signal_width) {
signal_data[signal_width] = 0; // string null termination
ofs << 'b' << signal_data.data() << ' ' << signal_id << std::endl;
signal_offset = 0;
--signal_id;
}
if (frame_offset == frame_width) {
assert(0 == signal_offset);
frame_offset = 0;
++frame_no;
if (frame_no != max_frames) {
print_header();
}
}
} while ((frame_offset % 64) != 0);
} while (frame_no != max_frames);

View file

@ -10,6 +10,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_WB \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)
@ -19,7 +20,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
#DEBUG=1
#AFU=1
AFU=1
CFLAGS += -fPIC

BIN
driver/tests/basic/kernel.bin Executable file → Normal file

Binary file not shown.

View file

@ -9,6 +9,7 @@ vortex_afu.json
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_WB
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_SCOPE
@ -87,12 +88,11 @@ vortex_afu.json
../rtl/VX_writeback.v
../rtl/VX_csr_pipe.v
../rtl/VX_csr_data.v
../rtl/VX_csr_wrapper.v
../rtl/VX_warp_sched.v
../rtl/VX_gpr.v
../rtl/VX_gpr_ram.v
../rtl/VX_gpr_stage.v
../rtl/VX_dmem_ctrl.v
../rtl/VX_mem_ctrl.v
../rtl/VX_alu_unit.v
../rtl/VX_lsu_unit.v
../rtl/VX_decode.v

View file

@ -804,7 +804,7 @@ end
`SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag);
`SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready);
`STATIC_ASSERT($bits({`SCOPE_SIGNALS_LIST}) == 217, "oops!")
`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 389, "oops!")
wire force_changed = (scope_icache_req_valid && scope_icache_req_ready)
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
@ -814,17 +814,17 @@ wire force_changed = (scope_icache_req_valid && scope_icache_req_ready)
|| (scope_dram_rsp_valid && scope_dram_rsp_ready);
VX_scope #(
.DATAW ($bits({`SCOPE_SIGNALS_LIST})),
.DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST})),
.BUSW (64),
.SIZE (8192),
.IDW (19)
.SIZE (4096),
.UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST}))
) scope (
.clk (clk),
.reset (SoftReset),
.start (vx_reset),
.stop (cmd_run_done),
.changed (force_changed),
.data_in ({`SCOPE_SIGNALS_LIST}),
.data_in ({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}),
.bus_in (csr_scope_cmd),
.bus_out (csr_scope_data),
.bus_read (csr_scope_read),
@ -841,7 +841,6 @@ Vortex_Socket #() vx_socket (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_FE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
.clk (clk),
@ -865,6 +864,7 @@ Vortex_Socket #() vx_socket (
// Snoop request
.snp_req_valid (vx_snp_req_valid),
.snp_req_addr (vx_snp_req_addr),
.snp_req_invalidate(0),
.snp_req_tag (vx_snp_req_tag),
.snp_req_ready (vx_snp_req_ready),

View file

@ -1,17 +1,17 @@
`include "VX_define.vh"
module VX_alu_unit (
input wire clk,
input wire reset,
input wire[31:0] src_a,
input wire[31:0] src_b,
input wire src_rs2,
input wire[31:0] itype_immed,
input wire[19:0] upper_immed,
input wire[4:0] alu_op,
input wire[31:0] curr_PC,
output reg[31:0] alu_result,
output reg alu_stall
input wire clk,
input wire reset,
input wire [31:0] src_a,
input wire [31:0] src_b,
input wire src_rs2,
input wire [31:0] itype_immed,
input wire [19:0] upper_immed,
input wire [4:0] alu_op,
input wire [31:0] curr_PC,
output reg [31:0] alu_result,
output reg alu_stall
);
localparam div_pipeline_len = 20;
localparam mul_pipeline_len = 8;
@ -85,7 +85,7 @@ module VX_alu_unit (
reg [15:0] inst_delay;
reg inst_was_stalling;
wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0;
wire inst_delay_stall = inst_was_stalling ? (inst_delay != 0) : (curr_inst_delay != 0);
assign alu_stall = inst_delay_stall;
always @(*) begin
@ -127,7 +127,7 @@ module VX_alu_unit (
wire which_in2;
wire[31:0] upper_immed;
assign which_in2 = src_rs2 == `RS2_IMMED;
assign which_in2 = (src_rs2 == `RS2_IMMED);
assign ALU_in1 = src_a;
assign ALU_in2 = which_in2 ? itype_immed : src_b;
@ -167,7 +167,7 @@ module VX_alu_unit (
wire which_in2;
wire[31:0] upper_immed_s;
assign which_in2 = src_rs2 == `RS2_IMMED;
assign which_in2 = (src_rs2 == `RS2_IMMED);
assign ALU_in1 = src_a;

View file

@ -3,6 +3,7 @@
module VX_back_end #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_BE_IO
input wire clk,
@ -70,7 +71,7 @@ module VX_back_end #(
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_SIGNALS_BE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
.clk (clk),
.reset (reset),
@ -123,4 +124,23 @@ module VX_back_end #(
.no_slot_csr (no_slot_csr)
);
`SCOPE_ASSIGN(scope_decode_valid, bckE_req_if.valid);
`SCOPE_ASSIGN(scope_decode_warp_num, bckE_req_if.warp_num);
`SCOPE_ASSIGN(scope_decode_curr_PC, bckE_req_if.curr_PC);
`SCOPE_ASSIGN(scope_decode_is_jal, bckE_req_if.is_jal);
`SCOPE_ASSIGN(scope_decode_rs1, bckE_req_if.rs1);
`SCOPE_ASSIGN(scope_decode_rs2, bckE_req_if.rs2);
`SCOPE_ASSIGN(scope_execute_valid, exec_unit_req_if.valid);
`SCOPE_ASSIGN(scope_execute_warp_num, exec_unit_req_if.warp_num);
`SCOPE_ASSIGN(scope_execute_rd, exec_unit_req_if.rd);
`SCOPE_ASSIGN(scope_execute_a, exec_unit_req_if.a_reg_data[0]);
`SCOPE_ASSIGN(scope_execute_b, exec_unit_req_if.b_reg_data[0]);
`SCOPE_ASSIGN(scope_writeback_valid, writeback_if.valid);
`SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb);
`SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num);
`SCOPE_ASSIGN(scope_writeback_rd, writeback_if.rd);
`SCOPE_ASSIGN(scope_writeback_data, writeback_if.data[0]);
endmodule

View file

@ -1,37 +0,0 @@
`include "VX_define.vh"
// Purely combinational CSR responder: copies the request's routing fields onto
// the write-back interface and returns either the per-thread index or the warp
// number, selected by the requested CSR address.
module VX_csr_wrapper (
// CSR request (read by this module).
VX_csr_req_if csr_req_if,
// Write-back response (driven by this module).
VX_wb_if csr_wb_if
);
// One 32-bit value per thread for each of the two readable CSRs.
wire[`NUM_THREADS-1:0][31:0] thread_ids;
wire[`NUM_THREADS-1:0][31:0] warp_ids;
genvar i;
generate
// thread_ids[i] is the constant thread index i.
for (i = 0; i < `NUM_THREADS; i++) begin : thread_ids_init
assign thread_ids[i] = i;
end
// Every thread sees the same value: the request's warp number with leading zeros.
// NOTE(review): the explicit padding is (31-`NW_BITS-1) bits, so the concatenation
// may be narrower than 32 bits and rely on implicit zero-extension - confirm
// against the declared width of warp_num.
for (i = 0; i < `NUM_THREADS; i++) begin : warp_ids_init
assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
end
endgenerate
// Routing fields are passed through unchanged.
assign csr_wb_if.valid = csr_req_if.valid;
assign csr_wb_if.warp_num = csr_req_if.warp_num;
assign csr_wb_if.rd = csr_req_if.rd;
assign csr_wb_if.wb = csr_req_if.wb;
// CSR address 0x20 selects the thread ids, 0x21 the warp ids.
wire thread_select = csr_req_if.csr_address == 12'h20;
wire warp_select = csr_req_if.csr_address == 12'h21;
// Any other address reads back as zero.
assign csr_wb_if.csr_result = thread_select ? thread_ids :
warp_select ? warp_ids :
0;
endmodule

View file

@ -91,7 +91,7 @@ module VX_decode(
assign func7 = in_instruction[31:25];
assign u_12 = in_instruction[31:20];
assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4;
assign frE_to_bckE_req_if.next_PC = in_curr_PC + 32'h4;
// Write Back signal
assign is_rtype = (curr_opcode == `INST_R);
@ -169,12 +169,12 @@ module VX_decode(
case (curr_opcode)
`INST_JAL:
begin
temp_jal = 1'b1 && in_valid;
temp_jal = in_valid;
temp_jal_offset = jal_1_offset;
end
`INST_JALR:
begin
temp_jal = 1'b1 && in_valid;
temp_jal = in_valid;
temp_jal_offset = jal_2_offset;
end
`INST_SYS:
@ -185,13 +185,13 @@ module VX_decode(
end
default:
begin
temp_jal = 1'b0 && in_valid;
temp_jal_offset = 32'hdeadbeef;
temp_jal = 1'b0;
temp_jal_offset = 32'hdeadbeef;
end
endcase
end
assign frE_to_bckE_req_if.jalQual = is_jal;
assign frE_to_bckE_req_if.is_jal = is_jal;
assign frE_to_bckE_req_if.jal = temp_jal;
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;

View file

@ -283,7 +283,34 @@
///////////////////////////////////////////////////////////////////////////////
`ifdef SCOPE
`define SCOPE_SIGNALS_LIST \
// Data-signal portion of the scope capture list.
// The trailing comma after the last entry is deliberate: this macro is expanded
// immediately before SCOPE_SIGNALS_UPD_LIST inside a single concatenation, so
// the comma separates the two lists.
// NOTE(review): the host-side decoder appears to assume this exact order and
// these widths - confirm it is updated together with any change here.
`define SCOPE_SIGNALS_DATA_LIST \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_addr, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_dram_req_tag, \
scope_dram_rsp_tag, \
scope_icache_req_warp_num, \
scope_dcache_req_warp_num, \
scope_decode_curr_PC, \
scope_execute_rd, \
scope_execute_warp_num, \
scope_execute_a, \
scope_execute_b, \
scope_writeback_rd, \
scope_writeback_warp_num, \
scope_writeback_data, \
scope_decode_warp_num, \
scope_decode_is_jal, \
scope_decode_rs1, \
scope_decode_rs2, \
scope_writeback_wb,
`define SCOPE_SIGNALS_UPD_LIST \
scope_icache_req_valid, \
scope_icache_req_ready, \
scope_icache_rsp_valid, \
@ -296,23 +323,18 @@
scope_dram_req_ready, \
scope_dram_rsp_valid, \
scope_dram_rsp_ready, \
scope_decode_valid, \
scope_execute_valid, \
scope_writeback_valid, \
scope_schedule_delay, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_addr, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_dram_req_tag, \
scope_dram_rsp_tag, \
scope_icache_req_warp, \
scope_dcache_req_warp
scope_memory_delay, \
scope_exec_delay, \
scope_gpr_stage_delay
`define SCOPE_SIGNALS_DECL \
wire scope_icache_req_valid; \
wire [31:0] scope_icache_req_addr; \
wire [1:0] scope_icache_req_warp_num; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
@ -321,6 +343,7 @@
wire scope_icache_rsp_ready; \
wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid; \
wire [31:0] scope_dcache_req_addr; \
wire [1:0] scope_dcache_req_warp_num; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid; \
@ -334,13 +357,31 @@
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
wire scope_dram_rsp_ready; \
wire scope_schedule_delay; \
wire [1:0] scope_icache_req_warp; \
wire [1:0] scope_dcache_req_warp;
wire scope_memory_delay; \
wire scope_exec_delay; \
wire scope_gpr_stage_delay; \
wire [3:0] scope_decode_valid; \
wire [1:0] scope_decode_warp_num; \
wire [31:0] scope_decode_curr_PC; \
wire scope_decode_is_jal; \
wire [4:0] scope_decode_rs1; \
wire [4:0] scope_decode_rs2; \
wire [3:0] scope_execute_valid; \
wire [1:0] scope_execute_warp_num; \
wire [4:0] scope_execute_rd; \
wire [31:0] scope_execute_a; \
wire [31:0] scope_execute_b; \
wire [3:0] scope_writeback_valid; \
wire [1:0] scope_writeback_warp_num; \
wire [1:0] scope_writeback_wb; \
wire [4:0] scope_writeback_rd; \
wire [31:0] scope_writeback_data;
`define SCOPE_SIGNALS_ICACHE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_icache_req_valid, \
output wire [31:0] scope_icache_req_addr, \
output wire [1:0] scope_icache_req_warp_num, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
@ -353,6 +394,7 @@
/* verilator lint_off UNDRIVEN */ \
output wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid, \
output wire [31:0] scope_dcache_req_addr, \
output wire [1:0] scope_dcache_req_warp_num, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid, \
@ -374,21 +416,35 @@
`define SCOPE_SIGNALS_CORE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_schedule_delay, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_FE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire [1:0] scope_icache_req_warp, \
output wire scope_memory_delay, \
output wire scope_exec_delay, \
output wire scope_gpr_stage_delay, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_BE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire [1:0] scope_dcache_req_warp, \
output wire [3:0] scope_decode_valid, \
output wire [1:0] scope_decode_warp_num, \
output wire [31:0] scope_decode_curr_PC, \
output wire scope_decode_is_jal, \
output wire [4:0] scope_decode_rs1, \
output wire [4:0] scope_decode_rs2, \
output wire [3:0] scope_execute_valid, \
output wire [1:0] scope_execute_warp_num, \
output wire [4:0] scope_execute_rd, \
output wire [31:0] scope_execute_a, \
output wire [31:0] scope_execute_b, \
output wire [3:0] scope_writeback_valid, \
output wire [1:0] scope_writeback_warp_num, \
output wire [1:0] scope_writeback_wb, \
output wire [4:0] scope_writeback_rd, \
output wire [31:0] scope_writeback_data, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_ICACHE_ATTACH \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
@ -399,6 +455,7 @@
`define SCOPE_SIGNALS_DCACHE_ATTACH \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
.scope_dcache_req_tag (scope_dcache_req_tag), \
.scope_dcache_req_ready (scope_dcache_req_ready), \
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
@ -415,13 +472,28 @@
.scope_dram_rsp_ready (scope_dram_rsp_ready),
`define SCOPE_SIGNALS_CORE_ATTACH \
.scope_schedule_delay (scope_schedule_delay),
`define SCOPE_SIGNALS_FE_ATTACH \
.scope_icache_req_warp (scope_icache_req_warp),
.scope_schedule_delay (scope_schedule_delay), \
.scope_memory_delay (scope_memory_delay), \
.scope_exec_delay (scope_exec_delay), \
.scope_gpr_stage_delay (scope_gpr_stage_delay),
`define SCOPE_SIGNALS_BE_ATTACH \
.scope_dcache_req_warp (scope_dcache_req_warp),
.scope_decode_valid (scope_decode_valid), \
.scope_decode_warp_num (scope_decode_warp_num), \
.scope_decode_curr_PC (scope_decode_curr_PC), \
.scope_decode_is_jal (scope_decode_is_jal), \
.scope_decode_rs1 (scope_decode_rs1), \
.scope_decode_rs2 (scope_decode_rs2), \
.scope_execute_valid (scope_execute_valid), \
.scope_execute_warp_num (scope_execute_warp_num), \
.scope_execute_rd (scope_execute_rd), \
.scope_execute_a (scope_execute_a), \
.scope_execute_b (scope_execute_b), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_warp_num (scope_writeback_warp_num), \
.scope_writeback_wb (scope_writeback_wb), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data),
`define SCOPE_ASSIGN(d,s) assign d = s
`else
@ -429,14 +501,12 @@
`define SCOPE_SIGNALS_DCACHE_IO
`define SCOPE_SIGNALS_DRAM_IO
`define SCOPE_SIGNALS_CORE_IO
`define SCOPE_SIGNALS_FE_IO
`define SCOPE_SIGNALS_BE_IO
`define SCOPE_SIGNALS_ICACHE_ATTACH
`define SCOPE_SIGNALS_DCACHE_ATTACH
`define SCOPE_SIGNALS_DRAM_ATTACH
`define SCOPE_SIGNALS_CORE_ATTACH
`define SCOPE_SIGNALS_FE_ATTACH
`define SCOPE_SIGNALS_BE_ATTACH
`define SCOPE_ASSIGN(d,s)

View file

@ -7,11 +7,8 @@ module VX_exec_unit (
VX_exec_unit_req_if exec_unit_req_if,
// Output
// Writeback
VX_wb_if inst_exec_wb_if,
// JAL Response
VX_wb_if inst_exec_wb_if,
VX_jal_rsp_if jal_rsp_if,
// Branch Response
VX_branch_rsp_if branch_rsp_if,
input wire no_slot_exec,
@ -71,7 +68,7 @@ module VX_exec_unit (
`DEBUG_BEGIN
wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;
wire jal_branch_found_valid;
wire jal_branch_found_valid;
`DEBUG_END
VX_generic_priority_encoder #(
@ -103,16 +100,12 @@ module VX_exec_unit (
generate
for (i = 0; i < `NUM_THREADS; i++) begin
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
assign duplicate_PC_data[i] = exec_unit_req_if.next_PC;
end
endgenerate
// VX_wb_if inst_exec_wb_temp_if();
// JAL Response
VX_jal_rsp_if jal_rsp_temp_if();
// Branch Response
VX_branch_rsp_if branch_rsp_temp_if();
VX_jal_rsp_if jal_rsp_temp_if();
VX_branch_rsp_if branch_rsp_temp_if();
// Actual Writeback
assign inst_exec_wb_if.rd = exec_unit_req_if.rd;
@ -120,7 +113,7 @@ module VX_exec_unit (
assign inst_exec_wb_if.valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}};
assign inst_exec_wb_if.warp_num = exec_unit_req_if.warp_num;
assign inst_exec_wb_if.data = exec_unit_req_if.jal ? duplicate_PC_data : alu_result;
assign inst_exec_wb_if.pc = in_curr_PC;
assign inst_exec_wb_if.curr_PC = in_curr_PC;
// Jal rsp
assign jal_rsp_temp_if.jal = in_jal;
@ -133,50 +126,26 @@ module VX_exec_unit (
assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num;
assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset
wire zero = 0;
// VX_generic_register #(.N(174)) exec_reg(
// .clk (clk),
// .reset(reset),
// .stall(zero),
// .flush(zero),
// .in ({inst_exec_wb_temp_if.rd, inst_exec_wb_temp_if.wb, inst_exec_wb_temp_if.wb_valid, inst_exec_wb_temp_if.wb_warp_num, inst_exec_wb_temp_if.alu_result, inst_exec_wb_temp_if.exec_wb_pc}),
// .out ({inst_exec_wb_if.rd , inst_exec_wb_if.wb , inst_exec_wb_if.wb_valid , inst_exec_wb_if.wb_warp_num , inst_exec_wb_if.alu_result , inst_exec_wb_if.exec_wb_pc })
// );
VX_generic_register #(
.N(33 + `NW_BITS-1 + 1)
) jal_reg (
.clk (clk),
.reset(reset),
.stall(zero),
.flush(zero),
.in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}),
.out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num})
.clk (clk),
.reset (reset),
.stall (1'b0),
.flush (1'b0),
.in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}),
.out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num})
);
VX_generic_register #(
.N(34 + `NW_BITS-1 + 1)
) branch_reg (
.clk (clk),
.reset(reset),
.stall(zero),
.flush(zero),
.in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}),
.out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest })
.clk (clk),
.reset (reset),
.stall (1'b0),
.flush (1'b0),
.in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}),
.out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest })
);
// always @(*) begin
// case (in_alu_op)
// `ALU_CSR_RW: out_csr_result = in_csr_mask;
// `ALU_CSR_RS: out_csr_result = in_csr_data | in_csr_mask;
// `ALU_CSR_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
// default: out_csr_result = 32'hdeadbeef;
// endcase
// end
// assign out_is_csr = exec_unit_req_if.is_csr;
// assign out_csr_address = exec_unit_req_if.csr_address;
endmodule : VX_exec_unit

View file

@ -3,7 +3,7 @@
module VX_front_end #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_FE_IO
`SCOPE_SIGNALS_ICACHE_IO
input wire clk,
input wire reset,
@ -65,7 +65,7 @@ module VX_front_end #(
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
`SCOPE_SIGNALS_FE_ATTACH
`SCOPE_SIGNALS_ICACHE_ATTACH
.clk (clk),
.reset (reset),

View file

@ -3,40 +3,40 @@
module VX_gpr (
input wire clk,
input wire reset,
input wire valid_write_request,
input wire write_ce,
VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if,
output wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data,
output wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data
output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data,
output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data
);
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_uqual;
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_uqual;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_uqual;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_uqual;
assign a_reg_data = (gpr_read_if.rs1 != 0) ? a_reg_data_uqual : 0;
assign b_reg_data = (gpr_read_if.rs2 != 0) ? b_reg_data_uqual : 0;
wire write_enable = valid_write_request && ((writeback_if.wb != 0));
wire write_enable = write_ce && ((writeback_if.wb != 0));
`ifndef ASIC
VX_gpr_ram gpr_ram (
.we (write_enable),
.clk (clk),
.reset (reset),
.waddr (writeback_if.rd),
.raddr1(gpr_read_if.rs1),
.raddr2(gpr_read_if.rs2),
.be (writeback_if.valid),
.wdata (writeback_if.data),
.q1 (a_reg_data_uqual),
.q2 (b_reg_data_uqual)
.we (write_enable),
.clk (clk),
.reset (reset),
.waddr (writeback_if.rd),
.raddr1 (gpr_read_if.rs1),
.raddr2 (gpr_read_if.rs2),
.be (writeback_if.valid),
.wdata (writeback_if.data),
.q1 (a_reg_data_uqual),
.q2 (b_reg_data_uqual)
);
`else
wire going_to_write = write_enable & (| writeback_if.wb_valid);
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
genvar i;
for (i = 0; i < `NUM_THREADS; i++) begin
@ -44,31 +44,27 @@ module VX_gpr (
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
end
// wire cenb = !going_to_write;
wire cenb = 0;
wire cenb = 0;
wire cena_1 = 0;
wire cena_2 = 0;
// wire cena_1 = (gpr_read_if.rs1 == 0);
// wire cena_2 = (gpr_read_if.rs2 == 0);
wire cena_1 = 0;
wire cena_2 = 0;
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_a;
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_a;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_b;
`ifndef SYN
genvar j;
for (i = 0; i < `NUM_THREADS; i++) begin
for (j = 0; j < `NUM_GPRS; j++) begin
assign a_reg_data_uqual[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j];
assign b_reg_data_uqual[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j];
assign a_reg_data_uqual[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
assign b_reg_data_uqual[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
end
end
`else
assign a_reg_data_uqual = temp_a;
assign b_reg_data_uqual = temp_b;
assign a_reg_data_uqual = tmp_a;
assign b_reg_data_uqual = tmp_b;
`endif
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = writeback_if.write_data;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = writeback_if.write_data;
for (i = 0; i < 'NT; i=i+4)
begin
@ -79,7 +75,7 @@ module VX_gpr (
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_a[(i+3):(i)]),
.QA(tmp_a[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
@ -116,7 +112,7 @@ module VX_gpr (
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_b[(i+3):(i)]),
.QA(tmp_b[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),

View file

@ -10,8 +10,7 @@ module VX_gpr_stage (
input wire stall_gpr_csr,
output wire gpr_stage_delay,
// inputs
// Instruction Information
// decode inputs
VX_frE_to_bckE_req_if bckE_req_if,
// WriteBack inputs
@ -24,11 +23,11 @@ module VX_gpr_stage (
VX_csr_req_if csr_req_if
);
`DEBUG_BEGIN
wire[31:0] curr_PC = bckE_req_if.curr_PC;
wire[31:0] curr_PC = bckE_req_if.curr_PC;
wire[2:0] branchType = bckE_req_if.branch_type;
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
wire jalQual = bckE_req_if.jalQual;
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
wire is_jal = bckE_req_if.is_jal;
`DEBUG_END
VX_gpr_read_if gpr_read_if();
@ -38,15 +37,15 @@ module VX_gpr_stage (
`ifndef ASIC
VX_gpr_jal_if gpr_jal_if();
assign gpr_jal_if.is_jal = bckE_req_if.jalQual;
assign gpr_jal_if.is_jal = bckE_req_if.is_jal;
assign gpr_jal_if.curr_PC = bckE_req_if.curr_PC;
`else
VX_gpr_jal_if gpr_jal_if();
assign gpr_jal_if.is_jal = exec_unit_req_if.jalQual;
assign gpr_jal_if.is_jal = exec_unit_req_if.is_jal;
assign gpr_jal_if.curr_PC = exec_unit_req_if.curr_PC;
`endif
VX_gpr_data_if gpr_datf_if();
VX_gpr_data_if gpr_datf_if();
VX_gpr_wrapper grp_wrapper (
.clk (clk),
@ -73,6 +72,7 @@ module VX_gpr_stage (
.gpu_inst_req_if (gpu_inst_req_temp_if),
.csr_req_if (csr_req_temp_if)
);
`DEBUG_BEGIN
wire is_lsu = (| lsu_req_temp_if.valid);
`DEBUG_END
@ -104,11 +104,11 @@ module VX_gpr_stage (
`UNUSED_PIN (size)
);
wire[`NUM_THREADS-1:0][31:0] temp_store_data;
wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data
wire [`NUM_THREADS-1:0][31:0] temp_store_data;
wire [`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data
wire[`NUM_THREADS-1:0][31:0] real_store_data;
wire[`NUM_THREADS-1:0][31:0] real_base_address; // A reg data
wire [`NUM_THREADS-1:0][31:0] real_store_data;
wire [`NUM_THREADS-1:0][31:0] real_base_address; // A reg data
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
@ -132,12 +132,12 @@ module VX_gpr_stage (
VX_generic_register #(
.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))
) lsu_reg (
.clk (clk),
.reset(reset),
.stall(stall_lsu),
.flush(flush_lsu),
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.lsu_pc ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
.clk (clk),
.reset (reset),
.stall (stall_lsu),
.flush (flush_lsu),
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.curr_PC ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
);
VX_generic_register #(
@ -147,8 +147,8 @@ module VX_gpr_stage (
.reset (reset),
.stall (stall_exec),
.flush (flush_exec),
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
assign exec_unit_req_if.a_reg_data = real_base_address;
@ -161,8 +161,8 @@ module VX_gpr_stage (
.reset (reset),
.stall (stall_rest),
.flush (flush_rest),
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next })
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC })
);
assign gpu_inst_req_if.a_reg_data = real_base_address;
@ -189,8 +189,8 @@ module VX_gpr_stage (
.reset (reset),
.stall (stall_lsu),
.flush (flush_lsu),
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_address, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.lsu_pc , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_address , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
.in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_address, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}),
.out ({lsu_req_if.valid , lsu_req_if.curr_PC , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_address , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb })
);
VX_generic_register #(
@ -200,8 +200,8 @@ module VX_gpr_stage (
.reset (reset),
.stall (stall_exec),
.flush (flush_exec),
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
);
VX_generic_register #(
@ -211,8 +211,8 @@ module VX_gpr_stage (
.reset (reset),
.stall (stall_rest),
.flush (flush_rest),
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 })
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 })
);
VX_generic_register #(

View file

@ -1,20 +1,19 @@
`include "VX_define.vh"
module VX_gpr_wrapper (
input wire clk,
input wire reset,
VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if,
VX_gpr_jal_if gpr_jal_if,
input wire clk,
input wire reset,
VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if,
VX_gpr_jal_if gpr_jal_if,
output wire[`NUM_THREADS-1:0][31:0] a_reg_data,
output wire[`NUM_THREADS-1:0][31:0] b_reg_data
);
output wire [`NUM_THREADS-1:0][31:0] a_reg_data,
output wire [`NUM_THREADS-1:0][31:0] b_reg_data
);
wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_a_reg_data;
wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_b_reg_data;
wire [`NUM_THREADS-1:0][31:0] jal_data;
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data;
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
wire[`NUM_THREADS-1:0][31:0] jal_data;
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i++) begin : jal_data_assign
@ -23,49 +22,42 @@ module VX_gpr_wrapper (
endgenerate
`ifndef ASIC
assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num]));
assign b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]);
assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (tmp_a_reg_data[gpr_read_if.warp_num]));
assign b_reg_data = (tmp_b_reg_data[gpr_read_if.warp_num]);
`else
wire zer = 0;
wire[`NW_BITS-1:0] old_warp_num;
wire [`NW_BITS-1:0] old_warp_num;
VX_generic_register #(
.N(`NW_BITS-1+1)
) store_wn (
.clk (clk),
.reset(reset),
.stall(zer),
.flush(zer),
.in (gpr_read_if.warp_num),
.out (old_warp_num)
.clk (clk),
.reset (reset),
.stall (1'b0),
.flush (1'b0),
.in (gpr_read_if.warp_num),
.out (old_warp_num)
);
assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
assign b_reg_data = (temp_b_reg_data[old_warp_num]);
assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (tmp_a_reg_data[old_warp_num]));
assign b_reg_data = (tmp_b_reg_data[old_warp_num]);
`endif
generate
for (i = 0; i < `NUM_WARPS; i++) begin : warp_gprs
wire valid_write_request = i == writeback_if.warp_num;
wire write_ce = (i == writeback_if.warp_num);
VX_gpr gpr(
.clk (clk),
.reset (reset),
.valid_write_request (valid_write_request),
.gpr_read_if (gpr_read_if),
.writeback_if (writeback_if),
.a_reg_data (temp_a_reg_data[i]),
.b_reg_data (temp_b_reg_data[i])
.clk (clk),
.reset (reset),
.write_ce (write_ce),
.gpr_read_if (gpr_read_if),
.writeback_if (writeback_if),
.a_reg_data (tmp_a_reg_data[i]),
.b_reg_data (tmp_b_reg_data[i])
);
/*always_ff @(posedge clk) begin
if (valid_write_request && ((writeback_if.wb != 0))) begin
$display("%t: GPR%01d$: wid=%0d, rd=%0d, data=%0h", $time, 0, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
end
end*/
end
endgenerate
endgenerate
endmodule

View file

@ -78,7 +78,7 @@ module VX_gpu_inst (
assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
assign warp_ctl_if.split_new_mask = split_new_use_mask;
assign warp_ctl_if.split_later_mask = split_new_later_mask;
assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next;
assign warp_ctl_if.split_save_pc = gpu_inst_req_if.next_PC;
assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num;
// gpu_inst_req_if.is_wspawn

View file

@ -3,7 +3,7 @@
module VX_icache_stage #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_FE_IO
`SCOPE_SIGNALS_ICACHE_IO
input wire clk,
input wire reset,
@ -45,8 +45,6 @@ module VX_icache_stage #(
.read_data ({dbg_mrq_write_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num})
);
`SCOPE_ASSIGN(scope_icache_req_warp, fe_inst_meta_fi.warp_num);
always @(posedge clk) begin
if (reset) begin
//--
@ -76,7 +74,7 @@ module VX_icache_stage #(
assign icache_req_if.core_req_tag = mrq_write_addr;
`endif
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0];
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_valid ? icache_rsp_if.core_rsp_data[0] : 0;
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
assign icache_stage_response = mrq_pop;
@ -85,6 +83,16 @@ module VX_icache_stage #(
// Can't accept new response
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
// Scope taps for the I-cache core request/response interface.
// NOTE(review): SCOPE_ASSIGN is defined outside this view; presumably it drives the named scope_* port from the given expression - confirm.
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.core_req_valid);
// The request address is extended with two low zero bits before it is captured.
`SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_if.core_req_addr, 2'b0});
`SCOPE_ASSIGN(scope_icache_req_warp_num, fe_inst_meta_fi.warp_num);
`SCOPE_ASSIGN(scope_icache_req_tag, icache_req_if.core_req_tag);
`SCOPE_ASSIGN(scope_icache_req_ready, icache_req_if.core_req_ready);
`SCOPE_ASSIGN(scope_icache_rsp_valid, icache_rsp_if.core_rsp_valid);
// NOTE(review): core_rsp_data is read as core_rsp_data[0] elsewhere in this module but is passed unindexed here - confirm the widths match.
`SCOPE_ASSIGN(scope_icache_rsp_data, icache_rsp_if.core_rsp_data);
`SCOPE_ASSIGN(scope_icache_rsp_tag, icache_rsp_if.core_rsp_tag);
`SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.core_rsp_ready);
`ifdef DBG_PRINT_CORE_ICACHE
always_ff @(posedge clk) begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin

View file

@ -42,14 +42,13 @@ module VX_inst_multiplex (
assign lsu_req_if.mem_write = bckE_req_if.mem_write;
assign lsu_req_if.rd = bckE_req_if.rd;
assign lsu_req_if.wb = bckE_req_if.wb;
assign lsu_req_if.lsu_pc = bckE_req_if.curr_PC;
assign lsu_req_if.curr_PC = bckE_req_if.curr_PC;
// Execute Unit
assign exec_unit_req_if.valid = bckE_req_if.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
assign exec_unit_req_if.warp_num = bckE_req_if.warp_num;
assign exec_unit_req_if.curr_PC = bckE_req_if.curr_PC;
assign exec_unit_req_if.PC_next = bckE_req_if.PC_next;
assign exec_unit_req_if.next_PC = bckE_req_if.next_PC;
assign exec_unit_req_if.rd = bckE_req_if.rd;
assign exec_unit_req_if.wb = bckE_req_if.wb;
assign exec_unit_req_if.a_reg_data = gpr_data_if.a_reg_data;
@ -61,12 +60,11 @@ module VX_inst_multiplex (
assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed;
assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed;
assign exec_unit_req_if.branch_type = bckE_req_if.branch_type;
assign exec_unit_req_if.jalQual = bckE_req_if.jalQual;
assign exec_unit_req_if.is_jal = bckE_req_if.is_jal;
assign exec_unit_req_if.jal = bckE_req_if.jal;
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
assign exec_unit_req_if.is_etype = bckE_req_if.is_etype;
// GPU Req
assign gpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask;
assign gpu_inst_req_if.warp_num = bckE_req_if.warp_num;
@ -76,8 +74,7 @@ module VX_inst_multiplex (
assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
assign gpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data;
assign gpu_inst_req_if.rd2 = gpr_data_if.b_reg_data[0];
assign gpu_inst_req_if.pc_next = bckE_req_if.PC_next;
assign gpu_inst_req_if.next_PC = bckE_req_if.next_PC;
// CSR Req
assign csr_req_if.valid = bckE_req_if.valid & is_csr_mask;
@ -90,8 +87,4 @@ module VX_inst_multiplex (
assign csr_req_if.csr_immed = bckE_req_if.csr_immed;
assign csr_req_if.csr_mask = bckE_req_if.csr_mask;
endmodule
endmodule

View file

@ -2,9 +2,8 @@
module VX_lsu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_BE_IO
) (
`SCOPE_SIGNALS_DCACHE_IO
input wire clk,
input wire reset,
@ -44,16 +43,14 @@ module VX_lsu_unit #(
VX_generic_register #(
.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)
) lsu_buffer (
.clk (clk),
.reset(reset),
.stall(delay),
.flush(1'b0),
.in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}),
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
.clk (clk),
.reset (reset),
.stall (delay),
.flush (1'b0),
.in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}),
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
);
`SCOPE_ASSIGN(scope_dcache_req_warp, use_warp_num);
wire core_req_rw = (use_mem_write != `BYTE_EN_NO);
wire [`NUM_THREADS-1:0][4:0] mem_req_offset;
@ -108,7 +105,7 @@ module VX_lsu_unit #(
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({dbg_mrq_write_addr, mem_wb_if.pc, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num})
.read_data ({dbg_mrq_write_addr, mem_wb_if.curr_PC, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num})
);
always @(posedge clk) begin
@ -165,6 +162,16 @@ module VX_lsu_unit #(
// Can't accept new response
assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem;
// Scope taps for the D-cache core request/response interface.
// NOTE(review): SCOPE_ASSIGN is defined outside this view; presumably it drives the named scope_* port from the given expression - confirm.
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.core_req_valid);
// Only entry [0] of the request address is captured, extended with two low zero bits.
`SCOPE_ASSIGN(scope_dcache_req_addr, {dcache_req_if.core_req_addr[0], 2'b0});
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
`SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.core_req_tag);
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.core_req_ready);
`SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.core_rsp_valid);
// Only entry [0] of the response data is captured; the valid signals above are passed unindexed.
`SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.core_rsp_data[0]);
`SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.core_rsp_tag);
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready);
`ifdef DBG_PRINT_CORE_DCACHE
always_ff @(posedge clk) begin
@ -172,7 +179,7 @@ module VX_lsu_unit #(
$display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data);
end
if ((| dcache_rsp_if.core_rsp_valid) && dcache_rsp_if.core_rsp_ready) begin
$display("%t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
$display("%t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.curr_PC, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
end
end
`endif

View file

@ -1,6 +1,6 @@
`include "VX_define.vh"
module VX_dmem_ctrl # (
module VX_mem_ctrl # (
parameter CORE_ID = 0
) (
input wire clk,
@ -111,6 +111,7 @@ module VX_dmem_ctrl # (
// Snoop request
.snp_req_valid (0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_tag (0),
`UNUSED_PIN (snp_req_ready),
@ -122,6 +123,7 @@ module VX_dmem_ctrl # (
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
@ -192,6 +194,7 @@ module VX_dmem_ctrl # (
// Snoop request
.snp_req_valid (dcache_snp_req_if.snp_req_valid),
.snp_req_addr (dcache_snp_req_if.snp_req_addr),
.snp_req_invalidate (dcache_snp_req_if.snp_req_invalidate),
.snp_req_tag (dcache_snp_req_if.snp_req_tag),
.snp_req_ready (dcache_snp_req_if.snp_req_ready),
@ -203,6 +206,7 @@ module VX_dmem_ctrl # (
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
@ -272,6 +276,7 @@ module VX_dmem_ctrl # (
// Snoop request
.snp_req_valid (0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_tag (0),
`UNUSED_PIN (snp_req_ready),
@ -282,7 +287,8 @@ module VX_dmem_ctrl # (
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),

View file

@ -6,7 +6,6 @@ module VX_pipeline #(
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_FE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -57,26 +56,6 @@ module VX_pipeline #(
wire gpr_stage_delay;
wire schedule_delay;
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_valid);
`SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_addr, 2'b0});
`SCOPE_ASSIGN(scope_icache_req_tag, icache_req_tag);
`SCOPE_ASSIGN(scope_icache_req_ready, icache_req_ready);
`SCOPE_ASSIGN(scope_icache_rsp_valid, icache_rsp_valid);
`SCOPE_ASSIGN(scope_icache_rsp_data, icache_rsp_data);
`SCOPE_ASSIGN(scope_icache_rsp_tag, icache_rsp_tag);
`SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_ready);
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_valid);
`SCOPE_ASSIGN(scope_dcache_req_addr, {dcache_req_addr[0], 2'b0});
`SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_tag);
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_ready);
`SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_valid);
`SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_data[0]);
`SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_tag);
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_ready);
`SCOPE_ASSIGN(scope_schedule_delay, schedule_delay);
// Dcache
VX_cache_core_req_if #(
.NUM_REQUESTS(`NUM_THREADS),
@ -121,7 +100,8 @@ module VX_pipeline #(
VX_front_end #(
.CORE_ID(CORE_ID)
) front_end (
`SCOPE_SIGNALS_FE_ATTACH
`SCOPE_SIGNALS_ICACHE_ATTACH
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
@ -149,7 +129,9 @@ module VX_pipeline #(
VX_back_end #(
.CORE_ID(CORE_ID)
) back_end (
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
@ -192,4 +174,17 @@ module VX_pipeline #(
assign core_icache_rsp_if.core_rsp_tag = icache_rsp_tag;
assign icache_rsp_ready = core_icache_rsp_if.core_rsp_ready;
// Scope taps for the pipeline delay signals.
// NOTE(review): SCOPE_ASSIGN is defined outside this view; presumably it drives the named scope_* port from the given signal - confirm.
`SCOPE_ASSIGN(scope_schedule_delay, schedule_delay);
`SCOPE_ASSIGN(scope_memory_delay, memory_delay);
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay);
// Optional debug trace, compiled only when DBG_PRINT_WB is defined.
`ifdef DBG_PRINT_WB
always_ff @(posedge clk) begin
// Print when any bit of writeback_if.valid is set and writeback_if.wb is non-zero.
if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin
$display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
end
end
`endif
endmodule // VX_pipeline

View file

@ -55,13 +55,11 @@ module VX_writeback (
mem_wb ? mem_wb_if.warp_num :
0;
assign writeback_tmp_if.pc = exec_wb ? inst_exec_wb_if.pc :
csr_wb ? 32'hdeadbeef :
mem_wb ? mem_wb_if.pc :
assign writeback_tmp_if.curr_PC = exec_wb ? inst_exec_wb_if.curr_PC :
csr_wb ? 32'hdeadbeef :
mem_wb ? mem_wb_if.curr_PC :
32'hdeadbeef;
wire zero = 0;
wire [`NUM_THREADS-1:0][31:0] use_wb_data;
VX_generic_register #(
@ -69,10 +67,10 @@ module VX_writeback (
) wb_register (
.clk (clk),
.reset(reset),
.stall(zero),
.flush(zero),
.in ({writeback_tmp_if.data, writeback_tmp_if.valid, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.warp_num, writeback_tmp_if.pc}),
.out ({use_wb_data, writeback_if.valid, writeback_if.rd, writeback_if.wb, writeback_if.warp_num, writeback_if.pc})
.stall(1'b0),
.flush(1'b0),
.in ({writeback_tmp_if.data, writeback_tmp_if.valid, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC}),
.out ({use_wb_data, writeback_if.valid, writeback_if.rd, writeback_if.wb, writeback_if.warp_num, writeback_if.curr_PC})
);
reg [31:0] last_data_wb /* verilator public */;

View file

@ -6,7 +6,6 @@ module Vortex #(
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_FE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -46,6 +45,7 @@ module Vortex #(
// Snoop request
input wire snp_req_valid,
input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire [`DSNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
@ -172,7 +172,6 @@ module Vortex #(
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_FE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
.clk(clk),
@ -223,18 +222,19 @@ module Vortex #(
.SNP_TAG_WIDTH(`DSNP_TAG_WIDTH)
) dcache_snp_rsp_if();
assign dcache_snp_req_if.snp_req_valid = snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = snp_req_addr;
assign dcache_snp_req_if.snp_req_tag = snp_req_tag;
assign snp_req_ready = dcache_snp_req_if.snp_req_ready;
assign dcache_snp_req_if.snp_req_valid = snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = snp_req_addr;
assign dcache_snp_req_if.snp_req_invalidate = snp_req_invalidate;
assign dcache_snp_req_if.snp_req_tag = snp_req_tag;
assign snp_req_ready = dcache_snp_req_if.snp_req_ready;
assign snp_rsp_valid = dcache_snp_rsp_if.snp_rsp_valid;
assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag;
assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready;
VX_dmem_ctrl #(
VX_mem_ctrl #(
.CORE_ID(CORE_ID)
) dmem_ctrl (
) mem_ctrl (
.clk (clk),
.reset (reset),

View file

@ -6,7 +6,6 @@ module Vortex_Cluster #(
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_FE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -31,6 +30,7 @@ module Vortex_Cluster #(
// Snoop request
input wire snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire[`L2SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
@ -84,8 +84,9 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
wire[`NUM_CORES-1:0] per_core_snp_req_valid;
wire[`NUM_CORES-1:0] per_core_snp_req_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr;
wire[`NUM_CORES-1:0] per_core_snp_req_invalidate;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag;
wire[`NUM_CORES-1:0] per_core_snp_req_ready;
@ -115,7 +116,6 @@ module Vortex_Cluster #(
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_FE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
.clk (clk),
@ -146,6 +146,7 @@ module Vortex_Cluster #(
.snp_req_valid (per_core_snp_req_valid [i]),
.snp_req_addr (per_core_snp_req_addr [i]),
.snp_req_invalidate (per_core_snp_req_invalidate[i]),
.snp_req_tag (per_core_snp_req_tag [i]),
.snp_req_ready (per_core_snp_req_ready [i]),
@ -203,6 +204,7 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0] l2_snp_fwdout_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_snp_fwdout_addr;
wire[`NUM_CORES-1:0] l2_snp_fwdout_invalidate;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdout_tag;
wire[`NUM_CORES-1:0] l2_snp_fwdout_ready;
@ -241,10 +243,11 @@ module Vortex_Cluster #(
assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1];
assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)];
assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)];
assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)];
assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)];
assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)];
assign per_core_snp_req_invalidate [(i/2)] = l2_snp_fwdout_invalidate [(i/2)];
assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)];
assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign l2_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
assign l2_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
@ -316,6 +319,7 @@ module Vortex_Cluster #(
// Snoop request
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
@ -327,6 +331,7 @@ module Vortex_Cluster #(
// Snoop forwarding out
.snp_fwdout_valid (l2_snp_fwdout_valid),
.snp_fwdout_addr (l2_snp_fwdout_addr),
.snp_fwdout_invalidate(l2_snp_fwdout_invalidate),
.snp_fwdout_tag (l2_snp_fwdout_tag),
.snp_fwdout_ready (l2_snp_fwdout_ready),
@ -353,6 +358,7 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0] arb_snp_fwdout_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr;
wire[`NUM_CORES-1:0] arb_snp_fwdout_invalidate;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdout_tag;
wire[`NUM_CORES-1:0] arb_snp_fwdout_ready;
@ -394,10 +400,11 @@ module Vortex_Cluster #(
assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)];
assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)];
assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)];
assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)];
assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)];
assign per_core_snp_req_invalidate [(i/2)] = arb_snp_fwdout_invalidate [(i/2)];
assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)];
assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign arb_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
assign arb_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
@ -417,16 +424,19 @@ module Vortex_Cluster #(
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_addr),
`UNUSED_PIN (snp_rsp_invalidate),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
.snp_fwdout_valid (arb_snp_fwdout_valid),
.snp_fwdout_addr (arb_snp_fwdout_addr),
.snp_fwdout_invalidate(arb_snp_fwdout_invalidate),
.snp_fwdout_tag (arb_snp_fwdout_tag),
.snp_fwdout_ready (arb_snp_fwdout_ready),
@ -435,10 +445,11 @@ module Vortex_Cluster #(
.snp_fwdin_ready (arb_snp_fwdin_ready)
);
end else begin
assign arb_snp_fwdout_valid = snp_req_valid;
assign arb_snp_fwdout_addr = snp_req_addr;
assign arb_snp_fwdout_tag = snp_req_tag;
assign snp_req_ready = arb_snp_fwdout_ready;
assign arb_snp_fwdout_valid = snp_req_valid;
assign arb_snp_fwdout_addr = snp_req_addr;
assign arb_snp_fwdout_invalidate = snp_req_invalidate;
assign arb_snp_fwdout_tag = snp_req_tag;
assign snp_req_ready = arb_snp_fwdout_ready;
assign snp_rsp_valid = arb_snp_fwdin_valid;
assign snp_rsp_tag = arb_snp_fwdin_tag;

View file

@ -4,7 +4,6 @@ module Vortex_Socket (
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_FE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -29,6 +28,7 @@ module Vortex_Socket (
// Snoop request
input wire snp_req_valid,
input wire[`VX_DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire[`VX_SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
@ -64,7 +64,6 @@ module Vortex_Socket (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_FE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
.clk (clk),
@ -85,6 +84,7 @@ module Vortex_Socket (
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
@ -126,6 +126,7 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
@ -155,7 +156,6 @@ module Vortex_Socket (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_FE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
.clk (clk),
@ -176,6 +176,7 @@ module Vortex_Socket (
.snp_req_valid (per_cluster_snp_req_valid [i]),
.snp_req_addr (per_cluster_snp_req_addr [i]),
.snp_req_invalidate (per_cluster_snp_req_invalidate[i]),
.snp_req_tag (per_cluster_snp_req_tag [i]),
.snp_req_ready (per_cluster_snp_req_ready [i]),
@ -229,6 +230,7 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
@ -251,10 +253,11 @@ module Vortex_Socket (
assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i];
// Snoop Forwarding out
assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i];
assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i];
assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i];
assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i];
assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i];
assign per_cluster_snp_req_invalidate [i] = l3_snp_fwdout_invalidate[i];
assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i];
assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
// Snoop Forwarding in
assign l3_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
@ -327,6 +330,7 @@ module Vortex_Socket (
// Snoop request
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
@ -338,6 +342,7 @@ module Vortex_Socket (
// Snoop forwarding out
.snp_fwdout_valid (l3_snp_fwdout_valid),
.snp_fwdout_addr (l3_snp_fwdout_addr),
.snp_fwdout_invalidate(l3_snp_fwdout_invalidate),
.snp_fwdout_tag (l3_snp_fwdout_tag),
.snp_fwdout_ready (l3_snp_fwdout_ready),

View file

@ -92,6 +92,7 @@ module VX_bank #(
// Snp Request
input wire snp_req_valid,
input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
@ -134,18 +135,19 @@ module VX_bank #(
wire snrq_full;
wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
wire snrq_invalidate_st0;
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0;
VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH + SNP_REQ_TAG_WIDTH),
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
.SIZE(SNRQ_SIZE)
) snp_req_queue (
.clk (clk),
.reset (reset),
.push (snp_req_valid),
.data_in ({snp_req_addr, snp_req_tag}),
.data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
.pop (snrq_pop),
.data_out({snrq_addr_st0, snrq_tag_st0}),
.data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}),
.empty (snrq_empty),
.full (snrq_full),
`UNUSED_PIN (size)
@ -236,6 +238,7 @@ module VX_bank #(
wire mrvq_rw_st0;
wire [WORD_SIZE-1:0] mrvq_byteen_st0;
wire mrvq_is_snp_st0;
wire mrvq_snp_invalidate_st0;
wire mrvq_pending_hazard_st1e;
wire st2_pending_hazard_st1e;
@ -295,6 +298,8 @@ module VX_bank #(
wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0;
wire qual_going_to_write_st0;
wire qual_is_snp_st0;
wire qual_snp_invalidate_st0;
wire valid_st1 [STAGE_1_CYCLES-1:0];
wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0];
@ -303,6 +308,7 @@ module VX_bank #(
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0];
wire is_snp_st1 [STAGE_1_CYCLES-1:0];
wire snp_invalidate_st1 [STAGE_1_CYCLES-1:0];
wire from_mrvq_st1 [STAGE_1_CYCLES-1:0];
assign qual_is_fill_st0 = dfpq_pop_unqual;
@ -339,6 +345,10 @@ module VX_bank #(
snrq_pop_unqual ? 1 :
0;
// Snoop-invalidate flag selected for stage 0. Priority follows the nested
// select: the miss reserve queue value when mrvq_pop_unqual is set, otherwise
// the snoop request queue value when snrq_pop_unqual is set, otherwise 0.
assign qual_snp_invalidate_st0 =
    mrvq_pop_unqual ? mrvq_snp_invalidate_st0
                    : (snrq_pop_unqual ? snrq_invalidate_st0 : 0);
assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 :
reqq_pop_unqual ? reqq_req_writeword_st0 :
0;
@ -352,27 +362,27 @@ module VX_bank #(
)
// Register s0_1_c0: takes the stage-0 qual_* signals on .in and drives element
// [0] of the *_st1 arrays on .out. Its stall input is driven by stall_bank_pipe
// and its flush input is tied to 1'b0.
// This commit listing shows each changed line twice (pre-change form first, then
// the post-change form): the second .N line is one bit wider, and the second
// .in/.out pair adds qual_snp_invalidate_st0 -> snp_invalidate_st1[0].
VX_generic_register #(
.N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
) s0_1_c0 (
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (1'b0),
.in ({qual_from_mrvq_st0, qual_is_snp_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({from_mrvq_st1[0] , is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
.in ({qual_from_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({from_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
);
// Remaining stage-1 registers: for i = 1 .. STAGE_1_CYCLES-1, instance s0_1_cc
// takes element [i-1] of each *_st1 array on .in and drives element [i] on .out,
// with the same stall (stall_bank_pipe) and flush (1'b0) connections.
// Changed lines appear twice in this commit listing (pre-change form first,
// then post-change form); the post-change form also carries snp_invalidate_st1
// and widens .N by one bit.
genvar i;
for (i = 1; i < STAGE_1_CYCLES; i++) begin
VX_generic_register #(
.N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
) s0_1_cc (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(1'b0),
.in ({from_mrvq_st1[i-1], is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
.out ({from_mrvq_st1[i] , is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (1'b0),
.in ({from_mrvq_st1[i-1], is_snp_st1[i-1], snp_invalidate_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
.out ({from_mrvq_st1[i] , is_snp_st1[i], snp_invalidate_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
);
end
@ -390,6 +400,7 @@ module VX_bank #(
wire [WORD_SIZE-1:0] mem_byteen_st1e;
wire fill_saw_dirty_st1e;
wire is_snp_st1e;
wire snp_invalidate_st1e;
wire snp_to_mrvq_st1e;
wire mrvq_init_ready_state_st1e;
wire miss_add_because_miss;
@ -398,8 +409,9 @@ module VX_bank #(
wire mrvq_recover_ready_state_st1e;
assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1];
assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1];
assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1];
assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1];
assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1];
assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1];
assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
@ -440,6 +452,7 @@ module VX_bank #(
.mem_byteen_st1e (mem_byteen_st1e),
.is_snp_st1e (is_snp_st1e),
.snp_invalidate_st1e (snp_invalidate_st1e),
// Read Data
.readword_st1e (readword_st1e),
@ -473,6 +486,7 @@ module VX_bank #(
wire [`TAG_SELECT_BITS-1:0] readtag_st2;
wire fill_saw_dirty_st2;
wire is_snp_st2;
wire snp_invalidate_st2;
wire snp_to_mrvq_st2;
wire from_mrvq_st2;
wire mrvq_init_ready_state_st2;
@ -482,14 +496,14 @@ module VX_bank #(
wire mrvq_init_ready_state_hazard_st1e_st1;
// Register st_1e_2: takes the *_st1e signals (and the last element of the *_st1
// arrays) on .in and drives the corresponding *_st2 signals on .out. Stall is
// driven by stall_bank_pipe; flush is tied to 1'b0.
// Changed lines appear twice in this commit listing (pre-change form first,
// then post-change form); the post-change form inserts snp_invalidate_st1e ->
// snp_invalidate_st2 right after is_snp and widens .N by one bit to match.
VX_generic_register #(
.N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH)
.N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH)
) st_1e_2 (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(1'b0),
.in ({mrvq_recover_ready_state_st1e, from_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (1'b0),
.in ({mrvq_recover_ready_state_st1e, from_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
);
`DEBUG_BLOCK(
@ -517,7 +531,8 @@ module VX_bank #(
wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2;
wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2;
assign {miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_tid} = inst_meta_st2;
wire miss_add_is_snp = is_snp_st2;
wire miss_add_is_snp = is_snp_st2;
wire miss_add_snp_invalidate = snp_invalidate_st2;
wire miss_add_from_mrvq = valid_st2 && from_mrvq_st2 && !stall_bank_pipe;
@ -551,6 +566,7 @@ module VX_bank #(
.miss_add_rw (miss_add_rw),
.miss_add_byteen (miss_add_byteen),
.miss_add_is_snp (miss_add_is_snp),
.miss_add_snp_invalidate (miss_add_snp_invalidate),
.miss_resrv_full (mrvq_full),
.miss_resrv_stop (mrvq_stop),
.mrvq_init_ready_state (mrvq_init_ready_state_st2),
@ -570,7 +586,8 @@ module VX_bank #(
.miss_resrv_tag_st0 (mrvq_tag_st0),
.miss_resrv_rw_st0 (mrvq_rw_st0),
.miss_resrv_byteen_st0 (mrvq_byteen_st0),
.miss_resrv_is_snp_st0 (mrvq_is_snp_st0)
.miss_resrv_is_snp_st0 (mrvq_is_snp_st0),
.miss_resrv_snp_invalidate_st0 (mrvq_snp_invalidate_st0)
);
// Enqueue core response
@ -760,7 +777,7 @@ module VX_bank #(
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
end
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_tag);
$display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);

View file

@ -101,6 +101,7 @@ module VX_cache #(
// Snoop request
input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
@ -112,6 +113,7 @@ module VX_cache #(
// Snoop Forwarding out
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_invalidate,
output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag,
`IGNORE_WARNINGS_BEGIN
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready,
@ -164,6 +166,7 @@ module VX_cache #(
wire snp_req_valid_qual;
wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
wire snp_req_invalidate_qual;
wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual;
wire snp_req_ready_qual;
@ -180,16 +183,19 @@ module VX_cache #(
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_invalidate (snp_req_invalidate),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_req_valid_qual),
.snp_rsp_addr (snp_req_addr_qual),
.snp_rsp_invalidate (snp_req_invalidate_qual),
.snp_rsp_tag (snp_req_tag_qual),
.snp_rsp_ready (snp_req_ready_qual),
.snp_fwdout_valid (snp_fwdout_valid),
.snp_fwdout_addr (snp_fwdout_addr),
.snp_fwdout_invalidate(snp_fwdout_invalidate),
.snp_fwdout_tag (snp_fwdout_tag),
.snp_fwdout_ready (snp_fwdout_ready),
@ -200,14 +206,16 @@ module VX_cache #(
end else begin
assign snp_fwdout_valid = 0;
assign snp_fwdout_addr = 0;
assign snp_fwdout_invalidate = 0;
assign snp_fwdout_tag = 0;
assign snp_fwdin_ready = 0;
assign snp_req_valid_qual = snp_req_valid;
assign snp_req_addr_qual = snp_req_addr;
assign snp_req_tag_qual = snp_req_tag;
assign snp_req_ready = snp_req_ready_qual;
assign snp_req_valid_qual = snp_req_valid;
assign snp_req_addr_qual = snp_req_addr;
assign snp_req_invalidate_qual = snp_req_invalidate;
assign snp_req_tag_qual = snp_req_tag;
assign snp_req_ready = snp_req_ready_qual;
end
if (NUM_BANKS == 1) begin
@ -266,6 +274,7 @@ module VX_cache #(
wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_invalidate;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
@ -330,8 +339,9 @@ module VX_cache #(
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
end
assign curr_bank_snp_req_tag = snp_req_tag_qual;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual;
assign curr_bank_snp_req_tag = snp_req_tag_qual;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
// Snoop response
assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid;
@ -400,6 +410,7 @@ module VX_cache #(
// Snoop request
.snp_req_valid (curr_bank_snp_req_valid),
.snp_req_addr (curr_bank_snp_req_addr),
.snp_req_invalidate (curr_bank_snp_req_invalidate),
.snp_req_tag (curr_bank_snp_req_tag),
.snp_req_ready (curr_bank_snp_req_ready),

View file

@ -8,8 +8,8 @@
// tag rw byteen tid
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)
// data metadata word_sel is_snp
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1)
// MRVQ metadata entry width, one term per field in this order:
// data + request metadata + word_sel + is_snp (1 bit) + snp_invalidate (1 bit)
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1)
`define BANK_BITS `LOG2UP(NUM_BANKS)

View file

@ -33,6 +33,7 @@ module VX_cache_miss_resrv #(
input wire[WORD_SIZE-1:0] miss_add_byteen,
input wire mrvq_init_ready_state,
input wire miss_add_is_snp,
input wire miss_add_snp_invalidate,
output wire miss_resrv_full,
output wire miss_resrv_stop,
@ -52,7 +53,8 @@ module VX_cache_miss_resrv #(
output wire[`REQ_TAG_WIDTH-1:0] miss_resrv_tag_st0,
output wire miss_resrv_rw_st0,
output wire[WORD_SIZE-1:0] miss_resrv_byteen_st0,
output wire miss_resrv_is_snp_st0
output wire miss_resrv_is_snp_st0,
output wire miss_resrv_snp_invalidate_st0
);
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
@ -91,7 +93,7 @@ module VX_cache_miss_resrv #(
assign miss_resrv_valid_st0 = dequeue_possible;
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_rw_st0, miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0} = metadata_table[dequeue_index];
assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_rw_st0, miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0, miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index];
wire mrvq_push = miss_add && enqueue_possible && !from_mrvq;
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
@ -119,7 +121,7 @@ module VX_cache_miss_resrv #(
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp};
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
tail_ptr <= tail_ptr + 1;
end else if (increment_head) begin
valid_table[head_ptr] <= 0;

View file

@ -14,18 +14,21 @@ module VX_snp_forwarder #(
// Snoop request
input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire snp_req_invalidate,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire [`DRAM_ADDR_WIDTH-1:0] snp_rsp_addr,
output wire snp_rsp_invalidate,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_REQUESTS-1:0] snp_fwdout_invalidate,
output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag,
input wire [NUM_REQUESTS-1:0] snp_fwdout_ready,
@ -58,18 +61,18 @@ module VX_snp_forwarder #(
assign sfq_pop = snp_rsp_valid;
// Queue snp_fwd_queue: each pushed entry holds {sfq_write_addr, request address,
// [invalidate flag,] request tag}. The entry selected by sfq_read_addr drives
// dbg_sfq_write_addr and the snp_rsp_* outputs.
// Changed lines appear twice in this commit listing (pre-change form first,
// then post-change form); the post-change form adds the 1-bit invalidate flag
// to DATAW, write_data and read_data.
VX_indexable_queue #(
.DATAW (`LOG2UP(SNRQ_SIZE) + `DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
.DATAW (`LOG2UP(SNRQ_SIZE) + 1 +`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH),
.SIZE (SNRQ_SIZE)
) snp_fwd_queue (
.clk (clk),
.reset (reset),
.write_data ({sfq_write_addr, snp_req_addr, snp_req_tag}),
.write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}),
.write_addr (sfq_write_addr),
.push (sfq_push),
.full (sfq_full),
.pop (sfq_pop),
.read_addr (sfq_read_addr),
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_tag})
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag})
);
always @(posedge clk) begin
@ -89,9 +92,10 @@ module VX_snp_forwarder #(
// Drives every forwarding output i in [0, NUM_REQUESTS) with the same values:
// valid is snp_req_valid gated by !sfq_full, the address (and, in the
// post-change form, the invalidate flag) is copied from the incoming snoop
// request, and the forwarded tag is sfq_write_addr.
// Changed lines appear twice in this commit listing (pre-change form first,
// then post-change form).
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdout_valid[i] = snp_req_valid && !sfq_full;
assign snp_fwdout_addr[i] = snp_req_addr;
assign snp_fwdout_tag[i] = sfq_write_addr;
assign snp_fwdout_valid[i] = snp_req_valid && !sfq_full;
assign snp_fwdout_addr[i] = snp_req_addr;
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
assign snp_fwdout_tag[i] = sfq_write_addr;
end
assign snp_req_ready = !sfq_full && fwdout_ready;
@ -114,16 +118,16 @@ module VX_snp_forwarder #(
// Simulation trace, compiled only when DBG_PRINT_CACHE_SNP is defined: on each
// clock edge, prints one line per handshake (valid && ready) on the snoop
// request, forward-out [0], forward-in and snoop response channels.
// Changed $display lines appear twice in this commit listing (pre-change form
// first, then the post-change form that also prints the invalidate flag).
`ifdef DBG_PRINT_CACHE_SNP
always_ff @(posedge clk) begin
if (snp_req_valid && snp_req_ready) begin
$display("%t: cache%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_tag);
$display("%t: cache%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag);
end
// Only forwarding output 0 is traced.
if (snp_fwdout_valid[0] && snp_fwdout_ready[0]) begin
$display("%t: cache%0d snp fwd_out: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_tag[0]);
$display("%t: cache%0d snp fwd_out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_invalidate[0], snp_fwdout_tag[0]);
end
if (fwdin_valid && fwdin_ready) begin
$display("%t: cache%0d snp fwd_in[%01d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag);
end
// NOTE(review): snp_rsp_addr is printed as-is here, while the req and fwd_out
// traces wrap their address in DRAM_TO_BYTE_ADDR - confirm this is intended.
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: cache%0d snp rsp: addr=%0h, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_tag);
$display("%t: cache%0d snp rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag);
end
end
`endif

View file

@ -23,6 +23,7 @@ module VX_tag_data_access #(
input wire reset,
input wire stall,
input wire is_snp_st1e,
input wire snp_invalidate_st1e,
input wire stall_bank_pipe,
input wire force_request_miss_st1e,
@ -113,12 +114,12 @@ module VX_tag_data_access #(
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH),
.PassThru(1)
) s0_1_c0 (
.clk (clk),
.reset(reset),
.stall(stall),
.flush(1'b0),
.in({qual_read_valid_st1, qual_read_dirty_st1, qual_read_dirtyb_st1, qual_read_tag_st1, qual_read_data_st1}),
.out({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_dirtyb_st1, qual_read_tag_st1, qual_read_data_st1}),
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
);
genvar i;
@ -126,12 +127,12 @@ module VX_tag_data_access #(
VX_generic_register #(
.N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
) s0_1_cc (
.clk (clk),
.reset(reset),
.stall(stall),
.flush(1'b0),
.in({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_dirtyb_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}),
.out({read_valid_st1c[i], read_dirty_st1c[i], read_dirtyb_st1c[i], read_tag_st1c[i], read_data_st1c[i]})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_dirtyb_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}),
.out ({read_valid_st1c[i], read_dirty_st1c[i], read_dirtyb_st1c[i], read_tag_st1c[i], read_data_st1c[i]})
);
end
@ -174,20 +175,19 @@ module VX_tag_data_access #(
// use "case equality" to handle uninitialized tag when block entry is not valid
assign tags_match = ((writetag_st1e == use_read_tag_st1e) === 1'b1);
// Snoop case that is OR-ed into miss_st1e below: a valid snoop request whose tag
// matches a valid entry while force_request_miss_st1e is low.
// This commit listing shows the pre-change form first and the post-change form
// second: the first requires the entry to be dirty, the second also accepts a
// clean entry when snp_invalidate_st1e is set.
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e && !force_request_miss_st1e;
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e;
// Non-snoop request, writefill_st1e low, entry not valid.
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;
// Non-snoop request, writefill_st1e low, entry valid but tag differs.
wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && !tags_match;
wire real_miss = req_invalid || req_miss;
// Non-snoop request that is not a real miss but has force_request_miss_st1e set.
wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss);
// Valid snoop request with force_request_miss_st1e set.
assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e;
// The second term is basically saying: always make an entry ready if there's already another entry waiting, even if you yourself see a miss
assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e || (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e);
// assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e || force_core_miss;
assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e
|| (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e);
// Overall miss indication: any of the three cases computed above.
assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss;
// Dirty bit of the read entry, qualified by a valid request and a valid entry.
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;

View file

@ -9,7 +9,8 @@ interface VX_cache_snp_req_if #(
) ();
wire snp_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr;
wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr;
wire snp_req_invalidate;
wire [SNP_TAG_WIDTH-1:0] snp_req_tag;
wire snp_req_ready;

View file

@ -9,7 +9,7 @@ interface VX_exec_unit_req_if ();
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;
wire [31:0] curr_PC;
wire [31:0] PC_next;
wire [31:0] next_PC;
// Write Back Info
wire [4:0] rd;
@ -29,7 +29,7 @@ interface VX_exec_unit_req_if ();
wire [2:0] branch_type;
// Jal info
wire jalQual;
wire is_jal;
wire jal;
wire [31:0] jal_offset;

View file

@ -22,10 +22,10 @@ interface VX_frE_to_bckE_req_if ();
wire [19:0] upper_immed;
wire [31:0] curr_PC;
wire is_etype;
wire jalQual;
wire is_jal;
wire jal;
wire [31:0] jal_offset;
wire [31:0] PC_next;
wire [31:0] next_PC;
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;

View file

@ -13,7 +13,7 @@ interface VX_gpu_inst_req_if();
wire is_barrier;
wire[31:0] pc_next;
wire[31:0] next_PC;
wire [`NUM_THREADS-1:0][31:0] a_reg_data;
wire [31:0] rd2;

View file

@ -6,7 +6,7 @@
interface VX_jal_rsp_if ();
wire jal;
wire jal;
wire [31:0] jal_dest;
wire [`NW_BITS-1:0] jal_warp_num;

View file

@ -7,7 +7,7 @@
interface VX_lsu_req_if ();
wire [`NUM_THREADS-1:0] valid;
wire [31:0] lsu_pc;
wire [31:0] curr_PC;
wire [`NW_BITS-1:0] warp_num;
wire [`NUM_THREADS-1:0][31:0] store_data;
wire [`NUM_THREADS-1:0][31:0] base_address; // A reg data

View file

@ -10,7 +10,7 @@ interface VX_wb_if ();
wire [`NW_BITS-1:0] warp_num;
wire [4:0] rd;
wire [1:0] wb;
wire [31:0] pc;
wire [31:0] curr_PC;
endinterface

View file

@ -1,10 +1,11 @@
`include "VX_define.vh"
module VX_scope #(
parameter DATAW = 64,
parameter BUSW = 64,
parameter SIZE = 256,
parameter IDW = 1
parameter DATAW = 64,
parameter BUSW = 64,
parameter SIZE = 16,
parameter UPDW = 1,
parameter DELTAW = 16
) (
input wire clk,
input wire reset,
@ -17,7 +18,8 @@ module VX_scope #(
input wire bus_write,
input wire bus_read
);
localparam DELTA_ENABLE = (IDW != 0);
localparam DELTA_ENABLE = (UPDW != 0);
// Largest value the DELTAW-bit delta counter can hold. While recording, a sample
// is stored (and delta cleared) as soon as delta reaches this value.
// The base must be 2: 1 raised to any power is 1, which would make MAX_DELTA 0,
// so the (delta == MAX_DELTA) test would hold on every cycle and every sample
// would be stored with a delta of 0.
localparam MAX_DELTA = (2**DELTAW)-1;
typedef enum logic[2:0] {
CMD_GET_VALID,
@ -38,10 +40,9 @@ module VX_scope #(
} cmd_get_t;
reg [DATAW-1:0] data_store [SIZE-1:0];
reg [63:0] delta_store [SIZE-1:0];
reg [IDW-1:0] prev_id;
reg [63:0] delta;
reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [UPDW-1:0] prev_id;
reg [DELTAW-1:0] delta;
reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end;
@ -57,7 +58,7 @@ module VX_scope #(
wire [BUSW-4:0] cmd_data;
assign {cmd_data, cmd_type} = bus_in;
wire [IDW-1:0] trigger_id = data_in[DATAW-1:DATAW-IDW];
wire [UPDW-1:0] trigger_id = data_in[UPDW-1:0];
always @(posedge clk) begin
if (reset) begin
@ -93,7 +94,7 @@ module VX_scope #(
start_wait <= 0;
recording <= 1;
delay_cntr <= 0;
delta <= 0;
delta <= MAX_DELTA;
end else begin
start_wait <= 1;
recording <= 0;
@ -106,22 +107,22 @@ module VX_scope #(
if (1 == delay_cntr) begin
start_wait <= 0;
recording <= 1;
delta <= 0;
delta <= MAX_DELTA;
end
end
if (recording) begin
if (DELTA_ENABLE) begin
if (changed
|| (0 == waddr)
|| (delta == MAX_DELTA)
|| (trigger_id != prev_id)) begin
data_store[waddr] <= data_in;
delta_store[waddr] <= delta;
waddr <= waddr + 1;
delta <= 0;
end else begin
end else begin
delta <= delta + 1;
end
end
prev_id <= trigger_id;
end else begin
data_store[waddr] <= data_in;
@ -171,7 +172,7 @@ module VX_scope #(
GET_VALID : bus_out = BUSW'(data_valid);
GET_WIDTH : bus_out = BUSW'(DATAW);
GET_COUNT : bus_out = BUSW'(waddr) + BUSW'(1);
default : bus_out = read_delta ? (BUSW)'(delta_store[raddr]) : (BUSW)'(data_store[raddr] >> read_offset);
default : bus_out = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset);
endcase
end

View file

@ -19,8 +19,8 @@ module VX_d_e_reg (
.reset (reset),
.stall (stall),
.flush (flush),
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.is_jal, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.next_PC, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
.out ({bckE_req_if.csr_address , bckE_req_if.is_jal , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.next_PC , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
);
endmodule

View file

@ -16,12 +16,12 @@ module VX_f_d_reg (
VX_generic_register #(
.N(64+`NW_BITS-1+1+`NUM_THREADS)
) f_d_reg (
.clk (clk),
.reset(reset),
.stall(stall),
.flush(flush),
.in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}),
.out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (flush),
.in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}),
.out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid})
);
endmodule

View file

@ -12,17 +12,16 @@ module VX_i_d_reg (
wire flush = 1'b0;
wire stall = freeze == 1'b1;
VX_generic_register #(
.N(64 + `NW_BITS-1 + 1 + `NUM_THREADS)
) i_d_reg (
.clk (clk),
.reset(reset),
.stall(stall),
.flush(flush),
.in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}),
.out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid})
.clk (clk),
.reset (reset),
.stall (stall),
.flush (flush),
.in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}),
.out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid})
);
endmodule

View file

@ -9,6 +9,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_WB \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)

View file

@ -10,52 +10,52 @@ int main(int argc, char **argv)
bool passed = true;
std::string tests[] = {
"../../benchmarks/riscv_tests/rv32ui-p-add.hex",
"../../benchmarks/riscv_tests/rv32ui-p-addi.hex",
"../../benchmarks/riscv_tests/rv32ui-p-and.hex",
"../../benchmarks/riscv_tests/rv32ui-p-andi.hex",
"../../benchmarks/riscv_tests/rv32ui-p-auipc.hex",
"../../benchmarks/riscv_tests/rv32ui-p-beq.hex",
"../../benchmarks/riscv_tests/rv32ui-p-bge.hex",
"../../benchmarks/riscv_tests/rv32ui-p-bgeu.hex",
"../../benchmarks/riscv_tests/rv32ui-p-blt.hex",
"../../benchmarks/riscv_tests/rv32ui-p-bltu.hex",
"../../benchmarks/riscv_tests/rv32ui-p-bne.hex",
"../../benchmarks/riscv_tests/rv32ui-p-jal.hex",
"../../benchmarks/riscv_tests/rv32ui-p-jalr.hex",
"../../benchmarks/riscv_tests/rv32ui-p-lb.hex",
"../../benchmarks/riscv_tests/rv32ui-p-lbu.hex",
"../../benchmarks/riscv_tests/rv32ui-p-lh.hex",
"../../benchmarks/riscv_tests/rv32ui-p-lhu.hex",
"../../benchmarks/riscv_tests/rv32ui-p-lui.hex",
"../../benchmarks/riscv_tests/rv32ui-p-lw.hex",
"../../benchmarks/riscv_tests/rv32ui-p-or.hex",
"../../benchmarks/riscv_tests/rv32ui-p-ori.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sb.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sh.hex",
"../../benchmarks/riscv_tests/rv32ui-p-simple.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sll.hex",
"../../benchmarks/riscv_tests/rv32ui-p-slli.hex",
"../../benchmarks/riscv_tests/rv32ui-p-slt.hex",
"../../benchmarks/riscv_tests/rv32ui-p-slti.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sltiu.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sltu.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sra.hex",
"../../benchmarks/riscv_tests/rv32ui-p-srai.hex",
"../../benchmarks/riscv_tests/rv32ui-p-srl.hex",
"../../benchmarks/riscv_tests/rv32ui-p-srli.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sub.hex",
"../../benchmarks/riscv_tests/rv32ui-p-sw.hex",
"../../benchmarks/riscv_tests/rv32ui-p-xor.hex",
"../../benchmarks/riscv_tests/rv32ui-p-xori.hex",
"../../benchmarks/riscv_tests/rv32um-p-div.hex",
"../../benchmarks/riscv_tests/rv32um-p-divu.hex",
"../../benchmarks/riscv_tests/rv32um-p-mul.hex",
"../../benchmarks/riscv_tests/rv32um-p-mulh.hex",
"../../benchmarks/riscv_tests/rv32um-p-mulhsu.hex",
"../../benchmarks/riscv_tests/rv32um-p-mulhu.hex",
"../../benchmarks/riscv_tests/rv32um-p-rem.hex",
"../../benchmarks/riscv_tests/rv32um-p-remu.hex"
"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-addi.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-and.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-andi.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-auipc.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-beq.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-bge.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-bgeu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-blt.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-bltu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-bne.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-jal.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-jalr.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lb.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lbu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lh.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lhu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lui.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-lw.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-or.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-ori.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sb.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sh.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-simple.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sll.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-slli.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-slt.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-slti.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sltiu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sltu.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sra.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-srai.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-srl.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-srli.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sub.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-sw.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-xor.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-xori.hex",
"../../../benchmarks/riscv_tests/rv32um-p-div.hex",
"../../../benchmarks/riscv_tests/rv32um-p-divu.hex",
"../../../benchmarks/riscv_tests/rv32um-p-mul.hex",
"../../../benchmarks/riscv_tests/rv32um-p-mulh.hex",
"../../../benchmarks/riscv_tests/rv32um-p-mulhsu.hex",
"../../../benchmarks/riscv_tests/rv32um-p-mulhu.hex",
"../../../benchmarks/riscv_tests/rv32um-p-rem.hex",
"../../../benchmarks/riscv_tests/rv32um-p-remu.hex"
};
for (std::string test : tests) {
@ -84,10 +84,10 @@ int main(int argc, char **argv)
#else
char test[] = "../../runtime/tests/simple/vx_simple_main.hex";
//char test[] = "../../benchmarks/riscv_tests/rv32ui-p-lb.hex";
//char test[] = "../../benchmarks/riscv_tests/rv32ui-p-lw.hex";
//char test[] = "../../benchmarks/riscv_tests/rv32ui-p-sw.hex";
char test[] = "../../../runtime/tests/simple/vx_simple_main.hex";
//char test[] = "../../../benchmarks/riscv_tests/rv32ui-p-lb.hex";
//char test[] = "../../../benchmarks/riscv_tests/rv32ui-p-lw.hex";
//char test[] = "../../../benchmarks/riscv_tests/rv32ui-p-sw.hex";
std::cerr << test << std::endl;