RTL code refactoring

This commit is contained in:
Blaise Tine 2020-04-21 01:03:37 -04:00
parent cfa8626bf7
commit ba4e736782
29 changed files with 332 additions and 563 deletions

View file

@ -102,28 +102,27 @@ reg[31:0] io_data;
.icache_request_pc_address (icache_request_pc_address),
.io_valid (io_valid),
.io_data (io_data),
.o_m_read_addr_d (o_m_read_addr_d),
.o_m_evict_addr_d (o_m_evict_addr_d),
.o_m_valid_d (o_m_valid_d),
.o_m_writedata_d (o_m_writedata_d),
.o_m_read_or_write_d (o_m_read_or_write_d),
.i_m_readdata_d (i_m_readdata_d),
.i_m_ready_d (i_m_ready_d),
.o_m_read_addr_i (o_m_read_addr_i),
.o_m_evict_addr_i (o_m_evict_addr_i),
.o_m_valid_i (o_m_valid_i),
.o_m_writedata_i (o_m_writedata_i),
.o_m_read_or_write_i (o_m_read_or_write_i),
.i_m_readdata_i (i_m_readdata_i),
.i_m_ready_i (i_m_ready_i),
.ebreak_o (out_ebreak)
.m_read_addr_d (o_m_read_addr_d),
.m_evict_addr_d (o_m_evict_addr_d),
.m_valid_d (o_m_valid_d),
.m_writedata_d (o_m_writedata_d),
.m_read_or_write_d (o_m_read_or_write_d),
.m_readdata_d (i_m_readdata_d),
.m_ready_d (i_m_ready_d),
.m_read_addr (o_m_read_addr_i),
.m_evict_addr (o_m_evict_addr_i),
.m_valid (o_m_valid_i),
.writedata (o_m_writedata_i),
.m_read_or_write (o_m_read_or_write_i),
.m_readdata (i_m_readdata_i),
.m_ready (i_m_ready_i),
.ebreak (out_ebreak)
);
always @(negedge clk) begin
ibus_driver(clk, o_m_read_addr_i, o_m_evict_addr_i, o_m_valid_i, o_m_writedata_i, o_m_read_or_write_i, `ICACHE_BANKS, `ICACHE_NUM_WORDS_PER_BLOCK, i_m_readdata_i, i_m_ready_i);
dbus_driver(clk, o_m_read_addr_d, o_m_evict_addr_d, o_m_valid_d, o_m_writedata_d, o_m_read_or_write_d, `DCACHE_BANKS, `DCACHE_NUM_WORDS_PER_BLOCK, i_m_readdata_d, i_m_ready_d);
io_handler (clk, io_valid, io_data);
io_handler (clk, io_valid, io_data);
end
always @(posedge clk) begin

View file

@ -3,15 +3,15 @@
module VX_alu (
input wire clk,
input wire reset,
input wire[31:0] a_i,
input wire[31:0] b_i,
input wire rs2_src_i,
input wire[31:0] itype_immed_i,
input wire[19:0] upper_immed_i,
input wire[4:0] alu_op_i,
input wire[31:0] curr_PC_i,
output reg[31:0] alu_result_o,
output reg alu_stall_o
input wire[31:0] src_a,
input wire[31:0] src_b,
input wire src_rs2,
input wire[31:0] itype_immed,
input wire[19:0] upper_immed,
input wire[4:0] alu_op,
input wire[31:0] curr_PC,
output reg[31:0] alu_result,
output reg alu_stall
);
localparam div_pipeline_len = 20;
@ -79,18 +79,18 @@ module VX_alu (
// MUL, MULH (signed*signed), MULHSU (signed*unsigned), MULHU (unsigned*unsigned)
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
assign mul_data_a = (alu_op_i == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed;
assign mul_data_b = (alu_op_i == `MULHU || alu_op_i == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed;
assign mul_data_a = (alu_op == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed;
assign mul_data_b = (alu_op == `MULHU || alu_op == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed;
reg [15:0] curr_inst_delay;
reg [15:0] inst_delay;
reg inst_was_stalling;
wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0;
assign alu_stall_o = inst_delay_stall;
assign alu_stall = inst_delay_stall;
always @(*) begin
case(alu_op_i)
case(alu_op)
`DIV,
`DIVU,
`REM,
@ -100,7 +100,7 @@ module VX_alu (
`MULHSU,
`MULHU: curr_inst_delay = mul_pipeline_len;
default: curr_inst_delay = 0;
endcase // alu_op_i
endcase // alu_op
end
always @(posedge clk) begin
@ -127,80 +127,80 @@ module VX_alu (
// ALU operand selection and result mux (first conditional-compilation branch).
// ALU_in1 is always source A; ALU_in2 is the I-type immediate when the RS2
// selector equals `RS2_IMMED, otherwise source B.
wire which_in2;
wire[31:0] upper_immed;
// The module port is now named upper_immed (20 bits), so the shifted 32-bit
// value needs its own name; this matches the `else branch of this file.
wire[31:0] upper_immed_s;
assign which_in2 = rs2_src_i == `RS2_IMMED;
assign which_in2 = src_rs2 == `RS2_IMMED;
assign ALU_in1 = a_i;
assign ALU_in2 = which_in2 ? itype_immed_i : b_i;
assign ALU_in1 = src_a;
assign ALU_in2 = which_in2 ? itype_immed : src_b;
assign upper_immed = {upper_immed_i, {12{1'b0}}};
// Place the 20-bit upper immediate in bits [31:12], low 12 bits zero.
// (Previously this assigned upper_immed from itself.)
assign upper_immed_s = {upper_immed, {12{1'b0}}};
always @(*) begin
case(alu_op_i)
`ADD: alu_result_o = $signed(ALU_in1) + $signed(ALU_in2);
`SUB: alu_result_o = $signed(ALU_in1) - $signed(ALU_in2);
`SLLA: alu_result_o = ALU_in1 << ALU_in2[4:0];
`SLT: alu_result_o = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
`SLTU: alu_result_o = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
`XOR: alu_result_o = ALU_in1 ^ ALU_in2;
`SRL: alu_result_o = ALU_in1 >> ALU_in2[4:0];
`SRA: alu_result_o = $signed(ALU_in1) >>> ALU_in2[4:0];
`OR: alu_result_o = ALU_in1 | ALU_in2;
`AND: alu_result_o = ALU_in2 & ALU_in1;
`SUBU: alu_result_o = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`LUI_ALU: alu_result_o = upper_immed;
`AUIPC_ALU: alu_result_o = $signed(curr_PC_i) + $signed(upper_immed);
case(alu_op)
`ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
`SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
`SLLA: alu_result = ALU_in1 << ALU_in2[4:0];
`SLT: alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
`SLTU: alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
`XOR: alu_result = ALU_in1 ^ ALU_in2;
`SRL: alu_result = ALU_in1 >> ALU_in2[4:0];
`SRA: alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
`OR: alu_result = ALU_in1 | ALU_in2;
`AND: alu_result = ALU_in2 & ALU_in1;
`SUBU: alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`LUI_ALU: alu_result = upper_immed_s;
`AUIPC_ALU: alu_result = $signed(curr_PC) + $signed(upper_immed_s);
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
`MUL: alu_result_o = mul_result[31:0];
`MULH: alu_result_o = mul_result[63:32];
`MULHSU: alu_result_o = mul_result[63:32];
`MULHU: alu_result_o = mul_result[63:32];
`DIV: alu_result_o = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`DIVU: alu_result_o = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`REM: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
`REMU: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
default: alu_result_o = 32'h0;
endcase // alu_op_i
// MUL returns the low word of the 64-bit product; MULH* return the high word.
`MUL: alu_result = mul_result[31:0];
`MULH: alu_result = mul_result[63:32];
`MULHSU: alu_result = mul_result[63:32];
`MULHU: alu_result = mul_result[63:32];
// Zero divisor: quotient ops return all ones, remainder ops return ALU_in1.
`DIV: alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`DIVU: alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`REM: alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
`REMU: alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
default: alu_result = 32'h0;
endcase // alu_op
end
`else
wire which_in2;
wire[31:0] upper_immed;
wire[31:0] upper_immed_s;
assign which_in2 = rs2_src_i == `RS2_IMMED;
assign which_in2 = src_rs2 == `RS2_IMMED;
assign ALU_in1 = a_i;
assign ALU_in1 = src_a;
assign ALU_in2 = which_in2 ? itype_immed_i : b_i;
assign ALU_in2 = which_in2 ? itype_immed : src_b;
assign upper_immed = {upper_immed_i, {12{1'b0}}};
assign upper_immed_s = {upper_immed, {12{1'b0}}};
always @(*) begin
case(alu_op_i)
`ADD: alu_result_o = $signed(ALU_in1) + $signed(ALU_in2);
`SUB: alu_result_o = $signed(ALU_in1) - $signed(ALU_in2);
`SLLA: alu_result_o = ALU_in1 << ALU_in2[4:0];
`SLT: alu_result_o = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
`SLTU: alu_result_o = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
`XOR: alu_result_o = ALU_in1 ^ ALU_in2;
`SRL: alu_result_o = ALU_in1 >> ALU_in2[4:0];
`SRA: alu_result_o = $signed(ALU_in1) >>> ALU_in2[4:0];
`OR: alu_result_o = ALU_in1 | ALU_in2;
`AND: alu_result_o = ALU_in2 & ALU_in1;
`SUBU: alu_result_o = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`LUI_ALU: alu_result_o = upper_immed;
`AUIPC_ALU: alu_result_o = $signed(curr_PC_i) + $signed(upper_immed);
case(alu_op)
`ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
`SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
`SLLA: alu_result = ALU_in1 << ALU_in2[4:0];
`SLT: alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
`SLTU: alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
`XOR: alu_result = ALU_in1 ^ ALU_in2;
`SRL: alu_result = ALU_in1 >> ALU_in2[4:0];
`SRA: alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
`OR: alu_result = ALU_in1 | ALU_in2;
`AND: alu_result = ALU_in2 & ALU_in1;
`SUBU: alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`LUI_ALU: alu_result = upper_immed_s;
`AUIPC_ALU: alu_result = $signed(curr_PC) + $signed(upper_immed_s);
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
`MUL: alu_result_o = mul_result[31:0];
`MULH: alu_result_o = mul_result[63:32];
`MULHSU: alu_result_o = mul_result[63:32];
`MULHU: alu_result_o = mul_result[63:32];
`DIV: alu_result_o = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`DIVU: alu_result_o = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`REM: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
`REMU: alu_result_o = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
default: alu_result_o = 32'h0;
endcase // alu_op_i
`MUL: alu_result = mul_result[31:0];
`MULH: alu_result = mul_result[63:32];
`MULHSU: alu_result = mul_result[63:32];
`MULHU: alu_result = mul_result[63:32];
`DIV: alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`DIVU: alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`REM: alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
`REMU: alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
default: alu_result = 32'h0;
endcase // alu_op
end
`endif

View file

@ -10,8 +10,8 @@ module VX_back_end #(
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
output wire mem_delay_o,
output wire exec_delay_o,
output wire mem_delay,
output wire exec_delay,
output wire gpr_stage_delay,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
@ -65,8 +65,8 @@ VX_gpr_stage gpr_stage (
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (mem_delay_o),
.exec_delay (exec_delay_o),
.memory_delay (mem_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
@ -77,8 +77,8 @@ VX_lsu load_store_unit (
.mem_wb_if (mem_wb_if),
.dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if),
.delay_o (mem_delay_o),
.no_slot_mem_i (no_slot_mem)
.delay (mem_delay),
.no_slot_mem (no_slot_mem)
);
VX_exec_unit exec_unit (
@ -88,8 +88,8 @@ VX_exec_unit exec_unit (
.inst_exec_wb_if (inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if),
.delay_o (exec_delay_o),
.no_slot_exec_i (no_slot_exec)
.delay (exec_delay),
.no_slot_exec (no_slot_exec)
);
VX_gpgpu_inst gpgpu_inst (
@ -117,9 +117,9 @@ VX_writeback wb (
.csr_wb_if (csr_wb_if),
.writeback_if (writeback_temp_if),
.no_slot_mem_o (no_slot_mem),
.no_slot_exec_o (no_slot_exec),
.no_slot_csr_o (no_slot_csr)
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)
);
endmodule

View file

@ -342,7 +342,7 @@
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`ifndef L2NUM_REQUESTS
`define L2NUM_REQUESTS (2*`NUM_CORES_PER_CLUSTER)
`define L2NUM_REQUESTS (2*`NUM_CORES)
`endif
// Number of cycles to complete stage 1 (read from memory)

View file

@ -4,19 +4,19 @@ module VX_csr_data (
input wire clk, // Clock
input wire reset,
input wire[`CSR_ADDR_SIZE-1:0] read_csr_address_i,
input wire write_valid_i,
input wire[`CSR_WIDTH-1:0] write_csr_data_i,
input wire[`CSR_ADDR_SIZE-1:0] read_csr_address,
input wire write_valid,
input wire[`CSR_WIDTH-1:0] write_csr_data,
`IGNORE_WARNINGS_BEGIN
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
input wire[`CSR_ADDR_SIZE-1:0] write_csr_address_i,
input wire[`CSR_ADDR_SIZE-1:0] write_csr_address,
`IGNORE_WARNINGS_END
output wire[31:0] read_csr_data_o,
output wire[31:0] read_csr_data,
// For instruction retire counting
input wire writeback_valid_i
input wire writeback_valid
);
// wire[`NUM_THREADS-1:0][31:0] thread_ids;
// wire[`NUM_THREADS-1:0][31:0] warp_ids;
@ -41,21 +41,21 @@ module VX_csr_data (
wire read_instret;
wire read_instreth;
assign read_cycle = read_csr_address_i == `CSR_CYCL_L;
assign read_cycleh = read_csr_address_i == `CSR_CYCL_H;
assign read_instret = read_csr_address_i == `CSR_INST_L;
assign read_instreth = read_csr_address_i == `CSR_INST_H;
assign read_cycle = read_csr_address == `CSR_CYCL_L;
assign read_cycleh = read_csr_address == `CSR_CYCL_H;
assign read_instret = read_csr_address == `CSR_INST_L;
assign read_instreth = read_csr_address == `CSR_INST_H;
wire [$clog2(`NUM_CSRS)-1:0] read_addr, write_addr;
// cast address to physical CSR range
assign read_addr = $size(read_addr)'(read_csr_address_i);
assign write_addr = $size(write_addr)'(write_csr_address_i);
assign read_addr = $size(read_addr)'(read_csr_address);
assign write_addr = $size(write_addr)'(write_csr_address);
// wire thread_select = read_csr_address_i == 12'h20;
// wire warp_select = read_csr_address_i == 12'h21;
// wire thread_select = read_csr_address == 12'h20;
// wire warp_select = read_csr_address == 12'h21;
// assign read_csr_data_o = thread_select ? thread_ids :
// assign read_csr_data = thread_select ? thread_ids :
// warp_select ? warp_ids :
// 0;
@ -67,16 +67,16 @@ module VX_csr_data (
instret <= 0;
end else begin
cycle <= cycle + 1;
if (write_valid_i) begin
csr[write_addr] <= write_csr_data_i;
if (write_valid) begin
csr[write_addr] <= write_csr_data;
end
if (writeback_valid_i) begin
if (writeback_valid) begin
instret <= instret + 1;
end
end
end
assign read_csr_data_o = read_cycle ? cycle[31:0] :
assign read_csr_data = read_cycle ? cycle[31:0] :
read_cycleh ? cycle[63:32] :
read_instret ? instret[31:0] :
read_instreth ? instret[63:32] :

View file

@ -33,12 +33,12 @@ module VX_csr_pipe #(
VX_csr_data csr_data(
.clk (clk),
.reset (reset),
.read_csr_address_i (csr_req_if.csr_address),
.write_valid_i (is_csr_s2),
.write_csr_data_i (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_csr_address_i(csr_address_s2),
.read_csr_data_o (csr_read_data_unqual),
.writeback_valid_i (writeback)
.read_csr_address (csr_req_if.csr_address),
.write_valid (is_csr_s2),
.write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_csr_address (csr_address_s2),
.read_csr_data (csr_read_data_unqual),
.writeback_valid (writeback)
);
reg [31:0] csr_updated_data;

View file

@ -8,9 +8,30 @@
// `define ASIC 1
// `define SYN_FUNC 1
///////////////////////////////////////////////////////////////////////////////
// Bracket a region in which Verilator's UNUSED lint warning is switched off.
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
`define DEBUG_END /* verilator lint_on UNUSED */
// Bracket a region in which the UNUSED, PINCONNECTEMPTY and DECLFILENAME
// Verilator lint warnings are switched off (BEGIN) and back on (END).
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
/* verilator lint_off PINCONNECTEMPTY */ \
/* verilator lint_off DECLFILENAME */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on DECLFILENAME */
// Expand the argument text into a string literal.
`define STRINGIFY(x) `"x`"
// Elaboration-time check: emits $error(msg) from a generate block when cond is false.
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
endgenerate
// Bit width needed to index x items, with a minimum of 1.
// The argument is parenthesized so expression arguments expand safely.
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
// Cores per cluster; integer division, so a non-multiple core count truncates.
`define NUM_CORES_PER_CLUSTER (`NUM_CORES / `NUM_CLUSTERS)
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS (`LOG2UP(`NUM_WARPS))
@ -119,7 +140,7 @@
// IO BUS
`define IO_BUS_ADDR 32'h00010000
// ======================= Dcache Configurable Knobs ==========================
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Function ID
`define DFUNC_ID 0
@ -133,7 +154,7 @@
// Word size in bits
`define DWORD_SIZE_BITS (`DWORD_SIZE_BYTES * 8)
// ======================= Icache Configurable Knobs ==========================
////////////////////////// Icache Configurable Knobs //////////////////////////
// Function ID
`define IFUNC_ID 1
@ -144,7 +165,7 @@
// Bank Number of words in a line
`define IBANK_LINE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES)
// ======================= SM Configurable Knobs ==============================
////////////////////////// SM Configurable Knobs //////////////////////////////
// Function ID
`define SFUNC_ID 2
@ -155,7 +176,7 @@
// Bank Number of words in a line
`define SBANK_LINE_WORDS (`SBANK_LINE_SIZE_BYTES / `SWORD_SIZE_BYTES)
// ======================= L2cache Configurable Knobs =========================
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Function ID
`define L2FUNC_ID 3
@ -166,7 +187,7 @@
// Bank Number of words in a line
`define L2BANK_LINE_WORDS (`L2BANK_LINE_SIZE_BYTES / `L2WORD_SIZE_BYTES)
// ======================= L3cache Configurable Knobs =========================
////////////////////////// L3cache Configurable Knobs /////////////////////////
// Function ID
// NOTE(review): same value as L2FUNC_ID (3) — confirm the shared ID is intentional.
`define L3FUNC_ID 3
@ -177,25 +198,5 @@
// Bank Number of words in a line
`define L3BANK_LINE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES)
//=============================================================================
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
`define DEBUG_END /* verilator lint_on UNUSED */
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
/* verilator lint_off PINCONNECTEMPTY */ \
/* verilator lint_off DECLFILENAME */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on DECLFILENAME */
`define STRINGIFY(x) `"x`"
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
endgenerate
// VX_DEFINE
`endif

View file

@ -10,12 +10,12 @@ module VX_exec_unit (
// Writeback
VX_inst_exec_wb_if inst_exec_wb_if,
// JAL Response
VX_jal_rsp_if jal_rsp_if,
VX_jal_rsp_if jal_rsp_if,
// Branch Response
VX_branch_rsp_if branch_rsp_if,
VX_branch_rsp_if branch_rsp_if,
input wire no_slot_exec_i,
output wire delay_o
input wire no_slot_exec,
output wire delay
);
wire[`NUM_THREADS-1:0][31:0] in_a_reg_data;
@ -50,15 +50,15 @@ module VX_exec_unit (
VX_alu alu(
.clk (clk),
.reset (reset),
.a_i (in_a_reg_data[index_out_reg]),
.b_i (in_b_reg_data[index_out_reg]),
.rs2_src_i (in_rs2_src),
.itype_immed_i (in_itype_immed),
.upper_immed_i (in_upper_immed),
.alu_op_i (in_alu_op),
.curr_PC_i (in_curr_PC),
.alu_result_o (alu_result[index_out_reg]),
.alu_stall_o (alu_stall[index_out_reg])
.src_a (in_a_reg_data[index_out_reg]),
.src_b (in_b_reg_data[index_out_reg]),
.src_rs2 (in_rs2_src),
.itype_immed (in_itype_immed),
.upper_immed (in_upper_immed),
.alu_op (in_alu_op),
.curr_PC (in_curr_PC),
.alu_result (alu_result[index_out_reg]),
.alu_stall (alu_stall[index_out_reg])
);
end
endgenerate
@ -66,7 +66,7 @@ module VX_exec_unit (
wire internal_stall;
assign internal_stall = |alu_stall;
assign delay_o = no_slot_exec_i || internal_stall;
assign delay = no_slot_exec || internal_stall;
`DEBUG_BEGIN
wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;

View file

@ -10,7 +10,7 @@ module VX_fetch (
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids,
output wire ebreak_o,
output wire ebreak,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
VX_inst_meta_if fe_inst_meta_fi,
@ -86,7 +86,7 @@ module VX_fetch (
.thread_mask (thread_mask),
.warp_num (warp_num),
.warp_pc (warp_pc),
.ebreak_o (ebreak_o),
.ebreak (ebreak),
.scheduled_warp (scheduled_warp)
);

View file

@ -52,7 +52,7 @@ module VX_front_end (
.warp_ctl_if (warp_ctl_if),
.icache_stage_delay (icache_stage_delay),
.branch_rsp_if (branch_rsp_if),
.ebreak_o (vortex_ebreak), // fetch_ebreak
.ebreak (vortex_ebreak), // fetch_ebreak
.fe_inst_meta_fi (fe_inst_meta_fi)
);
@ -61,7 +61,7 @@ module VX_front_end (
VX_f_d_reg f_i_reg(
.clk (clk),
.reset (reset),
.freeze_i (freeze_fi_reg),
.freeze (freeze_fi_reg),
.fe_inst_meta_fd(fe_inst_meta_fi),
.fd_inst_meta_de(fe_inst_meta_fi2)
);
@ -82,7 +82,7 @@ module VX_front_end (
VX_i_d_reg i_d_reg(
.clk (clk),
.reset (reset),
.freeze_i (total_freeze),
.freeze (total_freeze),
.fe_inst_meta_fd (fe_inst_meta_id),
.fd_inst_meta_de (fd_inst_meta_de)
);
@ -100,8 +100,8 @@ module VX_front_end (
VX_d_e_reg d_e_reg(
.clk (clk),
.reset (reset),
.branch_stall_i (no_br_stall),
.freeze_i (total_freeze),
.branch_stall (no_br_stall),
.freeze (total_freeze),
.frE_to_bckE_req_if (frE_to_bckE_req_if),
.bckE_req_if (bckE_req_if)
);

View file

@ -3,17 +3,17 @@
module VX_gpr (
input wire clk,
input wire reset,
input wire valid_write_request_i,
input wire valid_write_request,
VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if,
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_o,
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_o
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data,
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data
);
wire write_enable;
`ifndef ASIC
assign write_enable = valid_write_request_i && ((writeback_if.wb != 0)) && (writeback_if.rd != 0);
assign write_enable = valid_write_request && ((writeback_if.wb != 0)) && (writeback_if.rd != 0);
byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable),
@ -24,11 +24,11 @@ module VX_gpr (
.raddr2(gpr_read_if.rs2),
.be (writeback_if.wb_valid),
.wdata (writeback_if.write_data),
.q1 (a_reg_data_o),
.q2 (b_reg_data_o)
.q1 (a_reg_data),
.q2 (b_reg_data)
);
`else
assign write_enable = valid_write_request_i && ((writeback_if.wb != 0));
assign write_enable = valid_write_request && ((writeback_if.wb != 0));
wire going_to_write = write_enable & (|writeback_if.wb_valid);
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
@ -56,13 +56,13 @@ module VX_gpr (
begin
for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1)
begin
assign a_reg_data_o[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
assign b_reg_data_o[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
end
end
`else
assign a_reg_data_o = temp_a;
assign b_reg_data_o = temp_b;
assign a_reg_data = temp_a;
assign b_reg_data = temp_b;
`endif
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;

View file

@ -49,14 +49,14 @@ module VX_gpr_stage (
VX_gpr_data_if gpr_datf_if();
VX_gpr_wrapper grp_wrapper (
.clk (clk),
.reset (reset),
.writeback_if(writeback_if),
.gpr_read_if (gpr_read_if),
.gpr_jal_if (gpr_jal_if),
.clk (clk),
.reset (reset),
.writeback_if (writeback_if),
.gpr_read_if (gpr_read_if),
.gpr_jal_if (gpr_jal_if),
.a_reg_data_o (gpr_datf_if.a_reg_data),
.b_reg_data_o (gpr_datf_if.b_reg_data)
.a_reg_data (gpr_datf_if.a_reg_data),
.b_reg_data (gpr_datf_if.b_reg_data)
);
// assign bckE_req_if.is_csr = is_csr;

View file

@ -7,8 +7,8 @@ module VX_gpr_wrapper (
VX_wb_if writeback_if,
VX_gpr_jal_if gpr_jal_if,
output wire[`NUM_THREADS-1:0][31:0] a_reg_data_o,
output wire[`NUM_THREADS-1:0][31:0] b_reg_data_o
output wire[`NUM_THREADS-1:0][31:0] a_reg_data,
output wire[`NUM_THREADS-1:0][31:0] b_reg_data
);
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data;
@ -23,8 +23,8 @@ module VX_gpr_wrapper (
endgenerate
`ifndef ASIC
assign a_reg_data_o = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num]));
assign b_reg_data_o = (temp_b_reg_data[gpr_read_if.warp_num]);
assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num]));
assign b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]);
`else
wire zer = 0;
@ -41,8 +41,8 @@ module VX_gpr_wrapper (
.out (old_warp_num)
);
assign a_reg_data_o = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
assign b_reg_data_o = (temp_b_reg_data[old_warp_num]);
assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
assign b_reg_data = (temp_b_reg_data[old_warp_num]);
`endif
@ -54,11 +54,11 @@ module VX_gpr_wrapper (
VX_gpr gpr(
.clk (clk),
.reset (reset),
.valid_write_request_i (valid_write_request),
.valid_write_request (valid_write_request),
.gpr_read_if (gpr_read_if),
.writeback_if (writeback_if),
.a_reg_data_o (temp_a_reg_data[warp_index]),
.b_reg_data_o (temp_b_reg_data[warp_index])
.a_reg_data (temp_a_reg_data[warp_index]),
.b_reg_data (temp_b_reg_data[warp_index])
);
end

View file

@ -3,7 +3,7 @@
module VX_lsu (
input wire clk,
input wire reset,
input wire no_slot_mem_i,
input wire no_slot_mem,
VX_lsu_req_if lsu_req_if,
// Write back to GPR
@ -11,7 +11,7 @@ module VX_lsu (
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
output wire delay_o
output wire delay
);
// Generate Addresses
wire[`NUM_THREADS-1:0][31:0] address;
@ -38,7 +38,7 @@ module VX_lsu (
) lsu_buffer(
.clk (clk),
.reset(reset),
.stall(delay_o),
.stall(delay),
.flush(zero),
.in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}),
.out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc })
@ -56,10 +56,10 @@ module VX_lsu (
assign dcache_req_if.core_req_pc = use_pc;
// Core can't accept response
assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem_i;
assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem;
// Cache can't accept request
assign delay_o = ~dcache_req_if.core_req_ready;
assign delay = ~dcache_req_if.core_req_ready;
// Core Response
assign mem_wb_if.rd = dcache_rsp_if.core_rsp_read;

View file

@ -6,17 +6,17 @@ module VX_warp (
input wire reset,
input wire stall,
input wire remove,
input wire[`NUM_THREADS-1:0] in_thread_mask,
input wire in_change_mask,
input wire in_jal,
input wire[31:0] in_jal_dest,
input wire in_branch_dir,
input wire[31:0] in_branch_dest,
input wire in_wspawn,
input wire[31:0] in_wspawn_pc,
input wire[`NUM_THREADS-1:0] thread_mask,
input wire change_mask,
input wire jal,
input wire[31:0] jal_dest,
input wire branch_dir,
input wire[31:0] branch_dest,
input wire wspawn,
input wire[31:0] wspawn_pc,
output wire[31:0] out_PC,
output wire[`NUM_THREADS-1:0] out_valid
output wire[31:0] PC,
output wire[`NUM_THREADS-1:0] valid
);
reg[31:0] real_PC;
@ -41,40 +41,40 @@ module VX_warp (
// Thread-valid mask register: cleared to valid_zero on remove, otherwise
// loaded from the incoming thread mask when a mask change is requested.
// NOTE(review): after the port rename the module output is also called
// `valid`, the same name as the register written here — confirm the internal
// register was renamed, otherwise the two declarations collide.
always @(posedge clk) begin
if (remove) begin
valid <= valid_zero;
end else if (in_change_mask) begin
valid <= in_thread_mask;
end else if (change_mask) begin
valid <= thread_mask;
end
end
// Per-thread output valid: the new mask is forwarded in the same cycle as a
// mask change; otherwise the output is forced to 0 while stalled, else it
// follows the registered mask.
genvar out_cur_th;
generate
for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : out_valid_assign
assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th];
for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : valid_assign
// NOTE(review): this drives valid[...] from valid[...] itself; the right-hand
// side presumably should read the internal mask register — verify.
assign valid[out_cur_th] = change_mask ? thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th];
end
endgenerate
always @(*) begin
if (in_jal == 1'b1) begin
temp_PC = in_jal_dest;
if (jal == 1'b1) begin
temp_PC = jal_dest;
// $display("LINKING TO %h", temp_PC);
end else if (in_branch_dir == 1'b1) begin
temp_PC = in_branch_dest;
end else if (branch_dir == 1'b1) begin
temp_PC = branch_dest;
end else begin
temp_PC = real_PC;
end
end
assign use_PC = temp_PC;
assign out_PC = temp_PC;
assign PC = temp_PC;
always @(posedge clk) begin
if (reset) begin
real_PC <= 0;
end else if (in_wspawn == 1'b1) begin
// $display("Inside warp ***** Spawn @ %H",in_wspawn_pc);
real_PC <= in_wspawn_pc;
end else if (wspawn == 1'b1) begin
// $display("Inside warp ***** Spawn @ %H",wspawn_pc);
real_PC <= wspawn_pc;
end else if (!stall) begin
real_PC <= use_PC + 32'h4;
end else begin

View file

@ -55,7 +55,7 @@ module VX_warp_sched (
output wire[`NUM_THREADS-1:0] thread_mask,
output wire[`NW_BITS-1:0] warp_num,
output wire[31:0] warp_pc,
output wire ebreak_o,
output wire ebreak,
output wire scheduled_warp,
input wire[`NW_BITS-1:0] icache_stage_wid,
@ -331,10 +331,6 @@ module VX_warp_sched (
// .ones_found()
// );
wire ebreak = (warp_active == 0);
assign ebreak_o = ebreak;
/* verilator lint_on WIDTH */
assign ebreak = (warp_active == 0);
endmodule

View file

@ -12,9 +12,9 @@ module VX_writeback (
// Actual WB to GPR
VX_wb_if writeback_if,
output wire no_slot_mem_o,
output wire no_slot_exec_o,
output wire no_slot_csr_o
output wire no_slot_mem,
output wire no_slot_exec,
output wire no_slot_csr
);
VX_wb_if writeback_tempp_if();
@ -23,17 +23,15 @@ module VX_writeback (
// A source requests writeback when its wb field is non-zero and at least one
// of its valid bits is set.
// (exec_wb is declared outside this excerpt — presumably the same condition
// for the exec unit; verify.)
wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.wb_valid);
wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid);
// Fixed priority for the single writeback slot: exec, then csr, then mem.
// no_slot_* is raised for a requester that loses to a higher-priority one;
// exec never loses, so its no-slot signal is tied to 0.
assign no_slot_mem_o = mem_wb && (exec_wb || csr_wb);
assign no_slot_csr_o = csr_wb && (exec_wb);
assign no_slot_exec_o = 0;
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
assign no_slot_csr = csr_wb && (exec_wb);
assign no_slot_exec = 0;
// Write data follows the same priority order; 0 when nothing is writing back.
assign writeback_tempp_if.write_data = exec_wb ? inst_exec_wb_if.alu_result :
csr_wb ? csr_wb_if.csr_result :
mem_wb ? mem_wb_if.loaded_data :
0;
assign writeback_tempp_if.wb_valid = exec_wb ? inst_exec_wb_if.wb_valid :
csr_wb ? csr_wb_if.valid :
mem_wb ? mem_wb_if.wb_valid :
@ -52,9 +50,7 @@ module VX_writeback (
assign writeback_tempp_if.wb_warp_num = exec_wb ? inst_exec_wb_if.wb_warp_num :
csr_wb ? csr_wb_if.warp_num :
mem_wb ? mem_wb_if.wb_warp_num :
0;
0;
assign writeback_tempp_if.wb_pc = exec_wb ? inst_exec_wb_if.exec_wb_pc :
csr_wb ? 32'hdeadbeef :
@ -63,19 +59,21 @@ module VX_writeback (
wire zero = 0;
wire[`NUM_THREADS-1:0][31:0] use_wb_data;
wire [`NUM_THREADS-1:0][31:0] use_wb_data;
VX_generic_register #(.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)) wb_register(
VX_generic_register #(
.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)
) wb_register (
.clk (clk),
.reset(reset),
.stall(zero),
.flush(zero),
.in ({writeback_tempp_if.write_data, writeback_tempp_if.wb_valid, writeback_tempp_if.rd, writeback_tempp_if.wb, writeback_tempp_if.wb_warp_num, writeback_tempp_if.wb_pc}),
.out ({use_wb_data , writeback_if.wb_valid, writeback_if.rd, writeback_if.wb, writeback_if.wb_warp_num, writeback_if.wb_pc})
);
);
reg [31:0] last_data_wb /* verilator public */;
reg[31:0] last_data_wb /* verilator public */ ;
always @(posedge clk) begin
if ((|writeback_if.wb_valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
last_data_wb <= use_wb_data[0];

View file

@ -184,8 +184,8 @@ VX_back_end #(
.dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if),
.writeback_if (writeback_if),
.mem_delay_o (memory_delay),
.exec_delay_o (exec_delay),
.mem_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);

View file

@ -42,7 +42,9 @@ module Vortex_Socket (
assign io_data [curr_c] = cluster_io_data [curr_c];
end
Vortex_Cluster #(.CLUSTER_ID(0)) Vortex_Cluster(
Vortex_Cluster #(
.CLUSTER_ID(0)
) Vortex_Cluster (
.clk (clk),
.reset (reset),
.io_valid (cluster_io_valid),
@ -260,7 +262,6 @@ module Vortex_Socket (
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_ready (& snp_fwd_ready)
);
end
endmodule

View file

@ -1,8 +1,7 @@
`include "VX_cache_config.vh"
module VX_cache_core_req_bank_sel
#(
module VX_cache_core_req_bank_sel #(
// Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes
@ -18,8 +17,7 @@ module VX_cache_core_req_bank_sel
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
@ -29,7 +27,7 @@ module VX_cache_core_req_bank_sel
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
@ -42,12 +40,9 @@ module VX_cache_core_req_bank_sel
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs
// Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
)
(
) (
input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,

View file

@ -56,21 +56,21 @@ module VX_cache_req_queue #(
input wire [31:0] bank_pc,
// Dequeue Data
input wire reqq_pop,
output wire reqq_req_st0,
input wire reqq_pop,
output wire reqq_req_st0,
output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0,
output wire [31:0] reqq_req_addr_st0,
output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0,
output wire [4:0] reqq_req_rd_st0,
output wire [1:0] reqq_req_wb_st0,
output wire [`NW_BITS-1:0] reqq_req_warp_num_st0,
output wire [2:0] reqq_req_mem_read_st0,
output wire [2:0] reqq_req_mem_write_st0,
output wire [31:0] reqq_req_pc_st0,
output wire [31:0] reqq_req_addr_st0,
output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0,
output wire [4:0] reqq_req_rd_st0,
output wire [1:0] reqq_req_wb_st0,
output wire [`NW_BITS-1:0] reqq_req_warp_num_st0,
output wire [2:0] reqq_req_mem_read_st0,
output wire [2:0] reqq_req_mem_write_st0,
output wire [31:0] reqq_req_pc_st0,
// State Data
output wire reqq_empty,
output wire reqq_full
output wire reqq_empty,
output wire reqq_full
);
wire [NUM_REQUESTS-1:0] out_per_valids;

View file

@ -1,79 +0,0 @@
`include "VX_cache_config.vh"
// Combinational selector that forwards one lower-level-cache response per
// cycle from the banks to the requester slot named by the response's tid.
//
// A priority encoder picks one bank among those asserting
// per_bank_llvq_valid. When a bank is found and llvq_pop is high, that bank's
// address/data are written to the output slot indexed by the bank's rsp tid,
// the matching llvq_valid bit is raised, and the bank's pop strobe is set.
// In every other case all outputs are driven to zero.
//
// Only NUM_BANKS and NUM_REQUESTS are used directly in this module body;
// the remaining parameters are declared but not referenced here.
module VX_dcache_llv_resp_bank_sel #(
    // Cache capacity in bytes
    parameter CACHE_SIZE_BYTES              = 1024,
    // Bytes per line within one bank
    parameter BANK_LINE_SIZE_BYTES          = 16,
    // Bank count {1, 2, 4, 8,...}
    parameter NUM_BANKS                     = 8,
    // Bytes per word
    parameter WORD_SIZE_BYTES               = 4,
    // Word requests accepted per cycle {1, 2, 4, 8, ...}
    parameter NUM_REQUESTS                  = 2,
    // Cycles taken by stage 1 (memory read)
    parameter STAGE_1_CYCLES                = 2,

    // ---- Knobs for queues feeding the banks {1, 2, 4, 8, ...} ----
    // Core request queue depth
    parameter REQQ_SIZE                     = 8,
    // Miss reserve queue depth
    parameter MRVQ_SIZE                     = 8,
    // DRAM fill response queue depth
    parameter DFPQ_SIZE                     = 2,
    // Snoop request queue depth
    parameter SNRQ_SIZE                     = 8,

    // ---- Knobs for writeback queues {1, 2, 4, 8, ...} ----
    // Core writeback queue depth
    parameter CWBQ_SIZE                     = 8,
    // DRAM writeback queue depth
    parameter DWBQ_SIZE                     = 4,
    // DRAM fill request queue depth
    parameter DFQQ_SIZE                     = 8,
    // Lower-level cache hit queue depth
    parameter LLVQ_SIZE                     = 16,

    // Fill invalidator depth {fill invalidator must be active}
    parameter FILL_INVALIDAOR_SIZE          = 16,

    // ---- DRAM knobs ----
    parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) (
    // Per-bank side
    output reg  [NUM_BANKS-1:0]                              per_bank_llvq_pop,
    input  wire [NUM_BANKS-1:0]                              per_bank_llvq_valid,
    input  wire [NUM_BANKS-1:0][31:0]                        per_bank_llvq_rsp_addr,
    input  wire [NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0]  per_bank_llvq_rsp_data,
    input  wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0]   per_bank_llvq_rsp_tid,

    // Requester side
    input  wire                                              llvq_pop,
    output reg  [NUM_REQUESTS-1:0]                           llvq_valid,
    output reg  [NUM_REQUESTS-1:0][31:0]                     llvq_rsp_addr,
    output reg  [NUM_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_rsp_data
);

    // Index of the bank chosen by the priority encoder, and whether any
    // bank was valid at all.
    wire [(`LOG2UP(NUM_BANKS))-1:0] bank_sel_idx;
    wire                            bank_sel_hit;

    VX_generic_priority_encoder #(
        .N (NUM_BANKS)
    ) sel_bank (
        .valids (per_bank_llvq_valid),
        .index  (bank_sel_idx),
        .found  (bank_sel_hit)
    );

    // A response is forwarded only when a bank is available AND the
    // consumer is popping this cycle.
    wire forward_rsp = bank_sel_hit && llvq_pop;

    // Requester slot targeted by the selected bank's response.
    wire [`LOG2UP(NUM_REQUESTS)-1:0] dst_tid = per_bank_llvq_rsp_tid[bank_sel_idx];

    always @(*) begin
        // Defaults: nothing popped, nothing delivered.
        per_bank_llvq_pop = 0;
        llvq_rsp_data     = 0;
        llvq_rsp_addr     = 0;
        llvq_valid        = 0;

        if (forward_rsp) begin
            per_bank_llvq_pop[bank_sel_idx] = 1'b1;
            llvq_rsp_data[dst_tid]          = per_bank_llvq_rsp_data[bank_sel_idx];
            llvq_rsp_addr[dst_tid]          = per_bank_llvq_rsp_addr[bank_sel_idx];
            llvq_valid[dst_tid]             = 1'b1;
        end
    end

endmodule

View file

@ -1,7 +1,6 @@
`include "VX_cache_config.vh"
module VX_fill_invalidator
#(
module VX_fill_invalidator #(
// Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes
@ -15,8 +14,7 @@ module VX_fill_invalidator
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
@ -26,7 +24,7 @@ module VX_fill_invalidator
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
@ -39,12 +37,9 @@ module VX_fill_invalidator
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs
// Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
)
(
) (
input wire clk,
input wire reset,
@ -53,22 +48,19 @@ module VX_fill_invalidator
input wire[31:0] fill_addr,
output reg invalidate_fill
output reg invalidate_fill
);
if (FILL_INVALIDAOR_SIZE == 0) begin
assign invalidate_fill = 0;
end else begin
reg[FILL_INVALIDAOR_SIZE-1:0] fills_active;
reg[FILL_INVALIDAOR_SIZE-1:0][31:0] fills_address;
reg [FILL_INVALIDAOR_SIZE-1:0] fills_active;
reg [FILL_INVALIDAOR_SIZE-1:0][31:0] fills_address;
reg[FILL_INVALIDAOR_SIZE-1:0] matched_fill;
reg [FILL_INVALIDAOR_SIZE-1:0] matched_fill;
wire matched;
integer fi;
always @(*) begin
@ -77,10 +69,8 @@ module VX_fill_invalidator
end
end
assign matched = (|(matched_fill));
wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
wire enqueue_found;
@ -110,7 +100,7 @@ module VX_fill_invalidator
end
end
// reg success_found;
// reg success_found;
// reg[(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] success_index;
// integer curr_fill;

View file

@ -1,122 +0,0 @@
// Synchronous FIFO with a registered head entry and a write-to-read bypass.
//
// Parameters:
//   DATAW - width in bits of one queue entry.
//   SIZE  - number of entries. 0 builds a stub that is always empty,
//           1 builds a single holding register, >1 builds an array-backed
//           queue. SIZE does not have to be a power of two: the read and
//           write pointers wrap explicitly at SIZE-1.
//
// Ports:
//   clk, reset   - clock and synchronous reset (sampled on posedge clk).
//   push/in_data - enqueue request; has no effect while `full` is high.
//   pop          - dequeue request; has no effect while `empty` is high.
//   out_data     - current head entry.
//   empty, full  - occupancy flags.
module VX_mrv_queue
#(
    parameter DATAW = 4,
    parameter SIZE  = 277
)
(
    input  wire             clk,
    input  wire             reset,
    input  wire             push,
    input  wire[DATAW-1:0]  in_data,
    input  wire             pop,
    output wire[DATAW-1:0]  out_data,
    output wire             empty,
    output wire             full
);
    if (SIZE == 0) begin

        // Zero-depth queue: never holds data, never back-pressures.
        assign empty    = 1;
        assign out_data = 0;
        assign full     = 0;

    end else begin

        reg[DATAW-1:0] data[SIZE-1:0], curr_r, head_r;
        reg[$clog2(SIZE+1)-1:0] size_r;
        reg[$clog2(SIZE)-1:0] wr_ctr_r;
        reg[$clog2(SIZE)-1:0] rd_ptr_r, rd_next_ptr_r;
        reg empty_r, full_r, bypass_r;

        // Qualified strobes: a pop on an empty queue or a push on a full
        // queue is ignored.
        wire reading, writing;
        assign reading = pop && !empty;
        assign writing = push && !full;

        if (SIZE == 1) begin

            // Single-entry queue: head_r is the only storage element.
            always @(posedge clk) begin
                if (reset) begin
                    size_r <= 0;
                end else begin
                    if (writing && !reading) begin
                        size_r <= 1;
                    end else if (reading && !writing) begin
                        size_r <= 0;
                    end
                    if (writing) begin
                        head_r <= in_data;
                    end
                end
            end

            assign out_data = head_r;
            assign empty    = (size_r == 0);
            // A pop in the same cycle frees the slot, so a simultaneous
            // push is accepted.
            assign full     = (size_r != 0) && !pop;

        end else begin

            // Pointer successors with an explicit wrap at SIZE-1. Relying on
            // natural binary overflow is only correct when SIZE is a power
            // of two; otherwise the pointers would index past data[SIZE-1].
            wire [$clog2(SIZE)-1:0] wr_ctr_succ;
            wire [$clog2(SIZE)-1:0] rd_next_succ;
            assign wr_ctr_succ  = (wr_ctr_r      == SIZE-1) ? 0 : wr_ctr_r      + 1;
            assign rd_next_succ = (rd_next_ptr_r == SIZE-1) ? 0 : rd_next_ptr_r + 1;

            // Write pointer.
            always @(posedge clk) begin
                if (reset) begin
                    wr_ctr_r <= 0;
                end else begin
                    if (writing)
                        wr_ctr_r <= wr_ctr_succ;
                end
            end

            // Occupancy counter and registered empty/full flags.
            always @(posedge clk) begin
                if (reset) begin
                    size_r  <= 0;
                    empty_r <= 1;
                    full_r  <= 0;
                end else begin
                    if (writing && !reading) begin
                        size_r  <= size_r + 1;
                        empty_r <= 0;
                        if (size_r == SIZE-1)
                            full_r <= 1;
                    end else if (reading && !writing) begin
                        size_r <= size_r - 1;
                        if (size_r == 1)
                            empty_r <= 1;
                        full_r <= 0;
                    end
                end
            end

            // Storage write.
            always @(posedge clk) begin
                if (writing) begin
                    data[wr_ctr_r] <= in_data;
                end
            end

            // Read pointers and head prefetch. rd_next_ptr_r always leads
            // rd_ptr_r by one slot (modulo SIZE), so a pop advances both.
            always @(posedge clk) begin
                if (reset) begin
                    rd_ptr_r      <= 0;
                    rd_next_ptr_r <= 1;
                    bypass_r      <= 0;
                end else begin
                    if (reading) begin
                        rd_ptr_r      <= rd_next_ptr_r;
                        rd_next_ptr_r <= rd_next_succ;
                    end

                    // Bypass when the entry being written now becomes the
                    // head next cycle: the queue is empty, or it holds one
                    // entry that is being popped in this same cycle.
                    bypass_r <= writing && (empty_r || (1 == size_r) && reading);
                    curr_r   <= in_data;
                    head_r   <= data[reading ? rd_next_ptr_r : rd_ptr_r];
                end
            end

            assign out_data = bypass_r ? curr_r : head_r;
            assign empty    = empty_r;
            assign full     = full_r;

        end
    end

endmodule

View file

@ -3,14 +3,14 @@
module VX_d_e_reg (
input wire clk,
input wire reset,
input wire branch_stall_i,
input wire freeze_i,
input wire branch_stall,
input wire freeze,
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
VX_frE_to_bckE_req_if bckE_req_if
);
wire stall = freeze_i;
wire flush = (branch_stall_i == `STALL);
wire stall = freeze;
wire flush = (branch_stall == `STALL);
VX_generic_register #(
.N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)

View file

@ -3,7 +3,7 @@
module VX_f_d_reg (
input wire clk,
input wire reset,
input wire freeze_i,
input wire freeze,
VX_inst_meta_if fe_inst_meta_fd,
VX_inst_meta_if fd_inst_meta_de
@ -11,7 +11,7 @@ module VX_f_d_reg (
);
wire flush = 1'b0;
wire stall = freeze_i == 1'b1;
wire stall = freeze == 1'b1;
VX_generic_register #(
.N(64+`NW_BITS-1+1+`NUM_THREADS)

View file

@ -3,7 +3,7 @@
module VX_i_d_reg (
input wire clk,
input wire reset,
input wire freeze_i,
input wire freeze,
VX_inst_meta_if fe_inst_meta_fd,
VX_inst_meta_if fd_inst_meta_de
@ -11,7 +11,7 @@ module VX_i_d_reg (
);
wire flush = 1'b0;
wire stall = freeze_i == 1'b1;
wire stall = freeze == 1'b1;
VX_generic_register #(

View file

@ -7,22 +7,26 @@ module testbench();
reg clk;
reg reset;
reg[3:0] in_data;
reg[3:0] data_in;
reg push;
reg pop;
wire[3:0] out_data;
wire[3:0] data_out;
wire full;
wire empty;
VX_generic_queue #(.DATAW(4), .SIZE(4)) dut (
.clk(clk),
.reset(reset),
.data_in(in_data),
.push(push),
.pop(pop),
.data_out(out_data),
.empty(empty),
.full(full));
VX_generic_queue #(
.DATAW(4),
.SIZE(4)
) dut (
.clk(clk),
.reset(reset),
.data_in(data_in),
.push(push),
.pop(pop),
.data_out(data_out),
.empty(empty),
.full(full)
);
always begin
#1 clk = !clk;
@ -30,27 +34,27 @@ module testbench();
initial begin
$monitor ("%d: clk=%b rst=%b push=%b, pop=%b, din=%h, empty=%b, full=%b, dout=%h",
$time, clk, reset, push, pop, in_data, empty, full, out_data);
$time, clk, reset, push, pop, data_in, empty, full, data_out);
#0 clk=0; reset=1; pop=0; push=0;
#2 reset=0; in_data=4'ha; pop=0; push=1;
#2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0);
#0 in_data=4'hb;
#2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0);
#0 in_data=4'hc;
#2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0);
#0 in_data=4'hd;
#2 `check(full, 1); `check(out_data, 4'ha); `check(empty, 0);
#2 reset=0; data_in=4'ha; pop=0; push=1;
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0);
#0 data_in=4'hb;
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0);
#0 data_in=4'hc;
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 0);
#0 data_in=4'hd;
#2 `check(full, 1); `check(data_out, 4'ha); `check(empty, 0);
#0 push=0; pop=1;
#2 `check(full, 0); `check(out_data, 4'hb); `check(empty, 0);
#2 `check(full, 0); `check(out_data, 4'hc); `check(empty, 0);
#2 `check(full, 0); `check(out_data, 4'hd); `check(empty, 0);
#2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 1);
#0 in_data=4'he; push=1; pop=0;
#2 `check(full, 0); `check(out_data, 4'he); `check(empty, 0);
#0 in_data=4'hf; pop=1;
#2 `check(full, 0); `check(out_data, 4'hf); `check(empty, 0);
#2 `check(full, 0); `check(data_out, 4'hb); `check(empty, 0);
#2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 0);
#2 `check(full, 0); `check(data_out, 4'hd); `check(empty, 0);
#2 `check(full, 0); `check(data_out, 4'ha); `check(empty, 1);
#0 data_in=4'he; push=1; pop=0;
#2 `check(full, 0); `check(data_out, 4'he); `check(empty, 0);
#0 data_in=4'hf; pop=1;
#2 `check(full, 0); `check(data_out, 4'hf); `check(empty, 0);
#0 push=0;
#2 `check(full, 0); `check(out_data, 4'hc); `check(empty, 1);
#2 `check(full, 0); `check(data_out, 4'hc); `check(empty, 1);
#1 $finish;
end

View file

@ -1,4 +1,3 @@
`include "VX_define.vh"
module cache_simX (
@ -6,36 +5,28 @@ module cache_simX (
input wire reset,
// Icache
input wire[31:0] in_icache_pc_addr,
input wire in_icache_valid_pc_addr,
output wire out_icache_stall,
input wire[31:0] cache_pc_addr,
input wire icache_valid_pc_addr,
output wire icache_stall,
// Dcache
input wire[2:0] in_dcache_mem_read,
input wire[2:0] in_dcache_mem_write,
input wire in_dcache_in_valid[`NT_M1:0],
input wire[31:0] in_dcache_in_address[`NT_M1:0],
output wire out_dcache_stall
input wire[2:0] dcache_mem_read,
input wire[2:0] dcache_mem_write,
input wire dcache_in_valid[`NT_M1:0],
input wire[31:0] dcache_in_addr[`NT_M1:0],
output wire dcache_stall
);
//////////////////// ICACHE ///////////////////
VX_icache_request_if VX_icache_req;
assign VX_icache_req.pc_address = in_icache_pc_addr;
assign VX_icache_req.cache_driver_in_mem_read_o = (in_icache_valid_pc_addr) ? `LW_MEM_READ : `NO_MEM_READ;
assign VX_icache_req.pc_address = cache_pc_addr;
assign VX_icache_req.cache_driver_in_mem_read_o = (icache_valid_pc_addr) ? `LW_MEM_READ : `NO_MEM_READ;
assign VX_icache_req.cache_driver_in_mem_write_o = `NO_MEM_WRITE;
assign VX_icache_req.cache_driver_in_valid_o = in_icache_valid_pc_addr;
assign VX_icache_req.cache_driver_in_valid_o = icache_valid_pc_addr;
assign VX_icache_req.cache_driver_in_data_o = 0;
VX_icache_rsp_if VX_icache_rsp;
assign out_icache_stall = VX_icache_rsp.delay;
assign icache_stall = VX_icache_rsp.delay;
VX_dram_req_rsp_if #(
@ -48,25 +39,22 @@ module cache_simX (
assign VX_dram_req_rsp_icache.i_m_ready = icache_i_m_ready;
//////////////////// DCACHE ///////////////////
VX_dcache_request_if VX_dcache_req;
assign VX_dcache_req.cache_driver_in_mem_read_o = in_dcache_mem_read;
assign VX_dcache_req.cache_driver_in_mem_write_o = in_dcache_mem_write;
assign VX_dcache_req.cache_driver_in_mem_read_o = dcache_mem_read;
assign VX_dcache_req.cache_driver_in_mem_write_o = dcache_mem_write;
assign VX_dcache_req.cache_driver_in_data_o = 0;
genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1)
begin
assign VX_dcache_req.cache_driver_in_address_o[curr_t] = in_dcache_in_address[curr_t];
assign VX_dcache_req.cache_driver_in_valid_o[curr_t] = in_dcache_in_valid[curr_t];
assign VX_dcache_req.cache_driver_in_address_o[curr_t] = dcache_in_addr[curr_t];
assign VX_dcache_req.cache_driver_in_valid_o[curr_t] = dcache_in_valid[curr_t];
end
VX_dcache_response_if VX_dcache_rsp;
assign out_dcache_stall = VX_dcache_rsp.delay;
assign dcache_stall = VX_dcache_rsp.delay;
VX_dram_req_rsp_if #(
@ -78,7 +66,6 @@ module cache_simX (
reg dcache_i_m_ready;
assign VX_dram_req_rsp.i_m_ready = dcache_i_m_ready;
VX_dmem_ctrl dmem_controller (
.clk (clk),
.reset (reset),
@ -118,7 +105,6 @@ module cache_simX (
end
end
endmodule