Rename Stage that removes the need for forwarding

This commit is contained in:
felsabbagh3 2019-10-17 00:48:54 -04:00
parent 9a9afbbb6b
commit 95047fcadc
12 changed files with 177 additions and 183 deletions

View file

@ -3,7 +3,7 @@ all: RUNFILE
# -LDFLAGS '-lsystemc'
VERILATOR:
echo "#define VCD_OFF" > tb_debug.h
verilator --compiler gcc -Wno-fatal -Wno-UNOPTFLAT -Wno-UNDRIVEN -Wno-UNSIGNED -Wno-lint -cc Vortex.v -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -O3' -LDFLAGS '-L/usr/local/systemc/'
verilator --compiler gcc --Wno-PINMISSING -cc Vortex.v -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -O3' -LDFLAGS '-L/usr/local/systemc/'
compdebug:
echo "#define VCD_OUTPUT" > tb_debug.h

View file

@ -2,8 +2,9 @@ module VX_back_end (
input wire clk,
input wire reset,
input wire fetch_delay,
input wire schedule_delay,
input wire[31:0] csr_decode_csr_data,
input wire[31:0] csr_decode_csr_data,
output wire execute_branch_stall,
input wire in_fwd_stall,
@ -64,6 +65,7 @@ VX_frE_to_bckE_req_inter VX_bckE_req_out();
VX_gpr_stage VX_gpr_stage(
.clk (clk),
.schedule_delay (schedule_delay),
.VX_writeback_inter(VX_writeback_inter),
.VX_fwd_rsp (VX_fwd_rsp),
.in_fwd_stall (in_fwd_stall),

View file

@ -8,6 +8,7 @@ module VX_fetch (
input wire in_fwd_stall,
input wire in_branch_stall_exe,
input wire in_gpr_stall,
input wire schedule_delay,
VX_icache_response_inter icache_response,
VX_icache_request_inter icache_request,
@ -28,7 +29,7 @@ module VX_fetch (
wire warp_stall;
assign pipe_stall = in_gpr_stall || in_fwd_stall || in_freeze;
assign pipe_stall = in_gpr_stall || in_fwd_stall || in_freeze || schedule_delay;
assign warp_stall = in_branch_stall || (in_branch_stall_exe && 0);

View file

@ -111,7 +111,8 @@ module VX_forwarding (
(!src1_mem_fwd));
assign out_src1_fwd = src1_exe_fwd || src1_mem_fwd || (src1_wb_fwd && 0);
// assign out_src1_fwd = src1_exe_fwd || src1_mem_fwd || (src1_wb_fwd && 0);
assign out_src1_fwd = 0;
@ -137,15 +138,19 @@ module VX_forwarding (
(in_writeback_warp_num == in_decode_warp_num);
assign out_src2_fwd = src2_exe_fwd || src2_mem_fwd || (src2_wb_fwd && 0);
// assign out_src2_fwd = src2_exe_fwd || src2_mem_fwd || (src2_wb_fwd && 0);
assign out_src2_fwd = 0;
wire exe_mem_read_stall = ((src1_exe_fwd || src2_exe_fwd) && exe_mem_read) ? `STALL : `NO_STALL;
wire mem_mem_read_stall = ((src1_mem_fwd || src2_mem_fwd) && mem_mem_read) ? `STALL : `NO_STALL;
// wire exe_mem_read_stall = ((src1_exe_fwd || src2_exe_fwd) && exe_mem_read) ? `STALL : `NO_STALL;
// wire mem_mem_read_stall = ((src1_mem_fwd || src2_mem_fwd) && mem_mem_read) ? `STALL : `NO_STALL;
wire exe_mem_read_stall = `NO_STALL;
wire mem_mem_read_stall = `NO_STALL;
assign out_fwd_stall = exe_mem_read_stall || mem_mem_read_stall;
// assign out_fwd_stall = exe_mem_read_stall || mem_mem_read_stall;
assign out_fwd_stall = 0;
// always @(*) begin
// if (out_fwd_stall) $display("FWD STALL");

View file

@ -9,6 +9,7 @@ module VX_front_end (
input wire execute_branch_stall,
input wire in_gpr_stall,
input wire schedule_delay,
VX_warp_ctl_inter VX_warp_ctl,
@ -18,7 +19,6 @@ module VX_front_end (
VX_jal_response_inter VX_jal_rsp,
VX_branch_response_inter VX_branch_rsp,
VX_wb_inter VX_writeback_inter,
VX_frE_to_bckE_req_inter VX_bckE_req,
@ -38,7 +38,7 @@ wire decode_branch_stall;
wire decode_gpr_stall;
wire total_freeze = memory_delay || fetch_delay || in_gpr_stall;
wire total_freeze = memory_delay || fetch_delay || in_gpr_stall || schedule_delay;
/* verilator lint_off UNUSED */
wire real_fetch_ebreak;
@ -49,6 +49,7 @@ VX_fetch vx_fetch(
.in_memory_delay (memory_delay),
.in_branch_stall (decode_branch_stall),
.in_fwd_stall (forwarding_fwd_stall),
.schedule_delay (schedule_delay),
.in_branch_stall_exe(execute_branch_stall),
.in_gpr_stall (decode_gpr_stall),
.VX_jal_rsp (VX_jal_rsp),

View file

@ -15,13 +15,6 @@ module VX_gpr (
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
// <<<<<<< HEAD
// always @(*) begin
// if(write_enable) $display("Writing to %d: %d = %h",VX_writeback_inter.wb_warp_num, VX_writeback_inter.rd, VX_writeback_inter.write_data[0][31:0]);
// end
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
@ -35,160 +28,102 @@ module VX_gpr (
// );
// =======
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
// .waddr (VX_writeback_inter.rd),
// .raddr1(VX_gpr_read.rs1),
// .be (VX_writeback_inter.wb_valid),
// .wdata (VX_writeback_inter.write_data),
// .q1 (out_a_reg_data)
// );
byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable),
.clk (clk),
.waddr (VX_writeback_inter.rd),
.raddr1(VX_gpr_read.rs1),
.be (VX_writeback_inter.wb_valid),
.wdata (VX_writeback_inter.write_data),
.q1 (out_a_reg_data)
);
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
// .waddr (VX_writeback_inter.rd),
// .raddr1(VX_gpr_read.rs2),
// .be (VX_writeback_inter.wb_valid),
// .wdata (VX_writeback_inter.write_data),
// .q1 (out_b_reg_data)
// );
byte_enabled_simple_dual_port_ram second_ram(
.we (write_enable),
.clk (clk),
.waddr (VX_writeback_inter.rd),
.raddr1(VX_gpr_read.rs2),
.be (VX_writeback_inter.wb_valid),
.wdata (VX_writeback_inter.write_data),
.q1 (out_b_reg_data)
);
wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
// Port A is a read port, Port B is a write port
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(out_a_reg_data),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(1'b0),
.AA(VX_gpr_read.rs1),
.CLKB(clk),
.CENB(1'b0),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(VX_writeback_inter.write_data),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(out_b_reg_data),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(1'b0),
.AA(VX_gpr_read.rs2),
.CLKB(clk),
.CENB(1'b0),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(VX_writeback_inter.write_data),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
// >>>>>>> 5680b997b599ce2900997cab976681fe3881e880
// // USING RAM blocks
// // First RAM
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
// .waddr(VX_writeback_inter.rd),
// .raddr(VX_gpr_read.rs1),
// .be (VX_writeback_inter.wb_valid),
// .wdata(VX_writeback_inter.write_data),
// .q (out_a_reg_data)
// );
// // Second RAM block
// byte_enabled_simple_dual_port_ram second_ram(
// .we (write_enable),
// .clk (clk),
// .waddr(VX_writeback_inter.rd),
// .raddr(VX_gpr_read.rs2),
// .be (VX_writeback_inter.wb_valid),
// .wdata(VX_writeback_inter.write_data),
// .q (out_b_reg_data)
// );
// logic[`NT_M1:0][31:0] gpr[31:0]; // gpr[register_number][thread_number][data_bits]
// wire write_enable;
// assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
// assign read_enable = valid_request;
// // Using Registers
// integer thread_index;
// always_ff@(posedge clk)
// begin
// if (write_enable) begin
// for (thread_index = 0; thread_index <= `NT_M1; thread_index = thread_index + 1) begin
// if (VX_writeback_inter.wb_valid[thread_index]) begin
// gpr[VX_writeback_inter.rd][thread_index] <= VX_writeback_inter.write_data[thread_index];
// end
// end
// end
// out_a_reg_data <= gpr[VX_gpr_read.rs1];
// out_b_reg_data <= gpr[VX_gpr_read.rs2];
// end
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 first_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_a_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(1'b0),
// .AA(VX_gpr_read.rs1),
// .CLKB(clk),
// .CENB(1'b0),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 second_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_b_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(1'b0),
// .AA(VX_gpr_read.rs2),
// .CLKB(clk),
// .CENB(1'b0),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
endmodule

View file

@ -1,6 +1,7 @@
module VX_gpr_stage (
input wire clk,
input wire in_fwd_stall,
input wire schedule_delay,
// inputs
// Instruction Information
VX_frE_to_bckE_req_inter VX_bckE_req,
@ -62,7 +63,7 @@ module VX_gpr_stage (
// assign VX_bckE_req_out.csr_mask = (VX_bckE_req.sr_immed == 1'b1) ? {27'h0, VX_bckE_req.rs1} : VX_gpr_data.a_reg_data[0];
VX_gpr_data_inter VX_gpr_datf;
VX_generic_register #(.N(256)) d_e_reg
VX_generic_register #(.N(256)) reg_data
(
.clk (clk),
.reset(0),
@ -72,10 +73,12 @@ module VX_gpr_stage (
.out ({VX_gpr_data.a_reg_data, VX_gpr_data.b_reg_data})
);
VX_d_e_reg vx_d_e_reg(
wire stall = in_fwd_stall || schedule_delay;
VX_d_e_reg gpr_stage_reg(
.clk (clk),
.reset (0),
.in_fwd_stall (in_fwd_stall),
.in_fwd_stall (stall),
.in_branch_stall (0),
.in_freeze (0),
.in_gpr_stall (out_gpr_stall),

View file

@ -1,11 +1,50 @@
`include "VX_define.v"
// VX_scheduler
// Keeps one "write pending" bit per register number (32 entries) and raises
// schedule_delay while the request on VX_bckE_req reads a register whose bit
// is still set.
//
// Ports:
//   clk                - clock; rename_table is updated on its rising edge.
//   VX_bckE_req        - incoming request; rs1, rs2, rd, wb, rs2_src,
//                        mem_write and valid are read here.
//   VX_writeback_inter - writeback interface; wb, wb_valid and rd are read here.
//   schedule_delay     - output, high when the current request must wait.
module VX_scheduler (
// NOTE(review): the next two lines look like the removed side of the diff hunk
// (the old stub port list) rendered without -/+ markers — confirm against the
// actual file; taken literally they would not form a valid port list.
input clk,
input
input wire clk,
VX_frE_to_bckE_req_inter VX_bckE_req,
VX_wb_inter VX_writeback_inter,
output wire schedule_delay
);
// One bit per register number: 1 = a write to that register has been issued
// and not yet seen on the writeback interface.
// NOTE(review): the table is indexed by register number only; no warp number
// is used here, so presumably all warps share the same bit — confirm this is
// intended.
reg rename_table[31:0];
// Start with every register marked as having no pending write.
initial begin
integer i;
for (i = 0; i < 32; i = i + 1) rename_table[i] = 0;
end
// Writeback that really updates a register: wb is non-zero, at least one
// wb_valid bit is set, and the destination is not register 0.
wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0);
// Incoming request will write a register other than register 0.
wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0);
// wire pass_through = ((VX_bckE_req.rs1 == VX_writeback_inter.rd) || (VX_bckE_req.rs2 == VX_writeback_inter.rd)) && valid_wb;
// wire pass_through = 0;
// Pending-write bits of the two source registers of the incoming request.
wire rs1_rename = rename_table[VX_bckE_req.rs1];
wire rs2_rename = rename_table[VX_bckE_req.rs2];
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
// Register 0 never counts as pending. rs2 only counts when rs2_src selects
// the register (`RS2_REG) or the request is a store.
wire rs1_rename_qual = (rs1_rename && (VX_bckE_req.rs1 != 0));
wire rs2_rename_qual = (rs2_rename && (VX_bckE_req.rs2 != 0) && ((VX_bckE_req.rs2_src == `RS2_REG) || is_store));
wire rename_valid = rs1_rename_qual || rs2_rename_qual ;
// Delay only when at least one bit of the request's valid field is set.
assign schedule_delay = (rename_valid) && (|VX_bckE_req.valid);
// Table update. Both statements are non-blocking assignments in the same
// block, so if writeback and issue hit the same entry in one cycle the later
// statement (set to 1) takes effect.
// NOTE(review): only the source registers are checked before issue, and there
// is a single bit per destination. If two requests writing the same rd are in
// flight, the first writeback appears to clear the bit while the second write
// is still pending — confirm whether that case can occur.
always @(posedge clk) begin
if (valid_wb ) rename_table[VX_writeback_inter.rd] <= 0;
if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.rd] <= 1;
end
endmodule

View file

@ -73,6 +73,7 @@ VX_warp_ctl_inter VX_warp_ctl();
wire out_gpr_stall;
wire schedule_delay;
VX_front_end vx_front_end(
@ -81,11 +82,11 @@ VX_front_end vx_front_end(
.VX_warp_ctl (VX_warp_ctl),
.forwarding_fwd_stall(forwarding_fwd_stall),
.execute_branch_stall(execute_branch_stall),
.VX_writeback_inter (VX_writeback_inter),
.VX_bckE_req (VX_bckE_req),
.decode_csr_address (decode_csr_address),
.memory_delay (memory_delay),
.fetch_delay (fetch_delay),
.schedule_delay (schedule_delay),
.icache_response_fe (icache_response_fe),
.icache_request_fe (icache_request_fe),
.VX_jal_rsp (VX_jal_rsp),
@ -94,10 +95,17 @@ VX_front_end vx_front_end(
.in_gpr_stall (out_gpr_stall)
);
VX_scheduler schedule(
.clk (clk),
.VX_bckE_req (VX_bckE_req),
.VX_writeback_inter(VX_writeback_inter),
.schedule_delay (schedule_delay)
);
VX_back_end vx_back_end(
.clk (clk),
.reset (reset),
.schedule_delay (schedule_delay),
.fetch_delay (fetch_delay),
.in_fwd_stall (forwarding_fwd_stall),
.VX_fwd_req_de (VX_fwd_req_de),

View file

@ -1,7 +1,7 @@
# Dynamic Instructions: 13
# of total cycles: 24
# Dynamic Instructions: 67875
# of total cycles: 67891
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 1.84615
# time to simulate: 6.95312e-310 milliseconds
# CPI: 1.00024
# time to simulate: 0 milliseconds
# GRADE: Failed on test: 4294967295

View file

@ -1 +1 @@
#define VCD_OUTPUT
#define VCD_OFF

View file

@ -372,11 +372,11 @@ bool Vortex::simulate(std::string file_to_simulate)
// unsigned cycles;
counter = 0;
this->stats_total_cycles = 10;
while (this->stop && ((counter < 5)))
while (this->stop && ((counter < 6)))
// while (this->stats_total_cycles < 10)
{
// std::cout << "Counter: " << counter << "\n";
if ((this->stats_total_cycles) % 5000 == 0) std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n";
// if ((this->stats_total_cycles) % 5000 == 0) std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n";
// dstop = !dbus_driver();
vortex->clk = 1;