GPR ASIC Working

This commit is contained in:
felsabbagh3 2019-10-29 23:20:16 -04:00
parent 3caae2b88e
commit 3b49b82c46
15 changed files with 168 additions and 99 deletions

View file

@ -5,8 +5,8 @@ vortex_test.elf: file format elf32-littleriscv
Disassembly of section .text:
80000000 <_start>:
80000000: 00400593 li a1,4
80000004: 00b58633 add a2,a1,a1
80000000: 7ffff137 lui sp,0x7ffff
80000004: 041010ef jal ra,80001844 <main>
80000008: 00000513 li a0,0
8000000c: 0005006b 0x5006b
@ -23,7 +23,7 @@ Disassembly of section .text:
80000024: 00755c63 bge a0,t2,8000003c <loop_done>
80000028 <loop_body>:
80000028: 80010113 addi sp,sp,-2048
80000028: 80010113 addi sp,sp,-2048 # 7fffe800 <SIZE+0x7fffe7ce>
8000002c: 00050313 mv t1,a0
80000030: 0003506b 0x3506b

Binary file not shown.

View file

@ -1,5 +1,5 @@
:0200000480007A
:10000000930540003386B500130500006B00050022
:1000000037F1FF7FEF101004130500006B000500AF
:10001000938B0600130D0700130F010093030500D7
:1000200013051000635C75001301018013030500C4
:100030006B500300130515006FF0DFFE1300000086

View file

@ -6,10 +6,10 @@
.type _start, @function
.global _start
_start:
li a1, 4
add a2, a1, a1
li a0, 0
.word 0x0005006b # tmc a0
# li a1, 4
# add a2, a1, a1
# li a0, 0
# .word 0x0005006b # tmc a0
###########################
# la a0, 0x10000000
# li a1, 7
@ -19,9 +19,9 @@ _start:
# # li a1, 3
# # sw a1, 0(a0)
# la a0, 0x80000000
# li a1, 9
# sw a1, 0(a0)
# # la a0, 0x80000000
# # li a1, 9
# # sw a1, 0(a0)
# # la a0, 0x80000008
# # li a1, 8
@ -72,11 +72,11 @@ _start:
# .word 0x0000306b #join
# ecall
############################
# lui sp, 0x7ffff
# # jal vx_before_main
# jal main
# li a0, 0
# .word 0x0005006b # tmc a0
# Live startup sequence: set up the stack pointer, call main, then issue
# the custom "tmc" instruction with a0 = 0 once main returns.
# lui writes its 20-bit immediate into the upper bits, so sp = 0x7ffff000.
lui sp, 0x7ffff
# jal vx_before_main
jal main
li a0, 0
# tmc is emitted as a raw .word, so the assembler needs no mnemonic for it.
# NOTE(review): presumably tmc with a0 = 0 deactivates all threads and
# halts the warp — confirm against the Vortex ISA extension.
.word 0x0005006b # tmc a0
# Hi:
# li a2, 7

View file

@ -47,6 +47,7 @@
//
// Known Work Arounds: N/A
//
`define ARM_UD_MODEL
`timescale 1 ns/1 ps
`define ARM_MEM_PROP 1.000
`define ARM_MEM_RETAIN 1.000

View file

@ -47,6 +47,8 @@
//
// Known Work Arounds: N/A
//
`define ARM_UD_MODEL
`timescale 1 ns/1 ps
`define ARM_MEM_PROP 1.000
`define ARM_MEM_RETAIN 1.000

View file

@ -47,6 +47,8 @@
//
// Known Work Arounds: N/A
//
`define ARM_UD_MODEL
`timescale 1 ns/1 ps
`define ARM_MEM_PROP 1.000
`define ARM_MEM_RETAIN 1.000

View file

@ -47,19 +47,30 @@
//
// Known Work Arounds: N/A
//
`define ARM_UD_MODEL
`timescale 1 ns/1 ps
`define ARM_MEM_PROP 1.000
`define ARM_MEM_RETAIN 1.000
`define ARM_MEM_PERIOD 3.000
`define ARM_MEM_WIDTH 1.000
`define ARM_MEM_SETUP 1.000
`define ARM_MEM_HOLD 0.500
`define ARM_MEM_COLLISION 3.000
// Previous non-zero timing parameters, kept commented out for reference:
// `define ARM_MEM_PROP 1.000
// `define ARM_MEM_RETAIN 1.000
// `define ARM_MEM_PERIOD 3.000
// `define ARM_MEM_WIDTH 1.000
// `define ARM_MEM_SETUP 1.000
// `define ARM_MEM_HOLD 0.500
// `define ARM_MEM_COLLISION 3.000
// All memory-model timing parameters are overridden to 0 below.
// NOTE(review): presumably zeroed so the register-file model behaves as
// zero-delay in functional simulation — confirm, and restore the values
// above before any timing-accurate run.
`define ARM_MEM_PROP 0
`define ARM_MEM_RETAIN 0
`define ARM_MEM_PERIOD 0
`define ARM_MEM_WIDTH 0
`define ARM_MEM_SETUP 0
`define ARM_MEM_HOLD 0
`define ARM_MEM_COLLISION 0
// If ARM_HVM_MODEL is defined on the simulator command line, the hierarchical Verilog model is selected.
`ifdef ARM_HVM_MODEL
`undef ARM_MESSAGES
`define ARM_UD_MODEL
// ARM_MEM_SETUP, `ARM_MEM_HOLD,
module datapath_latch_rf2_32x128_wm1 (CLK,Q_update,SE,SI,D,DFTRAMBYP,mem_path,XQ,Q);
input CLK,Q_update,SE,SI,D,DFTRAMBYP,mem_path,XQ;

View file

@ -1,3 +1,5 @@
`include "VX_define.v"
module VX_back_end (
input wire clk,
input wire reset,
@ -8,7 +10,6 @@ module VX_back_end (
VX_jal_response_inter VX_jal_rsp,
VX_branch_response_inter VX_branch_rsp,
VX_frE_to_bckE_req_inter VX_bckE_req,
VX_wb_inter VX_writeback_inter,

View file

@ -10,6 +10,7 @@
// `define ONLY
// `define SYN 1
`define ASIC 1
`define CACHE_NUM_BANKS 8

View file

@ -16,43 +16,31 @@ module VX_gpr (
wire write_enable;
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0));
`ifndef ASIC
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0)) && (VX_writeback_inter.rd != 0);
// `ifndef SYN
byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable),
.clk (clk),
.reset (reset),
.waddr (VX_writeback_inter.rd),
.raddr1(VX_gpr_read.rs1),
.raddr2(VX_gpr_read.rs2),
.be (VX_writeback_inter.wb_valid),
.wdata (VX_writeback_inter.write_data),
.q1 (out_a_reg_data),
.q2 (out_b_reg_data)
);
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
// .reset (reset),
// .waddr (VX_writeback_inter.rd),
// .raddr1(VX_gpr_read.rs1),
// .raddr2(VX_gpr_read.rs2),
// .be (VX_writeback_inter.wb_valid),
// .wdata (VX_writeback_inter.write_data),
// .q1 (out_a_reg_data),
// .q2 (out_b_reg_data)
// );
`else
// `else
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0));
wire writing_to_zero = (VX_writeback_inter.rd == 5'h0);
reg[31:0] use_before;
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
integer i;
always @(posedge clk) begin
if (reset) begin
use_before = 0;
end else if (going_to_write) begin
use_before[VX_writeback_inter.rd] = 1;
end
end
wire[`NT_M1:0][31:0] write_bit_mask;
genvar curr_t;
@ -65,15 +53,15 @@ module VX_gpr (
wire cenb = !going_to_write;
wire cena_1 = (VX_gpr_read.rs1 == 0);
wire cena_2 = (VX_gpr_read.rs2 == 0);
// wire cena_1 = (VX_gpr_read.rs1 == 0);
// wire cena_2 = (VX_gpr_read.rs2 == 0);
wire cena_1 = 0;
wire cena_2 = 0;
wire[`NT_M1:0][31:0] temp_a;
wire[`NT_M1:0][31:0] temp_b;
`ifndef SYN
genvar thread;
genvar curr_bit;
for (thread = 0; thread < `NT; thread = thread + 1)
@ -84,18 +72,10 @@ module VX_gpr (
assign out_b_reg_data[thread][curr_bit] = (temp_b[thread][curr_bit] === 1'dx) ? 1'b0 : temp_b[thread][curr_bit];
end
end
`else
assign out_a_reg_data = (cena_1 | !use_before[VX_gpr_read.rs1]) ? 0 : temp_a;
assign out_b_reg_data = (cena_2 | !use_before[VX_gpr_read.rs2]) ? 0 : temp_b;
`endif
wire[`NT_M1:0][31:0] to_write = writing_to_zero ? 0 : VX_writeback_inter.write_data;
wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
// wire cena_1 = 0;
// wire cena_2 = 0;
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 first_ram (
.CENYA(),
@ -173,6 +153,7 @@ module VX_gpr (
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
// `endif
`endif
endmodule

View file

@ -12,6 +12,7 @@ module VX_gpr_stage (
// inputs
// Instruction Information
VX_frE_to_bckE_req_inter VX_bckE_req,
// WriteBack inputs
VX_wb_inter VX_writeback_inter,
@ -40,9 +41,15 @@ module VX_gpr_stage (
assign VX_gpr_read.rs2 = VX_bckE_req.rs2;
assign VX_gpr_read.warp_num = VX_bckE_req.warp_num;
VX_gpr_jal_inter VX_gpr_jal();
assign VX_gpr_jal.is_jal = VX_bckE_req.jalQual;
assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC;
`ifndef ASIC
VX_gpr_jal_inter VX_gpr_jal();
assign VX_gpr_jal.is_jal = VX_bckE_req.jalQual;
assign VX_gpr_jal.curr_PC = VX_bckE_req.curr_PC;
`else
VX_gpr_jal_inter VX_gpr_jal();
assign VX_gpr_jal.is_jal = VX_exec_unit_req.jalQual;
assign VX_gpr_jal.curr_PC = VX_exec_unit_req.curr_PC;
`endif
VX_gpr_data_inter VX_gpr_datf();
@ -86,9 +93,92 @@ module VX_gpr_stage (
wire stall_lsu = memory_delay;
wire flush_lsu = schedule_delay && !stall_lsu;
assign gpr_stage_delay = stall_lsu;
`ifdef ASIC
wire delayed_lsu_last_cycle;
VX_generic_register #(.N(1)) delayed_reg (
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(stall_rest),
.in (stall_lsu),
.out (delayed_lsu_last_cycle)
);
wire[`NT_M1:0][31:0] temp_store_data;
wire[`NT_M1:0][31:0] temp_base_address; // A reg data
wire[`NT_M1:0][31:0] real_store_data;
wire[`NT_M1:0][31:0] real_base_address; // A reg data
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
VX_generic_register #(.N(256)) lsu_data(
.clk (clk),
.reset(reset),
.stall(!store_curr_real),
.flush(stall_rest),
.in ({real_store_data, real_base_address}),
.out ({temp_store_data, temp_base_address})
);
assign real_store_data = VX_lsu_req_temp.store_data;
assign real_base_address = VX_lsu_req_temp.base_address;
assign VX_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data;
assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
VX_generic_register #(.N(52)) lsu_reg(
.clk (clk),
.reset(reset),
.stall(stall_lsu),
.flush(flush_lsu),
.in ({VX_lsu_req_temp.valid, VX_lsu_req_temp.warp_num, VX_lsu_req_temp.offset, VX_lsu_req_temp.mem_read, VX_lsu_req_temp.mem_write, VX_lsu_req_temp.rd, VX_lsu_req_temp.wb}),
.out ({VX_lsu_req.valid , VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
);
VX_generic_register #(.N(231)) exec_unit_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
);
assign VX_exec_unit_req.a_reg_data = real_base_address;
assign VX_exec_unit_req.b_reg_data = real_store_data;
VX_generic_register #(.N(43)) gpu_inst_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({VX_gpu_inst_req_temp.valid, VX_gpu_inst_req_temp.warp_num, VX_gpu_inst_req_temp.is_wspawn, VX_gpu_inst_req_temp.is_tmc, VX_gpu_inst_req_temp.is_split, VX_gpu_inst_req_temp.is_barrier, VX_gpu_inst_req_temp.pc_next}),
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next })
);
assign VX_gpu_inst_req.a_reg_data = real_base_address;
assign VX_gpu_inst_req.rd2 = real_store_data;
VX_generic_register #(.N(60)) csr_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
);
// assign
`else
VX_generic_register #(.N(308)) lsu_reg(
.clk (clk),
@ -126,29 +216,6 @@ module VX_gpr_stage (
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
);
// wire zero_temp = 0;
// VX_generic_register #(.N(256)) reg_data
// (
// .clk (clk),
// .reset(reset),
// .stall(zero_temp),
// .flush(zero_temp),
// .in ({VX_gpr_datf.a_reg_data, VX_gpr_datf.b_reg_data}),
// .out ({VX_gpr_data.a_reg_data, VX_gpr_data.b_reg_data})
// );
// wire stall = schedule_delay;
// VX_d_e_reg gpr_stage_reg(
// .clk (clk),
// .reset (reset),
// .in_branch_stall (stall),
// .in_freeze (zero_temp),
// .VX_frE_to_bckE_req(VX_bckE_req),
// .VX_bckE_req (VX_bckE_req_out)
// );
`endif
endmodule

View file

@ -83,7 +83,6 @@ assign icache_request_pc_address = icache_request_fe.pc_address;
// Front-end to Back-end
VX_frE_to_bckE_req_inter VX_bckE_req(); // New instruction request to EXE/MEM
// Back-end to Front-end
VX_wb_inter VX_writeback_inter(); // Writeback to GPRs
VX_branch_response_inter VX_branch_rsp(); // Branch Resolution to Fetch

View file

@ -84,7 +84,8 @@ SRC = \
CMD= \
-do "vcd file vortex.vcd; \
-do "VoptFlow = 0; \
vcd file vortex.vcd; \
vcd add -r /vortex_tb/*; \
vcd add -r /vortex/*; \
run -all; \
@ -102,12 +103,13 @@ LOG=
# vlib
comp:
vlog $(OPT) -work $(LIB) $(SRC)
vlog -O0 $(OPT) -work $(LIB) $(SRC)
# vlog -O0 -dpiheader vortex_dpi.h $(OPT) -work $(LIB) $(SRC)
sim: comp
vsim vortex_tb $(LOG) -c -lib $(LIB) $(CMD)
vsim vortex_tb $(LOG) -c -lib $(LIB) $(CMD) > vortex_sim.log
# vsim -novopt vortex_tb $(LOG) -c -lib $(LIB) $(CMD) > vortex_sim.log

View file

@ -4,6 +4,8 @@
`define NUMBER_BANKS 8
`define NUM_WORDS_PER_BLOCK 4
`define ARM_UD_MODEL
`timescale 1ns/1ps
import "DPI-C" load_file = function void load_file(input string filename);