Warp + Context Aware Design - Global Stalling

This commit is contained in:
felsabbagh3 2019-05-08 16:32:49 -07:00
parent a6c13bc38c
commit 96dac5e1ce
9 changed files with 94 additions and 66 deletions

View file

@ -18,6 +18,8 @@ module VX_fetch (
input wire[31:0] in_instruction,
input wire in_thread_mask[`NT_M1:0],
input wire in_change_mask,
input wire[`NW_M1:0] in_decode_warp_num,
input wire[`NW_M1:0] in_memory_warp_num,
output wire[31:0] out_instruction,
output wire out_delay,
@ -53,15 +55,19 @@ module VX_fetch (
wire[31:0] warp_pc;
wire warp_valid[`NT_M1:0];
wire warp_zero_change_mask = in_change_mask && (in_decode_warp_num == 0);
wire warp_zero_jal = in_jal && (in_memory_warp_num == 0);
wire warp_zero_branch = in_branch_dir && (in_memory_warp_num == 0);
VX_warp VX_Warp(
.clk (clk),
.reset (reset),
.stall (stall),
.in_thread_mask(in_thread_mask),
.in_change_mask(in_change_mask),
.in_jal (in_jal),
.in_change_mask(warp_zero_change_mask),
.in_jal (warp_zero_jal),
.in_jal_dest (in_jal_dest),
.in_branch_dir (in_branch_dir),
.in_branch_dir (warp_zero_branch),
.in_branch_dest(in_branch_dest),
.out_PC (warp_pc),
.out_valid (warp_valid)

View file

@ -212,6 +212,8 @@ VX_fetch vx_fetch(
.in_instruction (fe_instruction),
.in_thread_mask (decode_thread_mask),
.in_change_mask (decode_change_mask),
.in_decode_warp_num (decode_warp_num),
.in_memory_warp_num (memory_warp_num),
.out_instruction (fetch_instruction),
.out_delay (fetch_delay),

Binary file not shown.

View file

@ -161,7 +161,7 @@ void VVortex::_initial__TOP__1(VVortex__Syms* __restrict vlSymsp) {
vlTOPp->Vortex__DOT__vx_d_e_reg__DOT__jal = 0U;
vlTOPp->Vortex__DOT__vx_d_e_reg__DOT__jal_offset = 0U;
vlTOPp->Vortex__DOT__vx_d_e_reg__DOT__warp_num = 0U;
// INITIAL at VX_fetch.v:35
// INITIAL at VX_fetch.v:37
vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_num = 0U;
vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_state = 0U;
}
@ -859,8 +859,10 @@ void VVortex::_settle__TOP__2(VVortex__Syms* __restrict vlSymsp) {
} else {
vlTOPp->Vortex__DOT__decode_branch_type = 0U;
}
vlTOPp->Vortex__DOT__decode_change_mask = ((IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jalrs)
| (IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jmprt));
vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask
= (((IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jalrs)
| (IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jmprt))
& (0U == (IData)(vlTOPp->Vortex__DOT__vx_f_d_reg__DOT__warp_num)));
vlTOPp->Vortex__DOT__e_m_b_reg_data[1U] = vlTOPp->Vortex__DOT____Vcellout__vx_e_m_reg__out_b_reg_data
[1U];
vlTOPp->Vortex__DOT__e_m_b_reg_data[0U] = vlTOPp->Vortex__DOT____Vcellout__vx_e_m_reg__out_b_reg_data
@ -1327,26 +1329,34 @@ void VVortex::_settle__TOP__2(VVortex__Syms* __restrict vlSymsp) {
[0U];
// ALWAYS at VX_warp.v:49
vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__temp_PC
= ((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__jal)
= (((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__jal)
& (0U == (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__warp_num)))
? vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__jal_dest
: ((1U & ((4U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((~ (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
& (~ (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU)))
: ((1U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU) :
(~ (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU))))
: ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((1U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU) :
(0U != vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U])) : ((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type)
& (0U == vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U])))))
: ((((4U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((~ (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
& (~ (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU))) : (
(1U
& (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
?
(vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U]
>> 0x1fU)
:
(~
(vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U]
>> 0x1fU))))
: ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((1U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU) : (0U !=
vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U]))
: ((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type)
& (0U == vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U])))) & (0U == (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__warp_num)))
? (vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__curr_PC
+ (vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_offset
<< 1U)) : vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__real_PC));
@ -1984,7 +1994,7 @@ void VVortex::_settle__TOP__2(VVortex__Syms* __restrict vlSymsp) {
= vlTOPp->Vortex__DOT____Vcellinp__vx_fetch__in_thread_mask
[0U];
// ALWAYS at VX_warp.v:35
if (vlTOPp->Vortex__DOT__decode_change_mask) {
if (vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask) {
vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid[1U]
= vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask
[1U];
@ -1993,13 +2003,13 @@ void VVortex::_settle__TOP__2(VVortex__Syms* __restrict vlSymsp) {
[0U];
}
vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[0U]
= ((IData)(vlTOPp->Vortex__DOT__decode_change_mask)
= ((IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask)
? vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask
[0U] : ((~ (IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__stall))
& vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid
[0U]));
vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[1U]
= ((IData)(vlTOPp->Vortex__DOT__decode_change_mask)
= ((IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask)
? vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask
[1U] : ((~ (IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__stall))
& vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid
@ -2040,7 +2050,7 @@ VL_INLINE_OPT void VVortex::_sequent__TOP__3(VVortex__Syms* __restrict vlSymsp)
= vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_num;
__Vdlyvset__Vortex__DOT__vx_f_d_reg__DOT__valid__v0 = 0U;
__Vdlyvset__Vortex__DOT__vx_f_d_reg__DOT__valid__v2 = 0U;
// ALWAYS at VX_fetch.v:40
// ALWAYS at VX_fetch.v:42
vlTOPp->__Vdly__Vortex__DOT__vx_fetch__DOT__warp_num
= (3U & (((IData)(vlTOPp->reset) | ((IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_num)
== (IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_state)))
@ -3718,26 +3728,34 @@ VL_INLINE_OPT void VVortex::_sequent__TOP__7(VVortex__Syms* __restrict vlSymsp)
}
// ALWAYS at VX_warp.v:49
vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__temp_PC
= ((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__jal)
= (((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__jal)
& (0U == (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__warp_num)))
? vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__jal_dest
: ((1U & ((4U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((~ (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
& (~ (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU)))
: ((1U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU) :
(~ (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU))))
: ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((1U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU) :
(0U != vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U])) : ((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type)
& (0U == vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U])))))
: ((((4U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((~ (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
& (~ (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU))) : (
(1U
& (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
?
(vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U]
>> 0x1fU)
:
(~
(vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U]
>> 0x1fU))))
: ((2U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? ((1U & (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type))
? (vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U] >> 0x1fU) : (0U !=
vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U]))
: ((IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_type)
& (0U == vlTOPp->Vortex__DOT____Vcellinp__vx_memory__in_alu_result
[0U])))) & (0U == (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__warp_num)))
? (vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__curr_PC
+ (vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__branch_offset
<< 1U)) : vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__real_PC));
@ -4223,8 +4241,10 @@ VL_INLINE_OPT void VVortex::_sequent__TOP__7(VVortex__Syms* __restrict vlSymsp)
} else {
vlTOPp->Vortex__DOT__decode_branch_stall = 0U;
}
vlTOPp->Vortex__DOT__decode_change_mask = ((IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jalrs)
| (IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jmprt));
vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask
= (((IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jalrs)
| (IData)(vlTOPp->Vortex__DOT__vx_decode__DOT__is_jmprt))
& (0U == (IData)(vlTOPp->Vortex__DOT__vx_f_d_reg__DOT__warp_num)));
vlTOPp->Vortex__DOT__vx_forwarding__DOT__src2_mem_fwd
= ((((((0x1fU & (vlTOPp->Vortex__DOT__vx_f_d_reg__DOT__instruction
>> 0x14U)) == (IData)(vlTOPp->Vortex__DOT__vx_e_m_reg__DOT__rd))
@ -4594,7 +4614,7 @@ VL_INLINE_OPT void VVortex::_combo__TOP__8(VVortex__Syms* __restrict vlSymsp) {
= vlTOPp->Vortex__DOT____Vcellinp__vx_fetch__in_thread_mask
[0U];
// ALWAYS at VX_warp.v:35
if (vlTOPp->Vortex__DOT__decode_change_mask) {
if (vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask) {
vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid[1U]
= vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask
[1U];
@ -4603,13 +4623,13 @@ VL_INLINE_OPT void VVortex::_combo__TOP__8(VVortex__Syms* __restrict vlSymsp) {
[0U];
}
vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[0U]
= ((IData)(vlTOPp->Vortex__DOT__decode_change_mask)
= ((IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask)
? vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask
[0U] : ((~ (IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__stall))
& vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid
[0U]));
vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[1U]
= ((IData)(vlTOPp->Vortex__DOT__decode_change_mask)
= ((IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask)
? vlTOPp->Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask
[1U] : ((~ (IData)(vlTOPp->Vortex__DOT__vx_fetch__DOT__stall))
& vlTOPp->Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid
@ -4747,7 +4767,6 @@ void VVortex::_ctor_var_reset() {
Vortex__DOT__decode_valid[__Vi0] = VL_RAND_RESET_I(1);
}}
Vortex__DOT__decode_clone_stall = VL_RAND_RESET_I(1);
Vortex__DOT__decode_change_mask = VL_RAND_RESET_I(1);
{ int __Vi0=0; for (; __Vi0<2; ++__Vi0) {
Vortex__DOT__decode_thread_mask[__Vi0] = VL_RAND_RESET_I(1);
}}
@ -4987,6 +5006,7 @@ void VVortex::_ctor_var_reset() {
{ int __Vi0=0; for (; __Vi0<2; ++__Vi0) {
Vortex__DOT__vx_fetch__DOT__warp_valid[__Vi0] = VL_RAND_RESET_I(1);
}}
Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask = VL_RAND_RESET_I(1);
{ int __Vi0=0; for (; __Vi0<2; ++__Vi0) {
Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[__Vi0] = VL_RAND_RESET_I(1);
}}

View file

@ -41,7 +41,6 @@ VL_MODULE(VVortex) {
VL_SIG8(Vortex__DOT__decode_branch_type,2,0);
VL_SIG8(Vortex__DOT__decode_jal,0,0);
VL_SIG8(Vortex__DOT__decode_clone_stall,0,0);
VL_SIG8(Vortex__DOT__decode_change_mask,0,0);
VL_SIG8(Vortex__DOT__execute_branch_stall,0,0);
VL_SIG8(Vortex__DOT__forwarding_fwd_stall,0,0);
VL_SIG8(Vortex__DOT__forwarding_src1_fwd,0,0);
@ -49,6 +48,7 @@ VL_MODULE(VVortex) {
VL_SIG8(Vortex__DOT__vx_fetch__DOT__stall,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_state,1,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask,0,0);
VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_itype,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_csr,0,0);

Binary file not shown.

Binary file not shown.

View file

@ -10,7 +10,7 @@ S 1574 12890307906 1557343909 0 1557343909 0 "VX_defi
S 4267 12889457992 1557345117 0 1557345117 0 "VX_e_m_reg.v"
S 3405 12889457993 1557348460 0 1557348460 0 "VX_execute.v"
S 1751 12889457994 1557344924 0 1557344924 0 "VX_f_d_reg.v"
S 2030 12890309989 1557348839 0 1557348839 0 "VX_fetch.v"
S 2362 12890309989 1557358323 0 1557358323 0 "VX_fetch.v"
S 6293 12889457996 1557348346 0 1557348346 0 "VX_forwarding.v"
S 1866 12889457997 1557348551 0 1557348551 0 "VX_m_w_reg.v"
S 3847 12890309990 1557348518 0 1557348518 0 "VX_memory.v"
@ -18,12 +18,12 @@ S 1118 12889457999 1557354753 0 1557354753 0 "VX_regi
S 1428 12889458000 1557354772 0 1557354772 0 "VX_register_file_slave.v"
S 1499 12890308905 1557267602 0 1557267602 0 "VX_warp.v"
S 1568 12890307909 1557348531 0 1557348531 0 "VX_writeback.v"
S 18162 12890307910 1557354587 0 1557354587 0 "Vortex.v"
T 276843 12890339974 1557354791 0 1557354791 0 "obj_dir/VVortex.cpp"
T 16753 12890339973 1557354791 0 1557354791 0 "obj_dir/VVortex.h"
T 1800 12890339976 1557354791 0 1557354791 0 "obj_dir/VVortex.mk"
T 530 12890339972 1557354791 0 1557354791 0 "obj_dir/VVortex__Syms.cpp"
T 711 12890339971 1557354791 0 1557354791 0 "obj_dir/VVortex__Syms.h"
T 512 12890339977 1557354791 0 1557354791 0 "obj_dir/VVortex__ver.d"
T 0 0 1557354791 0 1557354791 0 "obj_dir/VVortex__verFiles.dat"
T 1159 12890339975 1557354791 0 1557354791 0 "obj_dir/VVortex_classes.mk"
S 18244 12890307910 1557357447 0 1557357447 0 "Vortex.v"
T 277561 12890339974 1557358338 0 1557358338 0 "obj_dir/VVortex.cpp"
T 16771 12890339973 1557358338 0 1557358338 0 "obj_dir/VVortex.h"
T 1800 12890339976 1557358338 0 1557358338 0 "obj_dir/VVortex.mk"
T 530 12890339972 1557358338 0 1557358338 0 "obj_dir/VVortex__Syms.cpp"
T 711 12890339971 1557358338 0 1557358338 0 "obj_dir/VVortex__Syms.h"
T 512 12890339977 1557358338 0 1557358338 0 "obj_dir/VVortex__ver.d"
T 0 0 1557358338 0 1557358338 0 "obj_dir/VVortex__verFiles.dat"
T 1159 12890339975 1557358338 0 1557358338 0 "obj_dir/VVortex_classes.mk"

View file

@ -3,5 +3,5 @@
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 1.0001
# time to simulate: 6.95313e-310 milliseconds
# time to simulate: 6.95312e-310 milliseconds
# GRADE: Failed on test: 0