Proper SIMT with fine-grain scheduler implemented

This commit is contained in:
felsabbagh3 2019-05-10 00:49:54 -07:00
parent 96dac5e1ce
commit 48468ed26a
27 changed files with 6080 additions and 3375 deletions

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -1,393 +1,375 @@
:0200000480007A
:10000000130510007310050213052000731015026C
:10001000731040F17310103037F1FF7FEF0080193B
:10002000EF10C06D73000000938B0600130D0700E6
:10000000130520007310050213052000731015025C
:10001000731040F17310103037F1FF7FEF00001BB9
:10002000EF10806A73000000938B0600130D070029
:10003000130F01009303050013051000635C7500A6
:1000400013010180130305006B5003001305150015
:100050006FF0DFFE13010F0013050000930F060081
:10006000938D0300EBE0BF01170500001305051B8E
:100070006B40050017030000130343FB6B000300F4
:1000800067800000170200011302022623200200ED
:100090002322120023242200232632002328420098
:1000A000232A5200232C6200232E72002320820276
:1000B000232292022324A2022326B2022328C20270
:1000C000232AD202232CE202232EF202232002054D
:1000D0002322120523242205232632052328420544
:1000E000232A5205232C6205232E72052320820722
:1000F000232292072324A2072326B2072328C2071C
:10010000232AD207232CE207232EF2071302100022
:1001100067800000170200011302021D0320020085
:1001200083204200032182008321C20003220201B6
:1001300083224201032382018323C201032402029A
:1001400083244202032582028325C202032602037E
:1001500083264203032782038327C2030328020462
:1001600083284204032982048329C204032A020546
:10017000832A4205032B8205832BC205032C02062A
:10018000832C4206032D8206832DC206032E02070E
:10019000832E4207032F8207832FC207130200001A
:1001A000678000007325000267800000732510023D
:1001B00067800000130101FE232E1100232C810013
:1001C00013040102232604FE6F0000030327C4FE6C
:1001D0009307404C3307F702B707008193874736F0
:1001E000B307F70013850700EF0040408327C4FEE4
:1001F000938717002326F4FE0327C4FE930770009D
:10020000E3D6E7FC130000008320C1010324810131
:100210001301010267800000130101FD2326110272
:10022000232481022322A1031304010313070D00D9
:100230009307404C3307F702B7070081938747368F
:10024000B307F70013850700EF00804B9307050005
:10025000638A070213070D00B707008113172700F1
:100260009387472CB307F7001307100023A0E7007C
:1002700093070D0063960700EFF0DFE96F0080053C
:100280007300000013070D009307404C3307F7027B
:10029000B707008193874736B307F700130784FD3C
:1002A0009305070013850700EF00003D832704FE38
:1002B000138107008327C4FD832584FD032644FEA4
:1002C000832684FE0327C4FE13850700EFF0DFD5E5
:1002D000730000008320C10203248102032D410228
:1002E0001301010367800000130101FB23261104A1
:1002F0002324810413040105EFF0DFEA2324A4FE84
:1003000093090100930710002326F4FE6F004008B4
:100310000327C4FE9307404C3307F702B707008159
:1003200093874736B307F70013850700EF00403D7A
:1003300093070500639807040327C4FE9307404C06
:100340003307F702B707008193874736B307F700F3
:10035000130704FD9305070013850700EF00C03164
:10036000832784FD13810700832744FD832504FD33
:100370000326C4FD832604FE032744FE13850700DD
:10038000EFF05FCF8327C4FE938717002326F4FE88
:100390008327C4FE032784FEE3ECE7F613810900FC
:1003A000EFF05FCE9307020063880704B707008170
:1003B00013854736EF00C03493070500639E07029C
:1003C000930784FB93850700B707008113854736A1
:1003D000EF00802A832704FC138107008327C4FBD6
:1003E000832584FB032644FC832684FC0327C4FC6A
:1003F00013850700EFF05FC3130000008320C104E2
:10040000032481041301010567800000130101FB2F
:10041000232611042324810413040105232EA4FAA6
:10042000232CB4FA232AC4FA2328D4FAEFF09FD855
:10043000EFF05FD72322A4FE13090100232604FE58
:10044000232404FE6F00C008B709FFFF3301310108
:10045000832784FE2326F4FC832784FB2328F4FCD3
:1004600093070100232AF4FC832744FB232CF4FC8C
:10047000832704FB232EF4FC8327C4FE2320F4FEF1
:100480000327C4FE9307404C3307F702B7070081E8
:1004900093874736B307F7001307C4FC930507009B
:1004A00013850700EF00C0168327C4FE938717004B
:1004B0002326F4FE8327C4FE032744FE63E4E700FB
:1004C000232604FE832784FE938717002324F4FE4B
:1004D000032784FE8327C4FBE368F7F613010900B2
:1004E000EFF09FE0130000008320C1040324810487
:1004F0001301010567800000130101FD232611028D
:100500002324810213040103232EA4FCEFF09FC9CE
:100510002320A4FE232604FE6F004005232604FEAC
:10052000232404FE6F00C003B7070081032784FE65
:10053000131727009387472CB307F70003A707007B
:10054000930710006318F7008327C4FE93871700F2
:100550002326F4FE832784FE938717002324F4FECA
:10056000832784FE032704FEE3E0E7FC0327C4FEA1
:100570008327C4FDE314F7FA232204FE6F008002F0
:10058000B7070081032744FE131727009387472CE2
:10059000B307F70023A00700832744FE93871700C3
:1005A0002322F4FE832744FE032704FEE3EAE7FC4C
:1005B000130000008320C102032481021301010300
:1005C00067800000130101FF2326810023247101AD
:1005D0001304010193870B00138507000324C10056
:1005E000832B810013010101678000009302050045
:1005F000130300009303700023A0620023A2620093
:1006000023A4620023A6720023A862006780000072
:100610009302050003A382001303130023A46200C6
:100620001383420183AE420093935E003303730051
:1006300003AE05002320C30103AE45002322C301FE
:1006400003AE85002324C30103AEC5002326C301E6
:1006500003AE05012328C30103AE4501232AC301CC
:10066000938E1E00130F20036394EE01930E00007F
:1006700023A2D201678000009302050003A3820039
:100680001303F3FF23A462001383420183AE02002D
:10069000930F2003138F0E00130F1F006314FF012D
:1006A000130F000023A0E20193935E003303730055
:1006B000032E030023A0C501032E430023A2C5017E
:1006C000032E830023A4C501032EC30023A6C50166
:1006D000032E030123A8C501032E430123AAC5014C
:1006E000678000009302050003A382001305000049
:1006F000130E200363146E001305150067800000BD
:100700009302050003A3820013050000130E0000EE
:1007100063146E0013051500678000009302050046
:1007200003A3C20083A3020133B563006780000006
:10073000130141FF232011002322B100834505004E
:1007400063880500EF00C001130515006FF01FFF5F
:1007500083200100832541001301C1006780000050
:10076000B708010023A0B80067800000130101FD55
:10077000232611022324810213040103232EA4FC47
:100780000327C4FD9307F00063E4E702B707008185
:100790000327C4FD1317270093874720B307F700EB
:1007A00083A7070013850700EFF09FF86F0040074D
:1007B000930700022326F4FEA30504FE8327C4FE4C
:1007C0009387C7FF0327C4FDB357F70093F7F700DC
:1007D0002322F4FE832744FE63860700930710005C
:1007E000A305F4FE8347B4FE63820702B7070081C6
:1007F000032744FE1317270093874720B307F7000A
:1008000083A7070013850700EFF09FF28327C4FE3C
:100810009387C7FF2326F4FE8327C4FEE340F0FA44
:100820008320C102032481021301010367800000B9
:10083000130101FE232E1100232C81001304010259
:100840002326A4FE2324B4FE0325C4FEEFF05FEEAE
:10085000032584FEEFF09FF1B70700811385070899
:10086000EFF01FED130000008320C101032481017C
:100870001301010267800000130101FD232611020C
:100880002324810213040103232EA4FC232CB4FC93
:10089000232AC4FC2328D4FCB73700810327C4FDD6
:1008A00023A2E79AB73700819387479A032784FDED
:1008B00023A2E700B73700819387479A032744FDB7
:1008C00023A4E700B73700819387479A032704FDE5
:1008D00023A6E700EFF09F8D2324A4FE032704FD49
:1008E000832784FEB357F7022326F4FE032704FD73
:1008F000832784FEB377F702638807008327C4FE4B
:10090000938717002326F4FE8325C4FEB7070081D2
:100910001385470CEFF0DFF1B73700819387479AD3
:100920000327C4FE23A8E700032704FD832784FED2
:100930006362F702B73700819386479AB717008042
:100940001386C79A832584FE032504FDEFF01FACB0
:100950006F000002B73700819386479AB71700806F
:100960001386C79A832504FD032504FDEFF01FAA13
:10097000EFF05F832322A4FE032704FD832744FEB8
:1009800063F8E700032544FEEFF01FB76F00C000D7
:10099000032504FDEFF05FB6130000008320C102C1
:1009A000032481021301010367800000130101FA8F
:1009B000232E1104232C8104130401062326A4FAF8
:1009C0002324B4FAEFF01FC0232CA4FC832784FD5A
:1009D00083A70700232AF4FC832784FD83A747000D
:1009E0002328F4FC832784FD83A787002326F4FCB7
:1009F000832784FD83A707012326F4FE0327C4FE73
:100A00008327C4FAB307F7022324F4FE8327C4FE26
:100A1000639A0700930710002326F4FE8327C4FA85
:100A20002324F4FE832784FD83A7C7002324F4FC3A
:100A3000232204FE6F004012232004FE232E04FC18
:100A40006F008007032784FA832784FCB307F7022B
:100A50000327C4FDB307F7002322F4FC032784FC1B
:100A60008327C4FDB307F702032784FEB307F7000B
:100A70002320F4FC832744FC93972700032744FD9D
:100A8000B307F70003A70700832704FC9397270009
:100A9000832604FDB387F60083A70700B307F70298
:100AA000032704FEB307F7002320F4FE8327C4FDC9
:100AB00093871700232EF4FC0327C4FD832784FCAF
:100AC000E362F7F8032784FA832784FC3307F702ED
:100AD000832784FEB307F700232EF4FA032784FE4E
:100AE000832784FCB337F70093F7F70F232CF4FA2E
:100AF000832784FB93B71700A30BF4FA834774FB97
:100B0000138F0700B7170080938F47B46B200F0037
:100B10007B70FF018327C4FB939727000327C4FC46
:100B2000B307F700032704FE23A0E700832784FE12
:100B3000938717002324F4FEB7170080138E87B421
:100B400067000E00130000006B300000832744FE96
:100B5000938717002322F4FE832744FE0327C4FE55
:100B6000E3ECE7EC130000008320C10503248105BA
:100B70001301010667800000130101FC232E1102FE
:100B8000232C810213040104232EA4FC232CB4FC87
:100B9000232AC4FC2328D4FC2326E4FCB737008195
:100BA0000327C4FD23ACE79AB73700819387879B5F
:100BB000032784FD23A2E700B73700819387879B33
:100BC000032744FD23A4E700B73700819387879B61
:100BD0000327C4FC23A6E700B73700819387879BD0
:100BE000032704FD23A8E700EFF04FDC2324A4FE35
:100BF0000327C4FC832784FEB357F7022326F4FEA1
:100C00000327C4FC832784FEB377F70263880700B9
:100C10008327C4FE938717002326F4FEB73700818D
:100C20009387879B0327C4FE23AAE7000327C4FCFE
:100C3000832784FE6362F702B73700819386879B20
:100C4000B7170080138607CB832584FE032504FD98
:100C5000EFF0CFFB6F000002B73700819386879BD0
:100C6000B7170080138607CB8325C4FC032504FD3A
:100C7000EFF0CFF9EFF00FD32322A4FE032704FDFA
:100C8000832744FE63F8E700032544FEEFF0DF8688
:100C90006F00C000032504FDEFF01F861300000065
:100CA0008320C10303248103130101046780000032
:100CB000130101FB232611042324810413040105DD
:100CC000232EA4FA232CB4FAEFF0DF8F2320A4FE06
:100CD000832704FE83A70700232EF4FC832704FE4A
:100CE00083A74700232CF4FC832704FE83A78700F7
:100CF000232AF4FC832704FE83A747012326F4FE5E
:100D00000327C4FE8327C4FBB307F7022324F4FEA2
:100D10008327C4FE639A0700930710002326F4FE7E
:100D20008327C4FB2324F4FE832704FE83A7C70084
:100D30002328F4FC232204FE6F00000C032784FB0D
:100D4000832704FD3307F702832784FEB307F700E8
:100D50002326F4FC032784FE832704FDB337F70022
:100D600093F7F70F2324F4FC832784FC93B7170031
:100D7000A303F4FC834774FC138F0700B7170080AC
:100D8000938F47DE6B200F007B70FF018327C4FC2D
:100D9000939727000327C4FDB307F70083A6070036
:100DA0008327C4FC93972700032784FDB307F7002C
:100DB00003A707008327C4FC93972700032644FD5D
:100DC000B307F6003387E60023A0E700832784FEFD
:100DD000938717002324F4FEB7170080138E87DE55
:100DE00067000E00130000006B300000832744FEF4
:100DF000938717002322F4FE832744FE0327C4FEB3
:100E0000E3EEE7F2130000008320C1040324810411
:100E10001301010567800000130101FC232E11025C
:100E2000232C810213040104232EA4FC232CB4FCE4
:100E3000232AC4FC2328D4FC2326E4FCB7370081F2
:100E40000327C4FD23ACE79AB73700819387879BBC
:100E5000032784FD23A2E700B73700819387879B90
:100E6000032744FD23A4E700B73700819387879BBE
:100E70000327C4FC23A6E700B73700819387879B2D
:100E8000032704FD23A8E700EFF04FB22324A4FEBC
:100E90000327C4FC832784FEB357F7022326F4FEFE
:100EA0000327C4FC832784FEB377F7026388070017
:100EB0008327C4FE938717002326F4FEB7370081EB
:100EC0009387879B0327C4FE23AAE7000327C4FC5C
:100ED000832784FE6362F702B73700819386879B7E
:100EE000B7170080138607F5832584FE032504FDCC
:100EF000EFF0CFD16F000002B73700819386879B58
:100F0000B7170080138607F58325C4FC032504FD6D
:100F1000EFF0CFCFEFF00FA92322A4FE032704FDAB
:100F2000832744FE63F8E700032544FEEFF0CFDC9F
:100F30006F00C000032504FDEFF00FDC130000007C
:100F40008320C1030324810313010104678000008F
:100F5000130101FB2326110423248104130401053A
:100F6000232EA4FA232CB4FAEFF0CFE52320A4FE1D
:100F7000832704FE83A70700232EF4FC832704FEA7
:100F800083A74700232CF4FC832704FE83A7870054
:100F9000232AF4FC832704FE83A747012326F4FEBB
:100FA0000327C4FE8327C4FBB307F7022324F4FE00
:100FB0008327C4FE639A0700930710002326F4FEDC
:100FC0008327C4FB2324F4FE832704FE83A7C700E2
:100FD0002328F4FC232204FE6F00000C032784FB6B
:100FE000832704FD3307F702832784FEB307F70046
:100FF0002326F4FC032784FE832704FDB337F70080
:1010000093F7F70F2324F4FC832784FC93B717008E
:10101000A303F4FC834774FC138F0700B717008009
:10102000938F47086B200F007B70FF018327C4FC60
:10103000939727000327C4FDB307F70083A6070093
:101040008327C4FC93972700032784FDB307F70089
:1010500003A707008327C4FC93972700032644FDBA
:10106000B307F6003387E64023A0E700832784FE1A
:10107000938717002324F4FEB7170080138E870888
:1010800067000E00130000006B300000832744FE51
:10109000938717002322F4FE832744FE0327C4FE10
:1010A000E3EEE7F2130000008320C104032481046F
:1010B0001301010567800000130101FC232E1102BA
:1010C000232C810213040104232EA4FC232CB4FC42
:1010D000232AC4FC2328D4FC2326E4FCB737008150
:1010E0000327C4FD23A8E79CB73700819387079D9A
:1010F000032784FD23A2E700B73700819387079D6C
:10110000032744FD23A4E700B73700819387079D99
:101110000327C4FC23A6E700B73700819387079D08
:10112000032704FD23A8E700EFF04F882324A4FE43
:101130000327C4FC832784FEB357F7022326F4FE5B
:101140000327C4FC832784FEB377F7026388070074
:101150008327C4FE938717002326F4FEB737008148
:101160009387079D0327C4FE23AAE7000327C4FC37
:10117000832784FE6362F702B73700819386079D59
:10118000B71700801386071F832584FE032504FDFF
:10119000EFF0CFA76F000002B73700819386079D5D
:1011A000B71700801386071F8325C4FC032504FDA1
:1011B000EFF0CFA5EFE01FFF2322A4FE032704FDDD
:1011C000832744FE63F8E700032544FEEFF0CFB227
:1011D0006F00C000032504FDEFF00FB21300000004
:1011E0008320C103032481031301010467800000ED
:1011F000130101FB23261104232481041304010598
:10120000232EA4FA232CB4FAEFF0CFBB2320A4FEA4
:10121000832704FE83A70700232EF4FC832704FE04
:1012200083A7470083A70700232CF4FC832704FE31
:1012300083A78700232AF4FC832704FE83A74701A2
:101240002326F4FE0327C4FE8327C4FBB307F7025B
:101250002324F4FE8327C4FE639A0700930710003B
:101260002326F4FE8327C4FB2324F4FE832704FEF5
:1012700083A7C7002328F4FC232204FE6F00000B81
:10128000032784FB832704FD3307F702832784FEAB
:10129000B307F7002326F4FC032784FE832704FD0D
:1012A000B337F70093F7F70F2324F4FC832784FC6C
:1012B00093B71700A303F4FC834774FC138F070054
:1012C000B7170080938F87316B200F007B70FF0171
:1012D0008327C4FC939727000327C4FDB307F700B7
:1012E00083A607008327C4FC93972700032744FDA8
:1012F000B307F700032784FD3387E60023A0E70048
:10130000832784FE938717002324F4FEB7170080F9
:10131000138EC73167000E00130000006B30000011
:10132000832744FE938717002322F4FE832744FE7D
:101330000327C4FEE3E6E7F4130000008320C104A2
:10134000032481041301010567800000130101FCDF
:10135000232E1102232C810213040104232EA4FC4A
:10136000232CB4FC232AC4FC2328D4FC2326E4FC2D
:10137000B73700810327C4FD23A8E79CB737008156
:101380009387079D032784FD23A2E700B7370081D9
:101390009387079D032744FD23A4E700B737008107
:1013A0009387079D0327C4FC23A6E700B737008176
:1013B0009387079D032704FD23A8E700EFE01FDFC5
:1013C0002324A4FE0327C4FC832784FEB357F7021B
:1013D0002326F4FE0327C4FC832784FEB377F70299
:1013E000638807008327C4FE938717002326F4FE33
:1013F000B73700819387079D0327C4FE23AAE70020
:101400000327C4FC832784FE6362F702B737008199
:101410009386079DB717008013864748832584FE6F
:10142000032504FDEFE09FFE6F000002B737008147
:101430009386079DB7170080138647488325C4FC11
:10144000032504FDEFE09FFCEFE0DFD52322A4FE9F
:10145000032704FD832744FE63F8E700032544FEC9
:10146000EFF08F896F00C000032504FDEFF0CF88F7
:10147000130000008320C10303248103130101042E
:1014800067800000130101FB23261104232481043B
:1014900013040105232EA4FA232CB4FAEFF08F9243
:1014A0002320A4FE832704FE83A70700232EF4FC39
:1014B000832704FE83A7470083A70700232CF4FC9F
:1014C000832704FE83A78700232AF4FC832704FED6
:1014D00083A747012326F4FE0327C4FE8327C4FB0A
:1014E000B307F7022324F4FE8327C4FE639A0700A0
:1014F000930710002326F4FE8327C4FB2324F4FE65
:10150000832704FE83A7C7002328F4FC232204FEBC
:101510006F00000B032784FB832704FD3307F702CA
:10152000832784FEB307F7002326F4FC032784FEF9
:10153000832704FDB337F70093F7F70F2324F4FC58
:10154000832784FC93B71700A303F4FC834774FC40
:10155000138F0700B7170080938FC75A6B200F00B7
:101560007B70FF018327C4FC939727000327C4FDEA
:10157000B307F70083A607008327C4FC93972700CF
:10158000032744FDB307F700032784FD3387E602F2
:1015900023A0E700832784FE938717002324F4FE0B
:1015A000B7170080138E075B67000E001300000062
:1015B0006B300000832744FE938717002322F4FE3C
:1015C000832744FE0327C4FEE3E6E7F4130000008C
:1015D0008320C104032481041301010567800000F6
:1015E000130101FE232E810013040102232604FEB1
:1015F0006F008004B73700810327C4FE131727004C
:101600009387879EB307F7001307300023A0E700F6
:10161000B74700810327C4FE131727009387879ECF
:10162000B307F7001307200023A0E7008327C4FEB9
:10163000938717002326F4FE0327C4FE9307F003C5
:10164000E3DAE7FA130000000324C10113010102E9
:1016500067800000130101FD23261102232481026B
:1016600013040103232EA4FCB7070081138507117F
:10167000EFF00F8C232604FE6F0080058327C4FE45
:10168000638E07008327C4FE93F7770063980700F3
:10169000B707008113854713EFF08F898327C4FEB6
:1016A000939727000327C4FDB307F70083A707001C
:1016B00013850700EFF08F8BB70700811385871321
:1016C000EFF00F878327C4FE938717002326F4FECD
:1016D0000327C4FE9307F003E3D2E7FAB7070081BC
:1016E0001385C713EFF0CF84130000008320C102DD
:1016F000032481021301010367800000130101FE2E
:10170000232E1100232C810013040102EFF05FED62
:1017100093068000B75700811386879EB7470081E4
:101720009385879EB73700811385879EEFF0CF940E
:10173000B707008113850716EFE09FFFB7570081B9
:101740001385879EEFF01FF113078000930680003A
:10175000B75700811386879EB74700819385879E80
:10176000B73700811385879EEFF00FC1B70700815F
:101770001385C717EFE0DFFBB75700811385879EFE
:10178000EFF05FED1307800093068000B7570081EC
:101790001386879EB74700819385879EB737008160
:1017A0001385879EEFF04FE7B70700811385071970
:1017B000EFE01FF8B75700811385879EEFF09FE990
:1017C000930730002326F4FE9305C4FE1307800020
:1017D00093068000B75700811386879EB757008114
:1017E0001385879EEFF05F8DB70700811385871AF9
:1017F000EFE01FF4B75700811385879EEFF09FE558
:101800009305C4FE1307800093068000B75700813C
:101810001386879EB75700811385879EEFF01FB30D
:10182000B70700811385871AEFE09FF0B757008153
:101830001385879EEFF01FE29307000013850700D2
:101840008320C1010324810113010102678000008C
:100050006FF0DFFE1300000013000000130000002B
:1000600013000000130000001300000013010F0034
:1000700013050000930F0600938D0300EBE0BF0112
:10008000170500001305051B6B4005001703000052
:100090001303C3F96B00030067800000170200011F
:1000A0001302022023200200232212002324220014
:1000B0002326320023284200232A5200232C6200E8
:1000C000232E720023208202232292022324A202E2
:1000D0002326B2022328C202232AD202232CE202C0
:1000E000232EF202232002052322120523242205B7
:1000F0002326320523284205232A5205232C620594
:10010000232E720523208207232292072324A2078D
:100110002326B2072328C207232AD207232CE2076B
:10012000232EF2071302100067800000170200015F
:1001300013020217032002008320420003218200E1
:100140008321C20003220201832242010323820190
:100150008323C20103240202832442020325820274
:100160008325C20203260203832642030327820358
:100170008327C2030328020483284204032982043C
:100180008329C204032A0205832A4205032B820520
:10019000832BC205032C0206832C4206032D820604
:1001A000832DC206032E0207832E4207032F8207E8
:1001B000832FC2071302000067800000732500022E
:1001C000678000007325100267800000130101FEA4
:1001D000232E1100232C810013040102232604FE88
:1001E0006F0000030327C4FE9307404C3307F70258
:1001F000B70700819387C731B307F700138507005E
:10020000EF0040408327C4FE938717002326F4FEA7
:100210000327C4FE93077000E3D6E7FC1300000039
:100220008320C101032481011301010267800000C2
:10023000130101FD23261102232481022322A1039D
:100240001304010313070D009307404C3307F70213
:10025000B70700819387C731B307F70013850700FD
:10026000EF00804B93070500638A070213070D0018
:10027000B7070081131727009387C727B307F70035
:100280001307100023A0E70093070D0063960700F3
:10029000EFF0DFE96F0080057300000013070D0029
:1002A0009307404C3307F702B70700819387C731A4
:1002B000B307F700130784FD9305070013850700B4
:1002C000EF00003D832704FE138107008327C4FD50
:1002D000832584FD032644FE832684FE0327C4FE73
:1002E00013850700EFF05FD4730000008320C10284
:1002F00003248102032D41021301010367800000E2
:10030000130101FB23261104232481041304010596
:10031000EFF0DFEA2324A4FE930901009307100005
:100320002326F4FE6F0040080327C4FE9307404CC9
:100330003307F702B70700819387C731B307F70088
:1003400013850700EF00403D9307050063980704FD
:100350000327C4FE9307404C3307F702B707008119
:100360009387C731B307F700130704FD9305070010
:1003700013850700EF00C031832784FD1381070038
:10038000832744FD832504FD0326C4FD832604FE44
:10039000032744FE13850700EFF05FCF8327C4FED9
:1003A000938717002326F4FE8327C4FE032784FEC9
:1003B000E3ECE7F613810900EFF05FCE930702004C
:1003C00063880704B70700811385C731EF00C03485
:1003D00093070500639E0702930784FB938507003C
:1003E000B70700811385C731EF00802A832704FCFB
:1003F000138107008327C4FB832584FB032644FC69
:10040000832684FC0327C4FC13850700EFF0DFC1BB
:10041000130000008320C10403248104130101059B
:1004200067800000130101FB2326110423248104AB
:1004300013040105232EA4FA232CB4FA232AC4FAA8
:100440002328D4FAEFF09FD8EFF05FD72322A4FE41
:1004500013090100232604FE232404FE6F00C008B4
:10046000B709FFFF33013101832784FE2326F4FC03
:10047000832784FB2328F4FC93070100232AF4FC40
:10048000832744FB232CF4FC832704FB232EF4FC5A
:100490008327C4FE2320F4FE0327C4FE9307404CA9
:1004A0003307F702B70700819387C731B307F70017
:1004B0001307C4FC9305070013850700EF00C0165F
:1004C0008327C4FE938717002326F4FE8327C4FEE8
:1004D000032744FE63E4E700232604FE832784FE0B
:1004E000938717002324F4FE032784FE8327C4FB8D
:1004F000E368F7F613010900EFF09FE01300000036
:100500008320C104032481041301010567800000D6
:10051000130101FD23261102232481021304010388
:10052000232EA4FCEFF09FC92320A4FE232604FE63
:100530006F004005232604FE232404FE6F00C00341
:10054000B7070081032784FE131727009387C72767
:10055000B307F70003A70700930710006318F7001D
:100560008327C4FE938717002326F4FE832784FE87
:10057000938717002324F4FE832784FE032704FEB9
:10058000E3E0E7FC0327C4FE8327C4FDE314F7FA86
:10059000232204FE6F008002B7070081032744FE78
:1005A000131727009387C727B307F70023A0070077
:1005B000832744FE938717002322F4FE832744FEFB
:1005C000032704FEE3EAE7FC130000008320C102D6
:1005D000032481021301010367800000130101FF5E
:1005E00023268100232471011304010193870B004A
:1005F000138507000324C100832B8100130101012F
:10060000678000009302050013030000930370004D
:1006100023A0620023A2620023A4620023A672002A
:1006200023A86200678000009302050003A38200F4
:100630001303130023A462001383420183AE42001C
:1006400093935E003303730003AE05002320C301C0
:1006500003AE45002322C30103AE85002324C3015A
:1006600003AEC5002326C30103AE05012328C30141
:1006700003AE4501232AC301938E1E00130F2003EE
:100680006394EE01930E000023A2D2016780000064
:100690009302050003A382001303F3FF23A4620067
:1006A0001383420183AE0200930F2003138F0E00C9
:1006B000130F1F006314FF01130F000023A0E201BA
:1006C00093935E0033037300032E030023A0C50140
:1006D000032E430023A2C501032E830023A4C501DA
:1006E000032EC30023A6C501032E030123A8C501C1
:1006F000032E430123AAC501678000009302050071
:1007000003A3820013050000130E200363146E0080
:1007100013051500678000009302050003A3820003
:1007200013050000130E000063146E00130515007E
:10073000678000009302050003A3C20083A30201A7
:1007400033B5630067800000130141FF23201100CF
:100750002322B1008345050063880500EF00C00136
:10076000130515006FF01FFF832001008325410052
:100770001301C10067800000B708010023A0B80082
:1007800067800000130101FD23261102232481024A
:1007900013040103232EA4FC0327C4FD9307F000D8
:1007A00063E4E702B70700810327C4FD131727009E
:1007B0009387C71BB307F70083A7070013850700BC
:1007C000EFF09FF86F004007930700022326F4FE26
:1007D000A30504FE8327C4FE9387C7FF0327C4FD38
:1007E000B357F70093F7F7002322F4FE832744FE64
:1007F0006386070093071000A305F4FE8347B4FE49
:1008000063820702B7070081032744FE13172700FE
:100810009387C71BB307F70083A70700138507005B
:10082000EFF09FF28327C4FE9387C7FF2326F4FED1
:100830008327C4FEE340F0FA8320C102032481022F
:100840001301010367800000130101FE232E110034
:10085000232C8100130401022326A4FE2324B4FECA
:100860000325C4FEEFF05FEE032584FEEFF09FF159
:10087000B707008113850708EFF01FED1300000094
:100880008320C1010324810113010102678000005C
:10089000130101FD23261102232481021304010305
:1008A000232EA4FC232CB4FC232AC4FC2328D4FC30
:1008B000B73700810327C4FD23AEE794B737008123
:1008C0009387C795032784FD23A2E700B7370081EC
:1008D0009387C795032744FD23A4E700B73700811A
:1008E0009387C795032704FD23A6E700EFF09F8DAC
:1008F0002324A4FE032704FD832784FEB357F702B5
:100900002326F4FE032704FD832784FEB377F70232
:10091000638807008327C4FE938717002326F4FE0D
:100920008325C4FEB70700811385470CEFF0DFF184
:10093000B73700819387C7950327C4FE23A8E70034
:10094000032704FD832784FE6362F702B737008123
:100950009386C795B71700801386479C832584FE2E
:10096000032504FDEFF01FAC6F000002B7370081D4
:100970009386C795B71700801386479C832504FD8F
:10098000032504FDEFF01FAAEFF05F832322A4FEEE
:10099000032704FD832744FE63F8E700032544FE94
:1009A000EFF01FB76F00C000032504FDEFF05FB646
:1009B000130000008320C1020324810213010103FC
:1009C00067800000130101FA232E1104232C8104F7
:1009D000130401062326A4FA2324B4FAEFF01FC05F
:1009E000232CA4FC832784FD83A70700232AF4FC7F
:1009F000832784FD83A747002328F4FC832784FDF5
:100A000083A787002326F4FC832784FD83A707019F
:100A10002326F4FE0327C4FE8327C4FAB307F70294
:100A20002324F4FE8327C4FE639A07009307100073
:100A30002326F4FE8327C4FA2324F4FE832784FDAF
:100A400083A7C7002324F4FC232204FE6F00800D3B
:100A5000232004FE232E04FC6F008007032784FA62
:100A6000832784FCB307F7020327C4FDB307F7000D
:100A70002322F4FC032784FC8327C4FDB307F70279
:100A8000032784FEB307F7002320F4FC832744FCEC
:100A900093972700032744FDB307F70003A7070038
:100AA000832704FC93972700832604FDB387F60071
:100AB00083A70700B307F702032704FEB307F70075
:100AC0002320F4FE8327C4FD93871700232EF4FC14
:100AD0000327C4FD832784FCE362F7F8032784FA25
:100AE000832784FC3307F702832784FEB307F700CC
:100AF000232EF4FA8327C4FB939727000327C4FC13
:100B0000B307F700032704FE23A0E700832784FE32
:100B1000938717002324F4FE832744FE938717004E
:100B20002322F4FE832744FE0327C4FEE3E2E7F218
:100B3000130000008320C105032481051301010671
:100B400067800000130101FC232E1102232C810277
:100B500013040104232EA4FC232CB4FC232AC4FC7C
:100B60002328D4FC2326E4FCB73700810327C4FDE7
:100B700023A8E796B737008193870797032784FD5B
:100B800023A2E700B737008193870797032744FD27
:100B900023A4E700B7370081938707970327C4FC96
:100BA00023A6E700B737008193870797032704FD43
:100BB00023A8E700EFF00FE12324A4FE0327C4FCE1
:100BC000832784FEB357F7022326F4FE0327C4FCD1
:100BD000832784FEB377F702638807008327C4FE68
:100BE000938717002326F4FEB73700819387079772
:100BF0000327C4FE23AAE7000327C4FC832784FE3F
:100C00006362F702B737008193860797B7170080B2
:100C10001386C7C7832584FE032504FDEFF09F805C
:100C20006F000002B737008193860797B7170080DF
:100C30001386C7C78325C4FC032504FDEFF08FFE90
:100C4000EFF0CFD72322A4FE032704FD832744FE21
:100C500063F8E700032544FEEFF09F8B6F00C000B0
:100C6000032504FDEFF0DF8A130000008320C10399
:100C7000032481031301010467800000130101FBB9
:100C8000232611042324810413040105232EA4FA2E
:100C9000232CB4FAEFF09F942320A4FE832704FEB4
:100CA00083A70700232EF4FC832704FE83A74700B5
:100CB000232CF4FC832704FE83A78700232AF4FC5B
:100CC000832704FE83A747012326F4FE0327C4FEDF
:100CD0008327C4FBB307F7022324F4FE8327C4FE53
:100CE000639A0700930710002326F4FE8327C4FBB2
:100CF0002324F4FE832704FE83A7C7002328F4FCE3
:100D0000232204FE6F00000C032784FB832704FDCD
:100D10003307F702832784FEB307F7002326F4FC8A
:100D2000032784FE832704FDB337F70093F7F70FFB
:100D30002324F4FC832784FC93B71700A303F4FC5B
:100D4000834774FC138F0700B7170080938F07DB6E
:100D50006B200F007B70FF018327C4FC9397270053
:100D60000327C4FDB307F70083A607008327C4FC4D
:100D700093972700032784FDB307F70003A7070015
:100D80008327C4FC93972700032644FDB307F6008E
:100D90003387E60023A0E700832784FE93871700AC
:100DA0002324F4FEB7170080138E47DB67000E0084
:100DB000130000006B300000832744FE9387170068
:100DC0002322F4FE832744FE0327C4FEE3EEE7F26A
:100DD000130000008320C1040324810413010105D2
:100DE00067800000130101FC232E1102232C8102D5
:100DF00013040104232EA4FC232CB4FC232AC4FCDA
:100E00002328D4FC2326E4FCB73700810327C4FD44
:100E100023A8E796B737008193870797032784FDB8
:100E200023A2E700B737008193870797032744FD84
:100E300023A4E700B7370081938707970327C4FCF3
:100E400023A6E700B737008193870797032704FDA0
:100E500023A8E700EFF00FB72324A4FE0327C4FC68
:100E6000832784FEB357F7022326F4FE0327C4FC2E
:100E7000832784FEB377F702638807008327C4FEC5
:100E8000938717002326F4FEB737008193870797CF
:100E90000327C4FE23AAE7000327C4FC832784FE9C
:100EA0006362F702B737008193860797B717008010
:100EB0001386C7F1832584FE032504FDEFF08FD64A
:100EC0006F000002B737008193860797B71700803D
:100ED0001386C7F18325C4FC032504FDEFF08FD4EE
:100EE000EFF0CFAD2322A4FE032704FD832744FEA9
:100EF00063F8E700032544FEEFF08FE16F00C000C8
:100F0000032504FDEFF0CFE0130000008320C103B0
:100F1000032481031301010467800000130101FB16
:100F2000232611042324810413040105232EA4FA8B
:100F3000232CB4FAEFF08FEA2320A4FE832704FECB
:100F400083A70700232EF4FC832704FE83A7470012
:100F5000232CF4FC832704FE83A78700232AF4FCB8
:100F6000832704FE83A747012326F4FE0327C4FE3C
:100F70008327C4FBB307F7022324F4FE8327C4FEB0
:100F8000639A0700930710002326F4FE8327C4FB0F
:100F90002324F4FE832704FE83A7C7002328F4FC40
:100FA000232204FE6F00000C032784FB832704FD2B
:100FB0003307F702832784FEB307F7002326F4FCE8
:100FC000032784FE832704FDB337F70093F7F70F59
:100FD0002324F4FC832784FC93B71700A303F4FCB9
:100FE000834774FC138F0700B7170080938F0705A2
:100FF0006B200F007B70FF018327C4FC93972700B1
:101000000327C4FDB307F70083A607008327C4FCAA
:1010100093972700032784FDB307F70003A7070072
:101020008327C4FC93972700032644FDB307F600EB
:101030003387E64023A0E700832784FE93871700C9
:101040002324F4FEB7170080138E470567000E00B7
:10105000130000006B300000832744FE93871700C5
:101060002322F4FE832744FE0327C4FEE3EEE7F2C7
:10107000130000008320C10403248104130101052F
:1010800067800000130101FC232E1102232C810232
:1010900013040104232EA4FC232CB4FC232AC4FC37
:1010A0002328D4FC2326E4FCB73700810327C4FDA2
:1010B00023A4E798B737008193878798032784FD97
:1010C00023A2E700B737008193878798032744FD61
:1010D00023A4E700B7370081938787980327C4FCD0
:1010E00023A6E700B737008193878798032704FD7D
:1010F00023A8E700EFF00F8D2324A4FE0327C4FCF0
:10110000832784FEB357F7022326F4FE0327C4FC8B
:10111000832784FEB377F702638807008327C4FE22
:10112000938717002326F4FEB737008193878798AB
:101130000327C4FE23AAE7000327C4FC832784FEF9
:101140006362F702B737008193868798B7170080EC
:101150001386C71B832584FE032504FDEFF08FACA7
:101160006F000002B737008193868798B717008019
:101170001386C71B8325C4FC032504FDEFF08FAA4B
:10118000EFF0CF832322A4FE032704FD832744FE30
:1011900063F8E700032544FEEFF08FB76F00C0004F
:1011A000032504FDEFF0CFB6130000008320C10338
:1011B000032481031301010467800000130101FB74
:1011C000232611042324810413040105232EA4FAE9
:1011D000232CB4FAEFF08FC02320A4FE832704FE53
:1011E00083A70700232EF4FC832704FE83A7470070
:1011F00083A70700232CF4FC832704FE83A7870022
:10120000232AF4FC832704FE83A747012326F4FE48
:101210000327C4FE8327C4FBB307F7022324F4FE8D
:101220008327C4FE639A0700930710002326F4FE69
:101230008327C4FB2324F4FE832704FE83A7C7006F
:101240002328F4FC232204FE6F00000B032784FBF9
:10125000832704FD3307F702832784FEB307F700D3
:101260002326F4FC032784FE832704FDB337F7000D
:1012700093F7F70F2324F4FC832784FC93B717001C
:10128000A303F4FC834774FC138F0700B717008097
:10129000938F472E6B200F007B70FF018327C4FCC8
:1012A000939727000327C4FDB307F70083A6070021
:1012B0008327C4FC93972700032744FDB307F70057
:1012C000032784FD3387E60023A0E700832784FEFD
:1012D000938717002324F4FEB7170080138E872E00
:1012E00067000E00130000006B300000832744FEEF
:1012F000938717002322F4FE832744FE0327C4FEAE
:10130000E3E6E7F4130000008320C1040324810412
:101310001301010567800000130101FC232E110257
:10132000232C810213040104232EA4FC232CB4FCDF
:10133000232AC4FC2328D4FC2326E4FCB7370081ED
:101340000327C4FD23A4E798B737008193878798C4
:10135000032784FD23A2E700B7370081938787988E
:10136000032744FD23A4E700B737008193878798BC
:101370000327C4FC23A6E700B7370081938787982B
:10138000032704FD23A8E700EFE0DFE32324A4FE06
:101390000327C4FC832784FEB357F7022326F4FEF9
:1013A0000327C4FC832784FEB377F7026388070012
:1013B0008327C4FE938717002326F4FEB7370081E6
:1013C000938787980327C4FE23AAE7000327C4FC5A
:1013D000832784FE6362F702B7370081938687987C
:1013E000B717008013860745832584FE032504FD77
:1013F000EFF04F836F000002B73700819386879824
:10140000B7170080138607458325C4FC032504FD18
:10141000EFF04F81EFE09FDA2322A4FE032704FDC3
:10142000832744FE63F8E700032544FEEFF04F8E68
:101430006F00C000032504FDEFF08F8D1300000046
:101440008320C1030324810313010104678000008A
:10145000130101FB23261104232481041304010535
:10146000232EA4FA232CB4FAEFF04F972320A4FEE6
:10147000832704FE83A70700232EF4FC832704FEA2
:1014800083A7470083A70700232CF4FC832704FECF
:1014900083A78700232AF4FC832704FE83A7470140
:1014A0002326F4FE0327C4FE8327C4FBB307F702F9
:1014B0002324F4FE8327C4FE639A070093071000D9
:1014C0002326F4FE8327C4FB2324F4FE832704FE93
:1014D00083A7C7002328F4FC232204FE6F00000B1F
:1014E000032784FB832704FD3307F702832784FE49
:1014F000B307F7002326F4FC032784FE832704FDAB
:10150000B337F70093F7F70F2324F4FC832784FC09
:1015100093B71700A303F4FC834774FC138F0700F1
:10152000B7170080938F87576B200F007B70FF01E8
:101530008327C4FC939727000327C4FDB307F70054
:1015400083A607008327C4FC93972700032744FD45
:10155000B307F700032784FD3387E60223A0E700E3
:10156000832784FE938717002324F4FEB717008097
:10157000138EC75767000E00130000006B30000089
:10158000832744FE938717002322F4FE832744FE1B
:101590000327C4FEE3E6E7F4130000008320C10440
:1015A000032481041301010567800000130101FE7B
:1015B000232E810013040102232604FE6F00800401
:1015C000B73700810327C4FE131727009387079AB4
:1015D000B307F7001307300023A0E700B7470081E7
:1015E0000327C4FE131727009387079AB307F70052
:1015F0001307200023A0E7008327C4FE938717006A
:101600002326F4FE0327C4FE9307F00FE3DAE7FA7C
:10161000130000000324C1011301010267800000D0
:10162000130101FD23261102232481021304010367
:10163000232EA4FCB707008113850711EFF0CF908C
:10164000232604FE6F0080058327C4FE638E0700F7
:101650008327C4FE93F7F70063980700B70700815C
:1016600013854713EFF04F8E8327C4FE939727000F
:101670000327C4FDB307F70083A7070013850700FE
:10168000EFF04F90B707008113858713EFF0CF8BF2
:101690008327C4FE938717002326F4FE0327C4FE86
:1016A0009307F00FE3D2E7FAB70700811385C7135A
:1016B000EFF08F89130000008320C1020324810210
:1016C0001301010367800000130101FF23261100AD
:1016D0002324810013040101EFF05FED9306000164
:1016E000B75700811386079AB74700819385079AF9
:1016F000B73700811385079AEFF08F99B7070081FC
:1017000013850716EFF04F84B75700811385079AAA
:10171000EFF01FF193070000138507008320C1003D
:0C17200003248100130101016780000018
:02000004810079
:10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A
@ -412,27 +394,22 @@
:100140002D2D2D2D2D2D2D2D2D2D2D2D2D2D2D2DDF
:100150002D2D2D2D2D2D2D2D2D2D2D2D0A00000079
:100160000A0A4D6174726978206D756C7469706CDF
:1001700069636174696F6E0A000000000A0A4D61CC
:1001800074726978204164646974696F6E0A000052
:100190000A0A4D61747269782053756274726163E2
:1001A00074696F6E0A0000000A0A4D617472697802
:1001B00020456C656D656E74204164646974696F77
:0301C0006E0A00C4
:1001C4000000008104000081080000810C0000810F
:1001D4001000008114000081180000811C000081BF
:1001E4002000008124000081280000812C0000816F
:1001F4003000008134000081380000813C0000811F
:100204004000008144000081480000814C000081CE
:100214005000008154000081580000815C0000817E
:100224006000008164000081680000816C0000812E
:100234007000008174000081780000817C000081DE
:1002440084000081880000818C000081900000817E
:1002540094000081980000819C000081A00000812E
:10026400A4000081A8000081AC000081B0000081DE
:10027400B4000081B8000081BC000081C00000818E
:10028400D0000081D4000081D8000081DC0000810E
:10029400E0000081E4000081E8000081EC000081BE
:1002A400F0000081F4000081F8000081FC0000816E
:1002B4000001008104010081080100810C0100811A
:0901700069636174696F6E0A0095
:10017C000000008104000081080000810C00008157
:10018C001000008114000081180000811C00008107
:10019C002000008124000081280000812C000081B7
:1001AC003000008134000081380000813C00008167
:1001BC004000008144000081480000814C00008117
:1001CC005000008154000081580000815C000081C7
:1001DC006000008164000081680000816C00008177
:1001EC007000008174000081780000817C00008127
:1001FC0084000081880000818C00008190000081C7
:10020C0094000081980000819C000081A000008176
:10021C00A4000081A8000081AC000081B000008126
:10022C00B4000081B8000081BC000081C0000081D6
:10023C00D0000081D4000081D8000081DC00008156
:10024C00E0000081E4000081E8000081EC00008106
:10025C00F0000081F4000081F8000081FC000081B6
:10026C000001008104010081080100810C01008162
:040000058000000077
:00000001FF

View file

@ -74,21 +74,21 @@ void _vx_mat_mult(unsigned tid, unsigned wid)
unsigned total = 0;
for (unsigned place = 0; place < mat_dim; ++place)
{
unsigned x_i = (wid * mat_dim) + place;
unsigned y_i = (mat_dim * place) + i_index;
unsigned x_i = (wid * mat_dim) + place;
unsigned y_i = (mat_dim * place ) + i_index;
total += (x_ptr[x_i] * y_ptr[y_i]);
}
int final_i = (wid * mat_dim) + i_index;
unsigned cond = i_index < mat_dim;
__if(cond)
{
// unsigned cond = i_index < mat_dim;
// __if(cond)
// {
z_ptr[final_i] = total;
i_index++;
}
__else
__end_if
// }
// __else
// __end_if
}
// for (int z = 0; z < ((1000 * wid) + 1000); z++);

View file

@ -5,10 +5,10 @@ unsigned x[1024] = {0};
unsigned y[1024] = {0};
unsigned z[1024] = {0};
#define MAT_DIM 8
#define MAT_DIM 16
#define NUM_COLS 8
#define NUM_ROWS 8
#define NUM_COLS 16
#define NUM_ROWS 16
void initialize_mats()
{
@ -42,28 +42,28 @@ int main()
print_matrix(z);
// matrix addition
vx_mat_add(x, y, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Addition\n");
print_matrix(z);
// // matrix addition
// vx_mat_add(x, y, z, NUM_ROWS, NUM_COLS);
// vx_print_str("\n\nMatrix Addition\n");
// print_matrix(z);
// matrix sub
vx_mat_sub(x, y, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Subtraction\n");
print_matrix(z);
// // matrix sub
// vx_mat_sub(x, y, z, NUM_ROWS, NUM_COLS);
// vx_print_str("\n\nMatrix Subtraction\n");
// print_matrix(z);
unsigned scal = 3;
// unsigned scal = 3;
// matrix element add
vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Element Addition\n");
print_matrix(z);
// // matrix element add
// vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS);
// vx_print_str("\n\nMatrix Element Addition\n");
// print_matrix(z);
// matrix element add
vx_e_mat_mult(z, &scal, z, NUM_ROWS, NUM_COLS);
vx_print_str("\n\nMatrix Element Addition\n");
print_matrix(z);
// // matrix element add
// vx_e_mat_mult(z, &scal, z, NUM_ROWS, NUM_COLS);
// vx_print_str("\n\nMatrix Element Addition\n");
// print_matrix(z);
return 0;

View file

@ -6,7 +6,7 @@
.type _start, @function
.global _start
_start:
li a0, 1 # Num Warps
li a0, 2 # Num Warps
csrw 0x20, a0 # Setting the number of available warps
li a0, 2 # Num Threads
csrw 0x21, a0 # Setting the number of available threads
@ -31,11 +31,17 @@ loop_cond:
loop_body:
addi sp,sp,-2048 # Allocate 2k stack for new thread
mv t1, a0 # #lane = i
.word 0x3506b # clone register state
.word 0x3506b # clone register state
loop_inc:
addi a0, a0, 1
j loop_cond
loop_done:
nop
nop
nop
nop
nop
nop
mv sp,t5 # Restoring the stack
li a0,0 # setting tid = 0 for main thread
mv t6,a2 # setting func_addr

View file

@ -4,6 +4,7 @@
module VX_context (
input wire clk,
input wire in_warp,
input wire in_wb_warp,
input wire in_valid[`NT_M1:0],
input wire in_write_register,
input wire[4:0] in_rd,
@ -20,18 +21,26 @@ module VX_context (
output reg[31:0] out_a_reg_data[`NT_M1:0],
output reg[31:0] out_b_reg_data[`NT_M1:0],
output wire out_clone_stall
output wire out_clone_stall,
output wire[31:0] w0_t0_registers[31:0]
);
reg[5:0] state_stall;
initial begin
state_stall = 0;
end
wire[31:0] rd1_register[`NT_M1:0];
wire[31:0] rd2_register[`NT_M1:0];
/* verilator lint_off UNUSED */
wire[31:0] clone_regsiters[31:0];
/* verilator lint_on UNUSED */
assign w0_t0_registers = clone_regsiters;
VX_register_file vx_register_file_master(
.clk (clk),
.in_warp (in_warp),
.in_wb_warp (in_wb_warp),
.in_valid (in_valid[0]),
.in_write_register (in_write_register),
.in_rd (in_rd),
@ -52,6 +61,7 @@ module VX_context (
VX_register_file_slave vx_register_file_slave(
.clk (clk),
.in_warp (in_warp),
.in_wb_warp (in_wb_warp),
.in_valid (in_valid[index]),
.in_write_register (in_write_register),
.in_rd (in_rd),
@ -64,11 +74,10 @@ module VX_context (
.out_src1_data (rd1_register[index]),
.out_src2_data (rd2_register[index])
);
end
end
endgenerate
reg[5:0] state_stall = 0;
always @(posedge clk) begin
if ((in_is_clone) && state_stall == 0) begin
state_stall <= 10;

145
rtl/VX_context_slave.v Normal file
View file

@ -0,0 +1,145 @@
`include "VX_define.v"
// VX_context_slave
//
// Register context (one register file per thread) for a warp that can be
// started by a wspawn instruction. Thread 0 uses a
// VX_register_file_master_slave, which can be overwritten wholesale from
// in_wspawn_regs; threads 1..`NT-1 use VX_register_file_slave instances,
// which can be overwritten wholesale from thread 0's registers (clone).
//
// Ports:
//   clk                 clock for the stall counters and register files
//   in_warp             forwarded to the per-thread slave register files
//   in_wb_warp          forwarded to every register file as in_wb_warp
//   in_valid[t]         per-thread valid, forwarded to register file t
//   in_write_register,
//   in_rd,
//   in_write_data[t]    write-back request forwarded to register file t
//   in_src1, in_src2    source register indices read from every file
//   in_curr_PC,
//   in_is_jal           when in_is_jal is 1, operand A is in_curr_PC
//   in_is_clone         starts the clone stall sequence
//   in_src{1,2}_fwd,
//   in_src{1,2}_fwd_data forwarding override for operand A / B
//   in_wspawn_regs,
//   in_wspawn           register image and request used for wspawn
//   out_a_reg_data[t],
//   out_b_reg_data[t]   selected operands for thread t
//   out_clone_stall     high while either the clone or wspawn sequence
//                       requires a stall
module VX_context_slave (
	input wire        clk,
	input wire        in_warp,
	input wire        in_wb_warp,
	input wire        in_valid[`NT_M1:0],
	input wire        in_write_register,
	input wire[4:0]   in_rd,
	input wire[31:0]  in_write_data[`NT_M1:0],
	input wire[4:0]   in_src1,
	input wire[4:0]   in_src2,
	input wire[31:0]  in_curr_PC,
	input wire        in_is_clone,
	input wire        in_is_jal,
	input wire        in_src1_fwd,
	input wire[31:0]  in_src1_fwd_data[`NT_M1:0],
	input wire        in_src2_fwd,
	input wire[31:0]  in_src2_fwd_data[`NT_M1:0],
	input wire[31:0]  in_wspawn_regs[31:0],
	input wire        in_wspawn,

	output reg[31:0]  out_a_reg_data[`NT_M1:0],
	output reg[31:0]  out_b_reg_data[`NT_M1:0],
	output wire       out_clone_stall
);

	// Value loaded into a stall counter when its sequence starts.
	localparam [5:0] STALL_CYCLES = 6'd10;

	// Raw register-file read data, one entry per thread.
	wire[31:0] rd1_register[`NT_M1:0];
	wire[31:0] rd2_register[`NT_M1:0];

	// Full register image of thread 0, offered to the slaves for cloning.
	/* verilator lint_off UNUSED */
	wire[31:0] clone_regsiters[31:0];
	/* verilator lint_on UNUSED */

	// Down-counters: 0 means idle. Initialised once, here, rather than both
	// in the declaration and in an initial block.
	reg[5:0] clone_state_stall;
	reg[5:0] wspawn_state_stall;

	initial begin
		clone_state_stall  = 0;
		wspawn_state_stall = 0;
	end

	// The wspawn register copy is enabled while the wspawn counter reads 2,
	// i.e. while wspwan_stall below is still asserted.
	wire to_wspawn = (wspawn_state_stall == 2);

	// Thread 0: register file that can be loaded from in_wspawn_regs.
	VX_register_file_master_slave vx_register_file_master(
		.clk               (clk),
		.in_wb_warp        (in_wb_warp),
		.in_valid          (in_valid[0]),
		.in_write_register (in_write_register),
		.in_rd             (in_rd),
		.in_data           (in_write_data[0]),
		.in_src1           (in_src1),
		.in_src2           (in_src2),
		.in_wspawn         (in_wspawn),
		.in_to_wspawn      (to_wspawn),
		.in_wspawn_regs    (in_wspawn_regs),
		.out_regs          (clone_regsiters),
		.out_src1_data     (rd1_register[0]),
		.out_src2_data     (rd2_register[0])
	);

	// Threads 1..`NT-1: register files that can be loaded from thread 0.
	genvar index;
	generate
		for (index = 1; index < `NT; index = index + 1)
		begin: gen_code_label
			// Copy into this thread when the clone counter reads 1 and the
			// value thread 0 read through in_src1 equals this thread's index.
			wire to_clone;
			assign to_clone = (index == rd1_register[0]) && (clone_state_stall == 1);

			VX_register_file_slave vx_register_file_slave(
				.clk               (clk),
				.in_warp           (in_warp),
				.in_wb_warp        (in_wb_warp),
				.in_valid          (in_valid[index]),
				.in_write_register (in_write_register),
				.in_rd             (in_rd),
				.in_data           (in_write_data[index]),
				.in_src1           (in_src1),
				.in_src2           (in_src2),
				.in_clone          (in_is_clone),
				.in_to_clone       (to_clone),
				.in_regs           (clone_regsiters),
				.out_src1_data     (rd1_register[index]),
				.out_src2_data     (rd2_register[index])
			);
		end
	endgenerate

	// Clone stall counter: idle -> STALL_CYCLES on in_is_clone, then counts
	// down by one per clock until it is back at 0.
	always @(posedge clk) begin
		if (in_is_clone && (clone_state_stall == 0)) begin
			clone_state_stall <= STALL_CYCLES;
		end else if (clone_state_stall != 0) begin
			clone_state_stall <= clone_state_stall - 6'd1;
		end
	end

	// wspawn stall counter: same scheme, started by in_wspawn.
	always @(posedge clk) begin
		if (in_wspawn && (wspawn_state_stall == 0)) begin
			wspawn_state_stall <= STALL_CYCLES;
		end else if (wspawn_state_stall != 0) begin
			wspawn_state_stall <= wspawn_state_stall - 6'd1;
		end
	end

	// Operand selection per thread.
	//   A: in_curr_PC when in_is_jal, else forwarded data, else register file.
	//   B: forwarded data, else register file.
	genvar index_out_reg;
	generate
		for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1)
		begin
			assign out_a_reg_data[index_out_reg] = (in_is_jal == 1'b1)   ? in_curr_PC
			                                     : (in_src1_fwd == 1'b1) ? in_src1_fwd_data[index_out_reg]
			                                     :                         rd1_register[index_out_reg];
			assign out_b_reg_data[index_out_reg] = (in_src2_fwd == 1'b1) ? in_src2_fwd_data[index_out_reg]
			                                     :                         rd2_register[index_out_reg];
		end
	endgenerate

	// Clone stalls while a clone is requested and the counter is not 1; the
	// original "(counter == 0) && in_is_clone" term is implied by this one.
	wire clone_stall  = in_is_clone && (clone_state_stall != 1);

	// wspawn stalls from the request cycle until the counter has reached 1.
	wire wspwan_stall = ((wspawn_state_stall == 0) && in_wspawn) || (wspawn_state_stall > 1);

	assign out_clone_stall = clone_stall || wspwan_stall;

endmodule

View file

@ -3,55 +3,55 @@
module VX_decode(
// Fetch Inputs
input wire clk,
input wire[31:0] in_instruction,
input wire[31:0] in_curr_PC,
input wire in_valid[`NT_M1:0],
input wire clk,
input wire[31:0] in_instruction,
input wire[31:0] in_curr_PC,
input wire in_valid[`NT_M1:0],
// WriteBack inputs
input wire[31:0] in_write_data[`NT_M1:0],
input wire[4:0] in_rd,
input wire[1:0] in_wb,
input wire in_wb_valid[`NT_M1:0],
input wire[`NW_M1:0] in_wb_warp_num,
input wire[31:0] in_write_data[`NT_M1:0],
input wire[4:0] in_rd,
input wire[1:0] in_wb,
input wire in_wb_valid[`NT_M1:0],
input wire[`NW_M1:0] in_wb_warp_num,
// FORWARDING INPUTS
input wire in_src1_fwd,
input wire[31:0] in_src1_fwd_data[`NT_M1:0],
input wire in_src2_fwd,
input wire[31:0] in_src2_fwd_data[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output wire[31:0] out_csr_mask,
input wire in_src1_fwd,
input wire[31:0] in_src1_fwd_data[`NT_M1:0],
input wire in_src2_fwd,
input wire[31:0] in_src2_fwd_data[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output wire[31:0] out_csr_mask,
// Outputs
output wire[4:0] out_rd,
output wire[4:0] out_rs1,
output wire[4:0] out_rs2,
output wire[31:0] out_a_reg_data[`NT_M1:0],
output wire[31:0] out_b_reg_data[`NT_M1:0],
output wire[1:0] out_wb,
output wire[4:0] out_alu_op,
output wire out_rs2_src,
output reg[31:0] out_itype_immed,
output wire[2:0] out_mem_read,
output wire[2:0] out_mem_write,
output reg[2:0] out_branch_type,
output reg out_branch_stall,
output reg out_jal,
output reg[31:0] out_jal_offset,
output reg[19:0] out_upper_immed,
output wire[31:0] out_PC_next,
output reg out_clone_stall,
output wire out_change_mask,
output wire out_thread_mask[`NT_M1:0],
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
output wire[4:0] out_rd,
output wire[4:0] out_rs1,
output wire[4:0] out_rs2,
output wire[31:0] out_a_reg_data[`NT_M1:0],
output wire[31:0] out_b_reg_data[`NT_M1:0],
output wire[1:0] out_wb,
output wire[4:0] out_alu_op,
output wire out_rs2_src,
output reg[31:0] out_itype_immed,
output wire[2:0] out_mem_read,
output wire[2:0] out_mem_write,
output reg[2:0] out_branch_type,
output reg out_branch_stall,
output reg out_jal,
output reg[31:0] out_jal_offset,
output reg[19:0] out_upper_immed,
output wire[31:0] out_PC_next,
output reg out_clone_stall,
output wire out_change_mask,
output wire out_thread_mask[`NT_M1:0],
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num,
output wire out_wspawn,
output wire[31:0] out_wspawn_pc,
output wire out_ebreak
);
wire[6:0] curr_opcode;
@ -73,6 +73,7 @@ module VX_decode(
wire is_clone;
wire is_jalrs;
wire is_jmprt;
wire is_wspawn;
wire write_register;
@ -110,11 +111,28 @@ module VX_decode(
reg[4:0] alu_op;
reg[4:0] mul_alu;
wire context_zero_valid = (in_wb_warp_num == 0);
wire[31:0] w0_t0_registers[31:0];
VX_context VX_Context(
wire context_zero_valid = (in_wb_warp_num == 0);
wire[31:0] zero_a_reg_data[`NT_M1:0];
wire[31:0] zero_b_reg_data[`NT_M1:0];
reg zero_clone_stall;
// always @(*) begin
// $display("DECODE WARP: %h", in_warp_num);
// end
wire curr_warp_zero = in_warp_num == 0;
wire curr_warp_one = in_warp_num == 1;
// always @(*) begin
// $display("DECODE WARP: %h PC: %h",in_warp_num, in_curr_PC);
// end
VX_context VX_Context_zero(
.clk (clk),
.in_warp (context_zero_valid),
.in_warp (curr_warp_zero),
.in_wb_warp (context_zero_valid),
.in_valid (in_wb_valid),
.in_rd (in_rd),
.in_src1 (out_rs1),
@ -128,13 +146,52 @@ module VX_decode(
.in_src2_fwd_data (in_src2_fwd_data),
.in_write_register(write_register),
.in_write_data (in_write_data),
.out_a_reg_data (out_a_reg_data),
.out_b_reg_data (out_b_reg_data),
.out_clone_stall (out_clone_stall)
);
.out_a_reg_data (zero_a_reg_data),
.out_b_reg_data (zero_b_reg_data),
.out_clone_stall (zero_clone_stall),
.w0_t0_registers (w0_t0_registers)
);
wire context_one_valid = (in_wb_warp_num == 1);
wire[31:0] one_a_reg_data[`NT_M1:0];
wire[31:0] one_b_reg_data[`NT_M1:0];
reg one_clone_stall;
VX_context_slave VX_Context_one(
.clk (clk),
.in_warp (curr_warp_one),
.in_wb_warp (context_one_valid),
.in_valid (in_wb_valid),
.in_rd (in_rd),
.in_src1 (out_rs1),
.in_src2 (out_rs2),
.in_curr_PC (in_curr_PC),
.in_is_clone (is_clone),
.in_is_jal (is_jal),
.in_src1_fwd (in_src1_fwd),
.in_src1_fwd_data (in_src1_fwd_data),
.in_src2_fwd (in_src2_fwd),
.in_src2_fwd_data (in_src2_fwd_data),
.in_write_register(write_register),
.in_write_data (in_write_data),
.in_wspawn_regs (w0_t0_registers),
.in_wspawn (is_wspawn),
.out_a_reg_data (one_a_reg_data),
.out_b_reg_data (one_b_reg_data),
.out_clone_stall (one_clone_stall)
);
assign out_a_reg_data = curr_warp_zero ? zero_a_reg_data : one_a_reg_data;
assign out_b_reg_data = curr_warp_zero ? zero_b_reg_data : one_b_reg_data;
assign out_clone_stall = zero_clone_stall || one_clone_stall;
// always @(*) begin
// if (context_one_valid) begin
// $display("PC: %h -> src1: %h\tsrc2: %h",in_curr_PC, one_a_reg_data[0], one_b_reg_data[0]);
// end
// end
assign out_warp_num = in_warp_num;
assign out_valid = in_valid;
assign out_valid = in_valid;
assign write_register = (in_wb != 2'h0) ? (1'b1) : (1'b0);
@ -171,6 +228,10 @@ module VX_decode(
assign is_clone = is_gpgpu && (func3 == 5);
assign is_jalrs = is_gpgpu && (func3 == 6);
assign is_jmprt = is_gpgpu && (func3 == 4);
assign is_wspawn = is_gpgpu && (func3 == 0);
assign out_wspawn = is_wspawn;
assign out_wspawn_pc = out_a_reg_data[0];
// always @(*) begin
// if (is_jalrs) begin
@ -259,7 +320,7 @@ module VX_decode(
case(curr_opcode)
`LUI_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3};
`AUIPC_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3};
default: out_upper_immed = 20'h0;
default: out_upper_immed = 20'h0;
endcase // curr_opcode
end
@ -306,6 +367,7 @@ module VX_decode(
end
`SYS_INST:
begin
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid[0]) );
out_jal = jal_sys_jal && in_valid[0];
out_jal_offset = jal_sys_off;
end
@ -317,6 +379,13 @@ module VX_decode(
endcase
end
wire is_ebreak;
assign is_ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && in_valid[0]);
assign out_ebreak = is_ebreak;
// CSR

View file

@ -68,6 +68,13 @@ module VX_execute (
end
endgenerate
// always @(*) begin
// if ((in_alu_op == `MUL) && (in_warp_num == 1)) begin
// $display("@PC: %h ---> %d * %d = %d\t%d * %d = %d", in_curr_PC, in_a_reg_data[0], in_b_reg_data[0], out_alu_result[0], in_a_reg_data[1], in_b_reg_data[1], out_alu_result[1]);
// end
// end
assign out_jal_dest = $signed(in_a_reg_data[0]) + $signed(in_jal_offset);
assign out_jal = in_jal;

View file

@ -2,30 +2,34 @@
`include "VX_define.v"
module VX_fetch (
input wire clk,
input wire reset,
input wire in_branch_dir,
input wire in_freeze,
input wire[31:0] in_branch_dest,
input wire in_branch_stall,
input wire in_fwd_stall,
input wire in_branch_stall_exe,
input wire in_clone_stall,
input wire in_jal,
input wire[31:0] in_jal_dest,
input wire in_interrupt,
input wire in_debug,
input wire[31:0] in_instruction,
input wire in_thread_mask[`NT_M1:0],
input wire in_change_mask,
input wire clk,
input wire reset,
input wire in_branch_dir,
input wire in_freeze,
input wire[31:0] in_branch_dest,
input wire in_branch_stall,
input wire in_fwd_stall,
input wire in_branch_stall_exe,
input wire in_clone_stall,
input wire in_jal,
input wire[31:0] in_jal_dest,
input wire in_interrupt,
input wire in_debug,
input wire[31:0] in_instruction,
input wire in_thread_mask[`NT_M1:0],
input wire in_change_mask,
input wire[`NW_M1:0] in_decode_warp_num,
input wire[`NW_M1:0] in_memory_warp_num,
input wire in_wspawn,
input wire[31:0] in_wspawn_pc,
input wire in_ebreak,
output wire[31:0] out_instruction,
output wire out_delay,
output wire[31:0] out_instruction,
output wire out_delay,
output wire[`NW_M1:0] out_warp_num,
output wire[31:0] out_curr_PC,
output wire out_valid[`NT_M1:0]
output wire[31:0] out_curr_PC,
output wire out_valid[`NT_M1:0],
output wire out_ebreak
);
reg stall;
@ -39,42 +43,98 @@ module VX_fetch (
warp_state = 0;
end
wire add_warp = in_wspawn && !in_ebreak && !in_clone_stall;
wire remove_warp = in_ebreak && !in_wspawn && !in_clone_stall;
always @(posedge clk or posedge reset) begin
if (reset || (warp_num == warp_state)) begin
warp_num <= 0;
if (reset || (warp_num == warp_state) || remove_warp || add_warp) begin
warp_num <= 0;
end else begin
warp_num <= warp_num + 1;
warp_num <= warp_num + 1;
end
if (add_warp) begin
// $display("Adding a new warp %h", warp_state);
warp_state <= warp_state + 1;
end else if (remove_warp) begin
// $display("Removing a warp %h", warp_state);
warp_state <= warp_state - 1;
end
end
assign out_ebreak = (warp_state == 0) && in_ebreak;
assign stall = in_clone_stall || in_branch_stall || in_fwd_stall || in_branch_stall_exe || in_interrupt || in_freeze || in_debug;
wire[31:0] warp_pc;
wire warp_valid[`NT_M1:0];
wire warp_zero_change_mask = in_change_mask && (in_decode_warp_num == 0);
wire warp_zero_jal = in_jal && (in_memory_warp_num == 0);
wire warp_zero_branch = in_branch_dir && (in_memory_warp_num == 0);
wire warp_zero_stall = stall || (warp_num == 1);
wire warp_zero_wspawn = 0;
wire[31:0] warp_zero_wspawn_pc = 32'h0;
wire warp_zero_change_mask = in_change_mask && (in_decode_warp_num == 0);
wire warp_zero_jal = in_jal && (in_memory_warp_num == 0);
wire warp_zero_branch = in_branch_dir && (in_memory_warp_num == 0);
VX_warp VX_Warp(
wire[31:0] warp_zero_pc;
wire warp_zero_valid[`NT_M1:0];
VX_warp VX_Warp_zero(
.clk (clk),
.reset (reset),
.stall (stall),
.stall (warp_zero_stall),
.in_thread_mask(in_thread_mask),
.in_change_mask(warp_zero_change_mask),
.in_jal (warp_zero_jal),
.in_jal_dest (in_jal_dest),
.in_branch_dir (warp_zero_branch),
.in_branch_dest(in_branch_dest),
.out_PC (warp_pc),
.out_valid (warp_valid)
.in_wspawn (warp_zero_wspawn),
.in_wspawn_pc (warp_zero_wspawn_pc),
.out_PC (warp_zero_pc),
.out_valid (warp_zero_valid)
);
assign out_PC = warp_pc;
wire warp_one_change_mask = in_change_mask && (in_decode_warp_num == 1);
wire warp_one_jal = in_jal && (in_memory_warp_num == 1);
wire warp_one_branch = in_branch_dir && (in_memory_warp_num == 1);
wire warp_one_stall = stall || (warp_num == 0);
wire[31:0] warp_one_pc;
wire warp_one_valid[`NT_M1:0];
VX_warp VX_Warp_one(
.clk (clk),
.reset (reset),
.stall (warp_one_stall),
.in_thread_mask(in_thread_mask),
.in_change_mask(warp_one_change_mask),
.in_jal (warp_one_jal),
.in_jal_dest (in_jal_dest),
.in_branch_dir (warp_one_branch),
.in_branch_dest(in_branch_dest),
.in_wspawn (in_wspawn),
.in_wspawn_pc (in_wspawn_pc),
.out_PC (warp_one_pc),
.out_valid (warp_one_valid)
);
// always @(*) begin
// if (in_wspawn) begin
// $display("Spawning a warp @ %h",in_wspawn_pc);
// end
// end
// always @(posedge clk) begin
// $display("curr warp: %h Threads:%d%d PC: %h", warp_num, out_valid[0],out_valid[1], out_PC);
// end
// always @(*) begin
// if (warp_num == 1) begin
// $display("Going to PC: %h", warp_one_pc);
// end
// end
assign out_PC = (warp_num == 0) ? warp_zero_pc : warp_one_pc;
assign out_valid = (warp_num == 0) ? warp_zero_valid : warp_one_valid;
// always @(*) begin
// $display("FETCH PC: %h (%h, %h, %h)",delete, delete, in_jal_dest, in_branch_dest);
@ -82,9 +142,9 @@ module VX_fetch (
assign out_curr_PC = out_PC;
assign out_valid = warp_valid;
assign out_warp_num = warp_num;
assign out_delay = 0;
assign out_instruction = stall ? 32'b0 : in_instruction;

View file

@ -65,6 +65,12 @@ module VX_memory (
assign out_cache_driver_in_data = in_rd2;
assign out_cache_driver_in_valid = in_valid;
// always @(*) begin
// if (in_valid[0] && (in_mem_write == `SW_MEM_WRITE) && (in_alu_result[0] >= 32'h810049a0)) begin
// $display("SW$ PC: %h - Warp: %h -> [%h]%h = %h || [%h]%h = %h",in_curr_PC, in_warp_num, in_valid[0], in_alu_result[0], in_rd2[0], in_valid[1], in_alu_result[1], in_rd2[1]);
// end
// end
// wire[31:0] sm_out_data[`NT_M1:0];
@ -113,7 +119,13 @@ module VX_memory (
end
`BLT: out_branch_dir = (in_alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGT: out_branch_dir = (in_alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
`BLTU: out_branch_dir = (in_alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
`BLTU:
begin
out_branch_dir = (in_alu_result[0][31] == 0) ? `NOT_TAKEN : `TAKEN;
if (in_warp_num == 1) begin
// $display("BLTU PC:%h : %d < %d = %d", in_curr_PC, in_rs1, in_rs2, (in_alu_result[0][31] == 0));
end
end
`BGTU: out_branch_dir = (in_alu_result[0][31] == 0) ? `TAKEN : `NOT_TAKEN;
`NO_BRANCH: out_branch_dir = `NOT_TAKEN;
default: out_branch_dir = `NOT_TAKEN;

View file

@ -2,7 +2,7 @@
module VX_register_file (
input wire clk,
input wire in_warp,
input wire in_wb_warp,
input wire in_valid,
input wire in_write_register,
input wire[4:0] in_rd,
@ -30,6 +30,11 @@ module VX_register_file (
// end
// end
// always @(*) begin
// $display("TID: %d: %h",10,registers[10]);
// $display("WID: %d: %h",11,registers[11]);
// end
assign out_regs = registers;
assign write_data = in_data;
@ -38,7 +43,7 @@ module VX_register_file (
assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid;
always @(posedge clk) begin
if(write_enable && in_warp) begin
if(write_enable && in_wb_warp) begin
// $display("RF: Writing %h to %d",write_data, write_register);
registers[write_register] <= write_data;
end

View file

@ -0,0 +1,72 @@
// VX_register_file_master_slave
//
// 32 x 32-bit register file with two read ports and one write port that can
// also be overwritten in a single clock from an external register image.
//
//   Write  (posedge clk): when in_wspawn is low and in_write_register,
//                         in_valid and in_wb_warp are all high and in_rd is
//                         not register 0, registers[in_rd] takes in_data.
//   Copy   (posedge clk): when in_wspawn and in_to_wspawn are both high, all
//                         32 registers take in_wspawn_regs. While in_wspawn
//                         is high, ordinary writes are suppressed.
//   Read   (negedge clk): out_src1_data / out_src2_data are registered
//                         copies of registers[in_src1] / registers[in_src2].
//   out_regs continuously exposes the whole register array.
module VX_register_file_master_slave (
	input wire        clk,
	input wire        in_wb_warp,
	input wire        in_valid,
	input wire        in_write_register,
	input wire[4:0]   in_rd,
	input wire[31:0]  in_data,
	input wire[4:0]   in_src1,
	input wire[4:0]   in_src2,
	input wire        in_wspawn,
	input wire        in_to_wspawn,
	input wire[31:0]  in_wspawn_regs[31:0],

	output reg[31:0]  out_src1_data,
	output reg[31:0]  out_src2_data,
	output wire[31:0] out_regs[31:0]
);

	// Register storage.
	reg[31:0] registers[31:0];

	// A write is accepted only for a valid thread of the targeted warp, and
	// never for register 0.
	wire write_enable;
	assign write_enable = in_wb_warp
	                   && in_valid
	                   && in_write_register
	                   && (in_rd != 5'h0);

	// Whole-array view of the registers.
	assign out_regs = registers;

	// Update port: a pending wspawn takes priority over ordinary writes.
	always @(posedge clk) begin
		if (in_wspawn) begin
			if (in_to_wspawn) begin
				registers <= in_wspawn_regs;
			end
		end else if (write_enable) begin
			registers[in_rd] <= in_data;
		end
	end

	// Read ports are sampled on the falling edge.
	always @(negedge clk) begin
		out_src2_data <= registers[in_src2];
		out_src1_data <= registers[in_src1];
	end

endmodule

View file

@ -6,6 +6,7 @@
module VX_register_file_slave (
input wire clk,
input wire in_warp,
input wire in_wb_warp,
input wire in_valid,
input wire in_write_register,
input wire[4:0] in_rd,
@ -37,17 +38,23 @@ module VX_register_file_slave (
// integer i;
// always @(*) begin
// if (in_warp) begin
// $display("TID: %d: %h",10,registers[10]);
// $display("WID: %d: %h",11,registers[11]);
// end
// end
assign write_data = in_data;
assign write_register = in_rd;
assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid;
assign write_enable = (in_write_register && (in_rd != 5'h0)) && in_valid && in_wb_warp;
always @(posedge clk) begin
if(write_enable && !in_clone && in_warp) begin
if(write_enable && !in_clone) begin
// $display("RF: Writing %h to %d",write_data, write_register);
registers[write_register] <= write_data;
end else if (in_clone && in_to_clone) begin
// $display("CLONING IN SLAVE");
end else if (in_clone && in_to_clone && in_warp) begin
registers <= in_regs;
end
end

View file

@ -11,7 +11,8 @@ module VX_warp (
input wire[31:0] in_jal_dest,
input wire in_branch_dir,
input wire[31:0] in_branch_dest,
input wire in_wspawn,
input wire[31:0] in_wspawn_pc,
output wire[31:0] out_PC,
output wire out_valid[`NT_M1:0]
@ -62,7 +63,10 @@ module VX_warp (
always @(posedge clk or posedge reset) begin
if (reset) begin
real_PC <= 0;
end else if (stall == 1'b0) begin
end else if (in_wspawn == 1'b1) begin
// $display("Inside warp ***** Spawn @ %H",in_wspawn_pc);
real_PC <= in_wspawn_pc;
end else if (!stall) begin
real_PC <= use_PC + 32'h4;
end else begin
real_PC <= use_PC;

View file

@ -13,7 +13,8 @@ module Vortex(
output wire[2:0] out_cache_driver_in_mem_read,
output wire[2:0] out_cache_driver_in_mem_write,
output wire out_cache_driver_in_valid[`NT_M1:0],
output wire[31:0] out_cache_driver_in_data[`NT_M1:0]
output wire[31:0] out_cache_driver_in_data[`NT_M1:0],
output wire out_ebreak
);
// wire[31:0] in_cache_driver_out_data[`NT_M1:0];
@ -25,11 +26,12 @@ module Vortex(
assign curr_PC = fetch_curr_PC;
// From fetch
wire[31:0] fetch_instruction;
wire fetch_delay;
wire[31:0] fetch_curr_PC;
wire fetch_valid[`NT_M1:0];
wire[31:0] fetch_instruction;
wire fetch_delay;
wire[31:0] fetch_curr_PC;
wire fetch_valid[`NT_M1:0];
wire[`NW_M1:0] fetch_warp_num;
wire fetch_ebreak;
// From f_d_register
wire[31:0] f_d_instruction;
@ -62,7 +64,10 @@ wire decode_valid[`NT_M1:0];
wire decode_clone_stall;
wire decode_change_mask;
wire decode_thread_mask[`NT_M1:0];
wire[`NW_M1:0] decode_warp_num;
wire[`NW_M1:0] decode_warp_num;
wire decode_wspawn;
wire[31:0] decode_wspawn_pc;
wire decode_ebreak;
// From d_e_register
wire[11:0] d_e_csr_address;
@ -193,7 +198,7 @@ wire debug;
assign debug = 1'b0;
assign interrupt = 1'b0;
assign total_freeze = fetch_delay || memory_delay;
assign out_ebreak = fetch_ebreak;
VX_fetch vx_fetch(
.clk (clk),
@ -214,12 +219,16 @@ VX_fetch vx_fetch(
.in_change_mask (decode_change_mask),
.in_decode_warp_num (decode_warp_num),
.in_memory_warp_num (memory_warp_num),
.in_wspawn (decode_wspawn),
.in_wspawn_pc (decode_wspawn_pc),
.in_ebreak (decode_ebreak),
.out_instruction (fetch_instruction),
.out_delay (fetch_delay),
.out_curr_PC (fetch_curr_PC),
.out_warp_num (fetch_warp_num),
.out_valid (fetch_valid)
.out_valid (fetch_valid),
.out_ebreak (fetch_ebreak)
);
@ -280,7 +289,10 @@ VX_decode vx_decode(
.out_clone_stall (decode_clone_stall),
.out_change_mask (decode_change_mask),
.out_thread_mask (decode_thread_mask),
.out_warp_num (decode_warp_num)
.out_warp_num (decode_warp_num),
.out_wspawn (decode_wspawn),
.out_wspawn_pc (decode_wspawn_pc),
.out_ebreak (decode_ebreak)
);

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -25,6 +25,7 @@ VL_MODULE(VVortex) {
VL_IN8(reset,0,0);
VL_OUT8(out_cache_driver_in_mem_read,2,0);
VL_OUT8(out_cache_driver_in_mem_write,2,0);
VL_OUT8(out_ebreak,0,0);
VL_IN(fe_instruction,31,0);
VL_OUT(curr_PC,31,0);
VL_IN(in_cache_driver_out_data[2],31,0);
@ -41,23 +42,35 @@ VL_MODULE(VVortex) {
VL_SIG8(Vortex__DOT__decode_branch_type,2,0);
VL_SIG8(Vortex__DOT__decode_jal,0,0);
VL_SIG8(Vortex__DOT__decode_clone_stall,0,0);
VL_SIG8(Vortex__DOT__decode_change_mask,0,0);
VL_SIG8(Vortex__DOT__execute_branch_stall,0,0);
VL_SIG8(Vortex__DOT__memory_branch_dir,0,0);
VL_SIG8(Vortex__DOT__forwarding_fwd_stall,0,0);
VL_SIG8(Vortex__DOT__forwarding_src1_fwd,0,0);
VL_SIG8(Vortex__DOT__forwarding_src2_fwd,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__stall,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_state,1,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__add_warp,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__remove_warp,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_change_mask,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_stall,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_one_change_mask,0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_one_stall,0,0);
VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__warp_num,1,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_itype,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_csr,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_clone,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_jalrs,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_jmprt,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_wspawn,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__jal_sys_jal,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__mul_alu,4,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__is_ebreak,0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__temp_final_alu,4,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__state_stall,5,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__state_stall,5,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__clone_state_stall,5,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__wspawn_state_stall,5,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__rd,4,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__alu_op,4,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__wb,1,0);
@ -89,20 +102,24 @@ VL_MODULE(VVortex) {
VL_SIG16(Vortex__DOT__decode_csr_address,11,0);
VL_SIG16(Vortex__DOT__vx_decode__DOT__alu_tempp,11,0);
VL_SIG16(Vortex__DOT__vx_d_e_reg__DOT__csr_address,11,0);
};
struct {
VL_SIG16(Vortex__DOT__vx_e_m_reg__DOT__csr_address,11,0);
VL_SIG16(Vortex__DOT__vx_csr_handler__DOT__decode_csr_address,11,0);
VL_SIG(Vortex__DOT__decode_itype_immed,31,0);
VL_SIG(Vortex__DOT__decode_jal_offset,31,0);
VL_SIG(Vortex__DOT__memory_branch_dest,31,0);
VL_SIG(Vortex__DOT__csr_decode_csr_data,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__real_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__temp_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__out_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_zero__DOT__real_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_zero__DOT__temp_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_one__DOT__real_PC,31,0);
VL_SIG(Vortex__DOT__vx_fetch__DOT__VX_Warp_one__DOT__temp_PC,31,0);
VL_SIG(Vortex__DOT__vx_f_d_reg__DOT__instruction,31,0);
VL_SIG(Vortex__DOT__vx_f_d_reg__DOT__curr_PC,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__PC_next_out,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__itype_immed,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__upper_immed,19,0);
};
struct {
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__csr_mask,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__curr_PC,31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__jal_offset,31,0);
@ -142,16 +159,30 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT__writeback_write_data[2],31,0);
VL_SIG(Vortex__DOT__forwarding_src1_fwd_data[2],31,0);
VL_SIG(Vortex__DOT__forwarding_src2_fwd_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__VX_Warp__DOT__valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_zero_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__warp_one_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__VX_Warp_zero__DOT__valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT__VX_Warp_one__DOT__valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_f_d_reg__DOT__valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__w0_t0_registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__zero_a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__zero_b_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__one_a_reg_data[2],31,0);
};
struct {
VL_SIG(Vortex__DOT__vx_decode__DOT__one_b_reg_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__jalrs_thread_mask[2],0,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT__jmprt_thread_mask[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__rd1_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__rd2_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__clone_regsiters[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__vx_register_file_master__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__rd1_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__rd2_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__clone_regsiters[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__vx_register_file_master__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__rd1_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__rd2_register[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__clone_regsiters[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__vx_register_file_master__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__DOT__registers[32],31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_d_e_reg__DOT__b_reg_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_d_e_reg__DOT__valid[2],0,0);
@ -167,8 +198,6 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT__vx_writeback__DOT__out_pc_data[2],31,0);
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_execute_PC_next[2],31,0);
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_memory_PC_next[2],31,0);
};
struct {
VL_SIG(Vortex__DOT__vx_forwarding__DOT__use_writeback_PC_next[2],31,0);
VL_SIG16(Vortex__DOT__vx_csr_handler__DOT__csr[4096],11,0);
};
@ -179,13 +208,16 @@ VL_MODULE(VVortex) {
struct {
// Begin mtask footprint all:
VL_SIG8(__Vtableidx1,2,0);
VL_SIG8(__Vdly__Vortex__DOT__vx_fetch__DOT__warp_num,1,0);
VL_SIG8(__Vclklast__TOP__clk,0,0);
VL_SIG8(__Vclklast__TOP__reset,0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__vx_register_file_master__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__vx_register_file_master__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__vx_register_file_master__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__vx_register_file_master__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__vx_register_file_master__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__vx_register_file_master__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src2_data,31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__out_src1_data,31,0);
VL_SIG(Vortex__DOT__vx_execute__DOT____Vcellout__genblk1__BRA__0__KET____DOT__vx_alu__out_alu_result,31,0);
VL_SIG(Vortex__DOT__vx_execute__DOT____Vcellout__genblk1__BRA__1__KET____DOT__vx_alu__out_alu_result,31,0);
VL_SIG8(Vortex__DOT____Vcellout__vx_fetch__out_valid[2],0,0);
@ -239,24 +271,37 @@ VL_MODULE(VVortex) {
VL_SIG(Vortex__DOT____Vcellinp__vx_writeback__in_mem_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_writeback__in_alu_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellout__vx_forwarding__out_src2_fwd_data[2],31,0);
};
struct {
VL_SIG(Vortex__DOT____Vcellout__vx_forwarding__out_src1_fwd_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_mem_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_writeback_alu_result[2],31,0);
};
struct {
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_mem_data[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_memory_alu_result[2],31,0);
VL_SIG(Vortex__DOT____Vcellinp__vx_forwarding__in_execute_alu_result[2],31,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp__out_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp__in_thread_mask[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context__out_b_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context__out_a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_write_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_src2_fwd_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_src1_fwd_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context__in_valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellout__vx_register_file_master__out_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context__DOT____Vcellinp__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__in_regs[32],31,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp_zero__out_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp_zero__in_thread_mask[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellout__VX_Warp_one__out_valid[2],0,0);
VL_SIG8(Vortex__DOT__vx_fetch__DOT____Vcellinp__VX_Warp_one__in_thread_mask[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_zero__w0_t0_registers[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_zero__out_b_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_zero__out_a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_write_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_src2_fwd_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_src1_fwd_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_zero__in_valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_one__out_b_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellout__VX_Context_one__out_a_reg_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_wspawn_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_write_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_src2_fwd_data[2],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_src1_fwd_data[2],31,0);
VL_SIG8(Vortex__DOT__vx_decode__DOT____Vcellinp__VX_Context_one__in_valid[2],0,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellout__vx_register_file_master__out_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_zero__DOT____Vcellinp__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__in_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellout__vx_register_file_master__out_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellinp__vx_register_file_master__in_wspawn_regs[32],31,0);
VL_SIG(Vortex__DOT__vx_decode__DOT__VX_Context_one__DOT____Vcellinp__gen_code_label__BRA__1__KET____DOT__vx_register_file_slave__in_regs[32],31,0);
};
static VL_ST_SIG8(__Vtable1_Vortex__DOT__vx_decode__DOT__mul_alu[8],4,0);

Binary file not shown.

Binary file not shown.

View file

@ -1 +1 @@
obj_dir/VVortex.cpp obj_dir/VVortex.h obj_dir/VVortex.mk obj_dir/VVortex__Syms.cpp obj_dir/VVortex__Syms.h obj_dir/VVortex__ver.d obj_dir/VVortex_classes.mk : /usr/local/Cellar/verilator/4.010/bin/verilator_bin /usr/local/Cellar/verilator/4.010/bin/verilator_bin VX_alu.v VX_context.v VX_csr_handler.v VX_d_e_reg.v VX_decode.v VX_define.v VX_e_m_reg.v VX_execute.v VX_f_d_reg.v VX_fetch.v VX_forwarding.v VX_m_w_reg.v VX_memory.v VX_register_file.v VX_register_file_slave.v VX_warp.v VX_writeback.v Vortex.v
obj_dir/VVortex.cpp obj_dir/VVortex.h obj_dir/VVortex.mk obj_dir/VVortex__Syms.cpp obj_dir/VVortex__Syms.h obj_dir/VVortex__ver.d obj_dir/VVortex_classes.mk : /usr/local/Cellar/verilator/4.010/bin/verilator_bin /usr/local/Cellar/verilator/4.010/bin/verilator_bin VX_alu.v VX_context.v VX_context_slave.v VX_csr_handler.v VX_d_e_reg.v VX_decode.v VX_define.v VX_e_m_reg.v VX_execute.v VX_f_d_reg.v VX_fetch.v VX_forwarding.v VX_m_w_reg.v VX_memory.v VX_register_file.v VX_register_file_master_slave.v VX_register_file_slave.v VX_warp.v VX_writeback.v Vortex.v

View file

@ -2,28 +2,30 @@
C "-Wall -cc Vortex.v --exe test_bench.cpp"
S 4608404 12889046060 1553037052 0 1548678579 0 "/usr/local/Cellar/verilator/4.010/bin/verilator_bin"
S 2785 12889457986 1554064009 0 1554064009 0 "VX_alu.v"
S 3288 12890338917 1557354788 0 1557354788 0 "VX_context.v"
S 3486 12890338917 1557473618 0 1557473618 0 "VX_context.v"
S 4928 12890355578 1557474515 0 1557474515 0 "VX_context_slave.v"
S 1495 12889457987 1554023089 0 1554023089 0 "VX_csr_handler.v"
S 5512 12889457988 1557345046 0 1557345046 0 "VX_d_e_reg.v"
S 12085 12890307904 1557354665 0 1557354665 0 "VX_decode.v"
S 14563 12890307904 1557474495 0 1557474495 0 "VX_decode.v"
S 1574 12890307906 1557343909 0 1557343909 0 "VX_define.v"
S 4267 12889457992 1557345117 0 1557345117 0 "VX_e_m_reg.v"
S 3405 12889457993 1557348460 0 1557348460 0 "VX_execute.v"
S 3692 12889457993 1557447660 0 1557447660 0 "VX_execute.v"
S 1751 12889457994 1557344924 0 1557344924 0 "VX_f_d_reg.v"
S 2362 12890309989 1557358323 0 1557358323 0 "VX_fetch.v"
S 4619 12890309989 1557474372 0 1557474372 0 "VX_fetch.v"
S 6293 12889457996 1557348346 0 1557348346 0 "VX_forwarding.v"
S 1866 12889457997 1557348551 0 1557348551 0 "VX_m_w_reg.v"
S 3847 12890309990 1557348518 0 1557348518 0 "VX_memory.v"
S 1118 12889457999 1557354753 0 1557354753 0 "VX_register_file.v"
S 1428 12889458000 1557354772 0 1557354772 0 "VX_register_file_slave.v"
S 1499 12890308905 1557267602 0 1557267602 0 "VX_warp.v"
S 4352 12890309990 1557474440 0 1557474440 0 "VX_memory.v"
S 1249 12889457999 1557474005 0 1557474005 0 "VX_register_file.v"
S 1655 12890356143 1557474338 0 1557474338 0 "VX_register_file_master_slave.v"
S 1599 12889458000 1557474345 0 1557474345 0 "VX_register_file_slave.v"
S 1686 12890308905 1557474462 0 1557474462 0 "VX_warp.v"
S 1568 12890307909 1557348531 0 1557348531 0 "VX_writeback.v"
S 18244 12890307910 1557357447 0 1557357447 0 "Vortex.v"
T 277561 12890339974 1557358338 0 1557358338 0 "obj_dir/VVortex.cpp"
T 16771 12890339973 1557358338 0 1557358338 0 "obj_dir/VVortex.h"
T 1800 12890339976 1557358338 0 1557358338 0 "obj_dir/VVortex.mk"
T 530 12890339972 1557358338 0 1557358338 0 "obj_dir/VVortex__Syms.cpp"
T 711 12890339971 1557358338 0 1557358338 0 "obj_dir/VVortex__Syms.h"
T 512 12890339977 1557358338 0 1557358338 0 "obj_dir/VVortex__ver.d"
T 0 0 1557358338 0 1557358338 0 "obj_dir/VVortex__verFiles.dat"
T 1159 12890339975 1557358338 0 1557358338 0 "obj_dir/VVortex_classes.mk"
S 18714 12890307910 1557368874 0 1557368874 0 "Vortex.v"
T 451065 12890356589 1557474518 0 1557474518 0 "obj_dir/VVortex.cpp"
T 20559 12890356588 1557474518 0 1557474518 0 "obj_dir/VVortex.h"
T 1800 12890356591 1557474518 0 1557474518 0 "obj_dir/VVortex.mk"
T 530 12890356587 1557474518 0 1557474518 0 "obj_dir/VVortex__Syms.cpp"
T 711 12890356586 1557474518 0 1557474518 0 "obj_dir/VVortex__Syms.h"
T 563 12890356592 1557474518 0 1557474518 0 "obj_dir/VVortex__ver.d"
T 0 0 1557474518 0 1557474518 0 "obj_dir/VVortex__verFiles.dat"
T 1159 12890356590 1557474518 0 1557474518 0 "obj_dir/VVortex_classes.mk"

Binary file not shown.

View file

@ -1,7 +1,7 @@
# Dynamic Instructions: 122612
# of total cycles: 122624
# Dynamic Instructions: 222955
# of total cycles: 222962
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 1.0001
# CPI: 1.00003
# time to simulate: 6.95312e-310 milliseconds
# GRADE: Failed on test: 0
# GRADE: Failed on test: 4294967295

View file

@ -326,11 +326,12 @@ bool Vortex::simulate(std::string file_to_simulate)
bool istop;
bool dstop;
bool cont = false;
// for (int i = 0; i < 500; i++)
// unsigned cycles;
while (this->stop && (!(stop && (counter > 5))))
counter = 0;
while (this->stop && ((counter < 5)))
{
// std::cout << "************* Cycle: " << cycle << "\n";
@ -347,10 +348,12 @@ bool Vortex::simulate(std::string file_to_simulate)
vortex->eval();
stop = istop && dstop;
// stop = istop && dstop;
stop = vortex->out_ebreak;
if (stop)
if (stop || cont)
{
cont = true;
counter++;
} else
{