mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-20 03:47:31 -04:00
Compare commits
1580 commits
Author | SHA1 | Date | |
---|---|---|---|
|
332e8eeaf9 | ||
|
5dbfcecc21 | ||
|
f19335023f | ||
|
6a7e402ab4 | ||
|
18687d53b3 | ||
|
a35fb4bf1d | ||
|
9929c42417 | ||
|
06e5e2e859 | ||
|
09e89791e5 | ||
|
b35f69f486 | ||
|
63b41f21c6 | ||
|
cc7fdf2fbd | ||
|
a9352a3b64 | ||
|
9a2709db08 | ||
|
4785736e4d | ||
|
38861d9aaf | ||
|
82b0eeded6 | ||
|
22398c991d | ||
|
e80ee2c819 | ||
|
9dc1d3f688 | ||
|
0c1bc17c09 | ||
|
4e83c28d04 | ||
|
2c940cf509 | ||
|
fb4527fe95 | ||
|
d1f37fc629 | ||
|
001a107395 | ||
|
fce24b9535 | ||
|
6d27575db3 | ||
|
a2cfeffcfe | ||
|
cb491ddb53 | ||
|
43b143bba6 | ||
|
87297e0eca | ||
|
929ef1b6e2 | ||
|
83ba1cc3dc | ||
|
347889c504 | ||
|
083cf04afd | ||
|
84b1c8a43c | ||
|
43d33b942e | ||
|
adf60e7e35 | ||
|
8fda922570 | ||
|
53900bee4f | ||
|
704f525fd6 | ||
|
f478bdcf25 | ||
|
01974e124f | ||
|
100e4e3970 | ||
|
4819891a5e | ||
|
066ab105eb | ||
|
a98d2e24e5 | ||
|
572a397018 | ||
|
cad129c64c | ||
|
bae24e589c | ||
|
461f2cbbc9 | ||
|
7975a5a38c | ||
|
f635d71ba4 | ||
|
70ade222b1 | ||
|
aa6a47eb11 | ||
|
115ff2b599 | ||
|
896c59306c | ||
|
6bbcd4ebaf | ||
|
6b23d290c3 | ||
|
5d91fe58ad | ||
|
5891a1e592 | ||
|
18ae57cc7f | ||
|
a760d909cb | ||
|
86f20b27dd | ||
|
3ace9bbeda | ||
|
30b0daf050 | ||
|
24ca4f03aa | ||
|
3b454efd56 | ||
|
951746badc | ||
|
6c2cbdfec2 | ||
|
973fcd7845 | ||
|
5eecd0e987 | ||
|
073e0ddd10 | ||
|
c05a0571c8 | ||
|
1e4583ac17 | ||
|
3e4bbfc9f0 | ||
|
7c4ce74801 | ||
|
18bf49d1e0 | ||
|
180735c531 | ||
|
8d8769c710 | ||
|
b0c48e7a46 | ||
|
320c090613 | ||
|
b48b605b51 | ||
|
8230b37411 | ||
|
5844de8c4d | ||
|
dfc7b6178c | ||
|
bffc6d9610 | ||
|
6dbbc62b04 | ||
|
667fa1662d | ||
|
e73e1c2bb3 | ||
|
27f3d6dde6 | ||
|
d475e9d201 | ||
|
ce510d78c7 | ||
|
eecff10dea | ||
|
98b58606e5 | ||
|
8b172d07ec | ||
|
f68cc95cbe | ||
|
659ad87f93 | ||
|
22ade31fd5 | ||
|
2d3f4b6efc | ||
|
cc5ac8388b | ||
|
ec12b50007 | ||
|
e7d09feb4a | ||
|
7ab58111d8 | ||
|
1c384c096d | ||
|
24d018b4c9 | ||
|
1fa4603fa2 | ||
|
3a3bb7b70a | ||
|
ff50306833 | ||
|
519023fb2b | ||
|
8fdca0e52a | ||
|
f184b57c24 | ||
|
d584e7bac1 | ||
|
2b3d1f0860 | ||
|
fccbadfe25 | ||
|
1e4f0fa0bd | ||
|
22c3828bf5 | ||
|
acc1e3dfd8 | ||
|
0f380a3d78 | ||
|
9373e21950 | ||
|
2bd22253eb | ||
|
4206ffdb80 | ||
|
b6bd6467ef | ||
|
8f29ad58ae | ||
|
6b1091e08f | ||
|
91fee5da11 | ||
|
077b682d7d | ||
|
5971158f43 | ||
|
a7ba377581 | ||
|
f695e4d754 | ||
|
e06333b3c0 | ||
|
645befdce6 | ||
|
e62b638d88 | ||
|
1d5e4f63dd | ||
|
68b78fc42f | ||
|
db98965f56 | ||
|
03a1e25828 | ||
|
5d7e53f7d7 | ||
|
37757fab8f | ||
|
0d04423074 | ||
|
fe5442dbb3 | ||
|
2a2fc2ae39 | ||
|
26df675e24 | ||
|
f63233334e | ||
|
9e5638c9b0 | ||
|
1d626588ef | ||
|
37f4d05393 | ||
|
9f32e5693c | ||
|
684f2e2d3d | ||
|
28bf27e951 | ||
|
8155173aab | ||
|
d3df61abb0 | ||
|
a5381fd788 | ||
|
f49084b298 | ||
|
ee96d4334b | ||
|
c91f9684fc | ||
|
07ce16e75c | ||
|
2eeb2ac532 | ||
|
91c135ac15 | ||
|
faa3b9a469 | ||
|
847562be9e | ||
|
0bf79a0f05 | ||
|
119805a959 | ||
|
bc765d10bd | ||
|
208c5b3804 | ||
|
32b0376b28 | ||
|
6a447350b7 | ||
|
dd16d70515 | ||
|
5cf6797bd3 | ||
|
bbc02cc013 | ||
|
b7531c9de1 | ||
|
6c725978b4 | ||
|
d1175a03c9 | ||
|
83badaac86 | ||
|
4b8ca42e85 | ||
|
ad7377c8ba | ||
|
5cb033ae13 | ||
|
44ebc12ed4 | ||
|
a3aca502b7 | ||
|
4a606061d2 | ||
|
ee69024841 | ||
|
6f81df5edb | ||
|
1deb13c469 | ||
|
2d00cec9d3 | ||
|
a3031922ce | ||
|
60860ec684 | ||
|
cf3909a910 | ||
|
5c694a997c | ||
|
30571d716c | ||
|
b8475c65dc | ||
|
4329e3f968 | ||
|
b634f9f47d | ||
|
87e613d29d | ||
|
eee037ffcd | ||
|
9027555e6a | ||
|
989341a77d | ||
|
ec8cc4c84d | ||
|
6e40162027 | ||
|
f2c970868e | ||
|
533ddffc47 | ||
|
e9f19a0bf9 | ||
|
5db1937a5e | ||
|
9a3eb74051 | ||
|
27543e240e | ||
|
4f11278d2c | ||
|
0e3206747a | ||
|
ce4f90e843 | ||
|
a9a5ded030 | ||
|
b5f541b891 | ||
|
2cf483ddf5 | ||
|
9a6dbdf1a9 | ||
|
818522f7e4 | ||
|
030071571d | ||
|
e5e9a5c2e9 | ||
|
406583c0bd | ||
|
29ea3041c4 | ||
|
828b8827e7 | ||
|
a80be895ba | ||
|
923d2bb94c | ||
|
e38c2c1fba | ||
|
8bb5e5ab8a | ||
|
b146fab290 | ||
|
15ead4acf6 | ||
|
f5eca75311 | ||
|
5e123d0507 | ||
|
54f0c8e270 | ||
|
b8199decf4 | ||
|
00feb8b424 | ||
|
7938c7be5f | ||
|
3bac7eae6a | ||
|
a61f97f6c6 | ||
|
5ab13559e0 | ||
|
4383631543 | ||
|
9cc0010835 | ||
|
9902856221 | ||
|
e5f2442353 | ||
|
ff9d52c162 | ||
|
acc8221a7e | ||
|
63cce35c1a | ||
|
d2db612bb4 | ||
|
e8ce3878bb | ||
|
380c36d930 | ||
|
4fff940e42 | ||
|
2d7f9eae0a | ||
|
a37309c6b0 | ||
|
48f86a48f6 | ||
|
f0bff2a4a2 | ||
|
8e3bd5696b | ||
|
992f8d97d3 | ||
|
8908f3e006 | ||
|
f2c1ad7831 | ||
|
8135f72cc9 | ||
|
50458bbae0 | ||
|
5a2d4e6c26 | ||
|
0a48d98bc1 | ||
|
dc76101068 | ||
|
bbe9c0372f | ||
|
263893eb7c | ||
|
b77fff764e | ||
|
145eacc451 | ||
|
1ddd1ba1cc | ||
|
49ed88e59f | ||
|
7208f251b7 | ||
|
6cf0d9f7b4 | ||
|
ccf0135d97 | ||
|
daec55ae95 | ||
|
e91eb4aed4 | ||
|
5c72685356 | ||
|
f00f96377b | ||
|
230b29de6f | ||
|
bb9ae8576d | ||
|
ae24264a2a | ||
|
83d65e2cf1 | ||
|
63840a20da | ||
|
b56aa00f4f | ||
|
202af1e783 | ||
|
207840a97e | ||
|
b1dc2fba42 | ||
|
cc105eaea9 | ||
|
fa11d4c502 | ||
|
6626f9201c | ||
|
7823f5529c | ||
|
7bef62aef8 | ||
|
1a35d3fed1 | ||
|
0cbdc3be9e | ||
|
aa1489d8eb | ||
|
a75ed78bf2 | ||
|
2041a4ad4a | ||
|
bfbe642170 | ||
|
fdc62c5f98 | ||
|
e178eb1330 | ||
|
7cbb026a12 | ||
|
efc8834c75 | ||
|
8db77ea1cd | ||
|
cf9172b8fc | ||
|
fb0cd1c272 | ||
|
0aaca84016 | ||
|
8d1baf677d | ||
|
37555b1208 | ||
|
96fb3566a9 | ||
|
7ca9a5e87e | ||
|
039e5e2ffc | ||
|
32738e0b74 | ||
|
fd5903fef1 | ||
|
335b53475a | ||
|
f9230bdac3 | ||
|
19d6142023 | ||
|
c28449f515 | ||
|
45ed8abf22 | ||
|
d16aee3ecd | ||
|
c4df7221c6 | ||
|
33bec667c2 | ||
|
a17580375b | ||
|
40e04a409e | ||
|
d7eae0c886 | ||
|
32636fac70 | ||
|
8215089194 | ||
|
d979cf277f | ||
|
72c63a47f3 | ||
|
431c0cfc46 | ||
|
83ea236b84 | ||
|
01fedb066c | ||
|
7d0c141129 | ||
|
6eee0728fb | ||
|
fc5bb387a2 | ||
|
961b9c3d63 | ||
|
5f2bf2418b | ||
|
847dee3473 | ||
|
105f884129 | ||
|
fa1fd39645 | ||
|
a38960674e | ||
|
0f41774fea | ||
|
41e41c9688 | ||
|
74a47ebbe4 | ||
|
6c1e785004 | ||
|
4cc7426c44 | ||
|
cf42025c20 | ||
|
f4426e0127 | ||
|
91b8c6e67a | ||
|
c162d04b8f | ||
|
4480ed8b0e | ||
|
5adfd5ec68 | ||
|
6d5e71a062 | ||
|
9718a5b405 | ||
|
51719f69bb | ||
|
2ca3439109 | ||
|
088aed022f | ||
|
df3fc150f4 | ||
|
b40441b68f | ||
|
bdcc5f5991 | ||
|
b6879b25e3 | ||
|
592297582e | ||
|
e538dfa316 | ||
|
e05fe0d75b | ||
|
383dc1f6b8 | ||
|
3b336d7fb3 | ||
|
4570a20eee | ||
|
10a8705161 | ||
|
1f5cc53434 | ||
|
0ed589a3bf | ||
|
cd97945d0d | ||
|
31a5ab714e | ||
|
370daf1025 | ||
|
bcf7d9f960 | ||
|
ade6b2c985 | ||
|
4f9b15d96d | ||
|
f57841608e | ||
|
35c15f554d | ||
|
ea9560b33b | ||
|
86b0bdd93c | ||
|
66fd2d4e2d | ||
|
6eeb8eac0f | ||
|
df99b9da0e | ||
|
7ae7ffa007 | ||
|
e4bfa47895 | ||
|
ca3499f3df | ||
|
811ceb5dc0 | ||
|
177f0efc59 | ||
|
9797c6c48a | ||
|
771a10ea0c | ||
|
005d480bb4 | ||
|
5e241c153c | ||
|
693a9f648d | ||
|
1814ff6d40 | ||
|
2762bd53ff | ||
|
8e9026524a | ||
|
3612ceda80 | ||
|
a2b24b4ed0 | ||
|
de47307428 | ||
|
06ef53025d | ||
|
6c607d32fe | ||
|
f6daf9bb84 | ||
|
adcad92a73 | ||
|
51862dbc06 | ||
|
9d3d35c6b4 | ||
|
b6663eaff9 | ||
|
a03471837c | ||
|
9638f5a6e6 | ||
|
62a4ee7a3e | ||
|
1f43d4a2fc | ||
|
8fe02093e2 | ||
|
20b82fd34d | ||
|
4b6f8efeaa | ||
|
9fc9b43307 | ||
|
304761c6fc | ||
|
e34e4b790a | ||
|
7a61b67170 | ||
|
f6ed49f19c | ||
|
d5fa26350c | ||
|
a523afbebe | ||
|
b83190c6e1 | ||
|
f4983cb380 | ||
|
65bd9afabb | ||
|
54045fa05b | ||
|
bc936c67a3 | ||
|
4a213e7c20 | ||
|
26df47d6e2 | ||
|
d7e8fd74ff | ||
|
49738672ec | ||
|
48ff4ee4e0 | ||
|
aaff18cca2 | ||
|
2b22d47dd9 | ||
|
98db249500 | ||
|
9c346dee86 | ||
|
58e5435f0f | ||
|
cfb5cd5326 | ||
|
aef1411af5 | ||
|
e23d569076 | ||
|
d6f1393627 | ||
|
ea34239b43 | ||
|
7528dd9c0f | ||
|
19b5496f00 | ||
|
ee39da74b4 | ||
|
1a9a04ac76 | ||
|
76f4cd66d3 | ||
|
3ae3afc59b | ||
|
5126a7c472 | ||
|
6c1ee9bfea | ||
|
14ae4b8c13 | ||
|
2edda834c3 | ||
|
47427ab22e | ||
|
d74ee43a66 | ||
|
79362dea4b | ||
|
9053919e92 | ||
|
ed66ee2806 | ||
|
bab9496117 | ||
|
de81baaabf | ||
|
6f3add273d | ||
|
1fb0691bc7 | ||
|
c94c3651ec | ||
|
8fb73b6da7 | ||
|
32a882e26f | ||
|
eaa7ed7fe2 | ||
|
c8d0357ac6 | ||
|
229641441f | ||
|
42afa2472f | ||
|
455fc8389c | ||
|
ab21f76aed | ||
|
f1e79f4c0f | ||
|
932c435a20 | ||
|
aad3b26332 | ||
|
30ebb65fc3 | ||
|
0d7012e69e | ||
|
bddf276335 | ||
|
e86eeab8ea | ||
|
7cdfac8ea1 | ||
|
32497e19df | ||
|
df8355ac76 | ||
|
c265ff97b8 | ||
|
d276875ab9 | ||
|
50b12ef754 | ||
|
0096e60f03 | ||
|
9dcb377b67 | ||
|
fbedc567e5 | ||
|
b81ae8e431 | ||
|
42c62001ec | ||
|
e663db9b5a | ||
|
9ddb06bf56 | ||
|
59108525e1 | ||
|
74579fd4dc | ||
|
668b590876 | ||
|
cb1e49d3f6 | ||
|
4b6a48c716 | ||
|
07981a585c | ||
|
4b93c9ffb5 | ||
|
35fb50f9a6 | ||
|
fce935f1c4 | ||
|
fc0392e5e3 | ||
|
d09bce011b | ||
|
4c1b3fd88d | ||
|
52c5f1ff6b | ||
|
3d51b8bcfd | ||
|
76f74b8a59 | ||
|
e8cdae1225 | ||
|
067b7a8726 | ||
|
3b81a32b12 | ||
|
410c47e2ae | ||
|
f723e7baf5 | ||
|
9c5aee5e25 | ||
|
c1b8ecfd1a | ||
|
16c209ac0c | ||
|
09028d8cee | ||
|
c00368b7c6 | ||
|
3075c1737b | ||
|
29c5a28273 | ||
|
e53b295eea | ||
|
0a3035e6a7 | ||
|
81251b1af8 | ||
|
ef5d58dc9e | ||
|
4dc34cfd2d | ||
|
3fe8f963aa | ||
|
fc50b66819 | ||
|
609155e490 | ||
|
516ce43a5c | ||
|
2e77c9eec2 | ||
|
95ca49a85f | ||
|
e1c5b5277e | ||
|
029609b3fd | ||
|
2bc8a881b6 | ||
|
9cc3e0a459 | ||
|
abf8d2c51a | ||
|
6e55840a32 | ||
|
047960ac4d | ||
|
54e6421854 | ||
|
f46b764748 | ||
|
99cbae1820 | ||
|
edf960d9ed | ||
|
22b0525c51 | ||
|
34f7e3c982 | ||
|
5600a8dd42 | ||
|
bce5226614 | ||
|
2a9895c337 | ||
|
eab1791d46 | ||
|
724cb40849 | ||
|
a62e651b02 | ||
|
3223a40a76 | ||
|
8457163114 | ||
|
a91dabcc72 | ||
|
0709d656ca | ||
|
30258c04d2 | ||
|
9db3870309 | ||
|
34ef500910 | ||
|
735b713613 | ||
|
8d978f23ce | ||
|
78fc053ad5 | ||
|
6add1e16f6 | ||
|
49255bfa69 | ||
|
31133ae6e9 | ||
|
7916684c36 | ||
|
aa45f55126 | ||
|
5877cfe8ae | ||
|
52233fe13a | ||
|
6d480b3da1 | ||
|
2e61dad11f | ||
|
c99e4b37b6 | ||
|
3a5278a62e | ||
|
e21bf9afbd | ||
|
9942f251e0 | ||
|
da9c51aa3f | ||
|
7b80da2538 | ||
|
53c547f9de | ||
|
43a90071e1 | ||
|
2662b6bcab | ||
|
da1f4baa5d | ||
|
768c966681 | ||
|
ae312f9022 | ||
|
e20a610e67 | ||
|
e7660b6ffe | ||
|
de66a1b861 | ||
|
e34f824bf9 | ||
|
75f1f957d4 | ||
|
96831c8b89 | ||
|
03e21924f4 | ||
|
2e060faaf4 | ||
|
48b1ab7494 | ||
|
382b686d59 | ||
|
160c428ef5 | ||
|
7f99007568 | ||
|
bad280ae80 | ||
|
7cc6df7e7c | ||
|
fcf9c13d5f | ||
|
ca5232b58d | ||
|
4cd48193da | ||
|
4c09d107c3 | ||
|
69126dfd35 | ||
|
904a6dc136 | ||
|
81b17169ab | ||
|
f5014e8975 | ||
|
a5bde3693f | ||
|
c8455eb562 | ||
|
bdbe22ff4d | ||
|
2b481024bb | ||
|
fe8ab30345 | ||
|
ed0171bcc7 | ||
|
20ce870b1b | ||
|
b9328754bc | ||
|
b7594edf74 | ||
|
5b9d01a421 | ||
|
01187795d0 | ||
|
3de14dd8bf | ||
|
2773b87ae5 | ||
|
5457cab5d1 | ||
|
f4376e2c4a | ||
|
e42c7c6a82 | ||
|
31663aa7ca | ||
|
b859d0c088 | ||
|
3a5bdd4cf0 | ||
|
ab761946e5 | ||
|
aec73aa758 | ||
|
8df962d6b4 | ||
|
c5c4ccdd95 | ||
|
b3a4d58825 | ||
|
99f114aba3 | ||
|
7808a0a3e3 | ||
|
eb92e0bdbe | ||
|
60f7786e17 | ||
|
d39f3f688a | ||
|
95f59d23a8 | ||
|
b489cc7abd | ||
|
cd94288e05 | ||
|
2f723c4afd | ||
|
e7b2bb81b4 | ||
|
d85661420b | ||
|
fbe236037e | ||
|
9c43bae333 | ||
|
530ec638a7 | ||
|
24e8e91a94 | ||
|
527910b31e | ||
|
178f4d67fa | ||
|
a5377d78ca | ||
|
31e31ac528 | ||
|
0bc459d84e | ||
|
7057103deb | ||
|
cff6e320b2 | ||
|
01b81830bf | ||
|
1981179995 | ||
|
e778015ab9 | ||
|
cf3a77ab1b | ||
|
92390187ff | ||
|
920da584af | ||
|
71c2bab682 | ||
|
75782f49da | ||
|
ef38dc9744 | ||
|
fb141ae522 | ||
|
ca474c39b9 | ||
|
e7a3b52d2a | ||
|
0cb75c67c0 | ||
|
80b79d4d31 | ||
|
be935420f0 | ||
|
198c3b92ff | ||
|
f637219fc7 | ||
|
5cb8ba5e92 | ||
|
8feb8b191c | ||
|
9f8e02824f | ||
|
e4060fee3a | ||
|
c706d410bf | ||
|
a9f122bd9c | ||
|
f5956b2eb7 | ||
|
90b4a16c9b | ||
|
c3e657f201 | ||
|
0f8e5505d3 | ||
|
a23fb26a8b | ||
|
a4ee8dfa7f | ||
|
35f561b0e9 | ||
|
0e15f69838 | ||
|
3eaa6e4e55 | ||
|
aac57a5f81 | ||
|
016f8e830c | ||
|
2fa99b7d17 | ||
|
e33b62f6fc | ||
|
5c73243a4e | ||
|
d07266b1f1 | ||
|
aa954a6aa8 | ||
|
3cba4f4a5f | ||
|
4b73762aea | ||
|
65036e2d34 | ||
|
7507e36149 | ||
|
08bd918066 | ||
|
9944331f29 | ||
|
9a018014ff | ||
|
068ec6c5ef | ||
|
1b9f0a998b | ||
|
b297c29a10 | ||
|
6a03882bd2 | ||
|
578c3d33d2 | ||
|
0dbcddcb54 | ||
|
f0ebe94253 | ||
|
ad36bdbd44 | ||
|
84da2ff8ff | ||
|
a2307a28dc | ||
|
59ed24dc0b | ||
|
649e15c2b3 | ||
|
42f3d55e15 | ||
|
7b94c983c9 | ||
|
79ad8bb51f | ||
|
47c33cca66 | ||
|
34023ab814 | ||
|
91a1f41f99 | ||
|
2460b9b95b | ||
|
69f7213afc | ||
|
b8757c539d | ||
|
a854e9d25b | ||
|
156707bbec | ||
|
d6770f7adc | ||
|
13d5a9c969 | ||
|
0acf7761ef | ||
|
ed0c1a778f | ||
|
2651632884 | ||
|
7b9d2bdff2 | ||
|
3ab353ab61 | ||
|
3efced37c5 | ||
|
1322499c3f | ||
|
f0a4d2e142 | ||
|
314ad3ff8a | ||
|
31837dd7c3 | ||
|
f0ea1acaa2 | ||
|
c13e02b19f | ||
|
ccbb2243cc | ||
|
3caeeeea13 | ||
|
b99cd97622 | ||
|
d531fa6b26 | ||
|
58c3c63dae | ||
|
56e9e19508 | ||
|
ed11defd71 | ||
|
934416d3ec | ||
|
4ab015ddd9 | ||
|
5e63b8f35a | ||
|
5b0fc8cbd4 | ||
|
3d98121ab6 | ||
|
02091f3d44 | ||
|
a378aed67c | ||
|
f97ffac7e7 | ||
|
b82a755e44 | ||
|
61df2ca428 | ||
|
deee7cd8b0 | ||
|
2271d2b286 | ||
|
862997fc94 | ||
|
62673b4b72 | ||
|
01c7b5e384 | ||
|
cfcece940e | ||
|
d8a6ac748a | ||
|
2f2974ee72 | ||
|
6f0af066e8 | ||
|
2b426693f5 | ||
|
cf3f2d4f6f | ||
|
54af5eb186 | ||
|
efe12ca6bf | ||
|
0e3badf723 | ||
|
99c6a1af5a | ||
|
abdea91120 | ||
|
4adabc3f68 | ||
|
4a11c1ec0f | ||
|
c5e57ce5d5 | ||
|
896eb09ec3 | ||
|
cf3413c824 | ||
|
3f52964a94 | ||
|
8b53f01d1c | ||
|
e603c302b2 | ||
|
3611ad5b4c | ||
|
5bcf24ed55 | ||
|
78b6e0638c | ||
|
0e2e09b2ea | ||
|
3393522b54 | ||
|
4ced61790a | ||
|
04b967217d | ||
|
7887e6b432 | ||
|
267521a1cb | ||
|
023dc477a6 | ||
|
250a5741f7 | ||
|
8b63305201 | ||
|
e38187acb5 | ||
|
c0044bc303 | ||
|
99eaaf6189 | ||
|
fa5590bbf7 | ||
|
96cb381885 | ||
|
8c5a783477 | ||
|
e79f905d87 | ||
|
01cca01511 | ||
|
f992f5bae6 | ||
|
b21ea24815 | ||
|
9e20e6edb6 | ||
|
6c56edf65d | ||
|
6b800f2054 | ||
|
a7fac99fd9 | ||
|
0235da5798 | ||
|
bc280e2703 | ||
|
2b9628c73b | ||
|
681646d27a | ||
|
5b56f76289 | ||
|
a38d47c0df | ||
|
68d2ac6f5e | ||
|
364136d66f | ||
|
f8ef570778 | ||
|
0426856ab4 | ||
|
9dbff0e77c | ||
|
f0253a5f80 | ||
|
47d578c4d2 | ||
|
319c18158a | ||
|
c1000f6a3b | ||
|
405d6b468f | ||
|
32f39264ef | ||
|
d413786b9e | ||
|
f35e4266ed | ||
|
bd33f1edc3 | ||
|
a72c68acf4 | ||
|
f6663d6618 | ||
|
b1ae82bae5 | ||
|
e4fdc740ba | ||
|
f3c0b7d186 | ||
|
dc5cbfe932 | ||
|
e1c8ff02be | ||
|
210e4a8e8f | ||
|
9b79d60507 | ||
|
d99aaf3933 | ||
|
c7c1dddeac | ||
|
94cadb69d2 | ||
|
b3f96e288a | ||
|
8d97d2c998 | ||
|
0aaf010a62 | ||
|
ab56cc6d4a | ||
|
15ca8290d0 | ||
|
0fbe22dafa | ||
|
ae11df3e6a | ||
|
19beb0728e | ||
|
60107cf2b6 | ||
|
98f080340a | ||
|
dc27d3c014 | ||
|
bb53658ce7 | ||
|
112e8235ee | ||
|
4e7bc9654b | ||
|
d1ba02681e | ||
|
3cceed1e0b | ||
|
c1fa2bbc38 | ||
|
82a417f1f0 | ||
|
df95c7c4c6 | ||
|
599edfbbeb | ||
|
aaba10a133 | ||
|
4bb31e63e2 | ||
|
ca11ccee1e | ||
|
a1ddddf929 | ||
|
ad5d82d430 | ||
|
b9da621c09 | ||
|
da65e964ed | ||
|
4a2b984710 | ||
|
13e9e6d9f9 | ||
|
1864e46c14 | ||
|
b9c0082cb8 | ||
|
717b2e9ba1 | ||
|
b6aa44f39f | ||
|
0003926d01 | ||
|
98ead77405 | ||
|
82908a3026 | ||
|
9e7074b871 | ||
|
a569d54104 | ||
|
04e72a2341 | ||
|
ce26a0a3cc | ||
|
bae6cd2d86 | ||
|
a94d868cd5 | ||
|
faacace2a4 | ||
|
6519d356ed | ||
|
da272064b5 | ||
|
009861433a | ||
|
79f5824c74 | ||
|
0cabd24f08 | ||
|
311799b423 | ||
|
189990e351 | ||
|
badfb24e01 | ||
|
c8bae13448 | ||
|
f4202868bc | ||
|
42202e2940 | ||
|
b68d32b83c | ||
|
7d82212fb1 | ||
|
ef5aa6d610 | ||
|
68a3664a04 | ||
|
1b2d9ed538 | ||
|
b5ca7a999c | ||
|
896aca0c62 | ||
|
4737cdabbd | ||
|
06896f272c | ||
|
27a9e30857 | ||
|
e84f978502 | ||
|
5ea10fd872 | ||
|
aea1d2c8eb | ||
|
19484a531a | ||
|
284d438acd | ||
|
ca79e69355 | ||
|
9df25ff48f | ||
|
604c41fc54 | ||
|
a167c07e7d | ||
|
db0f0fd353 | ||
|
c554f53e44 | ||
|
daf1360d83 | ||
|
0cd2ea458f | ||
|
0098d197c9 | ||
|
efc7a971dc | ||
|
dbe052594d | ||
|
95b23fa97b | ||
|
f369006956 | ||
|
0c746d93bb | ||
|
e3d06e0d9c | ||
|
57a5aead4c | ||
|
8fa28bfca1 | ||
|
69fdb4bd04 | ||
|
ac669a30ca | ||
|
8a933520f0 | ||
|
dfed5b29c0 | ||
|
1f8b3dcc5c | ||
|
75255269a1 | ||
|
15dc9afe93 | ||
|
2488e4736c | ||
|
9380fd3d72 | ||
|
83a7deb5da | ||
|
25d0c76d14 | ||
|
b32ea5b750 | ||
|
9feb46387e | ||
|
1963ae04b2 | ||
|
7f61f0b015 | ||
|
bb47a14388 | ||
|
3de8075636 | ||
|
de66d2ec3e | ||
|
5231bb8576 | ||
|
e58c12fc02 | ||
|
b54e85113d | ||
|
cef05d3110 | ||
|
bdcf2a0af0 | ||
|
dd461468d3 | ||
|
7784dfe9b7 | ||
|
135cc4f5a7 | ||
|
db35f5d768 | ||
|
0e4501aecd | ||
|
135445ce9c | ||
|
d96e2fa56b | ||
|
e9b66d5a1c | ||
|
6400e73c42 | ||
|
ca1bbdf415 | ||
|
b02746fb0d | ||
|
8d252bb6f8 | ||
|
04314cefed | ||
|
70717fb42b | ||
|
351aa48f6e | ||
|
3534175d43 | ||
|
b4f5616814 | ||
|
7df4f1ba03 | ||
|
c39b8e1112 | ||
|
d743e2ba22 | ||
|
87bff732ac | ||
|
486cd0b866 | ||
|
da8608d702 | ||
|
d0c441519d | ||
|
2a38ef0db8 | ||
|
914044b57b | ||
|
d1ff95eb9f | ||
|
beebc2adf0 | ||
|
e4e0ee8fef | ||
|
299657d693 | ||
|
03cf694238 | ||
|
6b81b26ffc | ||
|
99c91987fb | ||
|
697e2fffbb | ||
|
70f2f58ac9 | ||
|
8ab4c53e27 | ||
|
c8dd0aafb0 | ||
|
ae12b45f77 | ||
|
896aa6b2a1 | ||
|
74368ab65a | ||
|
afc0b2056d | ||
|
c6e09d40ff | ||
|
402c911991 | ||
|
86055335ee | ||
|
d27656819b | ||
|
7324900c57 | ||
|
459abdef21 | ||
|
830c43517b | ||
|
19ba4c7bd4 | ||
|
178e1a6b2a | ||
|
d0a53ff53e | ||
|
231eb4e78b | ||
|
04de44d280 | ||
|
3cd7f41012 | ||
|
35a782a7ba | ||
|
2776f2cdf0 | ||
|
6d0e345073 | ||
|
91135bb855 | ||
|
fbc49b1455 | ||
|
354d3663e1 | ||
|
45e791437c | ||
|
17cdc32eee | ||
|
c175e11a18 | ||
|
df38cc00f5 | ||
|
d65f6e064a | ||
|
6556e8c66d | ||
|
dc19d25bcc | ||
|
787f02e4c6 | ||
|
d9426d5789 | ||
|
100eb49201 | ||
|
f1522e68f8 | ||
|
07c063031f | ||
|
a8f2bb30da | ||
|
454b9e7444 | ||
|
4766787478 | ||
|
a8e892593e | ||
|
840ced22a9 | ||
|
ff6f33acff | ||
|
3ec37c6c40 | ||
|
c1e639bd44 | ||
|
489738751f | ||
|
33406d2e83 | ||
|
27db94b20d | ||
|
72f5976dd6 | ||
|
44e685f8af | ||
|
4d7b2b9ea5 | ||
|
de0f4dda44 | ||
|
b0f3e91006 | ||
|
288147ac4f | ||
|
274e6a4c52 | ||
|
de8453d0be | ||
|
589e351832 | ||
|
34324bb768 | ||
|
fe3c712d66 | ||
|
badb0c8300 | ||
|
c344e28476 | ||
|
9c900394fa | ||
|
cce517b02b | ||
|
26d45ed9db | ||
|
1b9c39283e | ||
|
dd40e9c754 | ||
|
59497e52df | ||
|
51ae0b71f3 | ||
|
041f573815 | ||
|
fc0f5e2ca4 | ||
|
76a828cf50 | ||
|
bb3a49f95b | ||
|
413e933b8a | ||
|
bb4c150aaf | ||
|
a78ac7a246 | ||
|
f13a885815 | ||
|
422bcdee0f | ||
|
08c06be601 | ||
|
de90863333 | ||
|
f83094e0d9 | ||
|
21b54761e3 | ||
|
a01f3a0370 | ||
|
1e5550ccda | ||
|
9e54ccde6d | ||
|
7f778000ea | ||
|
b8ccff7ade | ||
|
5f2b10b8a6 | ||
|
3fee1a6193 | ||
|
ae7b01405c | ||
|
0823c71b2e | ||
|
be0db6e1a5 | ||
|
50028c1a33 | ||
|
e06e6646a9 | ||
|
8d4b6c804f | ||
|
6f7a389a1f | ||
|
fe15647f98 | ||
|
b0b7cd2b1e | ||
|
61da7f609d | ||
|
f9cd8be19e | ||
|
0a38312527 | ||
|
dab262e4f7 | ||
|
8ab7c590fd | ||
|
e3090930c0 | ||
|
e2d1387df8 | ||
|
fd65ed95eb | ||
|
597e3b0e35 | ||
|
b31d868a27 | ||
|
b6919d19a7 | ||
|
eac6a485fa | ||
|
6045597ad0 | ||
|
1c1140d517 | ||
|
38b92ad592 | ||
|
b08c7403f6 | ||
|
a9d578f3ab | ||
|
f04ee15f94 | ||
|
f0e6a435f8 | ||
|
648bf75b0b | ||
|
665907355e | ||
|
3b75418ea9 | ||
|
f2e8317412 | ||
|
cc042a4098 | ||
|
bd18b03cc3 | ||
|
e7f8b40d93 | ||
|
ec2a35def9 | ||
|
29c15dc9c4 | ||
|
031d24e695 | ||
|
645ca62c91 | ||
|
7425446b15 | ||
|
db5db20800 | ||
|
a7548db5ec | ||
|
e62d122c9b | ||
|
e8cbfb4a72 | ||
|
51e621cdf1 | ||
|
83d55da2d5 | ||
|
afea903332 | ||
|
36f5dd87fe | ||
|
e217bc2c23 | ||
|
57ecb5e530 | ||
|
c7a81d1493 | ||
|
914b680aed | ||
|
c94fd8e83b | ||
|
d567d67caa | ||
|
7842520848 | ||
|
d105b91438 | ||
|
cf51770c2c | ||
|
2c6d84bac9 | ||
|
39e6f95c2b | ||
|
5a2bc88d20 | ||
|
e04e026a14 | ||
|
c6845a4c8d | ||
|
f5f9e3dfdb | ||
|
6c7ac35054 | ||
|
1be3778731 | ||
|
caef3c5990 | ||
|
2c1d858a2d | ||
|
5a6d98a2e2 | ||
|
d79ff077b7 | ||
|
f506ae6cea | ||
|
0468577b0b | ||
|
4e51544402 | ||
|
144d4d629b | ||
|
0bdbbd2667 | ||
|
7fbd253d3f | ||
|
b001eb43f8 | ||
|
7afc557ba8 | ||
|
7cae30076a | ||
|
3c3bdc08ad | ||
|
100d4459cd | ||
|
664a58b742 | ||
|
c397fb5f4b | ||
|
22fef445ff | ||
|
e5b41bcd66 | ||
|
1912f52bee | ||
|
900a1efaca | ||
|
d288fb360c | ||
|
e44d38bb02 | ||
|
63a4ccef16 | ||
|
0d5887b938 | ||
|
a43b7432a0 | ||
|
af94d24963 | ||
|
247f91a296 | ||
|
b20320236d | ||
|
9c2916f3fc | ||
|
e8d56dc013 | ||
|
621d6de6ce | ||
|
24973ffca0 | ||
|
4b68235389 | ||
|
9dc5793046 | ||
|
1271c9c03f | ||
|
ebec982434 | ||
|
2f1171ca76 | ||
|
11752b2562 | ||
|
88f99e9525 | ||
|
43154cf738 | ||
|
d65cc61df5 | ||
|
547d916ae2 | ||
|
2c94e358b8 | ||
|
ede5e1c311 | ||
|
61e3442ef8 | ||
|
4e7a536918 | ||
|
ecf546bc4a | ||
|
b274b8cc21 | ||
|
a08d3ebd42 | ||
|
62cdd8e993 | ||
|
64dc5e1667 | ||
|
c1e168fdbe | ||
|
6e93787e59 | ||
|
e0becb1599 | ||
|
d13c5f2986 | ||
|
1fd5a95f5a | ||
|
9f1f1ecaa3 | ||
|
c9e6518e05 | ||
|
69f9ae778d | ||
|
970cbf066a | ||
|
1c100c4cf5 | ||
|
cb7d6b964c | ||
|
8cf833b7eb | ||
|
8fe373891f | ||
|
3cacb4f80f | ||
|
65ca0fff3a | ||
|
d47cccc157 | ||
|
d69a64c32c | ||
|
b9cda8fca7 | ||
|
1136c664f1 | ||
|
1243848963 | ||
|
dce5e79f65 | ||
|
afa9e4003c | ||
|
1b6d9bd3a5 | ||
|
88ed687557 | ||
|
ebff870d54 | ||
|
d802defd6c | ||
|
ff4c24657e | ||
|
e1b666cb93 | ||
|
da834a28df | ||
|
77002dd06a | ||
|
76481bc794 | ||
|
5a5f1ad3fe | ||
|
2277e3c878 | ||
|
1bd25acb0b | ||
|
d297351211 | ||
|
2fd93e1d89 | ||
|
a5ab68d9df | ||
|
1b5b7a3cba | ||
|
6dd6a88c12 | ||
|
09833fdfb1 | ||
|
eacdb63454 | ||
|
1c781c78c0 | ||
|
b7e5a83ba3 | ||
|
3179541efe | ||
|
bda77760c8 | ||
|
140124b423 | ||
|
703d3faf27 | ||
|
a2342cfd82 | ||
|
5fbace9fa0 | ||
|
cf2a0a5f39 | ||
|
212ee21b54 | ||
|
71e8a30186 | ||
|
8a525039af | ||
|
836c777680 | ||
|
a3472da181 | ||
|
a06812f93f | ||
|
54dd2cfe1d | ||
|
01d183c6a9 | ||
|
3eb2b71955 | ||
|
b23a9c76e7 | ||
|
0ce51df108 | ||
|
d48f1c1c5f | ||
|
a73f656d06 | ||
|
4cf596338d | ||
|
e3e2609f7e | ||
|
3750c672a7 | ||
|
f7887d8720 | ||
|
7e3a2fdb0f | ||
|
7aa93a735d | ||
|
ad92c09f5b | ||
|
91c22a2592 | ||
|
d3c65edcf5 | ||
|
d762d401cd | ||
|
29df0da8b5 | ||
|
d7e2a6b3b1 | ||
|
a9e3104ce1 | ||
|
f93303bac7 | ||
|
71acf4eadb | ||
|
d8796efd89 | ||
|
b1e82223ee | ||
|
039f5eb733 | ||
|
d14e05e748 | ||
|
76eb79d7fa | ||
|
4abfca4cb2 | ||
|
e82d5fe48f | ||
|
67daa6e616 | ||
|
427146d59b | ||
|
885bb58ca9 | ||
|
3324b32a29 | ||
|
5edb9098ce | ||
|
e7bc436b52 | ||
|
be499d6f38 | ||
|
b8b3405efe | ||
|
0e38c39b62 | ||
|
bde789b320 | ||
|
d7737542e4 | ||
|
0e2de4f13a | ||
|
fb6106267c | ||
|
5825b7c15a | ||
|
e6eda67d0c | ||
|
a9ec1c08a7 | ||
|
9811740ead | ||
|
71ce58500a | ||
|
30d9d3e956 | ||
|
b741807f8c | ||
|
30a0d34151 | ||
|
59232642c4 | ||
|
38f166f090 | ||
|
3784da0d2f | ||
|
189cec3ca2 | ||
|
f0dc04ad04 | ||
|
092ff42ab4 | ||
|
4477cbeed1 | ||
|
7c4b3cab29 | ||
|
d4cb3b8410 | ||
|
d4addc65ab | ||
|
2a7a4df342 | ||
|
41d7e6c63a | ||
|
28ab94e925 | ||
|
a48a78088c | ||
|
64d47f3637 | ||
|
b995843a5b | ||
|
a671e1a05d | ||
|
d25fa21a59 | ||
|
18762dffce | ||
|
1501360f4b | ||
|
27a65fdee7 | ||
|
808bddb586 | ||
|
9656779d48 | ||
|
bd70afa688 | ||
|
c2721fd545 | ||
|
d1892bd6ec | ||
|
9cd8dec397 | ||
|
8a550b625c | ||
|
ad633aa639 | ||
|
d64bc880b2 | ||
|
007b10e79c | ||
|
620ec78cc0 | ||
|
d219e29f0c | ||
|
009e897cab | ||
|
956f3d1880 | ||
|
fe862f64b1 | ||
|
5423958366 | ||
|
e248f744d5 | ||
|
67a76155b1 | ||
|
6edf38548f | ||
|
0b23d8e935 | ||
|
66e253ab95 | ||
|
456f1df332 | ||
|
0d62129d32 | ||
|
bf72800676 | ||
|
af6d9e7a8b | ||
|
5d90439a51 | ||
|
b529f538b8 | ||
|
73d249fc56 | ||
|
78345d8bdf | ||
|
58a2140b92 | ||
|
e380ded5e1 | ||
|
dd12d3f848 | ||
|
668cfb5da4 | ||
|
e2b5799a01 | ||
|
1cd833d2c4 | ||
|
4a4498cd53 | ||
|
549629440d | ||
|
e324c9a90a | ||
|
d94dc37cfe | ||
|
7102ca5394 | ||
|
48b2238a1f | ||
|
b8682f56ac | ||
|
28e26f3130 | ||
|
ca1d97a3c2 | ||
|
54bddeee9c | ||
|
51673665b5 | ||
|
cfed87f416 | ||
|
efb70b21df | ||
|
5a45dcdc02 | ||
|
764b3e194c | ||
|
1e6ad235c9 | ||
|
29aba92bf1 | ||
|
d3b9c43dd8 | ||
|
fe14a9933d | ||
|
8e82ee00a0 | ||
|
bbcb50ba81 | ||
|
04249c3ee9 | ||
|
a45261b530 | ||
|
18c1dc2f0e | ||
|
eb5e21d803 | ||
|
132260d84c | ||
|
d84241aad0 | ||
|
9f34b2944c | ||
|
b8cfc0525e | ||
|
9b04f3d9d6 | ||
|
feca2db24e | ||
|
73d102afed | ||
|
142cbb8f3b | ||
|
83d80c061f | ||
|
4e8293c3e3 | ||
|
3d7baf1640 | ||
|
6652e2f0e9 | ||
|
7a0b4d7895 | ||
|
12704f9929 | ||
|
6f09fb8ba5 | ||
|
182811697e | ||
|
95287980af | ||
|
f98e26e0f2 | ||
|
0dfdf6cd4d | ||
|
5192846c72 | ||
|
18172fa611 | ||
|
ca46b0a0be | ||
|
170c5d0c8a | ||
|
a46c32ed4b | ||
|
a25076b9c1 | ||
|
81bee3ac45 | ||
|
3d052e9428 | ||
|
05bc970900 | ||
|
aeeb3ca616 | ||
|
c06efbf480 | ||
|
134cbcfc5a | ||
|
0d91f8771e | ||
|
c8d705158c | ||
|
105a24d65e | ||
|
d42baf34ff | ||
|
af1cecae07 | ||
|
3e014c8285 | ||
|
b52ace5142 | ||
|
fe5112b6c1 | ||
|
377466ed1c | ||
|
33a83cc733 | ||
|
d3c3d551ff | ||
|
c162ce526f | ||
|
53c8cddccf | ||
|
a801a16062 | ||
|
90b50277d0 | ||
|
e26cfab04d | ||
|
5392395fba | ||
|
6674e8c44a | ||
|
f3ba27b138 | ||
|
12b8b4af24 | ||
|
28eb3cfdb2 | ||
|
26e94dde44 | ||
|
74a45e2772 | ||
|
d3d82de29e | ||
|
d91d56d126 | ||
|
06a6857508 | ||
|
e494860f38 | ||
|
2a27bfbfd5 | ||
|
6caf674163 | ||
|
97b2709132 | ||
|
7aea9357f3 | ||
|
b00dea6d05 | ||
|
1472375fb5 | ||
|
bc3fa0bb23 | ||
|
a60bfc5e01 | ||
|
640c98a4e8 | ||
|
221417b8b4 | ||
|
eef3dda81d | ||
|
646371f9e9 | ||
|
4336dcb2a8 | ||
|
36d95fd892 | ||
|
3c43308e71 | ||
|
f12be56d7c | ||
|
4976a8c4f2 | ||
|
c2b3aaa7d1 | ||
|
7961cf7474 | ||
|
7202bdf977 | ||
|
9098495153 | ||
|
ac454eee79 | ||
|
cc259f60f6 | ||
|
90fa9eee7d | ||
|
4e4aa33a50 | ||
|
0debdd3fe7 | ||
|
5f487c899b | ||
|
b1eef0fb7c | ||
|
5b8e58e15e | ||
|
b5af2065ee | ||
|
7ae5da914e | ||
|
9935768280 | ||
|
dd984ceb45 | ||
|
bf8f249754 | ||
|
d7948a1ce6 | ||
|
e4d9fd8a00 | ||
|
e7aa93614b | ||
|
43ad188ccb | ||
|
7b8fe11e6a | ||
|
deed327890 | ||
|
b3aaac4903 | ||
|
c35434bd8f | ||
|
bd433b55b0 | ||
|
7b921387bc | ||
|
07f6667b66 | ||
|
16f1f24a62 | ||
|
80f62e8a41 | ||
|
26f553b75b | ||
|
591522f15a | ||
|
f52f1b1df0 | ||
|
69940feac0 | ||
|
74227f8cdf | ||
|
91d4419fae | ||
|
6525dff158 | ||
|
dc322894cd | ||
|
fd0d908a68 | ||
|
3b7da61245 | ||
|
ee46bc8a48 | ||
|
94ad34768b | ||
|
12f6930486 | ||
|
bb1ceffadd | ||
|
79fd92a1b4 | ||
|
160ff94a22 | ||
|
3d19588e57 | ||
|
b6596494ff | ||
|
8b9d5e772e | ||
|
8030750b07 | ||
|
1a65f89575 | ||
|
e41a16cb58 | ||
|
1326538e5b | ||
|
30a39afbf6 | ||
|
432d694455 | ||
|
d42171d2ed | ||
|
f4c52a41fa | ||
|
90a9325d6d | ||
|
0920d1e745 | ||
|
e1e4c8bc03 | ||
|
a9f91b7acd | ||
|
de72a23fc6 | ||
|
0541433028 | ||
|
97bf6d8046 | ||
|
10182ebb7e | ||
|
951c516615 | ||
|
66ff74eb97 | ||
|
0f05efbcf4 | ||
|
bbd65a19b5 | ||
|
d38d512d42 | ||
|
0c2dc580d8 | ||
|
87888a9a93 | ||
|
c23c0fbe4c | ||
|
eda784da40 | ||
|
4683def6dd | ||
|
03a1a4b9f1 | ||
|
3c7754dcf3 | ||
|
bc091d52c4 | ||
|
005a86f551 | ||
|
0c60680e9a | ||
|
62cfeef61d | ||
|
5d1071bc9a | ||
|
f6d210b67a | ||
|
8add5efe66 | ||
|
387fe55610 | ||
|
ac8104e27e | ||
|
3751c895fc | ||
|
3ec653cf38 | ||
|
d0186344e8 | ||
|
aa53a5a70f | ||
|
ff75cc9874 | ||
|
fff3fb18fb | ||
|
fa2d84831e | ||
|
99ca04ce8c | ||
|
e6cb148ce1 | ||
|
a30e9fbcbc | ||
|
81391bc084 | ||
|
90e4a800d5 | ||
|
1332fd1b67 | ||
|
7b2f96bc6d | ||
|
79fcdf7a28 | ||
|
28ee19779c | ||
|
3f5382a317 | ||
|
f968dbccd3 | ||
|
6514a3b782 | ||
|
0b2f2a27ac | ||
|
39a8579c27 | ||
|
c1e25135fd | ||
|
02e093d407 | ||
|
2d48fe13c8 | ||
|
994cc25ec4 | ||
|
147f70fa3e | ||
|
eee74a25cd | ||
|
4de75fad31 | ||
|
8f0198149a | ||
|
938b66f232 | ||
|
79fdde3c0c | ||
|
6320cf778c | ||
|
79cbea0a13 | ||
|
40e3e0105a | ||
|
a6c489b553 | ||
|
ebea51a8a8 | ||
|
97041e8e82 | ||
|
82a9aee417 | ||
|
f499ce6c95 | ||
|
c0fdee5787 | ||
|
0909ad3201 | ||
|
e1b36b78e3 | ||
|
8fa78ef059 | ||
|
96a03a3edf | ||
|
8c0c4e2b6e | ||
|
5747d12530 | ||
|
3c8469008f | ||
|
7ff5c082bc | ||
|
1431ef9bc0 | ||
|
20ae993e51 | ||
|
859877a00d | ||
|
50f5bdcfe3 | ||
|
41d7d8e7d5 | ||
|
b8022822bb | ||
|
9abccbfda5 | ||
|
124acfbf12 | ||
|
6febdf7399 | ||
|
2cbc1c4161 | ||
|
676a13f30d | ||
|
17424ad554 | ||
|
fb46c65d25 | ||
|
6a9a279a32 | ||
|
72e06ef4fe | ||
|
7587876820 | ||
|
a953fe4e1e | ||
|
f3f62e9e7b |
2134 changed files with 2037122 additions and 1119062 deletions
8
.clang-format
Normal file
8
.clang-format
Normal file
|
@ -0,0 +1,8 @@
|
|||
Language: Cpp
|
||||
BasedOnStyle: LLVM
|
||||
IndentWidth: 2
|
||||
TabWidth: 2
|
||||
ColumnLimit: 0
|
||||
UseTab: Never
|
||||
BreakBeforeBraces: Attach
|
||||
AlwaysBreakTemplateDeclarations: true
|
|
@ -1,3 +0,0 @@
|
|||
ignore:
|
||||
- "./examples/*"
|
||||
- "./tests/*"
|
175
.github/workflows/ci.yml
vendored
Normal file
175
.github/workflows/ci.yml
vendored
Normal file
|
@ -0,0 +1,175 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Install Dependencies
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Setup Toolchain
|
||||
if: steps.cache-toolchain.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build
|
||||
cd build
|
||||
../configure --tooldir=$TOOLDIR
|
||||
ci/toolchain_install.sh --all
|
||||
|
||||
- name: Setup Third Party
|
||||
if: steps.cache-thirdparty.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make -C third_party > /dev/null
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: setup
|
||||
strategy:
|
||||
matrix:
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Run Build
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build${{ matrix.xlen }}
|
||||
cd build${{ matrix.xlen }}
|
||||
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
|
||||
source ci/toolchain_env.sh
|
||||
make software -s > /dev/null
|
||||
make tests -s > /dev/null
|
||||
|
||||
- name: Upload Build Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
tests:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: build
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm, vector]
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo bash ./ci/install_dependencies.sh
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-toolchain-
|
||||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
restore-keys: |
|
||||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Download Build Artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd build${{ matrix.xlen }}
|
||||
source ci/toolchain_env.sh
|
||||
chmod -R +x . # Ensure all files have executable permissions
|
||||
if [ "${{ matrix.name }}" == "regression" ]; then
|
||||
./ci/regression.sh --unittest
|
||||
./ci/regression.sh --isa
|
||||
./ci/regression.sh --kernel
|
||||
./ci/regression.sh --regression
|
||||
else
|
||||
./ci/regression.sh --${{ matrix.name }}
|
||||
fi
|
||||
|
||||
complete:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: tests
|
||||
|
||||
steps:
|
||||
- name: Check Completion
|
||||
run: echo "All matrix jobs passed"
|
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
/build*
|
||||
/.vscode
|
||||
*.cache
|
||||
*.code-workspace
|
12
.gitmodules
vendored
12
.gitmodules
vendored
|
@ -1,3 +1,9 @@
|
|||
[submodule "hw/rtl/fp_cores/fpnew"]
|
||||
path = hw/rtl/fp_cores/fpnew
|
||||
url = https://github.com/pulp-platform/fpnew.git
|
||||
[submodule "third_party/softfloat"]
|
||||
path = third_party/softfloat
|
||||
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
|
||||
[submodule "third_party/ramulator"]
|
||||
path = third_party/ramulator
|
||||
url = https://github.com/CMU-SAFARI/ramulator2.git
|
||||
[submodule "third_party/cvfpu"]
|
||||
path = third_party/cvfpu
|
||||
url = https://github.com/openhwgroup/cvfpu.git
|
||||
|
|
58
.travis.yml
58
.travis.yml
|
@ -1,58 +0,0 @@
|
|||
language: cpp
|
||||
dist: bionic
|
||||
os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- build-essential
|
||||
- valgrind
|
||||
- verilator
|
||||
- yosys
|
||||
|
||||
install:
|
||||
# Set environments
|
||||
- export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
|
||||
- export VERILATOR_ROOT=/opt/verilator
|
||||
- export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
# Install toolchain
|
||||
- ci/toolchain_install.sh -all
|
||||
# clone build directory
|
||||
- make -s
|
||||
|
||||
# stages ordering
|
||||
stages:
|
||||
- test
|
||||
|
||||
jobs:
|
||||
include:
|
||||
- stage: test
|
||||
name: coverage
|
||||
script: cp -r $PWD ../build1 && cd ../build1 && ./ci/regression.sh -coverage
|
||||
- stage: test
|
||||
name: cluster
|
||||
script: cp -r $PWD ../build2 && cd ../build2 && ./ci/regression.sh -cluster
|
||||
- stage: test
|
||||
name: debug
|
||||
script: cp -r $PWD ../build3 && cd ../build3 && ./ci/regression.sh -debug
|
||||
- stage: test
|
||||
name: config
|
||||
script: cp -r $PWD ../build4 && cd ../build4 && ./ci/regression.sh -config
|
||||
- stage: test
|
||||
name: stress
|
||||
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/regression.sh -stress
|
||||
- stage: test
|
||||
name: compiler
|
||||
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/test_compiler.sh
|
||||
|
||||
after_success:
|
||||
# Gather code coverage
|
||||
- lcov --directory driver --capture --output-file driver.cov # capture trace
|
||||
- lcov --directory simx --capture --output-file simx.cov # capture trace
|
||||
- lcov --list driver.cov # output coverage data for debugging
|
||||
- lcov --list simx.cov # output coverage data for debugging
|
||||
# Upload coverage report
|
||||
- bash <(curl -s https://codecov.io/bash) -f driver.cov
|
||||
- bash <(curl -s https://codecov.io/bash) -f simx.cov
|
20
Dockerfile.dev
Normal file
20
Dockerfile.dev
Normal file
|
@ -0,0 +1,20 @@
|
|||
FROM ubuntu:20.04
|
||||
|
||||
LABEL "Udit Subramanya"="usubramanya3@gatech.edu"
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential valgrind git wget libpng-dev libboost-all-dev uuid-dev ccache cmake
|
||||
|
||||
# Third-party PPA used to install gcc-11/g++-11 on the Ubuntu 20.04 base image
|
||||
RUN apt-get install -y manpages-dev software-properties-common
|
||||
RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
|
||||
RUN apt-get install -y gcc-11 g++-11
|
||||
|
||||
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11
|
||||
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11
|
||||
|
||||
# create a directory for mounting the volume
|
||||
WORKDIR /root/vortex
|
221
LICENSE
221
LICENSE
|
@ -1,24 +1,201 @@
|
|||
Copyright (c) <2020>, <Georgia Institute of Technology>
|
||||
All rights reserved.
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Georgia Institute of Technology nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
14
Makefile
14
Makefile
|
@ -1,14 +0,0 @@
|
|||
|
||||
all:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C driver
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C simX
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C driver clean
|
||||
$(MAKE) -C simX clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean
|
74
Makefile.in
Normal file
74
Makefile.in
Normal file
|
@ -0,0 +1,74 @@
|
|||
include config.mk
|
||||
|
||||
.PHONY: build software tests
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
build:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C tests
|
||||
|
||||
software:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime/stub
|
||||
|
||||
tests:
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean-build:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean
|
||||
|
||||
clean: clean-build
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party clean
|
||||
|
||||
# Install setup
|
||||
KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
|
||||
KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
|
||||
RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
|
||||
RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
|
||||
|
||||
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
|
||||
KERNEL_LIBS = $(wildcard kernel/*.a)
|
||||
RUNTIME_HEADERS = $(wildcard $(VORTEX_HOME)/runtime/include/*.h)
|
||||
RUNTIME_LIBS = $(wildcard runtime/*.so)
|
||||
|
||||
INSTALL_DIRS = $(KERNEL_LIB_DST) $(RUNTIME_LIB_DST) $(KERNEL_INC_DST) $(RUNTIME_INC_DST)
|
||||
|
||||
$(INSTALL_DIRS):
|
||||
mkdir -p $@
|
||||
|
||||
$(KERNEL_INC_DST)/VX_types.h: hw/VX_types.h | $(KERNEL_INC_DST)
|
||||
cp $< $@
|
||||
|
||||
$(KERNEL_INC_DST)/%.h: $(VORTEX_HOME)/kernel/include/%.h | $(KERNEL_INC_DST)
|
||||
cp $< $@
|
||||
|
||||
$(RUNTIME_INC_DST)/%.h: $(VORTEX_HOME)/runtime/include/%.h | $(RUNTIME_INC_DST)
|
||||
cp $< $@
|
||||
|
||||
$(KERNEL_LIB_DST)/%.a: kernel/%.a | $(KERNEL_LIB_DST)
|
||||
cp $< $@
|
||||
|
||||
$(RUNTIME_LIB_DST)/%.so: runtime/%.so | $(RUNTIME_LIB_DST)
|
||||
cp $< $@
|
||||
|
||||
install: $(INSTALL_DIRS) \
|
||||
$(KERNEL_INC_DST)/VX_types.h \
|
||||
$(KERNEL_HEADERS:$(VORTEX_HOME)/kernel/include/%=$(KERNEL_INC_DST)/%) \
|
||||
$(RUNTIME_HEADERS:$(VORTEX_HOME)/runtime/include/%=$(RUNTIME_INC_DST)/%) \
|
||||
$(KERNEL_LIBS:kernel/%=$(KERNEL_LIB_DST)/%) \
|
||||
$(RUNTIME_LIBS:runtime/%=$(RUNTIME_LIB_DST)/%)
|
177
README.md
177
README.md
|
@ -1,75 +1,134 @@
|
|||
[](https://travis-ci.com/vortexgpgpu/vortex)
|
||||
[](https://codecov.io/gh/vortexgpgpu/vortex)
|
||||
# Vortex GPGPU
|
||||
|
||||
# Vortex RISC-V GPGPU
|
||||
Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple **backend drivers**, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs, all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx before going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program.
|
||||
|
||||
Vortex is a full-system RISCV-based GPGPU processor.
|
||||
## Website
|
||||
Vortex news can be found on its [website](https://vortex.cc.gatech.edu/)
|
||||
|
||||
## Citation
|
||||
```
|
||||
@inproceedings{10.1145/3466752.3480128,
|
||||
author = {Tine, Blaise and Yalamarthy, Krishna Praveen and Elsabbagh, Fares and Hyesoon, Kim},
|
||||
title = {Vortex: Extending the RISC-V ISA for GPGPU and 3D-Graphics},
|
||||
year = {2021},
|
||||
isbn = {9781450385572},
|
||||
publisher = {Association for Computing Machinery},
|
||||
address = {New York, NY, USA},
|
||||
url = {https://doi.org/10.1145/3466752.3480128},
|
||||
doi = {10.1145/3466752.3480128},
|
||||
abstract = {The importance of open-source hardware and software has been increasing. However, despite GPUs being one of the more popular accelerators across various applications, there is very little open-source GPU infrastructure in the public domain. We argue that one of the reasons for the lack of open-source infrastructure for GPUs is rooted in the complexity of their ISA and software stacks. In this work, we first propose an ISA extension to RISC-V that supports GPGPUs and graphics. The main goal of the ISA extension proposal is to minimize the ISA changes so that the corresponding changes to the open-source ecosystem are also minimal, which makes for a sustainable development ecosystem. To demonstrate the feasibility of the minimally extended RISC-V ISA, we implemented the complete software and hardware stacks of Vortex on FPGA. Vortex is a PCIe-based soft GPU that supports OpenCL and OpenGL. Vortex can be used in a variety of applications, including machine learning, graph analytics, and graphics rendering. Vortex can scale up to 32 cores on an Altera Stratix 10 FPGA, delivering a peak performance of 25.6 GFlops at 200 Mhz.},
|
||||
booktitle = {MICRO-54: 54th Annual IEEE/ACM International Symposium on Microarchitecture},
|
||||
pages = {754–766},
|
||||
numpages = {13},
|
||||
keywords = {reconfigurable computing, memory systems., computer graphics},
|
||||
location = {Virtual Event, Greece},
|
||||
series = {MICRO '21}
|
||||
}
|
||||
```
|
||||
|
||||
## Specifications
|
||||
|
||||
- Support RISC-V RV32IMF ISA
|
||||
- Scalability: 1 to 32 cores with optional L2 and L3 caches
|
||||
- Software: OpenCL 1.2 Support
|
||||
- Supported FPGAs:
|
||||
- Intel Arria 10
|
||||
- Intel Stratix 10
|
||||
- Support RISC-V RV32IMAF and RV64IMAFD
|
||||
|
||||
- Microarchitecture:
|
||||
- configurable number of cores, warps, and threads.
|
||||
- configurable number of ALU, FPU, LSU, and SFU units per core.
|
||||
- configurable pipeline issue width.
|
||||
- optional local memory, L1, L2, and L3 caches.
|
||||
- Software:
|
||||
- OpenCL 1.2 Support.
|
||||
- Supported FPGAs:
|
||||
- Altera Arria 10
|
||||
- Altera Stratix 10
|
||||
- Xilinx Alveo U50, U250, U280
|
||||
- Xilinx Versal VCK5000
|
||||
|
||||
## Directory structure
|
||||
|
||||
- `doc`: [Documentation](doc/Vortex.md).
|
||||
|
||||
- `docs`: [Documentation](docs/index.md).
|
||||
- `hw`: Hardware sources.
|
||||
|
||||
- `driver`: Host driver software.
|
||||
|
||||
- `driver`: Host drivers repository.
|
||||
- `runtime`: Kernel Runtime software.
|
||||
|
||||
- `simX`: Cycle-approximate simulator.
|
||||
|
||||
- `sim`: Simulators repository.
|
||||
- `tests`: Tests repository.
|
||||
|
||||
- `ci`: Continuous integration scripts.
|
||||
|
||||
- `miscs`: Miscellaneous resources.
|
||||
|
||||
## Basic Installation
|
||||
## Quick Start
|
||||
If you are interested in a stable release of Vortex, you can download the latest release [here](https://github.com/vortexgpgpu/vortex/releases/latest). Otherwise, you can pull the most recent, but (potentially) unstable version as shown below. The following steps demonstrate how to build and run Vortex with the default driver: SimX. If you are interested in a different backend, look [here](docs/simulation.md).
|
||||
|
||||
### Install development tools
|
||||
### Supported OS Platforms
|
||||
- Ubuntu 18.04, 20.04, 22.04, 24.04
|
||||
- CentOS 7
|
||||
### Toolchain Dependencies
|
||||
The following dependencies will be fetched prebuilt by `toolchain_install.sh`.
|
||||
- [POCL](http://portablecl.org/)
|
||||
- [LLVM](https://llvm.org/)
|
||||
- [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain)
|
||||
- [Verilator](https://www.veripool.org/verilator)
|
||||
- [cvfpu](https://github.com/openhwgroup/cvfpu.git)
|
||||
- [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git)
|
||||
- [Ramulator](https://github.com/CMU-SAFARI/ramulator2.git)
|
||||
- [Yosys](https://github.com/YosysHQ/yosys)
|
||||
- [Sv2v](https://github.com/zachjs/sv2v)
|
||||
### Install Vortex codebase
|
||||
```sh
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
cd vortex
|
||||
```
|
||||
### Install system dependencies
|
||||
```sh
|
||||
# ensure dependent libraries are present
|
||||
sudo ./ci/install_dependencies.sh
|
||||
```
|
||||
### Configure your build folder
|
||||
```sh
|
||||
mkdir build
|
||||
cd build
|
||||
# for 32bit
|
||||
../configure --xlen=32 --tooldir=$HOME/tools
|
||||
# for 64bit
|
||||
../configure --xlen=64 --tooldir=$HOME/tools
|
||||
```
|
||||
### Install prebuilt toolchain
|
||||
```sh
|
||||
./ci/toolchain_install.sh --all
|
||||
```
|
||||
### Set environment variables
|
||||
```sh
|
||||
# should always run before using the toolchain!
|
||||
source ./ci/toolchain_env.sh
|
||||
```
|
||||
### Building Vortex
|
||||
```sh
|
||||
make -s
|
||||
```
|
||||
### Quick demo running vecadd OpenCL kernel on 2 cores
|
||||
```sh
|
||||
./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
```
|
||||
|
||||
$ sudo apt-get install build-essential
|
||||
$ sudo apt-get install git
|
||||
|
||||
### Install gnu-riscv-tools
|
||||
|
||||
$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
|
||||
|
||||
$ sudo apt-get -y install \
|
||||
binutils build-essential libtool texinfo \
|
||||
gzip zip unzip patchutils curl git \
|
||||
make cmake ninja-build automake bison flex gperf \
|
||||
grep sed gawk python bc \
|
||||
zlib1g-dev libexpat1-dev libmpc-dev \
|
||||
libglib2.0-dev libfdt-dev libpixman-1-dev
|
||||
$ git clone https://github.com/riscv/riscv-gnu-toolchain
|
||||
$ cd riscv-gnu-toolchain
|
||||
$ git submodule update --init --recursive
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ ../configure --prefix=$RISCV_TOOLCHAIN_PATH --with-arch=rv32im --with-abi=ilp32
|
||||
$ make -j`nproc`
|
||||
$ make -j`nproc` build-qemu
|
||||
|
||||
### Install Verilator
|
||||
|
||||
You need to build the latest version using the instructions on their website
|
||||
$ https://www.veripool.org/projects/verilator/wiki/Installing
|
||||
|
||||
### Install Vortex
|
||||
|
||||
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ cd Vortex
|
||||
$ make
|
||||
|
||||
### Quick Test running OpenCL vecadd sample on 2 cores
|
||||
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
### Common Developer Tips
|
||||
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
|
||||
```sh
|
||||
../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
|
||||
make -s
|
||||
make install
|
||||
```
|
||||
- Building Vortex 64-bit requires setting --xlen=64 configure option.
|
||||
```sh
|
||||
../configure --xlen=64 --tooldir=$HOME/tools
|
||||
```
|
||||
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
|
||||
```sh
|
||||
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder.
|
||||
```sh
|
||||
../configure
|
||||
```
|
||||
- To debug the GPU, the simulation can generate a runtime trace for analysis. See /docs/debugging.md for more information.
|
||||
```sh
|
||||
./ci/blackbox.sh --app=demo --debug=3
|
||||
```
|
||||
- For additional information, check out the [documentation](docs/index.md)
|
||||
|
|
4
RELEASE
4
RELEASE
|
@ -1,4 +0,0 @@
|
|||
|
||||
Release Notes!
|
||||
|
||||
* 07/01/2020 - LKG FPGA build - Passed basic, demo, vecadd kernels.
|
23
TODO
23
TODO
|
@ -1,23 +0,0 @@
|
|||
|
||||
|
||||
|
||||
Functionality:
|
||||
1) vx_cl_warpSpawn()
|
||||
-> To be used by pocl->ops->run
|
||||
|
||||
2) newlib Integration (LoadFile(""))
|
||||
-> To be used by the Rhinio benchmarks
|
||||
|
||||
3) POCL OPS Vortex Suite
|
||||
|
||||
Performance:
|
||||
1) Icache doesn't need SEND_MEM_REQUEST Stage
|
||||
-> Blocks are never dirty, so why not evict right away
|
||||
|
||||
2) Branch not taken speculation
|
||||
|
||||
3) Runtime -02 not running on RTL, and -03 not running on RTL and Emulator
|
||||
|
||||
|
||||
Vector:
|
||||
1) Cycle accurate simulator (would require Cache Simulator)
|
358
ci/blackbox.sh
358
ci/blackbox.sh
|
@ -1,179 +1,205 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
ROOT_DIR=$SCRIPT_DIR/..
|
||||
|
||||
show_usage()
|
||||
{
|
||||
echo "Vortex BlackBox Test Driver v1.0"
|
||||
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim|simx] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
|
||||
echo "Usage: $0 [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=#name] [--app=#app] [--args=#args] [--debug=#level] [--scope] [--perf=#class] [--rebuild=#n] [--log=logfile] [--help]]"
|
||||
}
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
VORTEX_HOME=$SCRIPT_DIR/..
|
||||
# Print the usage line followed by the accepted values of the main options.
# The option names listed here mirror the ones shown by show_usage
# (--driver, --app, --perf, --rebuild).
show_help()
{
    show_usage
    echo " where"
    echo "--driver: gpu, simx, rtlsim, opae, xrt"
    echo "--app: any subfolder test under regression or opencl"
    # the profiling class is passed through --perf=#class
    echo "--perf: 0=disable, 1=pipeline, 2=memsys"
    echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp"
}
|
||||
|
||||
DRIVER=vlsim
|
||||
APP=sgemm
|
||||
CLUSTERS=1
|
||||
CORES=2
|
||||
WARPS=4
|
||||
THREADS=4
|
||||
L2=0
|
||||
L3=0
|
||||
DEBUG=0
|
||||
SCOPE=0
|
||||
HAS_ARGS=0
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--driver=*)
|
||||
DRIVER=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--app=*)
|
||||
APP=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--clusters=*)
|
||||
CLUSTERS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--cores=*)
|
||||
CORES=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--warps=*)
|
||||
WARPS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--threads=*)
|
||||
THREADS=${i#*=}
|
||||
shift
|
||||
;;
|
||||
--l2cache)
|
||||
L2=1
|
||||
shift
|
||||
;;
|
||||
--l3cache)
|
||||
L3=1
|
||||
shift
|
||||
;;
|
||||
--debug)
|
||||
DEBUG=1
|
||||
shift
|
||||
;;
|
||||
--scope)
|
||||
SCOPE=1
|
||||
CORES=1
|
||||
shift
|
||||
;;
|
||||
--perf)
|
||||
PERF_FLAG=-DPERF_ENABLE
|
||||
shift
|
||||
;;
|
||||
--args=*)
|
||||
ARGS=${i#*=}
|
||||
HAS_ARGS=1
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
show_usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
show_usage
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
case $DRIVER in
|
||||
rtlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/rtlsim
|
||||
DRIVER_EXTRA=
|
||||
CLEAN_TOKEN=clean
|
||||
;;
|
||||
vlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=vlsim
|
||||
CLEAN_TOKEN=clean-vlsim
|
||||
;;
|
||||
asesim)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=asesim
|
||||
CLEAN_TOKEN=clean-asesim
|
||||
;;
|
||||
fpga)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/opae
|
||||
DRIVER_EXTRA=fpga
|
||||
CLEAN_TOKEN=clean-fpga
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$VORTEX_HOME/driver/simx
|
||||
DRIVER_EXTRA=
|
||||
CLEAN_TOKEN=clean
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
|
||||
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/tests/regression/$APP
|
||||
else
|
||||
echo "Application folder found: $APP"
|
||||
exit -1
|
||||
fi
|
||||
|
||||
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG $CONFIGS"
|
||||
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
|
||||
make -C $DRIVER_PATH $CLEAN_TOKEN
|
||||
|
||||
status=0
|
||||
|
||||
if [ $DEBUG -eq 1 ]
|
||||
then
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
DEBUG=1 SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
add_option() {
|
||||
if [ -n "$1" ]; then
|
||||
echo "$1 $2"
|
||||
else
|
||||
DEBUG=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
fi
|
||||
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER > run.log 2>&1
|
||||
status=$?
|
||||
else
|
||||
make -C $APP_PATH run-$DRIVER > run.log 2>&1
|
||||
status=$?
|
||||
echo "$2"
|
||||
fi
|
||||
|
||||
if [ -f "$APP_PATH/trace.vcd" ]
|
||||
then
|
||||
}
|
||||
|
||||
# Reset every setting controlled from the command line to its built-in
# default. CONFIGS is carried over unchanged from the caller's environment.
DEFAULTS() {
    # what to run and on which driver
    DRIVER=simx
    APP=sgemm
    HAS_ARGS=0

    # diagnostics
    DEBUG=0
    DEBUG_LEVEL=0
    SCOPE=0
    PERF_CLASS=0
    LOGFILE=run.log

    # build control
    CONFIGS="$CONFIGS"
    REBUILD=2
    TEMPBUILD=0
}
|
||||
|
||||
# Parse the command-line options into the global settings.
#
# Starts from DEFAULTS, then applies each "--name[=value]" argument in order.
# Hardware options are appended to CONFIGS as -D defines through add_option.
# --help prints the help text and exits 0; an unknown option prints the usage
# line and exits 1. --rebuild=3 is translated into a forced rebuild (REBUILD=1)
# that uses a temporary build directory (TEMPBUILD=1).
parse_args() {
    DEFAULTS
    for i in "$@"; do
        case $i in
            --driver=*)   DRIVER=${i#*=} ;;
            --app=*)      APP=${i#*=} ;;
            --clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;;
            --cores=*)    CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;;
            --warps=*)    CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;;
            --threads=*)  CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;;
            --l2cache)    CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;;
            --l3cache)    CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;;
            --perf=*)     CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;;
            --debug=*)    DEBUG=1; DEBUG_LEVEL=${i#*=} ;;
            --scope)      SCOPE=1; ;;
            --args=*)     HAS_ARGS=1; ARGS=${i#*=} ;;
            --rebuild=*)  REBUILD=${i#*=} ;;
            --log=*)      LOGFILE=${i#*=} ;;
            --help)       show_help; exit 0 ;;
            *)            show_usage; exit 1 ;;
        esac
    done

    # REBUILD is compared numerically here and later in the script; reject an
    # empty or non-numeric value up front instead of failing inside `test`.
    case $REBUILD in
        ''|*[!0-9]*)
            echo "Invalid rebuild mode: $REBUILD"
            show_usage
            exit 1
            ;;
    esac

    if [ "$REBUILD" -eq 3 ]; then
        REBUILD=1
        TEMPBUILD=1
    fi
}
|
||||
|
||||
# Derive DRIVER_PATH from DRIVER. "gpu" has no runtime folder (empty path);
# the simulator/FPGA drivers live under $ROOT_DIR/runtime/<driver>.
# Any other name is reported and terminates the script with status 1.
set_driver_path() {
    if [ "$DRIVER" = "gpu" ]; then
        DRIVER_PATH=""
    elif [ "$DRIVER" = "simx" ] || [ "$DRIVER" = "rtlsim" ] ||
         [ "$DRIVER" = "opae" ] || [ "$DRIVER" = "xrt" ]; then
        DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER"
    else
        echo "Invalid driver: $DRIVER"
        exit 1
    fi
}
|
||||
|
||||
# Locate the application folder for APP and store it in APP_PATH.
# The opencl test suite is searched first, then the regression suite.
# When neither contains the application the script exits with status 1.
set_app_path() {
    local suite
    for suite in opencl regression; do
        if [ -d "$ROOT_DIR/tests/$suite/$APP" ]; then
            APP_PATH="$ROOT_DIR/tests/$suite/$APP"
            return 0
        fi
    done

    echo "Application folder not found: $APP"
    exit 1
}
|
||||
|
||||
# Build the selected driver with make, silencing its standard output.
# Environment assignments (DEBUG, SCOPE, DESTDIR, CONFIGS) are collected into
# one prefix string; when any is present the command goes through eval so the
# quoted values inside the prefix are honoured.
build_driver() {
    local make_env=""

    if [ $DEBUG -ne 0 ]; then
        make_env=$(add_option "$make_env" "DEBUG=$DEBUG_LEVEL")
    fi
    if [ $SCOPE -eq 1 ]; then
        make_env=$(add_option "$make_env" "SCOPE=1")
    fi
    if [ $TEMPBUILD -eq 1 ]; then
        make_env=$(add_option "$make_env" "DESTDIR=\"$TEMPDIR\"")
    fi
    if [ -n "$CONFIGS" ]; then
        make_env=$(add_option "$make_env" "CONFIGS=\"$CONFIGS\"")
    fi

    if [ -z "$make_env" ]; then
        echo "Running: make -C $DRIVER_PATH > /dev/null"
        make -C $DRIVER_PATH > /dev/null
    else
        echo "Running: $make_env make -C $DRIVER_PATH > /dev/null"
        eval "$make_env make -C $DRIVER_PATH > /dev/null"
    fi
}
|
||||
|
||||
# Run the application's run-<driver> make target and return make's status.
# In debug mode all output is redirected into $LOGFILE; otherwise it goes to
# the terminal. Optional environment assignments (DEBUG, VORTEX_RT_PATH, OPTS)
# are prefixed to the command and evaluated through eval.
run_app() {
    local env_opts=""

    if [ $DEBUG -eq 1 ]; then
        env_opts=$(add_option "$env_opts" "DEBUG=1")
    fi
    if [ $TEMPBUILD -eq 1 ]; then
        env_opts=$(add_option "$env_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"")
    fi
    if [ $HAS_ARGS -eq 1 ]; then
        env_opts=$(add_option "$env_opts" "OPTS=\"$ARGS\"")
    fi

    if [ $DEBUG -ne 0 ] && [ -n "$env_opts" ]; then
        echo "Running: $env_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
        eval "$env_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
    elif [ $DEBUG -ne 0 ]; then
        echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
        make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
    elif [ -n "$env_opts" ]; then
        echo "Running: $env_opts make -C $APP_PATH run-$DRIVER"
        eval "$env_opts make -C $APP_PATH run-$DRIVER"
    else
        echo "Running: make -C $APP_PATH run-$DRIVER"
        make -C $APP_PATH run-$DRIVER
    fi
    # $? here is the status of the make command that ran in the taken branch
    status=$?
    return $status
}
|
||||
|
||||
main() {
|
||||
parse_args "$@"
|
||||
set_driver_path
|
||||
set_app_path
|
||||
|
||||
# execute on default installed GPU
|
||||
if [ "$DRIVER" = "gpu" ]; then
|
||||
run_app
|
||||
exit $?
|
||||
fi
|
||||
|
||||
if [ -n "$CONFIGS" ]; then
|
||||
echo "CONFIGS=$CONFIGS"
|
||||
fi
|
||||
|
||||
if [ $REBUILD -ne 0 ]; then
|
||||
BLACKBOX_CACHE=blackbox.$DRIVER.cache
|
||||
LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "")
|
||||
|
||||
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then
|
||||
make -C $DRIVER_PATH clean-driver > /dev/null
|
||||
echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE"
|
||||
fi
|
||||
fi
|
||||
|
||||
export VORTEX_PROFILING=$PERF_CLASS
|
||||
|
||||
make -C "$ROOT_DIR/hw" config > /dev/null
|
||||
make -C "$ROOT_DIR/runtime/stub" > /dev/null
|
||||
|
||||
if [ $TEMPBUILD -eq 1 ]; then
|
||||
# setup temp directory
|
||||
TEMPDIR=$(mktemp -d)
|
||||
mkdir -p "$TEMPDIR"
|
||||
# build stub driver
|
||||
echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub"
|
||||
DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
# register tempdir cleanup on exit
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
fi
|
||||
|
||||
build_driver
|
||||
run_app
|
||||
status=$?
|
||||
|
||||
if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then
|
||||
mv -f $APP_PATH/trace.vcd .
|
||||
fi
|
||||
else
|
||||
if [ $SCOPE -eq 1 ]
|
||||
then
|
||||
SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
else
|
||||
CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
|
||||
fi
|
||||
|
||||
if [ $HAS_ARGS -eq 1 ]
|
||||
then
|
||||
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
else
|
||||
make -C $APP_PATH run-$DRIVER
|
||||
status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
exit $status
|
||||
if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then
|
||||
mv -f $APP_PATH/scope.vcd .
|
||||
fi
|
||||
|
||||
exit $status
|
||||
}
|
||||
|
||||
main "$@"
|
41
ci/datagen.py
Executable file
41
ci/datagen.py
Executable file
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import struct
|
||||
import random
|
||||
import sys
|
||||
|
||||
def create_binary_file(n, filename):
    """Write a binary file holding a count followed by that many random floats.

    Layout: one 4-byte signed integer (the count ``n``) followed by ``n``
    4-byte IEEE 754 single-precision values drawn from ``random.random()``,
    all in the machine's native byte order.

    Args:
        n: Number of floats to generate; must be non-negative.
        filename: Path of the file to create (overwritten if it exists).

    Raises:
        ValueError: If ``n`` is negative (the header would not describe the data).
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    # '=' keeps native byte order but pins the standard sizes, so the header
    # and every sample are exactly 4 bytes regardless of the platform's C types.
    pack_float = struct.Struct('=f').pack

    with open(filename, 'wb') as f:
        f.write(struct.pack('=i', n))
        # Build the payload in one buffer rather than issuing n tiny writes.
        f.write(b''.join(pack_float(random.random()) for _ in range(n)))
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are expected: the count and the path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: script.py N filename")
        sys.exit(1)

    count_text, filename = cli_args
    n = int(count_text)

    create_binary_file(n, filename)
    print(f"Created binary file '{filename}' containing {n} floats.")
|
46
ci/install_dependencies.sh
Executable file
46
ci/install_dependencies.sh
Executable file
|
@ -0,0 +1,46 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -e
|
||||
|
||||
# Report whether the installed gcc is older than version 11.
# Returns 0 when `gcc -dumpversion` compares lower than 11, 1 otherwise.
check_gcc_version() {
    if dpkg --compare-versions "$(gcc -dumpversion)" lt 11; then
        return 0
    fi
    return 1
}
|
||||
|
||||
# Refresh the package index before installing anything.
apt-get update -y

# System packages required by the build.
apt-get install -y \
    build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache cmake libffi7

# Install GCC 11 and make it the default only when the current gcc is older.
if ! check_gcc_version; then
    echo "GCC version is 11 or greater. No need to install GCC 11."
else
    echo "GCC version is less than 11. Installing GCC 11..."
    add-apt-repository -y ppa:ubuntu-toolchain-r/test
    apt-get update
    apt-get install -y g++-11 gcc-11
    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
fi
|
|
@ -1,69 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
OS_DIR=ubuntu/bionic
|
||||
SRCDIR=/opt
|
||||
DESTDIR=.
|
||||
|
||||
riscv()
|
||||
{
|
||||
echo "prebuilt riscv-gnu-toolchain..."
|
||||
tar -C $SRCDIR -cvjf riscv-gnu-toolchain.tar.bz2 riscv-gnu-toolchain
|
||||
split -b 50M riscv-gnu-toolchain.tar.bz2 "riscv-gnu-toolchain.tar.bz2.part"
|
||||
mv riscv-gnu-toolchain.tar.bz2.part* $DESTDIR/riscv-gnu-toolchain/$OS_DIR
|
||||
rm riscv-gnu-toolchain.tar.bz2
|
||||
}
|
||||
|
||||
llvm()
|
||||
{
|
||||
echo "prebuilt llvm-riscv..."
|
||||
tar -C $SRCDIR -cvjf llvm-riscv.tar.bz2 llvm-riscv
|
||||
split -b 50M llvm-riscv.tar.bz2 "llvm-riscv.tar.bz2.part"
|
||||
mv llvm-riscv.tar.bz2.part* $DESTDIR/llvm-riscv/$OS_DIR
|
||||
rm llvm-riscv.tar.bz2
|
||||
}
|
||||
|
||||
pocl()
|
||||
{
|
||||
echo "prebuilt pocl..."
|
||||
tar -C $SRCDIR -cvjf pocl.tar.bz2 pocl
|
||||
mv pocl.tar.bz2 $DESTDIR/pocl/$OS_DIR
|
||||
}
|
||||
|
||||
verilator()
|
||||
{
|
||||
echo "prebuilt verilator..."
|
||||
tar -C $SRCDIR -cvjf verilator.tar.bz2 verilator
|
||||
mv verilator.tar.bz2 $DESTDIR/verilator/$OS_DIR
|
||||
}
|
||||
|
||||
usage()
|
||||
{
|
||||
echo "usage: prebuilt [[-riscv] [-llvm] [-pocl] [-verilator] [-all] [-h|--help]]"
|
||||
}
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-pocl ) pocl
|
||||
;;
|
||||
-verilator ) verilator
|
||||
;;
|
||||
-riscv ) riscv
|
||||
;;
|
||||
-llvm ) llvm
|
||||
;;
|
||||
-all ) riscv
|
||||
llvm
|
||||
pocl
|
||||
verilator
|
||||
;;
|
||||
-h | --help ) usage
|
||||
exit
|
||||
;;
|
||||
* ) usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
127
ci/regression.sh
127
ci/regression.sh
|
@ -1,127 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# build sources
|
||||
make -s
|
||||
|
||||
coverage()
|
||||
{
|
||||
# coverage tests
|
||||
make -C tests/runtime run-rtlsim
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
make -C tests/regression run-vlsim
|
||||
make -C tests/opencl run-vlsim
|
||||
make -C tests/runtime run-simx
|
||||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/regression run-simx
|
||||
make -C tests/opencl run-simx
|
||||
}
|
||||
|
||||
cluster()
|
||||
{
|
||||
# warp/threads configurations
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
|
||||
|
||||
# cores clustering
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
|
||||
|
||||
# L2/L3
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
}
|
||||
|
||||
debug()
|
||||
{
|
||||
# debugging
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
|
||||
}
|
||||
|
||||
config()
|
||||
{
|
||||
# disabling M extension
|
||||
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
|
||||
|
||||
# disabling F extension
|
||||
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
|
||||
|
||||
# disable shared memory
|
||||
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
|
||||
|
||||
# using Default FPU core
|
||||
FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
|
||||
|
||||
# using FPNEW FPU core
|
||||
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
|
||||
# test cache multi-porting
|
||||
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
|
||||
# test 128-bit MEM block
|
||||
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
# test 128-bit MEM and DRAM block
|
||||
CONFIGS="-DMEM_BLOCK_SIZE=16 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
# test 27-bit DRAM address
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
# test 128-bit DRAM block
|
||||
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
|
||||
# test verilator reset values
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
|
||||
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
|
||||
|
||||
# test long memory latency
|
||||
CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
|
||||
}
|
||||
|
||||
stress()
|
||||
{
|
||||
# test pipeline stress
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 --l3cache --app=sgemm --args="-n256"
|
||||
}
|
||||
|
||||
usage()
|
||||
{
|
||||
echo "usage: regression [-coverage] [-cluster] [-debug] [-config] [-stress] [-all] [-h|--help]"
|
||||
}
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-coverage ) coverage
|
||||
;;
|
||||
-cluster ) cluster
|
||||
;;
|
||||
-debug ) debug
|
||||
;;
|
||||
-config ) config
|
||||
;;
|
||||
-stress ) stress
|
||||
;;
|
||||
-all ) coverage
|
||||
cluster
|
||||
debug
|
||||
config
|
||||
stress
|
||||
;;
|
||||
-h | --help ) usage
|
||||
exit
|
||||
;;
|
||||
* ) usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
505
ci/regression.sh.in
Executable file
505
ci/regression.sh.in
Executable file
|
@ -0,0 +1,505 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# clear blackbox cache
|
||||
rm -f blackbox.*.cache
|
||||
|
||||
# HW: add a test "VM Test" to make sure VM feature is enabled
|
||||
|
||||
XLEN=${XLEN:=@XLEN@}
|
||||
|
||||
XSIZE=$((XLEN / 8))
|
||||
|
||||
echo "Vortex Regression Test: XLEN=$XLEN"
|
||||
|
||||
# Run the software unit tests, then build the hardware unit tests with their
# standard output discarded.
unittest()
{
    make -C tests/unittest run
    make -C hw/unittest >/dev/null
}
|
||||
|
||||
# Rebuild rtlsim from scratch with a given configuration and run one ISA target.
#   $1 = CONFIGS string passed to the rtlsim build
#   $2 = make target under tests/riscv/isa
# Each step is its own command so that, under `set -e`, a failing clean or
# build aborts the script. In an `a && b && c` list only a failure of the
# final command triggers the exit, so a broken build would silently skip
# the test.
isa_rebuild_and_run()
{
    make -C sim/rtlsim clean
    CONFIGS="$1" make -C sim/rtlsim > /dev/null
    make -C tests/riscv/isa "$2"
}

# Run the RISC-V ISA tests on simx and rtlsim, then repeat the floating-point
# subsets on rtlsim for each FPU implementation (and, on 64-bit, with and
# without the D extension). Leaves sim/rtlsim cleaned on completion.
isa()
{
    echo "begin isa tests..."

    make -C sim/simx
    make -C sim/rtlsim

    make -C tests/riscv/isa run-simx
    make -C tests/riscv/isa run-rtlsim

    isa_rebuild_and_run "-DFPU_FPNEW" run-rtlsim-32f
    isa_rebuild_and_run "-DFPU_DPI" run-rtlsim-32f
    isa_rebuild_and_run "-DFPU_DSP" run-rtlsim-32f

    if [ "$XLEN" == "64" ]
    then
        isa_rebuild_and_run "-DFPU_FPNEW" run-rtlsim-64d
        isa_rebuild_and_run "-DFPU_DPI" run-rtlsim-64d
        isa_rebuild_and_run "-DFPU_DPI -DEXT_D_DISABLE" run-rtlsim-64f
        isa_rebuild_and_run "-DFPU_FPNEW -DEXT_D_DISABLE" run-rtlsim-64f
        isa_rebuild_and_run "-DFPU_DSP -DEXT_D_DISABLE" run-rtlsim-64fx
    fi

    # clean build
    make -C sim/rtlsim clean

    echo "isa tests done!"
}
|
||||
|
||||
# Build both simulators, then run the kernel tests on each of them.
kernel()
{
    local sim

    echo "begin kernel tests..."

    for sim in simx rtlsim; do
        make -C sim/$sim
    done

    for sim in simx rtlsim; do
        make -C tests/kernel run-$sim
    done

    echo "kernel tests done!"
}
|
||||
|
||||
# Build the simx and rtlsim runtimes, run the regression suite on both, then
# exercise a few targeted blackbox scenarios (barriers, temporary driver
# build, matmul).
regression()
{
    local drv

    echo "begin regression tests..."

    for drv in simx rtlsim; do
        make -C runtime/$drv
    done

    for drv in simx rtlsim; do
        make -C tests/regression run-$drv
    done

    # test global barrier
    for drv in simx opae xrt; do
        CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=$drv --app=dogfood --args="-n1 -tgbar" --cores=2
    done

    # test local barrier
    for drv in simx opae xrt; do
        ./ci/blackbox.sh --driver=$drv --app=dogfood --args="-n1 -tbar"
    done

    # test temp driver mode (--rebuild=3)
    ./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3

    # test for matmul
    CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"

    echo "regression tests done!"
}
|
||||
|
||||
# Build the simx and rtlsim runtimes, run the OpenCL suite on both, then run
# the lbm application with 8 warps on each driver.
opencl()
{
    local drv

    echo "begin opencl tests..."

    for drv in simx rtlsim; do
        make -C runtime/$drv
    done

    for drv in simx rtlsim; do
        make -C tests/opencl run-$drv
    done

    for drv in simx rtlsim; do
        ./ci/blackbox.sh --driver=$drv --app=lbm --warps=8
    done

    echo "opencl tests done!"
}
|
||||
|
||||
# Rebuild simx (simulator and runtime) with virtual memory enabled and run the
# OpenCL and regression suites; done once with the default address mode and
# once with VM_ADDR_MODE=BARE.
vm(){
    local vm_cfg

    echo "begin vm tests..."

    for vm_cfg in "-DVM_ENABLE" "-DVM_ENABLE -DVM_ADDR_MODE=BARE"; do
        make -C sim/simx clean && CONFIGS="$vm_cfg" make -C sim/simx
        make -C runtime/simx clean && CONFIGS="$vm_cfg" make -C runtime/simx
        make -C tests/opencl run-simx
        make -C tests/regression run-simx
    done

    echo "vm tests done!"
}
|
||||
|
||||
# Cache subsystem tests: each line runs one application through blackbox.sh
# with a specific cache/local-memory configuration passed via CONFIGS.
cache()
{
    echo "begin cache tests..."

    # disable local memory
    CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
    CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1

    # disable L1 cache
    CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx

    # reduce l1 line size
    CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
    # NOTE(review): the other tests spell this define L1_DISABLE; confirm
    # whether DISABLE_L1 is intentional or a typo before changing it.
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DDISABLE_L1" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
    CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx

    # test cache ways
    CONFIGS="-DICACHE_NUM_WAYS=1 -DDCACHE_NUM_WAYS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx

    # test cache banking
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=simx --app=sgemmx
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=simx --app=sgemmx --threads=8

    # replacement policy
    CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_REPL_POLICY=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_REPL_POLICY=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx

    # test writeback
    CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
    CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
    CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
    CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
    CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress

    # cache clustering
    CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2

    # L2/L3
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"

    # completion banner, matching the "... tests done!" message of the other suites
    echo "cache tests done!"
}
|
||||
|
||||
# Configuration tests, part 1: warp/thread counts, core clustering, issue
# width and functional-unit scaling. Every scenario is one blackbox.sh run;
# the loops below only enumerate the original scenarios in the original order.
config1()
{
    local drv geom topo width pe

    echo "begin configuration-1 tests..."

    # warp/threads ("warps:threads" pairs)
    for geom in 1:1 2:2 2:8 8:2; do
        ./ci/blackbox.sh --driver=rtlsim --warps=${geom%:*} --threads=${geom#*:} --app=diverge
    done
    for geom in 1:1 8:16; do
        ./ci/blackbox.sh --driver=simx --warps=${geom%:*} --threads=${geom#*:} --app=diverge
    done

    # cores clustering ($topo is intentionally unquoted: it may hold two options)
    for topo in "--cores=4" "--cores=2 --clusters=2"; do
        for drv in rtlsim simx; do
            ./ci/blackbox.sh --driver=$drv $topo --app=diverge --args="-n1"
        done
    done
    for drv in rtlsim simx; do
        CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=$drv --cores=2 --clusters=2 --app=diverge --args="-n1"
    done

    # issue width
    for drv in rtlsim simx; do
        for width in 2 4; do
            CONFIGS="-DISSUE_WIDTH=$width" ./ci/blackbox.sh --driver=$drv --app=diverge
        done
    done

    # ALU scaling
    for drv in rtlsim simx; do
        CONFIGS="-DISSUE_WIDTH=2 -DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=$drv --app=diverge
        CONFIGS="-DISSUE_WIDTH=4 -DNUM_ALU_BLOCK=4 -DNUM_ALU_LANES=4" ./ci/blackbox.sh --driver=$drv --app=diverge
    done

    # FPU scaling
    for drv in rtlsim simx; do
        CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=$drv --app=vecaddx
        CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=$drv --app=vecaddx
    done

    # FPU's PE scaling ("<unit>:<dogfood test>" pairs)
    for pe in FMA:fmadd FCVT:ftoi FDIV:fdiv FSQRT:fsqrt FNCP:fclamp; do
        CONFIGS="-D${pe%:*}_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-t${pe#*:}"
    done

    # LSU scaling
    for drv in rtlsim simx; do
        CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=$drv --app=vecaddx
        CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=$drv --app=vecaddx
    done

    echo "configuration-1 tests done!"
}
|
||||
|
||||
config2()
{
    # Configuration-2 regression group: exercises the opae/xrt drivers,
    # optional ISA extensions and the platform memory parameters.
    # Reads XLEN and XSIZE, neither of which is set inside this function.
    echo "begin configuration-2 tests..."

    # test opaesim
    ./ci/blackbox.sh --driver=opae --app=printf
    ./ci/blackbox.sh --driver=opae --app=diverge
    ./ci/blackbox.sh --driver=xrt --app=diverge

    # disable DPI
    if [ "$XLEN" == "64" ]; then
        # need to disable trig on 64-bit due to a bug inside fpnew's sqrt core.
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar"
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar"
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar"
    else
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood
    fi

    # custom program startup address
    make -C tests/regression/dogfood clean-kernel
    STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood
    ./ci/blackbox.sh --driver=simx --app=dogfood
    ./ci/blackbox.sh --driver=rtlsim --app=dogfood
    make -C tests/regression/dogfood clean-kernel

    # disabling M & F extensions
    make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i
    make -C sim/rtlsim clean

    # disabling ZICOND extension
    CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo

    # test 128-bit memory block
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress

    # test XLEN-bit memory block
    CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress

    # test memory coalescing
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8

    # test single-bank memory
    # (the same two runs apply to every XLEN, so no XLEN switch is needed here)
    CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress

    # test larger memory address (one bit wider than XLEN-dependent default runs above)
    if [ "$XLEN" == "64" ]; then
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress
    else
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress
    fi

    # test memory banks interleaving
    CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress

    # test memory ports
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8

    echo "configuration-2 tests done!"
}
|
||||
|
||||
test_csv_trace()
{
    # Build simx and rtlsim with DEBUG=3, run the 32im ISA tests on each,
    # convert both logs to CSV with ci/trace_csv.py and diff the two CSVs.
    local sim

    # test CSV trace generation
    make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
    make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null

    # collect both logs first, then convert them, in simx/rtlsim order
    for sim in simx rtlsim; do
        make -C tests/riscv/isa "run-${sim}-32im" > "run_${sim}.log"
    done
    for sim in simx rtlsim; do
        ./ci/trace_csv.py "-t${sim}" "run_${sim}.log" "-otrace_${sim}.csv"
    done

    diff trace_rtlsim.csv trace_simx.csv

    # clean build
    for sim in simx rtlsim; do
        make -C "sim/${sim}" clean
    done
}
|
||||
|
||||
debug()
{
    # Debug regression group: CSV trace comparison followed by demo runs
    # with an -O0 build and with debug/perf output on a 2x2 configuration.
    local driver

    echo "begin debugging tests..."

    test_csv_trace

    for driver in opae xrt; do
        CONFIGS="-O0" ./ci/blackbox.sh --driver=$driver --app=demo --args="-n1"
    done

    for driver in opae xrt simx; do
        CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=$driver --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
    done

    echo "debugging tests done!"
}
|
||||
|
||||
scope()
{
    # Scope regression group: runs the demo app with --scope and
    # SCOPE_DEPTH=128 on the opae and xrt drivers.
    echo "begin scope tests..."

    SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
    SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope

    # closing banner now matches the "begin scope tests..." banner above
    echo "scope tests done!"
}
|
||||
|
||||
stress()
{
    # Stress regression group: two runs built with VERILATOR_RESET_VALUE=1.
    local reset_cfg="-DVERILATOR_RESET_VALUE=1"

    echo "begin stress tests..."

    # test verilator reset values
    CONFIGS="$reset_cfg -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" \
        ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
    CONFIGS="$reset_cfg" \
        ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache

    echo "stress tests done!"
}
|
||||
|
||||
synthesis()
{
    # Synthesis regression group: clean, then run the yosys "synthesis"
    # target under the build_base prefix with a reduced configuration.
    local prefix=build_base

    echo "begin synthesis tests..."

    PREFIX=$prefix make -C hw/syn/yosys clean
    PREFIX=$prefix CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" \
        make -C hw/syn/yosys synthesis

    echo "synthesis tests done!"
}
|
||||
|
||||
vector()
{
    # Vector regression group: rebuild simx with EXT_V_ENABLE, then launch
    # the riscv-vector-tests runner with VLEN=256 and REG_TESTS=1.
    echo "begin vector tests..."

    make -C sim/simx clean && CONFIGS="-DEXT_V_ENABLE" make -C sim/simx

    TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 \
        ./tests/riscv/riscv-vector-tests/run-test.sh

    echo "vector tests done!"
}
|
||||
|
||||
show_usage()
{
    # Print the banner and the command-line switches of this script.
    # The list includes --vm and spells the short help switch as -h.
    echo "Vortex Regression Test"
    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--vm] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--all] [-h|--help]"
}
|
||||
|
||||
# Test groups to run, in the order they were requested on the command line.
declare -a tests=()
# Set to 1 by --clean: rebuild everything before running the tests.
clean=0

while [ "$1" != "" ]; do
    case $1 in
        --clean )
            clean=1
            ;;
        --unittest | --isa | --kernel | --regression | --opencl | --cache | --vm | \
        --config1 | --config2 | --debug | --scope | --stress | --synthesis | --vector )
            # the switch without its leading "--" is the name of the test function
            tests+=("${1#--}")
            ;;
        --all )
            # replaces anything selected so far with the full list
            tests=(unittest isa kernel regression opencl cache vm config1 config2 debug scope stress synthesis vector)
            ;;
        -h | --help )
            show_usage
            exit
            ;;
        * )
            show_usage
            exit 1
            ;;
    esac
    shift
done

if [ $clean -eq 1 ]; then
    make clean
    make -s
fi

start=$SECONDS

# each array entry is the name of a shell function defined in this script
for test in "${tests[@]}"; do
    $test
done

echo "Regression completed!"

duration=$(( SECONDS - start ))
awk -v t=$duration 'BEGIN{t=int(t*1000); printf "Elapsed Time: %d:%02d:%02d\n", t/3600000, t/60000%60, t/1000%60}'
|
|
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
# clear POCL cache
|
||||
rm -rf ~/.cache/pocl
|
||||
|
||||
# rebuild runtime
|
||||
make -C runtime clean
|
||||
make -C runtime
|
||||
|
||||
# rebuild drivers
|
||||
make -C driver clean
|
||||
make -C driver
|
||||
|
||||
# rebuild runtime tests
|
||||
make -C tests/runtime clean
|
||||
make -C tests/runtime
|
||||
|
||||
# rebuild regression tests
|
||||
make -C tests/regression clean-all
|
||||
make -C tests/regression
|
||||
|
||||
# rebuild opencl tests
|
||||
make -C tests/opencl clean-all
|
||||
make -C tests/opencl
|
24
ci/toolchain_env.sh.in
Executable file
24
ci/toolchain_env.sh.in
Executable file
|
@ -0,0 +1,24 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Keep a TOOLDIR supplied by the caller; otherwise fall back to the configured default.
: "${TOOLDIR:=@TOOLDIR@}"

# Tool roots exported for other scripts.
export SV2V_PATH=$TOOLDIR/sv2v
export YOSYS_PATH=$TOOLDIR/yosys

# Prepend the tool bin directories; yosys ends up first, then sv2v, then verilator.
export PATH=$YOSYS_PATH/bin:$SV2V_PATH/bin:$TOOLDIR/verilator/bin:$PATH
|
|
@ -1,81 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
|
||||
|
||||
DESTDIR="${DESTDIR:=/opt}"
|
||||
|
||||
riscv()
|
||||
{
|
||||
for x in {a..o}
|
||||
do
|
||||
wget $REPOSITORY/riscv-gnu-toolchain/ubuntu/bionic/riscv-gnu-toolchain.tar.bz2.parta$x
|
||||
done
|
||||
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
|
||||
tar -xvf riscv-gnu-toolchain.tar.bz2
|
||||
rm -f riscv-gnu-toolchain.tar.bz2*
|
||||
cp -r riscv-gnu-toolchain $DESTDIR
|
||||
rm -rf riscv-gnu-toolchain
|
||||
}
|
||||
|
||||
llvm()
|
||||
{
|
||||
for x in {a..b}
|
||||
do
|
||||
wget $REPOSITORY/llvm-riscv/ubuntu/bionic/llvm-riscv.tar.bz2.parta$x
|
||||
done
|
||||
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
|
||||
tar -xvf llvm-riscv.tar.bz2
|
||||
rm -f llvm-riscv.tar.bz2*
|
||||
cp -r llvm-riscv $DESTDIR
|
||||
rm -rf llvm-riscv
|
||||
}
|
||||
|
||||
pocl()
|
||||
{
|
||||
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
|
||||
tar -xvf pocl.tar.bz2
|
||||
rm -f pocl.tar.bz2
|
||||
cp -r pocl $DESTDIR
|
||||
rm -rf pocl
|
||||
}
|
||||
|
||||
verilator()
|
||||
{
|
||||
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
|
||||
tar -xvf verilator.tar.bz2
|
||||
rm -f verilator.tar.bz2
|
||||
cp -r verilator $DESTDIR
|
||||
rm -rf verilator
|
||||
}
|
||||
|
||||
usage()
|
||||
{
|
||||
echo "usage: toolchain_install [[-riscv] [-llvm] [-pocl] [-verilator] [-all] [-h|--help]]"
|
||||
}
|
||||
|
||||
while [ "$1" != "" ]; do
|
||||
case $1 in
|
||||
-pocl ) pocl
|
||||
;;
|
||||
-verilator ) verilator
|
||||
;;
|
||||
-riscv ) riscv
|
||||
;;
|
||||
-llvm ) llvm
|
||||
;;
|
||||
-all ) riscv
|
||||
llvm
|
||||
pocl
|
||||
verilator
|
||||
;;
|
||||
-h | --help ) usage
|
||||
exit
|
||||
;;
|
||||
* ) usage
|
||||
exit 1
|
||||
esac
|
||||
shift
|
||||
done
|
199
ci/toolchain_install.sh.in
Executable file
199
ci/toolchain_install.sh.in
Executable file
|
@ -0,0 +1,199 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
OSVERSION=${OSVERSION:=@OSVERSION@}
|
||||
|
||||
# Download, unpack and install one prebuilt package into $TOOLDIR.
#   $1   directory name the archive unpacks to (also the installed name)
#   $2   archive file name
#   $3   URL of the directory that holds the archive
#   $4.. optional part suffixes; when present the archive is downloaded as
#        "<archive>.parta<suffix>" pieces and concatenated
_install_package()
{
    local dir=$1 archive=$2 url=$3 part
    shift 3

    # By default wget saves to "<name>.1" when "<name>" already exists, so
    # leftovers of an earlier (possibly partial) download must go first.
    rm -f "$archive" "$archive".parta*

    if [ $# -eq 0 ]; then
        wget "$url/$archive"
    else
        for part in "$@"; do
            wget "$url/$archive.parta$part"
        done
        cat "$archive".parta* > "$archive"
    fi

    tar -xvf "$archive"

    # Separate statements (not an && chain) so "set -e" stops the script if
    # any step fails; quoted so a TOOLDIR containing spaces is handled as one path.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/$dir"
    mv "$dir" "$TOOLDIR"

    rm -rf "$dir" "$archive" "$archive".parta*
}

# Install the 32-bit RISC-V GNU toolchain; the number of parts depends on OSVERSION.
riscv32()
{
    local parts
    case $OSVERSION in
        "centos/7")      parts=( {a..l} ) ;;
        "ubuntu/bionic") parts=( {a..j} ) ;;
        *)               parts=( {a..k} ) ;;
    esac
    _install_package riscv32-gnu-toolchain riscv32-gnu-toolchain.tar.bz2 \
        "$REPOSITORY/riscv32-gnu-toolchain/$OSVERSION" "${parts[@]}"
}

# Install the 64-bit RISC-V GNU toolchain; the number of parts depends on OSVERSION.
riscv64()
{
    local parts
    case $OSVERSION in
        "centos/7") parts=( {a..l} ) ;;
        *)          parts=( {a..j} ) ;;
    esac
    _install_package riscv64-gnu-toolchain riscv64-gnu-toolchain.tar.bz2 \
        "$REPOSITORY/riscv64-gnu-toolchain/$OSVERSION" "${parts[@]}"
}

# Install llvm-vortex (archive name llvm-vortex2, two parts on every OSVERSION).
llvm()
{
    _install_package llvm-vortex llvm-vortex2.tar.bz2 \
        "$REPOSITORY/llvm-vortex/$OSVERSION" a b
}

# Install libcrt32 (single archive, not OSVERSION specific).
libcrt32()
{
    _install_package libcrt32 libcrt32.tar.bz2 "$REPOSITORY/libcrt32"
}

# Install libcrt64 (single archive, not OSVERSION specific).
libcrt64()
{
    _install_package libcrt64 libcrt64.tar.bz2 "$REPOSITORY/libcrt64"
}

# Install libc32 (single archive, not OSVERSION specific).
libc32()
{
    _install_package libc32 libc32.tar.bz2 "$REPOSITORY/libc32"
}

# Install libc64 (single archive, not OSVERSION specific).
libc64()
{
    _install_package libc64 libc64.tar.bz2 "$REPOSITORY/libc64"
}

# Install pocl (archive name pocl2, unpacks to "pocl").
pocl()
{
    _install_package pocl pocl2.tar.bz2 "$REPOSITORY/pocl/$OSVERSION"
}

# Install verilator.
verilator()
{
    _install_package verilator verilator.tar.bz2 "$REPOSITORY/verilator/$OSVERSION"
}

# Install sv2v.
sv2v()
{
    _install_package sv2v sv2v.tar.bz2 "$REPOSITORY/sv2v/$OSVERSION"
}

# Install yosys (three parts on every OSVERSION).
yosys()
{
    _install_package yosys yosys.tar.bz2 "$REPOSITORY/yosys/$OSVERSION" a b c
}
|
||||
|
||||
show_usage()
{
    # Print the banner followed by the list of accepted switches, one per line.
    printf '%s\n' \
        "Install Pre-built Vortex Toolchain" \
        "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [--yosys] [--all] [-h|--help]"
}
|
||||
|
||||
# Process the switches left to right; each package switch runs its installer immediately.
while [ "$1" != "" ]; do
    case $1 in
        --pocl | --verilator | --riscv32 | --riscv64 | --llvm | \
        --libcrt32 | --libcrt64 | --libc32 | --libc64 | --sv2v | --yosys )
            # the switch without its leading "--" is the installer function name
            "${1#--}"
            ;;
        --all )
            for pkg in pocl verilator llvm libcrt32 libcrt64 libc32 libc64 riscv32 riscv64 sv2v yosys; do
                "$pkg"
            done
            ;;
        -h | --help )
            show_usage
            exit
            ;;
        * )
            show_usage
            exit 1
            ;;
    esac
    shift
done
|
167
ci/toolchain_prebuilt.sh.in
Executable file
167
ci/toolchain_prebuilt.sh.in
Executable file
|
@ -0,0 +1,167 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
OSVERSION=${OSVERSION:=@OSVERSION@}
|
||||
|
||||
# Archive $TOOLDIR/<dir> into <archive> and place the result in <dest>.
#   $1  directory name below $TOOLDIR to archive
#   $2  archive file name
#   $3  destination directory (created if missing)
#   $4  pass "split" to cut the archive into 50M "<archive>.part*" pieces
_pack_package()
{
    local dir=$1 archive=$2 dest=$3 mode=${4:-whole}

    # quoted so a TOOLDIR/OSVERSION containing spaces stays a single argument
    tar -C "$TOOLDIR" -cvjf "$archive" "$dir"
    mkdir -p "$dest"

    if [ "$mode" = "split" ]; then
        # drop parts left by an earlier package so a smaller new package
        # does not end up mixed with stale trailing parts
        rm -f "$dest/$archive".part*
        split -b 50M "$archive" "$archive.part"
        mv "$archive".part* "$dest"
        rm "$archive"
    else
        mv "$archive" "$dest"
    fi
}

riscv32()
{
    echo "prebuilt riscv32-gnu-toolchain..."
    _pack_package riscv32-gnu-toolchain riscv32-gnu-toolchain.tar.bz2 "./riscv32-gnu-toolchain/$OSVERSION" split
}

riscv64()
{
    echo "prebuilt riscv64-gnu-toolchain..."
    _pack_package riscv64-gnu-toolchain riscv64-gnu-toolchain.tar.bz2 "./riscv64-gnu-toolchain/$OSVERSION" split
}

llvm()
{
    echo "prebuilt llvm-vortex2..."
    _pack_package llvm-vortex llvm-vortex2.tar.bz2 "./llvm-vortex/$OSVERSION" split
}

libcrt32()
{
    echo "prebuilt libcrt32..."
    _pack_package libcrt32 libcrt32.tar.bz2 ./libcrt32
}

libcrt64()
{
    echo "prebuilt libcrt64..."
    _pack_package libcrt64 libcrt64.tar.bz2 ./libcrt64
}

libc32()
{
    echo "prebuilt libc32..."
    _pack_package libc32 libc32.tar.bz2 ./libc32
}

libc64()
{
    echo "prebuilt libc64..."
    _pack_package libc64 libc64.tar.bz2 ./libc64
}

pocl()
{
    echo "prebuilt pocl..."
    _pack_package pocl pocl2.tar.bz2 "./pocl/$OSVERSION"
}

verilator()
{
    echo "prebuilt verilator..."
    _pack_package verilator verilator.tar.bz2 "./verilator/$OSVERSION"
}

sv2v()
{
    echo "prebuilt sv2v..."
    _pack_package sv2v sv2v.tar.bz2 "./sv2v/$OSVERSION"
}

yosys()
{
    echo "prebuilt yosys..."
    _pack_package yosys yosys.tar.bz2 "./yosys/$OSVERSION" split
}
|
||||
|
||||
show_usage()
{
    # Print the banner and the accepted switches; the yosys switch is
    # spelled --yosys, which is the form the option parser matches.
    echo "Setup Pre-built Vortex Toolchain"
    echo "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [--yosys] [--all] [-h|--help]"
}
|
||||
|
||||
# Process the switches left to right; each package switch runs its packer immediately.
while [ "$1" != "" ]; do
    case $1 in
        --pocl | --verilator | --riscv32 | --riscv64 | --llvm | \
        --libcrt32 | --libcrt64 | --libc32 | --libc64 | --sv2v | --yosys )
            # the switch without its leading "--" is the packer function name
            "${1#--}"
            ;;
        --all )
            for pkg in pocl verilator riscv32 riscv64 llvm libcrt32 libcrt64 libc32 libc64 sv2v yosys; do
                "$pkg"
            done
            ;;
        -h | --help )
            show_usage
            exit
            ;;
        * )
            show_usage
            exit 1
            ;;
    esac
    shift
done
|
291
ci/trace_csv.py
Executable file
291
ci/trace_csv.py
Executable file
|
@ -0,0 +1,291 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import csv
|
||||
import re
|
||||
import inspect
|
||||
|
||||
configs = None
|
||||
|
||||
def parse_args():
    """Parse the command line.

    Returns the argparse namespace with ``type`` (default ``simx``),
    ``csv`` (default ``trace.csv``) and the positional ``log``.
    """
    cli = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
    option_specs = (
        (('-t', '--type'), {'default': 'simx', 'help': 'log type (rtlsim or simx)'}),
        (('-o', '--csv'), {'default': 'trace.csv', 'help': 'Output CSV file'}),
        (('log',), {'help': 'Input log file'}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
|
||||
|
||||
def load_config(filename):
    """Return the configuration from the first "CONFIGS:" line of a log file.

    The result is a dict of integers keyed by the field names of that line;
    ``local_mem_base`` is parsed as hexadecimal, every other field as decimal.
    Prints an error and exits with status 1 when no line matches.
    """
    config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
    # field names in the same order as the capture groups of config_pattern
    field_names = ('num_threads', 'num_warps', 'num_cores', 'num_clusters',
                   'socket_size', 'local_mem_base', 'num_barriers')
    with open(filename, 'r') as file:
        for line in file:
            found = re.search(config_pattern, line)
            if not found:
                continue
            return {
                name: int(text, 16) if name == 'local_mem_base' else int(text)
                for name, text in zip(field_names, found.groups())
            }
    print("Error: missing CONFIGS: header")
    sys.exit(1)
|
||||
|
||||
def parse_simx(log_lines):
    """Turn the lines of a simx debug log into a list of instruction records.

    A record is opened by a "DEBUG Fetch:" line and filled in by the
    "DEBUG Instr", "DEBUG Src" and "DEBUG Dest" lines that follow it.
    Each record is a dict with ``lineno``, ``PC``, ``core_id``, ``warp_id``,
    ``tmask``, ``uuid`` and, when the matching lines were seen, ``instr``,
    ``opcode``, ``operands`` (source registers joined with ", ") and
    ``destination``.

    A line that fails to parse is reported once and the record it belongs to
    is dropped; detail lines that have no open record are skipped.
    """
    pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
    instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
    opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
    core_id_pattern = r"cid=(\d+)"
    warp_id_pattern = r"wid=(\d+)"
    tmask_pattern = r"tmask=(\d+)"
    operands_pattern = r"Src\d+ Reg: (.+)"
    destination_pattern = r"Dest Reg: (.+)"
    uuid_pattern = r"#(\d+)"
    entries = []
    instr_data = None
    for lineno, line in enumerate(log_lines, start=1):
        try:
            if line.startswith("DEBUG Fetch:"):
                # a new fetch closes the record that was being built
                if instr_data:
                    entries.append(instr_data)
                instr_data = {}
                instr_data["lineno"] = lineno
                instr_data["PC"] = re.search(pc_pattern, line).group(1)
                instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
                instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
                instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
                instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
            elif instr_data is None:
                # No open record: nothing fetched yet, or the record was
                # dropped after an error. Skipping avoids indexing None and
                # reporting a misleading TypeError for every following line.
                continue
            elif line.startswith("DEBUG Instr"):
                instr_data["instr"] = re.search(instr_pattern, line).group(1)
                instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
            elif line.startswith("DEBUG Src"):
                src_reg = re.search(operands_pattern, line).group(1)
                instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
            elif line.startswith("DEBUG Dest"):
                instr_data["destination"] = re.search(destination_pattern, line).group(1)
        except Exception as e:
            print("Error at line {}: {}".format(lineno, e))
            # discard the record that could not be parsed
            instr_data = None
    if instr_data:
        entries.append(instr_data)
    return entries
|
||||
|
||||
def reverse_binary(bin_str):
    """Return ``bin_str`` with its elements in reverse order."""
    backwards = slice(None, None, -1)
    return bin_str[backwards]
|
||||
|
||||
def bin_to_array(bin_str):
    """Convert each character of ``bin_str`` to an int and return them as a list."""
    return list(map(int, bin_str))
|
||||
|
||||
def append_reg(text, value, sep):
    """Append a register name to ``text`` and return ``(text, True)``.

    ``value`` is the register number as a string. Numbers below 32 are
    written as ``x<value>``; numbers from 32 up are written as
    ``f<number % 32>``. When ``sep`` is true, ", " is inserted first.
    """
    prefix = ", " if sep else ""
    index = int(value)
    if index < 32:
        name = "x" + value
    else:
        name = "f" + str(index % 32)
    return text + prefix + name, True
|
||||
|
||||
def append_value(text, reg, value, tmask_arr, sep):
    """Append ``<reg>={v0, v1, ...}`` to ``text`` and return ``(text, sep)``.

    The register name comes from ``append_reg``. One item is written per
    entry of ``tmask_arr``: ``value[i]`` where the mask entry is truthy and
    "-" where it is not.
    """
    text, sep = append_reg(text, reg, sep)
    lanes = [value[i] if tmask_arr[i] else "-" for i in range(len(tmask_arr))]
    text += "={" + ", ".join(lanes) + "}"
    return text, sep
|
||||
|
||||
def parse_rtlsim(log_lines):
    """Turn the lines of an rtlsim trace log into a list of instruction records.

    Only lines tagged ``cluster<N>-socket<N>-core<N>-(decode|issue|commit)``
    are used. Records are keyed by the ``#<uuid>`` on the line:

    * decode creates the record (PC, flattened core id, warp id, reversed
      thread mask, instruction word, opcode, operand flags, register numbers);
    * issue stores the source register data selected by the operand flags;
    * commit merges write-back data and, on the line with ``eop=1``, formats
      the ``destination`` and ``operands`` strings and emits the record.

    Reads ``num_cores`` and ``socket_size`` from the module-level ``configs``.
    A line that raises while being parsed is reported and skipped.
    """
    global configs
    line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
    pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
    instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
    ex_pattern = r"ex=([a-zA-Z]+)"
    op_pattern = r"op=([\?0-9a-zA-Z_\.]+)"
    warp_id_pattern = r"wid=(\d+)"
    tmask_pattern = r"tmask=(\d+)"
    wb_pattern = r"wb=(\d)"
    opds_pattern = r"opds=(\d+)"
    rd_pattern = r"rd=(\d+)"
    rs1_pattern = r"rs1=(\d+)"
    rs2_pattern = r"rs2=(\d+)"
    rs3_pattern = r"rs3=(\d+)"
    rs1_data_pattern = r"rs1_data=\{(.+?)\}"
    rs2_data_pattern = r"rs2_data=\{(.+?)\}"
    rs3_data_pattern = r"rs3_data=\{(.+?)\}"
    rd_data_pattern = r"data=\{(.+?)\}"
    eop_pattern = r"eop=(\d)"
    uuid_pattern = r"#(\d+)"

    # (operand flag index, register key, data key, data pattern) per source register
    sources = (
        (1, "rs1", "rs1_data", rs1_data_pattern),
        (2, "rs2", "rs2_data", rs2_data_pattern),
        (3, "rs3", "rs3_data", rs3_data_pattern),
    )

    def first_group(pattern, text):
        # first capture group of the first match
        return re.search(pattern, text).group(1)

    def lane_values(pattern, text):
        # "{a, b, c}" payload as a list in reversed order
        return re.search(pattern, text).group(1).split(', ')[::-1]

    entries = []
    instr_data = {}
    num_cores = configs['num_cores']
    socket_size = configs['socket_size']
    num_sockets = (num_cores + socket_size - 1) // socket_size

    for lineno, line in enumerate(log_lines, start=1):
        try:
            line_match = re.search(line_pattern, line)
            if not line_match:
                continue

            pc = first_group(pc_pattern, line)
            warp_id = int(first_group(warp_id_pattern, line))
            tmask = first_group(tmask_pattern, line)
            uuid = int(first_group(uuid_pattern, line))
            cluster_id = int(line_match.group(1))
            socket_id = int(line_match.group(2))
            core_id = int(line_match.group(3))
            stage = line_match.group(4)

            if stage == "decode":
                # the record is stored only once every field has parsed
                instr_data[uuid] = {
                    "uuid": uuid,
                    "PC": pc,
                    "core_id": ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id),
                    "warp_id": warp_id,
                    "tmask": reverse_binary(tmask),
                    "instr": first_group(instr_pattern, line),
                    "opcode": first_group(op_pattern, line),
                    "opds": bin_to_array(first_group(opds_pattern, line)),
                    "rd": first_group(rd_pattern, line),
                    "rs1": first_group(rs1_pattern, line),
                    "rs2": first_group(rs2_pattern, line),
                    "rs3": first_group(rs3_pattern, line),
                }

            elif stage == "issue":
                if uuid not in instr_data:
                    continue
                trace = instr_data[uuid]
                trace["lineno"] = lineno
                opds = trace["opds"]
                for slot, _reg_key, data_key, data_pattern in sources:
                    if opds[slot]:
                        trace[data_key] = lane_values(data_pattern, line)
                trace["issued"] = True

            elif stage == "commit":
                if uuid not in instr_data:
                    continue
                trace = instr_data[uuid]
                if "issued" not in trace:
                    continue

                opds = trace["opds"]
                dst_tmask_arr = bin_to_array(tmask)[::-1]
                wb = first_group(wb_pattern, line) == "1"
                if wb:
                    rd_data = lane_values(rd_data_pattern, line)
                    if 'rd_data' in trace:
                        # overwrite only the lanes enabled in this commit's mask
                        merged_rd_data = trace['rd_data']
                        for lane, active in enumerate(dst_tmask_arr):
                            if active == 1:
                                merged_rd_data[lane] = rd_data[lane]
                    else:
                        trace['rd_data'] = rd_data

                eop = first_group(eop_pattern, line) == "1"
                if not eop:
                    continue

                tmask_arr = bin_to_array(trace["tmask"])

                destination = ''
                if wb:
                    destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
                    del trace['rd_data']
                trace["destination"] = destination

                operands = ''
                sep = False
                for slot, reg_key, data_key, _data_pattern in sources:
                    if opds[slot]:
                        operands, sep = append_value(operands, trace[reg_key], trace[data_key], tmask_arr, sep)
                        del trace[data_key]
                trace["operands"] = operands

                # drop the bookkeeping fields before emitting the record
                for key in ("opds", "rd", "rs1", "rs2", "rs3", "issued"):
                    del trace[key]
                del instr_data[uuid]
                entries.append(trace)
        except Exception as e:
            print("Error at line {}: {}".format(lineno, e))
    return entries
|
||||
|
||||
def write_csv(sublogs, csv_filename, log_type):
    """Parse every sub-log and write the resulting trace entries to a CSV file.

    Args:
        sublogs: iterable of sub-logs; each one is handed unchanged to the
            parser selected by ``log_type``.
        csv_filename: path of the CSV file to create (an existing file is
            overwritten).
        log_type: ``"rtlsim"`` or ``"simx"``; selects ``parse_rtlsim`` or
            ``parse_simx``.

    The header row is always written. Entries of each sub-log are written in
    ascending numeric ``uuid`` order.

    Exits the process with status 1 when ``log_type`` is not recognised, so
    that calling scripts can detect the failure.
    """
    fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]

    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for sublog in sublogs:
            # parse sublog
            if log_type == "rtlsim":
                entries = parse_rtlsim(sublog)
            elif log_type == "simx":
                entries = parse_simx(sublog)
            else:
                print('Error: invalid log type')
                # Non-zero status: this is an error path, not a normal exit.
                sys.exit(1)

            # sort entries by uuid
            entries.sort(key=lambda x: int(x['uuid']))

            for entry in entries:
                # 'lineno' is not a CSV column; DictWriter rejects unknown
                # keys, so drop it (tolerating entries that never had it).
                entry.pop('lineno', None)
                writer.writerow(entry)
|
||||
|
||||
def split_log_file(log_filename):
    """Split a log file into sub-logs, one per "[VXDRV] START" marker.

    Each sub-log is a list of lines beginning with its marker line. Lines that
    precede the first marker are discarded. When the file contains no marker
    at all, the whole file is returned as a single sub-log.

    Args:
        log_filename: path of the log file to read.

    Returns:
        A list of sub-logs, each a list of lines (newlines preserved).
    """
    with open(log_filename, 'r') as handle:
        all_lines = handle.readlines()

    chunks = []
    for text in all_lines:
        if text.startswith("[VXDRV] START"):
            # A marker opens a new sub-log.
            chunks.append([text])
        elif chunks:
            # Only collect lines once the first marker has been seen.
            chunks[-1].append(text)

    if not chunks:
        # No marker found: treat the entire file as one sub-log.
        chunks.append(all_lines)

    return chunks
|
||||
|
||||
def main():
    """Script entry point: load the configuration, split the log, emit the CSV."""
    global configs
    cli = parse_args()
    log_path = cli.log
    # The module-level `configs` is assigned before any parsing starts.
    configs = load_config(log_path)
    write_csv(split_log_file(log_path), cli.csv, cli.type)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,41 +1,73 @@
|
|||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
import subprocess
|
||||
|
||||
PingInterval = 15
|
||||
# This script executes a long-running command while printing "still running ..." periodically
|
||||
# to notify the Travis build system that the program has not hung
|
||||
|
||||
def PingCallback(stop):
|
||||
wait_time = 0
|
||||
while True:
|
||||
time.sleep(PingInterval)
|
||||
wait_time += PingInterval
|
||||
print(" + still running (" + str(wait_time) + "s) ...")
|
||||
sys.stdout.flush()
|
||||
if stop():
|
||||
break
|
||||
PING_INTERVAL=300 # 5 minutes
|
||||
SLEEP_INTERVAL=1 # 1 second
|
||||
|
||||
def run_command(command):
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE)
|
||||
def monitor(stop_event):
|
||||
wait_time = 0
|
||||
elapsed_time = 0
|
||||
while not stop_event.is_set():
|
||||
time.sleep(SLEEP_INTERVAL)
|
||||
elapsed_time += SLEEP_INTERVAL
|
||||
if elapsed_time >= PING_INTERVAL:
|
||||
wait_time += elapsed_time
|
||||
print(" + still running (" + str(wait_time) + "s) ...")
|
||||
sys.stdout.flush()
|
||||
elapsed_time = 0
|
||||
|
||||
def execute(command):
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
while True:
|
||||
output = process.stdout.readline()
|
||||
if output == '' and process.poll() is not None:
|
||||
break
|
||||
if output:
|
||||
print output.strip()
|
||||
return process.returncode
|
||||
try:
|
||||
line = output.decode('utf-8').rstrip()
|
||||
except UnicodeDecodeError:
|
||||
line = repr(output) # Safely print raw binary data
|
||||
print(">>> " + line)
|
||||
process.stdout.flush()
|
||||
ret = process.poll()
|
||||
if ret is not None:
|
||||
return ret
|
||||
return -1
|
||||
|
||||
def main(argv):
|
||||
if not argv:
|
||||
print("Usage: travis_run.py <command>")
|
||||
sys.exit(1)
|
||||
|
||||
stop_threads = False
|
||||
t = threading.Thread(target = PingCallback, args =(lambda : stop_threads, ))
|
||||
# start monitoring thread
|
||||
stop_event = threading.Event()
|
||||
t = threading.Thread(target=monitor, args=(stop_event,))
|
||||
t.start()
|
||||
|
||||
exitcode = run_command(argv)
|
||||
# execute command
|
||||
exitcode = execute(argv)
|
||||
print(" + exitcode="+str(exitcode))
|
||||
|
||||
stop_threads = True
|
||||
# terminate monitoring thread
|
||||
stop_event.set()
|
||||
t.join()
|
||||
|
||||
sys.exit(exitcode)
|
||||
|
|
34
config.mk.in
Normal file
34
config.mk.in
Normal file
|
@ -0,0 +1,34 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
VORTEX_HOME ?= @VORTEX_HOME@
|
||||
|
||||
XLEN ?= @XLEN@
|
||||
|
||||
TOOLDIR ?= @TOOLDIR@
|
||||
|
||||
OSVERSION ?= @OSVERSION@
|
||||
|
||||
INSTALLDIR ?= @INSTALLDIR@
|
||||
|
||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||
|
||||
LIBC_VORTEX ?= $(TOOLDIR)/libc$(XLEN)
|
||||
LIBCRT_VORTEX ?= $(TOOLDIR)/libcrt$(XLEN)
|
||||
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
|
176
configure
vendored
Executable file
176
configure
vendored
Executable file
|
@ -0,0 +1,176 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Determine the current working directory
|
||||
CURRENT_DIR=$(pwd)
|
||||
|
||||
# Function to detect current OS
|
||||
# Print the OS identifier used to select prebuilt packages.
# Reads /etc/os-release when present and prints one of:
#   ubuntu/bionic, ubuntu/focal (also used for jammy and noble), centos/7,
# or "unsupported" for everything else.
detect_osversion() {
    local result="unsupported"

    if [ ! -f /etc/os-release ]; then
        echo "$result"
        return
    fi

    . /etc/os-release # Source the os-release file to get OS information

    if [ "$ID" = "ubuntu" ]; then
        case "$VERSION_CODENAME" in
            bionic)            result="ubuntu/bionic" ;;
            focal|jammy|noble) result="ubuntu/focal" ;;
            # Add new versions as needed
        esac
    elif [ "$ID" = "centos" ]; then
        case "$VERSION_ID" in
            7) result="centos/7" ;;
            # Add new versions as needed
        esac
    fi

    echo "$result"
}
|
||||
|
||||
# Function to recursively copy files, skipping the current directory
|
||||
# Copy build files from the source tree into the build tree.
#
#   $1  source directory (root of the source tree)
#   $2  target directory (root of the build tree)
#
# Reads the global array SUBDIRS: each element is a `find -path` pattern
# relative to the source directory. A leading '!' requests a full copy of
# every regular file in the matching directories; otherwise only Makefile,
# common.mk and *.in files are handled. The pattern "." selects the source
# directory itself.
#
# *.in files are written without the .in suffix after substituting the
# @VORTEX_HOME@, @XLEN@, @TOOLDIR@, @OSVERSION@, @INSTALLDIR@ and @BUILDDIR@
# placeholders from the globals SOURCE_DIR, XLEN, TOOLDIR, OSVERSION, PREFIX
# and CURRENT_DIR. Other files are copied only when source and target are
# different directories.
copy_files() {
    local source_dir="$1"
    local target_dir="$2"
    #echo "source_dir=$source_dir, target_dir=$target_dir"

    local same_dir=0
    if [ "$(realpath "$source_dir")" == "$(realpath "$target_dir")" ]; then
        same_dir=1
    fi

    # Copy (or template-expand) every regular file matching a glob.
    #   $1  glob pattern of source files (expanded unquoted on purpose)
    #   $2  destination directory, created on demand
    copy_and_update() {
        local src_pattern="$1"
        local dest_dir="$2"
        local file filename filename_no_ext dest_file firstline
        for file in $src_pattern; do
            #echo "*** $file > $dest_dir"
            [ -f "$file" ] || continue
            if [[ "$file" == *.in ]]; then
                filename=$(basename -- "$file")
                filename_no_ext="${filename%.in}"
                dest_file="$dest_dir/$filename_no_ext"
                mkdir -p "$dest_dir"
                sed "s|@VORTEX_HOME@|$SOURCE_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@BUILDDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
                # apply permissions to bash scripts
                read -r firstline < "$dest_file"
                if [[ "$firstline" =~ ^#!.*bash ]]; then
                    chmod +x "$dest_file"
                fi
            elif [ "$same_dir" -eq 0 ]; then
                # Plain files only need copying when building out of tree.
                mkdir -p "$dest_dir"
                cp -p "$file" "$dest_dir"
            fi
        done
    }

    # Declared local so the loop variables do not leak into the caller.
    local pattern full_copy source_pattern dir rel_path full_target_dir
    for pattern in "${SUBDIRS[@]}"; do
        full_copy=0
        if [[ "$pattern" == !* ]]; then
            full_copy=1
            pattern=${pattern:1}
        fi
        source_pattern="$source_dir/$pattern"
        if [[ "$pattern" == "." ]]; then
            source_pattern=$source_dir
        fi
        # IFS= and -r keep each path exactly as find printed it
        # (no backslash processing, no trimming of edge whitespace).
        find "$source_dir" -type d -path "$source_pattern" 2>/dev/null | while IFS= read -r dir; do
            # Compute the relative path of the directory
            rel_path="${dir#"$source_dir"}"
            rel_path="${rel_path#/}" # Remove leading slash, if present
            full_target_dir="$target_dir/$rel_path"

            if [ "$full_copy" -eq 1 ]; then
                # '!' pattern: take every regular file in the directory
                copy_and_update "$dir/*" "$full_target_dir"
            else
                # Copy and update Makefile, common.mk and templates if they exist
                copy_and_update "$dir/Makefile" "$full_target_dir"
                copy_and_update "$dir/common.mk" "$full_target_dir"
                copy_and_update "$dir/*.in" "$full_target_dir"
            fi
        done
    done
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
# default configuration parameters
|
||||
default_xlen=32
default_tooldir=$HOME/tools
default_osversion=$(detect_osversion)
default_prefix=$CURRENT_DIR

# load default configuration parameters from existing config.mk
# Each relevant line has the form "NAME ?= value"; splitting on '=' leaves
# "NAME ?" in $key and the (untrimmed) value in $value.
if [ -f "config.mk" ]; then
    while IFS='=' read -r key value; do
        value=${value//[@]/} # Remove placeholder characters
        value="${value#"${value%%[![:space:]]*}"}" # Remove leading whitespace
        value="${value%"${value##*[![:space:]]}"}" # Remove trailing whitespace
        case $key in
            XLEN\ ?*)      default_xlen=${value//\?=/} ;;
            TOOLDIR\ ?*)   default_tooldir=${value//\?=/} ;;
            OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
            # config.mk stores the install prefix under INSTALLDIR (the
            # template line is "INSTALLDIR ?= @INSTALLDIR@", filled from
            # $PREFIX); PREFIX is still accepted for compatibility.
            INSTALLDIR\ ?*|PREFIX\ ?*) default_prefix=${value//\?=/} ;;
        esac
    done < config.mk
fi

# set configuration parameters
# Values already present in the environment take precedence over defaults.
XLEN=${XLEN:=$default_xlen}
TOOLDIR=${TOOLDIR:=$default_tooldir}
OSVERSION=${OSVERSION:=$default_osversion}
PREFIX=${PREFIX:=$default_prefix}
|
||||
|
||||
# parse command line arguments
|
||||
# Print the command-line help and terminate the script with status 1.
# The synopsis lists every option accepted by the argument parser,
# including --prefix.
usage() {
    echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>] [--prefix=<path>]"
    echo "  --xlen=<value>          Set the XLEN value (default: 32)"
    echo "  --tooldir=<path>        Set the TOOLDIR path (default: $HOME/tools)"
    echo "  --osversion=<version>   Set the OS Version (default: $(detect_osversion))"
    echo "  --prefix=<path>         Set installation directory"
    exit 1
}
|
||||
# Consume the command line one argument at a time; every option has the
# form --name=value and overrides the corresponding configuration variable.
until [ "$#" -eq 0 ]; do
    case "$1" in
        --xlen=*)
            XLEN="${1#--xlen=}"
            ;;
        --tooldir=*)
            TOOLDIR="${1#--tooldir=}"
            ;;
        --osversion=*)
            OSVERSION="${1#--osversion=}"
            ;;
        --prefix=*)
            PREFIX="${1#--prefix=}"
            ;;
        -h|--help)
            usage
            ;;
        *)
            echo "Unknown parameter passed: $1"
            usage
            ;;
    esac
    shift
done

# check OS
# Abort when neither detection nor the user supplied a supported OS version.
if [[ "$OSVERSION" == "unsupported" ]]; then
    echo "Error: Unsupported OS."
    exit -1
fi
|
||||
|
||||
# project subdirectories to build
|
||||
SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*")
|
||||
|
||||
# Get the directory of the script
|
||||
SOURCE_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
|
||||
THIRD_PARTY_DIR=$SOURCE_DIR/third_party
|
||||
|
||||
copy_files "$SOURCE_DIR" "$CURRENT_DIR"
|
|
@ -1,30 +0,0 @@
|
|||
# Vortex Documentation
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Codebase Layout](codebase.md)
|
||||
- [Microarchitecture](microarchitecture.md)
|
||||
- [Cache Subsystem](cache_subsystem.md)
|
||||
- [Software](software.md)
|
||||
- [Simulation](simulation.md)
|
||||
- [FPGA Setup Guide](fpga_setup.md)
|
||||
- [Debugging](debugging.md)
|
||||
- [Useful Links](references.md)
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
- Refer to the install instructions in [README](../README.md).
|
||||
|
||||
## Quick Start Scenarios
|
||||
|
||||
Running Vortex simulators with different configurations:
|
||||
- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads
|
||||
|
||||
$ ./ci/blackbox.sh --clusters=2 --cores=2 --warps=2 --threads=4 --driver=rtlsim --app=basic
|
||||
- Run demo driver test with vlsim driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads
|
||||
|
||||
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=2 --driver=vlsim --app=demo
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
|
||||
|
||||
$ ./ci/blackbox.sh --clusters=4 --cores=4 --warps=8 --threads=6 --driver=simx --app=dogfood
|
|
@ -1,70 +0,0 @@
|
|||
# Vortex Cache Subsystem
|
||||
|
||||
The Vortex Cache Sub-system has the following main properties:
|
||||
|
||||
- High-bandwidth with bank parallelism
|
||||
- Snoop protocol to flush data for CPU access
|
||||
- Generic design: Dcache, Icache, Shared Memory, L2 cache, L3 cache
|
||||
|
||||
### Cache Hierarchy
|
||||
|
||||

|
||||
|
||||
- Cache can be configured to be any level in the hierarchy
|
||||
- Caches communicate via snooping
|
||||
- Cache flush from AFU is passed down the hierarchy
|
||||
|
||||
### VX_cache.v (Top Module)
|
||||
|
||||
VX_cache.v is the top module of the cache Verilog code located in the `/hw/rtl/cache` directory.
|
||||
|
||||

|
||||
|
||||
- Configurable (Cache size, number of banks, bank line size, etc.)
|
||||
- I/O signals
|
||||
- Core Request
|
||||
- Core Rsp
|
||||
- DRAM Req
|
||||
- DRAM Rsp
|
||||
- Snoop Req
|
||||
- Snoop Rsp
|
||||
- Snoop Forwarding Out
|
||||
- Snoop Forwarding In
|
||||
- Bank Select
|
||||
- Assigns valid and ready signals for each bank
|
||||
- Snoop Forwarder
|
||||
- DRAM Request Arbiter
|
||||
- Prepares cache response for communication with DRAM
|
||||
- Snoop Response Arbiter
|
||||
- Sends snoop response
|
||||
- Core Response Merge
|
||||
- Cache accesses one line at a time. As a result, each request may not come back in the same response. This module tries to recombine the responses by thread ID.
|
||||
|
||||
### VX_bank.v
|
||||
|
||||
VX_bank.v is the verilog code that handles cache bank functionality and is located in the `/hw/rtl/cache` directory.
|
||||
|
||||

|
||||
|
||||
- Allows for high throughput
|
||||
- Each bank contains queues to hold requests to the cache
|
||||
- I/O signals
|
||||
- Core request
|
||||
- Core Response
|
||||
- DRAM Fill Requests
|
||||
- DRAM Fill Response
|
||||
- DRAM WB Requests
|
||||
- Snp Request
|
||||
- Snp Response
|
||||
- Request Priority: DRAM fill, miss reserve, core request, snoop request
|
||||
- Snoop Request Queue
|
||||
- DRAM Fill Queue
|
||||
- Core Req Arbiter
|
||||
- Requests to be processed by the bank
|
||||
- Tag Data Store
|
||||
- Registers for valid, dirty, dirtyb, tag, and data
|
||||
- Length of registers determined by lines in the bank
|
||||
- Tag Data Access:
|
||||
- I/O: stall, snoop info, force request miss
|
||||
- Writes to cache or sends read response; hit or miss determined here
|
||||
- A missed request goes to the miss reserve if it is not a snoop request or DRAM fill
|
|
@ -1,35 +0,0 @@
|
|||
# Vortex Codebase
|
||||
|
||||
The directory/file layout of the Vortex codebase is as follows:
|
||||
|
||||
- `hw`:
|
||||
- `rtl`: hardware rtl sources
|
||||
- `cache`: cache subsystem code
|
||||
- `fp_cores`: floating point unit code
|
||||
- `interfaces`: interfaces for inter-module communication
|
||||
- `libs`: general-purpose modules (i.e., encoder, arbiter, ...)
|
||||
- `syn`: synthesis directory
|
||||
- `opae`: OPAE synthesis scripts
|
||||
- `quartus`: Quartus synthesis scripts
|
||||
- `synopsys`: Synopsys synthesis scripts
|
||||
- `yosys`: Yosys synthesis scripts
|
||||
- `simulate`: baseline RTL simulator (used by RTLSIM)
|
||||
- `unit_tests`: unit tests for some hardware components
|
||||
- `driver`: Host driver software
|
||||
- `include`: Vortex driver public headers
|
||||
- `opae`: software driver that uses Intel OPAE
|
||||
- `vlsim`: software driver that simulates Full RTL (include AFU)
|
||||
- `rtlsim`: software driver that simulates processor RTL
|
||||
- `simx`: software driver that uses simX simulator
|
||||
- `runtime`: Kernel runtime software
|
||||
- `include`: Vortex runtime public headers
|
||||
- `linker`: linker file for compiling kernels
|
||||
- `src`: runtime implementation
|
||||
- `simX`: cycle approximate simulator for vortex
|
||||
- `tests`: tests repository.
|
||||
- `runtime`: runtime tests
|
||||
- `regression`: regression tests
|
||||
- `riscv`: RISC-V standard tests
|
||||
- `opencl`: opencl benchmarks and tests
|
||||
- `ci`: continuous integration scripts
|
||||
- `miscs`: miscellaneous resources.
|
|
@ -1,43 +0,0 @@
|
|||
# Debugging Vortex Hardware
|
||||
|
||||
## SimX Debugging
|
||||
|
||||
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
|
||||
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
|
||||
|
||||
// Running demo program on SimX in debug mode
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug
|
||||
|
||||
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (decoded instruction, register states, pipeline states, etc..). You can increase the verbosity level of the trace by changing the `DEBUG_LEVEL` variable to a value [1-5] (default is 3).
|
||||
|
||||
// Using SimX in debug mode with verbose level 4
|
||||
$ CONFIGS=-DDEBUG_LEVEL=4 ./ci/blackbox.sh --driver=simx --app=demo --debug
|
||||
|
||||
## RTL Debugging
|
||||
|
||||
To debug the processor RTL, you need to use VLSIM or RTLSIM driver. VLSIM simulates the full processor including the AFU command processor (using `/rtl/afu/vortex_afu.sv` as top module). RTLSIM simulates the Vortex processor only (using `/rtl/Vortex.v` as top module).
|
||||
|
||||
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
|
||||
|
||||
// Running demo program on vlsim in debug mode
|
||||
$ ./ci/blackbox.sh --driver=vlsim --app=demo --debug
|
||||
|
||||
// Running demo program on rtlsim in debug mode
|
||||
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug
|
||||
|
||||
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc.). A waveform trace `trace.vcd` is also generated in the current directory during the program execution. You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc.). [GTKWave](http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.
|
||||
|
||||
## FPGA Debugging
|
||||
|
||||
Debugging the FPGA directly may be necessary to investigate runtime bugs that the RTL simulation cannot catch. We have implemented an in-house scope analyzer for Vortex that works when the FPGA is running. To enable the FPGA scope analyzer, the FPGA bitstream should be built using the `SCOPE=1` flag:
|
||||
|
||||
$ cd /hw/syn/opae
|
||||
$ CONFIGS=-DSCOPE=1 make fpga-4c
|
||||
|
||||
When running the program on the FPGA, you need to pass the `--scope` flag to the `blackbox` tool.
|
||||
|
||||
// Running demo program on FPGA with scope enabled
|
||||
$ ./ci/blackbox.sh --driver=fpga --app=demo --scope
|
||||
|
||||
|
||||
A waveform trace `trace.vcd` will be generated in the current directory during the program execution. This trace includes a limited set of signals that are defined in `/hw/scripts/scope.json`. You can expand your signals' selection by updating the json file.
|
|
@ -1,80 +0,0 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
|
||||
OPAE Environment Setup
|
||||
----------------------
|
||||
|
||||
$ source /opt/inteldevstack/init_env_user.sh
|
||||
$ export OPAE_HOME=/opt/opae/1.1.2
|
||||
$ export PATH=$OPAE_HOME/bin:$PATH
|
||||
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
|
||||
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
|
||||
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
|
||||
$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
|
||||
$ export PATH=:/opt/verilator/bin:$PATH
|
||||
$ export VERILATOR_ROOT=/opt/verilator
|
||||
|
||||
OPAE Build Configuration
|
||||
------------------------
|
||||
|
||||
Within the `/hw/syn/opae` directory, there are source text files for each core-option for the fpga build (the 32 and 64 core options are not currently implemented) which have the following parameters that can be configured:
|
||||
- NUM_CORES: the number of cores per cluster
|
||||
- NUM_CLUSTERS: the number of clusters allotted to the processor
|
||||
- L3_ENABLE: enable the use of the L3 cache
|
||||
- PERF_ENABLE: enable the use of all profile counters
|
||||
|
||||
To enable L3 cache and profile counters for a build, simply uncomment the definition within the respective source file.
|
||||
|
||||
OPAE Build
|
||||
------------------
|
||||
|
||||
The FPGA has the following configuration options:
|
||||
- 1 core fpga (fpga-1c)
|
||||
- 2 cores fpga (fpga-2c)
|
||||
- 4 cores fpga (fpga-4c)
|
||||
- 8 cores fpga (fpga-8c)
|
||||
- 16 cores fpga (fpga-16c)
|
||||
|
||||
$ cd hw/syn/opae
|
||||
$ make fpga- *# of cores* c
|
||||
|
||||
Example: `make fpga-4c`
|
||||
|
||||
A new folder (ex: `build_fpga_4c`) will be created and the build will start and take ~30-45 min to complete.
|
||||
|
||||
OPAE Build Progress
|
||||
-------------------
|
||||
|
||||
You could check the last 10 lines in the build log for possible errors until build completion.
|
||||
|
||||
$ tail -n 10 ./build_fpga_4c/build.log
|
||||
|
||||
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
|
||||
|
||||
$ ps -u *username*
|
||||
|
||||
|
||||
If the build fails and you need to restart it, clean up the build folder using the following command:
|
||||
|
||||
$ make clean-fpga- *# of cores* c
|
||||
|
||||
Example: `make clean-fpga-4c`
|
||||
|
||||
The file `vortex_afu.gbs` should exist when the build is done:
|
||||
|
||||
$ ls -lsa ./build_fpga_ *# of cores* c/vortex_afu.gbs
|
||||
|
||||
|
||||
Signing the bitstream and Programming the FPGA
|
||||
----------------------------------------------
|
||||
|
||||
$ cd ./build_fpga_`# of cores`c/
|
||||
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
|
||||
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
|
||||
|
||||
FPGA sample test running OpenCL sgemm kernel
|
||||
--------------------------------------------
|
||||
|
||||
Run the following from the Vortex root directory
|
||||
|
||||
$ ./ci/blackbox.sh --driver=fpga --app=sgemm --args="-n64"
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 60 KiB |
Binary file not shown.
Before Width: | Height: | Size: 77 KiB |
Binary file not shown.
Before Width: | Height: | Size: 67 KiB |
Binary file not shown.
Before Width: | Height: | Size: 517 KiB |
|
@ -1,94 +0,0 @@
|
|||
# Vortex Microarchitecture
|
||||
|
||||
### Vortex GPGPU Execution Model
|
||||
|
||||
Vortex uses the SIMT (Single Instruction, Multiple Threads) execution model with a single warp issued per cycle.
|
||||
|
||||
- **Threads**
|
||||
- Smallest unit of computation
|
||||
- Each thread has its own register file (32 int + 32 fp registers)
|
||||
- Threads execute in parallel
|
||||
- **Warps**
|
||||
- A logical cluster of threads
|
||||
- Each thread in a warp executes the same instruction
|
||||
- The PC is shared; maintain thread mask for Writeback
|
||||
- Warp's execution is time-multiplexed at log steps
|
||||
- Ex. warp 0 executes at cycle 0, warp 1 executes at cycle 1
|
||||
|
||||
### Vortex RISC-V ISA Extension
|
||||
|
||||
- **Thread Mask Control**
|
||||
- Control the number of threads to activate during execution
|
||||
- `TMC` *count*: activate count threads
|
||||
- **Warp Scheduling**
|
||||
- Control the number of warps to activate during execution
|
||||
- `WSPAWN` *count, addr*: activate count warps and jump to addr location
|
||||
- **Control-Flow Divergence**
|
||||
- Control threads to activate when a branch diverges
|
||||
- `SPLIT` *predicate*: apply 'taken' predicate thread mask and save 'not-taken' into IPDOM stack
|
||||
- `JOIN`: restore 'not-taken' thread mask
|
||||
- **Warp Synchronization**
|
||||
- `BAR` *id, count*: stall warps entering barrier *id* until count is reached
|
||||
|
||||
### Vortex Pipeline/Datapath
|
||||
|
||||

|
||||
|
||||
Vortex has a 5-stage pipeline: FI | ID | Issue | EX | WB.
|
||||
|
||||
- **Fetch**
|
||||
- Warp Scheduler
|
||||
- Track stalled & active warps, resolve branches and barriers, maintain split/join IPDOM stack
|
||||
- Instruction Cache
|
||||
- Retrieve instruction from cache, issue I-cache requests/responses
|
||||
- **Decode**
|
||||
- Decode fetched instructions, notify warp scheduler when the following instructions are decoded:
|
||||
- Branch, tmc, split/join, wspawn
|
||||
- Precompute used_regs mask (needed for Issue stage)
|
||||
- **Issue**
|
||||
- Scheduling
|
||||
- In-order issue (operands/execute unit ready), out-of-order commit
|
||||
- IBuffer
|
||||
- Store fetched instructions, separate queues per-warp, selects next warp through round-robin scheduling
|
||||
- Scoreboard
|
||||
- Track in-use registers
|
||||
- GPRs (General-Purpose Registers) stage
|
||||
- Fetch issued instruction operands and send operands to execute unit
|
||||
- **Execute**
|
||||
- ALU Unit
|
||||
- Single-cycle operations (+,-,>>,<<,&,|,^), Branch instructions (Share ALU resources)
|
||||
- MULDIV Unit
|
||||
- Multiplier - done in 2 cycles
|
||||
- Divider - division and remainder, done in 32 cycles
|
||||
- Implements serial algorithm (Stalls the pipeline)
|
||||
- FPU Unit
|
||||
- Multi-cycle operations, uses `FPnew` Library on ASIC, uses hard DSPs on FPGA
|
||||
- CSR Unit
|
||||
- Store constant status registers - device caps, FPU status flags, performance counters
|
||||
- Handle external CSR requests (requests from host CPU)
|
||||
- LSU Unit
|
||||
- Handle load/store operations, issue D-cache requests, handle D-cache responses
|
||||
- Commit load responses - saves storage, Scoreboard tracks completion
|
||||
- GPGPU Unit
|
||||
- Handle GPGPU instructions
|
||||
- TMC, WSPAWN, SPLIT, BAR
|
||||
- JOIN is handled by Warp Scheduler (upon SPLIT response)
|
||||
- **Commit**
|
||||
- Commit
|
||||
- Update CSR flags, update performance counters
|
||||
- Writeback
|
||||
- Write result back to GPRs, notify Scoreboard (release in-use register), select candidate instruction (ALU unit has highest priority)
|
||||
- **Clustering**
|
||||
- Group multiple cores into clusters (optionally share L2 cache)
|
||||
- Group multiple clusters (optionally share L3 cache)
|
||||
- Configurable at build time
|
||||
- Default configuration:
|
||||
- #Clusters = 1
|
||||
- #Cores = 4
|
||||
- #Warps = 4
|
||||
- #Threads = 4
|
||||
- **FPGA AFU Interface**
|
||||
- Manage CPU-GPU communication
|
||||
- Query devices caps, load kernel instructions and resource buffers, start kernel execution, read destination buffers
|
||||
- Local Memory - GPU access to local DRAM
|
||||
- Reserved I/O addresses - redirect to host CPU, console output
|
BIN
docs/assets/img/cache_microarchitecture.png
Normal file
BIN
docs/assets/img/cache_microarchitecture.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 207 KiB |
BIN
docs/assets/img/vortex_microarchitecture.png
Normal file
BIN
docs/assets/img/vortex_microarchitecture.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 463 KiB |
27
docs/cache_subsystem.md
Normal file
27
docs/cache_subsystem.md
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Vortex Cache Subsystem
|
||||
|
||||
The Vortex Cache Sub-system has the following main properties:
|
||||
|
||||
- High-bandwidth transfer with Multi-bank parallelism
|
||||
- Non-blocking pipelined write-through cache architecture with per-bank MSHR
|
||||
- Configurable design: Dcache, Icache, L2 cache, L3 cache
|
||||
|
||||
### Cache Microarchitecture
|
||||
|
||||

|
||||
|
||||
The Vortex cache is comprised of multiple parallel banks. It is comprised of the following modules:
|
||||
- **Bank request dispatch crossbar**: assigns a bank to each incoming request and resolves collisions using stalls.
|
||||
- **Bank response merge crossbar**: merges results from the banks and forwards them to the core response.
|
||||
- **Memory request multiplexer**: arbitrates bank memory requests
|
||||
- **Memory response demultiplexer**: forwards memory response to the corresponding bank.
|
||||
- **Flush Unit**: performs tag memory initialization.
|
||||
|
||||
Incoming requests entering the cache are sent to a dispatch crossbar that selects the corresponding bank for each request, resolving bank collisions with stalls. The output of each bank is merged back into the outgoing response port via the merge crossbar. Each bank integrates a non-blocking pipeline with a local Miss Status Holding Register (MSHR) to reduce the miss rate. The bank pipeline consists of the following stages:
|
||||
|
||||
- **Schedule**: Selects the next request into the pipeline from the incoming core request, memory fill, or the MSHR entry, with priority given to the latter.
|
||||
- **Tag Access**: single-port read/write access to the tag store.
|
||||
- **Data Access**: Single-port read/write access to the data store.
|
||||
- **Response Handling**: Core response back to the core.
|
||||
|
||||
Deadlocks inside the cache can occur when the MSHR is full and a new request is already in the pipeline. It can also occur when the memory request queue is full, and there is an incoming memory response. The cache mitigates MSHR deadlocks by using an early full signal before a new request is issued and similarly mitigates memory deadlocks by ensuring that its request queue never fills up.
|
41
docs/codebase.md
Normal file
41
docs/codebase.md
Normal file
|
@ -0,0 +1,41 @@
|
|||
# Vortex Codebase
|
||||
|
||||
The directory/file layout of the Vortex codebase is as follows:
|
||||
|
||||
- `hw`:
|
||||
- `rtl`: hardware rtl sources
|
||||
- `core`: core pipeline
|
||||
- `cache`: cache subsystem
|
||||
- `mem`: memory subsystem
|
||||
- `fpu`: floating point unit
|
||||
- `interfaces`: interfaces for inter-module communication
|
||||
- `libs`: general-purpose RTL modules
|
||||
- `syn`: synthesis directory
|
||||
- `altera`: Altera synthesis scripts
|
||||
- `xilinx`: Xilinx synthesis scripts
|
||||
- `synopsys`: Synopsys synthesis scripts
|
||||
- `modelsim`: Modelsim synthesis scripts
|
||||
- `yosys`: Yosys synthesis scripts
|
||||
- `unit_tests`: unit tests for some hardware components
|
||||
- `runtime`: host runtime software APIs
|
||||
- `include`: Vortex driver public headers
|
||||
- `stub`: Vortex stub driver library
|
||||
- `opae`: software driver that uses Intel OPAE API with device targets=fpga|asesim|opaesim
|
||||
- `xrt`: software driver that uses Xilinx XRT API with device targets=hw|hw_emu|sw_emu
|
||||
- `rtlsim`: software driver that uses rtlsim simulator
|
||||
- `simx`: software driver that uses simX simulator
|
||||
- `kernel`: GPU kernel software APIs
|
||||
- `include`: Vortex runtime public headers
|
||||
- `linker`: linker file for compiling kernels
|
||||
- `src`: runtime implementation
|
||||
- `sim`:
|
||||
- `opaesim`: Intel OPAE AFU RTL simulator
|
||||
- `rtlsim`: processor RTL simulator
|
||||
- `simX`: cycle approximate simulator for vortex
|
||||
- `tests`: tests repository.
|
||||
- `riscv`: RISC-V conformance tests
|
||||
- `kernel`: kernel tests
|
||||
- `regression`: regression tests
|
||||
- `opencl`: opencl benchmarks and tests
|
||||
- `ci`: continuous integration scripts
|
||||
- `miscs`: miscellaneous resources.
|
36
docs/continuous_integration.md
Normal file
36
docs/continuous_integration.md
Normal file
|
@ -0,0 +1,36 @@
|
|||
# Continuous Integration
|
||||
- Each time you push to the repo, the Continuous Integration pipeline will run
|
||||
- This pipeline consists of creating the correct development environment, building your code, and running all tests
|
||||
- This is an extensive pipeline so it might take some time to complete
|
||||
|
||||
|
||||
## Protecting Master Branch
|
||||
Navigate to your Repository:
|
||||
Open your repository on GitHub.
|
||||
|
||||
Click on "Settings":
|
||||
In the upper-right corner of your repository page, click on the "Settings" tab.
|
||||
|
||||
Select "Branches" in the left sidebar:
|
||||
On the left sidebar, look for the "Branches" option and click on it.
|
||||
|
||||
Choose the Branch:
|
||||
Under "Branch protection rules," select the branch you want to protect. In this case, choose the main branch.
|
||||
|
||||
Enable Branch Protection:
|
||||
Check the box that says "Protect this branch."
|
||||
|
||||
Configure Protection Settings:
|
||||
You can configure various protection settings. Some common settings include:
|
||||
|
||||
Require pull request reviews before merging: This ensures that changes are reviewed before being merged.
|
||||
Require status checks to pass before merging: This ensures that automated tests and checks are passing.
|
||||
Require signed commits: This enforces that commits are signed with a verified signature.
|
||||
Restrict Who Can Push:
|
||||
You can further restrict who can push directly to the branch. You might want to limit this privilege to specific people or teams.
|
||||
|
||||
Save Changes:
|
||||
Once you've configured the protection settings, scroll down and click on the "Save changes" button.
|
||||
|
||||
Now, your main branch is protected, and certain criteria must be met before changes can be pushed directly to it. Contributors will need to create pull requests, have their changes reviewed, and meet other specified criteria before the changes can be merged into the main branch.
|
||||
|
37
docs/contributing.md
Normal file
37
docs/contributing.md
Normal file
|
@ -0,0 +1,37 @@
|
|||
# Contributing to Vortex
|
||||
|
||||
## Github
|
||||
Vortex uses Github to host its git repositories.
|
||||
There are a lot of ways to use the features on Github for collaboration.
|
||||
Therefore, this documentation details the standard procedure for contributing to Vortex.
|
||||
Development of Vortex is consolidated to this repo, `vortex` and any associated forks.
|
||||
Previously, there was active work done on a private repo named `vortex-dev`.
|
||||
`vortex-dev` has officially been deprecated and fully merged into this public repo, `vortex`.
|
||||
If you are returning to this project and have legacy versions of Vortex, you can use the releases branches to access older versions.
|
||||
|
||||
## Contribution Process
|
||||
In an effort to keep `vortex` organized, permissions to directly create branches and push code have been limited to admins.
|
||||
However, contributions are strongly encouraged and keep the project moving forward! Here is the procedure for contributing:
|
||||
|
||||
1. Create a fork of `vortex`
|
||||
2. In your fork, create a branch from `master` that briefly explains the work you are adding (ie: `develop-documentation`)
|
||||
3. Make your changes on the new branch in your fork. You may create as many commits as you need, which might be common if you are making multiple iterations
|
||||
4. Since you are the owner of your fork, you have full permissions to push commits to your fork
|
||||
4. When you are satisfied with the changes on your fork, you can open a PR from your fork using the online interface
|
||||
5. If you recently made a push, you will automatically get a prompt on Github online to create a PR, which you can press
|
||||
6. Otherwise, you can go to your fork on Github online and manually create a PR (todo)
|
||||
(todo): how to name and format your PR, what information you should add to the PR, does not need to be too strict if you are attending the weekly meetings*
|
||||
7. Github uses the following semantics: `base repository` gets the changes from your `head repository`
|
||||
8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `master` since the master branch is protected by reviewed PRs.
|
||||
9. And you should assign the `head repository` to `<your-github-username>/vortex` (which represents your fork of vortex) and the `compare` branch to the one created in step 2
|
||||
10. Now that your intended PR has been specified, you should review the status. Check for merge conflicts, if all your commits are present, and all the modified files make sense
|
||||
11. You can still make a PR if there are issues in step 10, just make sure the structure is correct according to steps 7-9
|
||||
12. Once the PR is made, the CI pipeline will run automatically, testing your changes
|
||||
13. Remember, a PR is flexible: if you need to make changes to the code, you can go back to your branch of the fork to commit and push any updates
|
||||
14. As long as the `head repository`'s `compare` branch is the one you edited, the PR will automatically get the most recent changes
|
||||
15. When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR
|
||||
|
||||
## What Makes a Good Contribution?
|
||||
- If you are contributing code changes, then review [testing.md](./testing.md) to ensure your tests are integrated into the [CI pipeline](continuous_integration.md)
|
||||
- During a PR, you should consider the advice you are provided by your reviewers. Remember, you can keep adding commits to an open PR!
|
||||
- If your change aims to fix an issue opened on Github, please tag that issue in the PR itself
|
74
docs/debugging.md
Normal file
74
docs/debugging.md
Normal file
|
@ -0,0 +1,74 @@
|
|||
# Debugging Vortex GPU
|
||||
|
||||
## Testing changes to the RTL or simulator GPU driver.
|
||||
|
||||
The Blackbox utility script will not pick up your changes if the h/w configuration is the same as during the last run.
|
||||
To force the utility to build the driver, you need to pass the --rebuild=1 option when running tests.
|
||||
Using --rebuild=0 will prevent the rebuild even if the h/w configuration is different from last run.
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --rebuild=1
|
||||
|
||||
## SimX Debugging
|
||||
|
||||
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
|
||||
The recommended method to enable debugging is to pass the `--debug=<level>` flag to `blackbox` tool when running a program.
|
||||
|
||||
// Running demo program on SimX in debug mode
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=1
|
||||
|
||||
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (decoded instruction, register states, pipeline states, etc..). You can increase the verbosity of the trace by changing the debug level.
|
||||
|
||||
// Using SimX in debug mode with verbose level 3
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3
|
||||
|
||||
## RTL Debugging
|
||||
|
||||
To debug the processor RTL, you need to use VLSIM or RTLSIM driver. VLSIM simulates the full processor including the AFU command processor (using `/rtl/afu/opae/vortex_afu.sv` as top module). RTLSIM simulates the Vortex processor only (using `/rtl/Vortex.v` as top module).
|
||||
|
||||
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
|
||||
|
||||
// Running demo program on the opae simulator in debug mode
|
||||
$ TARGET=opaesim ./ci/blackbox.sh --driver=opae --app=demo --debug=1
|
||||
|
||||
// Running demo program on rtlsim in debug mode
|
||||
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1
|
||||
|
||||
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc..). A waveform trace `trace.vcd` is also generated in the current directory during the program execution.
|
||||
By default, all library modules under the /libs/ folder are excluded from the trace to reduce the waveform file size. You can change that behavior by either explicitly commenting out `TRACING_OFF`/`TRACING_ON` inside a lib module source (e.g. VX_stream_buffer.sv) or simply enabling a full trace using the following command.
|
||||
|
||||
// Debugging the demo program with rtlsim in full tracing mode
|
||||
$ CONFIGS="-DTRACING_ALL" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1
|
||||
|
||||
You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc..). [GTKwave](http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.
|
||||
|
||||
## FPGA Debugging
|
||||
|
||||
Debugging the FPGA directly may be necessary to investigate runtime bugs that the RTL simulation cannot catch. We have implemented an in-house scope analyzer for Vortex that works when the FPGA is running. To enable the FPGA scope analyzer, the FPGA bitstream should be built using `SCOPE=1` flag
|
||||
|
||||
$ cd /hw/syn/opae
|
||||
$ CONFIGS="-DSCOPE=1" TARGET=fpga make
|
||||
|
||||
When running the program on the FPGA, you need to pass the `--scope` flag to the `blackbox` tool.
|
||||
|
||||
// Running demo program on FPGA with scope enabled
|
||||
$ ./ci/blackbox.sh --driver=fpga --app=demo --scope
|
||||
|
||||
|
||||
A waveform trace `trace.vcd` will be generated in the current directory during the program execution. This trace includes a limited set of signals that are defined in `/hw/scripts/scope.json`. You can expand your signals' selection by updating the json file.
|
||||
|
||||
## Analyzing Vortex trace log
|
||||
|
||||
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
|
||||
We provide a trace sanitizer tool under ./ci/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands.
|
||||
|
||||
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
|
||||
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
|
||||
$ ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
|
||||
|
||||
$ diff trace_rtlsim.csv trace_simx.csv
|
||||
|
||||
The first column in the CSV trace is UUID (universal unique identifier) of the instruction and the content is sorted by the UUID.
|
||||
You can use the UUID to trace the same instruction running on either the RTL hw or SimX simulator.
|
||||
This can be very effective if you want to use SimX to debug your RTL hardware by comparing CSV traces.
|
51
docs/environment_setup.md
Normal file
51
docs/environment_setup.md
Normal file
|
@ -0,0 +1,51 @@
|
|||
# Environment Setup
|
||||
|
||||
These instructions apply to the development vortex repo using the updated toolchain. The updated toolchain is considered to be any commit of `master` pulled from July 2, 2023 onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`.
|
||||
|
||||
## Set Up on Your Own System
|
||||
|
||||
The toolchain binaries provided with Vortex are built on Ubuntu-based systems. To install Vortex on your own system, [follow these instructions](install_vortex.md).
|
||||
|
||||
## Servers for Georgia Tech Students and Collaborators
|
||||
|
||||
### Volvo
|
||||
|
||||
Volvo is a 64-core server provided by HPArch. You need valid credentials to access it. If you don't already have access, you can get in contact with your mentor to ask about setting your account up.
|
||||
|
||||
Setup on Volvo:
|
||||
|
||||
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
|
||||
2. `ssh volvo.cc.gatech.edu`
|
||||
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
|
||||
4. `source /nethome/software/set_vortex_env.sh` to set up the necessary environment variables.
|
||||
5. `make -s` in the `vortex` root directory
|
||||
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
|
||||
|
||||
### Nio
|
||||
|
||||
Nio is a 20-core desktop server provided by HPArch. If you have access to Volvo, you also have access to Nio.
|
||||
|
||||
Setup on Nio:
|
||||
|
||||
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
|
||||
2. `ssh nio.cc.gatech.edu`
|
||||
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
|
||||
4. `source /opt/set_vortex_env_dev.sh` to set up the necessary environment variables.
|
||||
5. `make -s` in the `vortex` root directory
|
||||
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
|
||||
|
||||
## Docker (Experimental)
|
||||
|
||||
Docker allows for isolated pre-built environments to be created, shared and used. The emulation mode required for ARM-based processors will incur a decrease in performance. Currently, the dockerfile is not included with the official vortex repository and is not actively maintained or supported.
|
||||
|
||||
### Setup with Docker
|
||||
|
||||
1. Clone repo recursively onto your local machine: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
|
||||
2. Download the dockerfile from [here](https://github.gatech.edu/gist/usubramanya3/f1bf3e953faa38a6372e1292ffd0b65c) and place it in the root of the repo.
|
||||
3. Build the Dockerfile into an image: `docker build --platform=linux/amd64 -t vortex -f dockerfile .`
|
||||
4. Run a container based on the image: `docker run --rm -v ./:/root/vortex/ -it --name vtx-dev --privileged=true --platform=linux/amd64 vortex`
|
||||
5. Install the toolchain `./ci/toolchain_install.sh --all` (once per container)
|
||||
6. `make -s` in `vortex` root directory
|
||||
7. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
|
||||
|
||||
You may exit from a container and resume a container you have exited or start a second terminal session `docker exec -it <container-name> bash`
|
217
docs/fpga_setup.md
Normal file
217
docs/fpga_setup.md
Normal file
|
@ -0,0 +1,217 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
|
||||
## Gaining Access to FPGA's with CRNCH
|
||||
If you are associated with Georgia Tech (or related workshops) you can use CRNCH's server to gain remote access to FPGA's. Otherwise, you can skip to the Xilinx or Intel (Altera) synthesis steps below.
|
||||
|
||||
## What is CRNCH?
|
||||
|
||||
**C**enter for **R**esearch into **N**ovel **C**omputing **H**ierarchies
|
||||
|
||||
## What does CRNCH Offer?
|
||||
|
||||
**The Rogues Gallery (RG)**: new concept focused on developing our understanding of next-generation hardware with a focus on unorthodox and uncommon technologies. **RG** will acquire new and unique hardware (ie, the aforementioned “*rogues*”) from vendors, research labs, and startups and make this hardware available to students, faculty, and industry collaborators within a managed data center environment
|
||||
|
||||
## Why are the Rogues Important?
|
||||
|
||||
By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore’s Law era of “cheap transistors” ends*. Specifically, the Rogues Gallery contains FPGA's which can be synthesized into Vortex hardware.
|
||||
|
||||
## How is the Rogues Gallery Funded?
|
||||
|
||||
Rogues Gallery testbed is primarily supported by the National Science Foundation (NSF) under NSF Award Number [#2016701](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2016701&HistoricalAwards=false)
|
||||
|
||||
## Rogues Gallery Documentation
|
||||
|
||||
You can read about RG in more detail on its official documentation [page](https://gt-crnch-rg.readthedocs.io/en/main/index.html#).
|
||||
|
||||
You can listen to a talk about RG [here](https://mediaspace.gatech.edu/media/Jeff%20Young%20-%20Rogues%20Gallery%20-%20CRNCH%20Summit%202021/1_lqlgr0jj)
|
||||
|
||||
[CRNCH Summit 2023](https://github.com/gt-crnch/crnch-summit-2023/tree/main)
|
||||
|
||||
## Request Access for Rogues Gallery
|
||||
|
||||
You should use [this form](https://crnch-rg.cc.gatech.edu/request-rogues-gallery-access/) to request access to RG’s reconfigurable computing (vortex fpga) resources. You should receive an email with your ticket item being created. Once it gets processed, you should get an email confirming that your access has been granted. It might take some time to get processed.
|
||||
|
||||
## How to Access Rogues Gallery?
|
||||
There are two methods of accessing CRNCH's Rogues Gallery:
|
||||
1) Web-based GUI: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/)
|
||||
2) SSH: `ssh <your-gt-username>@rg-login.crnch.gatech.edu`
|
||||
|
||||
|
||||
## Where should I keep my files?
|
||||
The CRNCH servers have a folder called `USERSCRATCH` which can be found in your home directory: `echo $HOME`. You should keep all your files in this folder since it is available across all the Rogues Gallery Nodes.
|
||||
|
||||
## **What Machines are Available in the Rogues Gallery?**
|
||||
|
||||
Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). Furthermore, you can find detailed information about the FPGA hardware [here](https://gt-crnch-rg.readthedocs.io/en/main/reconfig/xilinx/xilinx-getting-started.html).
|
||||
|
||||
## Allocate an FPGA Node
|
||||
Once you’ve connected to the CRNCH login node, you can use the Slurm scheduler to request an interactive job using `salloc`. This [page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html) explains why we use Slurm to request resources. Documentation for `salloc` can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). And here.
|
||||
|
||||
|
||||
To request 16 cores and 64GB of RAM for 6 hours on flubber1, an FPGA dev node:
|
||||
```bash
|
||||
salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber1 --time=06:00:00
|
||||
```
|
||||
Synthesis for Xilinx Boards
|
||||
----------------------
|
||||
Once you are logged in, you will need to complete some first time configurations. If you are interested in the Intel (Altera) synthesis steps, scroll down below.
|
||||
|
||||
### Source Configuration Scripts
|
||||
```
|
||||
# From any directory
|
||||
$ source /opt/xilinx/xrt/setup.sh
|
||||
$ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh
|
||||
```
|
||||
|
||||
### Check Installed FPGA Platforms
|
||||
`platforminfo -l` which tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. Otherwise, if there is an error then there was an issue with the previous two commands.
|
||||
|
||||
### Install Vortex Toolchain
|
||||
The Xilinx synthesis process requires verilator to generate the bitstream. Eventually, you will need the whole toolchain to run the bitstream on the FPGA. Therefore, the Vortex toolchain can be installed as follows. If you complete these steps properly, you should only need to complete them once and you can skip to `Activate Vortex Toolchain`
|
||||
```
|
||||
# Make a build directory from root and configure scripts for your environment
|
||||
mkdir build && cd build && ../configure --tooldir=$HOME/tools
|
||||
|
||||
# Install the whole prebuilt toolchain
|
||||
./ci/toolchain_install.sh --all
|
||||
|
||||
# Add environment variables to bashrc
|
||||
echo "source <full-path-to-vortex-root>/vortex/build/ci/toolchain_env.sh" >> ~/.bashrc
|
||||
```
|
||||
|
||||
### Activate Vortex Toolchain
|
||||
```
|
||||
# From any directory
|
||||
source ~/.bashrc
|
||||
|
||||
# Check environment setup
|
||||
verilator --version
|
||||
```
|
||||
|
||||
### Build the FPGA Bitstream
|
||||
The root directory contains the path `hw/syn/xilinx/xrt` which has the makefile used to generate the Vortex bitstream.
|
||||
|
||||
```
|
||||
$ cd hw/syn/xilinx/xrt
|
||||
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 &
|
||||
```
|
||||
This will run the synthesis under a new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
|
||||
The generated bitstream will be located under <BUILD_DIR>/bin/vortex_afu.xclbin
|
||||
|
||||
For long-running jobs, invocation of this makefile can take the following form:
|
||||
|
||||
`[CONFIGS=<vortex macros>] [PREFIX=<prefix directory name>] [NUM_CORES=<#>] TARGET=hw|hw_emu PLATFORM=<platform baseName> nohup make > <log filename> 2>&1 &`
|
||||
|
||||
For example:
|
||||
|
||||
```bash
|
||||
CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202310_1 nohup make > build_u250_hw_4c.log 2>&1 &
|
||||
```
|
||||
|
||||
The build is complete when the bitstream file `vortex_afu.xclbin` exists in `<prefix directory name>_<platform baseName>_hw|hw_emu/bin`.
|
||||
|
||||
### Running a Program on Xilinx FPGA
|
||||
|
||||
The [blackbox.sh](./simulation.md) script within the build directory can be used to run a test with Vortex’s xrt driver using the following command:
|
||||
|
||||
`FPGA_BIN_DIR=<path to bitstream directory> TARGET=hw|hw_emu PLATFORM=<platform baseName> ./ci/blackbox.sh --driver=xrt --app=<test name>`
|
||||
|
||||
For example:
|
||||
|
||||
```FPGA_BIN_DIR=<realpath> hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo```
|
||||
|
||||
Synthesis for Intel (Altera) Boards
|
||||
----------------------
|
||||
|
||||
### OPAE Environment Setup
|
||||
|
||||
|
||||
$ source /opt/inteldevstack/init_env_user.sh
|
||||
$ export OPAE_HOME=/opt/opae/1.1.2
|
||||
$ export PATH=$OPAE_HOME/bin:$PATH
|
||||
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
|
||||
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
|
||||
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
|
||||
|
||||
### OPAE Build
|
||||
|
||||
The FPGA has the following configuration options:
|
||||
- DEVICE_FAMILY=arria10 | stratix10
|
||||
- NUM_CORES=#n
|
||||
|
||||
Command line:
|
||||
|
||||
$ cd hw/syn/altera/opae
|
||||
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
|
||||
|
||||
A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete.
|
||||
Setting TARGET=ase will build the project for simulation using Intel ASE.
|
||||
|
||||
|
||||
### OPAE Build Configuration
|
||||
|
||||
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when building the processor. For example, the following parameters can be configured:
|
||||
- `NUM_WARPS`: Number of warps per cores
|
||||
- `NUM_THREADS`: Number of threads per warps
|
||||
- `PERF_ENABLE`: enable the use of all profile counters
|
||||
|
||||
You configure the synthesis build from the command line:
|
||||
|
||||
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
|
||||
|
||||
### OPAE Build Progress
|
||||
|
||||
You could check the last 10 lines in the build log for possible errors until build completion.
|
||||
|
||||
$ tail -n 10 <build_dir>/build.log
|
||||
|
||||
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
|
||||
|
||||
$ ps -u <username>
|
||||
|
||||
If the build fails and you need to restart it, clean up the build folder using the following command:
|
||||
|
||||
$ make clean
|
||||
|
||||
The file `vortex_afu.gbs` should exist when the build is done:
|
||||
|
||||
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
|
||||
|
||||
|
||||
### Signing the bitstream and Programming the FPGA
|
||||
|
||||
$ cd <build_dir>
|
||||
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
|
||||
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
|
||||
|
||||
### Sample FPGA Run Test
|
||||
Ensure you have the correct opae runtime for the FPGA target
|
||||
|
||||
```
|
||||
$ TARGET=FPGA make -C runtime/opae
|
||||
```
|
||||
|
||||
Run the [blackbox.sh](./simulation.md) from your Vortex build directory
|
||||
|
||||
```
|
||||
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
|
||||
```
|
||||
|
||||
### FPGA sample test running OpenCL sgemm kernel
|
||||
|
||||
You can use the `blackbox.sh` script to run the following from your Vortex build directory
|
||||
|
||||
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
|
||||
|
||||
### Testing Vortex using OPAE with Intel ASE Simulation
|
||||
Building ASE synthesis
|
||||
|
||||
```$ TARGET=asesim make -C runtime/opae```
|
||||
|
||||
Building ASE runtime
|
||||
|
||||
```$ TARGET=asesim make -C runtime/opae```
|
||||
|
||||
Running ASE simulation
|
||||
|
||||
```$ ASE_LOG=0 ASE_WORKDIR=<build_dir>/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n16"```
|
9
docs/index.md
Normal file
9
docs/index.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Vortex Documentation
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Codebase Layout](codebase.md): Summary of repo file tree
|
||||
- [Microarchitecture](microarchitecture.md): Vortex Pipeline and cache microarchitectural details and reconfigurability
|
||||
- [Simulation](simulation.md): Details for building and running each simulation driver
|
||||
- [Contributing](contributing.md): Process for contributing your own features including repo semantics and testing
|
||||
- [Debugging](debugging.md): Debugging configurations for each Vortex driver
|
81
docs/install_vortex.md
Normal file
81
docs/install_vortex.md
Normal file
|
@ -0,0 +1,81 @@
|
|||
# Installing and Setting Up the Vortex Environment
|
||||
|
||||
## Ubuntu 18.04, 20.04
|
||||
|
||||
1. Install the following dependencies:
|
||||
|
||||
```
|
||||
sudo apt-get install build-essential zlib1g-dev libtinfo-dev libncurses5 uuid-dev libboost-serialization-dev libpng-dev libhwloc-dev
|
||||
```
|
||||
|
||||
2. Upgrade GCC to 11:
|
||||
|
||||
```
|
||||
sudo apt-get install gcc-11 g++-11
|
||||
```
|
||||
|
||||
Multiple gcc versions on Ubuntu can be managed with update-alternatives, e.g.:
|
||||
|
||||
```
|
||||
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9
|
||||
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11
|
||||
```
|
||||
|
||||
3. Download the Vortex codebase:
|
||||
|
||||
```
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
```
|
||||
4. Build Vortex
|
||||
|
||||
```
|
||||
$ cd vortex
|
||||
$ mkdir -p build
|
||||
$ cd build
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ source ./ci/toolchain_env.sh
|
||||
$ make -s
|
||||
```
|
||||
|
||||
|
||||
## RHEL 8
|
||||
Note: depending on the system, some of the toolchain may need to be recompiled for non-Ubuntu Linux. The source for the tools can be found [here](https://github.com/vortexgpgpu/).
|
||||
|
||||
1. Install the following dependencies:
|
||||
|
||||
```
|
||||
sudo yum install libpng-devel boost boost-devel boost-serialization libuuid-devel opencl-headers hwloc hwloc-devel gmp-devel compat-hwloc1
|
||||
```
|
||||
|
||||
2. Upgrade GCC to 11:
|
||||
|
||||
```
|
||||
sudo yum install gcc-toolset-11
|
||||
```
|
||||
|
||||
Multiple gcc versions on Red Hat can be managed with scl
|
||||
|
||||
3. Install MPFR 4.2.0:
|
||||
|
||||
Download [the source](https://ftp.gnu.org/gnu/mpfr/) and follow [the installation documentation](https://www.mpfr.org/mpfr-current/mpfr.html#How-to-Install).
|
||||
|
||||
4. Download the Vortex codebase:
|
||||
|
||||
```
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
```
|
||||
|
||||
5. Build Vortex
|
||||
|
||||
```
|
||||
$ cd vortex
|
||||
$ mkdir -p build
|
||||
$ cd build
|
||||
$ ../configure --xlen=32 --tooldir=$HOME/tools
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ source ./ci/toolchain_env.sh
|
||||
$ make -s
|
||||
```
|
83
docs/microarchitecture.md
Normal file
83
docs/microarchitecture.md
Normal file
|
@ -0,0 +1,83 @@
|
|||
# Vortex Microarchitecture
|
||||
|
||||
### Vortex GPGPU Execution Model
|
||||
|
||||
Vortex uses the SIMT (Single Instruction, Multiple Threads) execution model with a single warp issued per cycle.
|
||||
|
||||
- **Threads**
|
||||
- Smallest unit of computation
|
||||
- Each thread has its own register file (32 int + 32 fp registers)
|
||||
- Threads execute in parallel
|
||||
- **Warps**
|
||||
- A logical cluster of threads
|
||||
- Each thread in a warp executes the same instruction
|
||||
- The PC is shared; maintain thread mask for Writeback
|
||||
- Warp's execution is time-multiplexed at log steps
|
||||
- Ex. warp 0 executes at cycle 0, warp 1 executes at cycle 1
|
||||
|
||||
### Vortex RISC-V ISA Extension
|
||||
|
||||
- **Thread Mask Control**
|
||||
- Control the number of threads to activate during execution
|
||||
- `TMC` *count*: activate count threads
|
||||
- **Warp Scheduling**
|
||||
- Control the number of warps to activate during execution
|
||||
- `WSPAWN` *count, addr*: activate count warps and jump to addr location
|
||||
- **Control-Flow Divergence**
|
||||
- Control threads activation when a branch diverges
|
||||
- `SPLIT` *taken, predicate*: apply predicate thread mask and save current state into IPDOM stack
|
||||
- `JOIN`: pop IPDOM stack to restore thread mask
|
||||
- `PRED` *predicate, restore_mask*: thread predicate instruction
|
||||
- **Warp Synchronization**
|
||||
- `BAR` *id, count*: stall warps entering barrier *id* until count is reached
|
||||
|
||||
### Vortex Pipeline/Datapath
|
||||
|
||||

|
||||
|
||||
Vortex has a 6-stage pipeline:
|
||||
|
||||
- **Schedule**
|
||||
- Warp Scheduler
|
||||
- Schedule the next PC into the pipeline
|
||||
- Track stalled, active warps
|
||||
- IPDOM Stack
|
||||
- Save split/join states for divergent threads
|
||||
- Inflight Tracker
|
||||
- Track in-flight instructions
|
||||
|
||||
- **Fetch**
|
||||
- Retrieve instructions from memory
|
||||
- Handle I-cache requests/responses
|
||||
- **Decode**
|
||||
- Decode fetched instructions
|
||||
- Notify warp scheduler on control instructions
|
||||
- **Issue**
|
||||
- IBuffer
|
||||
- Store decoded instructions in separate per-warp queues
|
||||
- Scoreboard
|
||||
- Track in-use registers
|
||||
- Check register use for decoded instructions
|
||||
- Operands Collector
|
||||
- Fetch the operands for issued instructions from the register file
|
||||
- **Execute**
|
||||
- ALU Unit
|
||||
- Handle arithmetic and branch operations
|
||||
- FPU Unit
|
||||
- Handle floating-point operations
|
||||
- LSU Unit
|
||||
- Handle load/store operations
|
||||
- SFU Unit
|
||||
- Handle warp control operations
|
||||
- Handle Control Status Registers (CSRs) operations
|
||||
- **Commit**
|
||||
- Write result back to the register file and update the Scoreboard.
|
||||
|
||||
### Vortex clustering architecture
|
||||
- Sockets
|
||||
- Grouping multiple cores sharing L1 cache
|
||||
- Clusters
|
||||
- Grouping of sockets sharing L2 cache
|
||||
|
||||
### Vortex Cache Subsystem
|
||||
More details about the cache subsystem are provided [here](./cache_subsystem.md).
|
|
@ -6,13 +6,16 @@
|
|||
|
||||
### Cycle-Approximate Simulation
|
||||
|
||||
SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simX` folder.
|
||||
SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simx` folder. The [readme](README.md) has the most detailed instructions for building and running simX.
|
||||
|
||||
- To install on your own system, [follow this document](install_vortex.md).
|
||||
- For the different Georgia Tech environments Vortex supports, [read this document](environment_setup.md).
|
||||
|
||||
### FPGA Simulation
|
||||
|
||||
The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/FPGA_Startup_Guide.md)
|
||||
The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Altera based FPGAs.
|
||||
|
||||
### How to Test
|
||||
### How to Test (using `blackbox.sh`)
|
||||
|
||||
Running tests under specific drivers (rtlsim,simx,fpga) is done using the script named `blackbox.sh` located in the `ci` folder. Running command `./ci/blackbox.sh --help` from the Vortex root directory will display the following command line arguments for `blackbox.sh`:
|
||||
|
||||
|
@ -20,17 +23,17 @@ Running tests under specific drivers (rtlsim,simx,fpga) is done using the script
|
|||
- *Cores* - used to specify the number of cores (processing element containing multiple warps) within a configuration.
|
||||
- *Warps* - used to specify the number of warps (collection of concurrent hardware threads) within a configuration.
|
||||
- *Threads* - used to specify the number of threads (smallest unit of computation) within a configuration.
|
||||
- *L2cache* - used to enable the shard l2cache among the Vortex cores.
|
||||
- *L2cache* - used to enable the shared l2cache among the Vortex cores.
|
||||
- *L3cache* - used to enable the shared l3cache among the Vortex clusters.
|
||||
- *Driver* - used to specify which driver to run the Vortex simulation (either rtlsim, vlsim, fpga, or simx).
|
||||
- *Driver* - used to specify which driver to run the Vortex simulation (either rtlsim, opae, xrt, simx).
|
||||
- *Debug* - used to enable debug mode for the Vortex simulation.
|
||||
- *Perf* - used to enable the detailed performance counters within the Vortex simulation.
|
||||
- *App* - used to specify which test/benchmark to run in the Vortex simulation. The main choices are vecadd, sgemm, basic, demo, and dogfood. Other tests/benchmarks are located in the `/benchmarks/opencl` folder, though not all of them work with the current version of Vortex.
|
||||
- *Args* - used to pass additional arguments to the application.
|
||||
|
||||
Example use of command line arguments: Run the sgemm benchmark using the vlsim driver with a Vortex configuration of 1 cluster, 4 cores, 4 warps, and 4 threads.
|
||||
Example use of command line arguments: Run the sgemm benchmark using the opae driver with a Vortex configuration of 1 cluster, 4 cores, 4 warps, and 4 threads.
|
||||
|
||||
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=4 --driver=vlsim --app=sgemm
|
||||
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=4 --driver=opae --app=sgemm
|
||||
|
||||
Output from terminal:
|
||||
```
|
||||
|
@ -47,4 +50,20 @@ PERF: core1: instrs=90693, cycles=53108, IPC=1.707709
|
|||
PERF: core2: instrs=90849, cycles=53107, IPC=1.710678
|
||||
PERF: core3: instrs=90836, cycles=50347, IPC=1.804199
|
||||
PERF: instrs=363180, cycles=53108, IPC=6.838518
|
||||
```
|
||||
```
|
||||
|
||||
## Additional Quick Start Scenarios
|
||||
|
||||
Running Vortex simulators with different configurations and drivers is supported. For example:
|
||||
|
||||
- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads
|
||||
|
||||
$ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic
|
||||
|
||||
- Run demo driver test with opae driver and Vortex config of 1 cluster, 4 cores, 4 warps, 2 threads
|
||||
|
||||
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
|
||||
|
||||
- Run dogfood driver test with simx driver and Vortex config of 4 clusters, 4 cores, 8 warps, 6 threads
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood
|
52
docs/testing.md
Normal file
52
docs/testing.md
Normal file
|
@ -0,0 +1,52 @@
|
|||
# Testing
|
||||
|
||||
## Running a Vortex application
|
||||
|
||||
The framework provides a utility script: blackbox.sh under the /ci/ folder for executing applications in the tests tree. It gets copied into the `build` directory with all the environment variables resolved, so you should run it from the `build` directory as follows:
|
||||
You can query the commandline options of the tool using:
|
||||
|
||||
$ ./ci/blackbox.sh --help
|
||||
|
||||
To execute sgemm test program on the simx driver and passing "-n10" as argument to sgemm:
|
||||
|
||||
$ ./ci/blackbox.sh --driver=simx --app=sgemm --args="-n10"
|
||||
|
||||
You can execute the same application on a GPU architecture with 2 cores:
|
||||
|
||||
$ ./ci/blackbox.sh --cores=2 --driver=simx --app=sgemm --args="-n10"
|
||||
|
||||
When executing, Blackbox needs to recompile the driver if the desired architecture changes.
|
||||
It tracks the latest configuration in a file under the current directory blackbox.<driver>.cache.
|
||||
To avoid having to rebuild the driver all the time, Blackbox checks if the latest cached configuration matches the current.
|
||||
|
||||
## Running Benchmarks
|
||||
|
||||
The Vortex test suite is located under the /tests/ folder.
|
||||
You can execute the default regression suite by running the following commands at the root folder.
|
||||
|
||||
$ make -C tests/regression run-simx
|
||||
$ make -C tests/regression run-rtlsim
|
||||
|
||||
You can execute the default OpenCL suite by running the following commands at the root folder.
|
||||
|
||||
$ make -C tests/opencl run-simx
|
||||
$ make -C tests/opencl run-rtlsim
|
||||
|
||||
## Creating Your Own Regression Test
|
||||
|
||||
Inside `tests/regression` you will find a series of folders which are named based on what they test.
|
||||
Browse the existing tests to find one that is similar to the test you want to create.
|
||||
Once you have found a similar baseline, you can copy the folder and rename it to what you are planning to test.
|
||||
A regression test typically implements the following files:
|
||||
- ***kernel.cpp*** contains the GPU kernel code.
|
||||
- ***main.cpp*** contains the host CPU code.
|
||||
- ***Makefile*** defines the compiler build commands for the CPU and GPU binaries.
|
||||
|
||||
Sync your build folder: `$ ../configure`
|
||||
|
||||
Compile your test: `$ make -C tests/regression/<test-name>`
|
||||
|
||||
Run your test: `$ ./ci/blackbox.sh --driver=simx --app=<test-name> --debug`
|
||||
|
||||
## Adding Your Tests to the CI Pipeline
|
||||
If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. Furthermore, if you are contributing a new feature, it is recommended that you add the ability to enable / disable the new feature that you are adding. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md).
|
|
@ -1,21 +0,0 @@
|
|||
# Driver back-ends, listed in the order `all` builds them.
SUBDIRS := stub rtlsim simx opae

all: $(SUBDIRS)

# Each back-end lives in a sub-directory of the same name; delegate to its Makefile.
$(SUBDIRS):
	$(MAKE) -C $@

# Clean every back-end; make stops at the first sub-make that fails.
clean:
	$(MAKE) clean -C stub
	$(MAKE) clean -C opae
	$(MAKE) clean -C rtlsim
	$(MAKE) clean -C simx

.PHONY: all stub opae rtlsim simx clean
|
|
@ -1,317 +0,0 @@
|
|||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
|
||||
// Upload `size` bytes of kernel image from `content` to the device, starting at
// the address reported for VX_CAPS_KERNEL_BASE_ADDR.
//
// The image is pushed through a single 64 KiB shared staging buffer, one chunk
// at a time.
//
// Returns 0 on success, -1 when the arguments are invalid or when the base
// address query / staging allocation fails, and the vx_copy_to_dev() error code
// when a chunk transfer fails.
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
  if (content == NULL || size == 0)
    return -1;

  // size of the staging buffer, i.e. the largest chunk sent per transfer
  const size_t staging_size = 65536;

  unsigned base_addr;
  if (vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &base_addr) != 0)
    return -1;

  vx_buffer_h staging;
  if (vx_alloc_shared_mem(device, staging_size, &staging) != 0)
    return -1;

  uint8_t* const host_ptr = (uint8_t*)vx_host_ptr(staging);
  const uint8_t* const src = (const uint8_t*)content;

  int status = 0;
  for (size_t done = 0; done < size;) {
    const size_t remaining = size - done;
    const size_t chunk = (remaining < staging_size) ? remaining : staging_size;

    // stage the next chunk on the host side, then push it to the device
    std::memcpy(host_ptr, src + done, chunk);
    status = vx_copy_to_dev(staging, base_addr + done, chunk, 0);
    if (status != 0)
      break; // fall through to the common release path

    done += chunk;
  }

  vx_buf_release(staging);

  return status;
}
|
||||
|
||||
// Read the kernel image stored in `filename` and upload it to the device with
// vx_upload_kernel_bytes().
//
// Returns -1 when `filename` is NULL, the file cannot be opened, its size cannot
// be determined, it is empty, or it cannot be read completely; otherwise returns
// the result of vx_upload_kernel_bytes().
extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
  if (NULL == filename)
    return -1;

  // binary mode: the kernel image must reach the device byte-for-byte
  std::ifstream ifs(filename, std::ios::binary);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  // determine the file size
  ifs.seekg(0, ifs.end);
  const std::streamoff size = ifs.tellg();
  if (size <= 0)
    return -1; // tellg() failed (-1) or the file is empty: nothing to upload
  ifs.seekg(0, ifs.beg);

  // read file content
  auto content = new char [static_cast<size_t>(size)];
  ifs.read(content, size);
  if (!ifs) {
    // short read: do not upload a partially filled buffer
    std::cout << "error: failed to read " << filename << std::endl;
    delete[] content;
    return -1;
  }

  // upload
  int err = vx_upload_kernel_bytes(device, content, static_cast<size_t>(size));

  // release buffer
  delete[] content;

  return err;
}
|
||||
|
||||
/*static uint32_t get_csr_32(const uint32_t* buffer, int addr) {
|
||||
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
|
||||
return value_lo;
|
||||
}*/
|
||||
|
||||
static uint64_t get_csr_64(const uint32_t* buffer, int addr) {
|
||||
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
|
||||
uint32_t value_hi = buffer[addr - CSR_MPM_BASE + 32];
|
||||
return (uint64_t(value_hi) << 32) | value_lo;
|
||||
}
|
||||
|
||||
#ifdef PERF_ENABLE

// Counter type used for reporting: "%llu" is a portable match for it, whereas
// "%ld" is not a portable match for uint64_t.
typedef unsigned long long perf_count_t;

// Detailed performance counters of one core, or their sum over all cores.
struct perf_counters_t {
  // pipeline stalls
  perf_count_t ibuffer_stalls;
  perf_count_t scoreboard_stalls;
  perf_count_t alu_stalls;
  perf_count_t lsu_stalls;
  perf_count_t csr_stalls;
  perf_count_t fpu_stalls;
  perf_count_t gpu_stalls;
  // icache
  perf_count_t icache_reads;
  perf_count_t icache_read_misses;
  perf_count_t icache_pipe_stalls;
  perf_count_t icache_rsp_stalls;
  // dcache
  perf_count_t dcache_reads;
  perf_count_t dcache_writes;
  perf_count_t dcache_read_misses;
  perf_count_t dcache_write_misses;
  perf_count_t dcache_bank_stalls;
  perf_count_t dcache_mshr_stalls;
  perf_count_t dcache_pipe_stalls;
  perf_count_t dcache_rsp_stalls;
  // shared memory
  perf_count_t smem_reads;
  perf_count_t smem_writes;
  perf_count_t smem_bank_stalls;
  // memory
  perf_count_t mem_reads;
  perf_count_t mem_writes;
  perf_count_t mem_stalls;
  perf_count_t mem_lat;
};

// Hit ratio in percent. The denominator is clamped to 1 so that an idle unit
// (zero accesses) no longer divides by zero and converts NaN to int.
static int perf_hit_ratio(perf_count_t misses, perf_count_t accesses) {
  return (int)((1.0 - (double(misses) / double(accesses ? accesses : 1))) * 100);
}

// Utilization in percent: requests / (requests + stalls), denominator clamped to 1.
static int perf_utilization(perf_count_t requests, perf_count_t stalls) {
  const perf_count_t total = requests + stalls;
  return (int)((double(requests) / double(total ? total : 1)) * 100);
}

// Integer average sum / count, denominator clamped to 1.
static int perf_average(perf_count_t sum, perf_count_t count) {
  return (int)(double(sum) / double(count ? count : 1));
}

// Decode all detailed counters of one core from its staged CSR block.
static perf_counters_t perf_read(const uint32_t* csrs) {
  perf_counters_t c;
  c.ibuffer_stalls      = get_csr_64(csrs, CSR_MPM_IBUF_ST);
  c.scoreboard_stalls   = get_csr_64(csrs, CSR_MPM_SCRB_ST);
  c.alu_stalls          = get_csr_64(csrs, CSR_MPM_ALU_ST);
  c.lsu_stalls          = get_csr_64(csrs, CSR_MPM_LSU_ST);
  c.csr_stalls          = get_csr_64(csrs, CSR_MPM_CSR_ST);
  c.fpu_stalls          = get_csr_64(csrs, CSR_MPM_FPU_ST);
  c.gpu_stalls          = get_csr_64(csrs, CSR_MPM_GPU_ST);
  c.icache_reads        = get_csr_64(csrs, CSR_MPM_ICACHE_READS);
  c.icache_read_misses  = get_csr_64(csrs, CSR_MPM_ICACHE_MISS_R);
  c.icache_pipe_stalls  = get_csr_64(csrs, CSR_MPM_ICACHE_PIPE_ST);
  c.icache_rsp_stalls   = get_csr_64(csrs, CSR_MPM_ICACHE_CRSP_ST);
  c.dcache_reads        = get_csr_64(csrs, CSR_MPM_DCACHE_READS);
  c.dcache_writes       = get_csr_64(csrs, CSR_MPM_DCACHE_WRITES);
  c.dcache_read_misses  = get_csr_64(csrs, CSR_MPM_DCACHE_MISS_R);
  c.dcache_write_misses = get_csr_64(csrs, CSR_MPM_DCACHE_MISS_W);
  c.dcache_bank_stalls  = get_csr_64(csrs, CSR_MPM_DCACHE_BANK_ST);
  c.dcache_mshr_stalls  = get_csr_64(csrs, CSR_MPM_DCACHE_MSHR_ST);
  c.dcache_pipe_stalls  = get_csr_64(csrs, CSR_MPM_DCACHE_PIPE_ST);
  c.dcache_rsp_stalls   = get_csr_64(csrs, CSR_MPM_DCACHE_CRSP_ST);
  c.smem_reads          = get_csr_64(csrs, CSR_MPM_SMEM_READS);
  c.smem_writes         = get_csr_64(csrs, CSR_MPM_SMEM_WRITES);
  c.smem_bank_stalls    = get_csr_64(csrs, CSR_MPM_SMEM_BANK_ST);
  c.mem_reads           = get_csr_64(csrs, CSR_MPM_MEM_READS);
  c.mem_writes          = get_csr_64(csrs, CSR_MPM_MEM_WRITES);
  c.mem_stalls          = get_csr_64(csrs, CSR_MPM_MEM_ST);
  c.mem_lat             = get_csr_64(csrs, CSR_MPM_MEM_LAT);
  return c;
}

// Add the counters of one core to the running totals.
static void perf_accumulate(perf_counters_t& total, const perf_counters_t& core) {
  total.ibuffer_stalls      += core.ibuffer_stalls;
  total.scoreboard_stalls   += core.scoreboard_stalls;
  total.alu_stalls          += core.alu_stalls;
  total.lsu_stalls          += core.lsu_stalls;
  total.csr_stalls          += core.csr_stalls;
  total.fpu_stalls          += core.fpu_stalls;
  total.gpu_stalls          += core.gpu_stalls;
  total.icache_reads        += core.icache_reads;
  total.icache_read_misses  += core.icache_read_misses;
  total.icache_pipe_stalls  += core.icache_pipe_stalls;
  total.icache_rsp_stalls   += core.icache_rsp_stalls;
  total.dcache_reads        += core.dcache_reads;
  total.dcache_writes       += core.dcache_writes;
  total.dcache_read_misses  += core.dcache_read_misses;
  total.dcache_write_misses += core.dcache_write_misses;
  total.dcache_bank_stalls  += core.dcache_bank_stalls;
  total.dcache_mshr_stalls  += core.dcache_mshr_stalls;
  total.dcache_pipe_stalls  += core.dcache_pipe_stalls;
  total.dcache_rsp_stalls   += core.dcache_rsp_stalls;
  total.smem_reads          += core.smem_reads;
  total.smem_writes         += core.smem_writes;
  total.smem_bank_stalls    += core.smem_bank_stalls;
  total.mem_reads           += core.mem_reads;
  total.mem_writes          += core.mem_writes;
  total.mem_stalls          += core.mem_stalls;
  total.mem_lat             += core.mem_lat;
}

// Print one set of counters. `prefix` is "coreN: " for a per-core report and ""
// for the totals, so both reports share the exact same line formats.
// NOTE: the "reponse" spelling is kept as-is because the lines are program
// output that existing tooling may match on.
static void perf_print(FILE* stream, const char* prefix, const perf_counters_t& c) {
  // pipeline
  fprintf(stream, "PERF: %sibuffer stalls=%llu\n", prefix, c.ibuffer_stalls);
  fprintf(stream, "PERF: %sscoreboard stalls=%llu\n", prefix, c.scoreboard_stalls);
  fprintf(stream, "PERF: %salu unit stalls=%llu\n", prefix, c.alu_stalls);
  fprintf(stream, "PERF: %slsu unit stalls=%llu\n", prefix, c.lsu_stalls);
  fprintf(stream, "PERF: %scsr unit stalls=%llu\n", prefix, c.csr_stalls);
  fprintf(stream, "PERF: %sfpu unit stalls=%llu\n", prefix, c.fpu_stalls);
  fprintf(stream, "PERF: %sgpu unit stalls=%llu\n", prefix, c.gpu_stalls);
  // icache
  fprintf(stream, "PERF: %sicache reads=%llu\n", prefix, c.icache_reads);
  fprintf(stream, "PERF: %sicache read misses=%llu (hit ratio=%d%%)\n", prefix, c.icache_read_misses,
          perf_hit_ratio(c.icache_read_misses, c.icache_reads));
  fprintf(stream, "PERF: %sicache pipeline stalls=%llu\n", prefix, c.icache_pipe_stalls);
  fprintf(stream, "PERF: %sicache reponse stalls=%llu\n", prefix, c.icache_rsp_stalls);
  // dcache
  fprintf(stream, "PERF: %sdcache reads=%llu\n", prefix, c.dcache_reads);
  fprintf(stream, "PERF: %sdcache writes=%llu\n", prefix, c.dcache_writes);
  fprintf(stream, "PERF: %sdcache read misses=%llu (hit ratio=%d%%)\n", prefix, c.dcache_read_misses,
          perf_hit_ratio(c.dcache_read_misses, c.dcache_reads));
  fprintf(stream, "PERF: %sdcache write misses=%llu (hit ratio=%d%%)\n", prefix, c.dcache_write_misses,
          perf_hit_ratio(c.dcache_write_misses, c.dcache_writes));
  fprintf(stream, "PERF: %sdcache bank stalls=%llu (utilization=%d%%)\n", prefix, c.dcache_bank_stalls,
          perf_utilization(c.dcache_reads + c.dcache_writes, c.dcache_bank_stalls));
  fprintf(stream, "PERF: %sdcache mshr stalls=%llu\n", prefix, c.dcache_mshr_stalls);
  fprintf(stream, "PERF: %sdcache pipeline stalls=%llu\n", prefix, c.dcache_pipe_stalls);
  fprintf(stream, "PERF: %sdcache reponse stalls=%llu\n", prefix, c.dcache_rsp_stalls);
  // shared memory
  fprintf(stream, "PERF: %ssmem reads=%llu\n", prefix, c.smem_reads);
  fprintf(stream, "PERF: %ssmem writes=%llu\n", prefix, c.smem_writes);
  fprintf(stream, "PERF: %ssmem bank stalls=%llu (utilization=%d%%)\n", prefix, c.smem_bank_stalls,
          perf_utilization(c.smem_reads + c.smem_writes, c.smem_bank_stalls));
  // memory
  fprintf(stream, "PERF: %smemory requests=%llu (reads=%llu, writes=%llu)\n", prefix,
          (c.mem_reads + c.mem_writes), c.mem_reads, c.mem_writes);
  fprintf(stream, "PERF: %smemory stalls=%llu (utilization=%d%%)\n", prefix, c.mem_stalls,
          perf_utilization(c.mem_reads + c.mem_writes, c.mem_stalls));
  fprintf(stream, "PERF: %smemory average latency=%d cycles\n", prefix,
          perf_average(c.mem_lat, c.mem_reads));
}

#endif // PERF_ENABLE

// Dump the device performance counters to `stream`.
//
// For every core reported by VX_CAPS_MAX_CORES, a block of 64 32-bit CSR words is
// copied from IO_CSR_ADDR (+ one block per core) into a shared staging buffer and
// decoded. Per-core lines are printed only when there is more than one core; the
// totals are always printed. Instructions are summed over the cores, while the
// reported cycle count is the maximum over the cores. When PERF_ENABLE is defined
// the detailed pipeline/cache/memory counters are reported as well.
//
// Returns 0 on success, or the error code of the first failing driver call.
extern int vx_dump_perf(vx_device_h device, FILE* stream) {
  unsigned num_cores;
  int ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
  if (ret != 0)
    return ret;

  // size in bytes of the CSR block staged for each core
  const size_t csr_block_size = 64 * sizeof(uint32_t);

  vx_buffer_h staging_buf;
  ret = vx_alloc_shared_mem(device, csr_block_size, &staging_buf);
  if (ret != 0)
    return ret;

  auto staging_ptr = (uint32_t*)vx_host_ptr(staging_buf);

  unsigned long long instrs = 0;
  unsigned long long cycles = 0;
#ifdef PERF_ENABLE
  perf_counters_t total = perf_counters_t(); // value-initialized: all counters zero
#endif

  for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
    ret = vx_copy_from_dev(staging_buf, IO_CSR_ADDR + csr_block_size * core_id, csr_block_size, 0);
    if (ret != 0) {
      vx_buf_release(staging_buf);
      return ret;
    }

    unsigned long long instrs_per_core = get_csr_64(staging_ptr, CSR_MINSTRET);
    unsigned long long cycles_per_core = get_csr_64(staging_ptr, CSR_MCYCLE);
    float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
    if (num_cores > 1) fprintf(stream, "PERF: core%u: instrs=%llu, cycles=%llu, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
    instrs += instrs_per_core;
    if (cycles_per_core > cycles)
      cycles = cycles_per_core;

  #ifdef PERF_ENABLE
    perf_counters_t core = perf_read(staging_ptr);
    if (num_cores > 1) {
      char prefix[32];
      snprintf(prefix, sizeof(prefix), "core%u: ", core_id);
      perf_print(stream, prefix, core);
    }
    perf_accumulate(total, core);
  #endif
  }

  float IPC = (float)(double(instrs) / double(cycles));
  fprintf(stream, "PERF: instrs=%llu, cycles=%llu, IPC=%f\n", instrs, cycles, IPC);

#ifdef PERF_ENABLE
  perf_print(stream, "", total);
#endif

  // release allocated resources
  vx_buf_release(staging_buf);

  return ret;
}
|
|
@ -1,77 +0,0 @@
|
|||
#ifndef __VX_DRIVER_H__
|
||||
#define __VX_DRIVER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void* vx_device_h;
|
||||
|
||||
typedef void* vx_buffer_h;
|
||||
|
||||
// device caps ids
|
||||
#define VX_CAPS_VERSION 0x0
|
||||
#define VX_CAPS_MAX_CORES 0x1
|
||||
#define VX_CAPS_MAX_WARPS 0x2
|
||||
#define VX_CAPS_MAX_THREADS 0x3
|
||||
#define VX_CAPS_CACHE_LINE_SIZE 0x4
|
||||
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
|
||||
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
|
||||
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
#define ALLOC_BASE_ADDR 0x00000000
|
||||
#define LOCAL_MEM_SIZE 0xffffffff
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
||||
// Close the device when all the operations are done
|
||||
int vx_dev_close(vx_device_h hdevice);
|
||||
|
||||
// return device configurations
|
||||
int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value);
|
||||
|
||||
// Allocate shared buffer with device
|
||||
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
|
||||
|
||||
// Get host pointer address
|
||||
void* vx_host_ptr(vx_buffer_h hbuffer);
|
||||
|
||||
// release buffer
|
||||
int vx_buf_release(vx_buffer_h hbuffer);
|
||||
|
||||
// allocate device memory and return address
|
||||
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
|
||||
|
||||
// Copy bytes from buffer to device local memory
|
||||
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
|
||||
|
||||
// Copy bytes from device local memory to buffer
|
||||
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
|
||||
|
||||
// Start device execution
|
||||
int vx_start(vx_device_h hdevice);
|
||||
|
||||
// Wait for device ready with milliseconds timeout
|
||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
|
||||
|
||||
// upload kernel bytes to device
|
||||
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
|
||||
|
||||
// upload kernel file to device
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
// dump performance counters
|
||||
int vx_dump_perf(vx_device_h device, FILE* stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __VX_DRIVER_H__
|
|
@ -1,116 +0,0 @@
|
|||
OPAE_HOME ?= /tools/opae/1.4.0
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
|
||||
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib
|
||||
|
||||
#SCOPE=1
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
|
||||
# data relocation and protection (RELRO, immediate binding)
|
||||
LDFLAGS +=-z relro -z now
|
||||
|
||||
# stack buffer overrun detection
|
||||
CXXFLAGS +=-fstack-protector
|
||||
|
||||
# Position independent code
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
# Add external configuration
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Dump perf stats
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared
|
||||
|
||||
FPGA_LIBS += -luuid -lopae-c
|
||||
|
||||
ASE_LIBS += -luuid -lopae-c-ase
|
||||
|
||||
VLSIM_LIBS += -lopae-c-vlsim
|
||||
|
||||
ASE_DIR = ase
|
||||
|
||||
VLSIM_DIR = vlsim
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
PROJECT_ASE = $(ASE_DIR)/libvortex.so
|
||||
|
||||
PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
|
||||
|
||||
AFU_JSON_INFO = vortex_afu.h
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += vx_scope.cpp
|
||||
SCOPE_ENABLE = SCOPE=1
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
PERF_ENABLE = PERF=1
|
||||
endif
|
||||
|
||||
all: vlsim
|
||||
|
||||
# AFU info from JSON file, including AFU UUID
|
||||
# `json` is a convenience alias, not a file: the generated C header is
# written to $(AFU_JSON_INFO) instead of to a stray file named "json".
.PHONY: json
json: ../../hw/opae/vortex_afu.json
	afu_json_mgr json-info --afu-json=$^ --c-hdr=$(AFU_JSON_INFO)
|
||||
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
vlsim-hw: $(SCOPE_H)
|
||||
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C vlsim
|
||||
|
||||
# Build the driver against the real OPAE library.
# Only $(SRCS) is handed to the compiler: $(SCOPE_H) is a prerequisite so the
# header gets generated first, but it must not appear on the link line
# (using $^ here would pass scope-defs.h as an input when SCOPE is set).
fpga: $(SRCS) $(SCOPE_H)
	$(CXX) $(CXXFLAGS) -DUSE_FPGA $(SRCS) $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
|
||||
asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||
|
||||
vlsim: $(SRCS) vlsim-hw
|
||||
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
||||
|
||||
vortex.o: vortex.cpp
|
||||
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
|
||||
|
||||
# Create the ASE output directory; uses the target name so the rule keeps
# working if ASE_DIR is overridden.
$(ASE_DIR):
	mkdir -p $@
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
|
||||
|
||||
clean-fpga:
|
||||
rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
clean-asesim:
|
||||
rm -rf $(PROJECT_ASE) *.o .depend
|
||||
|
||||
clean-vlsim:
|
||||
$(MAKE) -C vlsim clean
|
||||
|
||||
clean: clean-fpga clean-asesim clean-vlsim
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
1
driver/opae/vlsim/.gitignore
vendored
1
driver/opae/vlsim/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
/obj_dir/*
|
|
@ -1,98 +0,0 @@
|
|||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -DUSE_VLSIM -fPIC -Wno-maybe-uninitialized
|
||||
CFLAGS += -I../../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CFLAGS += $(CONFIGS)
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
# LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
RTL_DIR=../../../hw/rtl
|
||||
DPI_DIR=../../../hw/dpi
|
||||
|
||||
SRCS = fpga.cpp opae_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CFLAGS += -DSCOPE
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
|
||||
../../../hw/scripts/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
|
||||
|
||||
# Verilate the AFU shim together with the C++ sources, then build the
# generated makefile in obj_dir. $(MAKE) is used for the recursive call so
# that command-line flags and variables propagate to the sub-make.
$(PROJECT): $(SRCS) vortex_afu.h
	verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh vortex_afu.h
|
|
@ -1,84 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include "fpga.h"
|
||||
#include "opae_sim.h"
|
||||
#include <VX_config.h>
|
||||
|
||||
// Creates a simulator instance and returns it through *handle as an opaque
// FPGA handle. `token` is not consulted; `flags` must be 0.
// Returns FPGA_INVALID_PARAM when handle is NULL or flags is non-zero.
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
  const bool args_ok = (handle != NULL) && (0 == flags);
  if (!args_ok) {
    return FPGA_INVALID_PARAM;
  }
  opae_sim *instance = new opae_sim();
  *handle = reinterpret_cast<fpga_handle>(instance);
  return FPGA_OK;
}
|
||||
|
||||
// Destroys the simulator instance behind `handle`.
// Returns FPGA_INVALID_PARAM for a NULL handle, FPGA_OK otherwise.
extern fpga_result fpgaClose(fpga_handle handle) {
  if (handle != NULL) {
    delete reinterpret_cast<opae_sim*>(handle);
    return FPGA_OK;
  }
  return FPGA_INVALID_PARAM;
}
|
||||
|
||||
// Allocates a host buffer of `len` bytes through the simulator.
// On success *buf_addr receives the host pointer and *wsid the buffer id.
// Returns FPGA_INVALID_PARAM for a NULL handle/output pointer or len == 0,
// and FPGA_NO_MEMORY when the simulator reports an allocation failure.
extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
  const bool bad_args = (NULL == handle) || (0 == len)
                     || (NULL == buf_addr) || (NULL == wsid);
  if (bad_args) {
    return FPGA_INVALID_PARAM;
  }

  opae_sim *instance = reinterpret_cast<opae_sim*>(handle);
  const int status = instance->prepare_buffer(len, buf_addr, wsid, flags);
  return (0 == status) ? FPGA_OK : FPGA_NO_MEMORY;
}
|
||||
|
||||
// Releases the host buffer identified by `wsid`.
// Returns FPGA_INVALID_PARAM for a NULL handle, FPGA_OK otherwise
// (an unknown wsid is not reported as an error).
extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
  if (handle != NULL) {
    reinterpret_cast<opae_sim*>(handle)->release_buffer(wsid);
    return FPGA_OK;
  }
  return FPGA_INVALID_PARAM;
}
|
||||
|
||||
// Looks up the IO address the simulator associates with buffer `wsid`
// and stores it in *ioaddr.
// Returns FPGA_INVALID_PARAM when handle or ioaddr is NULL.
extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
  const bool args_ok = (handle != NULL) && (ioaddr != NULL);
  if (!args_ok) {
    return FPGA_INVALID_PARAM;
  }
  opae_sim *instance = reinterpret_cast<opae_sim*>(handle);
  instance->get_io_address(wsid, ioaddr);
  return FPGA_OK;
}
|
||||
|
||||
// Forwards a 64-bit MMIO write to the simulator.
// Only MMIO space 0 is accepted; any other mmio_num, or a NULL handle,
// yields FPGA_INVALID_PARAM.
extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
  if (handle != NULL && 0 == mmio_num) {
    reinterpret_cast<opae_sim*>(handle)->write_mmio64(mmio_num, offset, value);
    return FPGA_OK;
  }
  return FPGA_INVALID_PARAM;
}
|
||||
|
||||
// Forwards a 64-bit MMIO read to the simulator; the result lands in *value.
// Only MMIO space 0 is accepted; any other mmio_num, a NULL handle, or a
// NULL value pointer yields FPGA_INVALID_PARAM.
extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
  const bool args_ok = (handle != NULL) && (0 == mmio_num) && (value != NULL);
  if (!args_ok) {
    return FPGA_INVALID_PARAM;
  }
  opae_sim *instance = reinterpret_cast<opae_sim*>(handle);
  instance->read_mmio64(mmio_num, offset, value);
  return FPGA_OK;
}
|
||||
|
||||
// The simulator shim carries no error-string table: every result code maps
// to the empty string.
extern const char *fpgaErrStr(fpga_result e) {
  (void)e; // intentionally unused
  return "";
}
|
|
@ -1,372 +0,0 @@
|
|||
#include "opae_sim.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define CCI_LATENCY 8
|
||||
#define CCI_RAND_MOD 8
|
||||
#define CCI_RQ_SIZE 16
|
||||
#define CCI_WQ_SIZE 16
|
||||
|
||||
#define ENABLE_MEM_STALLS
|
||||
|
||||
#ifndef MEM_LATENCY
|
||||
#define MEM_LATENCY 24
|
||||
#endif
|
||||
|
||||
#ifndef MEM_RQ_SIZE
|
||||
#define MEM_RQ_SIZE 16
|
||||
#endif
|
||||
|
||||
#ifndef MEM_STALLS_MODULO
|
||||
#define MEM_STALLS_MODULO 16
|
||||
#endif
|
||||
|
||||
#ifndef VERILATOR_RESET_VALUE
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
uint64_t timestamp = 0;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
// Allocates `size` bytes whose address is a multiple of `alignment`
// (which must be a power of two). The pointer returned by malloc() is
// stashed in the slot immediately below the aligned address so that
// __aligned_free() can recover it.
// Returns NULL when the request overflows size_t or malloc() fails.
static void *__aligned_malloc(size_t alignment, size_t size) {
  // reserve margin for alignment and for storing the unaligned address
  size_t margin = (alignment - 1) + sizeof(void*);

  // reject requests whose padded size would wrap around
  if (size > ((size_t)-1) - margin)
    return NULL;

  void *unaligned_addr = malloc(size + margin);
  if (NULL == unaligned_addr)
    return NULL; // never write the bookkeeping slot through a failed allocation

  void **aligned_addr = (void**)((uintptr_t)(((uint8_t*)unaligned_addr) + margin) & ~(alignment-1));
  aligned_addr[-1] = unaligned_addr;
  return aligned_addr;
}
|
||||
|
||||
// Releases a block obtained from __aligned_malloc().
// A NULL pointer is ignored, mirroring free().
static void __aligned_free(void *ptr) {
  if (NULL == ptr)
    return;
  // retrieve the stored unaligned address and use it to free the allocation
  void* unaligned_addr = ((void**)ptr)[-1];
  free(unaligned_addr);
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds the Verilated AFU model, optionally opens a VCD trace, resets the
// design and then starts a background task that keeps clocking it until the
// destructor raises stop_.
opae_sim::opae_sim()
  : stop_(false)
  , host_buffer_ids_(0)
{
  // uninitialized signals are randomized, from a fixed seed
  Verilated::randReset(VERILATOR_RESET_VALUE);
  Verilated::randSeed(50);

  // assertions stay off until reset() switches them back on
  Verilated::assertOn(false);

  vortex_afu_ = new Vvortex_afu_shim();

#ifdef VCD_OUTPUT
  Verilated::traceEverOn(true);
  trace_ = new VerilatedVcdC();
  vortex_afu_->trace(trace_, 99);
  trace_->open("trace.vcd");
#endif

  // bring the device into a known state
  this->reset();

  // execution task: one step() per iteration, each taken under mutex_ so it
  // interleaves with the MMIO accessors
  auto run_loop = [this]() {
    while (!stop_) {
      std::lock_guard<std::mutex> guard(mutex_);
      this->step();
    }
  };
  future_ = std::async(std::launch::async, run_loop);
}
|
||||
|
||||
// Stops the execution task, closes the trace (if any), frees every host
// buffer still registered and destroys the Verilated model.
opae_sim::~opae_sim() {
  // ask the background loop to exit and wait until it has
  stop_ = true;
  if (future_.valid()) {
    future_.wait();
  }
#ifdef VCD_OUTPUT
  trace_->close();
#endif
  for (auto& buffer : host_buffers_) {
    __aligned_free(buffer.second.data);
  }
  delete vortex_afu_;
#ifdef VCD_OUTPUT
  // the trace object was allocated with new in the constructor and was
  // previously leaked
  delete trace_;
#endif
}
|
||||
|
||||
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len);
|
||||
if (alloc == NULL)
|
||||
return -1;
|
||||
host_buffer_t buffer;
|
||||
buffer.data = (uint64_t*)alloc;
|
||||
buffer.size = len;
|
||||
buffer.ioaddr = uintptr_t(alloc);
|
||||
auto buffer_id = host_buffer_ids_++;
|
||||
host_buffers_.emplace(buffer_id, buffer);
|
||||
*buf_addr = alloc;
|
||||
*wsid = buffer_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void opae_sim::release_buffer(uint64_t wsid) {
|
||||
auto it = host_buffers_.find(wsid);
|
||||
if (it != host_buffers_.end()) {
|
||||
__aligned_free(it->second.data);
|
||||
host_buffers_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the IO address of buffer `wsid` in *ioaddr.
// For an id that was never issued (or was already released) *ioaddr is set
// to 0. The lookup uses find() rather than operator[] so that a bad id does
// not insert a phantom entry with a null data pointer, which would later be
// handed to __aligned_free() by release_buffer() or the destructor.
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
  auto it = host_buffers_.find(wsid);
  if (it == host_buffers_.end()) {
    *ioaddr = 0;
    return;
  }
  *ioaddr = it->second.ioaddr;
}
|
||||
|
||||
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
this->step();
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
|
||||
*value = vortex_afu_->af2cp_sTxPort_c2_data;
|
||||
}
|
||||
|
||||
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
|
||||
this->step();
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void opae_sim::reset() {
|
||||
cci_reads_.clear();
|
||||
cci_writes_.clear();
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
|
||||
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
mem_reads_[b].clear();
|
||||
vortex_afu_->avs_readdatavalid[b] = 0;
|
||||
vortex_afu_->avs_waitrequest[b] = 0;
|
||||
}
|
||||
|
||||
vortex_afu_->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
vortex_afu_->clk = 0;
|
||||
this->eval();
|
||||
vortex_afu_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
vortex_afu_->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
// Advances the simulation by one clock period: the three bus models are
// serviced first, then the clock is driven low and high with an eval()
// after each edge. In debug builds stdout is flushed afterwards.
void opae_sim::step() {
  this->sRxPort_bus();
  this->sTxPort_bus();
  this->avs_bus();

  for (int level = 0; level <= 1; ++level) {
    vortex_afu_->clk = level;
    this->eval();
  }

#ifndef NDEBUG
  fflush(stdout);
#endif
}
|
||||
|
||||
// Evaluates the model once, records the current tick in the VCD trace when
// tracing is compiled in, and then advances the global tick counter.
void opae_sim::eval() {
  vortex_afu_->eval();

#ifdef VCD_OUTPUT
  trace_->dump(timestamp);
#endif

  timestamp += 1;
}
|
||||
|
||||
void opae_sim::sRxPort_bus() {
|
||||
// check mmio request
|
||||
bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid
|
||||
|| vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
// schedule CCI read responses
|
||||
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
|
||||
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_rd_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// schedule CCI write responses
|
||||
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
|
||||
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_wr_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// send CCI write response
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
if (cci_wr_it != cci_writes_.end()) {
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_hdr_resp_type = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
|
||||
cci_writes_.erase(cci_wr_it);
|
||||
}
|
||||
|
||||
// send CCI read response (ensure mmio disabled)
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (!mmio_req_enabled
|
||||
&& (cci_rd_it != cci_reads_.end())) {
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
|
||||
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
|
||||
printf("\n");*/
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
}
|
||||
|
||||
void opae_sim::sTxPort_bus() {
|
||||
// process read requests
|
||||
if (vortex_afu_->af2cp_sTxPort_c0_valid) {
|
||||
assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull);
|
||||
cci_rd_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
|
||||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// process write requests
|
||||
if (vortex_afu_->af2cp_sTxPort_c1_valid) {
|
||||
assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull);
|
||||
cci_wr_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
|
||||
cci_writes_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// check queues overflow
|
||||
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
|
||||
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
|
||||
}
|
||||
|
||||
void opae_sim::avs_bus() {
|
||||
for (int b = 0; b < MEMORY_BANKS; ++b) {
|
||||
// update memory responses schedule
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (rsp.cycles_left > 0)
|
||||
rsp.cycles_left -= 1;
|
||||
}
|
||||
|
||||
// schedule memory responses in FIFO order
|
||||
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
|
||||
if (!mem_reads_[b].empty()
|
||||
&& (0 == mem_reads_[b].begin()->cycles_left)) {
|
||||
mem_rd_it = mem_reads_[b].begin();
|
||||
}
|
||||
|
||||
// send memory response
|
||||
vortex_afu_->avs_readdatavalid[b] = 0;
|
||||
if (mem_rd_it != mem_reads_[b].end()) {
|
||||
vortex_afu_->avs_readdatavalid[b] = 1;
|
||||
memcpy(vortex_afu_->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
|
||||
uint32_t addr = mem_rd_it->addr;
|
||||
mem_reads_[b].erase(mem_rd_it);
|
||||
/*printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%x, pending={", timestamp, b, addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
||||
// handle memory stalls
|
||||
bool mem_stalled = false;
|
||||
#ifdef ENABLE_MEM_STALLS
|
||||
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
|
||||
mem_stalled = true;
|
||||
} else
|
||||
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
|
||||
mem_stalled = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// process memory requests
|
||||
if (!mem_stalled) {
|
||||
assert(!vortex_afu_->avs_read[b] || !vortex_afu_->avs_write[b]);
|
||||
if (vortex_afu_->avs_write[b]) {
|
||||
uint64_t byteen = vortex_afu_->avs_byteenable[b];
|
||||
unsigned base_addr = vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE;
|
||||
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata[b]);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
ram_[base_addr + i] = data[i];
|
||||
}
|
||||
}
|
||||
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
}
|
||||
if (vortex_afu_->avs_read[b]) {
|
||||
mem_rd_req_t mem_req;
|
||||
mem_req.addr = vortex_afu_->avs_address[b];
|
||||
ram_.read(vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
|
||||
mem_req.cycles_left = MEM_LATENCY;
|
||||
for (auto& rsp : mem_reads_[b]) {
|
||||
if (mem_req.addr == rsp.addr) {
|
||||
mem_req.cycles_left = rsp.cycles_left;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mem_reads_[b].emplace_back(mem_req);
|
||||
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE);
|
||||
for (auto& req : mem_reads_[b]) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
|
||||
else
|
||||
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
}
|
||||
|
||||
vortex_afu_->avs_waitrequest[b] = mem_stalled;
|
||||
}
|
||||
}
|
|
@ -1,105 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "verilated.h"
|
||||
//#include "verilated_stub.h"
|
||||
#include "Vvortex_afu_shim.h"
|
||||
#include "Vvortex_afu_shim__Syms.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "vortex_afu.h"
|
||||
#include "ram.h"
|
||||
|
||||
#include <array>
#include <atomic>
#include <future>
#include <list>
#include <mutex>
#include <ostream>
#include <unordered_map>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
#else
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef MEM_BLOCK_SIZE
|
||||
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
class opae_sim {
|
||||
public:
|
||||
|
||||
opae_sim();
|
||||
virtual ~opae_sim();
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
void release_buffer(uint64_t wsid);
|
||||
|
||||
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> data;
|
||||
uint32_t addr;
|
||||
} mem_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
|
||||
uint64_t addr;
|
||||
uint32_t mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
uint32_t mdata;
|
||||
} cci_wr_req_t;
|
||||
|
||||
typedef struct {
|
||||
uint64_t* data;
|
||||
size_t size;
|
||||
uint64_t ioaddr;
|
||||
} host_buffer_t;
|
||||
|
||||
void reset();
|
||||
|
||||
void eval();
|
||||
|
||||
void step();
|
||||
|
||||
void sRxPort_bus();
|
||||
void sTxPort_bus();
|
||||
void avs_bus();
|
||||
|
||||
std::future<void> future_;
|
||||
bool stop_;
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
int64_t host_buffer_ids_;
|
||||
|
||||
std::list<mem_rd_req_t> mem_reads_ [MEMORY_BANKS];
|
||||
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM ram_;
|
||||
Vvortex_afu_shim *vortex_afu_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
#endif
|
||||
};
|
|
@ -1,64 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Sparse byte-addressable model of a 32-bit (4 GiB) address space.
// The space is split into 4096 blocks of 1 MiB; a block is allocated the
// first time any address inside it is touched (by a read or a write) and is
// zero-filled, so untouched memory reads back as 0 and simulation runs are
// reproducible.
// The object owns raw block pointers and is therefore non-copyable.
class RAM {
private:

  enum {
    kBlockShift = 20,               // log2 of the block size
    kBlockSize  = 1 << kBlockShift, // bytes per block (1 MiB)
    kNumBlocks  = 1 << 12           // blocks covering the 32-bit space
  };

  // mutable: const accessors may still allocate a block on first touch
  mutable uint8_t *mem_[kNumBlocks];

  // Returns a pointer to the byte at `address`, allocating its block on demand.
  uint8_t *get(uint32_t address) const {
    uint32_t block_addr   = address >> kBlockShift;
    uint32_t block_offset = address & 0x000FFFFF;
    if (mem_[block_addr] == NULL) {
      mem_[block_addr] = new uint8_t[kBlockSize](); // value-initialized: all zero
    }
    return mem_[block_addr] + block_offset;
  }

public:

  RAM() {
    for (uint32_t i = 0; i < (uint32_t)kNumBlocks; i++) {
      mem_[i] = NULL;
    }
  }

  ~RAM() {
    this->clear();
  }

  // copying would make two objects delete the same blocks
  RAM(const RAM&) = delete;
  RAM& operator=(const RAM&) = delete;

  // Size of the modelled address space in bytes.
  size_t size() const {
    return (1ull << 32);
  }

  // Releases every allocated block; all addresses read as 0 again afterwards.
  void clear() {
    for (uint32_t i = 0; i < (uint32_t)kNumBlocks; i++) {
      if (mem_[i]) {
        delete [] mem_[i];
        mem_[i] = NULL;
      }
    }
  }

  // Copies `length` bytes starting at `address` into `data`.
  // Addresses wrap around at 2^32.
  void read(uint32_t address, uint32_t length, uint8_t *data) const {
    for (unsigned i = 0; i < length; i++) {
      data[i] = *this->get(address + i);
    }
  }

  // Copies `length` bytes from `data` into memory starting at `address`.
  // Addresses wrap around at 2^32.
  void write(uint32_t address, uint32_t length, const uint8_t *data) {
    for (unsigned i = 0; i < length; i++) {
      *this->get(address + i) = data[i];
    }
  }

  uint8_t& operator[](uint32_t address) {
    return *get(address);
  }

  const uint8_t& operator[](uint32_t address) const {
    return *get(address);
  }
};
|
|
@ -1,10 +0,0 @@
|
|||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../rtl/fp_cores/fpnew/*"
|
|
@ -1,495 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <cmath>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
#if defined(USE_FPGA) || defined(USE_ASE)
|
||||
#include <opae/fpga.h>
|
||||
#include <uuid/uuid.h>
|
||||
#elif defined(USE_VLSIM)
|
||||
#include "vlsim/fpga.h"
|
||||
#endif
|
||||
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include "vortex_afu.h"
|
||||
|
||||
#ifdef SCOPE
|
||||
#include "vx_scope.h"
|
||||
#endif
|
||||
|
||||
#define CHECK_RES(_expr) \
|
||||
do { \
|
||||
fpga_result res = _expr; \
|
||||
if (res == FPGA_OK) \
|
||||
break; \
|
||||
printf("[VXDRV] Error: '%s' returned %d, %s!\n", \
|
||||
#_expr, (int)res, fpgaErrStr(res)); \
|
||||
return -1; \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
|
||||
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
|
||||
#define CMD_RUN AFU_IMAGE_CMD_RUN
|
||||
|
||||
#define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4)
|
||||
#define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4)
|
||||
#define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4)
|
||||
#define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4)
|
||||
#define MMIO_DEV_CAPS (AFU_IMAGE_MMIO_DEV_CAPS * 4)
|
||||
#define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_device_ {
|
||||
fpga_handle fpga;
|
||||
size_t mem_allocation;
|
||||
unsigned version;
|
||||
unsigned num_cores;
|
||||
unsigned num_warps;
|
||||
unsigned num_threads;
|
||||
} vx_device_t;
|
||||
|
||||
typedef struct vx_buffer_ {
|
||||
uint64_t wsid;
|
||||
void* host_ptr;
|
||||
uint64_t io_addr;
|
||||
vx_device_h hdevice;
|
||||
size_t size;
|
||||
} vx_buffer_t;
|
||||
|
||||
// Rounds `size` up to the next multiple of `alignment`.
// `alignment` must be a power of two (checked by assert in debug builds).
inline size_t align_size(size_t size, size_t alignment) {
  const size_t mask = alignment - 1;
  assert((alignment & mask) == 0);
  return (size + mask) & ~mask;
}
|
||||
|
||||
// Tells whether `addr` is a multiple of `alignment`.
// `alignment` must be a power of two (checked by assert in debug builds).
inline bool is_aligned(size_t addr, size_t alignment) {
  const size_t mask = alignment - 1;
  assert((alignment & mask) == 0);
  return (addr & mask) == 0;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Returns one device capability in *value.
//   hdevice  device handle from vx_dev_open()
//   caps_id  one of the VX_CAPS_* identifiers
//   value    receives the capability; must not be null
// Returns 0 on success and -1 when hdevice or value is null.
// An unknown caps_id is treated as a programming error: it is reported on
// stderr and the process is aborted.
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
  if (nullptr == hdevice || nullptr == value)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);

  switch (caps_id) {
  case VX_CAPS_VERSION:
    *value = device->version;
    break;
  case VX_CAPS_MAX_CORES:
    *value = device->num_cores;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = device->num_warps;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = device->num_threads;
    break;
  case VX_CAPS_CACHE_LINE_SIZE:
    *value = CACHE_BLOCK_SIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    *value = LOCAL_MEM_SIZE;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    *value = ALLOC_BASE_ADDR;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    // caps_id is unsigned, so print it with %u
    fprintf(stderr, "[VXDRV] Error: invalid caps id: %u\n", caps_id);
    std::abort();
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Opens the accelerator and returns a new device object in *hdevice.
// With a real OPAE backend (FPGA or ASE) the accelerator is located by
// enumerating for AFU_ACCEL_UUID; with USE_VLSIM the simulator shim is
// opened directly. The device capabilities are then read from the DEV_CAPS
// MMIO register and, when SCOPE is enabled, the scope analyzer is started.
// Returns 0 on success and a non-zero value on failure; on failure every
// resource acquired so far (properties, token, handle, device object) is
// released and *hdevice is left untouched.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (nullptr == hdevice)
    return -1;

  fpga_handle accel_handle;
  vx_device_t* device;

#ifndef USE_VLSIM
  fpga_result res;
  fpga_token accel_token;
  fpga_properties filter = nullptr;
  fpga_guid guid;
  uint32_t num_matches;

  // Set up a filter that will search for an accelerator
  CHECK_RES(fpgaGetProperties(nullptr, &filter));
  res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaPropertiesSetObjectType() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyProperties(&filter);
    return -1;
  }

  // Add the desired UUID to the filter
  if (uuid_parse(AFU_ACCEL_UUID, guid) != 0) {
    fprintf(stderr, "[VXDRV] Error: invalid accelerator UUID %s!\n", AFU_ACCEL_UUID);
    fpgaDestroyProperties(&filter);
    return -1;
  }
  res = fpgaPropertiesSetGUID(filter, guid);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaPropertiesSetGUID() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyProperties(&filter);
    return -1;
  }

  // Do the search across the available FPGA contexts
  num_matches = 1;
  res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaEnumerate() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyProperties(&filter);
    return -1;
  }

  // Not needed anymore
  fpgaDestroyProperties(&filter);

  if (num_matches < 1) {
    // no token was produced, so there is nothing to destroy here
    fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID);
    return -1;
  }

  // Open accelerator
  res = fpgaOpen(accel_token, &accel_handle, 0);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaOpen() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyToken(&accel_token);
    return -1;
  }

  // Done with token
  fpgaDestroyToken(&accel_token);
#else
  // Open accelerator
  CHECK_RES(fpgaOpen(NULL, &accel_handle, 0));
#endif

  // allocate device object
  device = (vx_device_t*)malloc(sizeof(vx_device_t));
  if (nullptr == device) {
    fpgaClose(accel_handle);
    return -1;
  }

  device->fpga = accel_handle;
  device->mem_allocation = ALLOC_BASE_ADDR;

  {
    // Load device CAPS
    uint64_t dev_caps;
    int ret = fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps);
    if (ret != FPGA_OK) {
      fpgaClose(accel_handle);
      free(device); // do not leak the device object on failure
      return ret;
    }
    device->version     = (dev_caps >> 0)  & 0xffff;
    device->num_cores   = (dev_caps >> 16) & 0xffff;
    device->num_warps   = (dev_caps >> 32) & 0xffff;
    device->num_threads = (dev_caps >> 48) & 0xffff;
  #ifndef NDEBUG
    fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
            device->version, device->num_cores, device->num_warps, device->num_threads);
  #endif
  }

#ifdef SCOPE
  {
    int ret = vx_scope_start(accel_handle, 0, -1);
    if (ret != 0) {
      fpgaClose(accel_handle);
      free(device); // do not leak the device object on failure
      return ret;
    }
  }
#endif

  *hdevice = device;

  return 0;
}
|
||||
|
||||
// Closes a device previously opened with vx_dev_open().
//
// Stops the scope (when built with SCOPE), optionally dumps performance
// statistics, closes the FPGA handle and releases the device object that
// vx_dev_open() allocated with malloc(). The handle must not be used afterwards.
// Returns 0 on success, -1 if hdevice is null.
extern int vx_dev_close(vx_device_h hdevice) {
  if (nullptr == hdevice)
    return -1;

  vx_device_t *device = ((vx_device_t*)hdevice);

#ifdef SCOPE
  vx_scope_stop(device->fpga);
#endif

#ifdef DUMP_PERF_STATS
  vx_dump_perf(device, stdout);
#endif

  fpgaClose(device->fpga);

  // The device object was malloc'ed in vx_dev_open(); release it here.
  free(device);

  return 0;
}
|
||||
|
||||
// Reserves `size` bytes (rounded up to a cache block) of device-local memory
// using a bump allocator and stores the start address in *dev_maddr.
// Returns 0 on success, -1 on invalid arguments or when the request does not fit.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool bad_args = (nullptr == hdevice)
                     || (nullptr == dev_maddr)
                     || (0 == size);
  if (bad_args)
    return -1;

  vx_device_t* dev = (vx_device_t*)hdevice;

  const size_t capacity = LOCAL_MEM_SIZE;
  const size_t rounded  = align_size(size, CACHE_BLOCK_SIZE);

  // Address one past the end of the new allocation.
  const auto next_free = dev->mem_allocation + rounded;
  if (next_free > capacity)
    return -1;

  *dev_maddr = dev->mem_allocation;
  dev->mem_allocation = next_free;

  return 0;
}
|
||||
|
||||
// Allocates a host/device shared buffer of at least `size` bytes (rounded up
// to a cache block) and returns its handle in *hbuffer.
// Returns 0 on success, -1 on invalid arguments or on any allocation failure.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (nullptr == hdevice || 0 == size || nullptr == hbuffer)
    return -1;

  vx_device_t* dev = (vx_device_t*)hdevice;
  const size_t rounded = align_size(size, CACHE_BLOCK_SIZE);

  // Ask the FPGA runtime for a buffer shared with the accelerator.
  void*    host_ptr;
  uint64_t wsid;
  if (fpgaPrepareBuffer(dev->fpga, rounded, &host_ptr, &wsid, 0) != FPGA_OK)
    return -1;

  // Get the physical address of the buffer in the accelerator
  uint64_t io_addr;
  if (fpgaGetIOAddress(dev->fpga, wsid, &io_addr) != FPGA_OK) {
    fpgaReleaseBuffer(dev->fpga, wsid);
    return -1;
  }

  // Allocate the bookkeeping object describing the buffer.
  vx_buffer_t* buf = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
  if (nullptr == buf) {
    fpgaReleaseBuffer(dev->fpga, wsid);
    return -1;
  }

  buf->wsid     = wsid;
  buf->host_ptr = host_ptr;
  buf->io_addr  = io_addr;
  buf->hdevice  = hdevice;
  buf->size     = rounded;

  *hbuffer = buf;

  return 0;
}
|
||||
|
||||
// Returns the host-side pointer stored in the buffer object, or nullptr when
// the handle itself is null.
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
  if (nullptr != hbuffer) {
    vx_buffer_t* buf = (vx_buffer_t*)hbuffer;
    return buf->host_ptr;
  }
  return nullptr;
}
|
||||
|
||||
// Releases a shared buffer: hands the workspace back to the FPGA runtime and
// frees the bookkeeping object. Returns 0 on success, -1 for a null handle.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  if (nullptr == hbuffer)
    return -1;

  vx_buffer_t* buf   = (vx_buffer_t*)hbuffer;
  vx_device_t* owner = (vx_device_t*)buf->hdevice;

  fpgaReleaseBuffer(owner->fpga, buf->wsid);
  free(buf);

  return 0;
}
|
||||
|
||||
// Polls the device status register until the device reports idle (state 0)
// or the timeout expires, forwarding any console characters the device emits.
//
// hdevice: device handle.
// timeout: maximum wait in milliseconds; a negative value waits indefinitely.
// Returns 0 when polling ends (idle or timed out), -1 on a null handle or an
// MMIO read error.
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
  if (nullptr == hdevice)
    return -1;

  // Per-thread line buffers for console output, keyed by the emitting thread id.
  std::unordered_map<int, std::stringstream> print_bufs;

  vx_device_t *device = ((vx_device_t*)hdevice);

  struct timespec sleep_time;

#if defined(USE_ASE)
  sleep_time.tv_sec = 1;
  sleep_time.tv_nsec = 0;
#else
  sleep_time.tv_sec = 0;
  sleep_time.tv_nsec = 1000000;
#endif

  // to milliseconds
  long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

  for (;;) {
    uint64_t status;
    CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));

    // Status layout used here: bits [7:0] = state, bits [23:8] = console word
    // (bit 0 = valid, bits [8:1] = character, bits above = thread id).
    uint16_t cout_data = (status >> 8) & 0xffff;
    while (cout_data & 0x0001) {
      char cout_char = (cout_data >> 1) & 0xff;
      int cout_tid = (cout_data >> 9) & 0xff;
      auto& ss_buf = print_bufs[cout_tid];
      ss_buf << cout_char;
      if (cout_char == '\n') {
        std::cout << std::dec << "#" << cout_tid << ": " << ss_buf.str() << std::flush;
        ss_buf.str("");
      }
      CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
      cout_data = (status >> 8) & 0xffff;
    }

    uint8_t state = status & 0xff;

    if (0 == state || 0 == timeout) {
      // Flush any partial lines that never received a newline.
      for (auto& buf : print_bufs) {
        auto str = buf.second.str();
        if (!str.empty()) {
          std::cout << "#" << buf.first << ": " << str << std::endl;
        }
      }
      if (state != 0) {
        fprintf(stdout, "[VXDRV] ready-wait timed out: state=%d\n", state);
      }
      break;
    }

    nanosleep(&sleep_time, nullptr);

    // Count down only finite timeouts and clamp at zero so a timeout that is
    // not a multiple of the sleep step cannot skip past the expiry check.
    if (timeout > 0) {
      timeout = (timeout > sleep_time_ms) ? (timeout - sleep_time_ms) : 0;
    }
  }

  return 0;
}
|
||||
|
||||
// Copies `size` bytes (rounded up to a cache block) from the shared buffer,
// starting at src_offset, to device memory address dev_maddr.
// Returns 0 on success, -1 on invalid arguments, misalignment, out-of-range
// transfers, or when the device does not become ready.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (nullptr == hbuffer || 0 == size)
    return -1;

  vx_buffer_t* buffer = (vx_buffer_t*)hbuffer;
  vx_device_t* device = (vx_device_t*)buffer->hdevice;

  const size_t asize     = align_size(size, CACHE_BLOCK_SIZE);
  const size_t mem_limit = LOCAL_MEM_SIZE;

  // Both endpoints must be cache-block aligned and the (rounded) transfer
  // must fit inside the shared buffer and inside device memory.
  if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)
   || !is_aligned(buffer->io_addr + src_offset, CACHE_BLOCK_SIZE)
   || (src_offset + asize > buffer->size)
   || (dev_maddr + asize > mem_limit))
    return -1;

  // Ensure ready for new command
  if (0 != vx_ready_wait(buffer->hdevice, -1))
    return -1;

  // Addresses and sizes are programmed in units of cache blocks.
  auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);

  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));

  // Wait for the write operation to finish
  return (0 != vx_ready_wait(buffer->hdevice, -1)) ? -1 : 0;
}
|
||||
|
||||
// Copies `size` bytes (rounded up to a cache block) from device memory address
// dev_maddr into the shared buffer at dest_offset.
// Returns 0 on success, -1 on invalid arguments, misalignment, out-of-range
// transfers, or when the device does not become ready.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
  if (nullptr == hbuffer || 0 == size)
    return -1;

  vx_buffer_t* buffer = (vx_buffer_t*)hbuffer;
  vx_device_t* device = (vx_device_t*)buffer->hdevice;

  const size_t asize     = align_size(size, CACHE_BLOCK_SIZE);
  const size_t mem_limit = LOCAL_MEM_SIZE;

  // Both endpoints must be cache-block aligned and the (rounded) transfer
  // must fit inside the shared buffer and inside device memory.
  if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)
   || !is_aligned(buffer->io_addr + dest_offset, CACHE_BLOCK_SIZE)
   || (dest_offset + asize > buffer->size)
   || (dev_maddr + asize > mem_limit))
    return -1;

  // Ensure ready for new command
  if (0 != vx_ready_wait(buffer->hdevice, -1))
    return -1;

  // Addresses and sizes are programmed in units of cache blocks.
  auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);

  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));

  // Wait for the read operation to finish
  return (0 != vx_ready_wait(buffer->hdevice, -1)) ? -1 : 0;
}
|
||||
|
||||
// Starts kernel execution on the device once it is ready for a new command.
// Returns 0 on success, -1 on a null handle, a failed wait, or an MMIO error.
extern int vx_start(vx_device_h hdevice) {
  // Ensure ready for new command (a null handle is rejected up front).
  if (nullptr == hdevice || 0 != vx_ready_wait(hdevice, -1))
    return -1;

  vx_device_t* device = (vx_device_t*)hdevice;

  // start execution
  CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));

  return 0;
}
|
|
@ -1,24 +0,0 @@
|
|||
//
|
||||
// Generated by afu_json_mgr from ../../hw/opae/vortex_afu.json
|
||||
//
|
||||
|
||||
#ifndef __AFU_JSON_INFO__
|
||||
#define __AFU_JSON_INFO__
|
||||
|
||||
#define AFU_ACCEL_NAME "vortex_afu"
|
||||
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
|
||||
#define AFU_IMAGE_CMD_MEM_READ 1
|
||||
#define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
#define AFU_IMAGE_CMD_RUN 3
|
||||
#define AFU_IMAGE_MMIO_CMD_TYPE 10
|
||||
#define AFU_IMAGE_MMIO_DATA_SIZE 16
|
||||
#define AFU_IMAGE_MMIO_IO_ADDR 12
|
||||
#define AFU_IMAGE_MMIO_MEM_ADDR 14
|
||||
#define AFU_IMAGE_MMIO_SCOPE_READ 20
|
||||
#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
|
||||
#define AFU_IMAGE_MMIO_DEV_CAPS 24
|
||||
#define AFU_IMAGE_MMIO_STATUS 18
|
||||
#define AFU_IMAGE_POWER 0
|
||||
#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
|
||||
|
||||
#endif // __AFU_JSON_INFO__
|
|
@ -1,257 +0,0 @@
|
|||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
|
||||
#ifdef USE_VLSIM
|
||||
#include "vlsim/fpga.h"
|
||||
#else
|
||||
#include <opae/fpga.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "vx_scope.h"
|
||||
#include "vortex_afu.h"
|
||||
#include "scope-defs.h"
|
||||
|
||||
#define FRAME_FLUSH_SIZE 100
|
||||
|
||||
// Evaluates an OPAE call once; if it does not return FPGA_OK, prints the
// failing expression with its error code and makes the enclosing function
// return -1.
#define CHECK_RES(_expr)                                  \
  do {                                                    \
    fpga_result res = _expr;                              \
    if (res != FPGA_OK) {                                 \
      printf("OPAE Error: '%s' returned %d, %s!\n",       \
             #_expr, (int)res, fpgaErrStr(res));          \
      return -1;                                          \
    }                                                     \
  } while (false)
|
||||
|
||||
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
|
||||
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
|
||||
|
||||
#define CMD_GET_VALID 0
|
||||
#define CMD_GET_DATA 1
|
||||
#define CMD_GET_WIDTH 2
|
||||
#define CMD_GET_COUNT 3
|
||||
#define CMD_SET_START 4
|
||||
#define CMD_SET_STOP 5
|
||||
#define CMD_GET_OFFSET 6
|
||||
|
||||
static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t);
|
||||
|
||||
static constexpr int num_taps = sizeof(scope_taps) / sizeof(scope_tap_t);
|
||||
|
||||
constexpr int calcFrameWidth(int index = 0) {
|
||||
return (index < num_taps) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0;
|
||||
}
|
||||
|
||||
static constexpr int fwidth = calcFrameWidth();
|
||||
|
||||
#ifdef HANG_TIMEOUT
|
||||
static std::thread g_timeout_thread;
|
||||
static std::mutex g_timeout_mutex;
|
||||
|
||||
// Watchdog body: sleeps for HANG_TIMEOUT seconds, then dumps the scope trace,
// closes the FPGA handle and terminates the process with exit code 0.
static void timeout_callback(fpga_handle fpga) {
  const std::chrono::seconds hang_limit{HANG_TIMEOUT};
  std::this_thread::sleep_for(hang_limit);

  vx_scope_stop(fpga);
  fpgaClose(fpga);

  exit(0);
}
|
||||
#endif
|
||||
|
||||
// Emits `delta` full clock cycles to the VCD stream. Each cycle writes two
// timestamps: signal 0 driven low, then signal 0 driven high.
// Returns the timestamp following the last one written (timestamp + 2*delta).
uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
  for (uint64_t cycle = 0; cycle != delta; ++cycle) {
    ofs << '#' << timestamp << std::endl;
    ofs << "b0 0" << std::endl;
    ofs << '#' << (timestamp + 1) << std::endl;
    ofs << "b1 0" << std::endl;
    timestamp += 2;
  }
  return timestamp;
}
|
||||
|
||||
void dump_taps(std::ofstream& ofs, int module) {
|
||||
for (int i = 0; i < num_taps; ++i) {
|
||||
auto& tap = scope_taps[i];
|
||||
if (tap.module != module)
|
||||
continue;
|
||||
ofs << "$var reg " << tap.width << " " << (i + 1) << " " << tap.name << " $end" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void dump_module(std::ofstream& ofs, int parent) {
|
||||
for (auto& module : scope_modules) {
|
||||
if (module.parent != parent)
|
||||
continue;
|
||||
if (module.name[0] == '*') {
|
||||
ofs << "$var reg 1 0 clk $end" << std::endl;
|
||||
} else {
|
||||
ofs << "$scope module " << module.name << " $end" << std::endl;
|
||||
}
|
||||
dump_module(ofs, module.index);
|
||||
dump_taps(ofs, module.index);
|
||||
if (module.name[0] != '*') {
|
||||
ofs << "$upscope $end" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Programs the scope's optional stop time and its start delay, then (when
// HANG_TIMEOUT is defined) launches a detached watchdog thread.
// A stop_time of uint64_t(-1) means "no stop time".
// Returns 0 on success, -1 on a null handle or an MMIO error.
int vx_scope_start(fpga_handle hfpga, uint64_t start_time, uint64_t stop_time) {
  if (nullptr == hfpga)
    return -1;

  const bool has_stop_time = (uint64_t(-1) != stop_time);
  if (has_stop_time) {
    // set stop time: value in the upper bits, command code in the low 3 bits
    uint64_t cmd_stop = (stop_time << 3) | CMD_SET_STOP;
    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
    std::cout << "scope stop time: " << std::dec << stop_time << "s" << std::endl;
  }

  // start recording
  uint64_t cmd_delay = (start_time << 3) | CMD_SET_START;
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay));
  std::cout << "scope start time: " << std::dec << start_time << "s" << std::endl;

#ifdef HANG_TIMEOUT
  g_timeout_thread = std::thread(timeout_callback, hfpga);
  g_timeout_thread.detach();
#endif

  return 0;
}
|
||||
|
||||
// Forces the scope to stop recording and dumps the captured frames to
// "trace.vcd" in the current directory.
//
// hfpga: open FPGA handle.
// Returns 0 on success (or when another caller already performed the dump),
// -1 on a null handle, an MMIO error, or when trace.vcd cannot be created.
int vx_scope_stop(fpga_handle hfpga) {
  // Validate the handle before taking the one-shot guard, so that a call with
  // a null handle cannot permanently disable later valid calls.
  if (nullptr == hfpga)
    return -1;

#ifdef HANG_TIMEOUT
  // The mutex is never released in this function: only the first caller
  // proceeds, every later caller returns immediately.
  if (!g_timeout_mutex.try_lock())
    return 0;
#endif

  // forced stop
  uint64_t cmd_stop = ((0 << 3) | CMD_SET_STOP);
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));

  std::cout << "scope trace dump begin..." << std::endl;

  std::ofstream ofs("trace.vcd");
  if (!ofs.is_open()) {
    std::cerr << "scope: failed to create trace.vcd" << std::endl;
    return -1;
  }

  // VCD header and signal declarations
  ofs << "$version Generated by Vortex Scope $end" << std::endl;
  ofs << "$timescale 1 ns $end" << std::endl;
  ofs << "$scope module TOP $end" << std::endl;

  dump_module(ofs, -1);
  dump_taps(ofs, -1);
  ofs << "$upscope $end" << std::endl;
  ofs << "enddefinitions $end" << std::endl;

  uint64_t frame_width, max_frames, data_valid, offset, delta;
  uint64_t timestamp = 0;
  uint64_t frame_offset = 0;   // bit position inside the current frame
  uint64_t frame_no = 0;       // number of completed frames
  int signal_id = 0;           // VCD id of the tap being decoded (counts down)
  int signal_offset = 0;       // bit position inside the current tap

  // wait for recording to terminate
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
  do {
    CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
    if (data_valid)
      break;
    std::this_thread::sleep_for(std::chrono::seconds(1));
  } while (true);

  // get frame width
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
  CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
  std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;

  // The hardware frame width must match the width computed from scope_taps.
  if (fwidth != (int)frame_width) {
    std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
    std::abort();
  }

  // get max frames
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT));
  CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
  std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;

  // get offset
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_OFFSET));
  CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &offset));

  // get data
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));

  // print clock header
  CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
  timestamp = print_clock(ofs, offset + delta + 2, timestamp);
  signal_id = num_taps;

  // Scratch buffer for one tap's bit string plus the terminating NUL.
  std::vector<char> signal_data(frame_width+1);

  do {
    if (frame_no == (max_frames-1)) {
      // verify last frame is valid
      CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
      CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
      assert(data_valid == 1);
      CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
    }

    // read next data words
    uint64_t word;
    CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));

    // Consume the 64-bit word one bit at a time.
    do {
      int signal_width = scope_taps[signal_id-1].width;
      int word_offset = frame_offset % 64;

      // Bits arrive LSB first; store them so the string reads MSB first.
      signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';

      ++signal_offset;
      ++frame_offset;

      if (signal_offset == signal_width) {
        signal_data[signal_width] = 0; // string null termination
        ofs << 'b' << signal_data.data() << ' ' << signal_id << std::endl;
        signal_offset = 0;
        --signal_id;
      }

      if (frame_offset == frame_width) {
        assert(0 == signal_offset);
        frame_offset = 0;
        ++frame_no;

        if (frame_no != max_frames) {
          // print clock header
          CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
          timestamp = print_clock(ofs, delta + 1, timestamp);
          signal_id = num_taps;
          if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
            ofs << std::flush;
            std::cout << "*** " << frame_no << "/" << max_frames << " frames" << std::endl;
          }
        }
      }

    } while ((frame_offset % 64) != 0);

  } while (frame_no != max_frames);

  std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;

  // verify data not valid
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
  CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
  assert(data_valid == 0);

  return 0;
}
|
|
@ -1,11 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#if defined(USE_FPGA)
|
||||
#define HANG_TIMEOUT 60
|
||||
#else
|
||||
#define HANG_TIMEOUT (30*60)
|
||||
#endif
|
||||
|
||||
int vx_scope_start(fpga_handle hfpga, uint64_t start_time = 0, uint64_t stop_time = -1);
|
||||
|
||||
int vx_scope_stop(fpga_handle hfpga);
|
2
driver/rtlsim/.gitignore
vendored
2
driver/rtlsim/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
obj_dir
|
||||
*.so
|
|
@ -1,85 +0,0 @@
|
|||
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CFLAGS += -DUSE_RTLSIM -fPIC -Wno-maybe-uninitialized
|
||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CFLAGS += $(CONFIGS)
|
||||
CFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
#LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
TOP = Vortex
|
||||
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libvortex.so
|
||||
# PROJECT = libvortex.dylib
|
||||
|
||||
# 'all' and 'clean' are commands, not files: keep them working even if a file
# with the same name exists.
.PHONY: all clean

all: $(PROJECT)

# Generate the Verilator model, then build it through the generated makefile.
# $(MAKE) is used for the recursive call so the invoking make's program and
# flags are propagated.
$(PROJECT): $(SRCS)
	verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
	$(MAKE) -j -C obj_dir -f V$(TOP).mk

clean:
	rm -rf $(PROJECT) obj_dir
|
|
@ -1,64 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Sparse model of a 4 GiB byte-addressable memory.
//
// The address space is split into 4096 blocks of 1 MiB. A block is allocated
// the first time any address inside it is touched (even through a const
// accessor, which is why the table is mutable) and is zero-filled, so memory
// that was never written reads back as 0.
class RAM {
private:

  enum : uint32_t {
    kBlockShift = 20,                  // log2 of the block size
    kBlockBytes = 1u << kBlockShift,   // bytes per block (1 MiB)
    kBlockMask  = kBlockBytes - 1,     // offset-within-block mask
    kBlockCount = 1u << 12             // number of blocks (4096)
  };

  mutable uint8_t *mem_[kBlockCount];

  // Returns a pointer to the byte at `address`, allocating its block on demand.
  uint8_t *get(uint32_t address) const {
    uint32_t block_addr   = address >> kBlockShift;
    uint32_t block_offset = address & kBlockMask;
    if (mem_[block_addr] == nullptr) {
      // Value-initialize so untouched bytes read as zero instead of garbage.
      mem_[block_addr] = new uint8_t[kBlockBytes]();
    }
    return mem_[block_addr] + block_offset;
  }

public:

  RAM() {
    for (uint32_t i = 0; i < kBlockCount; i++) {
      mem_[i] = nullptr;
    }
  }

  // The object owns its blocks through raw pointers; copying would lead to a
  // double delete, so copies are disallowed.
  RAM(const RAM&) = delete;
  RAM& operator=(const RAM&) = delete;

  ~RAM() {
    this->clear();
  }

  // Size of the modelled address space in bytes.
  size_t size() const {
    return (1ull << 32);
  }

  // Releases every allocated block; subsequent reads see zeros again.
  void clear() {
    for (uint32_t i = 0; i < kBlockCount; i++) {
      if (mem_[i]) {
        delete [] mem_[i];
        mem_[i] = nullptr;
      }
    }
  }

  // Copies `length` bytes starting at `address` into `data`.
  void read(uint32_t address, uint32_t length, uint8_t *data) const {
    for (unsigned i = 0; i < length; i++) {
      data[i] = *this->get(address + i);
    }
  }

  // Copies `length` bytes from `data` into memory starting at `address`.
  void write(uint32_t address, uint32_t length, const uint8_t *data) {
    for (unsigned i = 0; i < length; i++) {
      *this->get(address + i) = data[i];
    }
  }

  uint8_t& operator[](uint32_t address) {
    return *get(address);
  }

  const uint8_t& operator[](uint32_t address) const {
    return *get(address);
  }
};
|
|
@ -1,10 +0,0 @@
|
|||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../rtl/fp_cores/fpnew/*"
|
|
@ -1,311 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <future>
|
||||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <VX_config.h>
|
||||
#include <ram.h>
|
||||
#include <simulator.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Rounds `size` up to the next multiple of `alignment`.
// `alignment` must be a power of two (checked by assert in debug builds).
inline size_t align_size(size_t size, size_t alignment) {
  const size_t mask = alignment - 1;
  assert(0 == (alignment & mask));
  return (size + mask) & ~mask;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
~vx_buffer() {
|
||||
if (data_) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void* data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
vx_device* device() const {
|
||||
return device_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device() {
|
||||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
/*printf("VXDRV: upload %d bytes from 0x%lx to 0x%lx", size, (uint8_t*)src + src_offset, dest_addr);
|
||||
if (size <= 1024) {
|
||||
printf(": ");
|
||||
for (int i = asize-1; i >= 0; --i) {
|
||||
printf("%x", *((uint8_t*)src + src_offset + i));
|
||||
}
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
ram_.write(dest_addr, asize, (const uint8_t*)src + src_offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%lx to 0x%lx", size, src_addr, (uint8_t*)dest + dest_offset);
|
||||
if (size <= 1024) {
|
||||
printf(": ");
|
||||
for (int i = asize-1; i >= 0; --i) {
|
||||
printf("%x", *((uint8_t*)dest + dest_offset + i));
|
||||
}
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start() {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.attach_ram(&ram_);
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
simulator_.reset();
|
||||
while (simulator_.is_busy()) {
|
||||
simulator_.step();
|
||||
}
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
if (!future_.valid())
|
||||
return 0;
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
std::chrono::seconds wait_time(1);
|
||||
for (;;) {
|
||||
auto status = future_.wait_for(wait_time); // wait for 1 sec and check status
|
||||
if (status == std::future_status::ready
|
||||
|| 0 == timeout_sec--)
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
size_t mem_allocation_;
|
||||
RAM ram_;
|
||||
Simulator simulator_;
|
||||
std::future<void> future_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Queries a device capability.
//
// hdevice: device handle (must be non-null).
// caps_id: one of the VX_CAPS_* identifiers.
// value:   out parameter receiving the capability value (must be non-null).
// Returns 0 on success, -1 if hdevice or value is null. An unknown caps_id
// is treated as a programming error and aborts the process.
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
  if (nullptr == hdevice
   || nullptr == value)
    return -1;

  switch (caps_id) {
  case VX_CAPS_VERSION:
    *value = IMPLEMENTATION_ID;
    break;
  case VX_CAPS_MAX_CORES:
    *value = NUM_CORES * NUM_CLUSTERS;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = NUM_WARPS;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = NUM_THREADS;
    break;
  case VX_CAPS_CACHE_LINE_SIZE:
    *value = CACHE_BLOCK_SIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    *value = 0xffffffff;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    *value = 0x10000000;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    std::cout << "invalid caps id: " << caps_id << std::endl;
    std::abort();
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Creates a new simulated device and returns its handle through *hdevice.
// Returns 0 on success, -1 if hdevice is null.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice == nullptr)
    return -1;

  vx_device* created = new vx_device();
  *hdevice = created;

  return 0;
}
|
||||
|
||||
// Destroys a device created by vx_dev_open(), dumping performance statistics
// first when built with DUMP_PERF_STATS.
// Returns 0 on success, -1 if hdevice is null.
extern int vx_dev_close(vx_device_h hdevice) {
  if (hdevice == nullptr)
    return -1;

  vx_device* dev = (vx_device*)hdevice;

#ifdef DUMP_PERF_STATS
  vx_dump_perf(dev, stdout);
#endif

  delete dev;

  return 0;
}
|
||||
|
||||
// Validates the arguments and forwards the request to the device's
// local-memory allocator. Returns its result, or -1 on invalid arguments.
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
  const bool bad_args = (hdevice == nullptr)
                     || (dev_maddr == nullptr)
                     || (size == 0);
  if (bad_args)
    return -1;

  return ((vx_device*)hdevice)->alloc_local_mem(size, dev_maddr);
}
|
||||
|
||||
|
||||
// Creates a host staging buffer of `size` bytes bound to the device and
// returns its handle through *hbuffer.
// Returns 0 on success, -1 on invalid arguments or if the storage could not
// be allocated.
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
  if (hdevice == nullptr || size == 0 || hbuffer == nullptr)
    return -1;

  vx_device* dev = (vx_device*)hdevice;

  vx_buffer* created = new vx_buffer(size, dev);
  if (created->data() == nullptr) {
    // Storage allocation failed inside the buffer constructor.
    delete created;
    return -1;
  }

  *hbuffer = created;

  return 0;
}
|
||||
|
||||
// Returns the host address of the buffer's storage, or nullptr for a null handle.
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    vx_buffer* buf = (vx_buffer*)hbuffer;
    return buf->data();
  }
  return nullptr;
}
|
||||
|
||||
// Destroys a buffer created by vx_alloc_shared_mem().
// Returns 0 on success, -1 for a null handle.
extern int vx_buf_release(vx_buffer_h hbuffer) {
  if (hbuffer == nullptr)
    return -1;

  delete (vx_buffer*)hbuffer;

  return 0;
}
|
||||
|
||||
// Uploads `size` bytes from the buffer (starting at src_offset) to device
// address dev_maddr. Returns the device's upload result, or -1 when the
// arguments are invalid or the range exceeds the buffer.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
  if (hbuffer == nullptr || size == 0)
    return -1;

  vx_buffer* buf = (vx_buffer*)hbuffer;

  // The requested range must lie inside the buffer.
  const size_t range_end = size + src_offset;
  if (range_end > buf->size())
    return -1;

  vx_device* dev = buf->device();
  return dev->upload(buf->data(), dev_maddr, size, src_offset);
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
auto buffer = (vx_buffer*)hbuffer;
|
||||
|
||||
if (size + dest_offset > buffer->size())
|
||||
return -1;
|
||||
|
||||
return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
|
||||
}
|
||||
|
||||
extern int vx_start(vx_device_h hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->start();
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
2
driver/simx/.gitignore
vendored
2
driver/simx/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
obj_dir
|
||||
libvortex.so
|
|
@ -1,37 +0,0 @@
|
|||
PROJECT = libvortex.so
|
||||
#PROJECT = libvortex.dylib
|
||||
|
||||
SIMX_DIR = ../../simX
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -DUSE_SIMX -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR)
|
||||
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
#LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/pipeline.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
|
||||
|
||||
# Debugigng
|
||||
ifndef DEBUG
|
||||
CXXFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) *.o .depend
|
|
@ -1,379 +0,0 @@
|
|||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <chrono>
|
||||
|
||||
#include <vortex.h>
|
||||
#include <core.h>
|
||||
#include <VX_config.h>
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
inline size_t align_size(size_t size, size_t alignment) {
|
||||
assert(0 == (alignment & (alignment - 1)));
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device;
|
||||
|
||||
class vx_buffer {
|
||||
public:
|
||||
vx_buffer(size_t size, vx_device* device)
|
||||
: size_(size)
|
||||
, device_(device) {
|
||||
auto aligned_asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
data_ = malloc(aligned_asize);
|
||||
}
|
||||
|
||||
~vx_buffer() {
|
||||
if (data_) {
|
||||
free(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void* data() const {
|
||||
return data_;
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return size_;
|
||||
}
|
||||
|
||||
vx_device* device() const {
|
||||
return device_;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
vx_device* device_;
|
||||
void* data_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: arch_("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS)
|
||||
, decoder_(arch_)
|
||||
, mmu_(PAGE_SIZE, arch_.wsize(), true)
|
||||
, cores_(arch_.num_cores())
|
||||
, is_done_(false)
|
||||
, is_running_(false)
|
||||
, thread_(__thread_proc__, this)
|
||||
, ram_((1<<12), (1<<20)) {
|
||||
|
||||
mem_allocation_ = ALLOC_BASE_ADDR;
|
||||
mmu_.attach(ram_, 0, 0xffffffff);
|
||||
for (int i = 0; i < arch_.num_cores(); ++i) {
|
||||
cores_[i] = std::make_shared<vortex::Core>(arch_, decoder_, mmu_, i);
|
||||
}
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
mutex_.lock();
|
||||
is_done_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
thread_.join();
|
||||
}
|
||||
|
||||
int alloc_local_mem(size_t size, size_t* dev_maddr) {
|
||||
auto dev_mem_size = LOCAL_MEM_SIZE;
|
||||
auto asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (mem_allocation_ + asize > dev_mem_size)
|
||||
return -1;
|
||||
*dev_maddr = mem_allocation_;
|
||||
mem_allocation_ += asize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
|
||||
auto asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.write(dest_addr, (const uint8_t*)src + src_offset, asize);
|
||||
|
||||
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
|
||||
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > ram_.size())
|
||||
return -1;
|
||||
|
||||
ram_.read(src_addr, (uint8_t*)dest + dest_offset, asize);
|
||||
|
||||
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
|
||||
}*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int start() {
|
||||
|
||||
mutex_.lock();
|
||||
for (int i = 0; i < arch_.num_cores(); ++i) {
|
||||
cores_[i]->clear();
|
||||
}
|
||||
is_running_ = true;
|
||||
mutex_.unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int wait(long long timeout) {
|
||||
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (!is_running || 0 == timeout_sec--)
|
||||
break;
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_csr(int core_id, int addr, unsigned *value) {
|
||||
*value = cores_.at(core_id)->get_csr(addr, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int set_csr(int core_id, int addr, unsigned value) {
|
||||
cores_.at(core_id)->set_csr(addr, value, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void run() {
|
||||
bool running;
|
||||
do {
|
||||
running = false;
|
||||
for (auto& core : cores_) {
|
||||
core->step();
|
||||
if (core->running())
|
||||
running = true;
|
||||
}
|
||||
} while (running);
|
||||
}
|
||||
|
||||
void thread_proc() {
|
||||
std::cout << "Device ready..." << std::flush << std::endl;
|
||||
|
||||
for (;;) {
|
||||
mutex_.lock();
|
||||
bool is_done = is_done_;
|
||||
bool is_running = is_running_;
|
||||
mutex_.unlock();
|
||||
|
||||
if (is_done)
|
||||
break;
|
||||
|
||||
if (is_running) {
|
||||
std::cout << "Device running..." << std::flush << std::endl;
|
||||
|
||||
this->run();
|
||||
|
||||
mutex_.lock();
|
||||
is_running_ = false;
|
||||
mutex_.unlock();
|
||||
|
||||
std::cout << "Device ready..." << std::flush << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Device shutdown..." << std::flush << std::endl;
|
||||
}
|
||||
|
||||
static void __thread_proc__(vx_device* device) {
|
||||
device->thread_proc();
|
||||
}
|
||||
|
||||
vortex::ArchDef arch_;
|
||||
vortex::Decoder decoder_;
|
||||
vortex::MemoryUnit mmu_;
|
||||
std::vector<std::shared_ptr<vortex::Core>> cores_;
|
||||
bool is_done_;
|
||||
bool is_running_;
|
||||
size_t mem_allocation_;
|
||||
std::thread thread_;
|
||||
vortex::RAM ram_;
|
||||
std::mutex mutex_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
*hdevice = new vx_device();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_dev_close(vx_device_h hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
#ifdef DUMP_PERF_STATS
|
||||
vx_dump_perf(device, stdout);
|
||||
#endif
|
||||
|
||||
delete device;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
switch (caps_id) {
|
||||
case VX_CAPS_VERSION:
|
||||
*value = IMPLEMENTATION_ID;
|
||||
break;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
*value = NUM_CORES * NUM_CLUSTERS;
|
||||
break;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
*value = NUM_WARPS;
|
||||
break;
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
*value = NUM_THREADS;
|
||||
break;
|
||||
case VX_CAPS_CACHE_LINE_SIZE:
|
||||
*value = CACHE_BLOCK_SIZE;
|
||||
break;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
*value = LOCAL_MEM_SIZE;
|
||||
break;
|
||||
case VX_CAPS_ALLOC_BASE_ADDR:
|
||||
*value = ALLOC_BASE_ADDR;
|
||||
break;
|
||||
case VX_CAPS_KERNEL_BASE_ADDR:
|
||||
*value = STARTUP_ADDR;
|
||||
break;
|
||||
default:
|
||||
std::cout << "invalid caps id: " << caps_id << std::endl;
|
||||
std::abort();
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
|
||||
if (nullptr == hdevice
|
||||
|| nullptr == dev_maddr
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
return device->alloc_local_mem(size, dev_maddr);
|
||||
}
|
||||
|
||||
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
|
||||
if (nullptr == hdevice
|
||||
|| 0 >= size
|
||||
|| nullptr == hbuffer)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
auto buffer = new vx_buffer(size, device);
|
||||
if (nullptr == buffer->data()) {
|
||||
delete buffer;
|
||||
return -1;
|
||||
}
|
||||
|
||||
*hbuffer = buffer;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
|
||||
if (nullptr == hbuffer)
|
||||
return nullptr;
|
||||
|
||||
vx_buffer* buffer = ((vx_buffer*)hbuffer);
|
||||
|
||||
return buffer->data();
|
||||
}
|
||||
|
||||
extern int vx_buf_release(vx_buffer_h hbuffer) {
|
||||
if (nullptr == hbuffer)
|
||||
return -1;
|
||||
|
||||
vx_buffer* buffer = ((vx_buffer*)hbuffer);
|
||||
|
||||
delete buffer;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
auto buffer = (vx_buffer*)hbuffer;
|
||||
|
||||
if (size + src_offset > buffer->size())
|
||||
return -1;
|
||||
|
||||
return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
|
||||
if (nullptr == hbuffer
|
||||
|| 0 >= size)
|
||||
return -1;
|
||||
|
||||
auto buffer = (vx_buffer*)hbuffer;
|
||||
|
||||
if (size + dest_offset > buffer->size())
|
||||
return -1;
|
||||
|
||||
return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
|
||||
}
|
||||
|
||||
extern int vx_start(vx_device_h hdevice) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->start();
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I../../runtime -I../../hw
|
||||
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir
|
|
@ -1,45 +0,0 @@
|
|||
#include <vortex.h>
|
||||
|
||||
extern int vx_dev_open(vx_device_h* /*hdevice*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_dev_close(vx_device_h /*hdevice*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_dev_caps(vx_device_h /*hdevice*/, unsigned /*caps_id*/, unsigned* /*value*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_start(vx_device_h /*hdevice*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
||||
return -1;
|
||||
}
|
1
hw/.gitignore
vendored
1
hw/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
obj_dir/*
|
25
hw/Makefile
25
hw/Makefile
|
@ -1,9 +1,22 @@
|
|||
.PHONY: build_config
|
||||
ROOT_DIR := $(realpath ..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
build_config: ./rtl/VX_config.vh
|
||||
./scripts/gen_config.py -i ./rtl/VX_config.vh -o ./VX_config.h
|
||||
$(MAKE) -C simulate
|
||||
HW_DIR := $(VORTEX_HOME)/hw
|
||||
SCRIPT_DIR := $(HW_DIR)/scripts
|
||||
RTL_DIR := $(HW_DIR)/rtl
|
||||
|
||||
all: config
|
||||
|
||||
config: VX_config.h VX_types.h
|
||||
|
||||
VX_config.h: $(RTL_DIR)/VX_config.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
|
||||
|
||||
VX_types.h: $(RTL_DIR)/VX_types.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
|
||||
|
||||
clean:
|
||||
rm -f ./VX_config.h
|
||||
$(MAKE) -C simulate clean
|
||||
$(MAKE) -C unittest clean
|
||||
rm -f VX_config.h VX_types.h
|
||||
|
||||
.PHONY: VX_config.h VX_types.h
|
2
hw/configs/.gitignore
vendored
2
hw/configs/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
*.v
|
||||
*.sh
|
|
@ -1,296 +1,339 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
#include <rvfloats.h>
|
||||
#include <util.h>
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
extern "C" {
|
||||
void dpi_fadd(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fsub(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fmul(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags);
|
||||
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
|
||||
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags);
|
||||
void dpi_fsqrt(int a, int frm, int* result, int* fflags);
|
||||
|
||||
void dpi_ftoi(int a, int frm, int* result, int* fflags);
|
||||
void dpi_ftou(int a, int frm, int* result, int* fflags);
|
||||
void dpi_itof(int a, int frm, int* result, int* fflags);
|
||||
void dpi_utof(int a, int frm, int* result, int* fflags);
|
||||
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
|
||||
void dpi_fclss(int a, int* result);
|
||||
void dpi_fsgnj(int a, int b, int* result);
|
||||
void dpi_fsgnjn(int a, int b, int* result);
|
||||
void dpi_fsgnjx(int a, int b, int* result);
|
||||
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
|
||||
void dpi_flt(int a, int b, int* result, int* fflags);
|
||||
void dpi_fle(int a, int b, int* result, int* fflags);
|
||||
void dpi_feq(int a, int b, int* result, int* fflags);
|
||||
void dpi_fmin(int a, int b, int* result, int* fflags);
|
||||
void dpi_fmax(int a, int b, int* result, int* fflags);
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
|
||||
|
||||
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
|
||||
}
|
||||
|
||||
union Float_t {
|
||||
float f;
|
||||
int i;
|
||||
struct {
|
||||
uint32_t man : 23;
|
||||
uint32_t exp : 8;
|
||||
uint32_t sign : 1;
|
||||
} parts;
|
||||
};
|
||||
|
||||
void dpi_fadd(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f + fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
inline uint64_t nan_box(uint32_t value) {
|
||||
#ifdef XLEN_64
|
||||
return value | 0xffffffff00000000;
|
||||
#else
|
||||
return value;
|
||||
#endif
|
||||
}
|
||||
|
||||
void dpi_fsub(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f - fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
inline bool is_nan_boxed(uint64_t value) {
|
||||
#ifdef XLEN_64
|
||||
return (uint32_t(value >> 32) == 0xffffffff);
|
||||
#else
|
||||
__unused (value);
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
void dpi_fmul(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f * fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
inline int64_t check_boxing(int64_t a) {
|
||||
if (is_nan_boxed(a))
|
||||
return a;
|
||||
return nan_box(0x7fc00000); // NaN
|
||||
}
|
||||
|
||||
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = fa.f * fb.f + fc.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fadd_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = fa.f * fb.f - fc.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fsub_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = -(fa.f * fb.f + fc.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fc, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fc.i = c;
|
||||
fr.f = -(fa.f * fb.f - fc.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f / fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsqrt(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.f = sqrtf(fa.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_ftoi(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.i = int(fa.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_ftou(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fa.i = a;
|
||||
fr.i = unsigned(fa.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_itof(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
fr.f = (float)a;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_utof(int a, int frm, int* result, int* fflags) {
|
||||
Float_t fa, fr;
|
||||
|
||||
unsigned ua = a;
|
||||
fr.f = (float)ua;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_flt(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f < fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_fle(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f <= fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_feq(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = fa.f == fb.f;
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_fmin(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = std::min<float>(fa.f, fb.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_fmax(int a, int b, int* result, int* fflags) {
|
||||
Float_t fa, fb, fr;
|
||||
|
||||
fa.i = a;
|
||||
fb.i = b;
|
||||
fr.f = std::max<float>(fa.f, fb.f);
|
||||
|
||||
*result = fr.i;
|
||||
*fflags = 0;
|
||||
}
|
||||
|
||||
void dpi_fclss(int a, int* result) {
|
||||
|
||||
int r = 0; // clear all bits
|
||||
|
||||
bool fsign = (a >> 31);
|
||||
uint32_t expo = (a >> 23) & 0xFF;
|
||||
uint32_t fraction = a & 0x7FFFFF;
|
||||
|
||||
if ((expo == 0) && (fraction == 0)) {
|
||||
r = fsign ? (1 << 3) : (1 << 4); // +/- 0
|
||||
} else if ((expo == 0) && (fraction != 0)) {
|
||||
r = fsign ? (1 << 2) : (1 << 5); // +/- subnormal
|
||||
} else if ((expo == 0xFF) && (fraction == 0)) {
|
||||
r = fsign ? (1<<0) : (1<<7); // +/- infinity
|
||||
} else if ((expo == 0xFF ) && (fraction != 0)) {
|
||||
if (!fsign && (fraction == 0x00400000)) {
|
||||
r = (1 << 9); // quiet NaN
|
||||
} else {
|
||||
r = (1 << 8); // signaling NaN
|
||||
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
r = fsign ? (1 << 1) : (1 << 6); // +/- normal
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
}
|
||||
}
|
||||
|
||||
*result = r;
|
||||
}
|
||||
|
||||
void dpi_fsgnj(int a, int b, int* result) {
|
||||
|
||||
int sign = b & 0x80000000;
|
||||
int r = sign | (a & 0x7FFFFFFF);
|
||||
|
||||
*result = r;
|
||||
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
|
||||
} else {
|
||||
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjn(int a, int b, int* result) {
|
||||
|
||||
int sign = ~b & 0x80000000;
|
||||
int r = sign | (a & 0x7FFFFFFF);
|
||||
|
||||
*result = r;
|
||||
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_itof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjx(int a, int b, int* result) {
|
||||
|
||||
int sign1 = a & 0x80000000;
|
||||
int sign2 = b & 0x80000000;
|
||||
int r = (sign1 ^ sign2) | (a & 0x7FFFFFFF);
|
||||
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
if (src_fmt) {
|
||||
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
|
||||
} else {
|
||||
*result = rv_utof_d(a, (*frm & 0x7), fflags);
|
||||
}
|
||||
} else {
|
||||
if (src_fmt) {
|
||||
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
|
||||
} else {
|
||||
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*result = r;
|
||||
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_ftod((int32_t)check_boxing(a));
|
||||
} else {
|
||||
*result = nan_box(rv_dtof(a));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fclss_d(a);
|
||||
} else {
|
||||
*result = rv_fclss_s(check_boxing(a));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnj_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjn_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fsgnjx_d(a, b);
|
||||
} else {
|
||||
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_flt_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fle_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_feq_d(a, b, fflags);
|
||||
} else {
|
||||
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmin_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
}
|
||||
|
||||
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
|
||||
if (!enable)
|
||||
return;
|
||||
if (dst_fmt) {
|
||||
*result = rv_fmax_d(a, b, fflags);
|
||||
} else {
|
||||
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
|
||||
}
|
||||
}
|
|
@ -1,31 +1,45 @@
|
|||
`ifndef FLOAT_DPI
|
||||
`define FLOAT_DPI
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import "DPI-C" context function void dpi_fadd(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fsub(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmul(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fnmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fnmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
`ifndef FLOAT_DPI_VH
|
||||
`define FLOAT_DPI_VH
|
||||
|
||||
import "DPI-C" context function void dpi_fdiv(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fsqrt(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fnmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" context function void dpi_ftoi(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_ftou(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fdiv(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fsqrt(input logic enable, input int dst_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
|
||||
import "DPI-C" context function void dpi_fclss(input int a, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
|
||||
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
|
||||
import "DPI-C" function void dpi_ftoi(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_ftou(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_itof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_utof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_f2f(input logic enable, input int dst_fmt, input longint a, output longint result);
|
||||
|
||||
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_feq(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmin(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" context function void dpi_fmax(input int a, input int b, output int result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fclss(input logic enable, input int dst_fmt, input longint a, output longint result);
|
||||
import "DPI-C" function void dpi_fsgnj(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
|
||||
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
|
||||
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
|
||||
|
||||
`endif
|
||||
import "DPI-C" function void dpi_flt(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fle(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_feq(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmin(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
import "DPI-C" function void dpi_fmax(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
|
||||
|
||||
`endif
|
||||
|
|
|
@ -1,21 +1,57 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <iostream>
|
||||
|
||||
#include "svdpi.h"
|
||||
#include "verilated_vpi.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
#ifdef XLEN_64
|
||||
#define iword_t int64_t
|
||||
#define uword_t uint64_t
|
||||
#define idword_t __int128_t
|
||||
#define udword_t __uint128_t
|
||||
#else
|
||||
#define iword_t int32_t
|
||||
#define uword_t uint32_t
|
||||
#define idword_t int64_t
|
||||
#define udword_t uint64_t
|
||||
#endif
|
||||
|
||||
#ifndef DEBUG_LEVEL
|
||||
#define DEBUG_LEVEL 3
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
|
||||
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder);
|
||||
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth);
|
||||
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder);
|
||||
|
||||
int dpi_register();
|
||||
void dpi_assert(int inst, bool cond, int delay);
|
||||
|
||||
void dpi_trace(int level, const char* format, ...);
|
||||
void dpi_trace_start();
|
||||
void dpi_trace_stop();
|
||||
}
|
||||
|
||||
bool sim_trace_enabled();
|
||||
void sim_trace_enable(bool enable);
|
||||
|
||||
class ShiftRegister {
|
||||
public:
|
||||
ShiftRegister() : init_(false), depth_(0) {}
|
||||
|
@ -30,7 +66,7 @@ public:
|
|||
|
||||
void push(int value, bool enable) {
|
||||
if (!enable)
|
||||
return;
|
||||
return;
|
||||
for (unsigned i = 0; i < depth_-1; ++i) {
|
||||
buffer_[i] = buffer_[i+1];
|
||||
}
|
||||
|
@ -45,7 +81,7 @@ private:
|
|||
|
||||
std::vector<int> buffer_;
|
||||
bool init_;
|
||||
unsigned depth_;
|
||||
unsigned depth_;
|
||||
};
|
||||
|
||||
class Instances {
|
||||
|
@ -55,9 +91,9 @@ public:
|
|||
}
|
||||
|
||||
int allocate() {
|
||||
mutex_.lock();
|
||||
mutex_.lock();
|
||||
int inst = instances_.size();
|
||||
instances_.resize(inst + 1);
|
||||
instances_.resize(inst + 1);
|
||||
mutex_.unlock();
|
||||
return inst;
|
||||
}
|
||||
|
@ -86,45 +122,56 @@ void dpi_assert(int inst, bool cond, int delay) {
|
|||
}
|
||||
}
|
||||
|
||||
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
|
||||
uint64_t first = a;
|
||||
uint64_t second = b;
|
||||
|
||||
if (is_signed_a && (a & 0x80000000)) {
|
||||
first |= 0xFFFFFFFF00000000;
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth) {
|
||||
if (!enable)
|
||||
return;
|
||||
udword_t first = *(uword_t*)&a;
|
||||
udword_t second = *(uword_t*)&b;
|
||||
|
||||
udword_t mask = udword_t(-1) << (8 * sizeof(iword_t));
|
||||
|
||||
if (is_signed_a && a < 0) {
|
||||
first |= mask;
|
||||
}
|
||||
|
||||
if (is_signed_b && (b & 0x80000000)) {
|
||||
second |= 0xFFFFFFFF00000000;
|
||||
if (is_signed_b && b < 0) {
|
||||
second |= mask;
|
||||
}
|
||||
|
||||
uint64_t result;
|
||||
udword_t result;
|
||||
if (is_signed_a || is_signed_b) {
|
||||
result = (int64_t)first * (int64_t)second;
|
||||
result = idword_t(first) * idword_t(second);
|
||||
} else {
|
||||
result = first * second;
|
||||
}
|
||||
|
||||
*resultl = result & 0xFFFFFFFF;
|
||||
*resulth = (result >> 32) & 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
*resultl = iword_t(result);
|
||||
*resulth = iword_t(result >> (8 * sizeof(iword_t)));
|
||||
}
|
||||
|
||||
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
|
||||
uint32_t dividen = a;
|
||||
uint32_t divisor = b;
|
||||
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder) {
|
||||
if (!enable)
|
||||
return;
|
||||
|
||||
uword_t dividen = a;
|
||||
uword_t divisor = b;
|
||||
|
||||
auto inf_neg = uword_t(1) << (8 * sizeof(iword_t) - 1);
|
||||
|
||||
if (is_signed) {
|
||||
if (b == 0) {
|
||||
*quotient = -1;
|
||||
*remainder = dividen;
|
||||
} else if (dividen == 0x80000000 && divisor == 0xffffffff) {
|
||||
} else if (dividen == inf_neg && divisor == -1) {
|
||||
*remainder = 0;
|
||||
*quotient = dividen;
|
||||
} else {
|
||||
*quotient = (int32_t)dividen / (int32_t)divisor;
|
||||
*remainder = (int32_t)dividen % (int32_t)divisor;
|
||||
} else {
|
||||
*quotient = (iword_t)dividen / (iword_t)divisor;
|
||||
*remainder = (iword_t)dividen % (iword_t)divisor;
|
||||
}
|
||||
} else {
|
||||
} else {
|
||||
if (b == 0) {
|
||||
*quotient = -1;
|
||||
*remainder = dividen;
|
||||
|
@ -133,4 +180,25 @@ void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
|
|||
*remainder = dividen % divisor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void dpi_trace(int level, const char* format, ...) {
|
||||
if (level > DEBUG_LEVEL)
|
||||
return;
|
||||
if (!sim_trace_enabled())
|
||||
return;
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
vprintf(format, va);
|
||||
va_end(va);
|
||||
}
|
||||
|
||||
void dpi_trace_start() {
|
||||
sim_trace_enable(true);
|
||||
}
|
||||
|
||||
void dpi_trace_stop() {
|
||||
sim_trace_enable(false);
|
||||
}
|
||||
|
|
|
@ -1,10 +1,33 @@
|
|||
`ifndef UTIL_DPI
|
||||
`define UTIL_DPI
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import "DPI-C" context function void dpi_imul(input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
|
||||
import "DPI-C" context function void dpi_idiv(input int a, input int b, input logic is_signed, output int quotient, output int remainder);
|
||||
`ifndef UTIL_DPI_VH
|
||||
`define UTIL_DPI_VH
|
||||
|
||||
import "DPI-C" context function int dpi_register();
|
||||
import "DPI-C" context function void dpi_assert(int inst, input logic cond, input int delay);
|
||||
`ifdef XLEN_64
|
||||
`define INT_TYPE longint
|
||||
`else
|
||||
`define INT_TYPE int
|
||||
`endif
|
||||
|
||||
`endif
|
||||
import "DPI-C" function void dpi_imul(input logic enable, input logic is_signed_a, input logic is_signed_b, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE resultl, output `INT_TYPE resulth);
|
||||
import "DPI-C" function void dpi_idiv(input logic enable, input logic is_signed, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE quotient, output `INT_TYPE remainder);
|
||||
|
||||
import "DPI-C" function int dpi_register();
|
||||
import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay);
|
||||
|
||||
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
|
||||
import "DPI-C" function void dpi_trace_start();
|
||||
import "DPI-C" function void dpi_trace_stop();
|
||||
|
||||
`endif
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
|
||||
|
||||
#include "Vrf2_32x128_wm1_rtl.h"
|
||||
#include "verilated.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
Vrf2_32x128_wm1_rtl module;
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
// module.clk = 0;
|
||||
module.eval();
|
||||
// module.clk = 1;
|
||||
module.eval();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
set SOURCE_FILES [glob *.lib]
|
||||
foreach FILE ${SOURCE_FILES} {
|
||||
read_lib $FILE
|
||||
redirect -variable CURR_LIB {get_lib}
|
||||
|
||||
set CURR_LIB [string range $CURR_LIB 2 end-3]
|
||||
set CURR_LIB [lindex $CURR_LIB 0]
|
||||
set FILENAME [string range $FILE 0 end-4]
|
||||
write_lib $CURR_LIB -output ${FILENAME}.db
|
||||
remove_lib $CURR_LIB
|
||||
}
|
||||
|
||||
exit
|
1
hw/rtl/.gitignore
vendored
1
hw/rtl/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
/VX_user_config.vh
|
|
@ -1,213 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_commit_if alu_commit_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] add_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
|
||||
wire stall_in, stall_out;
|
||||
|
||||
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
wire is_br_op = `ALU_IS_BR(alu_req_if.op_mod);
|
||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
|
||||
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
|
||||
wire alu_signed = `ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `ALU_SUB);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
||||
assign sub_result[i] = sub_in1 - sub_in2;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
assign shr_result[i] = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`ALU_SLL,
|
||||
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op_class)
|
||||
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
|
||||
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
|
||||
: shr_result[i]; // SRL, SRA
|
||||
// 2'b11,
|
||||
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// branch
|
||||
|
||||
wire is_jal = is_br_op && (br_op == `BR_JAL || br_op == `BR_JALR);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||
|
||||
wire [31:0] br_dest = add_result[alu_req_if.tid];
|
||||
wire [32:0] cmp_result = sub_result[alu_req_if.tid];
|
||||
|
||||
wire is_less = cmp_result[32];
|
||||
wire is_equal = ~(| cmp_result[31:0]);
|
||||
|
||||
wire br_neg = `BR_NEG(br_op);
|
||||
wire br_less = `BR_LESS(br_op);
|
||||
wire br_static = `BR_STATIC(br_op);
|
||||
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
||||
|
||||
// output
|
||||
|
||||
wire result_valid;
|
||||
wire [`NW_BITS-1:0] result_wid;
|
||||
wire [`NUM_THREADS-1:0] result_tmask;
|
||||
wire [31:0] result_PC;
|
||||
wire [`NR_BITS-1:0] result_rd;
|
||||
wire result_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] result_data;
|
||||
wire result_is_br;
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
wire [`NW_BITS-1:0] mul_wid;
|
||||
wire [`NUM_THREADS-1:0] mul_tmask;
|
||||
wire [31:0] mul_PC;
|
||||
wire [`NR_BITS-1:0] mul_rd;
|
||||
wire mul_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||
|
||||
wire is_mul_op = `ALU_IS_MUL(alu_req_if.op_mod);
|
||||
|
||||
VX_muldiv muldiv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Inputs
|
||||
.alu_op (`MUL_OP(alu_req_if.op_type)),
|
||||
.wid_in (alu_req_if.wid),
|
||||
.tmask_in (alu_req_if.tmask),
|
||||
.PC_in (alu_req_if.PC),
|
||||
.rd_in (alu_req_if.rd),
|
||||
.wb_in (alu_req_if.wb),
|
||||
.alu_in1 (alu_req_if.rs1_data),
|
||||
.alu_in2 (alu_req_if.rs2_data),
|
||||
|
||||
// Outputs
|
||||
.wid_out (mul_wid),
|
||||
.tmask_out (mul_tmask),
|
||||
.PC_out (mul_PC),
|
||||
.rd_out (mul_rd),
|
||||
.wb_out (mul_wb),
|
||||
.data_out (mul_data),
|
||||
|
||||
// handshake
|
||||
.valid_in (alu_req_if.valid && is_mul_op),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out)
|
||||
);
|
||||
|
||||
assign stall_in = (is_mul_op && ~mul_ready_in)
|
||||
|| (~is_mul_op && (mul_valid_out || stall_out));
|
||||
|
||||
assign mul_ready_out = ~stall_out;
|
||||
|
||||
assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op);
|
||||
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
|
||||
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
|
||||
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
|
||||
assign result_rd = mul_valid_out ? mul_rd : alu_req_if.rd;
|
||||
assign result_wb = mul_valid_out ? mul_wb : alu_req_if.wb;
|
||||
assign result_data = mul_valid_out ? mul_data : alu_jal_result;
|
||||
assign result_is_br = ~mul_valid_out && is_br_op;
|
||||
|
||||
`else
|
||||
|
||||
assign stall_in = 0;
|
||||
|
||||
assign result_valid = alu_req_if.valid;
|
||||
assign result_wid = alu_req_if.wid;
|
||||
assign result_tmask = alu_req_if.tmask;
|
||||
assign result_PC = alu_req_if.PC;
|
||||
assign result_rd = alu_req_if.rd;
|
||||
assign result_wb = alu_req_if.wb;
|
||||
assign result_data = alu_jal_result;
|
||||
assign result_is_br = is_br_op;
|
||||
|
||||
`endif
|
||||
|
||||
wire is_br_op_r;
|
||||
|
||||
assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_taken, br_dest}),
|
||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
assign alu_commit_if.eop = 1'b1;
|
||||
|
||||
assign branch_ctl_if.valid = alu_commit_if.valid && alu_commit_if.ready && is_br_op_r;
|
||||
assign branch_ctl_if.wid = alu_commit_if.wid;
|
||||
|
||||
// can accept new request?
|
||||
assign alu_req_if.ready = ~stall_in;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
$display("%t: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h", $time, CORE_ID,
|
||||
branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
160
hw/rtl/VX_cluster.sv
Normal file
160
hw/rtl/VX_cluster.sv
Normal file
|
@ -0,0 +1,160 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_cluster import VX_gpu_pkg::*; #(
|
||||
parameter CLUSTER_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
input sysmem_perf_t sysmem_perf,
|
||||
`endif
|
||||
|
||||
// DCRs
|
||||
VX_dcr_bus_if.slave dcr_bus_if,
|
||||
|
||||
// Memory
|
||||
VX_mem_bus_if.master mem_bus_if [`L2_MEM_PORTS],
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_socket = 0;
|
||||
`SCOPE_IO_SWITCH (`NUM_SOCKETS);
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
cache_perf_t l2_perf;
|
||||
sysmem_perf_t sysmem_perf_tmp;
|
||||
always @(*) begin
|
||||
sysmem_perf_tmp = sysmem_perf;
|
||||
sysmem_perf_tmp.l2cache = l2_perf;
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
|
||||
VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS]();
|
||||
VX_gbar_bus_if gbar_bus_if();
|
||||
|
||||
VX_gbar_arb #(
|
||||
.NUM_REQS (`NUM_SOCKETS),
|
||||
.OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure
|
||||
) gbar_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.bus_in_if (per_socket_gbar_bus_if),
|
||||
.bus_out_if (gbar_bus_if)
|
||||
);
|
||||
|
||||
VX_gbar_unit #(
|
||||
.INSTANCE_ID (`SFORMATF(("gbar%0d", CLUSTER_ID)))
|
||||
) gbar_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.gbar_bus_if (gbar_bus_if)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (`L1_LINE_SIZE),
|
||||
.TAG_WIDTH (L1_MEM_ARB_TAG_WIDTH)
|
||||
) per_socket_mem_bus_if[`NUM_SOCKETS * `L1_MEM_PORTS]();
|
||||
|
||||
`RESET_RELAY (l2_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID (`SFORMATF(("%s-l2cache", INSTANCE_ID))),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.LINE_SIZE (`L2_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
.NUM_WAYS (`L2_NUM_WAYS),
|
||||
.WORD_SIZE (L2_WORD_SIZE),
|
||||
.NUM_REQS (L2_NUM_REQS),
|
||||
.MEM_PORTS (`L2_MEM_PORTS),
|
||||
.CRSQ_SIZE (`L2_CRSQ_SIZE),
|
||||
.MSHR_SIZE (`L2_MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_DIRTYBYTES),
|
||||
.REPL_POLICY (`L2_REPL_POLICY),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
.PASSTHRU (!`L2_ENABLED)
|
||||
) l2cache (
|
||||
.clk (clk),
|
||||
.reset (l2_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.cache_perf (l2_perf),
|
||||
`endif
|
||||
.core_bus_if (per_socket_mem_bus_if),
|
||||
.mem_bus_if (mem_bus_if)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_busy;
|
||||
|
||||
// Generate all sockets
|
||||
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets
|
||||
|
||||
`RESET_RELAY (socket_reset, reset);
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_if();
|
||||
wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS > 1))
|
||||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-socket%0d", INSTANCE_ID, socket_id)))
|
||||
) socket (
|
||||
`SCOPE_IO_BIND (scope_socket+socket_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (socket_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.sysmem_perf (sysmem_perf_tmp),
|
||||
`endif
|
||||
|
||||
.dcr_bus_if (socket_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_socket_mem_bus_if[socket_id * `L1_MEM_PORTS +: `L1_MEM_PORTS]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
|
||||
`endif
|
||||
|
||||
.busy (per_socket_busy[socket_id])
|
||||
);
|
||||
end
|
||||
|
||||
`BUFFER_EX(busy, (| per_socket_busy), 1'b1, 1, (`NUM_SOCKETS > 1));
|
||||
|
||||
endmodule
|
|
@ -1,190 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Cluster top level: instantiates `NUM_CORES cores and merges their memory
// ports into one external memory interface. The merge goes through an L2
// cache when `L2_ENABLE is non-zero and through a plain arbiter otherwise.
module VX_cluster #(
    parameter CLUSTER_ID = 0    // cluster index, used to derive each core's CORE_ID
) (
    `SCOPE_IO_VX_cluster

    // Clock and reset
    input  wire                             clk,
    input  wire                             reset,

    // Outgoing memory request channel
    output wire                             mem_req_valid,
    output wire                             mem_req_rw,
    output wire [`L2MEM_BYTEEN_WIDTH-1:0]   mem_req_byteen,
    output wire [`L2MEM_ADDR_WIDTH-1:0]     mem_req_addr,
    output wire [`L2MEM_DATA_WIDTH-1:0]     mem_req_data,
    output wire [`L2MEM_TAG_WIDTH-1:0]      mem_req_tag,
    input  wire                             mem_req_ready,

    // Incoming memory response channel
    input  wire                             mem_rsp_valid,
    input  wire [`L2MEM_DATA_WIDTH-1:0]     mem_rsp_data,
    input  wire [`L2MEM_TAG_WIDTH-1:0]      mem_rsp_tag,
    output wire                             mem_rsp_ready,

    // Status
    output wire                             busy
);
    `STATIC_ASSERT((`L2_ENABLE == 0 || `NUM_CORES > 1), ("invalid parameter"))

    // Per-core request channel (one slice per core)
    wire [`NUM_CORES-1:0]                           per_core_mem_req_valid;
    wire [`NUM_CORES-1:0]                           per_core_mem_req_ready;
    wire [`NUM_CORES-1:0]                           per_core_mem_req_rw;
    wire [`NUM_CORES-1:0][`DMEM_ADDR_WIDTH-1:0]     per_core_mem_req_addr;
    wire [`NUM_CORES-1:0][`DMEM_BYTEEN_WIDTH-1:0]   per_core_mem_req_byteen;
    wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0]     per_core_mem_req_data;
    wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0]      per_core_mem_req_tag;

    // Per-core response channel
    wire [`NUM_CORES-1:0]                           per_core_mem_rsp_valid;
    wire [`NUM_CORES-1:0]                           per_core_mem_rsp_ready;
    wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0]     per_core_mem_rsp_data;
    wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0]      per_core_mem_rsp_tag;

    // Per-core busy flags
    wire [`NUM_CORES-1:0]                           per_core_busy;

    // One core per iteration, each fed by its own relayed copy of reset.
    for (genvar i = 0; i < `NUM_CORES; i++) begin

        `RESET_RELAY (core_reset);

        VX_core #(
            .CORE_ID (i + (CLUSTER_ID * `NUM_CORES))
        ) core (
            `SCOPE_BIND_VX_cluster_core(i)

            .clk            (clk),
            .reset          (core_reset),

            .busy           (per_core_busy[i]),

            // request side
            .mem_req_valid  (per_core_mem_req_valid[i]),
            .mem_req_ready  (per_core_mem_req_ready[i]),
            .mem_req_rw     (per_core_mem_req_rw[i]),
            .mem_req_addr   (per_core_mem_req_addr[i]),
            .mem_req_byteen (per_core_mem_req_byteen[i]),
            .mem_req_data   (per_core_mem_req_data[i]),
            .mem_req_tag    (per_core_mem_req_tag[i]),

            // response side
            .mem_rsp_valid  (per_core_mem_rsp_valid[i]),
            .mem_rsp_ready  (per_core_mem_rsp_ready[i]),
            .mem_rsp_data   (per_core_mem_rsp_data[i]),
            .mem_rsp_tag    (per_core_mem_rsp_tag[i])
        );
    end

    // The cluster is busy while any of its cores is busy.
    assign busy = |per_core_busy;

    if (`L2_ENABLE) begin

    `ifdef PERF_ENABLE
        VX_perf_cache_if perf_l2cache_if();
    `endif

        `RESET_RELAY (l2_reset);

        // Shared L2 cache: per-core buses on the core side, the cluster's
        // external memory ports on the memory side.
        VX_cache #(
            .CACHE_ID           (`L2CACHE_ID),
            .CACHE_SIZE         (`L2CACHE_SIZE),
            .CACHE_LINE_SIZE    (`L2CACHE_LINE_SIZE),
            .WORD_SIZE          (`L2WORD_SIZE),
            .NUM_BANKS          (`L2NUM_BANKS),
            .NUM_REQS           (`L2NUM_REQS),
            .CREQ_SIZE          (`L2CREQ_SIZE),
            .CRSQ_SIZE          (`L2CRSQ_SIZE),
            .MSHR_SIZE          (`L2MSHR_SIZE),
            .MREQ_SIZE          (`L2MREQ_SIZE),
            .MRSQ_SIZE          (`L2MRSQ_SIZE),
            .CORE_TAG_WIDTH     (`XMEM_TAG_WIDTH),
            .CORE_TAG_ID_BITS   (0),
            .MEM_TAG_WIDTH      (`L2MEM_TAG_WIDTH),
            .WRITE_ENABLE       (1),
            .NC_ENABLE          (1)
        ) l2cache (
            `SCOPE_BIND_VX_cluster_l2cache

            .clk                (clk),
            .reset              (l2_reset),

        `ifdef PERF_ENABLE
            .perf_cache_if      (perf_l2cache_if),
        `endif

            // Core-side request
            .core_req_valid     (per_core_mem_req_valid),
            .core_req_ready     (per_core_mem_req_ready),
            .core_req_rw        (per_core_mem_req_rw),
            .core_req_addr      (per_core_mem_req_addr),
            .core_req_byteen    (per_core_mem_req_byteen),
            .core_req_data      (per_core_mem_req_data),
            .core_req_tag       (per_core_mem_req_tag),

            // Core-side response
            .core_rsp_valid     (per_core_mem_rsp_valid),
            .core_rsp_ready     (per_core_mem_rsp_ready),
            .core_rsp_data      (per_core_mem_rsp_data),
            .core_rsp_tag       (per_core_mem_rsp_tag),
            `UNUSED_PIN (core_rsp_tmask),

            // Memory-side request
            .mem_req_valid      (mem_req_valid),
            .mem_req_ready      (mem_req_ready),
            .mem_req_rw         (mem_req_rw),
            .mem_req_addr       (mem_req_addr),
            .mem_req_byteen     (mem_req_byteen),
            .mem_req_data       (mem_req_data),
            .mem_req_tag        (mem_req_tag),

            // Memory-side response
            .mem_rsp_valid      (mem_rsp_valid),
            .mem_rsp_ready      (mem_rsp_ready),
            .mem_rsp_data       (mem_rsp_data),
            .mem_rsp_tag        (mem_rsp_tag)
        );

    end else begin

        // No L2: arbitrate the per-core buses directly onto the external
        // memory ports. This instance uses the cluster's own reset input.
        VX_mem_arb #(
            .NUM_REQS       (`NUM_CORES),
            .ADDR_WIDTH     (`L2MEM_ADDR_WIDTH),
            .DATA_WIDTH     (`L2MEM_DATA_WIDTH),
            .TAG_IN_WIDTH   (`XMEM_TAG_WIDTH),
            .BUFFERED_REQ   (1),
            .BUFFERED_RSP   (1)
        ) mem_arb (
            .clk            (clk),
            .reset          (reset),

            // Requests coming from the cores
            .req_valid_in   (per_core_mem_req_valid),
            .req_ready_in   (per_core_mem_req_ready),
            .req_rw_in      (per_core_mem_req_rw),
            .req_addr_in    (per_core_mem_req_addr),
            .req_byteen_in  (per_core_mem_req_byteen),
            .req_data_in    (per_core_mem_req_data),
            .req_tag_in     (per_core_mem_req_tag),

            // Arbitrated request going to memory
            .req_valid_out  (mem_req_valid),
            .req_ready_out  (mem_req_ready),
            .req_rw_out     (mem_req_rw),
            .req_addr_out   (mem_req_addr),
            .req_byteen_out (mem_req_byteen),
            .req_data_out   (mem_req_data),
            .req_tag_out    (mem_req_tag),

            // Response coming from memory
            .rsp_valid_in   (mem_rsp_valid),
            .rsp_ready_in   (mem_rsp_ready),
            .rsp_data_in    (mem_rsp_data),
            .rsp_tag_in     (mem_rsp_tag),

            // Responses going back to the cores
            .rsp_valid_out  (per_core_mem_rsp_valid),
            .rsp_ready_out  (per_core_mem_rsp_ready),
            .rsp_data_out   (per_core_mem_rsp_data),
            .rsp_tag_out    (per_core_mem_rsp_tag)
        );

    end

endmodule
|
|
@ -1,116 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Commit stage: counts the threads retired each cycle and reports that
// count (registered by one cycle) on cmt_to_csr_if, and passes the
// ALU/LD/CSR/FPU commit interfaces on to VX_writeback.
module VX_commit #(
    parameter CORE_ID = 0
) (
    input wire          clk,
    input wire          reset,

    // inputs: one commit interface per execution unit
    VX_commit_if        alu_commit_if,
    VX_commit_if        ld_commit_if,
    VX_commit_if        st_commit_if,
    VX_commit_if        csr_commit_if,
    VX_commit_if        fpu_commit_if,
    VX_commit_if        gpu_commit_if,

    // outputs
    VX_writeback_if     writeback_if,
    VX_cmt_to_csr_if    cmt_to_csr_if
);
    // Wide enough to hold a count of up to 3*`NUM_THREADS set mask bits.
    localparam CMTW = $clog2(3*`NUM_THREADS+1);

    // CSRs update

    // A unit commits in a cycle when its valid/ready handshake completes.
    wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
    wire ld_commit_fire  = ld_commit_if.valid  && ld_commit_if.ready;
    wire st_commit_fire  = st_commit_if.valid  && st_commit_if.ready;
    wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
    wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
    wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;

    wire commit_fire = |{alu_commit_fire,
                         ld_commit_fire,
                         st_commit_fire,
                         csr_commit_fire,
                         fpu_commit_fire,
                         gpu_commit_fire};

    // Mask 1: first firing unit among ALU, LD, CSR, FPU (in that priority).
    wire [`NUM_THREADS-1:0] commit_tmask1 =
        alu_commit_fire ? alu_commit_if.tmask :
        ld_commit_fire  ? ld_commit_if.tmask  :
        csr_commit_fire ? csr_commit_if.tmask :
        fpu_commit_fire ? fpu_commit_if.tmask :
                          0;

    // Masks 2 and 3: store and GPU commits, zeroed when not firing.
    wire [`NUM_THREADS-1:0] commit_tmask2 = {`NUM_THREADS{st_commit_fire}}  & st_commit_if.tmask;
    wire [`NUM_THREADS-1:0] commit_tmask3 = {`NUM_THREADS{gpu_commit_fire}} & gpu_commit_if.tmask;

    // Total number of threads committed this cycle.
    wire [CMTW-1:0] commit_size = $countones({commit_tmask1, commit_tmask2, commit_tmask3});

    // Register {valid, size} before handing them to the CSR unit; only the
    // valid bit is reset (RESETW = 1).
    VX_pipe_register #(
        .DATAW  (1 + CMTW),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (1'b1),
        .data_in  ({commit_fire,         commit_size}),
        .data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
    );

    // Writeback

    VX_writeback #(
        .CORE_ID (CORE_ID)
    ) writeback (
        .clk           (clk),
        .reset         (reset),

        .alu_commit_if (alu_commit_if),
        .ld_commit_if  (ld_commit_if),
        .csr_commit_if (csr_commit_if),
        .fpu_commit_if (fpu_commit_if),

        .writeback_if  (writeback_if)
    );

    // store and gpu commits don't writeback, so they are always accepted
    assign gpu_commit_if.ready = 1'b1;
    assign st_commit_if.ready  = 1'b1;

`ifdef DBG_PRINT_PIPELINE
    // Trace every commit handshake, one line per unit.
    always @(posedge clk) begin
        if (alu_commit_fire) begin
            $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
        end
        if (ld_commit_fire) begin
            $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd, ld_commit_if.data);
        end
        if (st_commit_fire) begin
            $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd, st_commit_if.data);
        end
        if (csr_commit_fire) begin
            $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
        end
        if (fpu_commit_fire) begin
            $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
        end
        if (gpu_commit_fire) begin
            $display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
        end
    end
`else
    `UNUSED_VAR (fpu_commit_if.PC)
`endif

endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
1005
hw/rtl/VX_config.vh
1005
hw/rtl/VX_config.vh
File diff suppressed because it is too large
Load diff
156
hw/rtl/VX_core.v
156
hw/rtl/VX_core.v
|
@ -1,156 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Core wrapper: pairs the execution pipeline (VX_pipeline) with its memory
// unit (VX_mem_unit) and exposes the memory unit's request/response
// interfaces as flat memory ports.
module VX_core #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_VX_core

    // Clock and reset
    input  wire                             clk,
    input  wire                             reset,

    // Memory request
    output wire                             mem_req_valid,
    output wire                             mem_req_rw,
    output wire [`DMEM_BYTEEN_WIDTH-1:0]    mem_req_byteen,
    output wire [`DMEM_ADDR_WIDTH-1:0]      mem_req_addr,
    output wire [`DMEM_DATA_WIDTH-1:0]      mem_req_data,
    output wire [`XMEM_TAG_WIDTH-1:0]       mem_req_tag,
    input  wire                             mem_req_ready,

    // Memory response
    input  wire                             mem_rsp_valid,
    input  wire [`DMEM_DATA_WIDTH-1:0]      mem_rsp_data,
    input  wire [`XMEM_TAG_WIDTH-1:0]       mem_rsp_tag,
    output wire                             mem_rsp_ready,

    // Status
    output wire                             busy
);
`ifdef PERF_ENABLE
    // Memory-system performance counters, connected to both the memory unit
    // and the pipeline.
    VX_perf_memsys_if perf_memsys_if();
`endif

    // Interface view of the flat memory ports, used by the memory unit.
    VX_mem_req_if #(
        .ADDR_WIDTH (`DMEM_ADDR_WIDTH),
        .DATA_WIDTH (`DMEM_DATA_WIDTH),
        .TAG_WIDTH  (`XMEM_TAG_WIDTH)
    ) mem_req_if();

    VX_mem_rsp_if #(
        .DATA_WIDTH (`DMEM_DATA_WIDTH),
        .TAG_WIDTH  (`XMEM_TAG_WIDTH)
    ) mem_rsp_if();

    // Request: interface -> ports (ready flows the other way).
    assign mem_req_valid    = mem_req_if.valid;
    assign mem_req_rw       = mem_req_if.rw;
    assign mem_req_addr     = mem_req_if.addr;
    assign mem_req_byteen   = mem_req_if.byteen;
    assign mem_req_data     = mem_req_if.data;
    assign mem_req_tag      = mem_req_if.tag;
    assign mem_req_if.ready = mem_req_ready;

    // Response: ports -> interface (ready flows the other way).
    assign mem_rsp_if.valid = mem_rsp_valid;
    assign mem_rsp_if.tag   = mem_rsp_tag;
    assign mem_rsp_if.data  = mem_rsp_data;
    assign mem_rsp_ready    = mem_rsp_if.ready;

    //--

    // Data-cache interfaces between the pipeline and the memory unit.
    VX_dcache_req_if #(
        .NUM_REQS  (`DNUM_REQS),
        .WORD_SIZE (`DWORD_SIZE),
        .TAG_WIDTH (`DCORE_TAG_WIDTH)
    ) dcache_req_if();

    VX_dcache_rsp_if #(
        .NUM_REQS  (`DNUM_REQS),
        .WORD_SIZE (`DWORD_SIZE),
        .TAG_WIDTH (`DCORE_TAG_WIDTH)
    ) dcache_rsp_if();

    // Instruction-cache interfaces between the pipeline and the memory unit.
    VX_icache_req_if #(
        .WORD_SIZE (`IWORD_SIZE),
        .TAG_WIDTH (`ICORE_TAG_WIDTH)
    ) icache_req_if();

    VX_icache_rsp_if #(
        .WORD_SIZE (`IWORD_SIZE),
        .TAG_WIDTH (`ICORE_TAG_WIDTH)
    ) icache_rsp_if();

    VX_pipeline #(
        .CORE_ID (CORE_ID)
    ) pipeline (
        `SCOPE_BIND_VX_core_pipeline
    `ifdef PERF_ENABLE
        .perf_memsys_if     (perf_memsys_if),
    `endif

        .clk                (clk),
        .reset              (reset),

        // Status
        .busy               (busy),

        // Dcache core request
        .dcache_req_valid   (dcache_req_if.valid),
        .dcache_req_ready   (dcache_req_if.ready),
        .dcache_req_rw      (dcache_req_if.rw),
        .dcache_req_addr    (dcache_req_if.addr),
        .dcache_req_byteen  (dcache_req_if.byteen),
        .dcache_req_data    (dcache_req_if.data),
        .dcache_req_tag     (dcache_req_if.tag),

        // Dcache core response
        .dcache_rsp_valid   (dcache_rsp_if.valid),
        .dcache_rsp_ready   (dcache_rsp_if.ready),
        .dcache_rsp_tmask   (dcache_rsp_if.tmask),
        .dcache_rsp_data    (dcache_rsp_if.data),
        .dcache_rsp_tag     (dcache_rsp_if.tag),

        // Icache core request
        .icache_req_valid   (icache_req_if.valid),
        .icache_req_ready   (icache_req_if.ready),
        .icache_req_addr    (icache_req_if.addr),
        .icache_req_tag     (icache_req_if.tag),

        // Icache core response
        .icache_rsp_valid   (icache_rsp_if.valid),
        .icache_rsp_ready   (icache_rsp_if.ready),
        .icache_rsp_data    (icache_rsp_if.data),
        .icache_rsp_tag     (icache_rsp_if.tag)
    );

    //--

    VX_mem_unit #(
        .CORE_ID (CORE_ID)
    ) mem_unit (
        `SCOPE_BIND_VX_core_mem_unit
    `ifdef PERF_ENABLE
        .perf_memsys_if (perf_memsys_if),
    `endif

        .clk            (clk),
        .reset          (reset),

        // Core <-> Icache
        .icache_req_if  (icache_req_if),
        .icache_rsp_if  (icache_rsp_if),

        // Core <-> Dcache
        .dcache_req_if  (dcache_req_if),
        .dcache_rsp_if  (dcache_rsp_if),

        // Memory
        .mem_req_if     (mem_req_if),
        .mem_rsp_if     (mem_rsp_if)
    );

endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,217 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// CSR storage and read decode.
//
// Holds the per-warp floating-point CSR (fcsr), a set of machine-mode CSRs,
// and the cycle / retired-instruction counters. Reads are combinational;
// writes and counter updates happen on the rising clock edge.
//
// Ports:
//   read_enable/read_addr/read_wid -> read_data : combinational CSR read.
//       read_enable only qualifies the invalid-address runtime assertion.
//   write_enable/write_addr/write_wid/write_data : synchronous CSR write.
//   cmt_to_csr_if : number of instructions committed, added to csr_instret.
//   fpu_to_csr_if : FPU exception flags to accumulate into fcsr, and the
//       rounding-mode read-back for the warp selected by read_wid.
//   busy          : csr_cycle only increments while this is high.
module VX_csr_data #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

`ifdef PERF_ENABLE
    VX_perf_memsys_if   perf_memsys_if,
    VX_perf_pipeline_if perf_pipeline_if,
`endif

    VX_cmt_to_csr_if    cmt_to_csr_if,
    VX_fpu_to_csr_if    fpu_to_csr_if,

    input wire                      read_enable,
    input wire[`CSR_ADDR_BITS-1:0]  read_addr,
    input wire[`NW_BITS-1:0]        read_wid,
    output wire[31:0]               read_data,

    input wire                      write_enable,
    input wire[`CSR_ADDR_BITS-1:0]  write_addr,
    input wire[`NW_BITS-1:0]        write_wid,
    input wire[`CSR_WIDTH-1:0]      write_data,

    input wire                      busy
);
    // Machine-mode CSRs. They are plain storage here and are not reset.
    reg [`CSR_WIDTH-1:0] csr_satp;
    reg [`CSR_WIDTH-1:0] csr_mstatus;
    reg [`CSR_WIDTH-1:0] csr_medeleg;
    reg [`CSR_WIDTH-1:0] csr_mideleg;
    reg [`CSR_WIDTH-1:0] csr_mie;
    reg [`CSR_WIDTH-1:0] csr_mtvec;
    reg [`CSR_WIDTH-1:0] csr_mepc;
    reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
    reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];
    reg [63:0] csr_cycle;
    reg [63:0] csr_instret;

    // Per-warp fcsr: {rounding mode (FRM_BITS), exception flags (FFG_BITS)}.
    reg [`NUM_WARPS-1:0][`FRM_BITS+`FFG_BITS-1:0] fcsr;

    always @(posedge clk) begin
        if (reset) begin
            // Reset takes priority: without the else branch a write arriving
            // in the same cycle would override the cleared value, because the
            // last non-blocking assignment to a register wins.
            fcsr <= '0;
        end else begin
            // Accumulate (OR in) the exception flags reported by the FPU.
            if (fpu_to_csr_if.write_enable) begin
                fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]
                                                              | fpu_to_csr_if.write_fflags;
            end

            // Explicit CSR write. It is placed after the FPU update so that it
            // wins when both target the same fcsr bits in the same cycle.
            if (write_enable) begin
                case (write_addr)
                    `CSR_FFLAGS:   fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
                    `CSR_FRM:      fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
                    `CSR_FCSR:     fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];

                    `CSR_SATP:     csr_satp <= write_data;

                    `CSR_MSTATUS:  csr_mstatus <= write_data;
                    `CSR_MEDELEG:  csr_medeleg <= write_data;
                    `CSR_MIDELEG:  csr_mideleg <= write_data;
                    `CSR_MIE:      csr_mie <= write_data;
                    `CSR_MTVEC:    csr_mtvec <= write_data;

                    `CSR_MEPC:     csr_mepc <= write_data;

                    `CSR_PMPCFG0:  csr_pmpcfg[0] <= write_data;
                    `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;

                    default: begin
                        // write_enable is known to be high here, so this
                        // assertion always fails and reports the bad address.
                        assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
                    end
                endcase
            end
        end
    end

    // Cycle and retired-instruction counters.
    always @(posedge clk) begin
        if (reset) begin
            csr_cycle   <= 0;
            csr_instret <= 0;
        end else begin
            if (busy) begin
                csr_cycle <= csr_cycle + 1;
            end
            if (cmt_to_csr_if.valid) begin
                csr_instret <= csr_instret + 64'(cmt_to_csr_if.commit_size);
            end
        end
    end

    reg [31:0] read_data_r;
    reg read_addr_valid_r;

    // Combinational read decode. read_data_r defaults to 'x for addresses
    // that have no explicit case item.
    always @(*) begin
        read_data_r       = 'x;
        read_addr_valid_r = 1;
        case (read_addr)
            `CSR_FFLAGS     : read_data_r = 32'(fcsr[read_wid][`FFG_BITS-1:0]);
            `CSR_FRM        : read_data_r = 32'(fcsr[read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS]);
            `CSR_FCSR       : read_data_r = 32'(fcsr[read_wid]);

            `CSR_WTID       ,
            `CSR_LTID       ,
            `CSR_LWID       : read_data_r = 32'(read_wid);
            `CSR_GTID       ,
            /*`CSR_MHARTID    ,*/
            `CSR_GWID       : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
            `CSR_GCID       : read_data_r = CORE_ID;
            `CSR_NT         : read_data_r = `NUM_THREADS;
            `CSR_NW         : read_data_r = `NUM_WARPS;
            `CSR_NC         : read_data_r = `NUM_CORES * `NUM_CLUSTERS;

            `CSR_MCYCLE     : read_data_r = csr_cycle[31:0];
            `CSR_MCYCLE_H   : read_data_r = 32'(csr_cycle[`PERF_CTR_BITS-1:32]);
            `CSR_MINSTRET   : read_data_r = csr_instret[31:0];
            `CSR_MINSTRET_H : read_data_r = 32'(csr_instret[`PERF_CTR_BITS-1:32]);

        `ifdef PERF_ENABLE
            // PERF: pipeline
            `CSR_MPM_IBUF_ST    : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
            `CSR_MPM_IBUF_ST_H  : read_data_r = 32'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_SCRB_ST    : read_data_r = perf_pipeline_if.scb_stalls[31:0];
            `CSR_MPM_SCRB_ST_H  : read_data_r = 32'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_ALU_ST     : read_data_r = perf_pipeline_if.alu_stalls[31:0];
            `CSR_MPM_ALU_ST_H   : read_data_r = 32'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_LSU_ST     : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
            `CSR_MPM_LSU_ST_H   : read_data_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_CSR_ST     : read_data_r = perf_pipeline_if.csr_stalls[31:0];
            `CSR_MPM_CSR_ST_H   : read_data_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_FPU_ST     : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
            `CSR_MPM_FPU_ST_H   : read_data_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_GPU_ST     : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
            `CSR_MPM_GPU_ST_H   : read_data_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
            // PERF: icache
            `CSR_MPM_ICACHE_READS       : read_data_r = perf_memsys_if.icache_reads[31:0];
            `CSR_MPM_ICACHE_READS_H     : read_data_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_ICACHE_MISS_R      : read_data_r = perf_memsys_if.icache_read_misses[31:0];
            `CSR_MPM_ICACHE_MISS_R_H    : read_data_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_ICACHE_PIPE_ST     : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
            `CSR_MPM_ICACHE_PIPE_ST_H   : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_ICACHE_CRSP_ST     : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
            `CSR_MPM_ICACHE_CRSP_ST_H   : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[`PERF_CTR_BITS-1:32]);
            // PERF: dcache
            `CSR_MPM_DCACHE_READS       : read_data_r = perf_memsys_if.dcache_reads[31:0];
            `CSR_MPM_DCACHE_READS_H     : read_data_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_DCACHE_WRITES      : read_data_r = perf_memsys_if.dcache_writes[31:0];
            `CSR_MPM_DCACHE_WRITES_H    : read_data_r = 32'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_DCACHE_MISS_R      : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
            `CSR_MPM_DCACHE_MISS_R_H    : read_data_r = 32'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_DCACHE_MISS_W      : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
            `CSR_MPM_DCACHE_MISS_W_H    : read_data_r = 32'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_DCACHE_BANK_ST     : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
            `CSR_MPM_DCACHE_BANK_ST_H   : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_DCACHE_MSHR_ST     : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
            `CSR_MPM_DCACHE_MSHR_ST_H   : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_DCACHE_PIPE_ST     : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
            `CSR_MPM_DCACHE_PIPE_ST_H   : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_DCACHE_CRSP_ST     : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
            `CSR_MPM_DCACHE_CRSP_ST_H   : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[`PERF_CTR_BITS-1:32]);
            // PERF: smem
            `CSR_MPM_SMEM_READS     : read_data_r = perf_memsys_if.smem_reads[31:0];
            `CSR_MPM_SMEM_READS_H   : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_SMEM_WRITES    : read_data_r = perf_memsys_if.smem_writes[31:0];
            `CSR_MPM_SMEM_WRITES_H  : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_SMEM_BANK_ST   : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
            `CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
            // PERF: MEM
            `CSR_MPM_MEM_READS      : read_data_r = perf_memsys_if.mem_reads[31:0];
            `CSR_MPM_MEM_READS_H    : read_data_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_MEM_WRITES     : read_data_r = perf_memsys_if.mem_writes[31:0];
            `CSR_MPM_MEM_WRITES_H   : read_data_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_MEM_ST         : read_data_r = perf_memsys_if.mem_stalls[31:0];
            `CSR_MPM_MEM_ST_H       : read_data_r = 32'(perf_memsys_if.mem_stalls[`PERF_CTR_BITS-1:32]);
            `CSR_MPM_MEM_LAT        : read_data_r = perf_memsys_if.mem_latency[31:0];
            `CSR_MPM_MEM_LAT_H      : read_data_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
            // PERF: reserved
            `CSR_MPM_RESERVED       : read_data_r = '0;
            `CSR_MPM_RESERVED_H     : read_data_r = '0;
        `endif

            `CSR_SATP       : read_data_r = 32'(csr_satp);

            `CSR_MSTATUS    : read_data_r = 32'(csr_mstatus);
            `CSR_MISA       : read_data_r = `ISA_CODE;
            `CSR_MEDELEG    : read_data_r = 32'(csr_medeleg);
            `CSR_MIDELEG    : read_data_r = 32'(csr_mideleg);
            `CSR_MIE        : read_data_r = 32'(csr_mie);
            `CSR_MTVEC      : read_data_r = 32'(csr_mtvec);

            `CSR_MEPC       : read_data_r = 32'(csr_mepc);

            `CSR_PMPCFG0    : read_data_r = 32'(csr_pmpcfg[0]);
            `CSR_PMPADDR0   : read_data_r = 32'(csr_pmpaddr[0]);

            `CSR_MVENDORID  : read_data_r = `VENDOR_ID;
            `CSR_MARCHID    : read_data_r = `ARCHITECTURE_ID;
            `CSR_MIMPID     : read_data_r = `IMPLEMENTATION_ID;

            default: begin
                // Addresses inside the two 32-entry MPM windows are accepted
                // without a dedicated case item (read_data_r stays 'x);
                // anything else is flagged as an invalid read address.
                if (!((read_addr >= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32))
                   || (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32)))) begin
                    read_addr_valid_r = 0;
                end
            end
        endcase
    end

    `RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("invalid CSR read address: %0h", read_addr))

    assign read_data = read_data_r;

    // Rounding mode of the warp the FPU is asking about.
    assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS];

endmodule
|
|
@ -1,127 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// CSR execution unit: a single-stage pipeline around VX_csr_data.
//
// Stage 0 reads the addressed CSR and computes the updated value for the
// requested operation; stage 1 (after pipe_reg) drives csr_commit_if and
// performs the actual CSR write.
module VX_csr_unit #(
    parameter CORE_ID = 0
) (
    input wire                  clk,
    input wire                  reset,

`ifdef PERF_ENABLE
    VX_perf_memsys_if           perf_memsys_if,
    VX_perf_pipeline_if         perf_pipeline_if,
`endif

    VX_cmt_to_csr_if            cmt_to_csr_if,
    VX_fpu_to_csr_if            fpu_to_csr_if,

    VX_csr_req_if               csr_req_if,
    VX_commit_if                csr_commit_if,

    input wire                  busy,

    input wire[`NUM_WARPS-1:0]  fpu_pending,
    output wire[`NUM_WARPS-1:0] pending
);
    // Stage-1 copies of the request, taken from the pipeline register.
    wire                        csr_we_s1;
    wire [`CSR_ADDR_BITS-1:0]   csr_addr_s1;
    wire [31:0]                 csr_read_data_s1;
    wire [31:0]                 csr_updated_data_s1;

    // Raw stage-0 read value coming out of the CSR storage.
    wire [31:0]                 csr_read_data;

    // The CSR write is performed from stage 1.
    wire write_enable = csr_commit_if.valid && csr_we_s1;

    // Source operand: zero-extended rs1 field (immediate form) or register data.
    wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.rs1) : csr_req_if.rs1_data;

    VX_csr_data #(
        .CORE_ID (CORE_ID)
    ) csr_data (
        .clk            (clk),
        .reset          (reset),
    `ifdef PERF_ENABLE
        .perf_memsys_if (perf_memsys_if),
        .perf_pipeline_if (perf_pipeline_if),
    `endif
        .cmt_to_csr_if  (cmt_to_csr_if),
        .fpu_to_csr_if  (fpu_to_csr_if),
        .busy           (busy),

        // read port, driven by the incoming request
        .read_enable    (csr_req_if.valid),
        .read_addr      (csr_req_if.addr),
        .read_wid       (csr_req_if.wid),
        .read_data      (csr_read_data),

        // write port, driven by stage 1
        .write_enable   (write_enable),
        .write_addr     (csr_addr_s1),
        .write_wid      (csr_commit_if.wid),
        .write_data     (csr_updated_data_s1[`CSR_WIDTH-1:0])
    );

    // The stage-1 instruction targets the same CSR of the same warp as the
    // incoming request: forward its updated value instead of the stored one.
    wire write_hazard = csr_commit_if.valid
                     && (csr_commit_if.wid == csr_req_if.wid)
                     && (csr_addr_s1 == csr_req_if.addr);

    wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data;

    reg [31:0] csr_updated_data;
    reg        csr_we_s0_unqual;

    // New CSR value and write-enable for the requested operation.
    always @(*) begin
        // Defaults cover the clear-bits operation (`CSR_RC and any other
        // op_type); set/clear only write when the operand is non-zero.
        csr_updated_data = csr_read_data_qual & ~csr_req_data;
        csr_we_s0_unqual = (csr_req_data != 0);
        case (csr_req_if.op_type)
            `CSR_RW: begin
                // plain write: always enabled
                csr_updated_data = csr_req_data;
                csr_we_s0_unqual = 1;
            end
            `CSR_RS: begin
                // set bits
                csr_updated_data = csr_read_data_qual | csr_req_data;
            end
            default: ;
        endcase
    end

    // Hold the request while the FPU still has work pending for this warp.
    wire stall_in = fpu_pending[csr_req_if.wid];

    wire csr_req_valid = csr_req_if.valid && !stall_in;

    // Downstream is not accepting the commit currently held in stage 1.
    wire stall_out = csr_commit_if.valid && ~csr_commit_if.ready;

    VX_pipe_register #(
        .DATAW  (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall_out),
        .data_in  ({csr_req_valid,       csr_req_if.wid,    csr_req_if.tmask,    csr_req_if.PC,    csr_req_if.rd,    csr_req_if.wb,    csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}),
        .data_out ({csr_commit_if.valid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1,        csr_addr_s1,     csr_read_data_s1,   csr_updated_data_s1})
    );

    // Per-thread result: thread-id CSRs are specialised with the lane index,
    // every other CSR returns the same value on all lanes.
    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        assign csr_commit_if.data[i] = (csr_addr_s1 == `CSR_WTID) ? i :
                                       (csr_addr_s1 == `CSR_LTID
                                     || csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
                                                                    csr_read_data_s1;
    end

    assign csr_commit_if.eop = 1'b1;

    // can accept new request?
    assign csr_req_if.ready = !stall_out && !stall_in;

    // pending request: one flag per warp, set when a request is accepted and
    // cleared when its commit is accepted. The set is written last, so it
    // wins if both hit the same warp in one cycle.
    reg [`NUM_WARPS-1:0] pending_r;

    always @(posedge clk) begin
        if (reset) begin
            pending_r <= 0;
        end else begin
            if (csr_commit_if.valid && csr_commit_if.ready) begin
                pending_r[csr_commit_if.wid] <= 0;
            end
            if (csr_req_if.valid && csr_req_if.ready) begin
                pending_r[csr_req_if.wid] <= 1;
            end
        end
    end

    assign pending = pending_r;

endmodule
|
|
@ -1,426 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_print_instr.vh"
|
||||
|
||||
// Register-usage marker macros.
//   USED_IREG(r): sets the used_regs bit for register r. When EXT_F_ENABLE is
//                 defined the index is {1'b0, r}; otherwise it is r itself.
//   USED_FREG(r): only defined when EXT_F_ENABLE is defined. Sets bit 5 of
//                 the signal named <r>_r and sets used_regs[{1'b1, r}].
`ifdef EXT_F_ENABLE
|
||||
`define USED_IREG(r) \
|
||||
used_regs[{1'b0, r}] = 1
|
||||
|
||||
`define USED_FREG(r) \
|
||||
r``_r[5] = 1; \
|
||||
used_regs[{1'b1, r}] = 1
|
||||
`else
|
||||
`define USED_IREG(r) \
|
||||
used_regs[r] = 1
|
||||
|
||||
`endif
|
||||
|
||||
// VX_decode: combinational instruction decoder.
//
// Takes one fetched 32-bit instruction from `ifetch_rsp_if` and produces, in
// the same cycle, the decoded fields on `decode_if`:
//   ex_type / op_type / op_mod : target execution unit and operation
//   rd / rs1 / rs2 / rs3       : register indices (with EXT_F_ENABLE the
//                                USED_FREG macro sets bit 5 for FP registers)
//   imm, use_imm, use_PC       : immediate operand and operand selects
//   wb                         : write-back enable (use_rd and rd index != 0)
//   used_regs                  : bitmap of registers referenced by the instruction
//
// Side-band outputs, asserted only when the fetch response is accepted
// (valid && ready):
//   join_if   : the instruction is a GPU JOIN
//   wstall_if : the instruction is flagged is_wstall (JAL, JALR, branches,
//               TMC, SPLIT, BAR)
//
// Back-pressure is passed straight through: ifetch_rsp_if.ready follows
// decode_if.ready. clk/reset feed no decode logic; clk is only used by the
// optional DBG_PRINT_PIPELINE trace.
//
// Relies on the USED_IREG / USED_FREG macros defined ahead of this module.
module VX_decode #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // inputs
    VX_ifetch_rsp_if ifetch_rsp_if,

    // outputs
    VX_decode_if decode_if,
    VX_wstall_if wstall_if,
    VX_join_if join_if
);
    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    // decoded fields, driven by the always block below
    reg [`EX_BITS-1:0]  ex_type;
    reg [`OP_BITS-1:0]  op_type;
    reg [`MOD_BITS-1:0] op_mod;
    reg [`NR_BITS-1:0]  rd_r, rs1_r, rs2_r, rs3_r;
    reg [31:0]          imm;
    reg use_rd, use_PC, use_imm;
    reg is_join, is_wstall;
    reg [`NUM_REGS-1:0] used_regs;

    // raw instruction fields
    wire [31:0] instr  = ifetch_rsp_if.data;
    wire [6:0]  opcode = instr[6:0];
    wire [2:0]  func3  = instr[14:12];
    wire [6:0]  func7  = instr[31:25];
    wire [11:0] u_12   = instr[31:20];

    wire [4:0] rd  = instr[11:7];
    wire [4:0] rs1 = instr[19:15];
    wire [4:0] rs2 = instr[24:20];
    wire [4:0] rs3 = instr[31:27];

    // immediates, one per instruction format
    wire [19:0] upper_imm = {func7, rs2, rs1, func3};
    // shift-immediate encodings (func3 == 1 or 5) only carry a 5-bit amount
    wire [11:0] alu_imm   = (func3[0] && ~func3[1]) ? {{7{1'b0}}, rs2} : u_12;
    wire [11:0] s_imm     = {func7, rd};
    wire [12:0] b_imm     = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
    wire [20:0] jal_imm   = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
    wire [11:0] jalr_imm  = {func7, rs2};

    always @(*) begin

        // defaults; each opcode arm overrides only what it needs
        ex_type   = 0;
        op_type   = 'x;
        op_mod    = 0;
        rd_r      = `NR_BITS'(rd);
        rs1_r     = `NR_BITS'(rs1);
        rs2_r     = `NR_BITS'(rs2);
        rs3_r     = `NR_BITS'(rs3);
        imm       = 'x;
        use_imm   = 0;
        use_PC    = 0;
        use_rd    = 0;
        is_join   = 0;
        is_wstall = 0;
        used_regs = 0;

        case (opcode)
            `INST_I: begin
                ex_type = `EX_ALU;
                case (func3)
                    3'h0: op_type = `OP_BITS'(`ALU_ADD);
                    3'h1: op_type = `OP_BITS'(`ALU_SLL);
                    3'h2: op_type = `OP_BITS'(`ALU_SLT);
                    3'h3: op_type = `OP_BITS'(`ALU_SLTU);
                    3'h4: op_type = `OP_BITS'(`ALU_XOR);
                    3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
                    3'h6: op_type = `OP_BITS'(`ALU_OR);
                    3'h7: op_type = `OP_BITS'(`ALU_AND);
                    default:;
                endcase
                use_rd  = 1;
                use_imm = 1;
                imm     = {{20{alu_imm[11]}}, alu_imm};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_R: begin
                ex_type = `EX_ALU;
            // Multiply/divide decode belongs to the M extension, so it is
            // gated on EXT_M_ENABLE (the same switch that sets the M bit of
            // the ISA code), not on the floating-point extension switch.
            `ifdef EXT_M_ENABLE
                if (func7[0]) begin
                    case (func3)
                        3'h0: op_type = `OP_BITS'(`MUL_MUL);
                        3'h1: op_type = `OP_BITS'(`MUL_MULH);
                        3'h2: op_type = `OP_BITS'(`MUL_MULHSU);
                        3'h3: op_type = `OP_BITS'(`MUL_MULHU);
                        3'h4: op_type = `OP_BITS'(`MUL_DIV);
                        3'h5: op_type = `OP_BITS'(`MUL_DIVU);
                        3'h6: op_type = `OP_BITS'(`MUL_REM);
                        3'h7: op_type = `OP_BITS'(`MUL_REMU);
                        default:;
                    endcase
                    op_mod = 2;
                end else
            `endif
                begin
                    case (func3)
                        3'h0: op_type = (func7[5]) ? `OP_BITS'(`ALU_SUB) : `OP_BITS'(`ALU_ADD);
                        3'h1: op_type = `OP_BITS'(`ALU_SLL);
                        3'h2: op_type = `OP_BITS'(`ALU_SLT);
                        3'h3: op_type = `OP_BITS'(`ALU_SLTU);
                        3'h4: op_type = `OP_BITS'(`ALU_XOR);
                        3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
                        3'h6: op_type = `OP_BITS'(`ALU_OR);
                        3'h7: op_type = `OP_BITS'(`ALU_AND);
                        default:;
                    endcase
                end
                use_rd = 1;
                `USED_IREG (rd);
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
            `INST_LUI: begin
                ex_type = `EX_ALU;
                op_type = `OP_BITS'(`ALU_LUI);
                use_rd  = 1;
                use_imm = 1;
                imm     = {upper_imm, 12'(0)};
                `USED_IREG (rd);
                rs1_r   = 0; // LUI has no rs1 field; force the index to register 0
            end
            `INST_AUIPC: begin
                ex_type = `EX_ALU;
                op_type = `OP_BITS'(`ALU_AUIPC);
                use_rd  = 1;
                use_imm = 1;
                use_PC  = 1;
                imm     = {upper_imm, 12'(0)};
                `USED_IREG (rd);
            end
            `INST_JAL: begin
                ex_type   = `EX_ALU;
                op_type   = `OP_BITS'(`BR_JAL);
                op_mod    = 1;
                use_rd    = 1;
                use_imm   = 1;
                use_PC    = 1;
                is_wstall = 1;
                imm       = {{11{jal_imm[20]}}, jal_imm};
                `USED_IREG (rd);
            end
            `INST_JALR: begin
                ex_type   = `EX_ALU;
                op_type   = `OP_BITS'(`BR_JALR);
                op_mod    = 1;
                use_rd    = 1;
                use_imm   = 1;
                is_wstall = 1;
                imm       = {{20{jalr_imm[11]}}, jalr_imm};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_B: begin
                ex_type = `EX_ALU;
                case (func3)
                    3'h0: op_type = `OP_BITS'(`BR_EQ);
                    3'h1: op_type = `OP_BITS'(`BR_NE);
                    3'h4: op_type = `OP_BITS'(`BR_LT);
                    3'h5: op_type = `OP_BITS'(`BR_GE);
                    3'h6: op_type = `OP_BITS'(`BR_LTU);
                    3'h7: op_type = `OP_BITS'(`BR_GEU);
                    default:;
                endcase
                op_mod    = 1;
                use_imm   = 1;
                use_PC    = 1;
                is_wstall = 1;
                imm       = {{19{b_imm[12]}}, b_imm};
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
            `INST_F: begin
                ex_type = `EX_LSU;
                op_mod  = `MOD_BITS'(!func3[0]); // data fence
            end
            `INST_SYS : begin
                if (func3[1:0] != 0) begin
                    // CSR access: func3[1:0] selects the CSR operation,
                    // func3[2] selects the immediate form
                    ex_type = `EX_CSR;
                    op_type = `OP_BITS'(func3[1:0]);
                    use_rd  = 1;
                    use_imm = func3[2];
                    imm     = 32'(u_12); // addr
                    `USED_IREG (rd);
                    if (func3[2]) begin
                        rs1_r = `NR_BITS'(rs1); // imm
                    end else begin
                        `USED_IREG (rs1);
                    end
                end else begin
                    ex_type = `EX_ALU;
                    case (u_12)
                        12'h000: op_type = `OP_BITS'(`BR_ECALL);
                        12'h001: op_type = `OP_BITS'(`BR_EBREAK);
                        12'h302: op_type = `OP_BITS'(`BR_MRET);
                        12'h102: op_type = `OP_BITS'(`BR_SRET);
                        12'h7B2: op_type = `OP_BITS'(`BR_DRET);
                        default:;
                    endcase
                    op_mod  = 1;
                    use_rd  = 1;
                    use_imm = 1;
                    use_PC  = 1;
                    imm     = 32'd4;
                    `USED_IREG (rd);
                end
            end
        `ifdef EXT_F_ENABLE
            `INST_FL,
        `endif
            `INST_L: begin
                ex_type = `EX_LSU;
                op_type = `OP_BITS'({1'b0, func3});
                use_rd  = 1;
                imm     = {{20{u_12[11]}}, u_12};
            `ifdef EXT_F_ENABLE
                // opcode[2] distinguishes the float load from the integer load
                if (opcode[2]) begin
                    `USED_FREG (rd);
                end else
            `endif
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
        `ifdef EXT_F_ENABLE
            `INST_FS,
        `endif
            `INST_S: begin
                ex_type = `EX_LSU;
                op_type = `OP_BITS'({1'b1, func3});
                imm     = {{20{s_imm[11]}}, s_imm};
                `USED_IREG (rs1);
            `ifdef EXT_F_ENABLE
                // opcode[2] distinguishes the float store from the integer store
                if (opcode[2]) begin
                    `USED_FREG (rs2);
                end else
            `endif
                `USED_IREG (rs2);
            end
        `ifdef EXT_F_ENABLE
            `INST_FMADD,
            `INST_FMSUB,
            `INST_FNMSUB,
            `INST_FNMADD: begin
                ex_type = `EX_FPU;
                op_type = `OP_BITS'(opcode[3:0]);
                op_mod  = func3;
                use_rd  = 1;
                `USED_FREG (rd);
                `USED_FREG (rs1);
                `USED_FREG (rs2);
                `USED_FREG (rs3);
            end
            `INST_FCI: begin
                ex_type = `EX_FPU;
                op_mod  = func3;
                use_rd  = 1;
                case (func7)
                    7'h00, // FADD
                    7'h04, // FSUB
                    7'h08, // FMUL
                    7'h0C: begin // FDIV
                        op_type = `OP_BITS'(func7[3:0]);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h2C: begin
                        op_type = `OP_BITS'(`FPU_SQRT);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h50: begin
                        op_type = `OP_BITS'(`FPU_CMP);
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h60: begin
                        op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h68: begin
                        op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                    7'h10: begin
                        // FSGNJ=0, FSGNJN=1, FSGNJX=2
                        op_type = `OP_BITS'(`FPU_MISC);
                        op_mod  = {1'b0, func3[1:0]};
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h14: begin
                        // FMIN=3, FMAX=4
                        op_type = `OP_BITS'(`FPU_MISC);
                        op_mod  = func3[0] ? 4 : 3;
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h70: begin
                        if (func3[0]) begin
                            // FCLASS
                            op_type = `OP_BITS'(`FPU_CLASS);
                        end else begin
                            // FMV.X.W=5
                            op_type = `OP_BITS'(`FPU_MISC);
                            op_mod  = 5;
                        end
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h78: begin
                        // FMV.W.X=6
                        op_type = `OP_BITS'(`FPU_MISC);
                        op_mod  = 6;
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                    default:;
                endcase
            end
        `endif
            `INST_GPU: begin
                ex_type = `EX_GPU;
                case (func3)
                    3'h0: begin
                        op_type   = `OP_BITS'(`GPU_TMC);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                    end
                    3'h1: begin
                        op_type = `OP_BITS'(`GPU_WSPAWN);
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                    end
                    3'h2: begin
                        op_type   = `OP_BITS'(`GPU_SPLIT);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                    end
                    3'h3: begin
                        op_type = `OP_BITS'(`GPU_JOIN);
                        is_join = 1;
                    end
                    3'h4: begin
                        op_type   = `OP_BITS'(`GPU_BAR);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                    end
                    default:;
                endcase
            end
            default:;
        endcase
    end

    // disable write to integer register r0
    wire wb = use_rd && (| rd_r);

    assign decode_if.valid     = ifetch_rsp_if.valid;
    assign decode_if.wid       = ifetch_rsp_if.wid;
    assign decode_if.tmask     = ifetch_rsp_if.tmask;
    assign decode_if.PC        = ifetch_rsp_if.PC;
    assign decode_if.ex_type   = ex_type;
    assign decode_if.op_type   = op_type;
    assign decode_if.op_mod    = op_mod;
    assign decode_if.wb        = wb;
    assign decode_if.rd        = rd_r;
    assign decode_if.rs1       = rs1_r;
    assign decode_if.rs2       = rs2_r;
    assign decode_if.rs3       = rs3_r;
    assign decode_if.imm       = imm;
    assign decode_if.use_PC    = use_PC;
    assign decode_if.use_imm   = use_imm;
    assign decode_if.used_regs = used_regs;

    ///////////////////////////////////////////////////////////////////////////

    // the fetch response is consumed this cycle
    wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;

    assign join_if.valid = ifetch_rsp_fire && is_join;
    assign join_if.wid   = ifetch_rsp_if.wid;

    assign wstall_if.valid = ifetch_rsp_fire && is_wstall;
    assign wstall_if.wid   = ifetch_rsp_if.wid;

    // no buffering in this stage: downstream back-pressure goes straight to fetch
    assign ifetch_rsp_if.ready = decode_if.ready;

`ifdef DBG_PRINT_PIPELINE
    always @(posedge clk) begin
        if (decode_if.valid && decode_if.ready) begin
            $write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC);
            print_ex_type(decode_if.ex_type);
            $write(", op=");
            print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
            $write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_regs=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm, decode_if.used_regs);
        end
    end
`endif

endmodule
|
|
@ -1,35 +1,90 @@
|
|||
`ifndef VX_DEFINE
|
||||
`define VX_DEFINE
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_DEFINE_VH
|
||||
`define VX_DEFINE_VH
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`include "VX_config.vh"
|
||||
`include "VX_types.vh"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
`define NW_BITS `CLOG2(`NUM_WARPS)
|
||||
`define NC_WIDTH `UP(`NC_BITS)
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
`define NT_BITS `CLOG2(`NUM_THREADS)
|
||||
`define NW_WIDTH `UP(`NW_BITS)
|
||||
|
||||
`define NC_BITS `LOG2UP(`NUM_CORES)
|
||||
`define NC_BITS `CLOG2(`NUM_CORES)
|
||||
`define NT_WIDTH `UP(`NT_BITS)
|
||||
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
`define NB_BITS `CLOG2(`NUM_BARRIERS)
|
||||
`define NB_WIDTH `UP(`NB_BITS)
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQS)
|
||||
`define NUM_IREGS 32
|
||||
|
||||
`define NRI_BITS `CLOG2(`NUM_IREGS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS 64
|
||||
`define NUM_REGS (2 * `NUM_IREGS)
|
||||
`else
|
||||
`define NUM_REGS 32
|
||||
`define NUM_REGS `NUM_IREGS
|
||||
`endif
|
||||
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
`define NR_BITS `CLOG2(`NUM_REGS)
|
||||
|
||||
`define CSR_ADDR_BITS 12
|
||||
|
||||
`define CSR_WIDTH 12
|
||||
`define DV_STACK_SIZE `UP(`NUM_THREADS-1)
|
||||
`define DV_STACK_SIZEW `UP(`CLOG2(`DV_STACK_SIZE))
|
||||
|
||||
`define PERF_CTR_BITS 44
|
||||
|
||||
`ifndef NDEBUG
|
||||
`define UUID_ENABLE
|
||||
`define UUID_WIDTH 44
|
||||
`else
|
||||
`ifdef SCOPE
|
||||
`define UUID_ENABLE
|
||||
`define UUID_WIDTH 44
|
||||
`else
|
||||
`define UUID_WIDTH 1
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`define PC_BITS (`XLEN-1)
|
||||
`define OFFSET_BITS 12
|
||||
`define IMM_BITS `XLEN
|
||||
|
||||
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_ALU 0
|
||||
`define EX_LSU 1
|
||||
`define EX_SFU 2
|
||||
`define EX_FPU (`EX_SFU + `EXT_F_ENABLED)
|
||||
|
||||
`define NUM_EX_UNITS (3 + `EXT_F_ENABLED)
|
||||
`define EX_BITS `CLOG2(`NUM_EX_UNITS)
|
||||
`define EX_WIDTH `UP(`EX_BITS)
|
||||
|
||||
`define SFU_CSRS 0
|
||||
`define SFU_WCTL 1
|
||||
|
||||
`define NUM_SFU_UNITS (2)
|
||||
`define SFU_BITS `CLOG2(`NUM_SFU_UNITS)
|
||||
`define SFU_WIDTH `UP(`SFU_BITS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_LUI 7'b0110111
|
||||
|
@ -41,374 +96,424 @@
|
|||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_F 7'b0001111 // Fence instructions
|
||||
`define INST_FENCE 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
// RV64I instruction specific opcodes (for any W instruction)
|
||||
`define INST_I_W 7'b0011011 // W type immediate instructions
|
||||
`define INST_R_W 7'b0111011 // W type register instructions
|
||||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
`define INST_GPU 7'b1101011
|
||||
// Custom extension opcodes
|
||||
`define INST_EXT1 7'b0001011 // 0x0B
|
||||
`define INST_EXT2 7'b0101011 // 0x2B
|
||||
`define INST_EXT3 7'b1011011 // 0x5B
|
||||
`define INST_EXT4 7'b1111011 // 0x7B
|
||||
|
||||
// Opcode extensions
|
||||
`define INST_R_F7_MUL 7'b0000001
|
||||
`define INST_R_F7_ZICOND 7'b0000111
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
`define FRM_RTZ 3'b001 // round to zero
|
||||
`define FRM_RDN 3'b010 // round to -inf
|
||||
`define FRM_RUP 3'b011 // round to +inf
|
||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define FRM_DYN 3'b111 // dynamic mode
|
||||
`define FRM_BITS 3
|
||||
`define INST_FRM_RNE 3'b000 // round to nearest even
|
||||
`define INST_FRM_RTZ 3'b001 // round to zero
|
||||
`define INST_FRM_RDN 3'b010 // round to -inf
|
||||
`define INST_FRM_RUP 3'b011 // round to +inf
|
||||
`define INST_FRM_RMM 3'b100 // round to nearest max magnitude
|
||||
`define INST_FRM_DYN 3'b111 // dynamic mode
|
||||
`define INST_FRM_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_FPU 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_ARGS_BITS $bits(op_args_t)
|
||||
`define INST_FMT_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define OP_BITS 4
|
||||
`define MOD_BITS 3
|
||||
`define INST_ALU_ADD 4'b0000
|
||||
//`define INST_ALU_UNUSED 4'b0001
|
||||
`define INST_ALU_LUI 4'b0010
|
||||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
//`define INST_ALU_UNUSED 4'b0110
|
||||
`define INST_ALU_SUB 4'b0111
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_CZEQ 4'b1010
|
||||
`define INST_ALU_CZNE 4'b1011
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
|
||||
`define ALU_ADD 4'b0000
|
||||
`define ALU_LUI 4'b0010
|
||||
`define ALU_AUIPC 4'b0011
|
||||
`define ALU_SLTU 4'b0100
|
||||
`define ALU_SLT 4'b0101
|
||||
`define ALU_SRL 4'b1000
|
||||
`define ALU_SRA 4'b1001
|
||||
`define ALU_SUB 4'b1011
|
||||
`define ALU_AND 4'b1100
|
||||
`define ALU_OR 4'b1101
|
||||
`define ALU_XOR 4'b1110
|
||||
`define ALU_SLL 4'b1111
|
||||
`define ALU_OTHER 4'b0111
|
||||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
`define ALU_OP_CLASS(x) x[3:2]
|
||||
`define ALU_SIGNED(x) x[0]
|
||||
`define ALU_IS_BR(x) x[0]
|
||||
`define ALU_IS_MUL(x) x[1]
|
||||
|
||||
`define BR_EQ 4'b0000
|
||||
`define BR_NE 4'b0010
|
||||
`define BR_LTU 4'b0100
|
||||
`define BR_GEU 4'b0110
|
||||
`define BR_LT 4'b0101
|
||||
`define BR_GE 4'b0111
|
||||
`define BR_JAL 4'b1000
|
||||
`define BR_JALR 4'b1001
|
||||
`define BR_ECALL 4'b1010
|
||||
`define BR_EBREAK 4'b1011
|
||||
`define BR_MRET 4'b1100
|
||||
`define BR_SRET 4'b1101
|
||||
`define BR_DRET 4'b1110
|
||||
`define BR_OTHER 4'b1111
|
||||
`define BR_BITS 4
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define BR_NEG(x) x[1]
|
||||
`define BR_LESS(x) x[2]
|
||||
`define BR_STATIC(x) x[3]
|
||||
`define ALU_TYPE_BITS 2
|
||||
`define ALU_TYPE_ARITH 0
|
||||
`define ALU_TYPE_BRANCH 1
|
||||
`define ALU_TYPE_MULDIV 2
|
||||
`define ALU_TYPE_OTHER 3
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define MUL_IS_DIV(x) x[2]
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_CLASS(op) op[3:2]
|
||||
`define INST_ALU_SIGNED(op) op[0]
|
||||
`define INST_ALU_IS_SUB(op) op[1]
|
||||
`define INST_ALU_IS_CZERO(op) (op[3:1] == 3'b101)
|
||||
|
||||
`define FMT_B 3'b000
|
||||
`define FMT_H 3'b001
|
||||
`define FMT_W 3'b010
|
||||
`define FMT_BU 3'b100
|
||||
`define FMT_HU 3'b101
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
`define INST_BR_LTU 4'b0100
|
||||
`define INST_BR_GEU 4'b0110
|
||||
`define INST_BR_LT 4'b0101
|
||||
`define INST_BR_GE 4'b0111
|
||||
`define INST_BR_JAL 4'b1000
|
||||
`define INST_BR_JALR 4'b1001
|
||||
`define INST_BR_ECALL 4'b1010
|
||||
`define INST_BR_EBREAK 4'b1011
|
||||
`define INST_BR_URET 4'b1100
|
||||
`define INST_BR_SRET 4'b1101
|
||||
`define INST_BR_MRET 4'b1110
|
||||
`define INST_BR_OTHER 4'b1111
|
||||
`define INST_BR_BITS 4
|
||||
`define INST_BR_CLASS(op) {1'b0, ~op[3]}
|
||||
`define INST_BR_IS_NEG(op) op[1]
|
||||
`define INST_BR_IS_LESS(op) op[2]
|
||||
`define INST_BR_IS_STATIC(op) op[3]
|
||||
|
||||
`define LSU_LB 4'b0000
|
||||
`define LSU_LH 4'b0001
|
||||
`define LSU_LW 4'b0010
|
||||
`define LSU_LBU 4'b0100
|
||||
`define LSU_LHU 4'b0101
|
||||
`define LSU_SB 4'b1000
|
||||
`define LSU_SH 4'b1001
|
||||
`define LSU_SW 4'b1010
|
||||
`define LSU_BITS 4
|
||||
`define LSU_FMT(x) x[2:0]
|
||||
`define LSU_WSIZE(x) x[1:0]
|
||||
`define LSU_OP(x) x[`LSU_BITS-1:0]
|
||||
`define LSU_IS_FENCE(x) x[0]
|
||||
`define INST_M_MUL 3'b000
|
||||
`define INST_M_MULHU 3'b001
|
||||
`define INST_M_MULH 3'b010
|
||||
`define INST_M_MULHSU 3'b011
|
||||
`define INST_M_DIV 3'b100
|
||||
`define INST_M_DIVU 3'b101
|
||||
`define INST_M_REM 3'b110
|
||||
`define INST_M_REMU 3'b111
|
||||
`define INST_M_BITS 3
|
||||
`define INST_M_SIGNED(op) (~op[0])
|
||||
`define INST_M_IS_MULX(op) (~op[2])
|
||||
`define INST_M_IS_MULH(op) (op[1:0] != 0)
|
||||
`define INST_M_SIGNED_A(op) (op[1:0] != 1)
|
||||
`define INST_M_IS_REM(op) op[1]
|
||||
|
||||
`define CSR_RW 2'h1
|
||||
`define CSR_RS 2'h2
|
||||
`define CSR_RC 2'h3
|
||||
`define CSR_OTHER 2'h0
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
`define INST_FMT_B 3'b000
|
||||
`define INST_FMT_H 3'b001
|
||||
`define INST_FMT_W 3'b010
|
||||
`define INST_FMT_D 3'b011
|
||||
`define INST_FMT_BU 3'b100
|
||||
`define INST_FMT_HU 3'b101
|
||||
`define INST_FMT_WU 3'b110
|
||||
|
||||
`define FPU_ADD 4'h0
|
||||
`define FPU_SUB 4'h4
|
||||
`define FPU_MUL 4'h8
|
||||
`define FPU_DIV 4'hC
|
||||
`define FPU_CVTWS 4'h1 // FCVT.W.S
|
||||
`define FPU_CVTWUS 4'h5 // FCVT.WU.S
|
||||
`define FPU_CVTSW 4'h9 // FCVT.S.W
|
||||
`define FPU_CVTSWU 4'hD // FCVT.S.WU
|
||||
`define FPU_SQRT 4'h2
|
||||
`define FPU_CLASS 4'h6
|
||||
`define FPU_CMP 4'hA
|
||||
`define FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define FPU_MADD 4'h3
|
||||
`define FPU_MSUB 4'h7
|
||||
`define FPU_NMSUB 4'hB
|
||||
`define FPU_NMADD 4'hF
|
||||
`define FPU_BITS 4
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LD 4'b0011 // new for RV64I LD
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_SD 4'b1011 // new for RV64I SD
|
||||
`define INST_LSU_FENCE 4'b1111
|
||||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(op) op[2:0]
|
||||
`define INST_LSU_WSIZE(op) op[1:0]
|
||||
`define INST_LSU_IS_FENCE(op) (op[3:2] == 3)
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
`define INST_FENCE_BITS 1
|
||||
`define INST_FENCE_D 1'h0
|
||||
`define INST_FENCE_I 1'h1
|
||||
|
||||
`define INST_FPU_ADD 4'b0000 // SUB=fmt[1]
|
||||
`define INST_FPU_MUL 4'b0001
|
||||
`define INST_FPU_MADD 4'b0010 // SUB=fmt[1]
|
||||
`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1]
|
||||
`define INST_FPU_DIV 4'b0100
|
||||
`define INST_FPU_SQRT 4'b0101
|
||||
`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
|
||||
`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2
|
||||
`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1
|
||||
`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3)
|
||||
`define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4)
|
||||
|
||||
`define INST_SFU_TMC 4'h0
|
||||
`define INST_SFU_WSPAWN 4'h1
|
||||
`define INST_SFU_SPLIT 4'h2
|
||||
`define INST_SFU_JOIN 4'h3
|
||||
`define INST_SFU_BAR 4'h4
|
||||
`define INST_SFU_PRED 4'h5
|
||||
`define INST_SFU_CSRRW 4'h6
|
||||
`define INST_SFU_CSRRS 4'h7
|
||||
`define INST_SFU_CSRRC 4'h8
|
||||
`define INST_SFU_BITS 4
|
||||
`define INST_SFU_CSR(f3) (4'h6 + 4'(f3) - 4'h1)
|
||||
`define INST_SFU_IS_WCTL(op) (op <= 5)
|
||||
`define INST_SFU_IS_CSR(op) (op >= 6 && op <= 8)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
`define ISA_EXT_M (1 << 12)
|
||||
`else
|
||||
`define ISA_EXT_M 0
|
||||
`define ARB_SEL_BITS(I, O) ((I > O) ? `CLOG2(`CDIV(I, O)) : 0)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width) \
|
||||
(uuid_width + `CLOG2(mshr_size) + `CLOG2(`CDIV(num_banks, mem_ports)))
|
||||
|
||||
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width) \
|
||||
(`CLOG2(`CDIV(num_reqs, mem_ports)) + `CLOG2(line_size / word_size) + tag_width)
|
||||
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, uuid_width) \
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width)) + 1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef ICACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define ISA_EXT_F (1 << 5)
|
||||
`else
|
||||
`define ISA_EXT_F 0
|
||||
`ifdef DCACHE_ENABLE
|
||||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
||||
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
|
||||
| (0 << 2) // C - Compressed extension \
|
||||
| (0 << 3) // D - Double precision floating-point extension \
|
||||
| (0 << 4) // E - RV32E base ISA \
|
||||
|`ISA_EXT_F // F - Single precision floating-point extension \
|
||||
| (0 << 6) // G - Additional standard extensions present \
|
||||
| (0 << 7) // H - Hypervisor mode implemented \
|
||||
| (1 << 8) // I - RV32I/64I/128I base ISA \
|
||||
| (0 << 9) // J - Reserved \
|
||||
| (0 << 10) // K - Reserved \
|
||||
| (0 << 11) // L - Tentatively reserved for Bit operations extension \
|
||||
|`ISA_EXT_M // M - Integer Multiply/Divide extension \
|
||||
| (0 << 13) // N - User level interrupts supported \
|
||||
| (0 << 14) // O - Reserved \
|
||||
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
|
||||
| (0 << 16) // Q - Quad-precision floating-point extension \
|
||||
| (0 << 17) // R - Reserved \
|
||||
| (0 << 18) // S - Supervisor mode implemented \
|
||||
| (0 << 19) // T - Tentatively reserved for Transactional Memory extension \
|
||||
| (1 << 20) // U - User mode implemented \
|
||||
| (0 << 21) // V - Tentatively reserved for Vector extension \
|
||||
| (0 << 22) // W - Reserved \
|
||||
| (1 << 23) // X - Non-standard extensions present \
|
||||
| (0 << 24) // Y - Reserved \
|
||||
| (0 << 25) // Z - Reserved
|
||||
`define MEM_REQ_FLAG_FLUSH 0
|
||||
`define MEM_REQ_FLAG_IO 1
|
||||
`define MEM_REQ_FLAG_LOCAL 2 // should be last since it is optional
|
||||
`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED)
|
||||
|
||||
`define VX_MEM_PORTS `L3_MEM_PORTS
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
|
||||
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
|
||||
|
||||
`define VX_DCR_ADDR_WIDTH `VX_DCR_ADDR_BITS
|
||||
`define VX_DCR_DATA_WIDTH 32
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO // wid PC
|
||||
`define DBG_CACHE_REQ_MDATAW (`NW_BITS + 32)
|
||||
`else
|
||||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`endif
|
||||
`define NEG_EDGE(dst, src) \
|
||||
VX_edge_trigger #( \
|
||||
.POS (0), \
|
||||
.INIT (0) \
|
||||
) __neg_edge`__LINE__ ( \
|
||||
.clk (clk), \
|
||||
.reset (1'b0), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
// non-cacheable address bit
|
||||
`define NC_ADDR_BITS 1
|
||||
`define BUFFER_EX(dst, src, ena, resetw, latency) \
|
||||
VX_pipe_register #( \
|
||||
.DATAW ($bits(dst)), \
|
||||
.RESETW (resetw), \
|
||||
.DEPTH (latency) \
|
||||
) __buffer_ex`__LINE__ ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.enable (ena), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
|
||||
|
||||
// Cache ID
|
||||
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
|
||||
`define POP_COUNT_EX(out, in, model) \
|
||||
VX_popcount #( \
|
||||
.N ($bits(in)), \
|
||||
.MODEL (model) \
|
||||
) __pop_count_ex`__LINE__ ( \
|
||||
.data_in (in), \
|
||||
.data_out (out) \
|
||||
)
|
||||
|
||||
// Block size in bytes
|
||||
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
|
||||
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
|
||||
|
||||
// Word size in bytes
|
||||
`define IWORD_SIZE 4
|
||||
`define ASSIGN_VX_IF(dst, src) \
|
||||
assign dst.valid = src.valid; \
|
||||
assign dst.data = src.data; \
|
||||
assign src.ready = dst.ready
|
||||
|
||||
// Number of I-cache banks.
`define INUM_BANKS  1
|
||||
// Connects two memory-bus interfaces field by field.
//   Request channel : req_valid and req_data go src -> dst, req_ready goes dst -> src.
//   Response channel: rsp_valid and rsp_data go dst -> src, rsp_ready goes src -> dst.
// The last assignment has no trailing semicolon; the call site supplies it.
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
    assign dst.req_valid = src.req_valid; \
    assign dst.req_data  = src.req_data; \
    assign src.req_ready = dst.req_ready; \
    assign src.rsp_valid = dst.rsp_valid; \
    assign src.rsp_data  = dst.rsp_data; \
    assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
// Core-side I-cache request address width: 32 address bits minus the
// CLOG2(IWORD_SIZE) bits that select a byte within a word.
`define ICORE_ADDR_WIDTH    (32-`CLOG2(`IWORD_SIZE))
|
||||
// Connects a read-only source (src) to a memory-bus interface (dst).
//   Request channel : req_valid, addr, flags and tag are copied from src;
//                     rw is forced to 0, data to all zeros and byteen to all ones.
//   Response channel: rsp_valid, data and tag are copied from dst back to src.
//   Handshake       : req_ready goes dst -> src, rsp_ready goes src -> dst.
// The last assignment has no trailing semicolon; the call site supplies it.
`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \
    assign dst.req_valid = src.req_valid; \
    assign dst.req_data.rw = 0; \
    assign dst.req_data.addr = src.req_data.addr; \
    assign dst.req_data.data = '0; \
    assign dst.req_data.byteen = '1; \
    assign dst.req_data.flags = src.req_data.flags; \
    assign dst.req_data.tag = src.req_data.tag; \
    assign src.req_ready = dst.req_ready; \
    assign src.rsp_valid = dst.rsp_valid; \
    assign src.rsp_data.data = dst.rsp_data.data; \
    assign src.rsp_data.tag = dst.rsp_data.tag; \
    assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
// Core-side I-cache byte-enable width: one bit per byte of an I-cache word.
// Derived from the I-cache word size (IWORD_SIZE) instead of the D-cache one
// (DWORD_SIZE); both are defined as 4 in this header, so the expanded value is unchanged.
`define ICORE_BYTEEN_WIDTH  `IWORD_SIZE
|
||||
// Connects two memory-bus interfaces whose tag widths may differ.
//   dst, src : interfaces to connect (requests flow src -> dst, responses dst -> src)
//   TD       : tag width used on the dst side
//   TS       : tag width used on the src side
//   UUID     : when non-zero, the tag is treated as a struct with .uuid and .value
//              fields and this argument is subtracted when slicing .value
//              (presumably the uuid field width -- confirm against the tag type)
// Tag conversion, request direction (src -> dst):
//   TD == TS : copied unchanged
//   TD >  TS : zero-padded by TD-TS bits (between uuid and value when UUID != 0,
//              on the MSB side otherwise)
//   TD <  TS : truncated to the low bits (value[TD-UUID-1:0] or tag[TD-1:0])
// The response direction (dst -> src) applies the mirror-image conversion
// using TS as the target width.
// All other request/response fields and the ready handshakes are copied as-is.
// The `if` blocks are generate constructs, so TD, TS and UUID must be
// elaboration-time constants; GENUNNAMED lint is suppressed around them.
// The last assignment has no trailing semicolon; the call site supplies it.
`define ASSIGN_VX_MEM_BUS_IF_EX(dst, src, TD, TS, UUID) \
    assign dst.req_valid = src.req_valid; \
    assign dst.req_data.rw = src.req_data.rw; \
    assign dst.req_data.addr = src.req_data.addr; \
    assign dst.req_data.data = src.req_data.data; \
    assign dst.req_data.byteen = src.req_data.byteen; \
    assign dst.req_data.flags = src.req_data.flags; \
    /* verilator lint_off GENUNNAMED */ \
    if (TD != TS) begin \
        if (UUID != 0) begin \
            if (TD > TS) begin \
                assign dst.req_data.tag = {src.req_data.tag.uuid, {(TD-TS){1'b0}}, src.req_data.tag.value}; \
            end else begin \
                assign dst.req_data.tag = {src.req_data.tag.uuid, src.req_data.tag.value[TD-UUID-1:0]}; \
            end \
        end else begin \
            if (TD > TS) begin \
                assign dst.req_data.tag = {{(TD-TS){1'b0}}, src.req_data.tag}; \
            end else begin \
                assign dst.req_data.tag = src.req_data.tag[TD-1:0]; \
            end \
        end \
    end else begin \
        assign dst.req_data.tag = src.req_data.tag; \
    end \
    /* verilator lint_on GENUNNAMED */ \
    assign src.req_ready = dst.req_ready; \
    assign src.rsp_valid = dst.rsp_valid; \
    assign src.rsp_data.data = dst.rsp_data.data; \
    /* verilator lint_off GENUNNAMED */ \
    if (TD != TS) begin \
        if (UUID != 0) begin \
            if (TD > TS) begin \
                assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, dst.rsp_data.tag.value[TS-UUID-1:0]}; \
            end else begin \
                assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, {(TS-TD){1'b0}}, dst.rsp_data.tag.value}; \
            end \
        end else begin \
            if (TD > TS) begin \
                assign src.rsp_data.tag = dst.rsp_data.tag[TS-1:0]; \
            end else begin \
                assign src.rsp_data.tag = {{(TS-TD){1'b0}}, dst.rsp_data.tag}; \
            end \
        end \
    end else begin \
        assign src.rsp_data.tag = dst.rsp_data.tag; \
    end \
    /* verilator lint_on GENUNNAMED */ \
    assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
// Identifier portion of the I-cache core request tag: NW_BITS wide.
`define ICORE_TAG_ID_BITS   `NW_BITS
|
||||
// Idles the requester side of a memory-bus interface:
//   drives req_valid, req_data and rsp_ready to zero and marks the incoming
//   req_ready, rsp_valid and rsp_data signals as unused via UNUSED_VAR.
// NOTE: unlike the ASSIGN_* macros in this header, the expansion already ends
// with a semicolon.
`define INIT_VX_MEM_BUS_IF(itf) \
    assign itf.req_valid = 0; \
    assign itf.req_data = '0; \
    `UNUSED_VAR (itf.req_ready) \
    `UNUSED_VAR (itf.rsp_valid) \
    `UNUSED_VAR (itf.rsp_data) \
    assign itf.rsp_ready = 0;
|
||||
|
||||
// Core-side I-cache request tag width: debug metadata bits plus the tag id bits.
`define ICORE_TAG_WIDTH     (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
// Idles the responder side of a memory-bus interface:
//   marks the incoming req_valid, req_data and rsp_ready signals as unused via
//   UNUSED_VAR and drives req_ready, rsp_valid and rsp_data to zero.
`define UNUSED_VX_MEM_BUS_IF(itf) \
    `UNUSED_VAR (itf.req_valid) \
    `UNUSED_VAR (itf.req_data) \
    assign itf.req_ready = 0; \
    assign itf.rsp_valid = 0; \
    assign itf.rsp_data = '0; \
    `UNUSED_VAR (itf.rsp_ready)
|
||||
|
||||
// Memory-side I-cache data width in bits: one full cache line.
`define IMEM_DATA_WIDTH     (`ICACHE_LINE_SIZE * 8)

// Memory-side I-cache address width: 32 address bits minus the
// CLOG2(ICACHE_LINE_SIZE) bits that select a byte within a line.
`define IMEM_ADDR_WIDTH     (32 - `CLOG2(`ICACHE_LINE_SIZE))
|
||||
// Forwards the write channel of a DCR bus interface from src to dst,
// optionally through a pipeline register.
//   dst, src : DCR bus interfaces (write_valid, write_addr, write_data)
//   ena      : gating term ANDed with src.write_valid
//   latency  : 0 selects a direct continuous assignment; any other value
//              instantiates a VX_pipe_register with DEPTH = latency
// The register is named pipe_reg, uses clk from the enclosing scope, and has
// its reset tied to 1'b0 and its enable tied to 1'b1.
// The `if` is a generate construct, so latency must be an elaboration-time
// constant; GENUNNAMED lint is suppressed around it.
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
    /* verilator lint_off GENUNNAMED */ \
    if (latency != 0) begin \
        VX_pipe_register #( \
            .DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \
            .DEPTH (latency) \
        ) pipe_reg ( \
            .clk      (clk), \
            .reset    (1'b0), \
            .enable   (1'b1), \
            .data_in  ({src.write_valid && ena, src.write_addr, src.write_data}), \
            .data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
        ); \
    end else begin \
        assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
    end \
    /* verilator lint_on GENUNNAMED */
|
||||
|
||||
// Memory-side I-cache byte-enable width: one bit per byte of a cache line.
`define IMEM_BYTEEN_WIDTH   `ICACHE_LINE_SIZE
|
||||
// Sums one performance-counter field across an array of sources into dst.
//   dst        : destination; dst.<field> is driven
//   src        : array of sources; src[i].<field> is read for i in [0, count)
//   field      : member name, pasted after "dst." and "src[i]."
//   width      : bit width of the field
//   count      : number of sources
//   reg_enable : when non-zero, the sum is registered on posedge clk and
//                cleared to zero while reset is asserted
// For count > 1 the values are gathered into a packed array and added with a
// VX_reduce_tree (OP "+"). For count <= 1, dst.<field> is wired to src[0].<field>.
// The helper names end in the literal text "_field"; the field argument is only
// substituted where the double-backtick paste operator appears.
// Uses clk and reset from the enclosing scope. The `if`/`for` are generate
// constructs, so count and reg_enable must be elaboration-time constants;
// GENUNNAMED lint is suppressed around them.
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
    /* verilator lint_off GENUNNAMED */ \
    if (count > 1) begin \
        wire [count-1:0][width-1:0] __reduce_add_i_field; \
        wire [width-1:0] __reduce_add_o_field; \
        for (genvar __i = 0; __i < count; ++__i) begin \
            assign __reduce_add_i_field[__i] = src[__i].``field; \
        end \
        VX_reduce_tree #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
            __reduce_add_i_field, \
            __reduce_add_o_field \
        ); \
        if (reg_enable) begin \
            reg [width-1:0] __reduce_add_r_field; \
            always @(posedge clk) begin \
                if (reset) begin \
                    __reduce_add_r_field <= '0; \
                end else begin \
                    __reduce_add_r_field <= __reduce_add_o_field; \
                end \
            end \
            assign dst.``field = __reduce_add_r_field; \
        end else begin \
            assign dst.``field = __reduce_add_o_field; \
        end \
    end else begin \
        assign dst.``field = src[0].``field; \
    end \
    /* verilator lint_on GENUNNAMED */
|
||||
|
||||
// Memory-side I-cache request tag width: same as the memory address width.
`define IMEM_TAG_WIDTH      `IMEM_ADDR_WIDTH
|
||||
// Builds a warp id (dst) from src and a block-local index.
//   dst        : resulting warp id
//   src        : original warp id
//   block_idx  : index substituted into the low bits
//   block_size : block size selecting one of three cases
//     block_size == 1         : dst = src
//     block_size == NUM_WARPS : dst = block_idx cast to NW_WIDTH bits
//     otherwise               : dst = {src[NW_WIDTH-1:CLOG2(block_size)],
//                                      block_idx cast to CLOG2(block_size) bits}
// The `if` blocks are generate constructs, so block_size must be an
// elaboration-time constant; GENUNNAMED lint is suppressed around them.
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
    /* verilator lint_off GENUNNAMED */ \
    if (block_size != 1) begin \
        if (block_size != `NUM_WARPS) begin \
            assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
        end else begin \
            assign dst = `NW_WIDTH'(block_idx); \
        end \
    end else begin \
        assign dst = src; \
    end \
    /* verilator lint_on GENUNNAMED */
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////

// D-cache identifier: per-core slot 1 (CORE_ID * 3 + 1), placed after the ids
// taken by L3 (one, when enabled) and L2 (NUM_CLUSTERS, when enabled).
// CORE_ID is a plain identifier and must be in scope at the expansion site.
`define DCACHE_ID           (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)

// Line size in bytes: L1_BLOCK_SIZE when L2 is enabled, MEM_BLOCK_SIZE otherwise.
`define DCACHE_LINE_SIZE    (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)

// Word size in bytes.
`define DWORD_SIZE          4

// Core request address width (word-addressed 32-bit space).
`define DCORE_ADDR_WIDTH    (32-`CLOG2(`DWORD_SIZE))

// Core request tag id: LSU queue index bits + non-cacheable bits + SM_ENABLE.
`define LSUQ_ADDR_BITS      `LOG2UP(`LSUQ_SIZE)
`define DCORE_TAG_ID_BITS   (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)

// Core request tag width: debug metadata bits plus the tag id bits.
`define DCORE_TAG_WIDTH     (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)

// Memory request data width in bits: one full cache line.
`define DMEM_DATA_WIDTH     (`DCACHE_LINE_SIZE * 8)

// Memory request address width (line-addressed 32-bit space).
`define DMEM_ADDR_WIDTH     (32 - `CLOG2(`DCACHE_LINE_SIZE))

// Memory byte-enable width: one bit per byte of a cache line.
`define DMEM_BYTEEN_WIDTH   `DCACHE_LINE_SIZE

// Number of core request ports.
`define DNUM_REQS           `NUM_THREADS

// Memory request tag width: the larger of (address + non-cacheable bits) and
// (request index + word-in-line index + core tag).
`define _DMEM_ADDR_RATIO_W  $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE)
`define _DNC_MEM_TAG_WIDTH  ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH)
`define DMEM_TAG_WIDTH      `MAX((`DMEM_ADDR_WIDTH + `NC_ADDR_BITS), `_DNC_MEM_TAG_WIDTH)
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////

// Shared-memory identifier: per-core slot 2 (CORE_ID * 3 + 2), placed after the
// ids taken by L3 (one, when enabled) and L2 (NUM_CLUSTERS, when enabled).
// CORE_ID is a plain identifier and must be in scope at the expansion site.
`define SCACHE_ID           (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)

// Word size in bytes.
`define SWORD_SIZE          4

// Bank address offset: CLOG2 of the stack size expressed in words.
`define SBANK_ADDR_OFFSET   `CLOG2(`STACK_SIZE / `SWORD_SIZE)

// Number of core request ports.
`define SNUM_REQS           `NUM_THREADS
|
||||
|
||||
////////////////////////// L2cache Configurable Knobs /////////////////////////

// L2 cache identifier: CLUSTER_ID, offset by one when L3 is enabled.
// CLUSTER_ID is a plain identifier and must be in scope at the expansion site.
`define L2CACHE_ID          (32'(`L3_ENABLE) + CLUSTER_ID)

// Line size in bytes.
`define L2CACHE_LINE_SIZE   `MEM_BLOCK_SIZE

// Word size in bytes: one D-cache line.
`define L2WORD_SIZE         `DCACHE_LINE_SIZE

// Input request tag width: D-cache core tag plus a core index.
`define L2CORE_TAG_WIDTH    (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))

// Memory request data width in bits: one full cache line.
`define L2MEM_DATA_WIDTH    (`L2CACHE_LINE_SIZE * 8)

// Memory request address width (line-addressed 32-bit space).
`define L2MEM_ADDR_WIDTH    (32 - `CLOG2(`L2CACHE_LINE_SIZE))

// Memory byte-enable width: one bit per byte of a cache line.
`define L2MEM_BYTEEN_WIDTH  `L2CACHE_LINE_SIZE

// Number of input request ports.
`define L2NUM_REQS          `NUM_CORES

// Memory request tag width. With L2 enabled: the larger of (address +
// non-cacheable bits) and (request index + word-in-line index + XMEM tag).
// With L2 disabled: the XMEM tag plus a request index.
`define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE)
`define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
`define _L2MEM_TAG_WIDTH    `MAX((`L2MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L2NC_MEM_TAG_WIDTH)
`define L2MEM_TAG_WIDTH     (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS)))
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////

// L3 cache identifier.
`define L3CACHE_ID          0

// Line size in bytes.
`define L3CACHE_LINE_SIZE   `MEM_BLOCK_SIZE

// Word size in bytes: one L2 cache line.
`define L3WORD_SIZE         `L2CACHE_LINE_SIZE

// Input request tag width: L2 core tag plus a cluster index.
`define L3CORE_TAG_WIDTH    (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))

// Memory request data width in bits: one full cache line.
`define L3MEM_DATA_WIDTH    (`L3CACHE_LINE_SIZE * 8)

// Memory request address width (line-addressed 32-bit space).
`define L3MEM_ADDR_WIDTH    (32 - `CLOG2(`L3CACHE_LINE_SIZE))

// Memory byte-enable width: one bit per byte of a cache line.
`define L3MEM_BYTEEN_WIDTH  `L3CACHE_LINE_SIZE

// Number of input request ports.
`define L3NUM_REQS          `NUM_CLUSTERS

// Memory request tag width. With L3 enabled: the larger of (address +
// non-cacheable bits) and (request index + word-in-line index + L2 memory tag).
// With L3 disabled: the L2 memory tag plus a request index.
`define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE)
`define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH)
`define _L3MEM_TAG_WIDTH    `MAX((`L3MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L3NC_MEM_TAG_WIDTH)
`define L3MEM_TAG_WIDTH     (`L3_ENABLE ? `_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS)))
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Top-level memory interface widths, aliased to the L3-level values.
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
`define VX_MEM_ADDR_WIDTH   `L3MEM_ADDR_WIDTH
`define VX_MEM_DATA_WIDTH   `L3MEM_DATA_WIDTH
`define VX_MEM_TAG_WIDTH    `L3MEM_TAG_WIDTH

// Top-level core tag width, aliased to the L3-level value.
`define VX_CORE_TAG_WIDTH   `L3CORE_TAG_WIDTH

// Bits needed to index every core across all clusters.
`define VX_CSR_ID_WIDTH     `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)

// Widens x to 32 bits by appending zero bits on the LSB side.
`define TO_FULL_ADDR(x)     {x, (32-$bits(x))'(0)}

// Merged D-cache/I-cache memory tag: the D-cache memory tag plus CLOG2(2) bits.
`define XMEM_TAG_WIDTH      (`DMEM_TAG_WIDTH + `CLOG2(2))
|
||||
|
||||
`include "VX_types.vh"
|
||||
|
||||
`endif
|
||||
`endif // VX_DEFINE_VH
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue