Compare commits

...

1184 commits
v1.x ... master

Author SHA1 Message Date
tinebp
332e8eeaf9
Merge pull request #244 from vortexgpgpu/bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
Bug fixes
2025-04-13 20:48:36 -07:00
tinebp
5dbfcecc21 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-04-13 17:28:33 -07:00
tinebp
f19335023f CI migration to ubuntu 22.04 2025-04-13 14:16:05 -07:00
tinebp
6a7e402ab4
Merge pull request #239 from vortexgpgpu/bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
regression fix
2025-03-17 04:33:46 -07:00
tinebp
18687d53b3
Merge branch 'master' into bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-03-17 04:33:12 -07:00
tinebp
a35fb4bf1d regression fix 2025-03-17 04:30:50 -07:00
tinebp
9929c42417
Merge pull request #238 from vortexgpgpu/bug_fixes
workaroud fix for opencl kernel include in POCL
2025-03-17 04:07:52 -07:00
tinebp
06e5e2e859 workaroud fix for opencl kernel include in POCL 2025-03-17 04:04:07 -07:00
tinebp
09e89791e5
Merge pull request #237 from vortexgpgpu/bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
CI versioning
2025-03-12 20:21:51 -07:00
tinebp
b35f69f486 CI versioning
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-03-12 17:38:09 -07:00
tinebp
63b41f21c6 xrt sandbox simulation
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-02-12 00:24:36 -08:00
tinebp
cc7fdf2fbd fixed github actions versioning 2025-02-11 22:03:32 -08:00
tinebp
a9352a3b64 minor update 2025-02-11 21:56:05 -08:00
tinebp
9a2709db08 xrt sandbox synthesis build fix
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-02-11 14:10:29 -08:00
tinebp
4785736e4d minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-26 22:55:22 -08:00
tinebp
38861d9aaf minor updates 2025-01-26 22:40:34 -08:00
tinebp
82b0eeded6 minor update 2025-01-26 19:35:56 -08:00
tinebp
22398c991d ramulator memory addressing bug fix + platform memory refactoring
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-26 06:28:51 -08:00
tinebp
e80ee2c819 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-22 04:56:49 -08:00
tinebp
9dc1d3f688 Merge branch 'bug_fixes' 2025-01-22 02:49:55 -08:00
tinebp
0c1bc17c09
Merge pull request #222 from MichaelJSr/simx-vpu-toggle
Toggle the RISC-V Vector Extension on and off
2025-01-22 02:48:38 -08:00
tinebp
4e83c28d04 minor bug fix 2025-01-21 23:07:41 -08:00
tinebp
2c940cf509 AXI adapter bug fix
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-21 06:14:22 -08:00
tinebp
fb4527fe95 cache repl reset
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-21 01:06:22 -08:00
tinebp
d1f37fc629 minor update 2025-01-20 22:19:42 -08:00
tinebp
001a107395 bram reset bug fix 2025-01-20 22:16:05 -08:00
tinebp
fce24b9535 fixed XRT AFU done handshake
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-17 23:58:23 -08:00
MichaelJSr
6d27575db3 Revert some of "Added ifndef statements for the vector extension anywhere they didn't exist already" 2025-01-14 21:56:39 -08:00
MichaelJSr
a2cfeffcfe Added ifndef statements for the vector extension anywhere they didn't exist already
Added ifndef statements for the vector extension anywhere they didn't exist already

more ifdef statements

more ifdef

Update decode.cpp

Update decode.cpp

Update decode.cpp
2025-01-14 21:29:47 -08:00
MichaelJSr
cb491ddb53 test
Revert "test"

This reverts commit 393e347c2faba260f1469667596e22dc2aa16553.
2025-01-14 21:22:28 -08:00
tinebp
43b143bba6 bug fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-14 03:44:53 -08:00
tinebp
87297e0eca bug fixes 2025-01-14 02:21:17 -08:00
MichaelJSr
929ef1b6e2 Remove unused EXTV code, clean up code, pragma once around vpu.h 2025-01-13 16:45:13 -08:00
tinebp
83ba1cc3dc minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-11 20:23:26 -08:00
tinebp
347889c504 minor updates
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-11 03:24:06 -08:00
tinebp
083cf04afd timing optimizations 2025-01-11 03:19:55 -08:00
tinebp
84b1c8a43c BRAM optimizations 2025-01-11 03:18:11 -08:00
tinebp
43d33b942e minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-26 11:32:57 -08:00
tinebp
adf60e7e35 minor update 2024-12-26 10:58:13 -08:00
tinebp
8fda922570 minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-26 10:20:57 -08:00
tinebp
53900bee4f bug fixes 2024-12-26 10:01:36 -08:00
tinebp
704f525fd6 memory mem_coalescer miss perf counter
RTL perf counters refactoring
2024-12-26 08:00:36 -08:00
tinebp
f478bdcf25 memory coalescer misses perf counter
rtl perf interface refactoring
2024-12-26 07:56:28 -08:00
tinebp
01974e124f
Merge pull request #216 from sij814/simx2
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
Simx ICache DCache Changes
2024-12-18 03:17:14 -08:00
tinebp
100e4e3970 multi-ports fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-17 22:38:23 -08:00
tinebp
4819891a5e minor update 2024-12-17 18:06:52 -08:00
tinebp
066ab105eb multiports fixes 2024-12-17 16:23:08 -08:00
tinebp
a98d2e24e5 rtlsim multibanks
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-16 22:10:57 -08:00
sij814
572a397018 changed versions 2024-12-15 15:11:13 -08:00
sij814
cad129c64c added icache dcache overlap 2024-12-15 14:55:21 -08:00
tinebp
bae24e589c minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-14 02:04:50 -08:00
tinebp
461f2cbbc9 Intel Opae AFU support for multiport
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-13 21:20:38 -08:00
tinebp
7975a5a38c fixed AXI adapter 2024-12-12 20:52:45 -08:00
tinebp
f635d71ba4 minor fix
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-11 10:31:03 -08:00
tinebp
70ade222b1 multiport 2024-12-10 23:25:05 -08:00
tinebp
aa6a47eb11 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-05 23:35:15 -08:00
tinebp
115ff2b599 minor fixes 2024-12-05 22:38:04 -08:00
tinebp
896c59306c adding clang-format file 2024-12-05 15:58:04 -08:00
tinebp
6bbcd4ebaf vector updates with clang formatting 2024-12-05 15:55:57 -08:00
tinebp
6b23d290c3 vector ISA updates 2024-12-05 14:43:51 -08:00
tinebp
5d91fe58ad
Merge pull request #211 from MichaelJSr/riscv-vector-isa-simx-clean
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Vector extension simx with fallbacks for testcases and clean history
2024-12-05 10:18:45 -08:00
tinebp
5891a1e592
Merge branch 'master' into riscv-vector-isa-simx-clean 2024-12-05 10:17:05 -08:00
tinebp
18ae57cc7f Merge branch 'bug_fixes'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-04 22:20:52 -08:00
tinebp
a760d909cb minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-04 21:36:31 -08:00
tinebp
86f20b27dd SimX multi-ports memory fixes 2024-12-04 21:11:51 -08:00
tinebp
3ace9bbeda minor updates
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-04 06:00:19 -08:00
tinebp
30b0daf050 SimX multiports support fixes
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-03 05:46:33 -08:00
tinebp
24ca4f03aa minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-02 19:53:28 -08:00
tinebp
3b454efd56 fixes to SimX's multiports memory support 2024-12-02 17:51:42 -08:00
MichaelJSr
951746badc Commented out some vector testcases that dont pass 2024-11-28 05:13:56 -08:00
MichaelJSr
6c2cbdfec2 made -v a valid option for simx simulator 2024-11-28 02:12:01 -08:00
MichaelJSr
973fcd7845 Merge branch 'riscv-vector-isa-simx-clean' of https://github.com/MichaelJSr/vortex into riscv-vector-isa-simx-clean 2024-11-27 23:53:57 -08:00
MichaelJSr
5eecd0e987 Added case for vector-test due to different exitcode
The vector tests need the cluster exitcodes
2024-11-27 23:50:57 -08:00
MichaelJSr
073e0ddd10 Adds the riscv vector extension into simx
Added vector regression test to ci.yml
2024-11-27 23:22:22 -08:00
MichaelJSr
c05a0571c8 Added vector regression test to ci.yml 2024-11-27 13:10:08 -08:00
MichaelJSr
1e4583ac17 Adds the riscv vector extension into simx 2024-11-26 18:41:01 -08:00
tinebp
3e4bbfc9f0 minor update 2024-11-22 11:12:17 -08:00
tinebp
7c4ce74801 memory unit timing optimization 2024-11-21 16:48:41 -08:00
tinebp
18bf49d1e0 minor update 2024-11-21 16:48:18 -08:00
tinebp
180735c531 fifoqueue area optimization 2024-11-21 16:47:00 -08:00
tinebp
8d8769c710 stream_buffer area optimization
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-20 19:15:51 -08:00
tinebp
b0c48e7a46 stream buffer area optimization 2024-11-20 18:27:52 -08:00
tinebp
320c090613 xilinx asynchronous bram patch fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-19 01:57:33 -08:00
tinebp
b48b605b51 remove deprecared yosys link 2024-11-15 03:42:06 -08:00
tinebp
8230b37411 fixed opae build bug
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-14 11:42:21 -08:00
tinebp
5844de8c4d Merge branch 'rtl_cache'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-11-13 22:27:11 -08:00
tinebp
dfc7b6178c cleanup old cache test
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-13 20:56:06 -08:00
tinebp
bffc6d9610 enabling Vivado's asynchronous bram suppot via direct netlist transformation
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-11-13 16:20:25 -08:00
Hyesoon Kim
6dbbc62b04
Merge pull request #200 from Udit8348/develop-docker-micro
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
Develop Docker Micro
2024-11-09 10:45:16 -05:00
Udit Subramanya
667fa1662d update docker for micro apptainer 2024-11-01 14:46:38 -04:00
Udit Subramanya
e73e1c2bb3 update xilinx fpga steps with environment variable steps 2024-11-01 13:56:01 -04:00
Udit Subramanya
27f3d6dde6 Merge remote-tracking branch 'origin/master' into develop-documentation 2024-10-25 13:16:28 -04:00
Udit Subramanya
d475e9d201 remove duplicate block 2024-10-25 12:59:24 -04:00
Blaise Tine
ce510d78c7 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-24 05:02:46 -07:00
Blaise Tine
eecff10dea minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-24 02:51:08 -07:00
Blaise Tine
98b58606e5 merge fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-24 02:18:00 -07:00
Blaise Tine
8b172d07ec revert xilinx's asynchronous bram workaround 2024-10-24 01:44:55 -07:00
Hyesoon Kim
f68cc95cbe
Merge branch 'master' into develop-documentation 2024-10-23 19:41:29 -04:00
Hyesoon Kim
659ad87f93
Merge pull request #188 from Udit8348/develop-docker
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Vortex Dockerfiles
2024-10-23 19:41:06 -04:00
Blaise Tine
22ade31fd5 minor updates 2024-10-23 15:55:11 -07:00
Hyesoon Kim
2d3f4b6efc
Merge branch 'master' into develop-docker 2024-10-23 18:08:19 -04:00
Blaise Tine
cc5ac8388b minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-23 14:03:19 -07:00
Blaise Tine
ec12b50007 minor udpate 2024-10-23 13:09:34 -07:00
Blaise Tine
e7d09feb4a decode => demux 2024-10-23 13:06:45 -07:00
Blaise Tine
7ab58111d8 minor update 2024-10-23 12:30:39 -07:00
Blaise Tine
1c384c096d minor update 2024-10-23 12:27:44 -07:00
Udit Subramanya
24d018b4c9 documentation updates 2024-10-23 05:18:53 -04:00
Blaise Tine
1fa4603fa2 disable sformatf during synthesis
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-23 01:14:19 -07:00
Blaise Tine
3a3bb7b70a cleanup deleted files
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-21 22:46:04 -07:00
Blaise Tine
ff50306833 minor update 2024-10-21 22:24:54 -07:00
Udit Subramanya
519023fb2b add citation for MICRO 21 paper 2024-10-21 15:39:10 -04:00
Udit Subramanya
8fdca0e52a correct vitis env 2024-10-21 15:38:53 -04:00
Udit Subramanya
f184b57c24 merge upstream and resolve deleted file conflict 2024-10-21 13:45:32 -04:00
Udit Subramanya
d584e7bac1 intermediate docs update 2024-10-21 13:28:57 -04:00
Blaise Tine
2b3d1f0860 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-20 23:54:42 -07:00
Blaise Tine
fccbadfe25 minor update 2024-10-20 23:32:22 -07:00
Blaise Tine
1e4f0fa0bd minor update 2024-10-20 21:42:02 -07:00
Blaise Tine
22c3828bf5 minor update 2024-10-20 21:12:49 -07:00
Blaise Tine
acc1e3dfd8 minor update 2024-10-20 20:07:34 -07:00
Blaise Tine
0f380a3d78 minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-20 07:49:27 -07:00
Blaise Tine
9373e21950 minor update 2024-10-20 07:32:32 -07:00
Blaise Tine
2bd22253eb minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-19 22:14:38 -07:00
Blaise Tine
4206ffdb80 minor update 2024-10-19 21:39:34 -07:00
Blaise Tine
b6bd6467ef cache hit timing optimization 2024-10-19 20:04:51 -07:00
Blaise Tine
8f29ad58ae block ram redesign to support synthesizable write-first mode
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-18 23:54:20 -07:00
Blaise Tine
6b1091e08f minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-17 14:07:22 -07:00
Blaise Tine
91fee5da11 minor update 2024-10-17 11:25:17 -07:00
Blaise Tine
077b682d7d minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-17 04:58:29 -07:00
Blaise Tine
5971158f43 minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-16 20:22:42 -07:00
Blaise Tine
a7ba377581 minor update 2024-10-16 18:04:11 -07:00
Blaise Tine
f695e4d754 minor update 2024-10-15 14:59:31 -07:00
Blaise Tine
e06333b3c0 minor update 2024-10-15 11:28:33 -07:00
Blaise Tine
645befdce6 minor update 2024-10-15 11:23:29 -07:00
Blaise Tine
e62b638d88 minor update 2024-10-15 10:36:05 -07:00
Blaise Tine
1d5e4f63dd minor update 2024-10-15 03:24:02 -07:00
Blaise Tine
68b78fc42f minor update 2024-10-15 02:32:17 -07:00
Blaise Tine
db98965f56 minor update 2024-10-15 02:27:07 -07:00
Blaise Tine
03a1e25828 adding cache replacement policy 2024-10-15 00:28:09 -07:00
tinebp
5d7e53f7d7
Merge pull request #194 from MichaelJSr/add-back-ecall-ebreak-traps
Add back the "ecall" and "ebreak" instruction traps for riscv-vector test functionality
2024-10-14 20:46:29 -07:00
Blaise Tine
37757fab8f fixed fifo_queue support for BRAM 2024-10-14 15:48:49 -07:00
MichaelJSr
0d04423074 Readded the ecall and ebreak instruction traps so that the riscv-vector tests run properly 2024-10-14 10:12:33 -07:00
Blaise Tine
fe5442dbb3 minor update 2024-10-13 23:34:57 -07:00
Blaise Tine
2a2fc2ae39 minor update 2024-10-13 23:25:41 -07:00
Blaise Tine
26df675e24 minor update 2024-10-13 20:08:38 -07:00
Blaise Tine
f63233334e minor update 2024-10-13 16:22:59 -07:00
Blaise Tine
9e5638c9b0 minor update 2024-10-13 12:06:55 -07:00
Blaise Tine
1d626588ef minor update 2024-10-13 11:49:12 -07:00
Blaise Tine
37f4d05393 minor update 2024-10-13 10:44:04 -07:00
Blaise Tine
9f32e5693c minor update 2024-10-13 10:41:32 -07:00
Blaise Tine
684f2e2d3d minor update 2024-10-13 03:42:51 -07:00
Blaise Tine
28bf27e951 rtl cache redesign to support xilinx bram types 2024-10-13 03:40:45 -07:00
Udit Subramanya
8155173aab add documentation based on intial feedback 2024-10-11 07:40:21 -07:00
Udit Subramanya
d3df61abb0 add initial development and production dockerfiles 2024-10-09 12:32:49 -04:00
Blaise Tine
a5381fd788 async bram optimization 2024-10-09 04:14:15 -07:00
Blaise Tine
f49084b298 improving block rams inference with registered read address. 2024-10-08 23:44:36 -07:00
Blaise Tine
ee96d4334b VX_onehot_encoder update 2024-10-08 23:01:01 -07:00
Blaise Tine
c91f9684fc minor update 2024-10-05 18:35:26 -07:00
Blaise Tine
07ce16e75c minor update 2024-10-05 17:42:26 -07:00
Blaise Tine
2eeb2ac532 fixed memory flags propagation through the cache hierarchy 2024-10-05 13:46:10 -07:00
Hyesoon Kim
91c135ac15
Merge pull request #185 from vortexgpgpu/tensor-core
Merge tensor-core and devel branch into master
2024-10-05 10:46:50 -04:00
jaewon-lee-github
faa3b9a469 Merge branch 'master' into tensor-core 2024-10-04 12:58:51 -04:00
Hyesoon Kim
847562be9e
Merge pull request #187 from vortexgpgpu/revert-181-master
Revert "Initial HBM changes for RTL"
2024-10-04 11:37:06 -04:00
Jaewon Lee
0bf79a0f05
Revert "Initial HBM changes for RTL" 2024-10-04 10:13:31 -04:00
Jaewon Lee
119805a959
Merge branch 'master' into tensor-core 2024-10-04 10:03:00 -04:00
tinebp
bc765d10bd
Merge pull request #181 from sij814/master
Initial HBM changes for RTL
2024-10-04 06:45:44 -07:00
Udit Subramanya
208c5b3804 reorg docs 2024-10-04 08:56:49 -04:00
Udit Subramanya
32b0376b28 remove old artifacts 2024-10-03 17:43:39 -04:00
Udit Subramanya
6a447350b7 remove redundant docs after consolidating 2024-10-03 17:42:47 -04:00
Udit Subramanya
dd16d70515 contributing and fpga docs 2024-10-03 17:29:21 -04:00
jaewon-lee-github
5cf6797bd3 - Change STARTUP_ADDR to use the same 0x80000000 address
- Fix environment variable for vortex kernel directories
2024-10-03 15:19:39 -04:00
jaewon-lee-github
bbc02cc013 merged with master 2024-10-03 13:44:39 -04:00
jaewon-lee-github
b7531c9de1 support 64bit 2024-10-02 17:46:01 -04:00
Jaewon Lee
6c725978b4
Merge pull request #184 from vortexgpgpu/develop
Develop
2024-10-02 15:41:35 -04:00
jaewon-lee-github
d1175a03c9 update the code accessing registers in obsoleted way 2024-10-02 14:16:57 -04:00
Blaise Tine
83badaac86 minor update 2024-10-02 11:10:33 -07:00
Blaise Tine
4b8ca42e85 minor update 2024-10-02 09:27:26 -07:00
Blaise Tine
ad7377c8ba minor udpate 2024-10-02 07:41:29 -07:00
Blaise Tine
5cb033ae13 minor update 2024-10-02 07:12:30 -07:00
Blaise Tine
44ebc12ed4 minor update 2024-10-01 00:55:45 -07:00
Blaise Tine
a3aca502b7 minor update 2024-09-30 14:20:48 -07:00
Jaewon Lee
4a606061d2
Merge branch 'develop' into tensor-core 2024-09-30 16:48:47 -04:00
Blaise Tine
ee69024841 minor update 2024-09-30 09:17:42 -07:00
Blaise Tine
6f81df5edb axi_adapter large tags support 2024-09-30 06:25:50 -07:00
Blaise Tine
1deb13c469 minor update 2024-09-30 03:36:00 -07:00
Blaise Tine
2d00cec9d3 minor update 2024-09-30 02:12:30 -07:00
Blaise Tine
a3031922ce minor update 2024-09-29 09:07:45 -07:00
Blaise Tine
60860ec684 minor update 2024-09-29 09:03:24 -07:00
Blaise Tine
cf3909a910 minor update 2024-09-29 07:52:53 -07:00
Blaise Tine
5c694a997c update scope tap testing 2024-09-29 00:09:25 -07:00
Blaise Tine
30571d716c updated scope CI test 2024-09-28 21:37:48 -07:00
Blaise Tine
b8475c65dc adjusting platform caps 2024-09-28 21:25:55 -07:00
Blaise Tine
4329e3f968 minor update 2024-09-28 20:28:57 -07:00
Blaise Tine
b634f9f47d count_leading_zeros fix 2024-09-28 20:15:03 -07:00
Blaise Tine
87e613d29d fixed XRT AFU deadlock on exit 2024-09-28 05:20:37 -07:00
Blaise Tine
eee037ffcd minor update 2024-09-27 20:59:29 -07:00
Blaise Tine
9027555e6a minor update 2024-09-27 20:30:57 -07:00
Blaise Tine
989341a77d minor udpate 2024-09-27 15:13:42 -07:00
Blaise Tine
ec8cc4c84d minor update 2024-09-27 14:21:09 -07:00
Blaise Tine
6e40162027 extending scope triggering to capture continous firing events 2024-09-27 11:36:31 -07:00
Blaise Tine
f2c970868e minor update 2024-09-27 10:02:59 -07:00
Blaise Tine
533ddffc47 cleanup multi-dimensional array to improve synthesis compatibility 2024-09-27 09:48:05 -07:00
Blaise Tine
e9f19a0bf9 fixed BRAM multi-dimensional array bug on Xilinx Vivado 2024-09-27 09:13:24 -07:00
Blaise Tine
5db1937a5e fixed scope parser array indexing 2024-09-27 07:52:38 -07:00
Blaise Tine
9a3eb74051 adding scope.py support for structs 2024-09-26 09:50:38 -07:00
Blaise Tine
27543e240e minor update 2024-09-25 19:11:40 -07:00
Blaise Tine
4f11278d2c scope_tap bug fixes and improvements 2024-09-25 10:28:19 -07:00
Blaise Tine
0e3206747a scope_tap bug fix 2024-09-24 21:46:26 -07:00
Blaise Tine
ce4f90e843 scope analyzer updates 2024-09-24 01:20:26 -07:00
Blaise Tine
a9a5ded030 bitmanip logceil fix 2024-09-23 23:54:43 -07:00
Hyesoon Kim
b5f541b891
Merge pull request #180 from vortexgpgpu/vortex_vm
Vortex vm
2024-09-24 02:48:46 -04:00
Blaise Tine
2cf483ddf5 xrt afu bug fixes 2024-09-23 21:01:24 -07:00
Blaise Tine
9a6dbdf1a9 xrtsim addressing fix 2024-09-23 08:56:57 -07:00
Blaise Tine
818522f7e4 CI scripts update 2024-09-23 05:57:08 -07:00
Blaise Tine
030071571d test memory bank interleaving 2024-09-23 04:30:28 -07:00
Blaise Tine
e5e9a5c2e9 build fix 2024-09-23 04:03:04 -07:00
Blaise Tine
406583c0bd build fix 2024-09-23 04:00:23 -07:00
Blaise Tine
29ea3041c4 build fix 2024-09-23 03:52:03 -07:00
Blaise Tine
828b8827e7 build error fix 2024-09-23 03:36:35 -07:00
Blaise Tine
a80be895ba fixed compiler errors 2024-09-23 03:05:46 -07:00
Blaise Tine
923d2bb94c mark as executable 2024-09-23 02:30:34 -07:00
Blaise Tine
e38c2c1fba xilinx xrt platforms configuration 2024-09-23 02:12:47 -07:00
Blaise Tine
8bb5e5ab8a build error fix 2024-09-22 22:47:23 -07:00
Blaise Tine
b146fab290 xrt kernel registers update 2024-09-22 22:46:55 -07:00
Blaise Tine
15ead4acf6 xrt with merge memory interface 2024-09-22 22:46:10 -07:00
Blaise Tine
f5eca75311 handling synthesis builds with simulation enabled (e.g xrt with hw_emu) 2024-09-22 22:43:48 -07:00
Blaise Tine
5e123d0507 minor update 2024-09-22 22:31:54 -07:00
Blaise Tine
54f0c8e270 scope analyzer optimization 2024-09-22 22:31:14 -07:00
Blaise Tine
b8199decf4 opaesim and xrtsim multi-bank memory support 2024-09-22 03:54:40 -07:00
Blaise Tine
00feb8b424 scope analyzer bug fixes 2024-09-21 08:39:20 -07:00
Blaise Tine
7938c7be5f synthesis updates 2024-09-20 20:35:58 -07:00
sij814
3bac7eae6a changed fpnew commit 2024-09-20 16:52:12 -07:00
Blaise Tine
a61f97f6c6 minor update 2024-09-20 08:09:46 -07:00
Jaewon Lee
5ab13559e0
Update README.md 2024-09-20 10:08:53 -04:00
jaewon-lee-github
4383631543 Add BARE mode test and print out VM info 2024-09-20 09:58:50 -04:00
jaewon-lee-github
9cc0010835 change verilator path 2024-09-20 09:19:17 -04:00
jaewon-lee-github
9902856221 VERILATOR 2024-09-20 09:05:54 -04:00
jaewon-lee-github
e5f2442353 Update Virtual Memory testing 2024-09-20 08:58:11 -04:00
Udit Subramanya
ff9d52c162 Merge remote-tracking branch 'upstream/master' into develop-documentation 2024-09-20 08:26:08 -04:00
Udit Subramanya
acc8221a7e Merge remote-tracking branch 'origin/master' into develop-documentation
Bring in latest docs, to update in this branch
2024-09-20 08:20:17 -04:00
Blaise Tine
63cce35c1a scope taps annotation 2024-09-19 23:33:23 -07:00
Blaise Tine
d2db612bb4 adding scope support to xrtsim 2024-09-19 22:33:28 -07:00
sij814
e8ce3878bb Merge branch 'master' of github.com:vortexgpgpu/vortex 2024-09-19 13:36:46 -07:00
sij814
380c36d930 merged rtlsim branch 2024-09-19 13:31:25 -07:00
sij814
4fff940e42 two different versions of bypass connection 2024-09-19 13:21:14 -07:00
Blaise Tine
2d7f9eae0a minor update 2024-09-19 04:44:00 -07:00
Blaise Tine
a37309c6b0 xrtsim implementation 2024-09-19 04:24:20 -07:00
sij814
48f86a48f6 changed mem_req_arb in VX_cache_l3.sv to accept data_out 2024-09-18 22:05:40 -07:00
Blaise Tine
f0bff2a4a2 minor update 2024-09-17 20:31:12 -07:00
Blaise Tine
8e3bd5696b xilinx synthesis debugging fixes 2024-09-17 19:52:51 -07:00
sij814
992f8d97d3 sliced the bypass requests 2024-09-17 19:47:13 -07:00
Blaise Tine
8908f3e006 minor update 2024-09-17 10:05:17 -07:00
Blaise Tine
f2c1ad7831 minor update 2024-09-17 09:56:54 -07:00
Blaise Tine
8135f72cc9 configure update 2024-09-17 06:45:22 -07:00
Blaise Tine
50458bbae0 xilinx synthesis debugging foxes 2024-09-17 06:22:07 -07:00
Jaewon Lee
5a2d4e6c26
Merge pull request #179 from vortexgpgpu/jaewon-lee-github-patch-2
Update README.md
2024-09-13 10:50:03 -04:00
Jaewon Lee
0a48d98bc1
Update README.md
It has the instruction about the other branch(Vortex_vm).
2024-09-13 09:39:28 -04:00
Udit Subramanya
dc76101068 contribution stats 2024-09-13 09:09:38 -04:00
Blaise Tine
bbe9c0372f minor update 2024-09-13 00:35:42 -07:00
Blaise Tine
263893eb7c minor update 2024-09-13 00:03:08 -07:00
Blaise Tine
b77fff764e minor update 2024-09-12 22:12:03 -07:00
Blaise Tine
145eacc451 minor update 2024-09-12 21:08:19 -07:00
Blaise Tine
1ddd1ba1cc minor update 2024-09-12 20:15:41 -07:00
Blaise Tine
49ed88e59f minor update 2024-09-12 20:12:18 -07:00
Blaise Tine
7208f251b7 minor update 2024-09-12 20:07:19 -07:00
Blaise Tine
6cf0d9f7b4 fixed generate labels lint warnings to improve hardware debugging 2024-09-12 20:00:50 -07:00
Hyesoon Kim
ccf0135d97
Merge pull request #178 from vortexgpgpu/vortex_vm
Vortex Virtual Memory Support
2024-09-12 14:12:04 -04:00
jaewon-lee-github
daec55ae95 change the ci version 2024-09-12 11:24:37 -04:00
Jaewon Lee
e91eb4aed4 merge from master branch 2024-09-12 10:32:02 -04:00
Blaise Tine
5c72685356 minor update 2024-09-11 17:27:36 -07:00
Blaise Tine
f00f96377b disable tracing on synthesis mode 2024-09-11 17:16:34 -07:00
Blaise Tine
230b29de6f minor update 2024-09-11 06:57:43 -07:00
Blaise Tine
bb9ae8576d adding uuid support to memory transactions 2024-09-11 06:47:33 -07:00
Blaise Tine
ae24264a2a minor update 2024-09-11 05:40:05 -07:00
Blaise Tine
83d65e2cf1 tracing update 2024-09-10 16:22:34 -07:00
Blaise Tine
63840a20da minor update 2024-09-09 06:10:56 -07:00
Blaise Tine
b56aa00f4f reset cleanup 2024-09-08 20:37:28 -07:00
Blaise Tine
202af1e783 rtl bug fix 2024-09-08 20:33:27 -07:00
Blaise Tine
207840a97e minor update 2024-09-08 17:49:28 -07:00
Blaise Tine
b1dc2fba42 cache read byteenable bug fix 2024-09-08 17:47:17 -07:00
Blaise Tine
cc105eaea9 tracing refactoring 2024-09-08 14:54:04 -07:00
Blaise Tine
fa11d4c502 TRACING refactoring to support vivado/quartus simulators 2024-09-08 05:26:00 -07:00
Blaise Tine
6626f9201c minor update 2024-09-08 02:46:32 -07:00
Blaise Tine
7823f5529c minor update 2024-09-08 01:38:48 -07:00
Blaise Tine
7bef62aef8 minor update 2024-09-08 01:37:20 -07:00
Blaise Tine
1a35d3fed1 fixed byteen signal on memory read 2024-09-07 21:33:45 -07:00
Blaise Tine
0cbdc3be9e opae afu x warning fixes 2024-09-07 21:32:11 -07:00
Blaise Tine
aa1489d8eb fixed trace.vcd copy 2024-09-07 03:45:23 -07:00
Blaise Tine
a75ed78bf2 fixed getopt exitcode with invalid parameters 2024-09-07 03:42:46 -07:00
Blaise Tine
2041a4ad4a xrt.ini update 2024-09-07 01:43:30 -07:00
Blaise Tine
bfbe642170 adding RTL uuigen 2024-09-07 01:36:17 -07:00
Blaise Tine
fdc62c5f98 minor update 2024-09-06 01:27:54 -07:00
Blaise Tine
e178eb1330 operands's x-propagation bug fix (caught using vivado simulator) 2024-09-05 21:35:10 -07:00
Blaise Tine
7cbb026a12 minor update 2024-09-05 21:34:44 -07:00
Blaise Tine
efc8834c75 xilinx afu reset refactoring 2024-09-05 21:32:25 -07:00
Blaise Tine
8db77ea1cd minor updates 2024-09-05 21:29:01 -07:00
Blaise Tine
cf9172b8fc minor update 2024-09-04 20:16:54 -07:00
Blaise Tine
fb0cd1c272 minor update 2024-09-04 18:24:42 -07:00
Blaise Tine
0aaca84016 minor update 2024-09-04 18:22:37 -07:00
Blaise Tine
8d1baf677d minor update 2024-09-04 18:17:27 -07:00
Blaise Tine
37555b1208 minor update 2024-09-04 15:18:39 -07:00
Blaise Tine
96fb3566a9 minor update 2024-09-04 13:44:23 -07:00
Blaise Tine
7ca9a5e87e reset relay refactory 2024-09-04 13:39:51 -07:00
Blaise Tine
039e5e2ffc minor update 2024-09-04 03:52:55 -07:00
Blaise Tine
32738e0b74 CI script update 2024-09-04 03:39:29 -07:00
Blaise Tine
fd5903fef1 minor update 2024-09-04 03:34:25 -07:00
Blaise Tine
335b53475a minor updates 2024-09-04 02:01:59 -07:00
Blaise Tine
f9230bdac3 minor update 2024-09-03 06:14:09 -07:00
Blaise Tine
19d6142023 fixed fpu serialization 2024-09-03 04:54:29 -07:00
Blaise Tine
c28449f515 minor update 2024-09-02 21:58:12 -07:00
Blaise Tine
45ed8abf22 minor update 2024-09-02 19:39:28 -07:00
Blaise Tine
d16aee3ecd minor update 2024-09-02 10:37:51 -07:00
Blaise Tine
c4df7221c6 Merge branch 'master' of https://github.com/vortexgpgpu/vortex into develop 2024-09-02 04:13:35 -07:00
Blaise Tine
33bec667c2 minor update 2024-09-02 04:12:58 -07:00
Blaise Tine
a17580375b fpu timing optimization 2024-09-02 03:11:26 -07:00
Blaise Tine
40e04a409e adding PE switch 2024-09-02 02:34:08 -07:00
Blaise Tine
d7eae0c886 minor update 2024-09-02 02:33:30 -07:00
Blaise Tine
32636fac70 minor update 2024-09-01 10:15:02 -07:00
Blaise Tine
8215089194 minor update 2024-09-01 04:03:46 -07:00
Blaise Tine
d979cf277f decoder logic specialization 2024-09-01 04:00:57 -07:00
Blaise Tine
72c63a47f3 adding read-first mode support to block ram 2024-09-01 01:19:24 -07:00
Blaise Tine
431c0cfc46 minor update 2024-08-31 02:14:08 -07:00
Blaise Tine
83ea236b84 minor update 2024-08-31 01:58:21 -07:00
Blaise Tine
01fedb066c minor updates 2024-08-31 01:57:08 -07:00
Blaise Tine
7d0c141129 minor updates 2024-08-31 01:44:41 -07:00
Blaise Tine
6eee0728fb minor update 2024-08-29 03:22:09 -07:00
Blaise Tine
fc5bb387a2 minor update 2024-08-29 03:02:50 -07:00
Blaise Tine
961b9c3d63 minor update 2024-08-29 02:41:36 -07:00
Blaise Tine
5f2bf2418b minor update 2024-08-29 02:40:54 -07:00
Blaise Tine
847dee3473 minor update 2024-08-29 01:30:54 -07:00
Blaise Tine
105f884129 migration from fpnew to latest cvfpu core to resolve fpnew bugs and feature limitations 2024-08-29 00:48:51 -07:00
Blaise Tine
fa1fd39645 minor updates 2024-08-28 21:31:09 -07:00
Blaise Tine
a38960674e SimX split.N fix 2024-08-28 21:10:05 -07:00
Blaise Tine
0f41774fea SimX's decode minor fix 2024-08-28 19:07:15 -07:00
Blaise Tine
41e41c9688 adjust SimX's split/join to match RTL. 2024-08-28 18:46:30 -07:00
Blaise Tine
74a47ebbe4 dispatch unit fix 2024-08-28 04:36:13 -07:00
Blaise Tine
6c1e785004 minor update 2024-08-28 03:08:08 -07:00
Blaise Tine
4cc7426c44 minor update 2024-08-28 02:52:20 -07:00
Blaise Tine
cf42025c20 minor update 2024-08-28 01:35:55 -07:00
Blaise Tine
f4426e0127 fpu timing optimization 2024-08-28 01:27:51 -07:00
Blaise Tine
91b8c6e67a fixed xilinx fpu ip dut synthesis 2024-08-28 00:40:28 -07:00
Blaise Tine
c162d04b8f minor update 2024-08-27 03:17:01 -07:00
Blaise Tine
4480ed8b0e minor update 2024-08-27 01:19:02 -07:00
Blaise Tine
5adfd5ec68 minor update 2024-08-26 23:45:00 -07:00
Blaise Tine
6d5e71a062 minor update 2024-08-25 20:12:05 -07:00
Blaise Tine
9718a5b405 fpu timing optimization 2024-08-25 19:20:07 -07:00
Blaise Tine
51719f69bb minor update 2024-08-25 16:51:00 -07:00
Blaise Tine
2ca3439109 xrt runtime update 2024-08-25 15:52:27 -07:00
Blaise Tine
088aed022f minor update 2024-08-25 15:52:17 -07:00
Blaise Tine
df3fc150f4 minor update 2024-08-25 06:06:52 -07:00
Blaise Tine
b40441b68f minor update 2024-08-25 05:12:44 -07:00
Blaise Tine
bdcc5f5991 FPU decode optimization 2024-08-25 05:11:48 -07:00
Blaise Tine
b6879b25e3 switching to python3 dependency 2024-08-24 20:46:25 -07:00
Blaise Tine
592297582e fpu_unit timing optimization 2024-08-24 19:44:03 -07:00
Blaise Tine
e538dfa316 minor update 2024-08-24 19:11:06 -07:00
Blaise Tine
e05fe0d75b dispatch_unit speed up 2024-08-24 18:11:06 -07:00
Blaise Tine
383dc1f6b8 timing optimization 2024-08-24 17:38:01 -07:00
Blaise Tine
3b336d7fb3 register vs combinational signals naming consistency 2024-08-24 16:59:18 -07:00
Blaise Tine
4570a20eee minor update 2024-08-24 12:15:12 -07:00
Blaise Tine
10a8705161 minor update 2024-08-24 10:42:48 -07:00
Blaise Tine
1f5cc53434 minor update 2024-08-24 09:16:23 -07:00
Blaise Tine
0ed589a3bf minor update 2024-08-24 07:49:08 -07:00
Blaise Tine
cd97945d0d minor update 2024-08-24 04:51:27 -07:00
Blaise Tine
31a5ab714e xbar timing optimization 2024-08-24 01:57:45 -07:00
Blaise Tine
370daf1025 fifo refactoring 2024-08-24 01:56:56 -07:00
Blaise Tine
bcf7d9f960 timing optimization 2024-08-24 01:56:14 -07:00
Blaise Tine
ade6b2c985 timing optimization 2024-08-24 01:55:25 -07:00
Blaise Tine
4f9b15d96d minor update 2024-08-24 01:54:17 -07:00
Hanran Wu
f57841608e Merge branch 'vortex_vm_rebased' into vortex_vm 2024-08-23 17:45:59 -04:00
Hanran Wu
35c15f554d Merge branch 'mranduril-vortex_vm_rebased' into vortex_vm 2024-08-23 17:45:03 -04:00
Hanran Wu
ea9560b33b merge 2024-08-23 17:44:24 -04:00
Hanran Wu
86b0bdd93c merge into vortex_vm 2024-08-23 17:20:42 -04:00
Hanran Wu
66fd2d4e2d update ci 2024-08-23 16:42:31 -04:00
Blaise Tine
6eeb8eac0f minor update 2024-08-23 00:54:48 -07:00
Blaise Tine
df99b9da0e minor update 2024-08-22 16:29:27 -07:00
sij814
7ae7ffa007 pulled master and made initial changes 2024-08-22 18:37:34 +02:00
Blaise Tine
e4bfa47895 adding test coverage for xilinx synthesis 2024-08-22 02:51:17 -07:00
Blaise Tine
ca3499f3df minor update 2024-08-21 17:54:30 -07:00
Blaise Tine
811ceb5dc0 minor update 2024-08-21 13:00:05 -07:00
Blaise Tine
177f0efc59 minor update 2024-08-21 03:39:09 -07:00
Blaise Tine
9797c6c48a minor update 2024-08-21 03:38:15 -07:00
Blaise Tine
771a10ea0c minor update 2024-08-20 23:31:16 -07:00
Blaise Tine
005d480bb4 minor updates 2024-08-20 23:30:44 -07:00
Blaise Tine
5e241c153c Ci script update 2024-08-19 18:36:37 -07:00
Blaise Tine
693a9f648d Ci script update 2024-08-19 18:25:38 -07:00
Blaise Tine
1814ff6d40 xilinx standalone synthesis fixes 2024-08-18 22:02:37 -07:00
Blaise Tine
2762bd53ff minor updates 2024-08-18 18:56:17 -07:00
Blaise Tine
8e9026524a synthesis of the memory unit and local memory 2024-08-18 16:03:59 -07:00
Blaise Tine
3612ceda80 minor update 2024-08-18 02:13:43 -07:00
Blaise Tine
a2b24b4ed0 xilinx non-xrt synthesis fixes 2024-08-18 02:10:34 -07:00
Blaise Tine
de47307428 minor update 2024-08-18 01:57:36 -07:00
Blaise Tine
06ef53025d minor update 2024-08-17 21:19:10 -07:00
tinebp
6c607d32fe
Merge pull request #169 from sij814/simx
simx HBM initial implementation
2024-08-17 20:24:37 -07:00
Blaise Tine
f6daf9bb84 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-08-17 19:10:29 -07:00
tinebp
adcad92a73 extending OS support 2024-08-17 19:09:02 -07:00
Blaise Tine
51862dbc06 doc update 2024-08-17 19:05:47 -07:00
Blaise Tine
9d3d35c6b4 operands timing optimization 2024-08-17 16:03:02 -07:00
Blaise Tine
b6663eaff9 output register fix 2024-08-17 15:49:49 -07:00
Blaise Tine
a03471837c minor update 2024-08-17 15:21:13 -07:00
Blaise Tine
9638f5a6e6 minor update 2024-08-17 06:05:26 -07:00
Blaise Tine
62a4ee7a3e minor update 2024-08-17 05:32:21 -07:00
Blaise Tine
1f43d4a2fc ASE simulation fixes + docs update 2024-08-17 04:55:32 -07:00
Blaise Tine
8fe02093e2 minor update 2024-08-17 04:11:16 -07:00
Blaise Tine
20b82fd34d update configure to deep-copy syn directory tree 2024-08-17 04:09:50 -07:00
Blaise Tine
4b6f8efeaa removing trace_pkg to fix unsupported package dependencies 2024-08-17 04:07:10 -07:00
Blaise Tine
9fc9b43307 OPAE runtime bug fix 2024-08-17 02:18:04 -07:00
Blaise Tine
304761c6fc fixed blackbox temp driver mode with --rebuild=3 2024-08-16 22:32:35 -07:00
sij814
e34e4b790a forced memory bank change in opae 2024-08-16 16:53:18 -07:00
sij814
7a61b67170 added CAPS 2024-08-16 15:47:03 -07:00
Blaise Tine
f6ed49f19c minor update 2024-08-16 08:19:55 -07:00
Blaise Tine
d5fa26350c minor update 2024-08-16 01:35:20 -07:00
sij814
a523afbebe removed jammy 2024-08-15 22:30:32 -07:00
Blaise Tine
b83190c6e1 minor update 2024-08-15 21:29:06 -07:00
Blaise Tine
f4983cb380 core memory unit refactoring 2024-08-15 21:12:28 -07:00
Blaise Tine
65bd9afabb reset relay cleanup 2024-08-15 20:35:07 -07:00
Hanran Wu
54045fa05b skip build and tests ci stages for vm_disable due to verilator dependency 2024-08-15 23:04:08 -04:00
Hanran Wu
bc936c67a3 update ci 2024-08-15 23:02:03 -04:00
Hanran Wu
4a213e7c20 update readme 2024-08-15 23:00:14 -04:00
Hanran Wu
26df47d6e2 add a subset of tests for vm and update ci 2024-08-15 22:55:29 -04:00
sij814
d7e8fd74ff source_id = 0 2024-08-15 19:40:52 -07:00
Blaise Tine
49738672ec minor update 2024-08-15 19:34:50 -07:00
Hanran Wu
48ff4ee4e0 add VM_ENABLE flag to configure&compilation 2024-08-15 16:34:36 -04:00
Blaise Tine
aaff18cca2 bug fix 2024-08-15 05:11:51 -07:00
Blaise Tine
2b22d47dd9 minor update 2024-08-15 05:11:19 -07:00
Blaise Tine
98db249500 minor updates 2024-08-15 01:56:31 -07:00
Blaise Tine
9c346dee86 read-only cache optimization 2024-08-15 01:55:22 -07:00
Blaise Tine
58e5435f0f a priority arbiter performs better than round-robin during commit arbitration 2024-08-13 22:30:54 -07:00
Blaise Tine
cfb5cd5326 arbiter runtime assertion 2024-08-13 21:39:08 -07:00
Blaise Tine
aef1411af5 scoreboard timing optimization 2024-08-13 21:38:33 -07:00
tinebp
e23d569076
Merge pull request #171 from dhy2000/master
Same as #170
2024-08-13 18:48:07 -07:00
Blaise Tine
d6f1393627 memory coalescer timing optimization 2024-08-13 18:34:06 -07:00
sij814
ea34239b43 changes made for initial feedback 2024-08-13 16:52:27 -07:00
Hanran Wu
7528dd9c0f debug and remove travis.yml 2024-08-13 18:18:54 -04:00
Hanran Wu
19b5496f00 modify makefile to only compile simx 2024-08-13 17:54:06 -04:00
Blaise Tine
ee39da74b4 increasing reset delay 2024-08-13 04:14:02 -07:00
donghanyuan
1a9a04ac76 replace local static allocator with global static
Ensure MemoryPool is constructed before SimPlatform,
so that MemoryPool is destructed after SimPlatform.

Avoid use-after-free issue clearing events_ of SimPlatform
after SimPortEvent's allocator is destructed.
2024-08-13 18:13:41 +08:00
Blaise Tine
76f4cd66d3 minor update 2024-08-13 03:08:48 -07:00
Blaise Tine
3ae3afc59b minor update 2024-08-12 21:34:41 -07:00
Blaise Tine
5126a7c472 minor update 2024-08-12 21:32:20 -07:00
Blaise Tine
6c1ee9bfea arbiter fixes 2024-08-12 20:08:08 -07:00
Blaise Tine
14ae4b8c13 minor update 2024-08-12 20:07:50 -07:00
Blaise Tine
2edda834c3 minor update 2024-08-12 18:11:21 -07:00
sij814
47427ab22e regression test with source_id 0 2024-08-12 16:22:30 -07:00
Blaise Tine
d74ee43a66 minor update 2024-08-12 14:19:09 -07:00
Blaise Tine
79362dea4b minor update 2024-08-12 14:01:11 -07:00
Blaise Tine
9053919e92 fixed synthesis warning 2024-08-12 05:24:46 -07:00
Blaise Tine
ed66ee2806 arbitration update 2024-08-12 04:09:56 -07:00
sij814
bab9496117 debugging segmentation fault with 8 clusters 2024-08-12 03:52:48 -07:00
sij814
de81baaabf hbm for vortex 2.2 2024-08-12 02:52:47 -07:00
Blaise Tine
6f3add273d elastic buffer lutram refactoring 2024-08-11 20:28:39 -07:00
Blaise Tine
1fb0691bc7 minor update 2024-08-11 19:50:31 -07:00
sij814
c94c3651ec configure change 22.04 2024-08-11 14:47:43 -07:00
Blaise Tine
8fb73b6da7 fair arbiter optimization 2024-08-10 22:11:49 -07:00
Blaise Tine
32a882e26f arbiters optimization 2024-08-10 18:41:10 -07:00
Blaise Tine
eaa7ed7fe2 rtl arbiter update 2024-08-10 02:38:54 -07:00
Blaise Tine
c8d0357ac6 rtl arbiter fixes 2024-08-10 00:37:56 -07:00
Blaise Tine
229641441f adding static assertion 2024-08-09 18:13:52 -07:00
Blaise Tine
42afa2472f cdiv 2024-08-09 18:11:12 -07:00
Blaise Tine
455fc8389c refactoring priority encoder 2024-08-09 13:58:19 -07:00
Blaise Tine
ab21f76aed minor update 2024-08-07 19:44:24 -07:00
Blaise Tine
f1e79f4c0f fixed toolchain install on centos/7 2024-08-07 19:44:04 -07:00
tinebp
932c435a20
Merge pull request #101 from dhy2000/master
fix #100: change return type to float
2024-08-07 18:09:44 -07:00
tinebp
aad3b26332
Merge branch 'master' into master 2024-08-07 18:09:27 -07:00
Blaise Tine
30ebb65fc3 minor update 2024-08-06 23:36:37 -07:00
Blaise Tine
0d7012e69e minor update 2024-08-06 21:27:08 -07:00
Blaise Tine
bddf276335 memory request flags refactoring 2024-08-06 19:05:22 -07:00
Blaise Tine
e86eeab8ea Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2024-08-06 17:25:35 -07:00
Blaise Tine
7cdfac8ea1 fixed kernel lib dependency 2024-08-06 17:20:01 -07:00
tinebp
32497e19df
Merge pull request #166 from JacobLevinson/stencil3d-fixes
Updated prints and code spacing for stencil3d test
2024-08-06 14:32:18 -07:00
Blaise Tine
df8355ac76 fixed minor typo 2024-08-06 13:11:28 -07:00
Blaise Tine
c265ff97b8 minor updates 2024-08-06 12:58:58 -07:00
Blaise Tine
d276875ab9 fixed memory block size configuration 2024-08-06 12:47:05 -07:00
Blaise Tine
50b12ef754 fixed memory block size configuration 2024-08-06 12:46:19 -07:00
Blaise Tine
0096e60f03 Making LUT optimization optional 2024-08-06 12:38:30 -07:00
Blaise Tine
9dcb377b67 Moving from one-hot to binary muxing optimization
FPGA synthesis is suboptimal with one-hot muxing, particularly Xilinx Vivado.
This change fixed Xilinx synthesis for 256-thread cores.
2024-08-06 12:32:02 -07:00
Jacob Levinson
fbedc567e5 Updated prints and code spacing 2024-08-04 23:39:13 -07:00
Blaise Tine
b81ae8e431 reset network cleanup 2024-08-04 22:50:28 -07:00
Blaise Tine
42c62001ec fair arbiter speed optimization 2024-08-04 22:13:47 -07:00
Blaise Tine
e663db9b5a Merge branch 'develop' 2024-08-04 14:17:08 -07:00
Blaise Tine
9ddb06bf56 Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2024-08-04 14:16:22 -07:00
Blaise Tine
59108525e1 minor update 2024-08-04 14:16:08 -07:00
Blaise Tine
74579fd4dc minor update 2024-08-04 14:13:26 -07:00
Blaise Tine
668b590876 minor update 2024-08-03 18:25:24 -07:00
Blaise Tine
cb1e49d3f6 minor update 2024-08-03 17:08:16 -07:00
Blaise Tine
4b6a48c716 minor update 2024-08-03 13:37:01 -07:00
Blaise Tine
07981a585c minor update 2024-08-03 13:00:34 -07:00
Blaise Tine
4b93c9ffb5 minor updates 2024-08-03 11:49:12 -07:00
Blaise Tine
35fb50f9a6 minor updates 2024-08-03 10:43:08 -07:00
Blaise Tine
fce935f1c4 add debug level to FPGA makefile 2024-08-03 00:54:58 -07:00
Blaise Tine
fc0392e5e3 fixed typo 2024-08-03 00:54:17 -07:00
Blaise Tine
d09bce011b local memory test update 2024-08-03 00:52:41 -07:00
Blaise Tine
4c1b3fd88d local memory area optimization 2024-08-03 00:10:06 -07:00
Blaise Tine
52c5f1ff6b minor update 2024-08-02 23:32:34 -07:00
tinebp
3d51b8bcfd
Merge pull request #165 from JacobLevinson/fix-incomplete-type-error
Fix invalid use of incomplete type error by including <array>
2024-08-02 19:52:23 -07:00
Blaise Tine
76f74b8a59 minor update 2024-08-02 19:50:34 -07:00
Blaise Tine
e8cdae1225 minor fix in VX_local_mem.sv 2024-08-02 19:19:57 -07:00
Blaise Tine
067b7a8726 fixed typo 2024-08-02 18:57:07 -07:00
Jacob Levinson
3b81a32b12 Fix invalid use of incomplete type error by including <array> 2024-08-02 18:25:56 -07:00
Blaise Tine
410c47e2ae adding out_buf to VX_pe_serializer + testing 2024-08-02 18:16:50 -07:00
Blaise Tine
f723e7baf5 registering local memory bram output 2024-08-02 18:15:08 -07:00
Blaise Tine
9c5aee5e25 bram reset fix 2024-08-02 18:13:58 -07:00
Blaise Tine
c1b8ecfd1a block ram reset refactoring 2024-08-02 16:39:40 -07:00
Blaise Tine
16c209ac0c fixed operand collector critical path 2024-08-02 16:23:36 -07:00
tinebp
09028d8cee
Merge pull request #144 from nayannair/tensor-core
dummy commit
2024-08-02 15:45:18 -07:00
tinebp
c00368b7c6
Merge pull request #159 from JacobLevinson/readme_fixes
Fixes readme by removing $ from shell commands + small tweaks
2024-08-02 15:41:17 -07:00
Blaise Tine
3075c1737b fixed bug in VX_onehot_encoder.sv (see issue #126) 2024-08-02 15:12:10 -07:00
Blaise Tine
29c5a28273 minor update 2024-08-02 00:36:10 -07:00
Blaise Tine
e53b295eea writeback cache fixes 2024-07-31 20:53:40 -07:00
Blaise Tine
0a3035e6a7 minor update 2024-07-31 14:51:34 -07:00
Blaise Tine
81251b1af8 minor update 2024-07-31 13:55:44 -07:00
Blaise Tine
ef5d58dc9e cache regression tests 2024-07-31 11:45:51 -07:00
Blaise Tine
4dc34cfd2d hw arbitration update 2024-07-31 10:52:57 -07:00
Blaise Tine
3fe8f963aa writeback cache fixes 2024-07-31 02:20:32 -07:00
Blaise Tine
fc50b66819 regression script update 2024-07-31 00:16:13 -07:00
Blaise Tine
609155e490 fixed CSV trace converter 2024-07-31 00:15:32 -07:00
Blaise Tine
516ce43a5c testing writeback cache 2024-07-30 22:21:10 -07:00
Blaise Tine
2e77c9eec2 writeback cache fixes 2024-07-30 22:14:06 -07:00
Blaise Tine
95ca49a85f writeback cache fixes 2024-07-30 20:38:06 -07:00
Blaise Tine
e1c5b5277e minor update 2024-07-30 17:55:21 -07:00
Blaise Tine
029609b3fd disable atexit() support, not needed for static kernels. 2024-07-30 14:47:08 -07:00
Blaise Tine
2bc8a881b6 fixed trace log formatting 2024-07-30 12:05:36 -07:00
Jaewon Lee
9cc3e0a459
Merge pull request #151 from mranduril/vortex_vm
Add virtual memory allocator for vortex vm
2024-07-30 13:59:14 -04:00
Blaise Tine
abf8d2c51a minor update 2024-07-30 05:59:50 -07:00
Blaise Tine
6e55840a32 minor update 2024-07-30 03:32:49 -07:00
Blaise Tine
047960ac4d minor update 2024-07-30 02:51:12 -07:00
Blaise Tine
54e6421854 minor update 2024-07-30 02:42:25 -07:00
Blaise Tine
f46b764748 minor update 2024-07-30 01:59:50 -07:00
Blaise Tine
99cbae1820 writeback cache deadlock fix 2024-07-30 01:55:32 -07:00
Blaise Tine
edf960d9ed writeback cache fixes 2024-07-30 00:58:31 -07:00
Blaise Tine
22b0525c51 writeback cache fixes 2024-07-30 00:06:44 -07:00
Hanran Wu
34f7e3c982 config ramulator2 2024-07-30 00:18:28 -04:00
Blaise Tine
5600a8dd42 writeback cache fixes 2024-07-29 19:49:12 -07:00
Blaise Tine
bce5226614 minor update 2024-07-29 17:07:01 -07:00
Blaise Tine
2a9895c337 minor update 2024-07-29 16:20:23 -07:00
Blaise Tine
eab1791d46 CI script update 2024-07-29 16:07:46 -07:00
Blaise Tine
724cb40849 minor update 2024-07-29 16:01:08 -07:00
Blaise Tine
a62e651b02 minor update 2024-07-29 15:51:52 -07:00
Blaise Tine
3223a40a76 Verilator optimization flags update 2024-07-29 14:58:35 -07:00
Blaise Tine
8457163114 adding dirty bytes configuration to writeback cache 2024-07-29 14:43:49 -07:00
Blaise Tine
a91dabcc72 minor update 2024-07-29 13:52:04 -07:00
Blaise Tine
0709d656ca writeback cache fixes 2024-07-29 13:32:35 -07:00
Jaewon Lee
30258c04d2 Apply suggestions from code review
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
9db3870309 Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
34ef500910 Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
735b713613 Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
8d978f23ce Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Hanran Wu
78fc053ad5 save work before pull 2024-07-29 16:29:39 -04:00
Hanran Wu
6add1e16f6 debugged virtual memory allocator 2024-07-29 16:29:39 -04:00
Hanran Wu
49255bfa69 add virtual mem allocator addr space reservation 2024-07-29 16:29:39 -04:00
Hanran Wu
31133ae6e9 update destructor of vx_device 2024-07-29 16:29:39 -04:00
Hanran Wu
7916684c36 vpn allocator debug complete, now pass demo&vecadd tests 2024-07-29 16:29:39 -04:00
Hanran Wu
aa45f55126 vpn allocator added but doesn't pass any tests 2024-07-29 16:29:39 -04:00
Jaewon Lee
5877cfe8ae Change STARTUP_ADDR from 0x40000000 to 0x80000000(32b) and 0x180000000(64b) 2024-07-29 16:29:39 -04:00
Jaewon Lee
52233fe13a fixed compile error 2024-07-29 16:29:39 -04:00
Jaewon Lee
6d480b3da1 satp_ is not set, then we skip VAT 2024-07-29 16:29:39 -04:00
Jaewon Lee
2e61dad11f Update README.md
Update TOOLDIR to vortex-toolchain-2024-6-14/
2024-07-29 16:29:39 -04:00
Jaewon Lee
c99e4b37b6 Update README.md 2024-07-29 16:29:02 -04:00
Jaewon Lee
3a5278a62e 64bit support 2024-07-29 15:31:47 -04:00
Jaewon Lee
e21bf9afbd Merge Vortex 2.2 2024-07-29 15:31:17 -04:00
Jaewon Lee
9942f251e0 remove # 2024-07-29 15:31:17 -04:00
Jaewon Lee
da9c51aa3f Virtual Memory Support 2024-07-29 15:31:17 -04:00
Jaewon Lee
7b80da2538 Update upload and download function in simx runtime 2024-07-29 15:31:17 -04:00
Jaewon Lee
53c547f9de Change the declaration of set_processor_satp function 2024-07-29 15:31:17 -04:00
Jaewon Lee
43a90071e1 Merge Austin's code (Preliminary) 2024-07-29 15:31:17 -04:00
Jaewon Lee
2662b6bcab Update README.md 2024-07-29 15:31:17 -04:00
Jaewon Lee
da1f4baa5d Update README.md 2024-07-29 15:29:59 -04:00
Hanran Wu
768c966681 expand MemoryUnit class defs and add some tlb-related functions 2024-07-29 15:29:59 -04:00
Jaewon Lee
ae312f9022 Update README.md 2024-07-29 15:29:36 -04:00
Jaewon Lee
e20a610e67 Update README.md 2024-07-29 15:29:20 -04:00
Hanran Wu
e7660b6ffe Merge branch 'vortex_vm' of https://github.com/mranduril/vortex into vortex_vm
add changes from pull request reviews
2024-07-29 14:35:20 -04:00
Hanran Wu
de66a1b861 save work before pull 2024-07-29 14:35:11 -04:00
Blaise Tine
e34f824bf9 minor update 2024-07-29 03:56:08 -07:00
Blaise Tine
75f1f957d4 minor updates 2024-07-29 03:28:51 -07:00
Blaise Tine
96831c8b89 writeback cache fixes 2024-07-29 03:11:33 -07:00
Blaise Tine
03e21924f4 Verilator bug workaround
This was causing a buffer overflow, ignoring range checks
2024-07-29 00:28:07 -07:00
Blaise Tine
2e060faaf4 reverting uuid format to ease file diff 2024-07-29 00:05:51 -07:00
Blaise Tine
48b1ab7494 fixed uuid format 2024-07-28 18:03:34 -07:00
Blaise Tine
382b686d59 reset GRPs only in debug mode 2024-07-28 17:40:03 -07:00
Blaise Tine
160c428ef5 fixed uuid format 2024-07-28 17:29:15 -07:00
Blaise Tine
7f99007568 CI script update 2024-07-28 13:17:14 -07:00
Blaise Tine
bad280ae80 testing writeback cache 2024-07-28 12:48:01 -07:00
Blaise Tine
7cc6df7e7c CI script fix 2024-07-28 12:36:45 -07:00
Blaise Tine
fcf9c13d5f add completion job to CI 2024-07-28 12:24:55 -07:00
Blaise Tine
ca5232b58d writeback cache fix 2024-07-28 11:12:48 -07:00
Blaise Tine
4cd48193da minor update 2024-07-27 22:40:40 -07:00
Blaise Tine
4c09d107c3 writeback cache bug fix 2024-07-27 22:23:02 -07:00
Blaise Tine
69126dfd35 SimX writeback configuration 2024-07-27 17:25:13 -07:00
Blaise Tine
904a6dc136 fixed trace format consistency 2024-07-27 17:24:14 -07:00
Blaise Tine
81b17169ab minor update 2024-07-27 14:35:52 -07:00
Blaise Tine
f5014e8975 Adding support for cache flush and writeback
Crediting Yi-Lin Tsai's original work at https://github.com/richardyilin/GPU_writeback
2024-07-27 13:57:36 -07:00
Blaise Tine
a5bde3693f minor update 2024-07-27 13:41:44 -07:00
Blaise Tine
c8455eb562 minor update 2024-07-27 01:35:07 -07:00
Jacob Levinson
bdbe22ff4d Capitalize S in "set environment variables" 2024-07-25 16:40:19 -07:00
Jacob Levinson
2b481024bb Fixes readme by removing $ from shell commands + small tweaks
Removed $ from all the shell commands so that they can be easily
1-click copy-pasted from github without the dollar sign, as well
as changed "cd Vortex" to "cd vortex" to match the actual directory
spelling. Also removed obsolete travis ci link as the project
has moved to github ci.
2024-07-25 16:35:43 -07:00
Blaise Tine
fe8ab30345 minor update 2024-07-25 13:20:57 -07:00
Blaise Tine
ed0171bcc7 updated documentation for both altera and xilinx FPGA setup 2024-07-25 13:17:09 -07:00
Blaise Tine
20ce870b1b minor updates to opae runtime 2024-07-25 12:38:17 -07:00
Blaise Tine
b9328754bc ramulator build fix 2024-07-24 23:27:16 -07:00
Blaise Tine
b7594edf74 minor update 2024-07-24 20:54:03 -07:00
Blaise Tine
5b9d01a421 Yosys synthesis fixes 2024-07-24 20:16:54 -07:00
Blaise Tine
01187795d0 Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2024-07-24 16:30:41 -07:00
Blaise Tine
3de14dd8bf Verilator crash workaround 2024-07-24 16:09:27 -07:00
Blaise Tine
2773b87ae5 minor update 2024-07-24 15:38:49 -07:00
Blaise Tine
5457cab5d1 fixed memory scheduler bug 2024-07-24 15:37:55 -07:00
tinebp
f4376e2c4a
Merge pull request #155 from JacobLevinson/stencil3d
Added Stencil3d regression test
2024-07-24 08:59:42 -07:00
Jacob Levinson
e42c7c6a82
Merge branch 'master' into stencil3d 2024-07-23 21:01:10 -07:00
Blaise Tine
31663aa7ca Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-07-23 16:01:27 -07:00
Blaise Tine
b859d0c088 Dockerfile update 2024-07-23 16:01:00 -07:00
tinebp
3a5bdd4cf0 CI script fixes 2024-07-23 15:43:36 -07:00
Blaise Tine
ab761946e5 CI script fixes 2024-07-23 15:39:05 -07:00
Blaise Tine
aec73aa758 Docker update 2024-07-23 15:23:46 -07:00
Blaise Tine
8df962d6b4 Merge remote-tracking branch 'wb/l1_writeback' into develop 2024-07-23 14:20:09 -07:00
Blaise Tine
c5c4ccdd95 CI workflow optimization 2024-07-23 13:15:34 -07:00
Blaise Tine
b3a4d58825 fixed lsu stats 2024-07-23 13:13:51 -07:00
Blaise Tine
99f114aba3 minor updates 2024-07-23 13:13:24 -07:00
Blaise Tine
7808a0a3e3 change default configure tooldir ~/tools 2024-07-23 13:10:44 -07:00
Blaise Tine
eb92e0bdbe fixed simx lsu-unit bug 2024-07-23 11:29:56 -07:00
Blaise Tine
60f7786e17 BitVector class bug fixes 2024-07-23 09:40:20 -07:00
Blaise Tine
d39f3f688a runtime_assert 2024-07-23 01:13:25 -07:00
Blaise Tine
95f59d23a8 simx memory coalescer bug fix 2024-07-23 00:02:43 -07:00
Jacob Levinson
b489cc7abd Added *.cache to gitignore 2024-07-22 20:46:28 -07:00
Jacob Levinson
cd94288e05 Added all files for stencil3d regression test 2024-07-22 20:46:28 -07:00
Jacob Levinson
2f723c4afd Adjusted regression Makefile to include new test 2024-07-22 20:46:28 -07:00
Blaise Tine
e7b2bb81b4 stream pack optimization 2024-07-22 19:35:40 -07:00
Blaise Tine
d85661420b minor update 2024-07-22 19:35:20 -07:00
Blaise Tine
fbe236037e opencl warning fixes 2024-07-22 10:08:25 -07:00
Blaise Tine
9c43bae333 Regression fixes 2024-07-22 04:45:08 -07:00
Blaise Tine
530ec638a7 regression fixes 2024-07-22 04:29:34 -07:00
Blaise Tine
24e8e91a94 DramSim fix 2024-07-22 03:37:10 -07:00
Blaise Tine
527910b31e regression fixes 2024-07-22 00:29:56 -07:00
Blaise Tine
178f4d67fa minor update 2024-07-21 23:39:47 -07:00
Blaise Tine
a5377d78ca minor update 2024-07-21 14:50:59 -07:00
Blaise Tine
31e31ac528 minor updates 2024-07-21 11:37:34 -07:00
Blaise Tine
0bc459d84e minor update 2024-07-21 11:20:17 -07:00
Blaise Tine
7057103deb minor updates 2024-07-21 10:37:50 -07:00
Blaise Tine
cff6e320b2 CI script fixes 2024-07-21 10:23:01 -07:00
Blaise Tine
01b81830bf CI scripts fixes 2024-07-21 09:04:31 -07:00
Blaise Tine
1981179995 CI scripts fixes 2024-07-21 09:03:37 -07:00
Blaise Tine
e778015ab9 CI script fixes 2024-07-21 08:56:53 -07:00
Blaise Tine
cf3a77ab1b CI script fixes 2024-07-21 08:55:13 -07:00
Blaise Tine
92390187ff minor update 2024-07-21 08:14:47 -07:00
Blaise Tine
920da584af CI script fixes 2024-07-21 08:02:14 -07:00
Blaise Tine
71c2bab682 CI script fixes 2024-07-21 07:40:37 -07:00
Blaise Tine
75782f49da CI script fixes 2024-07-21 07:24:55 -07:00
Blaise Tine
ef38dc9744 CI script load/balancing 2024-07-21 07:14:05 -07:00
Blaise Tine
fb141ae522 Ramulator 2.0 with HBM 2.0 support
Verilator 5.0 support
SimX C++17 requirement
2024-07-21 06:57:13 -07:00
Blaise Tine
ca474c39b9 CI script fixes 2024-07-20 23:25:51 -07:00
Blaise Tine
e7a3b52d2a CI script fixes 2024-07-20 22:16:00 -07:00
Blaise Tine
0cb75c67c0 CI script fixes 2024-07-20 21:55:36 -07:00
Blaise Tine
80b79d4d31 CI script fixes 2024-07-20 21:40:44 -07:00
Blaise Tine
be935420f0 CI script fixes 2024-07-20 21:01:15 -07:00
Blaise Tine
198c3b92ff CI script fixes 2024-07-20 20:36:36 -07:00
Blaise Tine
f637219fc7 CI script fixes 2024-07-20 20:10:50 -07:00
Blaise Tine
5cb8ba5e92 CI script fixes 2024-07-20 19:49:12 -07:00
Blaise Tine
8feb8b191c CI script fixes 2024-07-20 19:28:42 -07:00
Blaise Tine
9f8e02824f CI script fixes 2024-07-20 19:19:39 -07:00
Blaise Tine
e4060fee3a CI script fixes 2024-07-20 18:57:02 -07:00
Blaise Tine
c706d410bf CI debugging... 2024-07-20 18:39:49 -07:00
Blaise Tine
a9f122bd9c CI script fixes 2024-07-20 18:01:56 -07:00
Blaise Tine
f5956b2eb7 CI script fixes 2024-07-20 14:24:57 -07:00
Jaewon Lee
90b4a16c9b
Apply suggestions from code review
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 11:20:27 -04:00
Jaewon Lee
c3e657f201
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:39:40 -04:00
Jaewon Lee
0f8e5505d3
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:36:58 -04:00
Jaewon Lee
a23fb26a8b
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:34:24 -04:00
Jaewon Lee
a4ee8dfa7f
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:25:33 -04:00
Blaise Tine
35f561b0e9 minor update 2024-07-20 04:49:56 -07:00
Blaise Tine
0e15f69838 CI fixes 2024-07-20 02:06:34 -07:00
Blaise Tine
3eaa6e4e55 travis to github workflow migration 2024-07-20 01:14:54 -07:00
Blaise Tine
aac57a5f81 minor updates 2024-07-19 23:36:10 -07:00
Blaise Tine
016f8e830c minor update 2024-07-19 21:50:10 -07:00
Blaise Tine
2fa99b7d17 minor update 2024-07-19 20:49:39 -07:00
Blaise Tine
e33b62f6fc extending csv trace gen to support multiple traces 2024-07-19 20:44:27 -07:00
Blaise Tine
5c73243a4e minor update 2024-07-19 20:34:43 -07:00
Blaise Tine
d07266b1f1 cache mshr replay bug fix 2024-07-19 14:57:58 -07:00
Blaise Tine
aa954a6aa8 operand collector RTL optimization 2024-07-19 09:31:08 -07:00
Blaise Tine
3cba4f4a5f minor update 2024-07-19 02:09:17 -07:00
Blaise Tine
4b73762aea profiling optimizations 2024-07-19 00:18:53 -07:00
Blaise Tine
65036e2d34 RTL optimizations 2024-07-18 10:25:23 -07:00
Blaise Tine
7507e36149 minor update 2024-07-17 21:13:22 -07:00
Blaise Tine
08bd918066 minor update 2024-07-17 20:49:26 -07:00
Blaise Tine
9944331f29 minor update 2024-07-17 20:38:49 -07:00
Blaise Tine
9a018014ff issue pipeline synthesis test 2024-07-17 20:34:44 -07:00
Blaise Tine
068ec6c5ef adding issue_top testbench 2024-07-17 20:29:59 -07:00
Blaise Tine
1b9f0a998b VX_mem_coalescer fix 2024-07-17 15:54:25 -07:00
Blaise Tine
b297c29a10 minor updates 2024-07-16 20:34:26 -07:00
Blaise Tine
6a03882bd2 minor updates 2024-07-16 10:52:07 -07:00
Blaise Tine
578c3d33d2 cumulative fixes 2024-07-15 10:13:57 -07:00
Blaise Tine
0dbcddcb54 minor update 2024-07-14 03:12:30 -07:00
Blaise Tine
f0ebe94253 arbiter refactoring 2024-07-14 00:44:35 -07:00
Blaise Tine
ad36bdbd44 minor updates 2024-07-14 00:13:49 -07:00
Blaise Tine
84da2ff8ff minor updates 2024-07-14 00:13:25 -07:00
Blaise Tine
a2307a28dc perf counters update 2024-07-12 19:02:43 -07:00
Blaise Tine
59ed24dc0b perf update 2024-07-12 18:34:42 -07:00
Blaise Tine
649e15c2b3 minor update 2024-07-12 05:29:00 -07:00
Blaise Tine
42f3d55e15 SimX operands collector optimization 2024-07-12 04:54:44 -07:00
Blaise Tine
7b94c983c9 bug fix 2024-07-12 04:49:55 -07:00
Blaise Tine
79ad8bb51f operands optimization 2024-07-12 04:37:47 -07:00
Blaise Tine
47c33cca66 minor update 2024-07-12 03:33:00 -07:00
Blaise Tine
34023ab814 minor update 2024-07-11 12:31:10 -07:00
Hanran Wu
91a1f41f99 debugged virtual memory allocator 2024-07-11 14:49:00 -04:00
Blaise Tine
2460b9b95b fixed local mem unit critical path 2024-07-11 05:44:49 -07:00
Blaise Tine
69f7213afc minor update 2024-07-11 05:31:46 -07:00
Hanran Wu
b8757c539d add virtual mem allocator addr space reservation 2024-07-10 22:39:00 -04:00
Blaise Tine
a854e9d25b minor update 2024-07-10 18:04:38 -07:00
Blaise Tine
156707bbec minor update 2024-07-10 16:41:35 -07:00
Blaise Tine
d6770f7adc fixed degenerate writes to R0 2024-07-10 16:40:37 -07:00
Blaise Tine
13d5a9c969 minor update 2024-07-10 09:29:41 -07:00
Blaise Tine
0acf7761ef operands optimization 2024-07-09 14:26:27 -07:00
Blaise Tine
ed0c1a778f scoreboard arbiter optimization 2024-07-09 14:22:08 -07:00
Blaise Tine
2651632884 minor update 2024-07-09 14:19:19 -07:00
Blaise Tine
7b9d2bdff2 minor updates 2024-07-09 13:39:44 -07:00
Blaise Tine
3ab353ab61 Using LUTRAM for elastic buffers 2024-07-09 13:35:58 -07:00
Blaise Tine
3efced37c5 trace INSTANCE_ID refactoring 2024-07-09 13:33:17 -07:00
Blaise Tine
1322499c3f minor update 2024-07-09 13:05:43 -07:00
Blaise Tine
f0a4d2e142 minor update 2024-07-09 11:48:29 -07:00
Hanran Wu
314ad3ff8a update destructor of vx_device 2024-07-09 13:42:57 -04:00
Hanran Wu
31837dd7c3 vpn allocator debug complete, now pass demo&vecadd tests 2024-07-08 17:10:19 -04:00
Hanran Wu
f0ea1acaa2 vpn allocator added but doesn't pass any tests 2024-07-08 17:07:30 -04:00
Jaewon Lee
c13e02b19f Change STARTUP_ADDR from 0x40000000 to 0x80000000(32b) and 0x180000000(64b) 2024-06-30 03:10:36 -04:00
Jaewon Lee
ccbb2243cc fixed compile error 2024-06-30 00:54:22 -04:00
Jaewon Lee
3caeeeea13 satp_ is not set, then we skip VAT 2024-06-30 00:35:26 -04:00
Jaewon Lee
b99cd97622 Merge branch 'vortex_vm' of github.com:vortexgpgpu/vortex into vortex_vm 2024-06-29 17:43:44 -04:00
Jaewon Lee
d531fa6b26 64bit support 2024-06-29 17:43:20 -04:00
Blaise Tine
58c3c63dae issue stage partitioning into slices 2024-06-28 08:22:18 -07:00
Blaise Tine
56e9e19508 gpr_slice redesign to use multi-banking architecture 2024-06-28 08:03:01 -07:00
Blaise Tine
ed11defd71 debug scope environment fix 2024-06-28 07:57:16 -07:00
Blaise Tine
934416d3ec debug scope environment fix 2024-06-28 07:55:58 -07:00
Jaewon Lee
4ab015ddd9
Update README.md
Update TOOLDIR to vortex-toolchain-2024-6-14/
2024-06-28 09:48:04 -04:00
Nayan Sivakumar Nair
5e63b8f35a dummy commit 2024-06-25 23:27:18 -04:00
Nayan Sivakumar Nair
5b0fc8cbd4 Fixes for PR 2024-06-25 03:18:50 -04:00
Jaewon Lee
3d98121ab6
Update README.md 2024-06-23 11:24:10 -04:00
Jaewon Lee
02091f3d44 Merge Vortex 2.2 2024-06-22 23:55:01 -04:00
Nayan Sivakumar Nair
a378aed67c Moved tc_num, tc_size param to makefile args 2024-06-21 22:23:24 -04:00
Blaise Tine
f97ffac7e7 minor update 2024-06-19 22:17:03 -07:00
Blaise Tine
b82a755e44 minor update 2024-06-19 16:39:04 -07:00
Blaise Tine
61df2ca428 minor update 2024-06-18 23:13:48 -07:00
Blaise Tine
deee7cd8b0 fixed tracing support for xilinx simulation 2024-06-18 23:11:58 -07:00
Jaewon Lee
2271d2b286 remove # 2024-06-19 02:04:24 -04:00
Jaewon Lee
862997fc94 Virtual Memory Support 2024-06-19 01:52:22 -04:00
Jaewon Lee
62673b4b72 Update upload and download function in simx runtime 2024-06-19 01:43:11 -04:00
Jaewon Lee
01c7b5e384 Change the declaration of set_processor_satp function 2024-06-19 01:36:26 -04:00
Jaewon Lee
cfcece940e Merge Austin's code (Preliminary) 2024-06-19 01:36:26 -04:00
Jaewon Lee
d8a6ac748a Update README.md 2024-06-19 01:09:56 -04:00
Jaewon Lee
2f2974ee72 Ignore the changed on ramulator 2024-06-19 01:09:56 -04:00
Jaewon Lee
6f0af066e8 Update README.md 2024-06-19 01:09:56 -04:00
Hanran Wu
2b426693f5 expand MemoryUnit class defs and add some tlb-related functions 2024-06-19 01:09:56 -04:00
Jaewon Lee
cf3f2d4f6f Update README.md 2024-06-19 01:09:56 -04:00
Jaewon Lee
54af5eb186 Update README.md 2024-06-19 01:09:56 -04:00
Jaewon Lee
efe12ca6bf Update README.md 2024-06-19 01:09:18 -04:00
Varsha Singhania
0e3badf723 Script checkin and code cleanup 2024-06-18 02:19:57 -04:00
Varsha Singhania
99c6a1af5a Tensor cores in Vortex 2024-06-17 04:28:51 -04:00
Blaise Tine
abdea91120 minor update 2024-06-14 19:15:00 -07:00
Blaise Tine
4adabc3f68 fixed xilinx synthesis build 2024-06-14 19:14:33 -07:00
Blaise Tine
4a11c1ec0f FpNew RTL fix 2024-06-14 16:29:52 -07:00
Blaise Tine
c5e57ce5d5 minor update 2024-06-14 08:38:17 -07:00
Blaise Tine
896eb09ec3 docs update 2024-06-14 07:46:25 -07:00
Blaise Tine
cf3413c824 minor update 2024-06-14 07:45:37 -07:00
Blaise Tine
3f52964a94 Merge branch 'develop' 2024-06-13 23:31:47 -07:00
Blaise Tine
8b53f01d1c minor update 2024-06-13 23:29:41 -07:00
Blaise Tine
e603c302b2 minor update 2024-06-13 10:12:40 -07:00
Blaise Tine
3611ad5b4c travis update for pocl 64-bit testing 2024-06-13 09:41:58 -07:00
Blaise Tine
5bcf24ed55 64-bit rtl fix 2024-06-13 06:26:45 -07:00
Blaise Tine
78b6e0638c Docker update 2024-06-13 02:14:54 -07:00
Blaise Tine
0e2e09b2ea minor update 2024-06-12 05:43:02 -07:00
Blaise Tine
3393522b54 minor update 2024-06-12 03:59:31 -07:00
Blaise Tine
4ced61790a minor update 2024-06-12 02:32:27 -07:00
Blaise Tine
04b967217d minor update 2024-06-11 13:42:05 -07:00
Blaise Tine
7887e6b432 merge fix 2024-06-11 09:31:13 -07:00
Blaise Tine
267521a1cb Merge branch 'develop' 2024-06-11 05:54:12 -07:00
Blaise Tine
023dc477a6 travis version update 2024-06-11 03:05:04 -07:00
Blaise Tine
250a5741f7 Migrating all tests to new kernel launch API 2024-06-11 02:53:36 -07:00
Blaise Tine
8b63305201 minor update 2024-06-08 21:36:28 -07:00
Blaise Tine
e38187acb5 minor update 2024-06-08 02:47:31 -07:00
Blaise Tine
c0044bc303 minor update 2024-06-08 01:58:21 -07:00
Blaise Tine
99eaaf6189 uuid_gen cleanup 2024-06-08 01:57:38 -07:00
Blaise Tine
fa5590bbf7 minor update 2024-06-08 01:05:41 -07:00
Blaise Tine
96cb381885 vx_spawn_threads implementation 2024-06-07 07:52:15 -07:00
Blaise Tine
8c5a783477 minor update 2024-06-07 07:48:33 -07:00
Blaise Tine
e79f905d87 minor update 2024-06-07 07:47:40 -07:00
Blaise Tine
01cca01511 TLS alignment fix 2024-06-07 05:38:54 -07:00
Blaise Tine
f992f5bae6 fully disable dynamic linking for kernels 2024-06-07 02:11:15 -07:00
Blaise Tine
b21ea24815 removing dynamic link hooks in linker script to fix TLS support in kernels 2024-06-06 22:07:56 -07:00
Blaise Tine
9e20e6edb6 minor update 2024-06-06 00:32:58 -07:00
Blaise Tine
6c56edf65d minor update 2024-06-04 14:28:30 -07:00
Blaise Tine
6b800f2054 minor update 2024-06-03 20:39:16 -07:00
Blaise Tine
a7fac99fd9 minor update 2024-06-03 20:35:26 -07:00
Blaise Tine
0235da5798 minor update 2024-06-03 19:56:32 -07:00
Blaise Tine
bc280e2703 minor update 2024-06-03 19:55:03 -07:00
Blaise Tine
2b9628c73b minor update 2024-06-03 19:54:21 -07:00
Blaise Tine
681646d27a minor update 2024-05-29 17:02:16 -07:00
Blaise Tine
5b56f76289 minor update 2024-05-28 23:15:26 -07:00
Blaise Tine
a38d47c0df riscv-tests update 2024-05-28 22:34:00 -07:00
Blaise Tine
68d2ac6f5e 32-bit/64-bit address space compatibility 2024-05-28 22:30:59 -07:00
Blaise Tine
364136d66f minor update 2024-05-28 11:14:50 -07:00
Blaise Tine
f8ef570778 riscv tests refactoring 2024-05-28 10:46:31 -07:00
Blaise Tine
0426856ab4 minor update 2024-05-27 20:21:37 -07:00
Blaise Tine
9dbff0e77c makefile clean-all 2024-05-27 20:03:16 -07:00
Blaise Tine
f0253a5f80 minor update 2024-05-27 19:12:32 -07:00
Blaise Tine
47d578c4d2 runtime refactoring 2024-05-27 18:55:42 -07:00
Blaise Tine
319c18158a minor update 2024-05-27 16:01:24 -07:00
Blaise Tine
c1000f6a3b runtime refactoring 2024-05-27 15:59:41 -07:00
Blaise Tine
405d6b468f minor update 2024-05-27 02:15:03 -07:00
Blaise Tine
32f39264ef runtime dynamic loading for driver-specific implementations 2024-05-26 19:05:17 -07:00
Blaise Tine
d413786b9e minor update 2024-05-26 17:14:11 -07:00
Blaise Tine
f35e4266ed minor update 2024-05-25 17:21:28 -07:00
Kumar Saunack
bd33f1edc3
Add explicit imports cpp (#131)
* add imports for array and unordered_map

* add auto
2024-05-24 15:21:21 -04:00
Blaise Tine
a72c68acf4 minor update 2024-05-23 22:33:12 -07:00
Blaise Tine
f6663d6618 memory fence update 2024-05-23 22:24:30 -07:00
Blaise Tine
b1ae82bae5 minor update 2024-05-23 15:11:59 -07:00
Blaise Tine
e4fdc740ba minor update 2024-05-23 02:06:08 -07:00
Blaise Tine
f3c0b7d186 minor update 2024-05-21 17:23:28 -07:00
Blaise Tine
dc5cbfe932 adding support for build install target 2024-05-21 17:02:32 -07:00
Blaise Tine
e1c8ff02be minor update 2024-05-21 12:46:15 -07:00
Blaise Tine
210e4a8e8f minor update 2024-05-21 12:45:03 -07:00
Blaise Tine
9b79d60507 minor update 2024-05-21 05:39:35 -07:00
Blaise Tine
d99aaf3933 fixed spawn_task_groups bug 2024-05-21 04:18:04 -07:00
Blaise Tine
c7c1dddeac minor update 2024-05-20 08:42:12 -07:00
Blaise Tine
94cadb69d2 minor update 2024-05-20 08:00:43 -07:00
Blaise Tine
b3f96e288a + support for ZICOND RISC-V extension
+ RTL decode refactoring
2024-05-20 00:17:24 -07:00
Blaise Tine
8d97d2c998 minor update 2024-05-17 18:30:49 -07:00
Blaise Tine
0aaf010a62 doc update 2024-05-17 13:30:26 -07:00
Blaise Tine
ab56cc6d4a minor update 2024-05-13 23:57:33 -07:00
Blaise Tine
15ca8290d0 updated configure script usage 2024-05-13 08:19:37 -07:00
Blaise Tine
0fbe22dafa minor update 2024-05-13 06:59:46 -07:00
Blaise Tine
ae11df3e6a minor update 2024-05-12 20:28:49 -07:00
Blaise Tine
19beb0728e minor update 2024-05-12 20:21:23 -07:00
Blaise Tine
60107cf2b6 XRT runtime and simulation support for Vortex AFU (incomplete) 2024-05-11 17:43:49 -07:00
Blaise Tine
98f080340a perf counters profiling refactoring 2024-05-11 17:10:08 -07:00
Blaise Tine
dc27d3c014 minor update 2024-05-11 13:10:03 -07:00
Blaise Tine
bb53658ce7 minor update 2024-05-11 06:51:32 -07:00
Blaise Tine
112e8235ee minor update 2024-05-11 03:45:24 -07:00
Blaise Tine
4e7bc9654b wspawn fix 2024-05-10 21:42:20 -07:00
Blaise Tine
d1ba02681e minor update 2024-05-10 03:46:22 -07:00
Blaise Tine
3cceed1e0b lock CSR unit only for FPU CSRs 2024-05-10 03:45:24 -07:00
Blaise Tine
c1fa2bbc38 minor update 2024-05-10 01:27:54 -07:00
Blaise Tine
82a417f1f0 minor update 2024-05-09 22:59:47 -07:00
Blaise Tine
df95c7c4c6 minor update 2024-05-09 22:51:07 -07:00
Blaise Tine
599edfbbeb minor update 2024-05-09 21:42:44 -07:00
Blaise Tine
aaba10a133 minor update 2024-05-09 21:27:38 -07:00
Blaise Tine
4bb31e63e2 minor update 2024-05-09 21:21:28 -07:00
Blaise Tine
ca11ccee1e minor udpate 2024-05-09 16:37:53 -07:00
Blaise Tine
a1ddddf929 minor update 2024-05-09 16:34:30 -07:00
Blaise Tine
ad5d82d430 minor update 2024-05-09 13:23:49 -07:00
Blaise Tine
b9da621c09 minor update to ci/toolchain_install.sh.in 2024-05-09 12:37:24 -07:00
Blaise Tine
da65e964ed minor update 2024-05-09 03:37:03 -07:00
Blaise Tine
4a2b984710 minor update 2024-05-09 03:23:48 -07:00
Blaise Tine
13e9e6d9f9 minor update 2024-05-09 02:47:10 -07:00
Blaise Tine
1864e46c14 minor update 2024-05-08 22:39:03 -07:00
Blaise Tine
b9c0082cb8 minor update 2024-05-08 22:01:37 -07:00
Blaise Tine
717b2e9ba1 enable barrier and spawn skip mode if N=1 2024-05-08 04:23:38 -07:00
Blaise Tine
b6aa44f39f spawn_tasks_ex optimization 2024-05-07 23:40:38 -07:00
Blaise Tine
0003926d01 checking workgroup occupancy 2024-05-07 23:38:51 -07:00
Blaise Tine
98ead77405 querying num_barriers device caps 2024-05-07 23:35:50 -07:00
Blaise Tine
82908a3026 documentation 2024-05-07 23:33:45 -07:00
Blaise Tine
9e7074b871 minor update 2024-05-07 04:04:57 -07:00
Blaise Tine
a569d54104 minor update 2024-05-07 03:58:44 -07:00
Blaise Tine
04e72a2341 minor update 2024-05-07 03:50:00 -07:00
Blaise Tine
ce26a0a3cc minor update to travis and regression script 2024-05-06 23:20:42 -07:00
Blaise Tine
bae6cd2d86 minor update 2024-05-06 19:02:16 -07:00
Blaise Tine
a94d868cd5 minor update 2024-05-06 18:54:44 -07:00
Blaise Tine
faacace2a4 minor update 2024-05-06 07:54:50 -07:00
Blaise Tine
6519d356ed travis udpate 2024-05-06 02:08:41 -07:00
Blaise Tine
da272064b5 minor update 2024-05-06 01:43:59 -07:00
Blaise Tine
009861433a minor update 2024-05-06 01:28:03 -07:00
Blaise Tine
79f5824c74 adding work groups support to spawntasks API 2024-05-06 01:25:13 -07:00
Blaise Tine
0cabd24f08 opencl tests update 2024-05-06 01:23:25 -07:00
Blaise Tine
311799b423 Passing LLC flags to POCL 2024-05-06 00:55:00 -07:00
Blaise Tine
189990e351 minor update 2024-05-06 00:54:07 -07:00
Blaise Tine
badfb24e01 CSRs update 2024-05-06 00:51:38 -07:00
Blaise Tine
c8bae13448 minor update 2024-05-03 19:51:09 -07:00
Blaise Tine
f4202868bc MINOR UPDATE 2024-05-02 18:44:43 -07:00
Blaise Tine
42202e2940 minor update 2024-05-02 18:25:10 -07:00
Blaise Tine
b68d32b83c minor update 2024-05-02 18:00:42 -07:00
Blaise Tine
7d82212fb1 minor update 2024-05-02 13:20:33 -07:00
Blaise Tine
ef5aa6d610 minor update 2024-05-02 11:57:56 -07:00
Blaise Tine
68a3664a04 minor update 2024-05-01 23:57:14 -07:00
Blaise Tine
1b2d9ed538 minor udpate 2024-05-01 21:01:20 -07:00
Blaise Tine
b5ca7a999c SIMT stack fix 2024-05-01 20:50:21 -07:00
Blaise Tine
896aca0c62 mpm counters query fix 2024-05-01 17:21:37 -07:00
Blaise Tine
4737cdabbd minor update 2024-05-01 08:06:45 -07:00
Blaise Tine
06896f272c minor update 2024-05-01 02:13:52 -07:00
Blaise Tine
27a9e30857 minor update 2024-05-01 00:52:35 -07:00
Blaise Tine
e84f978502 minor update 2024-05-01 00:02:52 -07:00
Blaise Tine
5ea10fd872 minor update 2024-04-30 22:47:59 -07:00
Blaise Tine
aea1d2c8eb minor updates to the build system 2024-04-30 16:27:20 -07:00
Blaise Tine
19484a531a minor update 2024-04-30 04:19:59 -07:00
Blaise Tine
284d438acd minor update 2024-04-30 02:20:40 -07:00
Blaise Tine
ca79e69355 SIMT Tack compression 2024-04-30 02:19:32 -07:00
Blaise Tine
9df25ff48f minor update 2024-04-28 04:42:22 -07:00
Blaise Tine
604c41fc54 minor update 2024-04-28 04:30:31 -07:00
Blaise Tine
a167c07e7d adding wait cycles to wspawn 2024-04-28 04:27:47 -07:00
Blaise Tine
db0f0fd353 runtime API refactoring to support memory reservation and protection 2024-04-28 04:23:00 -07:00
Blaise Tine
c554f53e44 minor update 2024-04-26 18:03:30 -07:00
Blaise Tine
daf1360d83 minor single-thread fix 2024-04-20 22:32:28 -07:00
Blaise Tine
0cd2ea458f minor update 2024-04-18 13:25:11 -07:00
Blaise Tine
0098d197c9 minor update 2024-04-18 02:40:49 -07:00
Blaise Tine
efc7a971dc minor update 2024-04-17 22:52:12 -07:00
Blaise Tine
dbe052594d minor update 2024-04-17 19:26:27 -07:00
Blaise Tine
95b23fa97b minor update 2024-04-17 18:34:16 -07:00
Blaise Tine
f369006956 minor update 2024-04-17 04:17:13 -07:00
Blaise Tine
0c746d93bb minor update 2024-04-17 04:13:05 -07:00
Blaise Tine
e3d06e0d9c minor update 2024-04-17 03:36:28 -07:00
Blaise Tine
57a5aead4c minor update 2024-04-17 01:05:04 -07:00
Blaise Tine
8fa28bfca1 location independent kernel loading support/fixes 2024-04-17 01:00:38 -07:00
Blaise Tine
69fdb4bd04 mino rupdate 2024-04-15 19:23:20 -07:00
Blaise Tine
ac669a30ca UUID refactoring 2024-04-14 22:01:03 -07:00
Blaise Tine
8a933520f0 minor update 2024-04-14 22:00:39 -07:00
Blaise Tine
dfed5b29c0 minor update 2024-04-13 23:28:31 -07:00
Blaise Tine
1f8b3dcc5c minor update 2024-04-13 22:56:50 -07:00
Blaise Tine
75255269a1 minor update 2024-04-13 22:49:10 -07:00
Blaise Tine
15dc9afe93 position-independent kernel fix 2024-04-13 22:26:59 -07:00
Blaise Tine
2488e4736c minor update 2024-04-12 14:45:37 -07:00
Blaise Tine
9380fd3d72 minor update 2024-04-12 14:09:16 -07:00
Blaise Tine
83a7deb5da minor update 2024-04-12 13:46:23 -07:00
Blaise Tine
25d0c76d14 enabling explicit kernel address and arguments allocation 2024-04-12 06:58:42 -07:00
Blaise Tine
b32ea5b750 fixed compiler relocation issue with R_RISCV_GOT_HI20 2024-04-11 15:35:51 -07:00
Blaise Tine
9feb46387e minor update 2024-04-10 21:56:01 -07:00
Blaise Tine
1963ae04b2 minor update 2024-04-09 22:20:52 -07:00
Blaise Tine
7f61f0b015 minor update 2024-04-09 18:59:44 -07:00
Blaise Tine
bb47a14388 remove codecov 2024-04-09 18:31:33 -07:00
Blaise Tine
3de8075636 opencl tests GPU support 2024-04-09 18:20:24 -07:00
Blaise Tine
de66d2ec3e minor fix 2024-04-09 15:27:31 -07:00
Blaise Tine
5231bb8576 minor update 2024-04-09 15:22:59 -07:00
Blaise Tine
e58c12fc02 minor update 2024-04-09 14:49:23 -07:00
Blaise Tine
b54e85113d adding trigonomitry etst to dogfood 2024-04-09 04:14:28 -07:00
Blaise Tine
cef05d3110 minor update 2024-04-09 04:13:41 -07:00
Blaise Tine
bdcf2a0af0 syscalls update 2024-04-09 03:40:18 -07:00
Blaise Tine
dd461468d3 enabling MSCRATCH CSR 2024-04-09 02:01:17 -07:00
Blaise Tine
7784dfe9b7 CSR 32-bit/64-bit refactoring 2024-04-09 02:00:34 -07:00
Blaise Tine
135cc4f5a7 minor update 2024-04-09 01:58:04 -07:00
Blaise Tine
db35f5d768 simx decode bug fix. 2024-04-09 01:34:14 -07:00
Blaise Tine
0e4501aecd minor update 2024-04-07 06:09:15 -07:00
Blaise Tine
135445ce9c minor update 2024-04-07 05:15:08 -07:00
Blaise Tine
d96e2fa56b minor update 2024-04-07 04:34:02 -07:00
Blaise Tine
e9b66d5a1c minor update 2024-04-07 03:20:19 -07:00
Blaise Tine
6400e73c42 minor updates 2024-04-06 22:05:46 -07:00
Blaise Tine
ca1bbdf415 minor update 2024-04-06 09:11:25 -07:00
Blaise Tine
b02746fb0d minor update 2024-04-06 08:32:16 -07:00
Blaise Tine
8d252bb6f8 minor update 2024-04-06 07:39:00 -07:00
Blaise Tine
04314cefed using Vortex custom libc library 2024-04-06 06:42:47 -07:00
Blaise Tine
70717fb42b tests update 2024-04-06 03:21:49 -07:00
Blaise Tine
351aa48f6e per-workgroup local memory fix 2024-04-06 02:05:51 -07:00
Blaise Tine
3534175d43 build configuration update 2024-04-06 01:44:39 -07:00
Blaise Tine
b4f5616814 minor update 2024-03-31 05:36:15 -07:00
Blaise Tine
7df4f1ba03 minor update 2024-03-31 03:50:50 -07:00
Blaise Tine
c39b8e1112 minor update 2024-03-31 03:38:15 -07:00
Blaise Tine
d743e2ba22 minor update 2024-03-31 01:22:01 -07:00
Blaise Tine
87bff732ac minor update 2024-03-31 00:31:47 -07:00
Blaise Tine
486cd0b866 minor update 2024-03-30 22:45:31 -07:00
Blaise Tine
da8608d702 minor update 2024-03-30 18:07:32 -07:00
Blaise Tine
d0c441519d minor update 2024-03-30 12:29:27 -07:00
Blaise Tine
2a38ef0db8 minor update 2024-03-30 12:17:11 -07:00
Blaise Tine
914044b57b minor update 2024-03-30 11:02:09 -07:00
Blaise Tine
d1ff95eb9f minor update 2024-03-30 10:35:15 -07:00
Blaise Tine
beebc2adf0 minor update 2024-03-30 10:03:40 -07:00
Blaise Tine
e4e0ee8fef minor update 2024-03-30 09:32:12 -07:00
Blaise Tine
299657d693 minor update 2024-03-30 05:49:45 -07:00
Blaise Tine
03cf694238 minor update 2024-03-30 05:33:57 -07:00
Blaise Tine
6b81b26ffc enabling Makefile configuration with build folder support 2024-03-30 02:28:39 -07:00
Blaise Tine
99c91987fb opencl benchmarks fixes 2024-03-28 06:40:09 -07:00
Blaise Tine
697e2fffbb adding dogfood fclamp test 2024-03-27 06:40:13 -07:00
Blaise Tine
70f2f58ac9 minor update 2024-03-26 16:31:24 -07:00
Blaise Tine
8ab4c53e27 new conv3x regression test 2024-03-26 16:30:47 -07:00
Blaise Tine
c8dd0aafb0 minor update 2024-03-26 16:22:02 -07:00
Blaise Tine
ae12b45f77 minor update 2024-03-25 15:01:40 -07:00
Blaise Tine
896aa6b2a1 minor update 2024-03-25 14:53:03 -07:00
Blaise Tine
74368ab65a minor update 2024-03-24 23:40:00 -07:00
Blaise Tine
afc0b2056d minor update 2024-03-24 20:59:32 -07:00
Blaise Tine
c6e09d40ff minor update 2024-03-24 20:32:33 -07:00
Blaise Tine
402c911991 simx mem_coalescer 2024-03-24 20:31:36 -07:00
Blaise Tine
86055335ee minor update 2024-03-24 19:39:53 -07:00
Blaise Tine
d27656819b regression update 2024-03-24 14:44:19 -07:00
Blaise Tine
7324900c57 minor updates 2024-03-24 14:19:23 -07:00
Blaise Tine
459abdef21 minor update 2024-03-24 03:26:28 -07:00
Blaise Tine
830c43517b FPU area reduction via time-multiplexing 2024-03-24 02:39:17 -07:00
Blaise Tine
19ba4c7bd4 minor update 2024-03-23 20:25:30 -07:00
Blaise Tine
178e1a6b2a minor update 2024-03-23 20:10:01 -07:00
Blaise Tine
d0a53ff53e minor update 2024-03-23 19:52:04 -07:00
Blaise Tine
231eb4e78b SOA to AOS conversion 2024-03-23 19:50:57 -07:00
Blaise Tine
04de44d280 dogfood update 2024-03-23 19:49:44 -07:00
Blaise Tine
3cd7f41012 minor update 2024-03-21 09:36:38 -07:00
Blaise Tine
35a782a7ba local memory runtime refactoring 2024-03-21 09:31:55 -07:00
Blaise Tine
2776f2cdf0 minor update 2024-03-20 14:31:00 -07:00
Blaise Tine
6d0e345073 minor update 2024-03-20 14:19:13 -07:00
Blaise Tine
91135bb855 minor update 2024-03-20 13:12:55 -07:00
Blaise Tine
fbc49b1455 m 2024-03-19 01:04:15 -07:00
Blaise Tine
354d3663e1 minor update 2024-03-19 00:08:12 -07:00
Blaise Tine
45e791437c minor update 2024-03-18 21:51:08 -07:00
Blaise Tine
17cdc32eee minor update 2024-03-18 21:33:16 -07:00
Blaise Tine
c175e11a18 Using packed LSU memory requests within the code 2024-03-18 21:22:02 -07:00
Blaise Tine
df38cc00f5 adding gpr_slice 2024-03-18 01:43:44 -07:00
Blaise Tine
d65f6e064a adding lsu_slice 2024-03-18 01:11:03 -07:00
Blaise Tine
6556e8c66d extending memory interface with address type 2024-03-18 00:35:03 -07:00
Blaise Tine
dc19d25bcc fixed scoreboard critical path 2024-03-17 23:05:02 -07:00
Blaise Tine
787f02e4c6 minor update 2024-03-15 00:03:05 -07:00
Blaise Tine
d9426d5789 minor update 2024-03-14 21:36:39 -07:00
Blaise Tine
100eb49201 minor update 2024-03-14 12:57:02 -07:00
Blaise Tine
f1522e68f8 simx memory coalescing support 2024-03-14 12:20:39 -07:00
Blaise Tine
07c063031f tabs cleanup 2024-03-13 23:19:54 -07:00
Blaise Tine
a8f2bb30da minor update 2024-03-13 21:06:26 -07:00
Blaise Tine
454b9e7444 removed dup address detection, replaced with coalescing 2024-03-13 13:56:07 -07:00
Blaise Tine
4766787478 minor update 2024-03-13 13:15:15 -07:00
Blaise Tine
a8e892593e maxfanout update 2024-03-12 01:46:42 -07:00
Blaise Tine
840ced22a9 simx refactoring - emulation vs simulation discrete separation 2024-03-12 00:23:42 -07:00
Blaise Tine
ff6f33acff simx refactoring: simobject::push(), instr_trace, FUtype, pending_instrs_ 2024-03-11 15:39:49 -07:00
Blaise Tine
3ec37c6c40 minor update 2024-03-09 00:28:37 -08:00
Blaise Tine
c1e639bd44 minor update 2024-03-08 01:15:04 -08:00
Blaise Tine
489738751f minor update 2024-03-07 02:01:59 -08:00
Blaise Tine
33406d2e83 dispatch/commit refactoring 2024-03-06 17:35:33 -08:00
Blaise Tine
27db94b20d minor update 2024-03-06 15:48:37 -08:00
Blaise Tine
72f5976dd6 minor update 2024-03-06 15:32:32 -08:00
Blaise Tine
44e685f8af adding num_lsu_blocks 2024-03-06 15:31:50 -08:00
Blaise Tine
4d7b2b9ea5 minor update 2024-03-06 08:10:22 -08:00
Blaise Tine
de0f4dda44 minor update 2024-03-05 10:12:05 -08:00
Blaise Tine
b0f3e91006 minor update 2024-03-04 22:19:52 -08:00
Blaise Tine
288147ac4f memory coalescing RTL implementation 2024-03-04 22:18:39 -08:00
Blaise Tine
274e6a4c52 removed NULL local_group to prevent OpenCL runtime automatic allocation of moving global group into local group which will be inefficient on Vortex. 2024-03-04 20:30:32 -08:00
Blaise Tine
de8453d0be wspawn thread index reordering 2024-03-04 20:28:45 -08:00
Blaise Tine
589e351832 minor update 2024-03-04 20:28:02 -08:00
Blaise Tine
34324bb768 minor update 2024-03-04 15:00:12 -08:00
Blaise Tine
fe3c712d66 minor update 2024-03-02 23:16:09 -08:00
Blaise Tine
badb0c8300 minor update 2024-03-02 21:33:54 -08:00
Blaise Tine
c344e28476 minor update 2024-03-01 08:55:25 -08:00
Blaise Tine
9c900394fa minor update 2024-03-01 08:19:26 -08:00
Blaise Tine
cce517b02b minor update 2024-02-29 01:24:32 -08:00
Blaise Tine
26d45ed9db renamed shared to local memory 2024-02-29 01:04:52 -08:00
Blaise Tine
1b9c39283e minor update 2024-02-29 00:10:29 -08:00
Blaise Tine
dd40e9c754 cache subsystem refactoring 2024-02-29 00:08:14 -08:00
Blaise Tine
59497e52df minor update 2024-02-28 16:36:26 -08:00
Blaise Tine
51ae0b71f3 minor update 2024-02-28 16:35:22 -08:00
Blaise Tine
041f573815 cleaned up vector code from simx 2024-02-21 18:27:52 -08:00
Blaise Tine
fc0f5e2ca4 minor update 2024-02-16 21:02:30 -08:00
Blaise Tine
76a828cf50 minor update 2024-02-16 21:02:12 -08:00
Blaise Tine
bb3a49f95b minor update 2024-02-15 04:31:13 -08:00
Blaise Tine
413e933b8a OUT_BUF / OUT_REG refactoring 2024-02-15 04:08:50 -08:00
Blaise Tine
bb4c150aaf per-warp ibuffer scoreboard scheduling optimization 2024-02-14 14:24:47 -08:00
Blaise Tine
a78ac7a246 minor update 2024-02-14 14:24:29 -08:00
Blaise Tine
f13a885815 minor update 2024-02-14 12:53:58 -08:00
Blaise Tine
422bcdee0f renamed convolution test => conv3 2024-02-14 12:53:13 -08:00
Blaise Tine
08c06be601 minor update 2024-02-14 10:02:10 -08:00
Blaise Tine
de90863333 minor update 2024-02-13 08:41:02 -08:00
Blaise Tine
f83094e0d9 minor update 2024-02-12 04:19:31 -08:00
Blaise Tine
21b54761e3 minor update 2024-02-11 22:13:16 -08:00
Blaise Tine
a01f3a0370 cache_bypass refactoring 2024-02-11 21:41:33 -08:00
Blaise Tine
1e5550ccda Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-11 08:03:22 -08:00
Blaise Tine
9e54ccde6d adding support for top-module parameter replacement during synthesis tests 2024-02-10 21:54:35 -08:00
Blaise Tine
7f778000ea docs update 2024-02-10 18:20:34 -08:00
Blaise Tine
b8ccff7ade Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-10 17:31:36 -08:00
Blaise Tine
5f2b10b8a6 minor update 2024-02-09 21:20:23 -08:00
Blaise Tine
3fee1a6193 minor update 2024-02-09 20:34:44 -08:00
Blaise Tine
ae7b01405c CI minor update 2024-02-08 14:10:00 -08:00
Blaise Tine
0823c71b2e Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-04 20:32:33 -08:00
Blaise Tine
be0db6e1a5 minor update 2024-02-04 20:32:05 -08:00
Blaise Tine
50028c1a33 Merge remote-tracking branch 'origin' into develop 2024-02-04 20:19:30 -08:00
Blaise Tine
e06e6646a9 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-04 20:18:21 -08:00
Blaise Tine
8d4b6c804f minor update 2024-02-04 20:17:12 -08:00
Blaise Tine
6f7a389a1f arbiters unlock refactoring 2024-02-04 20:16:18 -08:00
Blaise Tine
fe15647f98 minor update 2024-02-04 02:11:53 -08:00
Blaise Tine
b0b7cd2b1e minor updates 2024-02-03 19:09:53 -08:00
Blaise Tine
61da7f609d Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 13:36:20 -08:00
Blaise Tine
f9cd8be19e minor update 2024-01-31 13:35:43 -08:00
Blaise Tine
0a38312527 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 12:14:49 -08:00
Blaise Tine
dab262e4f7 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 12:03:50 -08:00
Blaise Tine
8ab7c590fd disabling fetch's deadlock check when L1 caches are present 2024-01-31 06:16:54 -08:00
Blaise Tine
e3090930c0 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 03:37:08 -08:00
Blaise Tine
e2d1387df8 elastic buffers classification 2024-01-31 00:39:37 -08:00
Shinnung Jeong
fd65ed95eb fix bug to access memory address in simx 2024-01-30 20:45:47 -05:00
Blaise Tine
597e3b0e35 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-29 04:49:42 -08:00
Blaise Tine
b31d868a27 Merge branch 'develop' 2024-01-28 17:34:46 -08:00
Blaise Tine
b6919d19a7 minor update 2024-01-28 17:34:07 -08:00
Blaise Tine
eac6a485fa Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-28 00:36:41 -08:00
Blaise Tine
6045597ad0 Merge branch 'develop' 2024-01-28 00:25:55 -08:00
Blaise Tine
1c1140d517 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-28 00:25:16 -08:00
Blaise Tine
38b92ad592 - using SV_DPI defines to disable DPI in synthesis-based simulations
- fixed Intel ASE run script: run_ase.sh
2024-01-28 00:22:21 -08:00
dhy2000
b08c7403f6
fix #100: change return type to float 2024-01-26 19:56:50 +08:00
lpc97667
a9d578f3ab Docs update 2024-01-10 15:56:22 -05:00
Blaise Tine
f04ee15f94 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-03 19:11:33 -08:00
Blaise Tine
f0e6a435f8 Merge branch 'develop' 2024-01-03 19:09:49 -08:00
Blaise Tine
648bf75b0b minor update 2024-01-03 19:09:18 -08:00
Blaise Tine
665907355e Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-03 19:02:00 -08:00
Blaise Tine
3b75418ea9 Merge branch 'develop' 2024-01-03 10:24:48 -08:00
Blaise Tine
f2e8317412 updated documentation 2024-01-03 10:23:38 -08:00
Blaise Tine
cc042a4098 Merge branch 'develop' 2023-12-31 15:30:20 -08:00
Blaise Tine
bd18b03cc3 minor update 2023-12-31 15:29:04 -08:00
Blaise Tine
e7f8b40d93 minor update 2023-12-31 11:46:41 -08:00
Blaise Tine
ec2a35def9 Merge branch 'develop' 2023-12-31 11:26:48 -08:00
Blaise Tine
29c15dc9c4 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-30 00:54:07 -08:00
Blaise Tine
031d24e695 minor updates 2023-12-30 00:52:44 -08:00
Blaise Tine
645ca62c91 Merge branch 'develop' 2023-12-29 15:14:23 -08:00
Blaise Tine
7425446b15 fixed DESTDIR support in simumation Makefiles 2023-12-29 14:11:16 -08:00
Blaise Tine
db5db20800 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-28 20:12:17 -08:00
Blaise Tine
a7548db5ec Merge branch 'develop' 2023-12-28 20:08:12 -08:00
Blaise Tine
e62d122c9b enabling temporary build directory for blackbox multiple instances 2023-12-28 20:06:10 -08:00
Blaise Tine
e8cbfb4a72 Merge branch 'develop' 2023-12-28 16:11:29 -08:00
Blaise Tine
51e621cdf1 minor update 2023-12-28 16:08:26 -08:00
Blaise Tine
83d55da2d5 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-28 13:00:00 -08:00
Blaise Tine
afea903332 Merge branch 'develop' 2023-12-28 12:33:58 -08:00
Blaise Tine
36f5dd87fe minor update 2023-12-28 12:22:22 -08:00
Blaise Tine
e217bc2c23 adding tracking for SFU stalls 2023-12-28 12:12:11 -08:00
Blaise Tine
57ecb5e530 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop
Conflicts:
	hw/rtl/interfaces/VX_pipeline_perf_if.sv
2023-12-20 18:33:17 -08:00
Blaise Tine
c7a81d1493 adding sockets support to simx and cache subsystem refactoring
minor update

minor update

minor updates
2023-12-20 15:16:12 -08:00
Blaise Tine
914b680aed operands optimization
minor updates

minor updates

minor update

operands optimization

minor updates

minor updates
2023-12-20 15:07:23 -08:00
Blaise Tine
c94fd8e83b Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-20 14:58:34 -08:00
Blaise Tine
d567d67caa minor update 2023-12-20 14:47:27 -08:00
Blaise Tine
7842520848 minor update 2023-12-20 14:04:31 -08:00
Blaise Tine
d105b91438 adding sockets support to simx and cache subsystem refactoring 2023-12-20 11:57:44 -08:00
Blaise Tine
cf51770c2c Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop
Conflicts:
	hw/rtl/core/VX_lsu_unit.sv
2023-12-18 13:01:13 -08:00
Blaise Tine
2c6d84bac9 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-18 12:54:13 -08:00
Blaise Tine
39e6f95c2b operands optimization
minor updates

minor updates

minor update
2023-12-18 12:53:34 -08:00
Blaise Tine
5a2bc88d20 operands optimization
minor updates

minor updates
2023-12-18 04:44:01 -08:00
Blaise Tine
e04e026a14 profiling update
minor updates
2023-12-18 04:43:44 -08:00
Blaise Tine
c6845a4c8d profiling timing optimization
minor update

minor update

minor update
2023-12-18 04:43:10 -08:00
Blaise Tine
f5f9e3dfdb profiling timing optimization 2023-12-18 04:43:10 -08:00
Blaise Tine
6c7ac35054 profiling optimizations
minor updates
2023-12-18 04:43:00 -08:00
Blaise Tine
1be3778731 erge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-18 03:17:05 -08:00
Blaise Tine
caef3c5990 minor updates 2023-12-18 03:16:41 -08:00
Blaise Tine
2c1d858a2d Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-17 23:21:31 -08:00
Blaise Tine
5a6d98a2e2 minor updates 2023-12-17 23:20:43 -08:00
Blaise Tine
d79ff077b7 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-15 14:11:38 -08:00
Blaise Tine
f506ae6cea operands optimization 2023-12-15 14:09:51 -08:00
Blaise Tine
0468577b0b minor updates 2023-12-15 14:09:38 -08:00
Blaise Tine
4e51544402 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-15 01:14:21 -08:00
Blaise Tine
144d4d629b profiling update 2023-12-15 01:01:39 -08:00
Blaise Tine
0bdbbd2667 minor update 2023-12-14 15:55:19 -08:00
Blaise Tine
7fbd253d3f minor update 2023-12-14 03:01:36 -08:00
Blaise Tine
b001eb43f8 minor update 2023-12-14 02:57:30 -08:00
Blaise Tine
7afc557ba8 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-13 18:06:31 -08:00
Blaise Tine
7cae30076a profiling timing optimization 2023-12-13 18:04:12 -08:00
Blaise Tine
3c3bdc08ad Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 17:11:27 -08:00
Blaise Tine
100d4459cd profiling timing optimization 2023-12-05 17:10:30 -08:00
Blaise Tine
664a58b742 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 06:19:51 -08:00
Blaise Tine
c397fb5f4b minor updates 2023-12-05 06:15:15 -08:00
Blaise Tine
22fef445ff profiling optimizations 2023-12-05 05:12:13 -08:00
Blaise Tine
e5b41bcd66 wctl unit bug fix 2023-12-05 04:57:52 -08:00
Blaise Tine
1912f52bee profiling bug fix 2023-12-05 04:56:46 -08:00
root
900a1efaca BUFFER_EX refactoring 2023-12-05 04:55:50 -08:00
root
d288fb360c Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 04:50:20 -08:00
Blaise Tine
e44d38bb02 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 04:00:33 -08:00
Hyesoon Kim
63a4ccef16
Merge pull request #95 from Udit8348/develop-documentation
Documentation for Testing and Contributing
2023-12-01 09:20:21 -05:00
Udit Subramanya
0d5887b938 Merge branch 'develop' into develop-documentation
Attempted to directly push to develop, but permission was denied.
Therefore, I moved my changes to my development branch located on my fork.
I have permission to commit changes to my fork, and I can open a PR to bring those changes into main repo
2023-12-01 08:56:17 -05:00
Udit Subramanya
a43b7432a0 add environment setup readme 2023-12-01 08:55:01 -05:00
Udit Subramanya
af94d24963 Merge branch 'develop' into develop-documentation 2023-12-01 08:49:46 -05:00
Udit Subramanya
247f91a296
Merge branch 'vortexgpgpu:master' into master 2023-12-01 08:39:18 -05:00
Udit Subramanya
b20320236d adding documemtation for contributing and documentation 2023-12-01 08:22:44 -05:00
Blaise Tine
9c2916f3fc minor update 2023-11-28 12:03:48 -08:00
Blaise Tine
e8d56dc013 minor update 2023-11-27 22:16:36 -08:00
Blaise Tine
621d6de6ce Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-11-27 08:07:52 -08:00
Blaise Tine
24973ffca0 scoreboard optimization & profiling 2023-11-27 05:53:36 -08:00
Blaise Tine
4b68235389 fixed simx dispatcher bug 2023-11-27 04:50:55 -08:00
Blaise Tine
9dc5793046 minor udpate 2023-11-27 02:21:47 -08:00
Blaise Tine
1271c9c03f minor update 2023-11-27 02:12:12 -08:00
Blaise Tine
ebec982434 minor update 2023-11-27 02:04:53 -08:00
Blaise Tine
2f1171ca76 minor update 2023-11-27 02:04:22 -08:00
Blaise Tine
11752b2562 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-11-18 00:27:46 -08:00
Blaise Tine
88f99e9525 Merge branch 'master' of https://github.com/vortexgpgpu/vortex into develop 2023-11-17 01:20:10 -08:00
Blaise Tine
43154cf738 minor updates 2023-11-16 23:41:59 -08:00
Blaise Tine
d65cc61df5 minor update 2023-11-16 12:00:37 -08:00
Blaise Tine
547d916ae2 minor update 2023-11-15 13:00:06 -08:00
Blaise Tine
2c94e358b8 perf counter bug fix 2023-11-15 00:52:39 -08:00
Blaise Tine
ede5e1c311 minor update 2023-11-15 00:28:26 -08:00
Blaise Tine
61e3442ef8 adding opencl convolution benchmark 2023-11-14 22:31:30 -08:00
Blaise Tine
4e7a536918 adding tensor regression test. 2023-11-14 05:37:46 -08:00
Blaise Tine
ecf546bc4a minor update 2023-11-13 20:00:39 -08:00
Blaise Tine
b274b8cc21 minor updates 2023-11-13 00:23:15 -08:00
Blaise Tine
a08d3ebd42 minor update 2023-11-12 23:40:59 -08:00
Blaise Tine
62cdd8e993 minor update 2023-11-11 15:49:39 -08:00
Blaise Tine
64dc5e1667 Merge branch 'develop' 2023-11-10 02:57:42 -08:00
Blaise Tine
c1e168fdbe Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
2023-11-10 02:47:05 -08:00
Blaise Tine
6e93787e59 minor update 2023-11-06 00:16:24 -08:00
Blaise Tine
e0becb1599 minor update 2023-11-05 20:03:31 -08:00
Blaise Tine
d13c5f2986 hw unit tests fixes 2023-11-05 18:51:31 -08:00
Blaise Tine
1fd5a95f5a minor update 2023-11-03 18:04:05 -04:00
Blaise Tine
9f1f1ecaa3 minor update 2023-11-03 08:36:28 -04:00
Blaise Tine
c9e6518e05 cache bindings and memory perf refactory 2023-11-03 08:18:18 -04:00
Blaise Tine
69f9ae778d cleanup 2023-11-03 08:12:03 -04:00
Blaise Tine
970cbf066a cleanup 2023-11-03 08:09:59 -04:00
Blaise Tine
1c100c4cf5 minor update 2023-10-22 23:31:58 -07:00
Blaise Tine
cb7d6b964c minor update 2023-10-22 02:25:34 -07:00
Blaise Tine
8cf833b7eb minor update 2023-10-21 19:12:07 -07:00
Blaise Tine
8fe373891f minor update 2023-10-21 17:55:29 -07:00
Blaise Tine
3cacb4f80f minor update 2023-10-20 02:21:20 -07:00
Blaise Tine
65ca0fff3a minor update 2023-10-20 00:48:05 -07:00
Blaise Tine
d47cccc157 Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
2023-10-19 20:51:22 -07:00
Nicholas Ade
afa9e4003c adding mul and divide to bfloat 2023-04-13 04:20:23 -04:00
Nicholas Ade
1b6d9bd3a5 Making the bfloat files 2023-04-12 15:01:09 -04:00
1880 changed files with 2029563 additions and 349697 deletions

8
.clang-format Normal file
View file

@ -0,0 +1,8 @@
Language: Cpp
BasedOnStyle: LLVM
IndentWidth: 2
TabWidth: 2
ColumnLimit: 0
UseTab: Never
BreakBeforeBraces: Attach
AlwaysBreakTemplateDeclarations: true

View file

@ -1,3 +0,0 @@
ignore:
- "./examples/*"
- "./tests/*"

175
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,175 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: CI
on: [push, pull_request]
jobs:
setup:
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
submodules: recursive
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Install Dependencies
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
run: |
sudo bash ./ci/install_dependencies.sh
- name: Setup Toolchain
if: steps.cache-toolchain.outputs.cache-hit != 'true'
run: |
TOOLDIR=$PWD/tools
mkdir -p build
cd build
../configure --tooldir=$TOOLDIR
ci/toolchain_install.sh --all
- name: Setup Third Party
if: steps.cache-thirdparty.outputs.cache-hit != 'true'
run: |
make -C third_party > /dev/null
build:
runs-on: ubuntu-22.04
needs: setup
strategy:
matrix:
xlen: [32, 64]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install Dependencies
run: |
sudo bash ./ci/install_dependencies.sh
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Run Build
run: |
TOOLDIR=$PWD/tools
mkdir -p build${{ matrix.xlen }}
cd build${{ matrix.xlen }}
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
source ci/toolchain_env.sh
make software -s > /dev/null
make tests -s > /dev/null
- name: Upload Build Artifact
uses: actions/upload-artifact@v4
with:
name: build-${{ matrix.xlen }}
path: build${{ matrix.xlen }}
tests:
runs-on: ubuntu-22.04
needs: build
strategy:
fail-fast: false
matrix:
name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm, vector]
xlen: [32, 64]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install Dependencies
run: |
sudo bash ./ci/install_dependencies.sh
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Download Build Artifact
uses: actions/download-artifact@v4
with:
name: build-${{ matrix.xlen }}
path: build${{ matrix.xlen }}
- name: Run tests
run: |
cd build${{ matrix.xlen }}
source ci/toolchain_env.sh
chmod -R +x . # Ensure all files have executable permissions
if [ "${{ matrix.name }}" == "regression" ]; then
./ci/regression.sh --unittest
./ci/regression.sh --isa
./ci/regression.sh --kernel
./ci/regression.sh --regression
else
./ci/regression.sh --${{ matrix.name }}
fi
complete:
runs-on: ubuntu-22.04
needs: tests
steps:
- name: Check Completion
run: echo "All matrix jobs passed"

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
/build*
/.vscode
*.cache
*.code-workspace

11
.gitmodules vendored
View file

@ -1,12 +1,9 @@
[submodule "third_party/fpnew"]
path = third_party/fpnew
url = https://github.com/pulp-platform/fpnew.git
[submodule "third_party/softfloat"]
path = third_party/softfloat
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
[submodule "third_party/cocogfx"]
path = third_party/cocogfx
url = https://github.com/gtcasl/cocogfx.git
[submodule "third_party/ramulator"]
path = third_party/ramulator
url = https://github.com/CMU-SAFARI/ramulator.git
url = https://github.com/CMU-SAFARI/ramulator2.git
[submodule "third_party/cvfpu"]
path = third_party/cvfpu
url = https://github.com/openhwgroup/cvfpu.git

View file

@ -1,76 +0,0 @@
language: cpp
dist: bionic
os: linux
compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- build-essential
- valgrind
- verilator
- yosys
- libpng-dev
- libboost-serialization-dev
- libstdc++6
install:
# Set environments
- export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
- export VERILATOR_ROOT=/opt/verilator
- export PATH=$VERILATOR_ROOT/bin:$PATH
# Install toolchain
- ci/toolchain_install.sh -all
# build project
- make -s
# stages ordering
stages:
- test
jobs:
include:
- stage: test
name: coverage
script: cp -r $PWD ../build_coverage && cd ../build_coverage && ./ci/travis_run.py ./ci/regression.sh -coverage
- stage: test
name: coverage64
script: cp -r $PWD ../build_coverage64 && cd ../build_coverage64 && ./ci/travis_run.py ./ci/regression64.sh -coverage
- stage: test
name: tex
script: cp -r $PWD ../build_tex && cd ../build_tex && ./ci/travis_run.py ./ci/regression.sh -tex
- stage: test
name: cluster
script: cp -r $PWD ../build_cluster && cd ../build_cluster && ./ci/travis_run.py ./ci/regression.sh -cluster
- stage: test
name: config
script: cp -r $PWD ../build_config && cd ../build_config && ./ci/travis_run.py ./ci/regression.sh -config
- stage: test
name: debug
script: cp -r $PWD ../build_debug && cd ../build_debug && ./ci/travis_run.py ./ci/regression.sh -debug
- stage: test
name: stress0
script: cp -r $PWD ../build_stress0 && cd ../build_stress0 && ./ci/travis_run.py ./ci/regression.sh -stress0
- stage: test
name: stress1
script: cp -r $PWD ../build_stress1 && cd ../build_stress1 && ./ci/travis_run.py ./ci/regression.sh -stress1
- stage: test
name: compiler
script: cp -r $PWD ../build_compiler && cd ../build_compiler && ./ci/travis_run.py ./ci/test_compiler.sh
- stage: test
name: tex
script: cp -r $PWD ../build_tex && cd ../build_tex && ./ci/travis_run.py ./ci/regression.sh -tex
- stage: test
name: unittest
script: cp -r $PWD ../build_unittest && cd ../build_unittest && ./ci/travis_run.py ./ci/regression.sh -unittest
after_success:
# Gather code coverage
- lcov --directory driver --capture --output-file driver.cov # capture trace
- lcov --directory simx --capture --output-file simx.cov # capture trace
- lcov --list driver.cov # output coverage data for debugging
- lcov --list simx.cov # output coverage data for debugging
# Upload coverage report
- bash <(curl -s https://codecov.io/bash) -f driver.cov
- bash <(curl -s https://codecov.io/bash) -f simx.cov

20
Dockerfile.dev Normal file
View file

@ -0,0 +1,20 @@
FROM ubuntu:20.04
LABEL "Udit Subramanya"="usubramanya3@gatech.edu"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y build-essential valgrind git wget libpng-dev libboost-all-dev uuid-dev ccache cmake
# Third-party repository (ubuntu-toolchain-r PPA) to install gcc-11/g++-11 on Ubuntu 20.04
RUN apt-get install -y manpages-dev software-properties-common
RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
RUN apt-get install -y gcc-11 g++-11
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11
# create a directory for mounting the volume
WORKDIR /root/vortex

221
LICENSE
View file

@ -1,24 +1,201 @@
Copyright (c) <2020>, <Georgia Institute of Technology>
All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Georgia Institute of Technology nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -1,14 +0,0 @@
all:
$(MAKE) -C third_party
$(MAKE) -C hw
$(MAKE) -C sim
$(MAKE) -C driver
$(MAKE) -C runtime
$(MAKE) -C tests
clean:
$(MAKE) -C hw clean
$(MAKE) -C sim clean
$(MAKE) -C driver clean
$(MAKE) -C runtime clean
$(MAKE) -C tests clean

74
Makefile.in Normal file
View file

@ -0,0 +1,74 @@
include config.mk
.PHONY: build software tests
all:
$(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw
$(MAKE) -C sim
$(MAKE) -C kernel
$(MAKE) -C runtime
$(MAKE) -C tests
build:
$(MAKE) -C hw
$(MAKE) -C sim
$(MAKE) -C kernel
$(MAKE) -C runtime
$(MAKE) -C tests
software:
$(MAKE) -C hw
$(MAKE) -C kernel
$(MAKE) -C runtime/stub
tests:
$(MAKE) -C tests
clean-build:
$(MAKE) -C hw clean
$(MAKE) -C sim clean
$(MAKE) -C kernel clean
$(MAKE) -C runtime clean
$(MAKE) -C tests clean
clean: clean-build
$(MAKE) -C $(VORTEX_HOME)/third_party clean
# Install setup
KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
KERNEL_LIBS = $(wildcard kernel/*.a)
RUNTIME_HEADERS = $(wildcard $(VORTEX_HOME)/runtime/include/*.h)
RUNTIME_LIBS = $(wildcard runtime/*.so)
INSTALL_DIRS = $(KERNEL_LIB_DST) $(RUNTIME_LIB_DST) $(KERNEL_INC_DST) $(RUNTIME_INC_DST)
$(INSTALL_DIRS):
mkdir -p $@
$(KERNEL_INC_DST)/VX_types.h: hw/VX_types.h | $(KERNEL_INC_DST)
cp $< $@
$(KERNEL_INC_DST)/%.h: $(VORTEX_HOME)/kernel/include/%.h | $(KERNEL_INC_DST)
cp $< $@
$(RUNTIME_INC_DST)/%.h: $(VORTEX_HOME)/runtime/include/%.h | $(RUNTIME_INC_DST)
cp $< $@
$(KERNEL_LIB_DST)/%.a: kernel/%.a | $(KERNEL_LIB_DST)
cp $< $@
$(RUNTIME_LIB_DST)/%.so: runtime/%.so | $(RUNTIME_LIB_DST)
cp $< $@
install: $(INSTALL_DIRS) \
$(KERNEL_INC_DST)/VX_types.h \
$(KERNEL_HEADERS:$(VORTEX_HOME)/kernel/include/%=$(KERNEL_INC_DST)/%) \
$(RUNTIME_HEADERS:$(VORTEX_HOME)/runtime/include/%=$(RUNTIME_INC_DST)/%) \
$(KERNEL_LIBS:kernel/%=$(KERNEL_LIB_DST)/%) \
$(RUNTIME_LIBS:runtime/%=$(RUNTIME_LIB_DST)/%)

132
README.md
View file

@ -1,22 +1,47 @@
[![Build Status](https://travis-ci.com/vortexgpgpu/vortex.svg?branch=master)](https://travis-ci.com/vortexgpgpu/vortex)
[![codecov](https://codecov.io/gh/vortexgpgpu/vortex/branch/master/graph/badge.svg)](https://codecov.io/gh/vortexgpgpu/vortex)
# Vortex GPGPU
# Vortex OpenGPU
Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple **backend drivers**, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs, all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx before going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program.
Vortex is a full-system RISCV-based GPGPU processor.
## Website
Vortex news can be found on its [website](https://vortex.cc.gatech.edu/)
## Citation
```
@inproceedings{10.1145/3466752.3480128,
author = {Tine, Blaise and Yalamarthy, Krishna Praveen and Elsabbagh, Fares and Hyesoon, Kim},
title = {Vortex: Extending the RISC-V ISA for GPGPU and 3D-Graphics},
year = {2021},
isbn = {9781450385572},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3466752.3480128},
doi = {10.1145/3466752.3480128},
abstract = {The importance of open-source hardware and software has been increasing. However, despite GPUs being one of the more popular accelerators across various applications, there is very little open-source GPU infrastructure in the public domain. We argue that one of the reasons for the lack of open-source infrastructure for GPUs is rooted in the complexity of their ISA and software stacks. In this work, we first propose an ISA extension to RISC-V that supports GPGPUs and graphics. The main goal of the ISA extension proposal is to minimize the ISA changes so that the corresponding changes to the open-source ecosystem are also minimal, which makes for a sustainable development ecosystem. To demonstrate the feasibility of the minimally extended RISC-V ISA, we implemented the complete software and hardware stacks of Vortex on FPGA. Vortex is a PCIe-based soft GPU that supports OpenCL and OpenGL. Vortex can be used in a variety of applications, including machine learning, graph analytics, and graphics rendering. Vortex can scale up to 32 cores on an Altera Stratix 10 FPGA, delivering a peak performance of 25.6 GFlops at 200 Mhz.},
booktitle = {MICRO-54: 54th Annual IEEE/ACM International Symposium on Microarchitecture},
pages = {754--766},
numpages = {13},
keywords = {reconfigurable computing, memory systems., computer graphics},
location = {Virtual Event, Greece},
series = {MICRO '21}
}
```
## Specifications
- Support RISC-V RV32IMF ISA
- Performance:
- 1024 total threads running at 250 MHz
- 128 Gflops of compute bandwidth
- 16 GB/s of memory bandwidth
- Scalability: up to 64 cores with optional L2 and L3 caches
- Software: OpenCL 1.2 Support
- Supported FPGAs:
- Intel Arria 10
- Intel Stratix 10
- Support RISC-V RV32IMAF and RV64IMAFD
- Microarchitecture:
- configurable number of cores, warps, and threads.
- configurable number of ALU, FPU, LSU, and SFU units per core.
- configurable pipeline issue width.
- optional local memory, L1, L2, and L3 caches.
- Software:
- OpenCL 1.2 Support.
- Supported FPGAs:
- Altera Arria 10
- Altera Stratix 10
- Xilinx Alveo U50, U250, U280
- Xilinx Versal VCK5000
## Directory structure
@ -29,24 +54,81 @@ Vortex is a full-system RISCV-based GPGPU processor.
- `ci`: Continuous integration scripts.
- `miscs`: Miscellaneous resources.
## Build Instructions
## Quick Start
If you are interested in a stable release of Vortex, you can download the latest release [here](https://github.com/vortexgpgpu/vortex/releases/latest). Otherwise, you can pull the most recent, but (potentially) unstable version as shown below. The following steps demonstrate how to build and run Vortex with the default driver: SimX. If you are interested in a different backend, look [here](docs/simulation.md).
### Supported OS Platforms
- Ubuntu 18.04
- Ubuntu 18.04, 20.04, 22.04, 24.04
- CentOS 7
### Toolchain Dependencies
The following dependencies will be fetched prebuilt by `toolchain_install.sh`.
- [POCL](http://portablecl.org/)
- [LLVM](https://llvm.org/)
- [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain)
- [Verilator](https://www.veripool.org/verilator)
### Install development tools
$ sudo apt-get install build-essential
$ sudo apt-get install git
- [cvfpu](https://github.com/openhwgroup/cvfpu.git)
- [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git)
- [Ramulator](https://github.com/CMU-SAFARI/ramulator2.git)
- [Yosys](https://github.com/YosysHQ/yosys)
- [Sv2v](https://github.com/zachjs/sv2v)
### Install Vortex codebase
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
$ cd Vortex
```sh
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
cd vortex
```
### Install system dependencies
```sh
# ensure dependent libraries are present
sudo ./ci/install_dependencies.sh
```
### Configure your build folder
```sh
mkdir build
cd build
# for 32bit
../configure --xlen=32 --tooldir=$HOME/tools
# for 64bit
../configure --xlen=64 --tooldir=$HOME/tools
```
### Install prebuilt toolchain
$ ./ci/toolchain_install.sh -all
### Build Vortex sources
$ make -s
```sh
./ci/toolchain_install.sh --all
```
### Set environment variables
```sh
# should always run before using the toolchain!
source ./ci/toolchain_env.sh
```
### Building Vortex
```sh
make -s
```
### Quick demo running vecadd OpenCL kernel on 2 cores
$ ./ci/blackbox.sh --driver=rtlsim --cores=2 --app=vecadd
```sh
./ci/blackbox.sh --cores=2 --app=vecadd
```
### Common Developer Tips
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
```sh
../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
make -s
make install
```
- Building Vortex 64-bit requires setting --xlen=64 configure option.
```sh
../configure --xlen=64 --tooldir=$HOME/tools
```
- Sourcing "./ci/toolchain_env.sh" is required every time you start a new terminal. We recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
```sh
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
```
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder.
```sh
../configure
```
- To debug the GPU, the simulation can generate a runtime trace for analysis. See /docs/debugging.md for more information.
```sh
./ci/blackbox.sh --app=demo --debug=3
```
- For additional information, check out the [documentation](docs/index.md)

View file

@ -1,4 +0,0 @@
Release Notes!
* 07/01/2020 - LKG FPGA build - Passed basic, demo, vecadd kernels.

23
TODO
View file

@ -1,23 +0,0 @@
Functionality:
1) vx_cl_warpSpawn()
-> To be used by pocl->ops->run
2) newlib Integration (LoadFile(""))
-> To be used by the Rhinio benchmarks
3) POCL OPS Vortex Suite
Performance:
1) Icache doesn't need SEND_MEM_REQUEST Stage
-> Blocks are never dirty, so why not evict right away
2) Branch not taken speculation
3) Runtime -02 not running on RTL, and -03 not running on RTL and Emulator
Vector:
1) Cycle accurate simulator (would require Cache Simulator)

View file

@ -1,193 +1,205 @@
#!/bin/sh
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SCRIPT_DIR=$(dirname "$0")
ROOT_DIR=$SCRIPT_DIR/..
show_usage()
{
echo "Vortex BlackBox Test Driver v1.0"
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim|simx] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
echo "Usage: $0 [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=#name] [--app=#app] [--args=#args] [--debug=#level] [--scope] [--perf=#class] [--rebuild=#n] [--log=logfile] [--help]]"
}
SCRIPT_DIR=$(dirname "$0")
VORTEX_HOME=$SCRIPT_DIR/..
# Print the usage line (via show_usage) followed by a legend describing the
# values accepted by the --driver, --app, --perf and --rebuild options.
show_help()
{
    show_usage
    echo " where"
    # driver names listed here must match the ones accepted by set_driver_path
    echo "--driver: gpu, simx, rtlsim, opae, xrt"
    echo "--app: any subfolder test under regression or opencl"
    # the profiling class is selected with --perf=<class>
    echo "--perf: 0=disable, 1=pipeline, 2=memsys"
    echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp"
}
DRIVER=vlsim
APP=sgemm
CLUSTERS=1
CORES=1
WARPS=4
THREADS=4
L2=0
L3=0
DEBUG=0
SCOPE=0
HAS_ARGS=0
DEBUG_LEVEL=1
for i in "$@"
do
case $i in
--driver=*)
DRIVER=${i#*=}
shift
;;
--app=*)
APP=${i#*=}
shift
;;
--clusters=*)
CLUSTERS=${i#*=}
shift
;;
--cores=*)
CORES=${i#*=}
shift
;;
--warps=*)
WARPS=${i#*=}
shift
;;
--threads=*)
THREADS=${i#*=}
shift
;;
--l2cache)
L2=1
shift
;;
--l3cache)
L3=1
shift
;;
--debug)
DEBUG=1
shift
;;
--scope)
SCOPE=1
CORES=1
shift
;;
--perf)
PERF_FLAG=-DPERF_ENABLE
shift
;;
--args=*)
ARGS=${i#*=}
HAS_ARGS=1
shift
;;
--help)
show_usage
exit 0
;;
*)
show_usage
exit -1
;;
esac
done
case $DRIVER in
rtlsim)
DRIVER_PATH=$VORTEX_HOME/driver/rtlsim
;;
vlsim)
DRIVER_PATH=$VORTEX_HOME/driver/vlsim
;;
asesim)
DRIVER_PATH=$VORTEX_HOME/driver/asesim
;;
fpga)
DRIVER_PATH=$VORTEX_HOME/driver/fpga
;;
simx)
DRIVER_PATH=$VORTEX_HOME/driver/simx
DEBUG_LEVEL=3
;;
*)
echo "invalid driver: $DRIVER"
exit -1
;;
esac
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
then
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
then
APP_PATH=$VORTEX_HOME/tests/regression/$APP
else
echo "Application folder found: $APP"
exit -1
fi
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG $CONFIGS"
echo "CONFIGS=$CONFIGS"
BLACKBOX_CACHE=blackbox.$DRIVER.cache
if [ -f "$BLACKBOX_CACHE" ]
then
LAST_CONFIGS=`cat $BLACKBOX_CACHE`
fi
if [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ];
then
make -C $DRIVER_PATH clean
fi
echo "$CONFIGS+$DEBUG+$SCOPE" > $BLACKBOX_CACHE
status=0
if [ $DEBUG -eq 1 ]
then
if [ $SCOPE -eq 1 ]
then
echo "running: DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH"
DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH
add_option() {
if [ -n "$1" ]; then
echo "$1 $2"
else
echo "running: DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH"
DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH
fi
if [ $HAS_ARGS -eq 1 ]
then
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > run.log 2>&1"
OPTS=$ARGS make -C $APP_PATH run-$DRIVER > run.log 2>&1
status=$?
else
echo "running: make -C $APP_PATH run-$DRIVER > run.log 2>&1"
make -C $APP_PATH run-$DRIVER > run.log 2>&1
status=$?
echo "$2"
fi
if [ -f "$APP_PATH/trace.vcd" ]
then
}
# Reset every option variable to its default value.
# CONFIGS is the only one seeded from the caller's environment: whatever the
# caller exported is kept as the starting point (empty when it was unset).
DEFAULTS() {
    # what to run
    DRIVER=simx
    APP=sgemm
    HAS_ARGS=0
    LOGFILE=run.log

    # debugging / profiling switches
    DEBUG=0
    DEBUG_LEVEL=0
    SCOPE=0
    PERF_CLASS=0

    # build configuration and rebuild policy
    CONFIGS=${CONFIGS-}
    REBUILD=2
    TEMPBUILD=0
}
# Parse the command line into the option variables initialised by DEFAULTS.
# Hardware-shape options are appended to CONFIGS as -D defines; --help prints
# the help text and exits 0; any unrecognised argument prints the usage line
# and exits 1.
parse_args() {
    DEFAULTS
    for opt in "$@"; do
        # text after the first '=' (the whole argument for flag-only options)
        val=${opt#*=}
        case $opt in
        --driver=*)
            DRIVER=$val
            ;;
        --app=*)
            APP=$val
            ;;
        --clusters=*)
            CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=$val")
            ;;
        --cores=*)
            CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=$val")
            ;;
        --warps=*)
            CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=$val")
            ;;
        --threads=*)
            CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=$val")
            ;;
        --l2cache)
            CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE")
            ;;
        --l3cache)
            CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE")
            ;;
        --perf=*)
            CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE")
            PERF_CLASS=$val
            ;;
        --debug=*)
            DEBUG=1
            DEBUG_LEVEL=$val
            ;;
        --scope)
            SCOPE=1
            ;;
        --args=*)
            HAS_ARGS=1
            ARGS=$val
            ;;
        --rebuild=*)
            REBUILD=$val
            ;;
        --log=*)
            LOGFILE=$val
            ;;
        --help)
            show_help
            exit 0
            ;;
        *)
            show_usage
            exit 1
            ;;
        esac
    done

    # --rebuild=3 is rewritten as a forced rebuild (1) with TEMPBUILD switched on
    if [ $REBUILD -eq 3 ];
    then
        TEMPBUILD=1
        REBUILD=1
    fi
}
# Derive DRIVER_PATH from DRIVER.
# "gpu" has no runtime folder (DRIVER_PATH is emptied); the other known
# drivers live under $ROOT_DIR/runtime/<driver>. Unknown names abort with
# exit status 1.
set_driver_path() {
    # reject unknown driver names first
    case $DRIVER in
    gpu|simx|rtlsim|opae|xrt)
        ;;
    *)
        echo "Invalid driver: $DRIVER"
        exit 1
        ;;
    esac

    if [ "$DRIVER" = "gpu" ]; then
        DRIVER_PATH=""
    else
        DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER"
    fi
}
# Locate the folder of application $APP and store it in APP_PATH.
# tests/opencl is searched before tests/regression; when neither contains the
# application the script exits with status 1.
set_app_path() {
    for suite in opencl regression; do
        if [ -d "$ROOT_DIR/tests/$suite/$APP" ]; then
            APP_PATH="$ROOT_DIR/tests/$suite/$APP"
            return 0
        fi
    done
    echo "Application folder not found: $APP"
    exit 1
}
# Build the selected driver by running make in $DRIVER_PATH with stdout discarded.
# Reads: DEBUG, DEBUG_LEVEL, SCOPE, TEMPBUILD, TEMPDIR, CONFIGS, DRIVER_PATH.
# The optional settings are collected into cmd_opts as VAR=value words that
# prefix the make command, so they reach make as environment variables.
build_driver() {
local cmd_opts=""
# each option is appended only when its switch is active
[ $DEBUG -ne 0 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=$DEBUG_LEVEL")
[ $SCOPE -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "SCOPE=1")
# TEMPDIR is not set in this function; it must already exist when TEMPBUILD is 1
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DESTDIR=\"$TEMPDIR\"")
[ -n "$CONFIGS" ] && cmd_opts=$(add_option "$cmd_opts" "CONFIGS=\"$CONFIGS\"")
if [ -n "$cmd_opts" ]; then
echo "Running: $cmd_opts make -C $DRIVER_PATH > /dev/null"
# eval re-parses the string so the embedded \"...\" quotes keep multi-word
# values (e.g. several -D defines in CONFIGS) together as one assignment
eval "$cmd_opts make -C $DRIVER_PATH > /dev/null"
else
echo "Running: make -C $DRIVER_PATH > /dev/null"
make -C $DRIVER_PATH > /dev/null
fi
}
# Run the application through the run-$DRIVER make target in $APP_PATH.
# Reads: DEBUG, TEMPBUILD, TEMPDIR, HAS_ARGS, ARGS, APP_PATH, DRIVER, LOGFILE.
# Sets the global `status` to the exit status of the run and returns it.
run_app() {
local cmd_opts=""
# VAR=value words placed in front of the make command
[ $DEBUG -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=1")
# TEMPDIR is not set in this function; it must already exist when TEMPBUILD is 1
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"")
[ $HAS_ARGS -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "OPTS=\"$ARGS\"")
if [ $DEBUG -ne 0 ]; then
# debug runs: stdout and stderr are redirected into $LOGFILE
if [ -n "$cmd_opts" ]; then
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
# eval re-parses the string so the embedded \"...\" quotes keep multi-word values together
eval "$cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
else
echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
fi
else
# non-debug runs: output goes straight to the terminal
if [ -n "$cmd_opts" ]; then
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER"
eval "$cmd_opts make -C $APP_PATH run-$DRIVER"
else
echo "Running: make -C $APP_PATH run-$DRIVER"
make -C $APP_PATH run-$DRIVER
fi
fi
# $? here is the status of the if/else above, i.e. of the last command it ran
status=$?
return $status
}
main() {
parse_args "$@"
set_driver_path
set_app_path
# execute on default installed GPU
if [ "$DRIVER" = "gpu" ]; then
run_app
exit $?
fi
if [ -n "$CONFIGS" ]; then
echo "CONFIGS=$CONFIGS"
fi
if [ $REBUILD -ne 0 ]; then
BLACKBOX_CACHE=blackbox.$DRIVER.cache
LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "")
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then
make -C $DRIVER_PATH clean-driver > /dev/null
echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE"
fi
fi
export VORTEX_PROFILING=$PERF_CLASS
make -C "$ROOT_DIR/hw" config > /dev/null
make -C "$ROOT_DIR/runtime/stub" > /dev/null
if [ $TEMPBUILD -eq 1 ]; then
# setup temp directory
TEMPDIR=$(mktemp -d)
mkdir -p "$TEMPDIR"
# build stub driver
echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub"
DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null
# register tempdir cleanup on exit
trap "rm -rf $TEMPDIR" EXIT
fi
build_driver
run_app
status=$?
if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then
mv -f $APP_PATH/trace.vcd .
fi
else
echo "driver initialization..."
if [ $SCOPE -eq 1 ]
then
echo "running: SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH"
SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH
else
echo "running: CONFIGS="$CONFIGS" make -C $DRIVER_PATH"
CONFIGS="$CONFIGS" make -C $DRIVER_PATH
fi
echo "running application..."
if [ $HAS_ARGS -eq 1 ]
then
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER"
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
status=$?
else
echo "running: make -C $APP_PATH run-$DRIVER"
make -C $APP_PATH run-$DRIVER
status=$?
fi
fi
exit $status
if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then
mv -f $APP_PATH/scope.vcd .
fi
exit $status
}
main "$@"

41
ci/datagen.py Executable file
View file

@ -0,0 +1,41 @@
#!/usr/bin/env python3
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import struct
import random
import sys
def create_binary_file(n, filename):
    """Write a count header followed by ``n`` random floats to ``filename``.

    The file is opened in binary write mode (an existing file is overwritten).
    The layout is the integer ``n`` packed with struct format ``'i'``, then
    ``n`` values drawn from ``random.random()`` (each in [0, 1)), each packed
    with struct format ``'f'``. No byte-order prefix is given, so struct's
    native byte order and size are used.

    Args:
        n: number of floats to generate; also written as the header.
        filename: path of the file to create.
    """
    with open(filename, 'wb') as out:
        # header: the element count
        out.write(struct.pack('i', n))
        # payload: one packed single-precision float per element, in draw order
        out.writelines(struct.pack('f', random.random()) for _ in range(n))
if __name__ == "__main__":
    # Command-line entry point: expects exactly two arguments, the number of
    # floats to generate and the output file path.
    if len(sys.argv) != 3:
        print("Usage: script.py N filename")
        sys.exit(1)

    count, path = int(sys.argv[1]), sys.argv[2]
    create_binary_file(count, path)
    print(f"Created binary file '{path}' containing {count} floats.")

46
ci/install_dependencies.sh Executable file
View file

@ -0,0 +1,46 @@
#!/bin/sh
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Function to check if GCC version is less than 11
check_gcc_version() {
    # Succeeds (status 0) when the version reported by `gcc -dumpversion` is
    # lower than 11, fails (status 1) otherwise.
    local current
    current=$(gcc -dumpversion)
    dpkg --compare-versions "$current" lt 11 && return 0
    return 1
}
# Update package list
apt-get update -y
# install system dependencies
apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache cmake libffi7
# Check and install GCC 11 if necessary
if check_gcc_version; then
echo "GCC version is less than 11. Installing GCC 11..."
add-apt-repository -y ppa:ubuntu-toolchain-r/test
apt-get update
apt-get install -y g++-11 gcc-11
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
else
echo "GCC version is 11 or greater. No need to install GCC 11."
fi

View file

@ -1,73 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
OS_DIR=${OS_DIR:-'ubuntu/bionic'}
SRCDIR=${SRCDIR:-'/opt'}
DESTDIR=${DESTDIR:-'.'}
echo "OS_DIR=${OS_DIR}"
echo "SRCDIR=${SRCDIR}"
echo "DESTDIR=${DESTDIR}"
riscv()
{
echo "prebuilt riscv-gnu-toolchain..."
tar -C $SRCDIR -cvjf riscv-gnu-toolchain.tar.bz2 riscv-gnu-toolchain
split -b 50M riscv-gnu-toolchain.tar.bz2 "riscv-gnu-toolchain.tar.bz2.part"
mv riscv-gnu-toolchain.tar.bz2.part* $DESTDIR/riscv-gnu-toolchain/$OS_DIR
rm riscv-gnu-toolchain.tar.bz2
}
llvm()
{
echo "prebuilt llvm-riscv..."
tar -C $SRCDIR -cvjf llvm-vortex1.tar.bz2 llvm-riscv
split -b 50M llvm-vortex1.tar.bz2 "llvm-vortex1.tar.bz2.part"
mv llvm-vortex1.tar.bz2.part* $DESTDIR/llvm-vortex/$OS_DIR
rm llvm-vortex1.tar.bz2
}
pocl()
{
echo "prebuilt pocl..."
tar -C $SRCDIR -cvjf pocl1.tar.bz2 pocl
mv pocl1.tar.bz2 $DESTDIR/pocl/$OS_DIR
}
verilator()
{
echo "prebuilt verilator..."
tar -C $SRCDIR -cvjf verilator.tar.bz2 verilator
mv verilator.tar.bz2 $DESTDIR/verilator/$OS_DIR
}
usage()
{
echo "usage: prebuilt [[-riscv] [-llvm] [-pocl] [-verilator] [-all] [-h|--help]]"
}
while [ "$1" != "" ]; do
case $1 in
-pocl ) pocl
;;
-verilator ) verilator
;;
-riscv ) riscv
;;
-llvm ) llvm
;;
-all ) riscv
llvm
pocl
verilator
;;
-h | --help ) usage
exit
;;
* ) usage
exit 1
esac
shift
done

View file

@ -1,204 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
# ensure build
make -s
unittest()
{
make -C tests/unittest run
}
coverage()
{
echo "begin coverage tests..."
make -C tests/runtime run-rtlsim
make -C tests/riscv/isa run-rtlsim
make -C tests/regression run-vlsim
make -C tests/opencl run-vlsim
make -C tests/runtime run-simx
make -C tests/riscv/isa run-simx
make -C tests/regression run-simx
make -C tests/opencl run-simx
echo "coverage tests done!"
}
# Run the texture-unit tests: the `tex` application is executed through
# ci/blackbox.sh with -DEXT_TEX_ENABLE=1 on several drivers and input images,
# including two runs with --perf.
tex()
{
    echo "begin texture tests..."
    CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --driver=vlsim --app=tex --args="-isoccer.png -osoccer_result.png -g0"
    CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --driver=simx --app=tex --args="-isoccer.png -osoccer_result.png -g0"
    CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-itoad.png -otoad_result.png -g1"
    CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --driver=simx --app=tex --args="-irainbow.png -orainbow_result.png -g2"
    CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --driver=rtlsim --app=tex --args="-itoad.png -otoad_result.png -g1" --perf
    CONFIGS="-DEXT_TEX_ENABLE=1" ./ci/blackbox.sh --driver=simx --app=tex --args="-itoad.png -otoad_result.png -g1" --perf
    # closing message now follows the "<name> tests done!" pattern of the other groups
    echo "texture tests done!"
}
cluster()
{
echo "begin clustering tests..."
# warp/threads configurations
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
./ci/blackbox.sh --driver=simx --cores=1 --warps=8 --threads=16 --app=demo
# cores clustering
./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=4 --clusters=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=1 --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --app=demo --args="-n1"
# L2/L3
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=2 --l2cache --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --clusters=4 --l2cache --l3cache --app=demo --args="-n1"
echo "clustering tests done!"
}
debug()
{
echo "begin debugging tests..."
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --perf --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --perf --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --debug --app=demo --args="-n1"
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
echo "debugging tests done!"
}
config()
{
echo "begin configuration tests..."
# disabling M extension
CONFIGS=-DEXT_M_DISABLE ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext
# disabling F extension
CONFIGS=-DEXT_F_DISABLE ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext
CONFIGS=-DEXT_F_DISABLE ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_mf_ext --perf
CONFIGS=-DEXT_F_DISABLE ./ci/blackbox.sh --driver=simx --cores=1 --app=no_mf_ext --perf
# disable shared memory
CONFIGS=-DSM_ENABLE=0 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem
CONFIGS=-DSM_ENABLE=0 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=no_smem --perf
CONFIGS=-DSM_ENABLE=0 ./ci/blackbox.sh --driver=simx --cores=1 --app=no_smem --perf
# using Default FPU core
FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# adjust l1 block size to match l2
CONFIGS="-DL1_BLOCK_SIZE=64" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
# test cache banking
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=simx --cores=1 --app=io_addr
# test cache multi-porting
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
CONFIGS="-DL2_NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=io_addr
# test 128-bit MEM block
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test single-bank DRAM
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test 27-bit DRAM address
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
echo "configuration tests done!"
}
stress0()
{
echo "begin stress0 tests..."
# test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=sgemm
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
FPU_CORE=FPU_DEFAULT CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --app=printf
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --app=printf
echo "stress0 tests done!"
}
stress1()
{
echo "begin stress1 tests..."
./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 --l3cache --app=sgemm --args="-n256"
echo "stress1 tests done!"
}
usage()
{
echo "usage: regression [-unittest] [-coverage] [-tex] [-cluster] [-debug] [-config] [-stress[#n]] [-all] [-h|--help]"
}
while [ "$1" != "" ]; do
case $1 in
-unittest ) unittest
;;
-coverage ) coverage
;;
-tex ) tex
;;
-cluster ) cluster
;;
-debug ) debug
;;
-config ) config
;;
-stress0 ) stress0
;;
-stress1 ) stress1
;;
-stress ) stress0
stress1
;;
-all ) unittest
coverage
tex
cluster
debug
config
stress0
stress1
;;
-h | --help ) usage
exit
;;
* ) usage
exit 1
esac
shift
done

505
ci/regression.sh.in Executable file
View file

@ -0,0 +1,505 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# exit when any command fails
set -e
# clear blackbox cache
rm -f blackbox.*.cache
# HW: add a test "VM Test" to make sure VM feature is enabled
XLEN=${XLEN:=@XLEN@}
XSIZE=$((XLEN / 8))
echo "Vortex Regression Test: XLEN=$XLEN"
# Unit tests: run the `run` target of tests/unittest, then build hw/unittest
# with its standard output discarded.
unittest()
{
make -C tests/unittest run
make -C hw/unittest > /dev/null
}
# Rebuild rtlsim from scratch with CONFIGS set to $1, then run the ISA test
# target $2.
# The three steps are separate statements on purpose: with `set -e`, a failure
# of any command other than the last one of an `a && b && c` list does not
# abort the script, so a failed clean or build would silently skip the test.
_isa_rtlsim_variant()
{
    make -C sim/rtlsim clean
    CONFIGS="$1" make -C sim/rtlsim > /dev/null
    make -C tests/riscv/isa "$2"
}

# RISC-V ISA tests: run on simx and rtlsim with the default build, then on
# rtlsim rebuilt for each FPU configuration (plus 64-bit variants when XLEN is 64).
isa()
{
    echo "begin isa tests..."

    make -C sim/simx
    make -C sim/rtlsim

    make -C tests/riscv/isa run-simx
    make -C tests/riscv/isa run-rtlsim

    # 32-bit float tests, one rtlsim rebuild per FPU configuration
    _isa_rtlsim_variant "-DFPU_FPNEW" run-rtlsim-32f
    _isa_rtlsim_variant "-DFPU_DPI" run-rtlsim-32f
    _isa_rtlsim_variant "-DFPU_DSP" run-rtlsim-32f

    if [ "$XLEN" == "64" ]
    then
        # double-precision tests
        _isa_rtlsim_variant "-DFPU_FPNEW" run-rtlsim-64d
        _isa_rtlsim_variant "-DFPU_DPI" run-rtlsim-64d
        # float tests with the D extension disabled
        _isa_rtlsim_variant "-DFPU_DPI -DEXT_D_DISABLE" run-rtlsim-64f
        _isa_rtlsim_variant "-DFPU_FPNEW -DEXT_D_DISABLE" run-rtlsim-64f
        _isa_rtlsim_variant "-DFPU_DSP -DEXT_D_DISABLE" run-rtlsim-64fx
    fi

    # clean build
    make -C sim/rtlsim clean

    echo "isa tests done!"
}
# Kernel tests: build both simulators, then run tests/kernel on each of them.
kernel()
{
    echo "begin kernel tests..."

    # build the simulators
    for sim in simx rtlsim; do
        make -C sim/$sim
    done

    # run the kernel tests on each simulator
    for sim in simx rtlsim; do
        make -C tests/kernel run-$sim
    done

    echo "kernel tests done!"
}
# Regression tests: build the simx and rtlsim runtimes, run tests/regression on
# both, then exercise barriers, the temporary-build mode and matmul through
# ci/blackbox.sh.
regression()
{
    echo "begin regression tests..."

    for drv in simx rtlsim; do
        make -C runtime/$drv
    done
    for drv in simx rtlsim; do
        make -C tests/regression run-$drv
    done

    # global barrier (dogfood -tgbar) built with -DGBAR_ENABLE
    for drv in simx opae xrt; do
        CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=$drv --app=dogfood --args="-n1 -tgbar" --cores=2
    done

    # local barrier (dogfood -tbar)
    for drv in simx opae xrt; do
        ./ci/blackbox.sh --driver=$drv --app=dogfood --args="-n1 -tbar"
    done

    # temporary driver build mode (--rebuild=3)
    ./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3

    # matmul
    CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"

    echo "regression tests done!"
}
# OpenCL tests: build the simx and rtlsim runtimes, run tests/opencl on both,
# then run the lbm application with 8 warps on each driver.
opencl()
{
    echo "begin opencl tests..."

    for drv in simx rtlsim; do
        make -C runtime/$drv
    done
    for drv in simx rtlsim; do
        make -C tests/opencl run-$drv
    done

    # lbm with 8 warps
    for drv in simx rtlsim; do
        ./ci/blackbox.sh --driver=$drv --app=lbm --warps=8
    done

    echo "opencl tests done!"
}
# Virtual-memory tests: rebuild the simx simulator and runtime with
# -DVM_ENABLE, run the opencl and regression suites on simx, then repeat with
# -DVM_ADDR_MODE=BARE added.
# Clean and build are separate statements on purpose: with `set -e`, a failing
# `clean` in `clean && build` does not abort the script, and the tests would
# then run against whatever stale build is left in place.
vm(){
    echo "begin vm tests..."

    # VM enabled
    make -C sim/simx clean
    CONFIGS="-DVM_ENABLE" make -C sim/simx
    make -C runtime/simx clean
    CONFIGS="-DVM_ENABLE" make -C runtime/simx
    make -C tests/opencl run-simx
    make -C tests/regression run-simx

    # VM enabled with -DVM_ADDR_MODE=BARE
    make -C sim/simx clean
    CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C sim/simx
    make -C runtime/simx clean
    CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C runtime/simx
    make -C tests/opencl run-simx
    make -C tests/regression run-simx

    echo "vm tests done!"
}
# Cache tests: run ci/blackbox.sh over a set of cache/local-memory
# configurations (disabled caches, line size, ways, banking, replacement
# policy, writeback, cache clustering, L2/L3).
cache()
{
    echo "begin cache tests..."

    # disable local memory
    CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
    CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1

    # disable L1 cache
    CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx

    # reduce l1 line size
    CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
    # NOTE(review): the define below is spelled DISABLE_L1 while the tests above
    # use L1_DISABLE - confirm which macro the hardware config actually reads.
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DDISABLE_L1" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
    CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx

    # test cache ways
    CONFIGS="-DICACHE_NUM_WAYS=1 -DDCACHE_NUM_WAYS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx

    # test cache banking
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=simx --app=sgemmx
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=simx --app=sgemmx --threads=8

    # replacement policy
    CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_REPL_POLICY=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
    CONFIGS="-DDCACHE_REPL_POLICY=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx

    # test writeback
    CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
    CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
    CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
    CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
    CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress

    # cache clustering
    CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2

    # L2/L3
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"

    # closing message: the group is finished here, matching the other test groups
    echo "cache tests done!"
}
# Configuration tests, part 1: run ci/blackbox.sh on rtlsim and simx over a
# range of core-shape settings (warps/threads, cores/clusters, issue width and
# ALU/FPU/LSU scaling). Each line is an independent blackbox invocation; the
# CONFIGS prefix applies only to the command on the same line.
config1()
{
echo "begin configuration-1 tests..."
# warp/threads
./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge
./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
# cores clustering
./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
# issue width
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=4" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=simx --app=diverge
CONFIGS="-DISSUE_WIDTH=4" ./ci/blackbox.sh --driver=simx --app=diverge
# ALU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=4 -DNUM_ALU_BLOCK=4 -DNUM_ALU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=2 -DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=simx --app=diverge
CONFIGS="-DISSUE_WIDTH=4 -DNUM_ALU_BLOCK=4 -DNUM_ALU_LANES=4" ./ci/blackbox.sh --driver=simx --app=diverge
# FPU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
# FPU's PE scaling
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
# LSU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
echo "configuration-1 tests done!"
}
# Configuration-2 regression suite: exercises the opae/xrt simulators, FPU/DPI
# build options, a custom startup address, disabled ISA extensions and a range
# of memory-system configurations. Reads XLEN and XSIZE from the environment of
# the enclosing script.
config2()
{
    echo "begin configuration-2 tests..."

    # test opaesim
    ./ci/blackbox.sh --driver=opae --app=printf
    ./ci/blackbox.sh --driver=opae --app=diverge
    ./ci/blackbox.sh --driver=xrt --app=diverge

    # disable DPI
    if [ "$XLEN" == "64" ]; then
        # need to disable trig on 64-bit due to a bug inside fpnew's sqrt core.
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar"
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar"
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar"
    else
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
        CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood
    fi

    # custom program startup address
    make -C tests/regression/dogfood clean-kernel
    STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood
    ./ci/blackbox.sh --driver=simx --app=dogfood
    ./ci/blackbox.sh --driver=rtlsim --app=dogfood
    make -C tests/regression/dogfood clean-kernel

    # disabling M & F extensions
    make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i
    make -C sim/rtlsim clean

    # disabling ZICOND extension
    CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo

    # test 128-bit memory block
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress

    # test XLEN-bit memory block
    CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress

    # test memory coalescing
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8

    # test single-bank memory
    # (the former XLEN if/else ran exactly the same two commands in both branches)
    CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress

    # test larger memory address
    if [ "$XLEN" == "64" ]; then
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress
    else
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress
        CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress
    fi

    # test memory banks interleaving
    CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
    CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress

    # test memory ports
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
    CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
    CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8

    echo "configuration-2 tests done!"
}
# Cross-checks the simx and rtlsim instruction traces: rebuilds both simulators
# with DEBUG=3, runs the 32im ISA tests on each, converts both logs to CSV with
# ci/trace_csv.py and diffs the results. The log and CSV files are written to
# the current directory.
test_csv_trace()
{
    # test CSV trace generation
    make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
    make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
    make -C tests/riscv/isa run-simx-32im > run_simx.log
    make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
    ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
    ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
    # diff exits non-zero when the two traces differ
    diff trace_rtlsim.csv trace_simx.csv
    # clean build
    make -C sim/simx clean
    make -C sim/rtlsim clean
}
# Debug regression suite: runs the CSV trace comparison, then the demo app
# built with -O0 on opae/xrt, and finally a 2-core/2-cluster L2 configuration
# with --debug=1 --perf=1 on opae, xrt and simx.
debug()
{
    echo "begin debugging tests..."
    test_csv_trace
    CONFIGS="-O0" ./ci/blackbox.sh --driver=opae --app=demo --args="-n1"
    CONFIGS="-O0" ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
    CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
    echo "debugging tests done!"
}
# Scope regression suite: runs the demo app with --scope and SCOPE_DEPTH=128
# on the opae and xrt drivers.
scope()
{
    echo "begin scope tests..."
    SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
    SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
    # closing message now mirrors the opening one, like every other suite
    echo "scope tests done!"
}
# Stress regression suite: runs dogfood (opae, 2 cores x 2 clusters, L2+L3,
# write-back caches) and sgemmx (xrt, L2) with VERILATOR_RESET_VALUE=1.
stress()
{
    echo "begin stress tests..."
    # test verilator reset values
    CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
    CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache
    echo "stress tests done!"
}
# Synthesis regression suite: cleans hw/syn/yosys and runs its `synthesis`
# target with PREFIX=build_base on a reduced configuration (no DPI, no F
# extension, 2 warps, 2 threads).
synthesis()
{
    echo "begin synthesis tests..."
    PREFIX=build_base make -C hw/syn/yosys clean
    PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" make -C hw/syn/yosys synthesis
    echo "synthesis tests done!"
}
# Vector regression suite: rebuilds simx with the V extension enabled and runs
# the riscv-vector-tests script with VLEN=256.
vector()
{
    echo "begin vector tests..."
    make -C sim/simx clean && CONFIGS="-DEXT_V_ENABLE" make -C sim/simx
    # @TOOLDIR@ and @XLEN@ are placeholders filled in when this .in template is instantiated
    TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 ./tests/riscv/riscv-vector-tests/run-test.sh
    echo "vector tests done!"
}
# Prints the command-line synopsis. Every flag accepted by the argument
# parser is listed, including --vm, and the short help flag is spelled -h.
show_usage()
{
    echo "Vortex Regression Test"
    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--vm] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--all] [-h|--help]"
}
# Collect the requested suites from the command line, optionally rebuild,
# then run the suites in the order given and report the elapsed time.
declare -a tests=()
clean=0

while [[ -n "$1" ]]; do
    case $1 in
        --clean )
            clean=1
            ;;
        --unittest | --isa | --kernel | --regression | --opencl | --cache | --vm | \
        --config1 | --config2 | --debug | --scope | --stress | --synthesis | --vector )
            # each suite function is named after its flag without the leading dashes
            tests+=("${1#--}")
            ;;
        --all )
            # replaces anything selected so far with the full list
            tests=(unittest isa kernel regression opencl cache vm config1 config2 debug scope stress synthesis vector)
            ;;
        -h | --help )
            show_usage
            exit
            ;;
        * )
            show_usage
            exit 1
            ;;
    esac
    shift
done

if [ "$clean" -eq 1 ]; then
    make clean
    make -s
fi

start=$SECONDS

for suite in "${tests[@]}"; do
    $suite
done

echo "Regression completed!"

duration=$(( SECONDS - start ))
printf "Elapsed Time: %d:%02d:%02d\n" $(( duration / 3600 )) $(( duration / 60 % 60 )) $(( duration % 60 ))

View file

@ -1,38 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
# ensure build
make -s
# Coverage run: rebuilds simx for XLEN=64 and runs the RISC-V ISA tests on it.
coverage()
{
    echo "begin coverage tests..."
    make -C sim/simx clean
    XLEN=64 make -C sim/simx
    XLEN=64 make -C tests/riscv/isa run-simx
    echo "coverage tests done!"
}
# Prints the command-line synopsis.
usage()
{
    printf '%s\n' "usage: regression [-coverage] [-all] [-h|--help]"
}
# Process the arguments left to right; -coverage and -all both run coverage.
while [[ -n "$1" ]]; do
    case $1 in
        -coverage | -all )
            coverage
            ;;
        -h | --help )
            usage
            exit
            ;;
        * )
            usage
            exit 1
            ;;
    esac
    shift
done

View file

@ -1,30 +0,0 @@
#!/bin/bash

# exit when any command fails
set -e

# ensure build
make -s

# clear POCL cache
rm -rf ~/.cache/pocl

# Clean and rebuild, in this order: runtime, drivers, runtime tests,
# regression tests, OpenCL tests. Each entry is "<directory>:<clean target>".
for step in runtime:clean driver:clean tests/runtime:clean tests/regression:clean-all tests/opencl:clean-all; do
    make -C ${step%%:*} ${step##*:}
    make -C ${step%%:*}
done

24
ci/toolchain_env.sh.in Executable file
View file

@ -0,0 +1,24 @@
#!/bin/sh
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Use the caller's TOOLDIR when set and non-empty, else the configured default.
: "${TOOLDIR:=@TOOLDIR@}"

# Tool install roots exported for the build scripts.
export SV2V_PATH=$TOOLDIR/sv2v
export YOSYS_PATH=$TOOLDIR/yosys

# Prepend the tool bin directories; yosys ends up first, then sv2v, then verilator.
export PATH=$YOSYS_PATH/bin:$SV2V_PATH/bin:$TOOLDIR/verilator/bin:$PATH

View file

@ -1,97 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
DESTDIR="${DESTDIR:=/opt}"
# Downloads the ten parts (parta a..j) of the prebuilt 32-bit RISC-V GNU
# toolchain, joins and unpacks them, copies the tree to $DESTDIR and removes
# the local files.
riscv()
{
    local pkg=riscv-gnu-toolchain
    local archive=$pkg.tar.bz2
    local suffix
    for suffix in {a..j}; do
        wget $REPOSITORY/$pkg/ubuntu/bionic/$archive.parta$suffix
    done
    cat $archive.parta* > $archive
    tar -xvf $archive
    rm -f $archive*
    cp -r $pkg $DESTDIR
    rm -rf $pkg
}
# Downloads the ten parts (parta a..j) of the prebuilt 64-bit RISC-V GNU
# toolchain, joins and unpacks them, copies the tree to $DESTDIR and removes
# the local files.
riscv64()
{
    local pkg=riscv64-gnu-toolchain
    local archive=$pkg.tar.bz2
    local suffix
    for suffix in {a..j}; do
        wget $REPOSITORY/$pkg/ubuntu/bionic/$archive.parta$suffix
    done
    cat $archive.parta* > $archive
    tar -xvf $archive
    rm -f $archive*
    cp -r $pkg $DESTDIR
    rm -rf $pkg
}
# Downloads the two parts of the prebuilt llvm-vortex1 archive, joins and
# unpacks them, copies the extracted llvm-riscv tree to $DESTDIR and removes
# the local files.
llvm()
{
    local archive=llvm-vortex1.tar.bz2
    local tree=llvm-riscv
    local suffix
    for suffix in {a..b}; do
        wget $REPOSITORY/llvm-vortex/ubuntu/bionic/$archive.parta$suffix
    done
    cat $archive.parta* > $archive
    tar -xvf $archive
    rm -f $archive*
    cp -r $tree $DESTDIR
    rm -rf $tree
}
# Downloads and unpacks the prebuilt pocl1 archive, copies the extracted pocl
# tree to $DESTDIR and removes the local files.
pocl()
{
    local archive=pocl1.tar.bz2
    local tree=pocl
    wget $REPOSITORY/pocl/ubuntu/bionic/$archive
    tar -xvf $archive
    rm -f $archive
    cp -r $tree $DESTDIR
    rm -rf $tree
}
# Downloads and unpacks the prebuilt verilator archive, copies the extracted
# tree to $DESTDIR and removes the local files.
verilator()
{
    local tree=verilator
    local archive=$tree.tar.bz2
    wget $REPOSITORY/$tree/ubuntu/bionic/$archive
    tar -xvf $archive
    rm -f $archive
    cp -r $tree $DESTDIR
    rm -rf $tree
}
# Prints the command-line synopsis.
usage()
{
    printf '%s\n' "usage: toolchain_install [[-riscv] [-riscv64] [-llvm] [-pocl] [-verilator] [-all] [-h|--help]]"
}
# Process the arguments left to right; each flag runs the install function of
# the same name, -all runs all of them.
while [[ -n "$1" ]]; do
    case $1 in
        -pocl | -verilator | -riscv | -riscv64 | -llvm )
            # strip the single leading dash to get the function name
            ${1#-}
            ;;
        -all )
            riscv
            riscv64
            llvm
            pocl
            verilator
            ;;
        -h | --help )
            usage
            exit
            ;;
        * )
            usage
            exit 1
            ;;
    esac
    shift
done

199
ci/toolchain_install.sh.in Executable file
View file

@ -0,0 +1,199 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# exit when any command fails
set -e
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
OSVERSION=${OSVERSION:=@OSVERSION@}
# Downloads the multi-part prebuilt 32-bit RISC-V GNU toolchain for $OSVERSION,
# joins and unpacks it and installs it as $TOOLDIR/riscv32-gnu-toolchain,
# replacing any previous install. The number of parts depends on $OSVERSION.
riscv32()
{
    local parts x
    case $OSVERSION in
        "centos/7")      parts=$(echo {a..l}) ;;
        "ubuntu/bionic") parts=$(echo {a..j}) ;;
        *)               parts=$(echo {a..k}) ;;
    esac
    rm -f riscv32-gnu-toolchain.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/riscv32-gnu-toolchain/$OSVERSION/riscv32-gnu-toolchain.tar.bz2.parta$x"
    done
    cat riscv32-gnu-toolchain.tar.bz2.parta* > riscv32-gnu-toolchain.tar.bz2
    tar -xvf riscv32-gnu-toolchain.tar.bz2
    # Separate statements instead of an && chain: `set -e` ignores failures of
    # non-final commands in an AND list, so a failed mkdir/mv would otherwise
    # fall through to the cleanup below and be reported as success.
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/riscv32-gnu-toolchain"
    mv riscv32-gnu-toolchain "$TOOLDIR"
    rm -rf riscv32-gnu-toolchain riscv32-gnu-toolchain.tar.bz2*
}
# Downloads the multi-part prebuilt 64-bit RISC-V GNU toolchain for $OSVERSION,
# joins and unpacks it and installs it as $TOOLDIR/riscv64-gnu-toolchain,
# replacing any previous install.
riscv64()
{
    local parts x
    case $OSVERSION in
        "centos/7") parts=$(echo {a..l}) ;;
        *)          parts=$(echo {a..j}) ;;
    esac
    rm -f riscv64-gnu-toolchain.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/riscv64-gnu-toolchain/$OSVERSION/riscv64-gnu-toolchain.tar.bz2.parta$x"
    done
    cat riscv64-gnu-toolchain.tar.bz2.parta* > riscv64-gnu-toolchain.tar.bz2
    tar -xvf riscv64-gnu-toolchain.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/riscv64-gnu-toolchain"
    mv riscv64-gnu-toolchain "$TOOLDIR"
    rm -rf riscv64-gnu-toolchain riscv64-gnu-toolchain.tar.bz2*
}
# Downloads the two-part prebuilt llvm-vortex2 archive for $OSVERSION, joins
# and unpacks it and installs it as $TOOLDIR/llvm-vortex, replacing any
# previous install.
llvm()
{
    local parts x
    # every OS version currently ships the archive in two parts
    parts=$(echo {a..b})
    echo $parts
    rm -f llvm-vortex2.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/llvm-vortex/$OSVERSION/llvm-vortex2.tar.bz2.parta$x"
    done
    cat llvm-vortex2.tar.bz2.parta* > llvm-vortex2.tar.bz2
    tar -xvf llvm-vortex2.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/llvm-vortex"
    mv llvm-vortex "$TOOLDIR"
    rm -rf llvm-vortex llvm-vortex2.tar.bz2*
}
# Downloads and unpacks the prebuilt libcrt32 archive and installs it as
# $TOOLDIR/libcrt32, replacing any previous install.
libcrt32()
{
    # drop a stale archive so wget does not save the new one under a ".1" name
    rm -f libcrt32.tar.bz2
    wget "$REPOSITORY/libcrt32/libcrt32.tar.bz2"
    tar -xvf libcrt32.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libcrt32"
    mv libcrt32 "$TOOLDIR"
    rm -rf libcrt32 libcrt32.tar.bz2
}
# Downloads and unpacks the prebuilt libcrt64 archive and installs it as
# $TOOLDIR/libcrt64, replacing any previous install.
libcrt64()
{
    # drop a stale archive so wget does not save the new one under a ".1" name
    rm -f libcrt64.tar.bz2
    wget "$REPOSITORY/libcrt64/libcrt64.tar.bz2"
    tar -xvf libcrt64.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libcrt64"
    mv libcrt64 "$TOOLDIR"
    rm -rf libcrt64 libcrt64.tar.bz2
}
# Downloads and unpacks the prebuilt libc32 archive and installs it as
# $TOOLDIR/libc32, replacing any previous install.
libc32()
{
    # drop a stale archive so wget does not save the new one under a ".1" name
    rm -f libc32.tar.bz2
    wget "$REPOSITORY/libc32/libc32.tar.bz2"
    tar -xvf libc32.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libc32"
    mv libc32 "$TOOLDIR"
    rm -rf libc32 libc32.tar.bz2
}
# Downloads and unpacks the prebuilt libc64 archive and installs it as
# $TOOLDIR/libc64, replacing any previous install.
libc64()
{
    # drop a stale archive so wget does not save the new one under a ".1" name
    rm -f libc64.tar.bz2
    wget "$REPOSITORY/libc64/libc64.tar.bz2"
    tar -xvf libc64.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/libc64"
    mv libc64 "$TOOLDIR"
    rm -rf libc64 libc64.tar.bz2
}
# Downloads and unpacks the prebuilt pocl2 archive for $OSVERSION and installs
# the extracted pocl tree as $TOOLDIR/pocl, replacing any previous install.
pocl()
{
    # drop a stale archive so wget does not save the new one under a ".1" name
    rm -f pocl2.tar.bz2
    wget "$REPOSITORY/pocl/$OSVERSION/pocl2.tar.bz2"
    tar -xvf pocl2.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/pocl"
    mv pocl "$TOOLDIR"
    # the archive extracts to "pocl" (not "pocl2"), so that is what gets cleaned up
    rm -rf pocl pocl2.tar.bz2
}
# Downloads and unpacks the prebuilt verilator archive for $OSVERSION and
# installs it as $TOOLDIR/verilator, replacing any previous install.
verilator()
{
    # drop a stale archive so wget does not save the new one under a ".1" name
    rm -f verilator.tar.bz2
    wget "$REPOSITORY/verilator/$OSVERSION/verilator.tar.bz2"
    tar -xvf verilator.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/verilator"
    mv verilator "$TOOLDIR"
    rm -rf verilator verilator.tar.bz2
}
# Downloads and unpacks the prebuilt sv2v archive for $OSVERSION and installs
# it as $TOOLDIR/sv2v, replacing any previous install.
sv2v()
{
    # drop a stale archive so wget does not save the new one under a ".1" name
    rm -f sv2v.tar.bz2
    wget "$REPOSITORY/sv2v/$OSVERSION/sv2v.tar.bz2"
    tar -xvf sv2v.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/sv2v"
    mv sv2v "$TOOLDIR"
    rm -rf sv2v sv2v.tar.bz2
}
# Downloads the three-part prebuilt yosys archive for $OSVERSION, joins and
# unpacks it and installs it as $TOOLDIR/yosys, replacing any previous install.
yosys()
{
    local parts x
    # every OS version currently ships the archive in three parts
    parts=$(echo {a..c})
    echo $parts
    rm -f yosys.tar.bz2.parta*
    for x in $parts
    do
        wget "$REPOSITORY/yosys/$OSVERSION/yosys.tar.bz2.parta$x"
    done
    cat yosys.tar.bz2.parta* > yosys.tar.bz2
    tar -xvf yosys.tar.bz2
    # Separate statements so that `set -e` aborts on a failed mkdir/mv
    # (failures inside an && chain are not fatal under `set -e`).
    mkdir -p "$TOOLDIR"
    rm -rf "$TOOLDIR/yosys"
    mv yosys "$TOOLDIR"
    rm -rf yosys yosys.tar.bz2*
}
# Prints the banner and the command-line synopsis.
show_usage()
{
    printf '%s\n' \
        "Install Pre-built Vortex Toolchain" \
        "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [--yosys] [--all] [-h|--help]"
}
# Process the arguments left to right; each flag runs the install function of
# the same name, --all runs all of them.
while [[ -n "$1" ]]; do
    case $1 in
        --pocl | --verilator | --riscv32 | --riscv64 | --llvm | --libcrt32 | \
        --libcrt64 | --libc32 | --libc64 | --sv2v | --yosys )
            # strip the leading dashes to get the function name
            ${1#--}
            ;;
        --all )
            pocl
            verilator
            llvm
            libcrt32
            libcrt64
            libc32
            libc64
            riscv32
            riscv64
            sv2v
            yosys
            ;;
        -h | --help )
            show_usage
            exit
            ;;
        * )
            show_usage
            exit 1
            ;;
    esac
    shift
done

167
ci/toolchain_prebuilt.sh.in Executable file
View file

@ -0,0 +1,167 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# exit when any command fails
set -e
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
OSVERSION=${OSVERSION:=@OSVERSION@}
# Archives $TOOLDIR/riscv32-gnu-toolchain, splits the archive into 50M parts
# and moves the parts to ./riscv32-gnu-toolchain/$OSVERSION.
riscv32()
{
    local pkg=riscv32-gnu-toolchain
    local archive=$pkg.tar.bz2
    local outdir=./$pkg/$OSVERSION
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    split -b 50M $archive "$archive.part"
    mkdir -p $outdir
    mv $archive.part* $outdir
    rm $archive
}
# Archives $TOOLDIR/riscv64-gnu-toolchain, splits the archive into 50M parts
# and moves the parts to ./riscv64-gnu-toolchain/$OSVERSION.
riscv64()
{
    local pkg=riscv64-gnu-toolchain
    local archive=$pkg.tar.bz2
    local outdir=./$pkg/$OSVERSION
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    split -b 50M $archive "$archive.part"
    mkdir -p $outdir
    mv $archive.part* $outdir
    rm $archive
}
# Archives $TOOLDIR/llvm-vortex as llvm-vortex2.tar.bz2, splits it into 50M
# parts and moves the parts to ./llvm-vortex/$OSVERSION.
llvm()
{
    local tree=llvm-vortex
    local archive=llvm-vortex2.tar.bz2
    local outdir=./$tree/$OSVERSION
    echo "prebuilt llvm-vortex2..."
    tar -C $TOOLDIR -cvjf $archive $tree
    split -b 50M $archive "$archive.part"
    mkdir -p $outdir
    mv $archive.part* $outdir
    rm $archive
}
# Archives $TOOLDIR/libcrt32 and moves the archive to ./libcrt32.
libcrt32()
{
    local pkg=libcrt32
    local archive=$pkg.tar.bz2
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    mkdir -p ./$pkg
    mv $archive ./$pkg
}
# Archives $TOOLDIR/libcrt64 and moves the archive to ./libcrt64.
libcrt64()
{
    local pkg=libcrt64
    local archive=$pkg.tar.bz2
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    mkdir -p ./$pkg
    mv $archive ./$pkg
}
# Archives $TOOLDIR/libc32 and moves the archive to ./libc32.
libc32()
{
    local pkg=libc32
    local archive=$pkg.tar.bz2
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    mkdir -p ./$pkg
    mv $archive ./$pkg
}
# Archives $TOOLDIR/libc64 and moves the archive to ./libc64.
libc64()
{
    local pkg=libc64
    local archive=$pkg.tar.bz2
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    mkdir -p ./$pkg
    mv $archive ./$pkg
}
# Archives $TOOLDIR/pocl as pocl2.tar.bz2 and moves it to ./pocl/$OSVERSION.
pocl()
{
    local tree=pocl
    local archive=pocl2.tar.bz2
    local outdir=./$tree/$OSVERSION
    echo "prebuilt $tree..."
    tar -C $TOOLDIR -cvjf $archive $tree
    mkdir -p $outdir
    mv $archive $outdir
}
# Archives $TOOLDIR/verilator and moves the archive to ./verilator/$OSVERSION.
verilator()
{
    local pkg=verilator
    local archive=$pkg.tar.bz2
    local outdir=./$pkg/$OSVERSION
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    mkdir -p $outdir
    mv $archive $outdir
}
# Archives $TOOLDIR/sv2v and moves the archive to ./sv2v/$OSVERSION.
sv2v()
{
    local pkg=sv2v
    local archive=$pkg.tar.bz2
    local outdir=./$pkg/$OSVERSION
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    mkdir -p $outdir
    mv $archive $outdir
}
# Archives $TOOLDIR/yosys, splits the archive into 50M parts and moves the
# parts to ./yosys/$OSVERSION.
yosys()
{
    local pkg=yosys
    local archive=$pkg.tar.bz2
    local outdir=./$pkg/$OSVERSION
    echo "prebuilt $pkg..."
    tar -C $TOOLDIR -cvjf $archive $pkg
    split -b 50M $archive "$archive.part"
    mkdir -p $outdir
    mv $archive.part* $outdir
    rm $archive
}
# Prints the banner and the command-line synopsis. The yosys flag is spelled
# --yosys, which is the only form the argument parser accepts.
show_usage()
{
    echo "Setup Pre-built Vortex Toolchain"
    echo "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [--yosys] [--all] [-h|--help]"
}
# Process the arguments left to right; each flag runs the packaging function
# of the same name, --all runs all of them.
while [[ -n "$1" ]]; do
    case $1 in
        --pocl | --verilator | --riscv32 | --riscv64 | --llvm | --libcrt32 | \
        --libcrt64 | --libc32 | --libc64 | --sv2v | --yosys )
            # strip the leading dashes to get the function name
            ${1#--}
            ;;
        --all )
            pocl
            verilator
            riscv32
            riscv64
            llvm
            libcrt32
            libcrt64
            libc32
            libc64
            sv2v
            yosys
            ;;
        -h | --help )
            show_usage
            exit
            ;;
        * )
            show_usage
            exit 1
            ;;
    esac
    shift
done

291
ci/trace_csv.py Executable file
View file

@ -0,0 +1,291 @@
#!/usr/bin/env python3
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import argparse
import csv
import re
import inspect
configs = None
def parse_args():
    """Parse the command line.

    Returns:
        argparse.Namespace with `type` (default 'simx'), `csv` (default
        'trace.csv') and the positional `log` path.
    """
    cli = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
    argument_specs = (
        (('-t', '--type'), {'default': 'simx', 'help': 'log type (rtlsim or simx)'}),
        (('-o', '--csv'), {'default': 'trace.csv', 'help': 'Output CSV file'}),
        (('log',), {'help': 'Input log file'}),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)
    return cli.parse_args()
def load_config(filename):
    """Read the first `CONFIGS:` header line from a log file.

    Args:
        filename: path of the log file to scan.

    Returns:
        dict of the seven header fields as ints (`local_mem_base` is parsed
        as hexadecimal).

    Exits the process with status 1 when no line matches the header pattern.
    """
    config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
    # field names in the order of the capture groups above
    field_names = ('num_threads', 'num_warps', 'num_cores', 'num_clusters',
                   'socket_size', 'local_mem_base', 'num_barriers')
    with open(filename, 'r') as file:
        for line in file:
            found = re.search(config_pattern, line)
            if not found:
                continue
            parsed = {}
            for name, text in zip(field_names, found.groups()):
                parsed[name] = int(text, 16) if name == 'local_mem_base' else int(text)
            return parsed
    print("Error: missing CONFIGS: header")
    sys.exit(1)
def parse_simx(log_lines):
    """Convert simx debug log lines into a list of per-instruction dicts.

    A "DEBUG Fetch:" line opens a new record; following "DEBUG Instr",
    "DEBUG Src" and "DEBUG Dest" lines add fields to it. A record is emitted
    when the next fetch line (or the end of the input) is reached. Any parse
    error is printed with its line number and discards the open record.

    Args:
        log_lines: iterable of log lines.

    Returns:
        list of dicts with keys lineno, PC, core_id, warp_id, tmask, uuid and,
        when present in the log, instr, opcode, operands, destination.
    """
    pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
    instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
    opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
    core_id_pattern = r"cid=(\d+)"
    warp_id_pattern = r"wid=(\d+)"
    tmask_pattern = r"tmask=(\d+)"
    operands_pattern = r"Src\d+ Reg: (.+)"
    destination_pattern = r"Dest Reg: (.+)"
    uuid_pattern = r"#(\d+)"

    def first_group(pattern, text):
        # raises AttributeError when the pattern does not match
        return re.search(pattern, text).group(1)

    entries = []
    current = None
    for lineno, line in enumerate(log_lines, start=1):
        try:
            if line.startswith("DEBUG Fetch:"):
                # flush the previous record before starting a new one
                if current:
                    entries.append(current)
                current = {
                    "lineno": lineno,
                    "PC": first_group(pc_pattern, line),
                    "core_id": int(first_group(core_id_pattern, line)),
                    "warp_id": int(first_group(warp_id_pattern, line)),
                    "tmask": first_group(tmask_pattern, line),
                    "uuid": int(first_group(uuid_pattern, line)),
                }
            elif line.startswith("DEBUG Instr"):
                current["instr"] = first_group(instr_pattern, line)
                current["opcode"] = first_group(opcode_pattern, line)
            elif line.startswith("DEBUG Src"):
                src_reg = first_group(operands_pattern, line)
                # source operands accumulate as a comma-separated list
                if 'operands' in current:
                    current["operands"] = current["operands"] + ', ' + src_reg
                else:
                    current["operands"] = src_reg
            elif line.startswith("DEBUG Dest"):
                current["destination"] = first_group(destination_pattern, line)
        except Exception as e:
            print("Error at line {}: {}".format(lineno, e))
            current = None
    if current:
        entries.append(current)
    return entries
def reverse_binary(bin_str):
    """Return `bin_str` with its elements in reverse order."""
    flipped = bin_str[::-1]
    return flipped
def bin_to_array(bin_str):
    """Return a list with `int()` applied to each character of `bin_str`."""
    return list(map(int, bin_str))
def append_reg(text, value, sep):
    """Append a register name to `text`.

    Args:
        text: string built so far.
        value: register number as a decimal string; numbers below 32 are
            written as `x<value>`, numbers of 32 and above as `f<number % 32>`.
        sep: when true, ", " is inserted before the name.

    Returns:
        (new_text, True)
    """
    prefix = ", " if sep else ""
    number = int(value)
    if number < 32:
        name = "x" + value
    else:
        name = "f" + str(number % 32)
    return text + prefix + name, True
def append_value(text, reg, value, tmask_arr, sep):
    """Append `<reg>={v0, v1, ...}` to `text`.

    One slot is written per entry of `tmask_arr`: the matching element of
    `value` where the mask entry is truthy, "-" otherwise.

    Returns:
        (new_text, sep) with `sep` as returned by append_reg.
    """
    text, sep = append_reg(text, reg, sep)
    slots = [value[lane] if tmask_arr[lane] else "-" for lane in range(len(tmask_arr))]
    text += "={" + ", ".join(slots) + "}"
    return text, sep
def parse_rtlsim(log_lines):
    """Convert rtlsim trace log lines into a list of per-instruction dicts.

    Only lines matching `<n>: cluster<c>-socket<s>-core<k>-(decode|issue|commit)`
    are used. Instructions are tracked by their `#uuid` across the three
    stages: decode creates the record, issue attaches the source operand data,
    commit merges the destination data, and a commit with `eop=1` finalizes
    the record and moves it to the result list.

    Reads the module-level `configs` dict ('num_cores', 'socket_size').

    Args:
        log_lines: iterable of log lines.

    Returns:
        list of dicts with keys uuid, PC, core_id, warp_id, tmask, instr,
        opcode, lineno, destination, operands.

    Errors raised while handling a line are printed with the line number and
    the line is skipped.
    """
    global configs
    line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
    pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
    instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
    ex_pattern = r"ex=([a-zA-Z]+)"
    op_pattern = r"op=([\?0-9a-zA-Z_\.]+)"
    warp_id_pattern = r"wid=(\d+)"
    tmask_pattern = r"tmask=(\d+)"
    wb_pattern = r"wb=(\d)"
    opds_pattern = r"opds=(\d+)"
    rd_pattern = r"rd=(\d+)"
    rs1_pattern = r"rs1=(\d+)"
    rs2_pattern = r"rs2=(\d+)"
    rs3_pattern = r"rs3=(\d+)"
    rs1_data_pattern = r"rs1_data=\{(.+?)\}"
    rs2_data_pattern = r"rs2_data=\{(.+?)\}"
    rs3_data_pattern = r"rs3_data=\{(.+?)\}"
    rd_data_pattern = r"data=\{(.+?)\}"
    eop_pattern = r"eop=(\d)"
    uuid_pattern = r"#(\d+)"
    entries = []
    # in-flight instructions, keyed by uuid
    instr_data = {}
    num_cores = configs['num_cores']
    socket_size = configs['socket_size']
    # ceiling division
    num_sockets = (num_cores + socket_size - 1) // socket_size
    for lineno, line in enumerate(log_lines, start=1):
        try:
            line_match = re.search(line_pattern, line)
            if line_match:
                PC = re.search(pc_pattern, line).group(1)
                warp_id = int(re.search(warp_id_pattern, line).group(1))
                tmask = re.search(tmask_pattern, line).group(1)
                uuid = int(re.search(uuid_pattern, line).group(1))
                cluster_id = int(line_match.group(1))
                socket_id = int(line_match.group(2))
                core_id = int(line_match.group(3))
                stage = line_match.group(4)
                if stage == "decode":
                    trace = {}
                    trace["uuid"] = uuid
                    trace["PC"] = PC
                    # flatten (cluster, socket, core) into a single core index
                    trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
                    trace["warp_id"] = warp_id
                    # the mask string is stored reversed
                    # NOTE(review): presumably the log prints the highest thread first - confirm
                    trace["tmask"] = reverse_binary(tmask)
                    trace["instr"] = re.search(instr_pattern, line).group(1)
                    trace["opcode"] = re.search(op_pattern, line).group(1)
                    # opds digits: indices 1..3 flag whether rs1..rs3 data is captured at issue
                    trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
                    trace["rd"] = re.search(rd_pattern, line).group(1)
                    trace["rs1"] = re.search(rs1_pattern, line).group(1)
                    trace["rs2"] = re.search(rs2_pattern, line).group(1)
                    trace["rs3"] = re.search(rs3_pattern, line).group(1)
                    instr_data[uuid] = trace
                elif stage == "issue":
                    # issue lines for a uuid that was never decoded are ignored
                    if uuid in instr_data:
                        trace = instr_data[uuid]
                        trace["lineno"] = lineno
                        opds = trace["opds"]
                        # per-operand value lists are stored in reversed order, like the mask
                        if opds[1]:
                            trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
                        if opds[2]:
                            trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
                        if opds[3]:
                            trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
                        trace["issued"] = True
                        instr_data[uuid] = trace
                elif stage == "commit":
                    if uuid in instr_data:
                        trace = instr_data[uuid]
                        # commits are only processed once the instruction has been issued
                        if "issued" in trace:
                            opds = trace["opds"]
                            dst_tmask_arr = bin_to_array(tmask)[::-1]
                            wb = re.search(wb_pattern, line).group(1) == "1"
                            if wb:
                                rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
                                if 'rd_data' in trace:
                                    # a later commit for the same uuid only overwrites
                                    # the lanes enabled in this commit's mask
                                    merged_rd_data = trace['rd_data']
                                    for i in range(len(dst_tmask_arr)):
                                        if dst_tmask_arr[i] == 1:
                                            merged_rd_data[i] = rd_data[i]
                                    trace['rd_data'] = merged_rd_data
                                else:
                                    trace['rd_data'] = rd_data
                                instr_data[uuid] = trace
                            eop = re.search(eop_pattern, line).group(1) == "1"
                            if eop:
                                # eop=1: build the final text fields, drop the
                                # bookkeeping keys and emit the record
                                tmask_arr = bin_to_array(trace["tmask"])
                                destination = ''
                                if wb:
                                    destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
                                    del trace['rd_data']
                                trace["destination"] = destination
                                operands = ''
                                sep = False
                                if opds[1]:
                                    operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
                                    del trace["rs1_data"]
                                if opds[2]:
                                    operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
                                    del trace["rs2_data"]
                                if opds[3]:
                                    operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
                                    del trace["rs3_data"]
                                trace["operands"] = operands
                                del trace["opds"]
                                del trace["rd"]
                                del trace["rs1"]
                                del trace["rs2"]
                                del trace["rs3"]
                                del trace["issued"]
                                del instr_data[uuid]
                                entries.append(trace)
        except Exception as e:
            print("Error at line {}: {}".format(lineno, e))
    return entries
def write_csv(sublogs, csv_filename, log_type):
    """Parse every sublog and write all trace entries to one CSV file.

    Args:
        sublogs: list of sublogs, each a list of log lines.
        csv_filename: path of the CSV file to create or overwrite.
        log_type: "rtlsim" or "simx"; selects the parser.

    Entries are sorted by uuid within each sublog. The parser-internal
    'lineno' key is dropped before writing.

    Exits the process with status 1 on an unknown `log_type`. This is checked
    before the output file is opened, so a bad invocation no longer truncates
    an existing CSV and no longer reports success.
    """
    if log_type not in ("rtlsim", "simx"):
        print('Error: invalid log type')
        sys.exit(1)
    parse = parse_rtlsim if log_type == "rtlsim" else parse_simx

    with open(csv_filename, 'w', newline='') as csv_file:
        fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for sublog in sublogs:
            entries = parse(sublog)
            # sort entries by uuid
            entries.sort(key=lambda x: (int(x['uuid'])))
            for entry in entries:
                # 'lineno' is not a CSV column; tolerate entries that lack it
                entry.pop('lineno', None)
                writer.writerow(entry)
def split_log_file(log_filename):
    """Split a log file into sublogs at each "[VXDRV] START" line.

    Args:
        log_filename: path of the log file.

    Returns:
        list of sublogs (lists of lines). Each sublog starts with its marker
        line; lines before the first marker are dropped. When the file holds
        no marker, the whole file is returned as a single sublog.
    """
    with open(log_filename, 'r') as log_file:
        log_lines = log_file.readlines()

    marker_positions = [index for index, text in enumerate(log_lines)
                        if text.startswith("[VXDRV] START")]
    if not marker_positions:
        return [log_lines]

    # each sublog runs from one marker up to (not including) the next
    bounds = marker_positions + [len(log_lines)]
    return [log_lines[begin:end] for begin, end in zip(bounds, bounds[1:])]
def main():
    """Entry point: load the CONFIGS header, split the log and write the CSV."""
    global configs
    options = parse_args()
    log_path = options.log
    # parse_rtlsim reads the module-level `configs`, so set it before parsing
    configs = load_config(log_path)
    write_csv(split_log_file(log_path), options.csv, options.type)


if __name__ == "__main__":
    main()

View file

@ -1,51 +1,73 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import time
import threading
import subprocess
# This script executes a long-running command while outputing "still running ..." periodically
# This script executes a long-running command while printing "still running ..." periodically
# to notify Travis build system that the program has not hanged
PING_INTERVAL=15
PING_INTERVAL=300 # 5 minutes
SLEEP_INTERVAL=1 # 1 second
def monitor(stop):
wait_time = 0
while True:
time.sleep(PING_INTERVAL)
wait_time += PING_INTERVAL
print(" + still running (" + str(wait_time) + "s) ...")
sys.stdout.flush()
if stop():
break
def monitor(stop_event):
wait_time = 0
elapsed_time = 0
while not stop_event.is_set():
time.sleep(SLEEP_INTERVAL)
elapsed_time += SLEEP_INTERVAL
if elapsed_time >= PING_INTERVAL:
wait_time += elapsed_time
print(" + still running (" + str(wait_time) + "s) ...")
sys.stdout.flush()
elapsed_time = 0
def execute(command):
process = subprocess.Popen(command, stdout=subprocess.PIPE)
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
while True:
output = process.stdout.readline()
if output:
line = output.decode('ascii').rstrip()
try:
line = output.decode('utf-8').rstrip()
except UnicodeDecodeError:
line = repr(output) # Safely print raw binary data
print(">>> " + line)
process.stdout.flush()
ret = process.poll()
if ret is not None:
return ret
return ret
return -1
def main(argv):
if not argv:
print("Usage: travis_run.py <command>")
sys.exit(1)
# start monitoring thread
stop_monitor = False
t = threading.Thread(target = monitor, args =(lambda : stop_monitor, ))
stop_event = threading.Event()
t = threading.Thread(target=monitor, args=(stop_event,))
t.start()
# execute command
exitcode = execute(argv)
exitcode = execute(argv)
print(" + exitcode="+str(exitcode))
sys.stdout.flush()
# terminate monitoring thread
stop_monitor = True
stop_event.set()
t.join()
sys.exit(exitcode)

34
config.mk.in Normal file
View file

@ -0,0 +1,34 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build configuration. The @NAME@ tokens are placeholders that are replaced
# with concrete values when this template is instantiated. Every variable is
# assigned with ?=, so a value already set by the caller takes precedence.
VORTEX_HOME ?= @VORTEX_HOME@
XLEN ?= @XLEN@
TOOLDIR ?= @TOOLDIR@
OSVERSION ?= @OSVERSION@
INSTALLDIR ?= @INSTALLDIR@
# Tool locations derived from TOOLDIR and XLEN
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
LIBC_VORTEX ?= $(TOOLDIR)/libc$(XLEN)
LIBCRT_VORTEX ?= $(TOOLDIR)/libcrt$(XLEN)
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party

176
configure vendored Executable file
View file

@ -0,0 +1,176 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Determine the current working directory
CURRENT_DIR=$(pwd)
# Function to detect current OS
detect_osversion() {
    # Prints the OS/version key of the host ("ubuntu/bionic", "ubuntu/focal"
    # or "centos/7"), or "unsupported" when the host is not recognized.
    if [ ! -f /etc/os-release ]; then
        echo "unsupported"
        return
    fi

    local osversion="unsupported"
    . /etc/os-release  # defines ID, VERSION_CODENAME, VERSION_ID
    case "$ID" in
        ubuntu)
            case "$VERSION_CODENAME" in
                bionic)                osversion="ubuntu/bionic" ;;
                # jammy and noble map to the focal key
                focal | jammy | noble) osversion="ubuntu/focal" ;;
                # Add new versions as needed
            esac
            ;;
        centos)
            if [ "$VERSION_ID" == "7" ]; then
                osversion="centos/7"
            fi
            # Add new versions as needed
            ;;
    esac
    echo "$osversion"
}
# Function to recursively copy files, skipping the current directory
# Copies build files from a source tree into a target tree.
#   $1  source directory
#   $2  target directory
# Reads the global SUBDIRS array for the directory patterns to process, and
# SOURCE_DIR, XLEN, TOOLDIR, OSVERSION, PREFIX, CURRENT_DIR for the placeholder
# values substituted into *.in templates.
copy_files() {
    local source_dir="$1"
    local target_dir="$2"
    #echo "source_dir=$source_dir, target_dir=$target_dir"
    # remember whether source and target resolve to the same directory
    local same_dir=0
    if [ "$(realpath "$source_dir")" == "$(realpath "$target_dir")" ]; then
        same_dir=1
    fi
    # Function to copy and update file
    #   $1  glob pattern of source files (deliberately expanded unquoted below)
    #   $2  destination directory
    copy_and_update() {
        local src_pattern="$1"
        local dest_dir="$2"
        for file in $src_pattern; do
            #echo "*** $file > $dest_dir"
            if [ -f "$file" ]; then
                if [[ "$file" == *.in ]]; then
                    # template: substitute the @NAME@ placeholders and write
                    # the result without the .in suffix
                    filename=$(basename -- "$file")
                    filename_no_ext="${filename%.in}"
                    dest_file="$dest_dir/$filename_no_ext"
                    mkdir -p "$dest_dir"
                    sed "s|@VORTEX_HOME@|$SOURCE_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@BUILDDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
                    # apply permissions to bash scripts
                    read -r firstline < "$dest_file"
                    if [[ "$firstline" =~ ^#!.*bash ]]; then
                        chmod +x "$dest_file"
                    fi
                else
                    # plain file: copy only when the target is a different directory
                    if [ $same_dir -eq 0 ]; then
                        mkdir -p "$dest_dir"
                        cp -p "$file" "$dest_dir"
                    fi
                fi
            fi
        done
    }
    for pattern in "${SUBDIRS[@]}"; do
        # a leading '!' requests a copy of every file in the matched directories
        local full_copy=0
        if [[ "$pattern" == !* ]]; then
            full_copy=1
            pattern=${pattern:1}
        fi
        local source_pattern="$source_dir/$pattern"
        # "." selects the source directory itself
        if [[ "$pattern" == "." ]]; then
            source_pattern=$source_dir
        fi
        find "$source_dir" -type d -path "$source_pattern" 2>/dev/null | while read dir; do
            # Compute the relative path of the directory
            local rel_path="${dir#$source_dir}"
            rel_path="${rel_path#/}" # Remove leading slash, if present
            local full_target_dir="$target_dir/$rel_path"
            # Copy and update Makefile and common.mk if they exist
            if [ $full_copy -eq 1 ]; then
                copy_and_update "$dir/*" "$full_target_dir"
            else
                copy_and_update "$dir/Makefile" "$full_target_dir"
                copy_and_update "$dir/common.mk" "$full_target_dir"
                copy_and_update "$dir/*.in" "$full_target_dir"
            fi
        done
    done
}
###############################################################################
# default configuration parameters
default_xlen=32
default_tooldir=$HOME/tools
default_osversion=$(detect_osversion)
default_prefix=$CURRENT_DIR

# load default configuration parameters from existing config.mk
# Each "NAME ?= value" line is split at the first "=", leaving "NAME ?" in
# key and the value (with surrounding blanks) in value.
if [ -f "config.mk" ]; then
    # "|| [ -n "$key" ]" also processes a last line lacking a trailing newline
    while IFS='=' read -r key value || [ -n "$key" ]; do
        value=${value//[@]/} # Remove placeholder characters
        value="${value#"${value%%[![:space:]]*}"}" # Remove leading whitespace
        value="${value%"${value##*[![:space:]]}"}" # Remove trailing whitespace
        case $key in
            XLEN\ ?*) default_xlen=${value//\?=/} ;;
            TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
            OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
            # The generated config.mk stores the prefix under INSTALLDIR;
            # PREFIX is still accepted for backward compatibility.
            INSTALLDIR\ ?*|PREFIX\ ?*) default_prefix=${value//\?=/} ;;
        esac
    done < config.mk
fi

# set configuration parameters
# Values already present in the environment win over the defaults.
XLEN=${XLEN:=$default_xlen}
TOOLDIR=${TOOLDIR:=$default_tooldir}
OSVERSION=${OSVERSION:=$default_osversion}
PREFIX=${PREFIX:=$default_prefix}
# parse command line arguments

# Print the command line help and terminate the script.
#   $1 - exit status to terminate with (optional, default: 1 so that a
#        usage error is reported as a failure)
usage() {
    echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>] [--prefix=<path>]"
    echo " --xlen=<value> Set the XLEN value (default: 32)"
    echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
    echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
    echo " --prefix=<path> Set installation directory"
    exit "${1:-1}"
}

# Every option has the form --name=value; "${1#*=}" keeps the text after the
# first "=".
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --xlen=*) XLEN="${1#*=}" ;;
        --tooldir=*) TOOLDIR="${1#*=}" ;;
        --osversion=*) OSVERSION="${1#*=}" ;;
        --prefix=*) PREFIX="${1#*=}" ;;
        # An explicit help request is not an error.
        -h|--help) usage 0 ;;
        *) echo "Unknown parameter passed: $1"; usage ;;
    esac
    shift
done
# Abort when no supported OS was detected or supplied
[ "$OSVERSION" != "unsupported" ] || {
    echo "Error: Unsupported OS."
    exit -1
}

# Directory containing this script, i.e. the root of the source tree
SOURCE_DIR="$(
    cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd
)"
THIRD_PARTY_DIR="$SOURCE_DIR/third_party"

# Project subdirectories to process; an entry with a leading "!" has all of
# its files copied instead of only the build files and templates
SUBDIRS=(
    "." "!ci" "!perf"
    "hw*" "kernel*" "runtime*" "sim*" "tests*"
)

# Populate the build directory from the source tree
copy_files "$SOURCE_DIR" "$CURRENT_DIR"

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 463 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 517 KiB

View file

@ -2,69 +2,26 @@
The Vortex Cache Sub-system has the following main properties:
- High-bandwidth with bank parallelism
- Snoop protocol to flush data for CPU access
- Generic design: Dcache, Icache, Shared Memory, L2 cache, L3 cache
- High-bandwidth transfer with Multi-bank parallelism
- Non-blocking pipelined write-through cache architecture with per-bank MSHR
- Configurable design: Dcache, Icache, L2 cache, L3 cache
### Cache Hierarchy
### Cache Microarchitecture
![Image of Cache Hierarchy](./assets/img/cache_hierarchy.png)
![Image of Cache Hierarchy](./assets/img/cache_microarchitecture.png)
- Cache can be configured to be any level in the hierarchy
- Caches communicate via snooping
- Cache flush from AFU is passed down the hierarchy
The Vortex cache is comprised of multiple parallel banks. It is comprised of the following modules:
- **Bank request dispatch crossbar**: assigns a bank to incoming requests and resolves collisions using stalls.
- **Bank response merge crossbar**: merges results from the banks and forwards them to the core response.
- **Memory request multiplexer**: arbitrates bank memory requests
- **Memory response demultiplexer**: forwards memory response to the corresponding bank.
- **Flush Unit**: performs tag memory initialization.
### VX_cache.v (Top Module)
Incoming requests entering the cache are sent to a dispatch crossbar that selects the corresponding bank for each request, resolving bank collisions with stalls. The result output of each bank is merged back into the outgoing response port via a merger crossbar. Each bank integrates a non-blocking pipeline with a local Miss Status Holding Register (MSHR) to reduce the miss rate. The bank pipeline consists of the following stages:
`VX_cache.v` is the top module of the cache Verilog code located in the `/hw/rtl/cache` directory.
- **Schedule**: Selects the next request into the pipeline from the incoming core request, memory fill, or the MSHR entry, with priority given to the latter.
- **Tag Access**: single-port read/write access to the tag store.
- **Data Access**: Single-port read/write access to the data store.
- **Response Handling**: Core response back to the core.
![Image of Vortex Cache](./assets/img/vortex_cache_top_module.png)
- Configurable (Cache size, number of banks, bank line size, etc.)
- I/O signals
- Core Request
- Core Rsp
- DRAM Req
- DRAM Rsp
- Snoop Rsp
- Snoop Rsp
- Snoop Forwarding Out
- Snoop Forwarding In
- Bank Select
- Assigns valid and ready signals for each bank
- Snoop Forwarder
- DRAM Request Arbiter
- Prepares cache response for communication with DRAM
- Snoop Response Arbiter
- Sends snoop response
- Core Response Merge
- Cache accesses one line at a time. As a result, each request may not come back in the same response. This module tries to recombine the responses by thread ID.
### VX_bank.v
VX_bank.v is the verilog code that handles cache bank functionality and is located in the `/hw/rtl/cache` directory.
![Image of Vortex Cache Bank](./assets/img/vortex_bank.png)
- Allows for high throughput
- Each bank contains queues to hold requests to the cache
- I/O signals
- Core request
- Core Response
- DRAM Fill Requests
- DRAM Fill Response
- DRAM WB Requests
- Snp Request
- Snp Response
- Request Priority: DRAM fill, miss reserve, core request, snoop request
- Snoop Request Queue
- DRAM Fill Queue
- Core Req Arbiter
- Requests to be processed by the bank
- Tag Data Store
- Registers for valid, dirty, dirtyb, tag, and data
- Length of registers determined by lines in the bank
- Tag Data Access:
- I/O: stall, snoop info, force request miss
- Writes to cache or sends read response; hit or miss determined here
- A missed request goes to the miss reserve if it is not a snoop request or DRAM fill
Deadlocks inside the cache can occur when the MSHR is full and a new request is already in the pipeline. It can also occur when the memory request queue is full, and there is an incoming memory response. The cache mitigates MSHR deadlocks by using an early full signal before a new request is issued and similarly mitigates memory deadlocks by ensuring that its request queue never fills up.

View file

@ -3,38 +3,39 @@
The directory/file layout of the Vortex codebase is as follows:
- `hw`:
- `rtl`: hardware rtl sources
- `cache`: cache subsystem code
- `fp_cores`: floating point unit code
- `rtl`: hardware rtl sources
- `core`: core pipeline
- `cache`: cache subsystem
- `mem`: memory subsystem
- `fpu`: floating point unit
- `interfaces`: interfaces for inter-module communication
- `libs`: general-purpose RTL modules
- `libs`: general-purpose RTL modules
- `syn`: synthesis directory
- `opae`: OPAE synthesis scripts
- `quartus`: Quartus synthesis scripts
- `altera`: Altera synthesis scripts
- `xilinx`: Xilinx synthesis scripts
- `synopsys`: Synopsys synthesis scripts
- `modelsim`: Modelsim synthesis scripts
- `yosys`: Yosys synthesis scripts
- `unit_tests`: unit tests for some hardware components
- `driver`: host drivers repository
- `runtime`: host runtime software APIs
- `include`: Vortex driver public headers
- `stub`: Vortex stub driver library
- `fpga`: software driver that uses Intel OPAE FPGA
- `asesim`: software driver that uses Intel ASE simulator
- `vlsim`: software driver that uses vlsim simulator
- `opae`: software driver that uses Intel OPAE API with device targets=fpga|asesim|opaesim
- `xrt`: software driver that uses Xilinx XRT API with device targets=hw|hw_emu|sw_emu
- `rtlsim`: software driver that uses rtlsim simulator
- `simx`: software driver that uses simX simulator
- `runtime`: kernel runtime software
- `kernel`: GPU kernel software APIs
- `include`: Vortex runtime public headers
- `linker`: linker file for compiling kernels
- `src`: runtime implementation
- `sim`:
- `vlsim`: AFU RTL simulator
- `opaesim`: Intel OPAE AFU RTL simulator
- `rtlsim`: processor RTL simulator
- `simX`: cycle approximate simulator for vortex
- `tests`: tests repository.
- `runtime`: runtime tests
- `regression`: regression tests
- `riscv`: RISC-V standard tests
- `riscv`: RISC-V conformance tests
- `kernel`: kernel tests
- `regression`: regression tests
- `opencl`: opencl benchmarks and tests
- `ci`: continuous integration scripts
- `miscs`: miscellaneous resources.

View file

@ -0,0 +1,36 @@
# Continuous Integration
- Each time you push to the repo, the Continuous Integration pipeline will run
- This pipeline consists of creating the correct development environment, building your code, and running all tests
- This is an extensive pipeline so it might take some time to complete
## Protecting Master Branch
Navigate to your Repository:
Open your repository on GitHub.
Click on "Settings":
In the upper-right corner of your repository page, click on the "Settings" tab.
Select "Branches" in the left sidebar:
On the left sidebar, look for the "Branches" option and click on it.
Choose the Branch:
Under "Branch protection rules," select the branch you want to protect. In this case, choose the main branch.
Enable Branch Protection:
Check the box that says "Protect this branch."
Configure Protection Settings:
You can configure various protection settings. Some common settings include:
Require pull request reviews before merging: This ensures that changes are reviewed before being merged.
Require status checks to pass before merging: This ensures that automated tests and checks are passing.
Require signed commits: This enforces that commits are signed with a verified signature.
Restrict Who Can Push:
You can further restrict who can push directly to the branch. You might want to limit this privilege to specific people or teams.
Save Changes:
Once you've configured the protection settings, scroll down and click on the "Save changes" button.
Now, your main branch is protected, and certain criteria must be met before changes can be pushed directly to it. Contributors will need to create pull requests, have their changes reviewed, and meet other specified criteria before the changes can be merged into the main branch.

37
docs/contributing.md Normal file
View file

@ -0,0 +1,37 @@
# Contributing to Vortex
## Github
Vortex uses Github to host its git repositories.
There are a lot of ways to use the features on Github for collaboration.
Therefore, this documentation details the standard procedure for contributing to Vortex.
Development of Vortex is consolidated to this repo, `vortex` and any associated forks.
Previously, there was active work done on a private repo named `vortex-dev`.
`vortex-dev` has officially been deprecated and fully merged into this public repo, `vortex`.
If you are returning to this project and have legacy versions of Vortex, you can use the releases branches to access older versions.
## Contribution Process
In an effort to keep `vortex` organized, permissions to directly create branches and push code have been limited to admins.
However, contributions are strongly encouraged and keep the project moving forward! Here is the procedure for contributing:
1. Create a fork of `vortex`
2. In your fork, create a branch from `master` that briefly explains the work you are adding (ie: `develop-documentation`)
3. Make your changes on the new branch in your fork. You may create as many commits as you need, which might be common if you are making multiple iterations
4. Since you are the owner of your fork, you have full permissions to push commits to your fork
4. When you are satisfied with the changes on your fork, you can open a PR from your fork using the online interface
5. If you recently made a push, you will automatically get a prompt on Github online to create a PR, which you can press
6. Otherwise, you can go to your fork on Github online and manually create a PR (todo)
(todo): how to name and format your PR, what information you should add to the PR, does not need to be too strict if you are attending the weekly meetings*
7. Github uses the following semantics: `base repository` gets the changes from your `head repository`
8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `master` since the master branch is protected by reviewed PRs.
9. And you should assign the `head repository` to `<your-github-username>/vortex` (which represents your fork of vortex) and the `compare` branch to the one created in step 2
10. Now that your intended PR has been specified, you should review the status. Check for merge conflicts, if all your commits are present, and all the modified files make sense
11. You can still make a PR if there are issues in step 10, just make sure the structure is correct according to steps 7-9
12. Once the PR is made, the CI pipeline will run automatically, testing your changes
13. Remember, a PR is flexible if you need to make changes to the code you can go back to your branch of the fork to commit and push any updates
14. As long as the `head repository`'s `compare` branch is the one you edited, the PR will automatically get the most recent changes
15. When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR
## What Makes a Good Contribution?
- If you are contributing code changes, then review [testing.md](./testing.md) to ensure your tests are integrated into the [CI pipeline](continuous_integration.md)
- During a PR, you should consider the advice you are provided by your reviewers. Remember, you can keep adding commits to an open PR!
- If your change aims to fix an issue opened on Github, please tag that issue in the PR itself

View file

@ -1,38 +1,52 @@
# Debugging Vortex Hardware
# Debugging Vortex GPU
## Testing changes to the RTL or simulator GPU driver.
The Blackbox utility script will not pick up your changes if the h/w configuration is the same as during the last run.
To force the utility to build the driver, you need to pass the --rebuild=1 option when running tests.
Using --rebuild=0 will prevent the rebuild even if the h/w configuration is different from last run.
$ ./ci/blackbox.sh --driver=simx --app=demo --rebuild=1
## SimX Debugging
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
The recommended method to enable debugging is to pass the `--debug=<level>` flag to `blackbox` tool when running a program.
// Running demo program on SimX in debug mode
$ ./ci/blackbox.sh --driver=simx --app=demo --debug
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=1
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (decoded instruction, register states, pipeline states, etc..). You can increase the verbosity level of the trace by changing the `DEBUG_LEVEL` variable to a value [1-5] (default is 3).
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (decoded instruction, register states, pipeline states, etc..). You can increase the verbosity of the trace by changing the debug level.
// Using SimX in debug mode with verbose level 4
$ CONFIGS=-DDEBUG_LEVEL=4 ./ci/blackbox.sh --driver=simx --app=demo --debug
// Using SimX in debug mode with verbose level 3
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3
## RTL Debugging
To debug the processor RTL, you need to use VLSIM or RTLSIM driver. VLSIM simulates the full processor including the AFU command processor (using `/rtl/afu/vortex_afu.sv` as top module). RTLSIM simulates the Vortex processor only (using `/rtl/Vortex.v` as top module).
To debug the processor RTL, you need to use VLSIM or RTLSIM driver. VLSIM simulates the full processor including the AFU command processor (using `/rtl/afu/opae/vortex_afu.sv` as top module). RTLSIM simulates the Vortex processor only (using `/rtl/Vortex.v` as top module).
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
// Running demo program on vlsim in debug mode
$ ./ci/blackbox.sh --driver=vlsim --app=demo --debug
// Running demo program on the opae simulator in debug mode
$ TARGET=opaesim ./ci/blackbox.sh --driver=opae --app=demo --debug=1
// Running demo program on rtlsim in debug mode
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc..). A waveform trace `trace.vcd` is also generated in the current directory during the program execution. You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc..). [GTKwave](http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc..). A waveform trace `trace.vcd` is also generated in the current directory during the program execution.
By default, all library modules under the /libs/ folder are excluded from the trace to reduce the waveform file size. You can change that behavior by either explicitly commenting out `TRACING_OFF`/`TRACING_ON` inside a lib module source (e.g. VX_stream_buffer.sv) or simply enabling a full trace using the following command.
// Debugging the demo program with rtlsim in full tracing mode
$ CONFIGS="-DTRACING_ALL" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1
You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc..). [GTKwave](http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.
## FPGA Debugging
Debugging the FPGA directly may be necessary to investigate runtime bugs that the RTL simulation cannot catch. We have implemented an in-house scope analyzer for Vortex that works when the FPGA is running. To enable the FPGA scope analyzer, the FPGA bitstream should be built using `SCOPE=1` flag
$ cd /hw/syn/opae
$ CONFIGS=-DSCOPE=1 make fpga-4c
$ CONFIGS="-DSCOPE=1" TARGET=fpga make
When running the program on the FPGA, you need to pass the `--scope` flag to the `blackbox` tool.
@ -40,4 +54,21 @@ When running the program on the FPGA, you need to pass the `--scope` flag to the
$ ./ci/blackbox.sh --driver=fpga --app=demo --scope
A waveform trace `trace.vcd` will be generated in the current directory during the program execution. This trace includes a limited set of signals that are defined in `/hw/scripts/scope.json`. You can expand your signals' selection by updating the json file.
A waveform trace `trace.vcd` will be generated in the current directory during the program execution. This trace includes a limited set of signals that are defined in `/hw/scripts/scope.json`. You can expand your signals' selection by updating the json file.
## Analyzing Vortex trace log
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands.
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
$ ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
$ diff trace_rtlsim.csv trace_simx.csv
The first column in the CSV trace is UUID (universal unique identifier) of the instruction and the content is sorted by the UUID.
You can use the UUID to trace the same instruction running on either the RTL hw or SimX simulator.
This can be very effective if you want to use SimX to debug your RTL hardware by comparing CSV traces.

51
docs/environment_setup.md Normal file
View file

@ -0,0 +1,51 @@
# Environment Setup
These instructions apply to the development vortex repo using the updated toolchain. The updated toolchain is considered to be any commit of `master` pulled from July 2, 2023 onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`.
## Set Up on Your Own System
The toolchain binaries provided with Vortex are built on Ubuntu-based systems. To install Vortex on your own system, [follow these instructions](install_vortex.md).
## Servers for Georgia Tech Students and Collaborators
### Volvo
Volvo is a 64-core server provided by HPArch. You need valid credentials to access it. If you don't already have access, you can get in contact with your mentor to ask about setting your account up.
Setup on Volvo:
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
2. `ssh volvo.cc.gatech.edu`
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
4. `source /nethome/software/set_vortex_env.sh` to set up the necessary environment variables.
5. `make -s` in the `vortex` root directory
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
### Nio
Nio is a 20-core desktop server provided by HPArch. If you have access to Volvo, you also have access to Nio.
Setup on Nio:
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
2. `ssh nio.cc.gatech.edu`
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
4. `source /opt/set_vortex_env_dev.sh` to set up the necessary environment variables.
5. `make -s` in the `vortex` root directory
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
## Docker (Experimental)
Docker allows for isolated pre-built environments to be created, shared and used. The emulation mode required for ARM-based processors will incur a decrease in performance. Currently, the dockerfile is not included with the official vortex repository and is not actively maintained or supported.
### Setup with Docker
1. Clone repo recursively onto your local machine: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
2. Download the dockerfile from [here](https://github.gatech.edu/gist/usubramanya3/f1bf3e953faa38a6372e1292ffd0b65c) and place it in the root of the repo.
3. Build the Dockerfile into an image: `docker build --platform=linux/amd64 -t vortex -f dockerfile .`
4. Run a container based on the image: `docker run --rm -v ./:/root/vortex/ -it --name vtx-dev --privileged=true --platform=linux/amd64 vortex`
5. Install the toolchain `./ci/toolchain_install.sh --all` (once per container)
6. `make -s` in `vortex` root directory
7. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
You may exit from a container and resume a container you have exited or start a second terminal session `docker exec -it <container-name> bash`

View file

@ -1,128 +0,0 @@
# Execute OpenCL on Vortex backend
## Requirements
- [Vortex](https://github.com/vortexgpgpu/vortex)
- [POCL for Vortex](https://github.com/vortexgpgpu/pocl)
- [riscv-toolchain](https://github.com/riscv-collab/riscv-gnu-toolchain)
- [llvm-riscv](https://github.com/llvm-mirror/llvm)
For installation, please see [Build Instructions](../README.md) for more details.
**For Ubuntu18.04 users, you can directly download pre-build toolchains with [toolchain_install.sh](https://github.com/vortexgpgpu/vortex/blob/master/ci/toolchain_install.sh) script.**
```bash
# please modify the DESTDIR variable in the script before execution
bash toolchain_install.sh -all
```
Assuming we have installed all dependencies in `/opt` path, we can get the following environment:
```bash
tree -L 2 /opt
'''
/opt/
├── llvm-riscv
│ ├── bin
│ ├── include
│ ├── lib
│ ├── libexec
│ └── share
├── pocl
│ ├── compiler
│ └── runtime
├── riscv-gnu-toolchain
│ ├── bin
│ ├── drops
│ ├── include
│ ├── lib
│ ├── libexec
│ ├── riscv32-unknown-elf
│ ├── share
│ └── var
└── verilator
├── bin
├── examples
├── include
├── verilator-config.cmake
└── verilator-config-version.cmake
'''
```
## Execute OpenCL on Vortex
In this tutorial, we show an example of executing a vecadd program on the SIMX backend.
To execute an OpenCL program on Vortex, we have the following steps:
- Compile the [OpenCL kernels](https://github.com/vortexgpgpu/vortex/blob/master/tests/opencl/vecadd/kernel.cl) into risc-v binary by POCL compiler.
- Compile the [OpenCL host](https://github.com/vortexgpgpu/vortex/blob/master/tests/opencl/vecadd/main.cc) and link with Vortex driver(```-lvortex```).
- Execute the compiled host programs on a backend.
Thus, we can write a Makefile as following:
```Makefile
LLVM_PREFIX ?= /opt/llvm-riscv
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
POCL_CC_PATH ?= /opt/pocl/compiler
POCL_RT_PATH ?= /opt/pocl/runtime
OPTS ?= -n64
# please edit these two variable to your environment
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -mattr=+vortex -float-abi=hard -code-model=small"
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -Xclang -target-feature -Xclang +vortex -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
K_LDFLAGS += "-Wl,-Bstatic,-T$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a -lm"
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter
CXXFLAGS += -I$(POCL_RT_PATH)/include
LDFLAGS += -L$(POCL_RT_PATH)/lib -L$(VORTEX_DRV_PATH)/stub -lOpenCL -lvortex
PROJECT = vecadd
SRCS = main.cc
all: $(PROJECT) kernel.pocl
kernel.pocl: kernel.cl
LLVM_PREFIX=$(LLVM_PREFIX) POCL_DEBUG=all LD_LIBRARY_PATH=$(LLVM_PREFIX)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -LLCFLAGS $(K_LLCFLAGS) -CFLAGS $(K_CFLAGS) -LDFLAGS $(K_LDFLAGS) -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/fpga:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-asesim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/asesim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-vlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
clean:
rm -rf $(PROJECT) *.o .depend
clean-all: clean
rm -rf *.pocl *.dump
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif
```
First, build the host program.
```bash
make all
```
If we want to execute on SIMX, we can execute the command below.
```bash
make run-simx
```

View file

@ -1,7 +1,130 @@
# FPGA Startup and Configuration Guide
# FPGA Startup and Configuration Guide
OPAE Environment Setup
## Gaining Access to FPGA's with CRNCH
If you are associated with Georgia Tech (or related workshops) you can use CRNCH's server to gain remote access to FPGA's. Otherwise, you can skip to the Xilinx or Intel (Altera) synthesis steps below.
## What is CRNCH?
**C**enter for **R**esearch into **N**ovel **C**omputing **H**ierarchies
## What does CRNCH Offer?
**The Rogues Gallery (RG)**: new concept focused on developing our understanding of next-generation hardware with a focus on unorthodox and uncommon technologies. **RG** will acquire new and unique hardware (ie, the aforementioned “*rogues*”) from vendors, research labs, and startups and make this hardware available to students, faculty, and industry collaborators within a managed data center environment
## Why are the Rogues Important?
By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moore's Law era of “cheap transistors” ends*. Specifically, the Rogues Gallery contains FPGAs which can be synthesized into Vortex hardware.
## How is the Rogues Gallery Funded?
Rogues Gallery testbed is primarily supported by the National Science Foundation (NSF) under NSF Award Number [#2016701](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2016701&HistoricalAwards=false)
## Rogues Gallery Documentation
You can read about RG in more detail on its official documentation [page](https://gt-crnch-rg.readthedocs.io/en/main/index.html#).
You can listen to a talk about RG [here](https://mediaspace.gatech.edu/media/Jeff%20Young%20-%20Rogues%20Gallery%20-%20CRNCH%20Summit%202021/1_lqlgr0jj)
[CRNCH Summit 2023](https://github.com/gt-crnch/crnch-summit-2023/tree/main)
## Request Access for Rogues Gallery
You should use [this form](https://crnch-rg.cc.gatech.edu/request-rogues-gallery-access/) to request access to RG's reconfigurable computing (Vortex FPGA) resources. You should receive an email confirming that your ticket has been created. Once the ticket is processed, you should get a second email confirming that your access has been granted. Processing may take some time.
## How to Access Rogues Gallery?
There are two methods of accessing CRNCH's Rogues Gallery:
1) Web-based GUI: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/)
2) SSH: `ssh <your-gt-username>@rg-login.crnch.gatech.edu`
## Where should I keep my files?
The CRNCH servers have a folder called `USERSCRATCH` which can be found in your home directory: `echo $HOME`. You should keep all your files in this folder since it is available across all the Rogues Gallery nodes.
## **What Machines are Available in the Rogues Gallery?**
Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). Furthermore, you can find detailed information about the FPGA hardware [here](https://gt-crnch-rg.readthedocs.io/en/main/reconfig/xilinx/xilinx-getting-started.html).
## Allocate an FPGA Node
Once you've connected to the CRNCH login node, you can use the Slurm scheduler to request an interactive job using `salloc`. This [page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html) explains why we use Slurm to request resources. Documentation for `salloc` can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). And here.
To request 16 cores and 64GB of RAM for 6 hours on an FPGA dev node (`flubber1` in the command below):
```bash
salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber1 --time=06:00:00
```
Synthesis for Xilinx Boards
----------------------
Once you are logged in, you will need to complete some first time configurations. If you are interested in the Intel (Altera) synthesis steps, scroll down below.
### Source Configuration Scripts
```
# From any directory
$ source /opt/xilinx/xrt/setup.sh
$ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh
```
### Check Installed FPGA Platforms
Run `platforminfo -l` to list the platforms installed on the current FPGA node. The reported platform name is the value to use for the `PLATFORM` variable below. If the command reports an error instead, there was an issue with the previous two `source` commands.
### Install Vortex Toolchain
The Xilinx synthesis process requires verilator to generate the bitstream. Eventually, you will need the whole toolchain to run the bitstream on the FPGA. Therefore, the whole Vortex toolchain can be installed as follows. If you complete these steps properly, you should only need to complete them once, and afterwards you can skip to `Activate Vortex Toolchain`.
```
# Make a build directory from root and configure scripts for your environment
mkdir build && cd build && ../configure --tooldir=$HOME/tools
# Install the whole prebuilt toolchain
./ci/toolchain_install.sh --all
# Add environment variables to bashrc
echo "source <full-path-to-vortex-root>/vortex/build/ci/toolchain_env.sh" >> ~/.bashrc
```
### Activate Vortex Toolchain
```
# From any directory
source ~/.bashrc
# Check environment setup
verilator --version
```
### Build the FPGA Bitstream
The root directory contains the path `hw/syn/xilinx/xrt` which has the makefile used to generate the Vortex bitstream.
```
$ cd hw/syn/xilinx/xrt
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 &
```
Will run the synthesis under new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
The generated bitstream will be located under <BUILD_DIR>/bin/vortex_afu.xclbin
For long-running jobs, invocation of this makefile can be made of the following form:
`[CONFIGS=<vortex macros>] [PREFIX=<prefix directory name>] [NUM_CORES=<#>] TARGET=hw|hw_emu PLATFORM=<platform baseName> nohup make > <log filename> 2>&1 &`
For example:
```bash
CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202310_1 nohup make > build_u250_hw_4c.log 2>&1 &
```
The build is complete when the bitstream file `vortex_afu.xclbin` exists in `<prefix directory name>_<platform baseName>_<hw|hw_emu>/bin`.
### Running a Program on Xilinx FPGA
The [blackbox.sh](./simulation.md) script within the build directory can be used to run a test with Vortex's xrt driver using the following command:
`FPGA_BIN_DIR=<path to bitstream directory> TARGET=hw|hw_emu PLATFORM=<platform baseName> ./ci/blackbox.sh --driver=xrt --app=<test name>`
For example:
```FPGA_BIN_DIR=$(realpath hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin) TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo```
Synthesis for Intel (Altera) Boards
----------------------
### OPAE Environment Setup
$ source /opt/inteldevstack/init_env_user.sh
$ export OPAE_HOME=/opt/opae/1.1.2
@ -9,34 +132,23 @@ OPAE Environment Setup
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
$ export PATH=:/opt/verilator/bin:$PATH
$ export VERILATOR_ROOT=/opt/verilator
OPAE Build
------------------
### OPAE Build
The FPGA has the following configuration options:
- 1 core fpga (fpga-1c)
- 2 cores fpga (fpga-2c)
- 4 cores fpga (fpga-4c)
- 8 cores fpga (fpga-8c)
- 16 cores fpga (fpga-16c)
- 32 cores fpga (fpga-32c)
- 64 cores fpga (fpga-64c)
- DEVICE_FAMILY=arria10 | stratix10
- NUM_CORES=#n
Command line:
$ cd hw/syn/opae
$ make fpga-<num-of-cores>c
$ cd hw/syn/altera/opae
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
Example: `make fpga-4c`
A new folder (ex: `build_fpga_4c`) will be created and the build will start and take ~30-480 min to complete.
A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete.
Setting TARGET=ase will build the project for simulation using Intel ASE.
OPAE Build Configuration
------------------------
### OPAE Build Configuration
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when building the processor. For example, the following parameters can be configured:
- `NUM_WARPS`: Number of warps per cores
@ -45,42 +157,61 @@ The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware
You configure the synthesis build from the command line:
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make fpga-4c
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
OPAE Build Progress
-------------------
### OPAE Build Progress
You could check the last 10 lines in the build log for possible errors until build completion.
$ tail -n 10 ./build_fpga_<num-of-cores>c/build.log
$ tail -n 10 <build_dir>/build.log
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
$ ps -u <username>
If the build fails and you need to restart it, clean up the build folder using the following command:
$ make clean-fpga-<num-of-cores>c
Example: `make clean-fpga-4c`
$ make clean
The file `vortex_afu.gbs` should exist when the build is done:
$ ls -lsa ./build_fpga_<num-of-cores>c/vortex_afu.gbs
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
Signing the bitstream and Programming the FPGA
----------------------------------------------
### Signing the bitstream and Programming the FPGA
$ cd ./build_fpga_<num-of-cores>c
$ cd <build_dir>
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
FPGA sample test running OpenCL sgemm kernel
--------------------------------------------
### Sample FPGA Run Test
Ensure you have the correct opae runtime for the FPGA target
Run the following from the Vortex root directory
```
$ TARGET=FPGA make -C runtime/opae
```
$ ./ci/blackbox.sh --driver=fpga --app=sgemm --args="-n64"
Run the [blackbox.sh](./simulation.md) from your Vortex build directory
```
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
```
### FPGA sample test running OpenCL sgemm kernel
You can use the `blackbox.sh` script to run the following from your Vortex build directory
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
### Testing Vortex using OPAE with Intel ASE Simulation
Building ASE synthesis
```$ TARGET=asesim make -C runtime/opae```
Building ASE runtime
```$ TARGET=asesim make -C runtime/opae```
Running ASE simulation
```$ ASE_LOG=0 ASE_WORKDIR=<build_dir>/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n16"```

View file

@ -2,29 +2,8 @@
## Table of Contents
- [Codebase Layout](codebase.md)
- [Microarchitecture](microarchitecture.md)
- [Cache Subsystem](cache_subsystem.md)
- [Software](software.md)
- [Simulation](simulation.md)
- [FPGA Setup Guide](fpga_setup.md)
- [Debugging](debugging.md)
- [Useful Links](references.md)
## Installation
- Refer to the build instructions in [README](../README.md).
## Quick Start Scenarios
Running Vortex simulators with different configurations:
- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads
$ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic
- Run demo driver test with vlsim driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads
$ ./ci/blackbox.sh --driver=vlsim --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood
- [Codebase Layout](codebase.md): Summary of repo file tree
- [Microarchitecture](microarchitecture.md): Vortex Pipeline and cache microarchitectural details and reconfigurability
- [Simulation](simulation.md): Details for building and running each simulation driver
- [Contributing](contributing.md): Process for contributing your own features including repo semantics and testing
- [Debugging](debugging.md): Debugging configurations for each Vortex driver

81
docs/install_vortex.md Normal file
View file

@ -0,0 +1,81 @@
# Installing and Setting Up the Vortex Environment
## Ubuntu 18.04, 20.04
1. Install the following dependencies:
```
sudo apt-get install build-essential zlib1g-dev libtinfo-dev libncurses5 uuid-dev libboost-serialization-dev libpng-dev libhwloc-dev
```
2. Upgrade GCC to 11:
```
sudo apt-get install gcc-11 g++-11
```
Multiple gcc versions on Ubuntu can be managed with update-alternatives, e.g.:
```
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11
```
3. Download the Vortex codebase:
```
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
```
4. Build Vortex
```
$ cd vortex
$ mkdir -p build
$ cd build
$ ../configure --xlen=32 --tooldir=$HOME/tools
$ ./ci/toolchain_install.sh --all
$ source ./ci/toolchain_env.sh
$ make -s
```
## RHEL 8
Note: depending on the system, some of the toolchain may need to be recompiled for non-Ubuntu Linux. The source for the tools can be found [here](https://github.com/vortexgpgpu/).
1. Install the following dependencies:
```
sudo yum install libpng-devel boost boost-devel boost-serialization libuuid-devel opencl-headers hwloc hwloc-devel gmp-devel compat-hwloc1
```
2. Upgrade GCC to 11:
```
sudo yum install gcc-toolset-11
```
Multiple gcc versions on Red Hat can be managed with scl
3. Install MPFR 4.2.0:
Download [the source](https://ftp.gnu.org/gnu/mpfr/) and follow [the installation documentation](https://www.mpfr.org/mpfr-current/mpfr.html#How-to-Install).
4. Download the Vortex codebase:
```
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
```
5. Build Vortex
```
$ cd vortex
$ mkdir -p build
$ cd build
$ ../configure --xlen=32 --tooldir=$HOME/tools
$ ./ci/toolchain_install.sh --all
$ source ./ci/toolchain_env.sh
$ make -s
```

View file

@ -24,71 +24,60 @@ Vortex uses the SIMT (Single Instruction, Multiple Threads) execution model with
- Control the number of warps to activate during execution
- `WSPAWN` *count, addr*: activate count warps and jump to addr location
- **Control-Flow Divergence**
- Control threads to activate when a branch diverges
- `SPLIT` *predicate*: apply 'taken' predicate thread mask adn save 'not-taken' into IPDOM stack
- `JOIN`: restore 'not-taken' thread mask
- Control threads activation when a branch diverges
- `SPLIT` *taken, predicate*: apply predicate thread mask and save current state into IPDOM stack
- `JOIN`: pop IPDOM stack to restore thread mask
- `PRED` *predicate, restore_mask*: thread predicate instruction
- **Warp Synchronization**
- `BAR` *id, count*: stall warps entering barrier *id* until count is reached
### Vortex Pipeline/Datapath
![Image of Vortex Microarchitecture](./assets/img/vortex_microarchitecture_v2.png)
![Image of Vortex Microarchitecture](./assets/img/vortex_microarchitecture.png)
Vortex has a 5-stage pipeline: FI | ID | Issue | EX | WB.
Vortex has a 6-stage pipeline:
- **Schedule**
- Warp Scheduler
- Schedule the next PC into the pipeline
- Track stalled, active warps
- IPDOM Stack
- Save split/join states for divergent threads
- Inflight Tracker
- Track in-flight instructions
- **Fetch**
- Warp Scheduler
- Track stalled & active warps, resolve branches and barriers, maintain split/join IPDOM stack
- Instruction Cache
- Retrieve instruction from cache, issue I-cache requests/responses
- Retrieve instructions from memory
- Handle I-cache requests/responses
- **Decode**
- Decode fetched instructions, notify warp scheduler when the following instructions are decoded:
- Branch, tmc, split/join, wspawn
- Precompute used_regs mask (needed for Issue stage)
- Decode fetched instructions
- Notify warp scheduler on control instructions
- **Issue**
- Scheduling
- In-order issue (operands/execute unit ready), out-of-order commit
- IBuffer
- Store fetched instructions, separate queues per-warp, selects next warp through round-robin scheduling
- Store decoded instructions in separate per-warp queues
- Scoreboard
- Track in-use registers
- GPRs (General-Purpose Registers) stage
- Fetch issued instruction operands and send operands to execute unit
- Check register use for decoded instructions
- Operands Collector
- Fetch the operands for issued instructions from the register file
- **Execute**
- ALU Unit
- Single-cycle operations (+,-,>>,<<,&,|,^), Branch instructions (Share ALU resources)
- MULDIV Unit
- Multiplier - done in 2 cycles
- Divider - division and remainder, done in 32 cycles
- Implements serial algorithm (Stalls the pipeline)
- Handle arithmetic and branch operations
- FPU Unit
- Multi-cycle operations, uses `FPnew` Library on ASIC, uses hard DSPs on FPGA
- CSR Unit
- Store constant status registers - device caps, FPU status flags, performance counters
- Handle external CSR requests (requests from host CPU)
- Handle floating-point operations
- LSU Unit
- Handle load/store operations, issue D-cache requests, handle D-cache responses
- Commit load responses - saves storage, Scoreboard tracks completion
- GPGPU Unit
- Handle GPGPU instructions
- TMC, WSPAWN, SPLIT, BAR
- JOIN is handled by Warp Scheduler (upon SPLIT response)
- Handle load/store operations
- SFU Unit
- Handle warp control operations
- Handle Control Status Registers (CSRs) operations
- **Commit**
- Commit
- Update CSR flags, update performance counters
- Writeback
- Write result back to GPRs, notify Scoreboard (release in-use register), select candidate instruction (ALU unit has highest priority)
- **Clustering**
- Group multiple cores into clusters (optionally share L2 cache)
- Group multiple clusters (optionally share L3 cache)
- Configurable at build time
- Default configuration:
- #Clusters = 1
- #Cores = 4
- #Warps = 4
- #Threads = 4
- **FPGA AFU Interface**
- Manage CPU-GPU communication
- Query devices caps, load kernel instructions and resource buffers, start kernel execution, read destination buffers
- Local Memory - GPU access to local DRAM
- Reserved I/O addresses - redirect to host CPU, console output
- Write result back to the register file and update the Scoreboard.
### Vortex clustering architecture
- Sockets
- Grouping multiple cores sharing L1 cache
- Clusters
- Grouping of sockets sharing L2 cache
### Vortex Cache Subsystem
More details about the cache subsystem are provided [here](./cache_subsystem.md).

View file

@ -6,13 +6,16 @@
### Cycle-Approximate Simulation
SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simX` folder.
SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simx` folder. The [readme](README.md) has the most detailed instructions for building and running simX.
- To install on your own system, [follow this document](install_vortex.md).
- For the different Georgia Tech environments Vortex supports, [read this document](environment_setup.md).
### FPGA Simulation
The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/FPGA_Startup_Guide.md)
The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Altera based FPGAs.
### How to Test
### How to Test (using `blackbox.sh`)
Running tests under specific drivers (rtlsim,simx,fpga) is done using the script named `blackbox.sh` located in the `ci` folder. Running command `./ci/blackbox.sh --help` from the Vortex root directory will display the following command line arguments for `blackbox.sh`:
@ -20,17 +23,17 @@ Running tests under specific drivers (rtlsim,simx,fpga) is done using the script
- *Cores* - used to specify the number of cores (processing element containing multiple warps) within a configuration.
- *Warps* - used to specify the number of warps (collection of concurrent hardware threads) within a configuration.
- *Threads* - used to specify the number of threads (smallest unit of computation) within a configuration.
- *L2cache* - used to enable the shard l2cache among the Vortex cores.
- *L2cache* - used to enable the shared l2cache among the Vortex cores.
- *L3cache* - used to enable the shared l3cache among the Vortex clusters.
- *Driver* - used to specify which driver to run the Vortex simulation (either rtlsim, vlsim, fpga, or simx).
- *Driver* - used to specify which driver to run the Vortex simulation (either rtlsim, opae, xrt, simx).
- *Debug* - used to enable debug mode for the Vortex simulation.
- *Perf* - used to enable the detailed performance counters within the Vortex simulation.
- *App* - used to specify which test/benchmark to run in the Vortex simulation. The main choices are vecadd, sgemm, basic, demo, and dogfood. Other tests/benchmarks are located in the `/benchmarks/opencl` folder though not all of them work with the current version of Vortex.
- *Args* - used to pass additional arguments to the application.
Example use of command line arguments: Run the sgemm benchmark using the vlsim driver with a Vortex configuration of 1 cluster, 4 cores, 4 warps, and 4 threads.
Example use of command line arguments: Run the sgemm benchmark using the opae driver with a Vortex configuration of 1 cluster, 4 cores, 4 warps, and 4 threads.
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=4 --driver=vlsim --app=sgemm
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=4 --driver=opae --app=sgemm
Output from terminal:
```
@ -47,4 +50,20 @@ PERF: core1: instrs=90693, cycles=53108, IPC=1.707709
PERF: core2: instrs=90849, cycles=53107, IPC=1.710678
PERF: core3: instrs=90836, cycles=50347, IPC=1.804199
PERF: instrs=363180, cycles=53108, IPC=6.838518
```
```
## Additional Quick Start Scenarios
Running Vortex simulators with different configurations and drivers is supported. For example:
- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads
$ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic
- Run demo driver test with opae driver and Vortex config of 1 cluster, 4 cores, 4 warps, 2 threads
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
- Run dogfood driver test with simx driver and Vortex config of 4 clusters, 4 cores, 8 warps, 6 threads
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood

52
docs/testing.md Normal file
View file

@ -0,0 +1,52 @@
# Testing
## Running a Vortex application
The framework provides a utility script: blackbox.sh under the /ci/ folder for executing applications in the tests tree. It gets copied into the `build` directory with all the environment variables resolved, so you should run it from the `build` directory as follows:
You can query the commandline options of the tool using:
$ ./ci/blackbox.sh --help
To execute sgemm test program on the simx driver and passing "-n10" as argument to sgemm:
$ ./ci/blackbox.sh --driver=simx --app=sgemm --args="-n10"
You can execute the same application on a GPU architecture with 2 cores:
$ ./ci/blackbox.sh --cores=2 --driver=simx --app=sgemm --args="-n10"
When executing, Blackbox needs to recompile the driver if the desired architecture changes.
It tracks the latest configuration in a file under the current directory blackbox.<driver>.cache.
To avoid having to rebuild the driver all the time, Blackbox checks if the latest cached configuration matches the current.
## Running Benchmarks
The Vortex test suite is located under the /tests/ folder.
You can execute the default regression suite by running the following commands at the root folder.
$ make -C tests/regression run-simx
$ make -C tests/regression run-rtlsim
You can execute the default OpenCL suite by running the following commands at the root folder.
$ make -C tests/opencl run-simx
$ make -C tests/opencl run-rtlsim
## Creating Your Own Regression Test
Inside `tests/regression` you will find a series of folders which are named based on what they test.
You can view the tests to see which ones have tests similar to what you are trying to create new tests for.
Once you have found a similar baseline, you can copy the folder and rename it to what you are planning to test.
A regression test typically implements the following files:
- ***kernel.cpp*** contains the GPU kernel code.
- ***main.cpp*** contains the host CPU code.
- ***Makefile*** defines the compiler build commands for the CPU and GPU binaries.
Sync your build folder: `$ ../configure`
Compile your test: `$ make -C tests/regression/<test-name>`
Run your test: `$ ./ci/blackbox.sh --driver=simx --app=<test-name> --debug`
## Adding Your Tests to the CI Pipeline
If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. Furthermore, if you are contributing a new feature, it is recommended that you add the ability to enable / disable the new feature that you are adding. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md).

View file

@ -1,29 +0,0 @@
# Driver back-ends; each lives in a sub-directory with its own Makefile.
DRIVERS := stub fpga asesim vlsim rtlsim simx

# Default build: the stub plus the three simulator back-ends.
# fpga and asesim are only built when requested by name.
all: stub rtlsim simx vlsim

# Build a single driver by delegating to the Makefile in its directory.
$(DRIVERS):
	$(MAKE) -C $@

# Clean every driver directory, including those 'all' does not build.
clean:
	$(MAKE) clean -C stub
	$(MAKE) clean -C fpga
	$(MAKE) clean -C asesim
	$(MAKE) clean -C vlsim
	$(MAKE) clean -C rtlsim
	$(MAKE) clean -C simx

# The targets share names with the sub-directories, so they must be phony.
.PHONY: all $(DRIVERS) clean

View file

@ -1,73 +0,0 @@
# Builds the shared driver library libvortex.so from the common OPAE driver
# sources, compiled with -DUSE_ASE and linked against -lopae-c-ase.
# Optional make variables: DEBUG, SCOPE, PERF, CONFIGS (see below).

# OPAE installation root; may be overridden from the environment.
OPAE_HOME ?= /tools/opae/1.4.0
# Locations inside the repository, relative to this Makefile.
RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
OPAE_SYN_DIR=../../hw/syn/opae
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c-ase
# stack execution protection
LDFLAGS +=-z noexecstack
# relocation protection (read-only relocations, immediate symbol binding)
LDFLAGS +=-z relro -z now
# stack buffer overrun detection
CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
# Add external configuration
CXXFLAGS += $(CONFIGS)
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
# Produce a shared object rather than an executable
LDFLAGS += -shared
PROJECT = libvortex.so
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
# Debugging: DEBUG selects an unoptimized build with symbols, otherwise an
# optimized build with assertions disabled (NDEBUG)
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable scope analyzer: adds the scope sources and makes the library depend
# on the generated scope-defs.h header
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/vx_scope.cpp
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
all: $(PROJECT)
# vortex_afu.h is produced by the Makefile in the OPAE synthesis directory
$(OPAE_SYN_DIR)/vortex_afu.h:
$(MAKE) -C $(OPAE_SYN_DIR) vortex_afu.h
# Generate the scope definitions (C header and Verilog header) from scope.json
scope-defs.h: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
# generate scope data
scope: scope-defs.h
# Compile and link all sources in one step; SCOPE_H is empty unless SCOPE is set
$(PROJECT): $(SRCS) $(OPAE_SYN_DIR)/vortex_afu.h $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) -o $(PROJECT)
clean:
rm -rf $(PROJECT) *.o scope-defs.h

View file

@ -1,535 +0,0 @@
#include <stdint.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <unistd.h>
#include <assert.h>
#include <cmath>
#include <new>
#include <sstream>
#include <unordered_map>
#include <list>
#if defined(USE_FPGA) || defined(USE_ASE)
#include <opae/fpga.h>
#include <uuid/uuid.h>
#elif defined(USE_VLSIM)
#include <fpga.h>
#endif
#include "vx_utils.h"
#include "vx_malloc.h"
#include <vortex.h>
#include <VX_config.h>
#include "vortex_afu.h"
#ifdef SCOPE
#include "vx_scope.h"
#endif
// Evaluate an FPGA API call and, if it does not return FPGA_OK, report the
// failing expression on stderr and return -1 from the ENCLOSING function.
// Only usable inside functions that return int.
// The result is kept in a uniquely named local so the macro neither shadows
// nor captures a caller variable called 'res'.
#define CHECK_RES(_expr) \
  do { \
    fpga_result check_res_ = (_expr); \
    if (check_res_ == FPGA_OK) \
      break; \
    fprintf(stderr, "[VXDRV] Error: '%s' returned %d, %s!\n", \
            #_expr, (int)check_res_, fpgaErrStr(check_res_)); \
    return -1; \
  } while (false)
///////////////////////////////////////////////////////////////////////////////
// Command codes: local aliases for the AFU_IMAGE_CMD_* constants.
// NOTE(review): presumably defined in vortex_afu.h — confirm.
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
#define CMD_RUN AFU_IMAGE_CMD_RUN
// MMIO register offsets: each AFU_IMAGE_MMIO_* value scaled by 4. These are
// passed as the offset argument of the MMIO accessors (e.g. MMIO_DEV_CAPS in
// vx_dev_open).
// NOTE(review): the factor 4 looks like a register-index to byte-offset
// conversion — confirm against the AFU register map.
#define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4)
#define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4)
#define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4)
#define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4)
#define MMIO_DEV_CAPS (AFU_IMAGE_MMIO_DEV_CAPS * 4)
#define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4)
// NOTE(review): presumably the width in bits of the state field of the status
// register — confirm at the point of use.
#define STATUS_STATE_BITS 8
///////////////////////////////////////////////////////////////////////////////
class vx_device {
public:
vx_device()
: mem_allocator(
ALLOC_BASE_ADDR,
ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
4096,
CACHE_BLOCK_SIZE)
{}
~vx_device() {}
fpga_handle fpga;
vortex::MemoryAllocator mem_allocator;
unsigned version;
unsigned num_cores;
unsigned num_warps;
unsigned num_threads;
};
// Bookkeeping record for a buffer shared between host and device.
// NOTE(review): the per-field notes are inferred from the field names only;
// confirm against the code that fills this structure in.
struct vx_buffer_ {
  uint64_t wsid;        // presumably the workspace id of the shared buffer
  void* host_ptr;       // presumably the host virtual address of the buffer
  uint64_t io_addr;     // presumably the device-visible I/O address
  vx_device_h hdevice;  // presumably the owning device
  uint64_t size;        // presumably the buffer size in bytes
};
typedef struct vx_buffer_ vx_buffer_t;
///////////////////////////////////////////////////////////////////////////////
#ifdef DUMP_PERF_STATS
// Registry of device handles. When the registry object is destroyed it dumps
// performance statistics to stdout for every handle still registered.
class AutoPerfDump {
public:
  AutoPerfDump() {}

  // Dump stats for each handle that was added and not removed.
  ~AutoPerfDump() {
    typedef std::list<vx_device_h>::iterator device_iter;
    for (device_iter it = open_devices_.begin(); it != open_devices_.end(); ++it) {
      vx_dump_perf(*it, stdout);
    }
  }

  // Register a device handle.
  void add_device(vx_device_h device) {
    open_devices_.push_back(device);
  }

  // Unregister a device handle (removes every matching entry).
  void remove_device(vx_device_h device) {
    open_devices_.remove(device);
  }

private:
  std::list<vx_device_h> open_devices_;
};

// Single global registry instance.
AutoPerfDump gAutoPerfDump;
#endif
///////////////////////////////////////////////////////////////////////////////
// Query one capability of an opened device.
//
// hdevice: handle produced by vx_dev_open().
// caps_id: one of the VX_CAPS_* identifiers.
// value:   receives the capability value.
//
// Returns 0 on success and -1 when hdevice or value is null. An unknown
// caps_id is treated as a programming error: it is reported on stderr and the
// process is aborted.
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
  // 'value' is written unconditionally below, so reject a null output pointer
  // together with a null device handle.
  if (nullptr == hdevice || nullptr == value)
    return -1;

  vx_device *device = ((vx_device*)hdevice);

  switch (caps_id) {
  // Values decoded from the device's DEV_CAPS register at open time.
  case VX_CAPS_VERSION:
    *value = device->version;
    break;
  case VX_CAPS_MAX_CORES:
    *value = device->num_cores;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = device->num_warps;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = device->num_threads;
    break;
  // Build-time configuration constants.
  case VX_CAPS_CACHE_LINE_SIZE:
    *value = CACHE_BLOCK_SIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    *value = LOCAL_MEM_SIZE;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    *value = ALLOC_BASE_ADDR;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
    std::abort();  // does not return
  }

  return 0;
}
// Open the accelerator and create the driver-side device object.
//
// hdevice: receives the new device handle on success.
//
// Unless USE_VLSIM is defined, the accelerator is located by enumerating FPGA
// accelerators whose GUID matches AFU_ACCEL_UUID; with USE_VLSIM the handle is
// opened directly. The DEV_CAPS MMIO register is then read and decoded into
// version / core / warp / thread counts.
//
// Returns 0 on success and a non-zero value on failure. On every failure path
// the resources acquired so far (properties, token, handle, device object)
// are released and *hdevice is left untouched.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (nullptr == hdevice)
    return -1;

  fpga_handle accel_handle;
  vx_device* device;

#ifndef USE_VLSIM
  fpga_result res;
  fpga_token accel_token;
  fpga_properties filter = nullptr;
  fpga_guid guid;
  uint32_t num_matches;

  // Set up a filter that will search for an accelerator
  CHECK_RES(fpgaGetProperties(nullptr, &filter));
  res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaPropertiesSetObjectType() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyProperties(&filter);
    return -1;
  }

  // Add the desired UUID to the filter; a malformed UUID string would leave
  // 'guid' unset, so fail early instead of searching with garbage.
  if (uuid_parse(AFU_ACCEL_UUID, guid) != 0) {
    fprintf(stderr, "[VXDRV] Error: invalid accelerator UUID: %s!\n", AFU_ACCEL_UUID);
    fpgaDestroyProperties(&filter);
    return -1;
  }
  res = fpgaPropertiesSetGUID(filter, guid);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaPropertiesSetGUID() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyProperties(&filter);
    return -1;
  }

  // Do the search across the available FPGA contexts
  num_matches = 1;
  res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaEnumerate() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyProperties(&filter);
    return -1;
  }

  // Not needed anymore
  fpgaDestroyProperties(&filter);

  if (num_matches < 1) {
    // No token was produced, so accel_token is still uninitialized and must
    // not be passed to fpgaDestroyToken().
    fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID);
    return -1;
  }

  // Open accelerator
  res = fpgaOpen(accel_token, &accel_handle, 0);
  if (res != FPGA_OK) {
    fprintf(stderr, "[VXDRV] Error: fpgaOpen() returned %d, %s!\n", (int)res, fpgaErrStr(res));
    fpgaDestroyToken(&accel_token);
    return -1;
  }

  // Done with token
  fpgaDestroyToken(&accel_token);
#else
  // Open accelerator
  CHECK_RES(fpgaOpen(NULL, &accel_handle, 0));
#endif

  // allocate device object; the nothrow form makes the null check effective
  // so the handle is closed if the allocation fails
  device = new (std::nothrow) vx_device();
  if (nullptr == device) {
    fpgaClose(accel_handle);
    return -1;
  }

  device->fpga = accel_handle;

  {
    // Load device CAPS: four 16-bit fields packed into one 64-bit register
    uint64_t dev_caps;
    int ret = fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps);
    if (ret != FPGA_OK) {
      delete device;
      fpgaClose(accel_handle);
      return ret;
    }
    device->version = (dev_caps >> 0) & 0xffff;
    device->num_cores = (dev_caps >> 16) & 0xffff;
    device->num_warps = (dev_caps >> 32) & 0xffff;
    device->num_threads = (dev_caps >> 48) & 0xffff;
  #ifndef NDEBUG
    fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
            device->version, device->num_cores, device->num_warps, device->num_threads);
  #endif
  }

#ifdef SCOPE
  {
    int ret = vx_scope_start(accel_handle, 0, -1);
    if (ret != 0) {
      delete device;
      fpgaClose(accel_handle);
      return ret;
    }
  }
#endif

  *hdevice = device;

#ifdef DUMP_PERF_STATS
  gAutoPerfDump.add_device(*hdevice);
#endif

  return 0;
}
// Closes a device handle: stops the scope (SCOPE builds), dumps perf
// counters (DUMP_PERF_STATS builds), closes the FPGA handle and destroys
// the device object. Returns -1 for a null handle, 0 otherwise.
extern int vx_dev_close(vx_device_h hdevice) {
  if (hdevice == nullptr) {
    return -1;
  }

  auto dev = (vx_device*)hdevice;

#ifdef SCOPE
  vx_scope_stop(dev->fpga);
#endif

#ifdef DUMP_PERF_STATS
  gAutoPerfDump.remove_device(hdevice);
  vx_dump_perf(hdevice, stdout);
#endif

  // Release the FPGA handle before the object that owns it goes away.
  fpgaClose(dev->fpga);
  delete dev;

  return 0;
}
// Reserves `size` bytes of device memory through the device's allocator
// and stores the resulting device address in *dev_maddr.
// Returns -1 on a null handle, null output pointer or zero size; otherwise
// whatever the allocator returns.
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
  const bool bad_args = (hdevice == nullptr)
                     || (dev_maddr == nullptr)
                     || (size == 0);
  if (bad_args) {
    return -1;
  }

  auto dev = (vx_device*)hdevice;
  return dev->mem_allocator.allocate(size, dev_maddr);
}
// Returns the device memory block starting at dev_maddr to the device's
// allocator. Returns -1 on a null handle; otherwise the allocator's result.
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
  if (hdevice == nullptr) {
    return -1;
  }

  auto dev = (vx_device*)hdevice;
  return dev->mem_allocator.release(dev_maddr);
}
// Allocates a host buffer shared with the accelerator.
//
// The requested size is rounded up to a multiple of CACHE_BLOCK_SIZE, the
// buffer is prepared via fpgaPrepareBuffer(), its IO address is queried,
// and a vx_buffer_t record describing it is returned through *hbuffer.
// Returns 0 on success, -1 on invalid arguments or any failure (the
// prepared buffer is released again if a later step fails).
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
  if (hdevice == nullptr || size == 0 || hbuffer == nullptr) {
    return -1;
  }

  auto dev = (vx_device*)hdevice;
  size_t padded_size = aligned_size(size, CACHE_BLOCK_SIZE);

  // Pin/prepare the host memory
  void* host_mem;
  uint64_t workspace_id;
  fpga_result status = fpgaPrepareBuffer(dev->fpga, padded_size, &host_mem, &workspace_id, 0);
  if (status != FPGA_OK) {
    return -1;
  }

  // Get the physical address of the buffer in the accelerator
  uint64_t bus_addr;
  status = fpgaGetIOAddress(dev->fpga, workspace_id, &bus_addr);
  if (status != FPGA_OK) {
    fpgaReleaseBuffer(dev->fpga, workspace_id);
    return -1;
  }

  // Create the bookkeeping record
  auto record = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
  if (record == nullptr) {
    fpgaReleaseBuffer(dev->fpga, workspace_id);
    return -1;
  }

  record->wsid     = workspace_id;
  record->host_ptr = host_mem;
  record->io_addr  = bus_addr;
  record->hdevice  = hdevice;
  record->size     = padded_size;

  *hbuffer = record;
  return 0;
}
// Returns the host-side pointer recorded for a buffer, or nullptr when the
// handle itself is null.
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    auto record = (vx_buffer_t*)hbuffer;
    return record->host_ptr;
  }
  return nullptr;
}
// Releases a buffer created by vx_buf_alloc(): hands the workspace back to
// the FPGA runtime and frees the bookkeeping record.
// Returns -1 on a null handle, 0 otherwise.
extern int vx_buf_free(vx_buffer_h hbuffer) {
  if (hbuffer == nullptr) {
    return -1;
  }

  auto record = (vx_buffer_t*)hbuffer;
  auto owner  = (vx_device*)record->hdevice;

  fpgaReleaseBuffer(owner->fpga, record->wsid);
  free(record);

  return 0;
}
// Polls the device status register until the device reports state 0 or the
// timeout budget is used up, forwarding any console output it produces.
//
// The MMIO_STATUS word is interpreted as follows (from the code below):
//   - low STATUS_STATE_BITS bits: device state, 0 meaning ready;
//   - the bits above that: console data, where bit 0 flags a pending
//     character, bits 1..8 hold the character and bits 9..16 the id of the
//     producer it is attributed to.
// Console characters are buffered per id and printed one line at a time;
// incomplete lines are printed when the wait ends.
//
// hdevice: device handle; must not be null.
// timeout: budget in milliseconds; it is decreased by the sleep interval
//          after each poll (1 s under USE_ASE, 1 ms otherwise). A value of
//          0 performs a single poll.
// Returns -1 on a null handle or MMIO failure, 0 otherwise. Note that 0 is
// also returned when the wait timed out; that case is only reported with a
// message on stdout.
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
  if (nullptr == hdevice)
    return -1;

  std::unordered_map<uint32_t, std::stringstream> print_bufs;

  vx_device *device = ((vx_device*)hdevice);

  struct timespec sleep_time;

#if defined(USE_ASE)
  sleep_time.tv_sec = 1;
  sleep_time.tv_nsec = 0;
#else
  sleep_time.tv_sec = 0;
  sleep_time.tv_nsec = 1000000;
#endif

  // to milliseconds
  uint64_t sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);

  for (;;) {
    uint64_t status;
    CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));

    // check for console data
    uint32_t cout_data = status >> STATUS_STATE_BITS;
    if (cout_data & 0x1) {
      // retrieve console data
      do {
        char cout_char = (cout_data >> 1) & 0xff;
        uint32_t cout_tid = (cout_data >> 9) & 0xff;
        auto& ss_buf = print_bufs[cout_tid];
        ss_buf << cout_char;
        if (cout_char == '\n') {
          std::cout << std::dec << "#" << cout_tid << ": " << ss_buf.str() << std::flush;
          ss_buf.str("");
        }
        // Re-read the status; each read yields the next pending character.
        CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
        cout_data = status >> STATUS_STATE_BITS;
      } while (cout_data & 0x1);
    }

    uint32_t state = status & ((1ull << STATUS_STATE_BITS)-1);

    if (0 == state || 0 == timeout) {
      // Flush any partial lines that never saw a newline.
      for (auto& buf : print_bufs) {
        auto str = buf.second.str();
        if (!str.empty()) {
          std::cout << "#" << buf.first << ": " << str << std::endl;
        }
      }
      if (state != 0) {
        fprintf(stdout, "[VXDRV] ready-wait timed out: state=%d\n", state);
      }
      break;
    }

    nanosleep(&sleep_time, nullptr);

    // Saturating decrement: `timeout` is unsigned, so a plain subtraction
    // would wrap around to a huge value whenever the remaining budget is
    // smaller than one sleep step, and the wait would never time out.
    timeout = (timeout > sleep_time_ms) ? (timeout - sleep_time_ms) : 0;
  }

  return 0;
}
// Copies `size` bytes (rounded up to a whole number of cache blocks) from
// the host buffer, starting at src_offset, to device address dev_maddr.
//
// Both the device address and the buffer's IO address plus offset must be
// CACHE_BLOCK_SIZE-aligned, and the rounded transfer must fit inside both
// the buffer and LOCAL_MEM_SIZE. The transfer is programmed through the
// MMIO registers in units of cache blocks and the call waits for the
// device before and after issuing the command.
// Returns 0 on success, -1 on any validation, wait or MMIO failure.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
  if (hbuffer == nullptr || size == 0) {
    return -1;
  }

  auto record = (vx_buffer_t*)hbuffer;
  auto dev    = (vx_device*)record->hdevice;

  const uint64_t mem_limit  = LOCAL_MEM_SIZE;
  const uint64_t xfer_bytes = aligned_size(size, CACHE_BLOCK_SIZE);

  // check alignment
  if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)) {
    return -1;
  }
  if (!is_aligned(record->io_addr + src_offset, CACHE_BLOCK_SIZE)) {
    return -1;
  }

  // bound checking
  if (src_offset + xfer_bytes > record->size) {
    return -1;
  }
  if (dev_maddr + xfer_bytes > mem_limit) {
    return -1;
  }

  // Ensure ready for new command
  if (vx_ready_wait(record->hdevice, MAX_TIMEOUT) != 0) {
    return -1;
  }

  // Addresses and sizes are passed to the device in cache-block units.
  auto block_shift = (int)std::log2(CACHE_BLOCK_SIZE);

  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_IO_ADDR, (record->io_addr + src_offset) >> block_shift));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> block_shift));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_DATA_SIZE, xfer_bytes >> block_shift));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));

  // Wait for the write operation to finish
  if (vx_ready_wait(record->hdevice, MAX_TIMEOUT) != 0) {
    return -1;
  }

  return 0;
}
// Copies `size` bytes (rounded up to a whole number of cache blocks) from
// device address dev_maddr into the host buffer at dest_offset.
//
// Validation mirrors vx_copy_to_dev(): alignment of the device address and
// of the buffer IO address plus offset, then bounds against the buffer
// size and LOCAL_MEM_SIZE. The call waits for the device before and after
// issuing CMD_MEM_READ.
// Returns 0 on success, -1 on any validation, wait or MMIO failure.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
  if (hbuffer == nullptr || size == 0) {
    return -1;
  }

  auto record = (vx_buffer_t*)hbuffer;
  auto dev    = (vx_device*)record->hdevice;

  const uint64_t mem_limit  = LOCAL_MEM_SIZE;
  const uint64_t xfer_bytes = aligned_size(size, CACHE_BLOCK_SIZE);

  // check alignment
  if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE)) {
    return -1;
  }
  if (!is_aligned(record->io_addr + dest_offset, CACHE_BLOCK_SIZE)) {
    return -1;
  }

  // bound checking
  if (dest_offset + xfer_bytes > record->size) {
    return -1;
  }
  if (dev_maddr + xfer_bytes > mem_limit) {
    return -1;
  }

  // Ensure ready for new command
  if (vx_ready_wait(record->hdevice, MAX_TIMEOUT) != 0) {
    return -1;
  }

  // Addresses and sizes are passed to the device in cache-block units.
  auto block_shift = (int)std::log2(CACHE_BLOCK_SIZE);

  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_IO_ADDR, (record->io_addr + dest_offset) >> block_shift));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> block_shift));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_DATA_SIZE, xfer_bytes >> block_shift));
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));

  // Wait for the read operation to finish
  if (vx_ready_wait(record->hdevice, MAX_TIMEOUT) != 0) {
    return -1;
  }

  return 0;
}
// Starts execution on the device: waits until the device is ready for a
// new command, then writes CMD_RUN to the command register.
// Returns -1 on a null handle, a failed wait or an MMIO error; 0 otherwise.
extern int vx_start(vx_device_h hdevice) {
  if (hdevice == nullptr) {
    return -1;
  }

  auto dev = (vx_device*)hdevice;

  // Ensure ready for new command
  const int wait_status = vx_ready_wait(hdevice, MAX_TIMEOUT);
  if (wait_status != 0) {
    return -1;
  }

  // start execution
  CHECK_RES(fpgaWriteMMIO64(dev->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));

  return 0;
}

View file

@ -1,406 +0,0 @@
#pragma once
#include <cstdint>
#include <assert.h>
namespace vortex {

// Allocator for a linear address range [minAddress, maxAddress).
//
// Address space is carved into pages (multiples of pageAlign) taken from a
// bump pointer; each page is subdivided into blocks (multiples of
// blockAlign). Every page keeps three intrusive lists of its blocks:
//   - usedList:  blocks currently handed out;
//   - freeSList: free blocks sorted by decreasing size (allocation lookup);
//   - freeMList: free blocks sorted by increasing address (merging).
// The allocator only tracks addresses; it never touches the memory itself.
//
// Both alignments are expected to be powers of two (asserted in AlignSize).
class MemoryAllocator {
public:
  MemoryAllocator(
    uint64_t minAddress,
    uint64_t maxAddress,
    uint32_t pageAlign,
    uint32_t blockAlign)
    : nextAddress_(minAddress)
    , maxAddress_(maxAddress)
    , pageAlign_(pageAlign)
    , blockAlign_(blockAlign)
    , pages_(nullptr)
  {}

  ~MemoryAllocator() {
    // Release every page together with all of its blocks, whether or not
    // the blocks are still marked as used: once the allocator is gone
    // nobody can release them anymore.
    page_t* currPage = pages_;
    while (currPage) {
      auto nextPage = currPage->next;
      delete currPage;
      currPage = nextPage;
    }
    pages_ = nullptr;
  }

  // Reserves `size` bytes (rounded up to blockAlign) and stores the start
  // address in *addr. Returns 0 on success, -1 when size is 0, addr is
  // null, the rounded size overflows, or the address range is exhausted.
  // A failed request leaves the allocator state unchanged.
  int allocate(uint64_t size, uint64_t* addr) {
    if (size == 0 || addr == nullptr)
      return -1;

    // Align allocation size; a result smaller than the input means the
    // rounding wrapped around.
    uint64_t alignedSize = AlignSize(size, blockAlign_);
    if (alignedSize < size)
      return -1;
    size = alignedSize;

    // Walk thru all pages to find a free block
    block_t* freeBlock = nullptr;
    auto currPage = pages_;
    while (currPage) {
      auto currBlock = currPage->freeSList;
      if (currBlock) {
        // The free S-list is already sorted with the largest block first
        // Quick check if the head block has enough space.
        if (currBlock->size >= size) {
          // Find the smallest matching block in the S-list
          while (currBlock->nextFreeS
              && (currBlock->nextFreeS->size >= size)) {
            currBlock = currBlock->nextFreeS;
          }
          // Return the free block
          freeBlock = currBlock;
          break;
        }
      }
      currPage = currPage->next;
    }

    if (nullptr == freeBlock) {
      // Allocate a new page for this request
      currPage = this->NewPage(size);
      if (nullptr == currPage)
        return -1;
      freeBlock = currPage->freeSList;
    }

    // Remove the block from the free lists
    assert(freeBlock->size >= size);
    currPage->RemoveFreeMBlock(freeBlock);
    currPage->RemoveFreeSBlock(freeBlock);

    // If the free block we have found is larger than what we are looking for,
    // we may be able to split our free block in two.
    uint64_t extraBytes = freeBlock->size - size;
    if (extraBytes >= blockAlign_) {
      // Reduce the free block size to the requested value
      freeBlock->size = size;

      // Allocate a new block to contain the extra buffer
      auto nextAddr = freeBlock->addr + size;
      auto newBlock = new block_t(nextAddr, extraBytes);

      // Add the new block to the free lists
      currPage->InsertFreeMBlock(newBlock);
      currPage->InsertFreeSBlock(newBlock);
    }

    // Insert the free block into the used list
    currPage->InsertUsedBlock(freeBlock);

    // Return the free block address
    *addr = freeBlock->addr;

    return 0;
  }

  // Releases the block that starts exactly at `addr`, merging it with
  // adjacent free blocks and giving empty top-of-range pages back to the
  // bump pointer. Returns 0 on success, -1 when no used block starts at
  // that address (including a repeated release).
  int release(uint64_t addr) {
    // Walk all pages to find the pointer
    block_t* usedBlock = nullptr;
    auto currPage = pages_;
    while (currPage) {
      if (addr >= currPage->addr
       && addr < (currPage->addr + currPage->size)) {
        auto currBlock = currPage->usedList;
        while (currBlock) {
          if (currBlock->addr == addr) {
            usedBlock = currBlock;
            break;
          }
          currBlock = currBlock->nextUsed;
        }
        break;
      }
      currPage = currPage->next;
    }

    // found the corresponding block?
    if (nullptr == usedBlock)
      return -1;

    // Remove the block from the used list
    currPage->RemoveUsedBlock(usedBlock);

    // Insert the block into the free M-list.
    currPage->InsertFreeMBlock(usedBlock);

    // Check if we can merge adjacent free blocks from the left.
    if (usedBlock->prevFreeM) {
      // Calculate the previous address
      auto prevAddr = usedBlock->prevFreeM->addr + usedBlock->prevFreeM->size;
      if (usedBlock->addr == prevAddr) {
        auto prevBlock = usedBlock->prevFreeM;

        // Merge the blocks to the left
        prevBlock->size += usedBlock->size;
        prevBlock->nextFreeM = usedBlock->nextFreeM;
        if (prevBlock->nextFreeM) {
          prevBlock->nextFreeM->prevFreeM = prevBlock;
        }

        // Detach previous block from the free S-list since size increased
        currPage->RemoveFreeSBlock(prevBlock);

        // reset usedBlock
        delete usedBlock;
        usedBlock = prevBlock;
      }
    }

    // Check if we can merge adjacent free blocks from the right.
    if (usedBlock->nextFreeM) {
      // Calculate the next allocation start address
      auto nextAddr = usedBlock->addr + usedBlock->size;
      if (usedBlock->nextFreeM->addr == nextAddr) {
        auto nextBlock = usedBlock->nextFreeM;

        // Merge the blocks to the right
        usedBlock->size += nextBlock->size;
        usedBlock->nextFreeM = nextBlock->nextFreeM;
        if (usedBlock->nextFreeM) {
          usedBlock->nextFreeM->prevFreeM = usedBlock;
        }

        // Delete next block
        currPage->RemoveFreeSBlock(nextBlock);
        delete nextBlock;
      }
    }

    // Insert the block into the free S-list.
    currPage->InsertFreeSBlock(usedBlock);

    // Check if we can free empty pages
    if (nullptr == currPage->usedList) {
      // Try to delete the page
      while (currPage && this->DeletePage(currPage)) {
        currPage = this->NextEmptyPage();
      }
    }

    return 0;
  }

private:

  // One contiguous address span inside a page. A block is linked either
  // into the page's used list or into both of its free lists.
  struct block_t {
    block_t* nextFreeS;
    block_t* prevFreeS;

    block_t* nextFreeM;
    block_t* prevFreeM;

    block_t* nextUsed;
    block_t* prevUsed;

    uint64_t addr;
    uint64_t size;

    block_t(uint64_t addr, uint64_t size)
      : nextFreeS(nullptr)
      , prevFreeS(nullptr)
      , nextFreeM(nullptr)
      , prevFreeM(nullptr)
      , nextUsed(nullptr)
      , prevUsed(nullptr)
      , addr(addr)
      , size(size)
    {}
  };

  // A page-aligned span of the address range and the blocks it contains.
  struct page_t {
    page_t* next;

    // List of used blocks
    block_t* usedList;

    // List with blocks sorted by decreasing sizes
    // Used for block lookup during memory allocation.
    block_t* freeSList;

    // List with blocks sorted by increasing memory addresses
    // Used for block merging during memory release.
    block_t* freeMList;

    uint64_t addr;
    uint64_t size;

    // A new page starts out as a single free block covering the whole page.
    page_t(uint64_t addr, uint64_t size) :
      next(nullptr),
      usedList(nullptr),
      addr(addr),
      size(size) {
      freeSList = freeMList = new block_t(addr, size);
    }

    // Destroys every block owned by the page. Free blocks are reachable
    // through the M-list (the S-list links the same objects), used blocks
    // through the used list.
    ~page_t() {
      block_t* block = usedList;
      while (block) {
        auto nextBlock = block->nextUsed;
        delete block;
        block = nextBlock;
      }
      block = freeMList;
      while (block) {
        auto nextBlock = block->nextFreeM;
        delete block;
        block = nextBlock;
      }
    }

    // Pushes a block at the head of the used list.
    void InsertUsedBlock(block_t* block) {
      block->nextUsed = usedList;
      if (usedList) {
        usedList->prevUsed = block;
      }
      usedList = block;
    }

    // Unlinks a block from the used list.
    void RemoveUsedBlock(block_t* block) {
      if (block->prevUsed) {
        block->prevUsed->nextUsed = block->nextUsed;
      } else {
        usedList = block->nextUsed;
      }
      if (block->nextUsed) {
        block->nextUsed->prevUsed = block->prevUsed;
      }
      block->nextUsed = nullptr;
      block->prevUsed = nullptr;
    }

    // Inserts a block into the address-ordered free list.
    void InsertFreeMBlock(block_t* block) {
      block_t* currBlock = freeMList;
      block_t* prevBlock = nullptr;
      while (currBlock && (currBlock->addr < block->addr)) {
        prevBlock = currBlock;
        currBlock = currBlock->nextFreeM;
      }
      block->nextFreeM = currBlock;
      block->prevFreeM = prevBlock;
      if (prevBlock) {
        prevBlock->nextFreeM = block;
      } else {
        freeMList = block;
      }
      if (currBlock) {
        currBlock->prevFreeM = block;
      }
    }

    // Unlinks a block from the address-ordered free list.
    void RemoveFreeMBlock(block_t* block) {
      if (block->prevFreeM) {
        block->prevFreeM->nextFreeM = block->nextFreeM;
      } else {
        freeMList = block->nextFreeM;
      }
      if (block->nextFreeM) {
        block->nextFreeM->prevFreeM = block->prevFreeM;
      }
      block->nextFreeM = nullptr;
      block->prevFreeM = nullptr;
    }

    // Inserts a block into the size-ordered (largest first) free list.
    void InsertFreeSBlock(block_t* block) {
      block_t* currBlock = this->freeSList;
      block_t* prevBlock = nullptr;
      while (currBlock && (currBlock->size > block->size)) {
        prevBlock = currBlock;
        currBlock = currBlock->nextFreeS;
      }
      block->nextFreeS = currBlock;
      block->prevFreeS = prevBlock;
      if (prevBlock) {
        prevBlock->nextFreeS = block;
      } else {
        this->freeSList = block;
      }
      if (currBlock) {
        currBlock->prevFreeS = block;
      }
    }

    // Unlinks a block from the size-ordered free list.
    void RemoveFreeSBlock(block_t* block) {
      if (block->prevFreeS) {
        block->prevFreeS->nextFreeS = block->nextFreeS;
      } else {
        freeSList = block->nextFreeS;
      }
      if (block->nextFreeS) {
        block->nextFreeS->prevFreeS = block->prevFreeS;
      }
      block->nextFreeS = nullptr;
      block->prevFreeS = nullptr;
    }
  };

  // Creates a page large enough for `size` bytes at the current bump
  // pointer. Returns nullptr, without changing any state, when the page
  // would not fit below maxAddress_ or its size overflows.
  page_t* NewPage(uint64_t size) {
    // Round the page size up to the page alignment
    uint64_t pageSize = AlignSize(size, pageAlign_);
    if (pageSize < size)
      return nullptr;

    // Bounds check BEFORE touching nextAddress_: advancing the pointer
    // first and bailing out afterwards would leave it past the limit and
    // make every later request fail.
    if (nextAddress_ > maxAddress_
     || pageSize > (maxAddress_ - nextAddress_))
      return nullptr;

    // Allocate page memory
    auto addr = nextAddress_;

    // Allocate object
    auto newPage = new page_t(addr, pageSize);
    nextAddress_ += pageSize;

    // Insert the new page into the list
    newPage->next = pages_;
    pages_ = newPage;

    return newPage;
  }

  // Destroys an empty page if it is the one ending at the bump pointer and
  // moves the pointer back to the page start. Returns false (page kept)
  // for any other page.
  bool DeletePage(page_t* page) {
    // The page should be empty
    assert(nullptr == page->usedList);
    assert(page->freeMList && (nullptr == page->freeMList->nextFreeM));

    // Only delete top-level pages
    auto nextAddr = page->addr + page->size;
    if (nextAddr != nextAddress_)
      return false;

    // Remove the page from the list
    page_t* prevPage = nullptr;
    auto currPage = pages_;
    while (currPage) {
      if (currPage == page) {
        if (prevPage) {
          prevPage->next = currPage->next;
        } else {
          pages_ = currPage->next;
        }
        break;
      }
      prevPage = currPage;
      currPage = currPage->next;
    }

    // Update next allocation address
    nextAddress_ = page->addr;

    // free object (the page destructor releases its blocks)
    delete page;

    return true;
  }

  // Returns the first page in list order that has no used blocks, if any.
  page_t* NextEmptyPage() {
    auto currPage = pages_;
    while (currPage) {
      if (nullptr == currPage->usedList)
        return currPage;
      currPage = currPage->next;
    }
    return nullptr;
  }

  // Rounds `size` up to a multiple of `alignment` (a power of two).
  static uint64_t AlignSize(uint64_t size, uint64_t alignment) {
    assert(0 == (alignment & (alignment - 1)));
    return (size + alignment - 1) & ~(alignment - 1);
  }

  uint64_t nextAddress_;
  uint64_t maxAddress_;
  uint32_t pageAlign_;
  uint32_t blockAlign_;
  page_t*  pages_;
};

} // namespace vortex

View file

@ -1,250 +0,0 @@
#include "vx_scope.h"
#include <iostream>
#include <fstream>
#include <thread>
#include <chrono>
#include <vector>
#include <assert.h>
#include <chrono>
#include <thread>
#include <mutex>
#include <VX_config.h>
#include <vortex_afu.h>
#include <scope-defs.h>
// Number of frames between output flushes / progress messages while the
// trace is being dumped (see vx_scope_stop).
#define FRAME_FLUSH_SIZE 100
// Evaluates an OPAE call; on any result other than FPGA_OK it prints the
// failing expression with its error string and makes the ENCLOSING function
// return -1. Only usable inside functions whose return type accepts -1.
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d, %s!\n", \
#_expr, (int)res, fpgaErrStr(res)); \
return -1; \
} while (false)
// MMIO offsets of the scope read/write registers.
// NOTE(review): the "* 4" presumably converts a register index into a byte
// offset - confirm against vortex_afu.h.
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
// Command codes written to MMIO_SCOPE_WRITE. The SET_START / SET_STOP
// commands carry an argument in the upper bits: (value << 3) | command.
#define CMD_GET_VALID 0
#define CMD_GET_DATA 1
#define CMD_GET_WIDTH 2
#define CMD_GET_COUNT 3
#define CMD_SET_START 4
#define CMD_SET_STOP 5
#define CMD_GET_OFFSET 6
// Element counts of the scope_modules / scope_taps tables.
static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t);
static constexpr int num_taps = sizeof(scope_taps) / sizeof(scope_tap_t);
// Compile-time sum of the widths of scope_taps[index .. num_taps-1].
// Written as a single-return recursion so it stays a valid constexpr
// function.
constexpr int calcFrameWidth(int index = 0) {
  return (index >= num_taps)
       ? 0
       : (scope_taps[index].width + calcFrameWidth(index + 1));
}

// Total frame width (sum of all tap widths) expected from the hardware.
static constexpr int fwidth = calcFrameWidth();
#ifdef HANG_TIMEOUT
static std::thread g_timeout_thread;
static std::mutex g_timeout_mutex;

// Body of the timeout thread started by vx_scope_start(): sleeps for
// HANG_TIMEOUT seconds, then stops the scope (which dumps the trace),
// closes the FPGA handle and terminates the process with exit code 0.
static void timeout_callback(fpga_handle fpga) {
  const auto hang_limit = std::chrono::seconds(HANG_TIMEOUT);
  std::this_thread::sleep_for(hang_limit);

  vx_scope_stop(fpga);
  fpgaClose(fpga);

  exit(0);
}
#endif
// Writes `delta` clock cycles to the trace. Each cycle consists of two
// timestamps: one with signal 0 low ("b0 0") and one with it high ("b1 0").
// Returns the timestamp following the last one written, i.e.
// timestamp + 2 * delta.
uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
  uint64_t now = timestamp;
  for (uint64_t remaining = delta; remaining != 0; --remaining) {
    // low phase
    ofs << '#' << now << std::endl;
    ++now;
    ofs << "b0 0" << std::endl;
    // high phase
    ofs << '#' << now << std::endl;
    ++now;
    ofs << "b1 0" << std::endl;
  }
  return now;
}
// Emits a "$var reg" declaration for every tap that belongs to `module`.
// The identifier written for a tap is its 1-based position in scope_taps.
void dump_taps(std::ofstream& ofs, int module) {
  for (int idx = 0; idx != num_taps; ++idx) {
    auto& tap = scope_taps[idx];
    if (tap.module == module) {
      const int var_id = idx + 1;
      ofs << "$var reg " << tap.width << " " << var_id << " " << tap.name << " $end" << std::endl;
    }
  }
}
// Recursively writes the declaration section for every module whose parent
// is `parent`: the module's children first, then its own taps.
// A module whose name starts with '*' does not open a scope of its own;
// the clock variable (id 0) is declared in its place.
void dump_module(std::ofstream& ofs, int parent) {
  for (auto& mod : scope_modules) {
    if (mod.parent == parent) {
      const bool starred = (mod.name[0] == '*');

      if (starred) {
        ofs << "$var reg 1 0 clk $end" << std::endl;
      } else {
        ofs << "$scope module " << mod.name << " $end" << std::endl;
      }

      dump_module(ofs, mod.index);
      dump_taps(ofs, mod.index);

      if (!starred) {
        ofs << "$upscope $end" << std::endl;
      }
    }
  }
}
// Arms the scope: optionally programs a stop time, then programs the start
// time, each as (value << 3) | command written to MMIO_SCOPE_WRITE.
// A stop_time of uint64_t(-1) means "no stop time" and skips that command.
// When HANG_TIMEOUT is defined, a detached timeout thread is started too.
// Returns -1 on a null handle or MMIO failure, 0 otherwise.
int vx_scope_start(fpga_handle hfpga, uint64_t start_time, uint64_t stop_time) {
  if (hfpga == nullptr) {
    return -1;
  }

  const bool has_stop_time = (stop_time != uint64_t(-1));
  if (has_stop_time) {
    // set stop time
    uint64_t stop_cmd = (stop_time << 3) | CMD_SET_STOP;
    CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, stop_cmd));
    std::cout << "scope stop time: " << std::dec << stop_time << "s" << std::endl;
  }

  // start recording
  uint64_t start_cmd = (start_time << 3) | CMD_SET_START;
  CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, start_cmd));
  std::cout << "scope start time: " << std::dec << start_time << "s" << std::endl;

#ifdef HANG_TIMEOUT
  g_timeout_thread = std::thread(timeout_callback, hfpga);
  g_timeout_thread.detach();
#endif

  return 0;
}
// Stops the scope recording and dumps the captured frames to "trace.vcd"
// in the current working directory.
//
// Sequence: force a stop, write the VCD header (module/tap declarations),
// wait until the device reports valid data, read frame width / frame count
// / start offset, then stream the frame data word by word, emitting one
// value line per tap and a clock header between frames.
//
// Returns 0 on success (and also when another caller already holds the
// timeout mutex), -1 on a null handle or any MMIO failure. Aborts the
// process if the hardware frame width differs from the compiled-in one.
int vx_scope_stop(fpga_handle hfpga) {
#ifdef HANG_TIMEOUT
// Only the first caller gets past this point; the mutex is not unlocked
// anywhere in this function, so later calls return immediately.
if (!g_timeout_mutex.try_lock())
return 0;
#endif
if (nullptr == hfpga)
return -1;
// forced stop
uint64_t cmd_stop = ((0 << 3) | CMD_SET_STOP);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
std::cout << "scope trace dump begin..." << std::endl;
// NOTE(review): the stream is not checked after opening; if the file cannot
// be created the dump still runs and the output is silently dropped.
std::ofstream ofs("trace.vcd");
ofs << "$version Generated by Vortex Scope $end" << std::endl;
ofs << "$timescale 1 ns $end" << std::endl;
ofs << "$scope module TOP $end" << std::endl;
dump_module(ofs, -1);
dump_taps(ofs, -1);
ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl;
uint64_t frame_width, max_frames, data_valid, offset, delta;
uint64_t timestamp = 0;
// bit position inside the current frame
uint64_t frame_offset = 0;
// index of the frame being decoded
uint64_t frame_no = 0;
// 1-based id of the tap being decoded; taps are consumed from the last
// one down to the first within each frame
int signal_id = 0;
// bit position inside the current tap
int signal_offset = 0;
// wait for recording to terminate
// NOTE(review): this loop polls once per second with no upper bound.
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
do {
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
if (data_valid)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
} while (true);
// get frame width
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
// The tap table compiled into the driver must match the hardware layout.
if (fwidth != (int)frame_width) {
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
std::abort();
}
// get max frames
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
// get offset
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_OFFSET));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &offset));
// get data
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
// print clock header
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
timestamp = print_clock(ofs, offset + delta + 2, timestamp);
signal_id = num_taps;
// Scratch buffer for one tap's bit string plus the terminating NUL.
std::vector<char> signal_data(frame_width+1);
do {
if (frame_no == (max_frames-1)) {
// verify last frame is valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 1);
// switch the read register back to data mode
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
}
// read next data words
uint64_t word;
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));
// Consume the bits of this 64-bit word; the loop ends at a word boundary
// (frame_offset is reset to 0 at the end of a frame, which also ends it).
do {
int signal_width = scope_taps[signal_id-1].width;
int word_offset = frame_offset % 64;
// Bits arrive least-significant first; store them in reverse so the text
// reads most-significant bit first.
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
++signal_offset;
++frame_offset;
if (signal_offset == signal_width) {
signal_data[signal_width] = 0; // string null termination
ofs << 'b' << signal_data.data() << ' ' << signal_id << std::endl;
signal_offset = 0;
--signal_id;
}
if (frame_offset == frame_width) {
assert(0 == signal_offset);
frame_offset = 0;
++frame_no;
if (frame_no != max_frames) {
// print clock header
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
timestamp = print_clock(ofs, delta + 1, timestamp);
signal_id = num_taps;
if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
ofs << std::flush;
std::cout << "*** " << frame_no << "/" << max_frames << " frames" << std::endl;
}
}
}
} while ((frame_offset % 64) != 0);
} while (frame_no != max_frames);
// print_clock advances the timestamp by two per cycle, hence the division.
std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;
// verify data not valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 0);
return 0;
}

View file

@ -1,19 +0,0 @@
#pragma once
#include <stdint.h>
#ifdef USE_VLSIM
#include <fpga.h>
#else
#include <opae/fpga.h>
#endif
// Delay, in seconds, before the scope's timeout thread forces a trace dump
// and terminates the process: 60 s when USE_FPGA is defined, 30 minutes
// otherwise. Because this header always defines HANG_TIMEOUT, code guarded
// by "#ifdef HANG_TIMEOUT" is enabled wherever the header is included.
#if defined(USE_FPGA)
#define HANG_TIMEOUT 60
#else
#define HANG_TIMEOUT (30*60)
#endif
// Starts scope recording on the given FPGA handle. The default stop_time of
// -1 converts to the maximum uint64_t value, which the implementation
// treats as "no stop time". Returns 0 on success, -1 on failure.
int vx_scope_start(fpga_handle hfpga, uint64_t start_time = 0, uint64_t stop_time = -1);
// Stops scope recording and dumps the captured trace to "trace.vcd".
// Returns 0 on success, -1 on failure.
int vx_scope_stop(fpga_handle hfpga);

View file

@ -1,356 +0,0 @@
#include "vx_utils.h"
#include <iostream>
#include <fstream>
#include <cstring>
#include <vortex.h>
#include <VX_config.h>
#include <assert.h>
// Rounds `size` up to the next multiple of `alignment`.
// `alignment` must be a power of two (checked with an assert).
uint64_t aligned_size(uint64_t size, uint64_t alignment) {
  const uint64_t mask = alignment - 1;
  assert((alignment & mask) == 0);
  return (size + mask) & ~mask;
}
// Tells whether `addr` is a multiple of `alignment`.
// `alignment` must be a power of two (checked with an assert).
bool is_aligned(uint64_t addr, uint64_t alignment) {
  const uint64_t mask = alignment - 1;
  assert((alignment & mask) == 0);
  const uint64_t remainder = addr & mask;
  return remainder == 0;
}
// Uploads a kernel image held in memory to the device.
//
// The image is written to the address reported by VX_CAPS_KERNEL_BASE_ADDR,
// going through a 64 KB staging buffer: each chunk is copied into the
// buffer and then transferred with vx_copy_to_dev().
// Returns -1 on null/empty content or when the base address or staging
// buffer cannot be obtained; the vx_copy_to_dev() error code if a transfer
// fails; 0 on success. The staging buffer is always released.
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint64_t size) {
  if (content == NULL || size == 0) {
    return -1;
  }

  const uint32_t staging_bytes = 65536; // 64 KB

  uint64_t load_addr;
  if (vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &load_addr) != 0) {
    return -1;
  }

  // allocate device buffer
  vx_buffer_h staging;
  if (vx_buf_alloc(device, staging_bytes, &staging) != 0) {
    return -1;
  }

  // get buffer address
  auto staging_ptr = (uint8_t*)vx_host_ptr(staging);
  auto image = (const uint8_t*)content;

  // upload content, one staging buffer at a time
  for (uint64_t done = 0; done < size; ) {
    const uint64_t chunk = std::min<uint64_t>(staging_bytes, size - done);
    std::memcpy(staging_ptr, image + done, chunk);

    const int status = vx_copy_to_dev(staging, load_addr + done, chunk, 0);
    if (status != 0) {
      vx_buf_free(staging);
      return status;
    }
    done += chunk;
  }

  vx_buf_free(staging);

  return 0;
}
// Reads a kernel image from `filename` and uploads it to the device via
// vx_upload_kernel_bytes().
//
// The file is opened in binary mode so the bytes reach the device
// unmodified on every platform.
// Returns -1 when the filename is null, the file cannot be opened, its size
// cannot be determined or is zero, or it cannot be read completely;
// otherwise the result of vx_upload_kernel_bytes().
extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
  if (nullptr == filename)
    return -1;

  std::ifstream ifs(filename, std::ios::binary);
  if (!ifs) {
    std::cout << "error: " << filename << " not found" << std::endl;
    return -1;
  }

  // determine the file size; tellg() reports -1 on failure, which must not
  // be used as an array length
  ifs.seekg(0, ifs.end);
  std::streamoff size = ifs.tellg();
  if (size <= 0)
    return -1;
  ifs.seekg(0, ifs.beg);

  // read file content
  auto content = new char [static_cast<std::size_t>(size)];
  if (!ifs.read(content, size)) {
    // short or failed read: do not upload a partially filled buffer
    delete[] content;
    return -1;
  }

  // upload
  int err = vx_upload_kernel_bytes(device, content, size);

  // release buffer
  delete[] content;

  return err;
}
/*static uint32_t get_csr_32(const uint32_t* buffer, int addr) {
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
return value_lo;
}*/
// Assembles a 64-bit counter from a buffer of 32-bit CSR words.
// The low half sits at index (addr - CSR_MPM_BASE) and the high half 32
// entries after it.
static uint64_t get_csr_64(const uint32_t* buffer, int addr) {
  const auto slot = addr - CSR_MPM_BASE;
  const uint64_t low_half  = buffer[slot];
  const uint64_t high_half = buffer[slot + 32];
  return (high_half << 32) | low_half;
}
extern int vx_dump_perf(vx_device_h device, FILE* stream) {
int ret = 0;
uint64_t instrs = 0;
uint64_t cycles = 0;
#ifdef PERF_ENABLE
// PERF: pipeline stalls
uint64_t ibuffer_stalls = 0;
uint64_t scoreboard_stalls = 0;
uint64_t lsu_stalls = 0;
uint64_t fpu_stalls = 0;
uint64_t csr_stalls = 0;
uint64_t alu_stalls = 0;
uint64_t gpu_stalls = 0;
// PERF: decode
uint64_t loads = 0;
uint64_t stores = 0;
uint64_t branches = 0;
// PERF: Icache
uint64_t icache_reads = 0;
uint64_t icache_read_misses = 0;
// PERF: Dcache
uint64_t dcache_reads = 0;
uint64_t dcache_writes = 0;
uint64_t dcache_read_misses = 0;
uint64_t dcache_write_misses = 0;
uint64_t dcache_bank_stalls = 0;
uint64_t dcache_mshr_stalls = 0;
// PERF: shared memory
uint64_t smem_reads = 0;
uint64_t smem_writes = 0;
uint64_t smem_bank_stalls = 0;
// PERF: memory
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_lat = 0;
#ifdef EXT_TEX_ENABLE
// PERF: texunit
uint64_t tex_mem_reads = 0;
uint64_t tex_mem_lat = 0;
#endif
#endif
uint64_t num_cores;
ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
if (ret != 0)
return ret;
vx_buffer_h staging_buf;
ret = vx_buf_alloc(device, 64 * sizeof(uint32_t), &staging_buf);
if (ret != 0)
return ret;
auto staging_ptr = (uint32_t*)vx_host_ptr(staging_buf);
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
ret = vx_copy_from_dev(staging_buf, IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0);
if (ret != 0) {
vx_buf_free(staging_buf);
return ret;
}
uint64_t instrs_per_core = get_csr_64(staging_ptr, CSR_MINSTRET);
uint64_t cycles_per_core = get_csr_64(staging_ptr, CSR_MCYCLE);
float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
instrs += instrs_per_core;
cycles = std::max<uint64_t>(cycles_per_core, cycles);
#ifdef PERF_ENABLE
// PERF: pipeline
// ibuffer_stall
uint64_t ibuffer_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_IBUF_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: ibuffer stalls=%ld\n", core_id, ibuffer_stalls_per_core);
ibuffer_stalls += ibuffer_stalls_per_core;
// scoreboard_stall
uint64_t scoreboard_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_SCRB_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: scoreboard stalls=%ld\n", core_id, scoreboard_stalls_per_core);
scoreboard_stalls += scoreboard_stalls_per_core;
// alu_stall
uint64_t alu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_ALU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: alu unit stalls=%ld\n", core_id, alu_stalls_per_core);
alu_stalls += alu_stalls_per_core;
// lsu_stall
uint64_t lsu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_LSU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu unit stalls=%ld\n", core_id, lsu_stalls_per_core);
lsu_stalls += lsu_stalls_per_core;
// csr_stall
uint64_t csr_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_CSR_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
csr_stalls += csr_stalls_per_core;
// fpu_stall
uint64_t fpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_FPU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu unit stalls=%ld\n", core_id, fpu_stalls_per_core);
fpu_stalls += fpu_stalls_per_core;
// gpu_stall
uint64_t gpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_GPU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core);
gpu_stalls += gpu_stalls_per_core;
// PERF: decode
// loads
uint64_t loads_per_core = get_csr_64(staging_ptr, CSR_MPM_LOADS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: loads=%ld\n", core_id, loads_per_core);
loads += loads_per_core;
// stores
uint64_t stores_per_core = get_csr_64(staging_ptr, CSR_MPM_STORES);
if (num_cores > 1) fprintf(stream, "PERF: core%d: stores=%ld\n", core_id, stores_per_core);
stores += stores_per_core;
// branches
uint64_t branches_per_core = get_csr_64(staging_ptr, CSR_MPM_BRANCHES);
if (num_cores > 1) fprintf(stream, "PERF: core%d: branches=%ld\n", core_id, branches_per_core);
branches += branches_per_core;
// PERF: Icache
// total reads
uint64_t icache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_READS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reads=%ld\n", core_id, icache_reads_per_core);
icache_reads += icache_reads_per_core;
// read misses
uint64_t icache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_MISS_R);
int icache_read_hit_ratio = (int)((1.0 - (double(icache_miss_r_per_core) / double(icache_reads_per_core))) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache misses=%ld (hit ratio=%d%%)\n", core_id, icache_miss_r_per_core, icache_read_hit_ratio);
icache_read_misses += icache_miss_r_per_core;
// PERF: Dcache
// total reads
uint64_t dcache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_READS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reads=%ld\n", core_id, dcache_reads_per_core);
dcache_reads += dcache_reads_per_core;
// total write
uint64_t dcache_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_WRITES);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache writes=%ld\n", core_id, dcache_writes_per_core);
dcache_writes += dcache_writes_per_core;
// read misses
uint64_t dcache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_R);
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_miss_r_per_core) / double(dcache_reads_per_core))) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_r_per_core, dcache_read_hit_ratio);
dcache_read_misses += dcache_miss_r_per_core;
// read misses
uint64_t dcache_miss_w_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_W);
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_miss_w_per_core) / double(dcache_writes_per_core))) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache write misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio);
dcache_write_misses += dcache_miss_w_per_core;
// bank_stalls
uint64_t dcache_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_BANK_ST);
int dcache_bank_utilization = (int)((double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core)) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache bank stalls=%ld (utilization=%d%%)\n", core_id, dcache_bank_st_per_core, dcache_bank_utilization);
dcache_bank_stalls += dcache_bank_st_per_core;
// mshr_stalls
uint64_t dcache_mshr_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MSHR_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache mshr stalls=%ld\n", core_id, dcache_mshr_st_per_core);
dcache_mshr_stalls += dcache_mshr_st_per_core;
// PERF: SMEM
// total reads
uint64_t smem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_READS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem reads=%ld\n", core_id, smem_reads_per_core);
smem_reads += smem_reads_per_core;
// total write
uint64_t smem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_WRITES);
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem writes=%ld\n", core_id, smem_writes_per_core);
smem_writes += smem_writes_per_core;
// bank_stalls
uint64_t smem_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_BANK_ST);
int smem_bank_utilization = (int)((double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core)) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization);
smem_bank_stalls += smem_bank_st_per_core;
// PERF: memory
uint64_t mem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_READS);
uint64_t mem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_WRITES);
uint64_t mem_lat_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_LAT);
int mem_avg_lat = (int)(double(mem_lat_per_core) / double(mem_reads_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory requests=%ld (reads=%ld, writes=%ld)\n", core_id, (mem_reads_per_core + mem_writes_per_core), mem_reads_per_core, mem_writes_per_core);
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory latency=%d cycles\n", core_id, mem_avg_lat);
mem_reads += mem_reads_per_core;
mem_writes += mem_writes_per_core;
mem_lat += mem_lat_per_core;
#ifdef EXT_TEX_ENABLE
// total reads
uint64_t tex_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_TEX_READS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: tex memory reads=%ld\n", core_id, tex_reads_per_core);
tex_mem_reads += tex_reads_per_core;
// read latency
uint64_t tex_lat_per_core = get_csr_64(staging_ptr, CSR_MPM_TEX_LAT);
int tex_avg_lat = (int)(double(tex_lat_per_core) / double(tex_reads_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: tex memory latency=%d cycles\n", core_id, tex_avg_lat);
tex_mem_lat += tex_lat_per_core;
#endif
#endif
}
float IPC = (float)(double(instrs) / double(cycles));
fprintf(stream, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
#ifdef PERF_ENABLE
int icache_read_hit_ratio = (int)((1.0 - (double(icache_read_misses) / double(icache_reads))) * 100);
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_read_misses) / double(dcache_reads))) * 100);
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_write_misses) / double(dcache_writes))) * 100);
int dcache_bank_utilization = (int)((double(dcache_reads + dcache_writes) / double(dcache_reads + dcache_writes + dcache_bank_stalls)) * 100);
int smem_bank_utilization = (int)((double(smem_reads + smem_writes) / double(smem_reads + smem_writes + smem_bank_stalls)) * 100);
int mem_avg_lat = (int)(double(mem_lat) / double(mem_reads));
fprintf(stream, "PERF: ibuffer stalls=%ld\n", ibuffer_stalls);
fprintf(stream, "PERF: scoreboard stalls=%ld\n", scoreboard_stalls);
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
fprintf(stream, "PERF: lsu unit stalls=%ld\n", lsu_stalls);
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
fprintf(stream, "PERF: loads=%ld\n", loads);
fprintf(stream, "PERF: stores=%ld\n", stores);
fprintf(stream, "PERF: branches=%ld\n", branches);
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
fprintf(stream, "PERF: icache read misses=%ld (hit ratio=%d%%)\n", icache_read_misses, icache_read_hit_ratio);
fprintf(stream, "PERF: dcache reads=%ld\n", dcache_reads);
fprintf(stream, "PERF: dcache writes=%ld\n", dcache_writes);
fprintf(stream, "PERF: dcache read misses=%ld (hit ratio=%d%%)\n", dcache_read_misses, dcache_read_hit_ratio);
fprintf(stream, "PERF: dcache write misses=%ld (hit ratio=%d%%)\n", dcache_write_misses, dcache_write_hit_ratio);
fprintf(stream, "PERF: dcache bank stalls=%ld (utilization=%d%%)\n", dcache_bank_stalls, dcache_bank_utilization);
fprintf(stream, "PERF: dcache mshr stalls=%ld\n", dcache_mshr_stalls);
fprintf(stream, "PERF: smem reads=%ld\n", smem_reads);
fprintf(stream, "PERF: smem writes=%ld\n", smem_writes);
fprintf(stream, "PERF: smem bank stalls=%ld (utilization=%d%%)\n", smem_bank_stalls, smem_bank_utilization);
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory average latency=%d cycles\n", mem_avg_lat);
#ifdef EXT_TEX_ENABLE
int tex_avg_lat = (int)(double(tex_mem_lat) / double(tex_mem_reads));
fprintf(stream, "PERF: tex memory reads=%ld\n", tex_mem_reads);
fprintf(stream, "PERF: tex memory latency=%d cycles\n", tex_avg_lat);
#endif
#endif
// release allocated resources
vx_buf_free(staging_buf);
return ret;
}
// Deprecated API functions
// Deprecated alias: forwards unchanged to vx_buf_alloc() and returns its result.
extern int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
return vx_buf_alloc(hdevice, size, hbuffer);
}
// Deprecated alias: forwards unchanged to vx_buf_free() and returns its result.
extern int vx_buf_release(vx_buffer_h hbuffer) {
return vx_buf_free(hbuffer);
}
// Deprecated alias: forwards unchanged to vx_mem_alloc() and returns its result.
extern int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
return vx_mem_alloc(hdevice, size, dev_maddr);
}

View file

@ -1,11 +0,0 @@
#pragma once
#include <cstdint>
// Alignment helpers; the definitions live in another translation unit.
// NOTE(review): presumably aligned_size() rounds `size` up to a multiple of
// `alignment` and is_aligned() tests whether `addr` is such a multiple -
// confirm against the implementation.
uint64_t aligned_size(uint64_t size, uint64_t alignment);
bool is_aligned(uint64_t addr, uint64_t alignment);
// Block size, in bytes, used as the alignment granularity.
#define CACHE_BLOCK_SIZE 64
// Lowest address of the allocatable range.
#define ALLOC_BASE_ADDR 0x00000000
// Size of the local memory in bytes. The value does not fit in 32 bits, so
// expressions that use it must be evaluated in a 64-bit type.
#define LOCAL_MEM_SIZE 4294967296 // 4 GB

View file

@ -1,75 +0,0 @@
OPAE_HOME ?= /tools/opae/1.4.0
RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
OPAE_SYN_DIR=../../hw/syn/opae
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I. -I../include -I../../hw -I$(OPAE_HOME)/include -I$(OPAE_SYN_DIR)
LDFLAGS += -L$(OPAE_HOME)/lib -luuid -lopae-c
#SCOPE=1
# stack execution protection
LDFLAGS +=-z noexecstack
# data relocation and protection
LDFLAGS +=-z relro -z now
# stack buffer overrun detection
CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
# Add external configuration
CXXFLAGS += $(CONFIGS)
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared
PROJECT = libvortex.so
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/vx_scope.cpp
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
all: $(PROJECT)
$(OPAE_SYN_DIR)/vortex_afu.h:
$(MAKE) -C $(OPAE_SYN_DIR) vortex_afu.h
scope-defs.h: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
# generate scope data
scope: scope-defs.h
$(PROJECT): $(SRCS) $(OPAE_SYN_DIR)/vortex_afu.h $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) -o $(PROJECT)
clean:
rm -rf $(PROJECT) *.o scope-defs.h

View file

@ -1,84 +0,0 @@
#ifndef __VX_DRIVER_H__
#define __VX_DRIVER_H__
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef void* vx_device_h;
typedef void* vx_buffer_h;
// device caps ids
#define VX_CAPS_VERSION 0x0
#define VX_CAPS_MAX_CORES 0x1
#define VX_CAPS_MAX_WARPS 0x2
#define VX_CAPS_MAX_THREADS 0x3
#define VX_CAPS_CACHE_LINE_SIZE 0x4
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
#define MAX_TIMEOUT (60*60*1000) // 1hr
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);
// Close the device when all the operations are done
int vx_dev_close(vx_device_h hdevice);
// return device configurations
int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value);
// Allocate shared buffer with device
int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
// release buffer
int vx_buf_free(vx_buffer_h hbuffer);
// Get host pointer address
void* vx_host_ptr(vx_buffer_h hbuffer);
// allocate device memory and return address
int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
// release device memory
int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr);
// Copy bytes from buffer to device local memory
int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset);
// Copy bytes from device local memory to buffer
int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dst_offset);
// Start device execution
int vx_start(vx_device_h hdevice);
// Wait for device ready with milliseconds timeout
int vx_ready_wait(vx_device_h hdevice, uint64_t timeout);
////////////////////////////// UTILITY FUNCTIONS //////////////////////////////
// upload kernel bytes to device
int vx_upload_kernel_bytes(vx_device_h device, const void* content, uint64_t size);
// upload kernel file to device
int vx_upload_kernel_file(vx_device_h device, const char* filename);
// dump performance counters
int vx_dump_perf(vx_device_h device, FILE* stream);
//////////////////////////// DEPRECATED FUNCTIONS /////////////////////////////
int vx_alloc_dev_mem(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr);
int vx_alloc_shared_mem(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer);
int vx_buf_release(vx_buffer_h hbuffer);
#ifdef __cplusplus
}
#endif
#endif // __VX_DRIVER_H__

View file

@ -1,2 +0,0 @@
obj_dir
*.so

View file

@ -1,43 +0,0 @@
RTLSIM_DIR = ../../sim/rtlsim
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../common -I../../hw -I$(RTLSIM_DIR) -I$(RTLSIM_DIR)/../common
# Position independent code
CXXFLAGS += -fPIC
# Add external configuration
CXXFLAGS += $(CONFIGS)
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
LDFLAGS += -L. -lrtlsim
SRCS = vortex.cpp ../common/vx_utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = libvortex.so
all: $(PROJECT)
$(PROJECT): $(SRCS)
DESTDIR=../../driver/rtlsim $(MAKE) -C $(RTLSIM_DIR) ../../driver/rtlsim/librtlsim.so
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT)
clean:
DESTDIR=../../driver/rtlsim $(MAKE) -C $(RTLSIM_DIR) clean
rm -rf $(PROJECT) *.o

View file

@ -1,355 +0,0 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <future>
#include <list>
#include <chrono>
#include <vortex.h>
#include <vx_malloc.h>
#include <vx_utils.h>
#include <VX_config.h>
#include <mem.h>
#include <util.h>
#include <processor.h>
#define RAM_PAGE_SIZE 4096
using namespace vortex;
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(uint64_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
uint64_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
uint64_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
// Device object of this driver back-end: bundles the RAM model, the processor
// model, the local-memory allocator and the future tracking an asynchronous
// run of the processor.
class vx_device {
public:
  vx_device()
    : ram_(RAM_PAGE_SIZE)
    , mem_allocator_(
        ALLOC_BASE_ADDR,
        ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
        RAM_PAGE_SIZE,
        CACHE_BLOCK_SIZE)
  {
    processor_.attach_ram(&ram_);
  }

  // Blocks until a run that is still in flight has finished.
  ~vx_device() {
    if (future_.valid()) {
      future_.wait();
    }
  }

  // Forwards to the allocator; the address is returned through dev_maddr.
  int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
    return mem_allocator_.allocate(size, dev_maddr);
  }

  // Forwards to the allocator to release a previous allocation.
  int free_local_mem(uint64_t dev_maddr) {
    return mem_allocator_.release(dev_maddr);
  }

  // Writes `size` bytes (padded via aligned_size to CACHE_BLOCK_SIZE) from
  // src + src_offset into device memory at dest_addr.
  // Returns 0 on success, -1 if the padded range does not fit in local memory.
  // NOTE(review): the padded length is read from the host pointer, which can
  // exceed the `size` bytes the caller validated - confirm the host buffer
  // is always large enough.
  int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
    uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
    if (!in_local_mem(dest_addr, asize))
      return -1;
    /*printf("VXDRV: upload %ld bytes from 0x%lx:", size, uintptr_t((uint8_t*)src + src_offset));
    for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {
      printf("\n0x%08lx=", dest_addr + i * CACHE_BLOCK_SIZE);
      for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) {
        printf("%02x", *((uint8_t*)src + src_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j));
      }
    }
    printf("\n");*/
    ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
    return 0;
  }

  // Reads `size` bytes (padded via aligned_size to CACHE_BLOCK_SIZE) from
  // device memory at src_addr into dest + dest_offset.
  // Returns 0 on success, -1 if the padded range does not fit in local memory.
  int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
    uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
    if (!in_local_mem(src_addr, asize))
      return -1;
    ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
    /*printf("VXDRV: download %ld bytes to 0x%lx:", size, uintptr_t((uint8_t*)dest + dest_offset));
    for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) {
      printf("\n0x%08lx=", src_addr + i * CACHE_BLOCK_SIZE);
      for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) {
        printf("%02x", *((uint8_t*)dest + dest_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j));
      }
    }
    printf("\n");*/
    return 0;
  }

  // Launches processor_.run() on a separate thread. If a previous run is
  // still pending, waits for it first. Always returns 0.
  int start() {
    // ensure prior run completed
    if (future_.valid()) {
      future_.wait();
    }
    // start new run
    future_ = std::async(std::launch::async, [&]{
      processor_.run();
    });
    return 0;
  }

  // Waits for the pending run, polling in 1-second steps.
  // `timeout` is in milliseconds and is truncated to whole seconds; at least
  // one 1-second wait is always performed.
  // Returns 0 if no run is pending or the run completed, and -1 if the
  // timeout expired first (previously a timeout was reported as success).
  int wait(uint64_t timeout) {
    if (!future_.valid())
      return 0;
    uint64_t timeout_sec = timeout / 1000;
    const std::chrono::seconds wait_time(1);
    for (;;) {
      // wait for 1 sec and check status
      auto status = future_.wait_for(wait_time);
      if (status == std::future_status::ready)
        break;
      if (0 == timeout_sec--)
        return -1;
    }
    return 0;
  }

private:
  // True when [addr, addr + asize) lies within [0, LOCAL_MEM_SIZE]. Written
  // without forming addr + asize so that the test cannot wrap around.
  static bool in_local_mem(uint64_t addr, uint64_t asize) {
    const uint64_t mem_size = LOCAL_MEM_SIZE;
    return asize <= mem_size && addr <= mem_size - asize;
  }

  RAM ram_;
  Processor processor_;
  MemoryAllocator mem_allocator_;
  std::future<void> future_;
};
///////////////////////////////////////////////////////////////////////////////
#ifdef DUMP_PERF_STATS
class AutoPerfDump {
private:
std::list<vx_device_h> devices_;
public:
AutoPerfDump() {}
~AutoPerfDump() {
for (auto device : devices_) {
vx_dump_perf(device, stdout);
}
}
void add_device(vx_device_h device) {
devices_.push_back(device);
}
void remove_device(vx_device_h device) {
devices_.remove(device);
}
};
AutoPerfDump gAutoPerfDump;
#endif
///////////////////////////////////////////////////////////////////////////////
// Query a device capability selected by one of the VX_CAPS_* ids.
// On success stores the capability in *value and returns 0.
// Returns -1 when hdevice or value is null. An unknown caps_id prints a
// diagnostic to stdout and aborts the process.
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
  // value is an out-pointer; reject null instead of dereferencing it,
  // in line with the other entry points that validate their out-pointers.
  if (nullptr == hdevice
   || nullptr == value)
    return -1;
  switch (caps_id) {
  case VX_CAPS_VERSION:
    *value = IMPLEMENTATION_ID;
    break;
  case VX_CAPS_MAX_CORES:
    *value = NUM_CORES * NUM_CLUSTERS;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = NUM_WARPS;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = NUM_THREADS;
    break;
  case VX_CAPS_CACHE_LINE_SIZE:
    *value = CACHE_BLOCK_SIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    *value = LOCAL_MEM_SIZE;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    *value = ALLOC_BASE_ADDR;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    std::cout << "invalid caps id: " << caps_id << std::endl;
    std::abort();
    return -1; // not reached; kept so every path has a return value
  }
  return 0;
}
// Create a new device object and hand it back through *hdevice.
// Returns 0 on success, -1 when hdevice is null. With DUMP_PERF_STATS the new
// handle is also registered with the global perf-dump registry.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice != nullptr) {
    *hdevice = new vx_device();
#ifdef DUMP_PERF_STATS
    gAutoPerfDump.add_device(*hdevice);
#endif
    return 0;
  }
  return -1;
}
// Destroy a device created by vx_dev_open().
// Returns 0 on success, -1 when hdevice is null. With DUMP_PERF_STATS the
// handle is unregistered and its perf counters are dumped to stdout before
// the device is deleted.
extern int vx_dev_close(vx_device_h hdevice) {
  if (hdevice == nullptr) {
    return -1;
  }
#ifdef DUMP_PERF_STATS
  gAutoPerfDump.remove_device(hdevice);
  vx_dump_perf(hdevice, stdout);
#endif
  delete static_cast<vx_device*>(hdevice);
  return 0;
}
// Reserve `size` bytes of device local memory; the address is reported
// through dev_maddr. Fails with -1 on a null handle, a null output pointer
// or a zero size; otherwise returns the device allocator's result.
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
  if (hdevice == nullptr) return -1;
  if (dev_maddr == nullptr) return -1;
  if (size == 0) return -1;
  return static_cast<vx_device*>(hdevice)->alloc_local_mem(size, dev_maddr);
}
// Release device local memory at dev_maddr.
// Returns -1 on a null handle, otherwise the device allocator's result.
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
  vx_device* const dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->free_local_mem(dev_maddr) : -1;
}
// Create a host staging buffer of `size` bytes bound to the given device and
// return its handle through *hbuffer.
// Returns 0 on success; -1 on a null handle, zero size, null output pointer,
// or when the buffer's storage could not be allocated.
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
  const bool bad_args = (hdevice == nullptr) || (size == 0) || (hbuffer == nullptr);
  if (bad_args) {
    return -1;
  }
  vx_buffer* const buf = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (buf->data() != nullptr) {
    *hbuffer = buf;
    return 0;
  }
  // storage allocation failed: discard the wrapper object
  delete buf;
  return -1;
}
// Host address of a buffer's storage, or nullptr for a null handle.
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
  vx_buffer* const buf = static_cast<vx_buffer*>(hbuffer);
  return (buf != nullptr) ? buf->data() : nullptr;
}
// Destroy a buffer created by vx_buf_alloc().
// Returns 0 on success, -1 for a null handle.
extern int vx_buf_free(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    delete static_cast<vx_buffer*>(hbuffer);
    return 0;
  }
  return -1;
}
// Copy `size` bytes, starting `src_offset` bytes into the host buffer, to
// device memory at dev_maddr.
// Returns -1 on a null handle, a zero size, or when [src_offset,
// src_offset + size) is not contained in the buffer; otherwise returns the
// result of the device upload.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;
  auto buffer = (vx_buffer*)hbuffer;
  // Bounds check written without forming size + src_offset: that sum can
  // wrap around for a large offset and would then pass the check.
  const uint64_t buf_size = buffer->size();
  if (src_offset > buf_size
   || size > buf_size - src_offset)
    return -1;
  return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
// Copy `size` bytes from device memory at dev_maddr into the host buffer,
// starting `dest_offset` bytes into it.
// Returns -1 on a null handle, a zero size, or when [dest_offset,
// dest_offset + size) is not contained in the buffer; otherwise returns the
// result of the device download.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;
  auto buffer = (vx_buffer*)hbuffer;
  // Bounds check written without forming size + dest_offset: that sum can
  // wrap around for a large offset and would then pass the check.
  const uint64_t buf_size = buffer->size();
  if (dest_offset > buf_size
   || size > buf_size - dest_offset)
    return -1;
  return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
// Start execution on the device.
// Returns -1 for a null handle, otherwise the result of the device's start().
extern int vx_start(vx_device_h hdevice) {
  vx_device* const dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->start() : -1;
}
// Wait for the device with the given timeout.
// Returns -1 for a null handle, otherwise the result of the device's wait().
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
  vx_device* const dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->wait(timeout) : -1;
}

View file

@ -1,2 +0,0 @@
obj_dir
libvortex.so

View file

@ -1,32 +0,0 @@
SIMX_DIR = ../../sim/simx
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../common -I../../hw -I$(SIMX_DIR) -I$(SIMX_DIR)/../common
CXXFLAGS += $(CONFIGS)
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
LDFLAGS += -L. -lsimx
SRCS = vortex.cpp ../common/vx_utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = libvortex.so
all: $(PROJECT)
$(PROJECT): $(SRCS)
DESTDIR=../../driver/simx $(MAKE) -C $(SIMX_DIR) ../../driver/simx/libsimx.so
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
clean:
DESTDIR=../../driver/simx $(MAKE) -C $(SIMX_DIR) clean
rm -rf libsimx.so $(PROJECT) *.o

View file

@ -1,357 +0,0 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <future>
#include <chrono>
#include <vortex.h>
#include <vx_utils.h>
#include <vx_malloc.h>
#include <VX_config.h>
#include <util.h>
#include <processor.h>
#include <archdef.h>
#include <mem.h>
#include <constants.h>
using namespace vortex;
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(uint64_t size, vx_device* device)
: size_(size)
, device_(device) {
uint64_t aligned_asize = aligned_size(size, CACHE_BLOCK_SIZE);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
uint64_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
uint64_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
// Device object of this driver back-end: bundles the architecture
// description, the RAM model, the processor model, the local-memory allocator
// and the future tracking an asynchronous run of the processor.
class vx_device {
public:
  vx_device()
    : arch_(NUM_CORES * NUM_CLUSTERS, NUM_WARPS, NUM_THREADS)
    , ram_(RAM_PAGE_SIZE)
    , processor_(arch_)
    , mem_allocator_(
        ALLOC_BASE_ADDR,
        ALLOC_BASE_ADDR + LOCAL_MEM_SIZE,
        RAM_PAGE_SIZE,
        CACHE_BLOCK_SIZE)
  {
    // attach memory module
    processor_.attach_ram(&ram_);
  }

  // Blocks until a run that is still in flight has finished.
  ~vx_device() {
    if (future_.valid()) {
      future_.wait();
    }
  }

  // Forwards to the allocator; the address is returned through dev_maddr.
  int alloc_local_mem(uint64_t size, uint64_t* dev_maddr) {
    return mem_allocator_.allocate(size, dev_maddr);
  }

  // Forwards to the allocator to release a previous allocation.
  int free_local_mem(uint64_t dev_maddr) {
    return mem_allocator_.release(dev_maddr);
  }

  // Writes `size` bytes (padded via aligned_size to CACHE_BLOCK_SIZE) from
  // src + src_offset into device memory at dest_addr.
  // Returns 0 on success, -1 if the padded range does not fit in local memory.
  // NOTE(review): the padded length is read from the host pointer, which can
  // exceed the `size` bytes the caller validated - confirm the host buffer
  // is always large enough.
  int upload(const void* src, uint64_t dest_addr, uint64_t size, uint64_t src_offset) {
    uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
    if (!in_local_mem(dest_addr, asize))
      return -1;
    ram_.write((const uint8_t*)src + src_offset, dest_addr, asize);
    /*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
    for (int i = 0; i < size; i += 4) {
      printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
    }*/
    return 0;
  }

  // Reads `size` bytes (padded via aligned_size to CACHE_BLOCK_SIZE) from
  // device memory at src_addr into dest + dest_offset.
  // Returns 0 on success, -1 if the padded range does not fit in local memory.
  int download(void* dest, uint64_t src_addr, uint64_t size, uint64_t dest_offset) {
    uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
    if (!in_local_mem(src_addr, asize))
      return -1;
    ram_.read((uint8_t*)dest + dest_offset, src_addr, asize);
    /*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
    for (int i = 0; i < size; i += 4) {
      printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
    }*/
    return 0;
  }

  // Launches processor_.run() on a separate thread. If a previous run is
  // still pending, waits for it first. Always returns 0.
  int start() {
    // ensure prior run completed
    if (future_.valid()) {
      future_.wait();
    }
    // start new run
    future_ = std::async(std::launch::async, [&]{
      processor_.run();
    });
    return 0;
  }

  // Waits for the pending run, polling in 1-second steps.
  // `timeout` is in milliseconds and is truncated to whole seconds; at least
  // one 1-second wait is always performed.
  // Returns 0 if no run is pending or the run completed, and -1 if the
  // timeout expired first (previously a timeout was reported as success).
  int wait(uint64_t timeout) {
    if (!future_.valid())
      return 0;
    uint64_t timeout_sec = timeout / 1000;
    const std::chrono::seconds wait_time(1);
    for (;;) {
      // wait for 1 sec and check status
      auto status = future_.wait_for(wait_time);
      if (status == std::future_status::ready)
        break;
      if (0 == timeout_sec--)
        return -1;
    }
    return 0;
  }

private:
  // True when [addr, addr + asize) lies within [0, LOCAL_MEM_SIZE]. Written
  // without forming addr + asize so that the test cannot wrap around.
  static bool in_local_mem(uint64_t addr, uint64_t asize) {
    const uint64_t mem_size = LOCAL_MEM_SIZE;
    return asize <= mem_size && addr <= mem_size - asize;
  }

  ArchDef arch_;
  RAM ram_;
  Processor processor_;
  MemoryAllocator mem_allocator_;
  std::future<void> future_;
};
///////////////////////////////////////////////////////////////////////////////
#ifdef DUMP_PERF_STATS
class AutoPerfDump {
private:
std::list<vx_device_h> devices_;
public:
AutoPerfDump() {}
~AutoPerfDump() {
for (auto device : devices_) {
vx_dump_perf(device, stdout);
}
}
void add_device(vx_device_h device) {
devices_.push_back(device);
}
void remove_device(vx_device_h device) {
devices_.remove(device);
}
};
AutoPerfDump gAutoPerfDump;
#endif
///////////////////////////////////////////////////////////////////////////////
// Create a new device object and hand it back through *hdevice.
// Returns 0 on success, -1 when hdevice is null. With DUMP_PERF_STATS the new
// handle is also registered with the global perf-dump registry.
extern int vx_dev_open(vx_device_h* hdevice) {
  if (hdevice != nullptr) {
    *hdevice = new vx_device();
#ifdef DUMP_PERF_STATS
    gAutoPerfDump.add_device(*hdevice);
#endif
    return 0;
  }
  return -1;
}
// Destroy a device created by vx_dev_open().
// Returns 0 on success, -1 when hdevice is null. With DUMP_PERF_STATS the
// handle is unregistered and its perf counters are dumped to stdout before
// the device is deleted.
extern int vx_dev_close(vx_device_h hdevice) {
  if (hdevice == nullptr) {
    return -1;
  }
#ifdef DUMP_PERF_STATS
  gAutoPerfDump.remove_device(hdevice);
  vx_dump_perf(hdevice, stdout);
#endif
  delete static_cast<vx_device*>(hdevice);
  return 0;
}
// Query a device capability selected by one of the VX_CAPS_* ids.
// On success stores the capability in *value and returns 0.
// Returns -1 when hdevice or value is null. An unknown caps_id prints a
// diagnostic to stdout and aborts the process.
extern int vx_dev_caps(vx_device_h hdevice, uint32_t caps_id, uint64_t *value) {
  // value is an out-pointer; reject null instead of dereferencing it,
  // in line with the other entry points that validate their out-pointers.
  if (nullptr == hdevice
   || nullptr == value)
    return -1;
  switch (caps_id) {
  case VX_CAPS_VERSION:
    *value = IMPLEMENTATION_ID;
    break;
  case VX_CAPS_MAX_CORES:
    *value = NUM_CORES * NUM_CLUSTERS;
    break;
  case VX_CAPS_MAX_WARPS:
    *value = NUM_WARPS;
    break;
  case VX_CAPS_MAX_THREADS:
    *value = NUM_THREADS;
    break;
  case VX_CAPS_CACHE_LINE_SIZE:
    *value = CACHE_BLOCK_SIZE;
    break;
  case VX_CAPS_LOCAL_MEM_SIZE:
    *value = LOCAL_MEM_SIZE;
    break;
  case VX_CAPS_ALLOC_BASE_ADDR:
    *value = ALLOC_BASE_ADDR;
    break;
  case VX_CAPS_KERNEL_BASE_ADDR:
    *value = STARTUP_ADDR;
    break;
  default:
    std::cout << "invalid caps id: " << caps_id << std::endl;
    std::abort();
    return -1; // not reached; kept so every path has a return value
  }
  return 0;
}
// Reserve `size` bytes of device local memory; the address is reported
// through dev_maddr. Fails with -1 on a null handle, a null output pointer
// or a zero size; otherwise returns the device allocator's result.
extern int vx_mem_alloc(vx_device_h hdevice, uint64_t size, uint64_t* dev_maddr) {
  if (hdevice == nullptr) return -1;
  if (dev_maddr == nullptr) return -1;
  if (size == 0) return -1;
  return static_cast<vx_device*>(hdevice)->alloc_local_mem(size, dev_maddr);
}
// Release device local memory at dev_maddr.
// Returns -1 on a null handle, otherwise the device allocator's result.
extern int vx_mem_free(vx_device_h hdevice, uint64_t dev_maddr) {
  vx_device* const dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->free_local_mem(dev_maddr) : -1;
}
// Create a host staging buffer of `size` bytes bound to the given device and
// return its handle through *hbuffer.
// Returns 0 on success; -1 on a null handle, zero size, null output pointer,
// or when the buffer's storage could not be allocated.
extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer) {
  const bool bad_args = (hdevice == nullptr) || (size == 0) || (hbuffer == nullptr);
  if (bad_args) {
    return -1;
  }
  vx_buffer* const buf = new vx_buffer(size, static_cast<vx_device*>(hdevice));
  if (buf->data() != nullptr) {
    *hbuffer = buf;
    return 0;
  }
  // storage allocation failed: discard the wrapper object
  delete buf;
  return -1;
}
// Host address of a buffer's storage, or nullptr for a null handle.
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
  vx_buffer* const buf = static_cast<vx_buffer*>(hbuffer);
  return (buf != nullptr) ? buf->data() : nullptr;
}
// Destroy a buffer created by vx_buf_alloc().
// Returns 0 on success, -1 for a null handle.
extern int vx_buf_free(vx_buffer_h hbuffer) {
  if (hbuffer != nullptr) {
    delete static_cast<vx_buffer*>(hbuffer);
    return 0;
  }
  return -1;
}
// Copy `size` bytes, starting `src_offset` bytes into the host buffer, to
// device memory at dev_maddr.
// Returns -1 on a null handle, a zero size, or when [src_offset,
// src_offset + size) is not contained in the buffer; otherwise returns the
// result of the device upload.
extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t src_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;
  auto buffer = (vx_buffer*)hbuffer;
  // Bounds check written without forming size + src_offset: that sum can
  // wrap around for a large offset and would then pass the check.
  const uint64_t buf_size = buffer->size();
  if (src_offset > buf_size
   || size > buf_size - src_offset)
    return -1;
  return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
// Copy `size` bytes from device memory at dev_maddr into the host buffer,
// starting `dest_offset` bytes into it.
// Returns -1 on a null handle, a zero size, or when [dest_offset,
// dest_offset + size) is not contained in the buffer; otherwise returns the
// result of the device download.
extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size, uint64_t dest_offset) {
  if (nullptr == hbuffer
   || 0 == size)
    return -1;
  auto buffer = (vx_buffer*)hbuffer;
  // Bounds check written without forming size + dest_offset: that sum can
  // wrap around for a large offset and would then pass the check.
  const uint64_t buf_size = buffer->size();
  if (dest_offset > buf_size
   || size > buf_size - dest_offset)
    return -1;
  return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
// Start execution on the device.
// Returns -1 for a null handle, otherwise the result of the device's start().
extern int vx_start(vx_device_h hdevice) {
  vx_device* const dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->start() : -1;
}
// Wait for the device with the given timeout.
// Returns -1 for a null handle, otherwise the result of the device's wait().
extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
  vx_device* const dev = static_cast<vx_device*>(hdevice);
  return (dev != nullptr) ? dev->wait(timeout) : -1;
}

View file

@ -1,19 +0,0 @@
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../../runtime -I../../hw
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread
SRCS = vortex.cpp ../common/vx_utils.cpp
PROJECT = libvortex.so
all: $(PROJECT)
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
clean:
rm -rf $(PROJECT) obj_dir

View file

@ -1,49 +0,0 @@
#include <vortex.h>
// Stub implementation of the driver API: every entry point ignores its
// arguments and reports failure (-1, or nullptr for vx_host_ptr).
// Always fails: no device can be opened.
extern int vx_dev_open(vx_device_h* /*hdevice*/) {
return -1;
}
// Always fails.
extern int vx_dev_close(vx_device_h /*hdevice*/) {
return -1;
}
// Always fails; the output value is never written.
extern int vx_dev_caps(vx_device_h /*hdevice*/, uint32_t /*caps_id*/, uint64_t* /*value*/) {
return -1;
}
// Always fails; the output address is never written.
extern int vx_mem_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, uint64_t* /*dev_maddr*/) {
return -1;
}
// Always fails.
int vx_mem_free(vx_device_h /*hdevice*/, uint64_t /*dev_maddr*/) {
return -1;
}
// Always fails; the output handle is never written.
extern int vx_buf_alloc(vx_device_h /*hdevice*/, uint64_t /*size*/, vx_buffer_h* /*hbuffer*/) {
return -1;
}
// Always returns nullptr.
extern void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
return nullptr;
}
// Always fails.
extern int vx_buf_free(vx_buffer_h /*hbuffer*/) {
return -1;
}
// Always fails; nothing is copied.
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, uint64_t /*dev_maddr*/, uint64_t /*size*/, uint64_t /*src_offset*/) {
return -1;
}
// Always fails; nothing is copied.
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, uint64_t /*dev_maddr*/, uint64_t /*size*/, uint64_t /*dest_offset*/) {
return -1;
}
// Always fails.
extern int vx_start(vx_device_h /*hdevice*/) {
return -1;
}
// Always fails immediately; no waiting is performed.
extern int vx_ready_wait(vx_device_h /*hdevice*/, uint64_t /*timeout*/) {
return -1;
}

View file

@ -1,60 +0,0 @@
VLSIM_DIR = ../../sim/vlsim
RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I. -I../include -I../../hw -I$(VLSIM_DIR)
# Position independent code
CXXFLAGS += -fPIC
# Add external configuration
CXXFLAGS += $(CONFIGS)
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
LDFLAGS += -L. -lopae-c-vlsim
SRCS = ../common/opae.cpp ../common/vx_utils.cpp
# Debugging
ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/vx_scope.cpp
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = libvortex.so
all: $(PROJECT)
scope-defs.h: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
# generate scope data
scope: scope-defs.h
$(PROJECT): $(SRCS) $(SCOPE_H)
DESTDIR=../../driver/vlsim $(MAKE) -C $(VLSIM_DIR) ../../driver/vlsim/libopae-c-vlsim.so
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -o $(PROJECT)
clean:
DESTDIR=../../driver/vlsim $(MAKE) -C $(VLSIM_DIR) clean
rm -rf libopae-c-vlsim.so $(PROJECT) *.o scope-defs.h

1
hw/.gitignore vendored
View file

@ -1 +0,0 @@
obj_dir/*

View file

@ -1,12 +1,22 @@
RTL_DIR=./rtl
SCRIPT_DIR=./scripts
ROOT_DIR := $(realpath ..)
include $(ROOT_DIR)/config.mk
all: VX_config.h
HW_DIR := $(VORTEX_HOME)/hw
SCRIPT_DIR := $(HW_DIR)/scripts
RTL_DIR := $(HW_DIR)/rtl
VX_config.h: $(RTL_DIR)/VX_config.vh
all: config
config: VX_config.h VX_types.h
VX_config.h: $(RTL_DIR)/VX_config.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
clean:
rm -f VX_config.h
VX_types.h: $(RTL_DIR)/VX_types.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
.PHONY: VX_config.h
clean:
$(MAKE) -C unittest clean
rm -f VX_config.h VX_types.h
.PHONY: VX_config.h VX_types.h

View file

@ -1,3 +1,16 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <math.h>
#include <unordered_map>
@ -5,167 +18,322 @@
#include <mutex>
#include <iostream>
#include <rvfloats.h>
#include <util.h>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
extern "C" {
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fclss(bool enable, int a, int* result);
void dpi_fsgnj(bool enable, int a, int b, int* result);
void dpi_fsgnjn(bool enable, int a, int b, int* result);
void dpi_fsgnjx(bool enable, int a, int b, int* result);
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
}
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fadd_s(a, b, (*frm & 0x7), fflags);
// Widens a 32-bit value to 64 bits for transport over the DPI interface.
// When XLEN_64 is defined the upper 32 bits are forced to all ones
// (the NaN-boxed form); otherwise the value is returned zero-extended.
inline uint64_t nan_box(uint32_t value) {
  uint64_t boxed = value;
#ifdef XLEN_64
  boxed |= 0xffffffff00000000ull; // fill the upper word with ones
#endif
  return boxed;
}
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fsub_s(a, b, (*frm & 0x7), fflags);
// Reports whether a 64-bit operand carries the NaN-box pattern.
// When XLEN_64 is defined this is true only if the upper 32 bits are all
// ones; otherwise every value is accepted and the argument is ignored.
inline bool is_nan_boxed(uint64_t value) {
#ifdef XLEN_64
  const uint32_t upper_word = uint32_t(value >> 32);
  return upper_word == 0xffffffff;
#else
  __unused (value);
  return true;
#endif
}
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmul_s(a, b, (*frm & 0x7), fflags);
// Validates the boxing of an operand before it is used by a `_s` routine.
// A properly boxed operand is passed through unchanged; anything else is
// replaced by the boxed constant 0x7fc00000 (marked "NaN" in the original).
inline int64_t check_boxing(int64_t a) {
  if (!is_nan_boxed(a)) {
    return nan_box(0x7fc00000); // NaN
  }
  return a;
}
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmadd_s(a, b, c, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fadd_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmsub_s(a, b, c, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fsub_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fnmadd_s(a, b, c, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fnmsub_s(a, b, c, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fdiv_s(a, b, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fsqrt_s(a, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_ftoi_s(a, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_ftou_s(a, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_itof_s(a, (*frm & 0x7), fflags);
if (dst_fmt) {
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
}
}
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_utof_s(a, (*frm & 0x7), fflags);
if (dst_fmt) {
if (src_fmt) {
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
}
}
}
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_flt_s(a, b, fflags);
if (dst_fmt) {
if (src_fmt) {
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
}
}
}
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fle_s(a, b, fflags);
if (dst_fmt) {
if (src_fmt) {
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_itof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
}
}
}
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_feq_s(a, b, fflags);
if (dst_fmt) {
if (src_fmt) {
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_utof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
}
}
}
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
return;
*result = rv_fmin_s(a, b, fflags);
if (dst_fmt) {
*result = rv_ftod((int32_t)check_boxing(a));
} else {
*result = nan_box(rv_dtof(a));
}
}
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
if (!enable)
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
return;
*result = rv_fmax_s(a, b, fflags);
if (dst_fmt) {
*result = rv_fclss_d(a);
} else {
*result = rv_fclss_s(check_boxing(a));
}
}
void dpi_fclss(bool enable, int a, int* result) {
if (!enable)
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
return;
*result = rv_fclss_s(a);
if (dst_fmt) {
*result = rv_fsgnj_d(a, b);
} else {
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_fsgnj(bool enable, int a, int b, int* result) {
if (!enable)
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
return;
*result = rv_fsgnj_s(a, b);
if (dst_fmt) {
*result = rv_fsgnjn_d(a, b);
} else {
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_fsgnjn(bool enable, int a, int b, int* result) {
if (!enable)
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
return;
*result = rv_fsgnjn_s(a, b);
if (dst_fmt) {
*result = rv_fsgnjx_d(a, b);
} else {
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_fsgnjx(bool enable, int a, int b, int* result) {
if (!enable)
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fsgnjx_s(a, b);
if (dst_fmt) {
*result = rv_flt_d(a, b, fflags);
} else {
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
}
}
// DPI entry point for the "less than or equal" comparison.
//   enable  - when false the call is a no-op and *result / *fflags are untouched
//   dst_fmt - non-zero selects rv_fle_d on the raw operands; zero selects
//             rv_fle_s on operands filtered through check_boxing()
//   a, b    - operand bit patterns
//   result  - receives the comparison outcome (stored as-is, not NaN-boxed)
//   fflags  - forwarded to the rv_fle_* helper
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
  if (enable) {
    if (dst_fmt == 0) {
      *result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
    } else {
      *result = rv_fle_d(a, b, fflags);
    }
  }
}
// DPI entry point for the equality comparison.
//   enable  - when false the call is a no-op and *result / *fflags are untouched
//   dst_fmt - non-zero selects rv_feq_d on the raw operands; zero selects
//             rv_feq_s on operands filtered through check_boxing()
//   a, b    - operand bit patterns
//   result  - receives the comparison outcome (stored as-is, not NaN-boxed)
//   fflags  - forwarded to the rv_feq_* helper
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
  if (enable) {
    if (dst_fmt == 0) {
      *result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
    } else {
      *result = rv_feq_d(a, b, fflags);
    }
  }
}
// DPI entry point for the minimum operation.
//   enable  - when false the call is a no-op and *result / *fflags are untouched
//   dst_fmt - non-zero selects rv_fmin_d on the raw operands; zero selects
//             rv_fmin_s on operands filtered through check_boxing(), with the
//             32-bit outcome wrapped by nan_box() before it is stored
//   a, b    - operand bit patterns
//   result  - receives the selected value
//   fflags  - forwarded to the rv_fmin_* helper
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
  if (enable) {
    if (dst_fmt == 0) {
      *result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
    } else {
      *result = rv_fmin_d(a, b, fflags);
    }
  }
}
// DPI entry point for the maximum operation.
//   enable  - when false the call is a no-op and *result / *fflags are untouched
//   dst_fmt - non-zero selects rv_fmax_d on the raw operands; zero selects
//             rv_fmax_s on operands filtered through check_boxing(), with the
//             32-bit outcome wrapped by nan_box() before it is stored
//   a, b    - operand bit patterns
//   result  - receives the selected value
//   fflags  - forwarded to the rv_fmax_* helper
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
  if (enable) {
    if (dst_fmt == 0) {
      *result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
    } else {
      *result = rv_fmax_d(a, b, fflags);
    }
  }
}

View file

@ -1,31 +1,45 @@
`ifndef FLOAT_DPI
`define FLOAT_DPI
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import "DPI-C" function void dpi_fadd(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsub(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmul(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
`ifndef FLOAT_DPI_VH
`define FLOAT_DPI_VH
import "DPI-C" function void dpi_fdiv(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsqrt(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftoi(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftou(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_itof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_utof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fdiv(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsqrt(input logic enable, input int dst_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fclss(input logic enable, input int a, output int result);
import "DPI-C" function void dpi_fsgnj(input logic enable, input int a, input int b, output int result);
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int a, input int b, output int result);
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int a, input int b, output int result);
import "DPI-C" function void dpi_ftoi(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftou(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_itof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_utof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_f2f(input logic enable, input int dst_fmt, input longint a, output longint result);
import "DPI-C" function void dpi_flt(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fle(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_feq(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmin(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmax(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fclss(input logic enable, input int dst_fmt, input longint a, output longint result);
import "DPI-C" function void dpi_fsgnj(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
`endif
import "DPI-C" function void dpi_flt(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fle(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_feq(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmin(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmax(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
`endif

View file

@ -1,21 +1,50 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <math.h>
#include <unordered_map>
#include <vector>
#include <mutex>
#include <iostream>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
#ifdef XLEN_64
#define iword_t int64_t
#define uword_t uint64_t
#define idword_t __int128_t
#define udword_t __uint128_t
#else
#define iword_t int32_t
#define uword_t uint32_t
#define idword_t int64_t
#define udword_t uint64_t
#endif
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 3
#endif
extern "C" {
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder);
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth);
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder);
int dpi_register();
void dpi_assert(int inst, bool cond, int delay);
void dpi_trace(const char* format, ...);
void dpi_trace(int level, const char* format, ...);
void dpi_trace_start();
void dpi_trace_stop();
}
@ -37,7 +66,7 @@ public:
void push(int value, bool enable) {
if (!enable)
return;
return;
for (unsigned i = 0; i < depth_-1; ++i) {
buffer_[i] = buffer_[i+1];
}
@ -52,7 +81,7 @@ private:
std::vector<int> buffer_;
bool init_;
unsigned depth_;
unsigned depth_;
};
class Instances {
@ -62,9 +91,9 @@ public:
}
int allocate() {
mutex_.lock();
mutex_.lock();
int inst = instances_.size();
instances_.resize(inst + 1);
instances_.resize(inst + 1);
mutex_.unlock();
return inst;
}
@ -93,51 +122,56 @@ void dpi_assert(int inst, bool cond, int delay) {
}
}
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
///////////////////////////////////////////////////////////////////////////////
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth) {
if (!enable)
return;
uint64_t first = *(uint32_t*)&a;
uint64_t second = *(uint32_t*)&b;
if (is_signed_a && (first & 0x80000000)) {
first |= 0xFFFFFFFF00000000;
udword_t first = *(uword_t*)&a;
udword_t second = *(uword_t*)&b;
udword_t mask = udword_t(-1) << (8 * sizeof(iword_t));
if (is_signed_a && a < 0) {
first |= mask;
}
if (is_signed_b && (second & 0x80000000)) {
second |= 0xFFFFFFFF00000000;
if (is_signed_b && b < 0) {
second |= mask;
}
uint64_t result;
udword_t result;
if (is_signed_a || is_signed_b) {
result = (int64_t)first * (int64_t)second;
result = idword_t(first) * idword_t(second);
} else {
result = first * second;
}
*resultl = result & 0xFFFFFFFF;
*resulth = (result >> 32) & 0xFFFFFFFF;
}
*resultl = iword_t(result);
*resulth = iword_t(result >> (8 * sizeof(iword_t)));
}
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder) {
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder) {
if (!enable)
return;
uint32_t dividen = *(uint32_t*)&a;
uint32_t divisor = *(uint32_t*)&b;
uword_t dividen = a;
uword_t divisor = b;
auto inf_neg = uword_t(1) << (8 * sizeof(iword_t) - 1);
if (is_signed) {
if (b == 0) {
*quotient = -1;
*remainder = dividen;
} else if (dividen == 0x80000000 && divisor == 0xffffffff) {
} else if (dividen == inf_neg && divisor == -1) {
*remainder = 0;
*quotient = dividen;
} else {
*quotient = (int32_t)dividen / (int32_t)divisor;
*remainder = (int32_t)dividen % (int32_t)divisor;
} else {
*quotient = (iword_t)dividen / (iword_t)divisor;
*remainder = (iword_t)dividen % (iword_t)divisor;
}
} else {
} else {
if (b == 0) {
*quotient = -1;
*remainder = dividen;
@ -148,19 +182,23 @@ void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* rem
}
}
void dpi_trace(const char* format, ...) {
///////////////////////////////////////////////////////////////////////////////
void dpi_trace(int level, const char* format, ...) {
if (level > DEBUG_LEVEL)
return;
if (!sim_trace_enabled())
return;
va_list va;
va_start(va, format);
va_start(va, format);
vprintf(format, va);
va_end(va);
va_end(va);
}
void dpi_trace_start() {
void dpi_trace_start() {
sim_trace_enable(true);
}
void dpi_trace_stop() {
void dpi_trace_stop() {
sim_trace_enable(false);
}
}

View file

@ -1,14 +1,33 @@
`ifndef UTIL_DPI
`define UTIL_DPI
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import "DPI-C" function void dpi_imul(input logic enable, input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
import "DPI-C" function void dpi_idiv(input logic enable, input int a, input int b, input logic is_signed, output int quotient, output int remainder);
`ifndef UTIL_DPI_VH
`define UTIL_DPI_VH
`ifdef XLEN_64
`define INT_TYPE longint
`else
`define INT_TYPE int
`endif
import "DPI-C" function void dpi_imul(input logic enable, input logic is_signed_a, input logic is_signed_b, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE resultl, output `INT_TYPE resulth);
import "DPI-C" function void dpi_idiv(input logic enable, input logic is_signed, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE quotient, output `INT_TYPE remainder);
import "DPI-C" function int dpi_register();
import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay);
import "DPI-C" function void dpi_trace(input string format /*verilator sformat*/);
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
import "DPI-C" function void dpi_trace_start();
import "DPI-C" function void dpi_trace_stop();
`endif
`endif

1
hw/rtl/.gitignore vendored
View file

@ -1 +0,0 @@
/VX_user_config.vh

View file

@ -1,235 +0,0 @@
`include "VX_define.vh"
module VX_alu_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// Inputs
VX_alu_req_if.slave alu_req_if,
// Outputs
VX_branch_ctl_if.master branch_ctl_if,
VX_commit_if.master alu_commit_if
);
`UNUSED_PARAM (CORE_ID)
reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][31:0] add_result;
wire [`NUM_THREADS-1:0][32:0] sub_result;
wire [`NUM_THREADS-1:0][31:0] shr_result;
reg [`NUM_THREADS-1:0][31:0] msc_result;
wire ready_in;
`UNUSED_VAR (alu_req_if.op_mod)
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(alu_req_if.op_type);
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(alu_req_if.op_type);
wire alu_signed = `INST_ALU_SIGNED(alu_op);
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
wire is_sub = (alu_op == `INST_ALU_SUB);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
assign sub_result[i] = sub_in1 - sub_in2;
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
assign shr_result[i] = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op)
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`INST_ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
endcase
end
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op_class)
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
: shr_result[i]; // SRL, SRA
// 2'b11,
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
endcase
end
end
// branch
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
wire [31:0] br_dest = add_result[alu_req_if.tid];
wire [32:0] cmp_result = sub_result[alu_req_if.tid];
wire is_less = cmp_result[32];
wire is_equal = ~(| cmp_result[31:0]);
// output
wire alu_valid_in;
wire alu_ready_in;
wire alu_valid_out;
wire alu_ready_out;
wire [`UUID_BITS-1:0] alu_uuid;
wire [`NW_BITS-1:0] alu_wid;
wire [`NUM_THREADS-1:0] alu_tmask;
wire [31:0] alu_PC;
wire [`NR_BITS-1:0] alu_rd;
wire alu_wb;
wire [`NUM_THREADS-1:0][31:0] alu_data;
wire [`INST_BR_BITS-1:0] br_op_r;
wire [31:0] br_dest_r;
wire is_less_r;
wire is_equal_r;
wire is_br_op_r;
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
VX_pipe_register #(
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (alu_ready_in),
.data_in ({alu_valid_in, alu_req_if.uuid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, is_less, is_equal, br_dest}),
.data_out ({alu_valid_out, alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data, is_br_op_r, br_op_r, is_less_r, is_equal_r, br_dest_r})
);
`UNUSED_VAR (br_op_r)
wire br_neg = `INST_BR_NEG(br_op_r);
wire br_less = `INST_BR_LESS(br_op_r);
wire br_static = `INST_BR_STATIC(br_op_r);
assign branch_ctl_if.valid = alu_valid_out && alu_ready_out && is_br_op_r;
assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static;
assign branch_ctl_if.wid = alu_wid;
assign branch_ctl_if.dest = br_dest_r;
`ifdef EXT_M_ENABLE
wire mul_valid_in;
wire mul_ready_in;
wire mul_valid_out;
wire mul_ready_out;
wire [`UUID_BITS-1:0] mul_uuid;
wire [`NW_BITS-1:0] mul_wid;
wire [`NUM_THREADS-1:0] mul_tmask;
wire [31:0] mul_PC;
wire [`NR_BITS-1:0] mul_rd;
wire mul_wb;
wire [`NUM_THREADS-1:0][31:0] mul_data;
wire [`INST_MUL_BITS-1:0] mul_op = `INST_MUL_BITS'(alu_req_if.op_type);
VX_muldiv muldiv (
.clk (clk),
.reset (reset),
// Inputs
.alu_op (mul_op),
.uuid_in (alu_req_if.uuid),
.wid_in (alu_req_if.wid),
.tmask_in (alu_req_if.tmask),
.PC_in (alu_req_if.PC),
.rd_in (alu_req_if.rd),
.wb_in (alu_req_if.wb),
.alu_in1 (alu_req_if.rs1_data),
.alu_in2 (alu_req_if.rs2_data),
// Outputs
.wid_out (mul_wid),
.uuid_out (mul_uuid),
.tmask_out (mul_tmask),
.PC_out (mul_PC),
.rd_out (mul_rd),
.wb_out (mul_wb),
.data_out (mul_data),
// handshake
.valid_in (mul_valid_in),
.ready_in (mul_ready_in),
.valid_out (mul_valid_out),
.ready_out (mul_ready_out)
);
wire is_mul_op = `INST_ALU_IS_MUL(alu_req_if.op_mod);
assign ready_in = is_mul_op ? mul_ready_in : alu_ready_in;
assign alu_valid_in = alu_req_if.valid && ~is_mul_op;
assign mul_valid_in = alu_req_if.valid && is_mul_op;
assign alu_commit_if.valid = alu_valid_out || mul_valid_out;
assign alu_commit_if.uuid = alu_valid_out ? alu_uuid : mul_uuid;
assign alu_commit_if.wid = alu_valid_out ? alu_wid : mul_wid;
assign alu_commit_if.tmask = alu_valid_out ? alu_tmask : mul_tmask;
assign alu_commit_if.PC = alu_valid_out ? alu_PC : mul_PC;
assign alu_commit_if.rd = alu_valid_out ? alu_rd : mul_rd;
assign alu_commit_if.wb = alu_valid_out ? alu_wb : mul_wb;
assign alu_commit_if.data = alu_valid_out ? alu_data : mul_data;
assign alu_ready_out = alu_commit_if.ready;
assign mul_ready_out = alu_commit_if.ready & ~alu_valid_out; // ALU takes priority
`else
assign ready_in = alu_ready_in;
assign alu_valid_in = alu_req_if.valid;
assign alu_commit_if.valid = alu_valid_out;
assign alu_commit_if.uuid = alu_uuid;
assign alu_commit_if.wid = alu_wid;
assign alu_commit_if.tmask = alu_tmask;
assign alu_commit_if.PC = alu_PC;
assign alu_commit_if.rd = alu_rd;
assign alu_commit_if.wb = alu_wb;
assign alu_commit_if.data = alu_data;
assign alu_ready_out = alu_commit_if.ready;
`endif
assign alu_commit_if.eop = 1'b1;
// can accept new request?
assign alu_req_if.ready = ready_in;
`ifdef DBG_TRACE_CORE_PIPELINE
always @(posedge clk) begin
if (branch_ctl_if.valid) begin
dpi_trace("%d: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h (#%0d)\n",
$time, CORE_ID, branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest, alu_uuid);
end
end
`endif
endmodule

View file

@ -1,159 +0,0 @@
`include "VX_define.vh"
// VX_cache_arb
//
// Funnels NUM_REQS multi-lane cache request streams into a single request
// stream and steers the shared response stream back to the requester that
// issued it. The requester index is carried inside the outgoing tag: it is
// inserted at bit position TAG_SEL_IDX on the way out (widening the tag by
// LOG_NUM_REQS bits) and read back / stripped from the response tag on the
// way in. When NUM_REQS == 1 the module degenerates to plain wiring.
module VX_cache_arb #(
    parameter NUM_REQS      = 1,    // number of request sources
    parameter LANES         = 1,    // lanes per request
    parameter DATA_SIZE     = 1,    // word size in bytes
    parameter TAG_IN_WIDTH  = 1,    // tag width on the requester side
    parameter TAG_SEL_IDX   = 0,    // bit position where the source index is spliced into the tag
    parameter BUFFERED_REQ  = 0,    // forwarded to the request arbiter's BUFFERED parameter
    parameter BUFFERED_RSP  = 0,    // forwarded to the response demux's BUFFERED parameter
    parameter TYPE          = "R",  // forwarded to the request arbiter's TYPE parameter

    localparam ADDR_WIDTH    = (32-`CLOG2(DATA_SIZE)),
    localparam DATA_WIDTH    = (8 * DATA_SIZE),
    localparam LOG_NUM_REQS  = `CLOG2(NUM_REQS),
    localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
) (
    input wire clk,
    input wire reset,

    // input requests
    input wire [NUM_REQS-1:0][LANES-1:0]                    req_valid_in,
    input wire [NUM_REQS-1:0][LANES-1:0]                    req_rw_in,
    input wire [NUM_REQS-1:0][LANES-1:0][DATA_SIZE-1:0]     req_byteen_in,
    input wire [NUM_REQS-1:0][LANES-1:0][ADDR_WIDTH-1:0]    req_addr_in,
    input wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0]    req_data_in,
    input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0]  req_tag_in,
    output wire [NUM_REQS-1:0][LANES-1:0]                   req_ready_in,

    // output request
    output wire [LANES-1:0]                     req_valid_out,
    output wire [LANES-1:0]                     req_rw_out,
    output wire [LANES-1:0][DATA_SIZE-1:0]      req_byteen_out,
    output wire [LANES-1:0][ADDR_WIDTH-1:0]     req_addr_out,
    output wire [LANES-1:0][DATA_WIDTH-1:0]     req_data_out,
    output wire [LANES-1:0][TAG_OUT_WIDTH-1:0]  req_tag_out,
    input wire [LANES-1:0]                      req_ready_out,

    // input response
    input wire                                  rsp_valid_in,
    input wire [LANES-1:0]                      rsp_tmask_in,
    input wire [LANES-1:0][DATA_WIDTH-1:0]      rsp_data_in,
    input wire [TAG_OUT_WIDTH-1:0]              rsp_tag_in,
    output wire                                 rsp_ready_in,

    // output responses
    output wire [NUM_REQS-1:0]                              rsp_valid_out,
    output wire [NUM_REQS-1:0][LANES-1:0]                   rsp_tmask_out,
    output wire [NUM_REQS-1:0][LANES-1:0][DATA_WIDTH-1:0]   rsp_data_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]            rsp_tag_out,
    input wire [NUM_REQS-1:0]                               rsp_ready_out
);
    // Width of one packed request lane: {tag, addr, rw, byteen, data}
    localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    // Width of one packed response: {tmask, tag, data} across all lanes
    localparam RSP_DATAW = LANES * (1 + DATA_WIDTH) + TAG_IN_WIDTH;

    if (NUM_REQS > 1) begin

        // ---- request path: tag, pack, arbitrate, unpack ----

        wire [NUM_REQS-1:0][LANES-1:0][REQ_DATAW-1:0] req_pkt_in;
        wire [LANES-1:0][REQ_DATAW-1:0]               req_pkt_out;

        for (genvar r = 0; r < NUM_REQS; ++r) begin
            for (genvar l = 0; l < LANES; ++l) begin
                // requester tag widened with the source index r
                wire [TAG_OUT_WIDTH-1:0] tagged;

                VX_bits_insert #(
                    .N   (TAG_IN_WIDTH),
                    .S   (LOG_NUM_REQS),
                    .POS (TAG_SEL_IDX)
                ) bits_insert (
                    .data_in  (req_tag_in[r][l]),
                    .sel_in   (LOG_NUM_REQS'(r)),
                    .data_out (tagged)
                );

                assign req_pkt_in[r][l] = {
                    tagged,
                    req_addr_in[r][l],
                    req_rw_in[r][l],
                    req_byteen_in[r][l],
                    req_data_in[r][l]
                };
            end
        end

        VX_stream_arbiter #(
            .NUM_REQS (NUM_REQS),
            .LANES    (LANES),
            .DATAW    (REQ_DATAW),
            .BUFFERED (BUFFERED_REQ),
            .TYPE     (TYPE)
        ) req_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (req_valid_in),
            .data_in   (req_pkt_in),
            .ready_in  (req_ready_in),
            .valid_out (req_valid_out),
            .data_out  (req_pkt_out),
            .ready_out (req_ready_out)
        );

        for (genvar l = 0; l < LANES; ++l) begin
            // field order mirrors the packing above
            assign {
                req_tag_out[l],
                req_addr_out[l],
                req_rw_out[l],
                req_byteen_out[l],
                req_data_out[l]
            } = req_pkt_out[l];
        end

        ///////////////////////////////////////////////////////////////////////

        // ---- response path: recover source index, strip it, demux ----

        wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_pkt_out;
        wire [TAG_IN_WIDTH-1:0]            rsp_tag_stripped;

        // source index sits where it was spliced in on the request path
        wire [LOG_NUM_REQS-1:0] rsp_src_idx = rsp_tag_in[TAG_SEL_IDX +: LOG_NUM_REQS];

        VX_bits_remove #(
            .N   (TAG_OUT_WIDTH),
            .S   (LOG_NUM_REQS),
            .POS (TAG_SEL_IDX)
        ) bits_remove (
            .data_in  (rsp_tag_in),
            .data_out (rsp_tag_stripped)
        );

        VX_stream_demux #(
            .NUM_REQS (NUM_REQS),
            .LANES    (1),
            .DATAW    (RSP_DATAW),
            .BUFFERED (BUFFERED_RSP)
        ) rsp_demux (
            .clk       (clk),
            .reset     (reset),
            .sel_in    (rsp_src_idx),
            .valid_in  (rsp_valid_in),
            .data_in   ({rsp_tmask_in, rsp_tag_stripped, rsp_data_in}),
            .ready_in  (rsp_ready_in),
            .valid_out (rsp_valid_out),
            .data_out  (rsp_pkt_out),
            .ready_out (rsp_ready_out)
        );

        for (genvar r = 0; r < NUM_REQS; ++r) begin
            assign {rsp_tmask_out[r], rsp_tag_out[r], rsp_data_out[r]} = rsp_pkt_out[r];
        end

    end else begin

        // Single requester: no arbitration and no tag rewriting
        // (LOG_NUM_REQS adds nothing to the tag width here), so both
        // directions are wired straight through.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // request: requester -> output
        assign req_valid_out  = req_valid_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_addr_out   = req_addr_in;
        assign req_data_out   = req_data_in;
        assign req_tag_out    = req_tag_in;
        assign req_ready_in   = req_ready_out;

        // response: input -> requester
        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_tmask_out  = rsp_tmask_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule

View file

@ -1,195 +1,160 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_cluster #(
parameter CLUSTER_ID = 0
) (
`SCOPE_IO_VX_cluster
module VX_cluster import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
// Clock
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
`ifdef PERF_ENABLE
input sysmem_perf_t sysmem_perf,
`endif
// Memory response
input wire mem_rsp_valid,
input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// DCRs
VX_dcr_bus_if.slave dcr_bus_if,
// Memory
VX_mem_bus_if.master mem_bus_if [`L2_MEM_PORTS],
// Status
output wire busy
);
`STATIC_ASSERT((`L2_ENABLE == 0 || `NUM_CORES > 1), ("invalid parameter"))
output wire busy
);
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data;
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
`ifdef SCOPE
localparam scope_socket = 0;
`SCOPE_IO_SWITCH (`NUM_SOCKETS);
`endif
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
wire [`NUM_CORES-1:0][`L1_MEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
`ifdef PERF_ENABLE
cache_perf_t l2_perf;
sysmem_perf_t sysmem_perf_tmp;
always @(*) begin
sysmem_perf_tmp = sysmem_perf;
sysmem_perf_tmp.l2cache = l2_perf;
end
`endif
wire [`NUM_CORES-1:0] per_core_busy;
`ifdef GBAR_ENABLE
for (genvar i = 0; i < `NUM_CORES; i++) begin
VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS]();
VX_gbar_bus_if gbar_bus_if();
`RESET_RELAY (core_reset);
// Merges the per-socket global-barrier buses (per_socket_gbar_bus_if) into the
// single gbar_bus_if that is connected to the gbar_unit instance.
VX_gbar_arb #(
.NUM_REQS (`NUM_SOCKETS),
.OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // gbar_unit has no backpressure
) gbar_arb (
.clk (clk),
.reset (reset),
.bus_in_if (per_socket_gbar_bus_if),
.bus_out_if (gbar_bus_if)
);
VX_core #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) core (
`SCOPE_BIND_VX_cluster_core(i)
VX_gbar_unit #(
.INSTANCE_ID (`SFORMATF(("gbar%0d", CLUSTER_ID)))
) gbar_unit (
.clk (clk),
.reset (reset),
.gbar_bus_if (gbar_bus_if)
);
`endif
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (L1_MEM_ARB_TAG_WIDTH)
) per_socket_mem_bus_if[`NUM_SOCKETS * `L1_MEM_PORTS]();
`RESET_RELAY (l2_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID (`SFORMATF(("%s-l2cache", INSTANCE_ID))),
.CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
.NUM_WAYS (`L2_NUM_WAYS),
.WORD_SIZE (L2_WORD_SIZE),
.NUM_REQS (L2_NUM_REQS),
.MEM_PORTS (`L2_MEM_PORTS),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
.TAG_WIDTH (L2_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_DIRTYBYTES),
.REPL_POLICY (`L2_REPL_POLICY),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (3),
.NC_ENABLE (1),
.PASSTHRU (!`L2_ENABLED)
) l2cache (
.clk (clk),
.reset (l2_reset),
`ifdef PERF_ENABLE
.cache_perf (l2_perf),
`endif
.core_bus_if (per_socket_mem_bus_if),
.mem_bus_if (mem_bus_if)
);
///////////////////////////////////////////////////////////////////////////
wire [`NUM_SOCKETS-1:0] per_socket_busy;
// Generate all sockets
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets
`RESET_RELAY (socket_reset, reset);
VX_dcr_bus_if socket_dcr_bus_if();
wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS > 1))
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
.INSTANCE_ID (`SFORMATF(("%s-socket%0d", INSTANCE_ID, socket_id)))
) socket (
`SCOPE_IO_BIND (scope_socket+socket_id)
.clk (clk),
.reset (core_reset),
.mem_req_valid (per_core_mem_req_valid[i]),
.mem_req_rw (per_core_mem_req_rw [i]),
.mem_req_byteen (per_core_mem_req_byteen[i]),
.mem_req_addr (per_core_mem_req_addr [i]),
.mem_req_data (per_core_mem_req_data [i]),
.mem_req_tag (per_core_mem_req_tag [i]),
.mem_req_ready (per_core_mem_req_ready[i]),
.mem_rsp_valid (per_core_mem_rsp_valid[i]),
.mem_rsp_data (per_core_mem_rsp_data [i]),
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
.busy (per_core_busy [i])
);
end
assign busy = (| per_core_busy);
if (`L2_ENABLE) begin
`ifdef PERF_ENABLE
VX_perf_cache_if perf_l2cache_if();
`endif
`RESET_RELAY (l2_reset);
VX_cache #(
.CACHE_ID (`L2_CACHE_ID),
.CACHE_SIZE (`L2_CACHE_SIZE),
.CACHE_LINE_SIZE (`L2_CACHE_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
.NUM_PORTS (`L2_NUM_PORTS),
.WORD_SIZE (`L2_WORD_SIZE),
.NUM_REQS (`L2_NUM_REQS),
.CREQ_SIZE (`L2_CREQ_SIZE),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`L1_MEM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH),
.NC_ENABLE (1)
) l2cache (
`SCOPE_BIND_VX_cluster_l2cache
.clk (clk),
.reset (l2_reset),
.reset (socket_reset),
`ifdef PERF_ENABLE
.perf_cache_if (perf_l2cache_if),
.sysmem_perf (sysmem_perf_tmp),
`endif
// Core request
.core_req_valid (per_core_mem_req_valid),
.core_req_rw (per_core_mem_req_rw),
.core_req_byteen (per_core_mem_req_byteen),
.core_req_addr (per_core_mem_req_addr),
.core_req_data (per_core_mem_req_data),
.core_req_tag (per_core_mem_req_tag),
.core_req_ready (per_core_mem_req_ready),
.dcr_bus_if (socket_dcr_bus_if),
// Core response
.core_rsp_valid (per_core_mem_rsp_valid),
.core_rsp_data (per_core_mem_rsp_data),
.core_rsp_tag (per_core_mem_rsp_tag),
.core_rsp_ready (per_core_mem_rsp_ready),
`UNUSED_PIN (core_rsp_tmask),
.mem_bus_if (per_socket_mem_bus_if[socket_id * `L1_MEM_PORTS +: `L1_MEM_PORTS]),
// Memory request
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
// Memory response
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_ready (mem_rsp_ready)
`ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
`endif
.busy (per_socket_busy[socket_id])
);
end else begin
`RESET_RELAY (mem_arb_reset);
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`L1_MEM_TAG_WIDTH),
.TYPE ("R"),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
// Core request
.req_valid_in (per_core_mem_req_valid),
.req_rw_in (per_core_mem_req_rw),
.req_byteen_in (per_core_mem_req_byteen),
.req_addr_in (per_core_mem_req_addr),
.req_data_in (per_core_mem_req_data),
.req_tag_in (per_core_mem_req_tag),
.req_ready_in (per_core_mem_req_ready),
// Memory request
.req_valid_out (mem_req_valid),
.req_rw_out (mem_req_rw),
.req_byteen_out (mem_req_byteen),
.req_addr_out (mem_req_addr),
.req_data_out (mem_req_data),
.req_tag_out (mem_req_tag),
.req_ready_out (mem_req_ready),
// Core response
.rsp_valid_out (per_core_mem_rsp_valid),
.rsp_data_out (per_core_mem_rsp_data),
.rsp_tag_out (per_core_mem_rsp_tag),
.rsp_ready_out (per_core_mem_rsp_ready),
// Memory response
.rsp_valid_in (mem_rsp_valid),
.rsp_tag_in (mem_rsp_tag),
.rsp_data_in (mem_rsp_data),
.rsp_ready_in (mem_rsp_ready)
);
end
`BUFFER_EX(busy, (| per_socket_busy), 1'b1, 1, (`NUM_SOCKETS > 1));
endmodule

View file

@ -1,138 +0,0 @@
`include "VX_define.vh"
// VX_commit
//
// Commit stage. Observes the commit handshake of every execution unit,
// counts how many threads retired this cycle (registered and reported to
// the CSR unit through cmt_to_csr_if), and hands the result-producing
// commit ports to VX_writeback.
module VX_commit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // inputs
    VX_commit_if.slave alu_commit_if,
    VX_commit_if.slave ld_commit_if,
    VX_commit_if.slave st_commit_if,
    VX_commit_if.slave csr_commit_if,
`ifdef EXT_F_ENABLE
    VX_commit_if.slave fpu_commit_if,
`endif
    VX_commit_if.slave gpu_commit_if,

    // outputs
    VX_writeback_if.master  writeback_if,
    VX_cmt_to_csr_if.master cmt_to_csr_if
);
    // Number of commit sources feeding the retired-thread counter
`ifdef EXT_F_ENABLE
    localparam NUM_COMMIT_SRCS = 6;
`else
    localparam NUM_COMMIT_SRCS = 5;
`endif

    // CSRs update

    // A unit commits in a cycle when its valid/ready handshake completes
    wire alu_commit_fire;
    wire ld_commit_fire;
    wire st_commit_fire;
    wire csr_commit_fire;
`ifdef EXT_F_ENABLE
    wire fpu_commit_fire;
`endif
    wire gpu_commit_fire;

    assign alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
    assign ld_commit_fire  = ld_commit_if.valid && ld_commit_if.ready;
    assign st_commit_fire  = st_commit_if.valid && st_commit_if.ready;
    assign csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
`ifdef EXT_F_ENABLE
    assign fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
`endif
    assign gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;

    // At least one unit committed this cycle
    wire commit_fire = | {
        alu_commit_fire,
        ld_commit_fire,
        st_commit_fire,
        csr_commit_fire,
    `ifdef EXT_F_ENABLE
        fpu_commit_fire,
    `endif
        gpu_commit_fire
    };

    // Thread masks of all units that fired, side by side; the population
    // count of this vector is the number of threads retired this cycle.
    wire [(NUM_COMMIT_SRCS*`NUM_THREADS)-1:0] commit_tmask;
    wire [$clog2($bits(commit_tmask)+1)-1:0] commit_size;

    assign commit_tmask = {
        alu_commit_if.tmask & {`NUM_THREADS{alu_commit_fire}},
        ld_commit_if.tmask  & {`NUM_THREADS{ld_commit_fire}},
        st_commit_if.tmask  & {`NUM_THREADS{st_commit_fire}},
        csr_commit_if.tmask & {`NUM_THREADS{csr_commit_fire}},
    `ifdef EXT_F_ENABLE
        fpu_commit_if.tmask & {`NUM_THREADS{fpu_commit_fire}},
    `endif
        gpu_commit_if.tmask & {`NUM_THREADS{gpu_commit_fire}}
    };

    `POP_COUNT(commit_size, commit_tmask);

    // Register the commit notification before it reaches the CSR unit
    VX_pipe_register #(
        .DATAW  (1 + $bits(commit_size)),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (1'b1),
        .data_in  ({commit_fire, commit_size}),
        .data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
    );

    // Writeback

    VX_writeback #(
        .CORE_ID(CORE_ID)
    ) writeback (
        .clk            (clk),
        .reset          (reset),

        .alu_commit_if  (alu_commit_if),
        .ld_commit_if   (ld_commit_if),
        .csr_commit_if  (csr_commit_if),
    `ifdef EXT_F_ENABLE
        .fpu_commit_if  (fpu_commit_if),
    `endif
        .gpu_commit_if  (gpu_commit_if),

        .writeback_if   (writeback_if)
    );

    // The store commit port is the only one not routed through
    // VX_writeback, so it is accepted unconditionally.
    assign st_commit_if.ready = 1'b1;

`ifdef DBG_TRACE_CORE_PIPELINE
    // One trace record per unit that commits in the current cycle
    always @(posedge clk) begin
        if (alu_commit_fire) begin
            dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd);
            `TRACE_ARRAY1D(alu_commit_if.data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", alu_commit_if.uuid);
        end
        if (ld_commit_fire) begin
            dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd);
            `TRACE_ARRAY1D(ld_commit_if.data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", ld_commit_if.uuid);
        end
        if (st_commit_fire) begin
            dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d (#%0d)\n", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd, st_commit_if.uuid);
        end
        if (csr_commit_fire) begin
            dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd);
            `TRACE_ARRAY1D(csr_commit_if.data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", csr_commit_if.uuid);
        end
    `ifdef EXT_F_ENABLE
        if (fpu_commit_fire) begin
            dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd);
            `TRACE_ARRAY1D(fpu_commit_if.data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", fpu_commit_if.uuid);
        end
    `endif
        if (gpu_commit_fire) begin
            dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd);
            `TRACE_ARRAY1D(gpu_commit_if.data, `NUM_THREADS);
            dpi_trace(" (#%0d)\n", gpu_commit_if.uuid);
        end
    end
`endif

endmodule

File diff suppressed because it is too large Load diff

View file

@ -1,156 +0,0 @@
`include "VX_define.vh"
// VX_core
//
// One processor core: a VX_pipeline instance connected to a VX_mem_unit
// instance through instruction-cache and data-cache request/response
// interfaces. The memory unit's memory-side interface is exposed on this
// module's flat mem_req_* / mem_rsp_* ports.
module VX_core #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_VX_core

    // Clock
    input wire clk,
    input wire reset,

    // Memory request
    output wire                                 mem_req_valid,
    output wire                                 mem_req_rw,
    output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0]  mem_req_byteen,
    output wire [`DCACHE_MEM_ADDR_WIDTH-1:0]    mem_req_addr,
    output wire [`DCACHE_MEM_DATA_WIDTH-1:0]    mem_req_data,
    output wire [`L1_MEM_TAG_WIDTH-1:0]         mem_req_tag,
    input wire                                  mem_req_ready,

    // Memory response
    input wire                                  mem_rsp_valid,
    input wire [`DCACHE_MEM_DATA_WIDTH-1:0]     mem_rsp_data,
    input wire [`L1_MEM_TAG_WIDTH-1:0]          mem_rsp_tag,
    output wire                                 mem_rsp_ready,

    // Status
    output wire                                 busy
);
`ifdef PERF_ENABLE
    // Memory-system performance counters, shared by both sub-blocks
    VX_perf_memsys_if perf_memsys_if();
`endif

    // ---- pipeline <-> memory unit: data cache ----

    VX_dcache_req_if #(
        .NUM_REQS  (`DCACHE_NUM_REQS),
        .WORD_SIZE (`DCACHE_WORD_SIZE),
        .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
    ) dcache_req_if();

    VX_dcache_rsp_if #(
        .NUM_REQS  (`DCACHE_NUM_REQS),
        .WORD_SIZE (`DCACHE_WORD_SIZE),
        .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH)
    ) dcache_rsp_if();

    // ---- pipeline <-> memory unit: instruction cache ----

    VX_icache_req_if #(
        .WORD_SIZE (`ICACHE_WORD_SIZE),
        .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
    ) icache_req_if();

    VX_icache_rsp_if #(
        .WORD_SIZE (`ICACHE_WORD_SIZE),
        .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH)
    ) icache_rsp_if();

    // ---- memory unit <-> module ports ----

    VX_mem_req_if #(
        .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
        .ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
        .TAG_WIDTH  (`L1_MEM_TAG_WIDTH)
    ) mem_req_if();

    VX_mem_rsp_if #(
        .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
        .TAG_WIDTH  (`L1_MEM_TAG_WIDTH)
    ) mem_rsp_if();

    // request channel: interface -> flat ports (ready flows back)
    assign mem_req_valid    = mem_req_if.valid;
    assign mem_req_rw       = mem_req_if.rw;
    assign mem_req_byteen   = mem_req_if.byteen;
    assign mem_req_addr     = mem_req_if.addr;
    assign mem_req_data     = mem_req_if.data;
    assign mem_req_tag      = mem_req_if.tag;
    assign mem_req_if.ready = mem_req_ready;

    // response channel: flat ports -> interface (ready flows back)
    assign mem_rsp_if.valid = mem_rsp_valid;
    assign mem_rsp_if.data  = mem_rsp_data;
    assign mem_rsp_if.tag   = mem_rsp_tag;
    assign mem_rsp_ready    = mem_rsp_if.ready;

    //--

    VX_pipeline #(
        .CORE_ID(CORE_ID)
    ) pipeline (
        `SCOPE_BIND_VX_core_pipeline
    `ifdef PERF_ENABLE
        .perf_memsys_if     (perf_memsys_if),
    `endif

        .clk                (clk),
        .reset              (reset),

        // Dcache core request
        .dcache_req_valid   (dcache_req_if.valid),
        .dcache_req_rw      (dcache_req_if.rw),
        .dcache_req_byteen  (dcache_req_if.byteen),
        .dcache_req_addr    (dcache_req_if.addr),
        .dcache_req_data    (dcache_req_if.data),
        .dcache_req_tag     (dcache_req_if.tag),
        .dcache_req_ready   (dcache_req_if.ready),

        // Dcache core response
        .dcache_rsp_valid   (dcache_rsp_if.valid),
        .dcache_rsp_tmask   (dcache_rsp_if.tmask),
        .dcache_rsp_data    (dcache_rsp_if.data),
        .dcache_rsp_tag     (dcache_rsp_if.tag),
        .dcache_rsp_ready   (dcache_rsp_if.ready),

        // Icache core request
        .icache_req_valid   (icache_req_if.valid),
        .icache_req_addr    (icache_req_if.addr),
        .icache_req_tag     (icache_req_if.tag),
        .icache_req_ready   (icache_req_if.ready),

        // Icache core response
        .icache_rsp_valid   (icache_rsp_if.valid),
        .icache_rsp_data    (icache_rsp_if.data),
        .icache_rsp_tag     (icache_rsp_if.tag),
        .icache_rsp_ready   (icache_rsp_if.ready),

        // Status
        .busy               (busy)
    );

    //--

    VX_mem_unit #(
        .CORE_ID(CORE_ID)
    ) mem_unit (
        `SCOPE_BIND_VX_core_mem_unit
    `ifdef PERF_ENABLE
        .perf_memsys_if (perf_memsys_if),
    `endif

        .clk            (clk),
        .reset          (reset),

        // Core <-> Dcache
        .dcache_req_if  (dcache_req_if),
        .dcache_rsp_if  (dcache_rsp_if),

        // Core <-> Icache
        .icache_req_if  (icache_req_if),
        .icache_rsp_if  (icache_rsp_if),

        // Memory
        .mem_req_if     (mem_req_if),
        .mem_rsp_if     (mem_rsp_if)
    );

endmodule

View file

@ -1,265 +0,0 @@
`include "VX_define.vh"
// VX_csr_data
//
// CSR storage and read multiplexer for one core.
//  - Write side (clocked): updates the per-warp floating-point CSR (fcsr) and a
//    handful of machine-mode registers that are kept as plain storage.
//  - Counters (clocked): csr_cycle and csr_instret.
//  - Read side (combinational): decodes read_addr into read_data and flags
//    addresses that are not recognized.
module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
`ifdef EXT_TEX_ENABLE
VX_perf_tex_if.slave perf_tex_if,
`endif
VX_perf_memsys_if.slave perf_memsys_if,
VX_perf_pipeline_if.slave perf_pipeline_if,
`endif
VX_cmt_to_csr_if.slave cmt_to_csr_if,
VX_fetch_to_csr_if.slave fetch_to_csr_if,
`ifdef EXT_F_ENABLE
VX_fpu_to_csr_if.slave fpu_to_csr_if,
`endif
`ifdef EXT_TEX_ENABLE
VX_tex_csr_if.master tex_csr_if,
`endif
// read port (combinational)
input wire read_enable,
input wire [`UUID_BITS-1:0] read_uuid,
input wire[`CSR_ADDR_BITS-1:0] read_addr,
input wire[`NW_BITS-1:0] read_wid,
output wire[31:0] read_data,
// write port (takes effect on the next clock edge)
input wire write_enable,
input wire [`UUID_BITS-1:0] write_uuid,
input wire[`CSR_ADDR_BITS-1:0] write_addr,
input wire[`NW_BITS-1:0] write_wid,
input wire[31:0] write_data,
// gates the cycle counter
input wire busy
);
import fpu_types::*;
// Machine-mode registers held as plain storage: they are written through the
// CSR write port and read back unchanged. None of them is reset in this module.
reg [`CSR_WIDTH-1:0] csr_satp;
reg [`CSR_WIDTH-1:0] csr_mstatus;
reg [`CSR_WIDTH-1:0] csr_medeleg;
reg [`CSR_WIDTH-1:0] csr_mideleg;
reg [`CSR_WIDTH-1:0] csr_mie;
reg [`CSR_WIDTH-1:0] csr_mtvec;
reg [`CSR_WIDTH-1:0] csr_mepc;
reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];
// 64-bit counters backing MCYCLE / MINSTRET
reg [63:0] csr_cycle;
reg [63:0] csr_instret;
// Per-warp floating-point CSR: rounding mode (frm) in the upper
// `INST_FRM_BITS bits, accrued exception flags (fflags) in the lower
// `FFLAGS_BITS bits.
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
// CSR write logic. Only fcsr is cleared on reset.
always @(posedge clk) begin
if (reset) begin
fcsr <= '0;
end else begin
`ifdef EXT_F_ENABLE
// Flags reported by the FPU are OR-ed into the warp's existing fflags.
if (fpu_to_csr_if.write_enable) begin
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
| fpu_to_csr_if.write_fflags;
end
`endif
// Explicit CSR write. It is assigned after the FPU update above, so for
// the same warp's flag bits in the same cycle this assignment is the later
// non-blocking one.
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
`CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
`CSR_SATP: csr_satp <= write_data[`CSR_WIDTH-1:0];
`CSR_MSTATUS: csr_mstatus <= write_data[`CSR_WIDTH-1:0];
`CSR_MEDELEG: csr_medeleg <= write_data[`CSR_WIDTH-1:0];
`CSR_MIDELEG: csr_mideleg <= write_data[`CSR_WIDTH-1:0];
`CSR_MIE: csr_mie <= write_data[`CSR_WIDTH-1:0];
`CSR_MTVEC: csr_mtvec <= write_data[`CSR_WIDTH-1:0];
`CSR_MEPC: csr_mepc <= write_data[`CSR_WIDTH-1:0];
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data[`CSR_WIDTH-1:0];
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0];
// Any other address: with the texture extension only the texture CSR
// addresses are accepted (they are forwarded through tex_csr_if below);
// without it the assertion condition is false whenever this branch is
// reached, so every unlisted write address is reported.
default: begin
`ifdef EXT_TEX_ENABLE
`ASSERT((write_addr == `CSR_TEX_UNIT)
|| (write_addr >= `CSR_TEX_STATE_BEGIN
&& write_addr < `CSR_TEX_STATE_END),
("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
`else
`ASSERT(~write_enable, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
`endif
end
endcase
end
end
end
`UNUSED_VAR (write_data)
// TEX CSRs
// Every CSR write is forwarded unfiltered to the texture CSR interface.
`ifdef EXT_TEX_ENABLE
assign tex_csr_if.write_enable = write_enable;
assign tex_csr_if.write_addr = write_addr;
assign tex_csr_if.write_data = write_data;
assign tex_csr_if.write_uuid = write_uuid;
`endif
// Counters: csr_cycle advances by one every cycle in which `busy` is high;
// csr_instret advances by commit_size whenever the commit stage reports a
// valid commit.
always @(posedge clk) begin
if (reset) begin
csr_cycle <= 0;
csr_instret <= 0;
end else begin
if (busy) begin
csr_cycle <= csr_cycle + 1;
end
if (cmt_to_csr_if.valid) begin
csr_instret <= csr_instret + 64'(cmt_to_csr_if.commit_size);
end
end
end
// CSR read multiplexer. read_data_r defaults to 'x and read_addr_valid_r to 1;
// only the default branch can clear the valid flag.
reg [31:0] read_data_r;
reg read_addr_valid_r;
always @(*) begin
read_data_r = 'x;
read_addr_valid_r = 1;
case (read_addr)
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFLAGS_BITS-1:0]);
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS]);
`CSR_FCSR : read_data_r = 32'(fcsr[read_wid]);
// These three addresses all return the warp id of the reader.
`CSR_WTID ,
`CSR_LTID ,
`CSR_LWID : read_data_r = 32'(read_wid);
// These addresses return the warp id offset by the core's first warp.
`CSR_GTID ,
/*`CSR_MHARTID ,*/
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
`CSR_GCID : read_data_r = CORE_ID;
`CSR_TMASK : read_data_r = 32'(fetch_to_csr_if.thread_masks[read_wid]);
`CSR_NT : read_data_r = `NUM_THREADS;
`CSR_NW : read_data_r = `NUM_WARPS;
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
// 64-bit counters are read as a low word and a high word (`_H`); the high
// word exposes bits [`PERF_CTR_BITS-1:32], zero-extended to 32 bits.
`CSR_MCYCLE : read_data_r = csr_cycle[31:0];
`CSR_MCYCLE_H : read_data_r = 32'(csr_cycle[`PERF_CTR_BITS-1:32]);
`CSR_MINSTRET : read_data_r = csr_instret[31:0];
`CSR_MINSTRET_H : read_data_r = 32'(csr_instret[`PERF_CTR_BITS-1:32]);
`ifdef PERF_ENABLE
// PERF: pipeline
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scb_stalls[31:0];
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
`ifdef EXT_F_ENABLE
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
`else
// Without the F extension the FPU stall counter reads as zero.
`CSR_MPM_FPU_ST : read_data_r = '0;
`CSR_MPM_FPU_ST_H : read_data_r = '0;
`endif
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
// PERF: decode
`CSR_MPM_LOADS : read_data_r = perf_pipeline_if.loads[31:0];
`CSR_MPM_LOADS_H : read_data_r = 32'(perf_pipeline_if.loads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_STORES : read_data_r = perf_pipeline_if.stores[31:0];
`CSR_MPM_STORES_H : read_data_r = 32'(perf_pipeline_if.stores[`PERF_CTR_BITS-1:32]);
`CSR_MPM_BRANCHES : read_data_r = perf_pipeline_if.branches[31:0];
`CSR_MPM_BRANCHES_H : read_data_r = 32'(perf_pipeline_if.branches[`PERF_CTR_BITS-1:32]);
// PERF: icache
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
// PERF: dcache
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_BANK_ST : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
// PERF: smem
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
// PERF: memory
`CSR_MPM_MEM_READS : read_data_r = perf_memsys_if.mem_reads[31:0];
`CSR_MPM_MEM_READS_H : read_data_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_WRITES : read_data_r = perf_memsys_if.mem_writes[31:0];
`CSR_MPM_MEM_WRITES_H : read_data_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_LAT : read_data_r = perf_memsys_if.mem_latency[31:0];
`CSR_MPM_MEM_LAT_H : read_data_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
`ifdef EXT_TEX_ENABLE
// PERF: texunit
`CSR_MPM_TEX_READS : read_data_r = perf_tex_if.mem_reads[31:0];
`CSR_MPM_TEX_READS_H : read_data_r = 32'(perf_tex_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TEX_LAT : read_data_r = perf_tex_if.mem_latency[31:0];
`CSR_MPM_TEX_LAT_H : read_data_r = 32'(perf_tex_if.mem_latency[`PERF_CTR_BITS-1:32]);
`endif
// PERF: reserved
`CSR_MPM_RESERVED : read_data_r = '0;
`CSR_MPM_RESERVED_H : read_data_r = '0;
`endif
`CSR_SATP : read_data_r = 32'(csr_satp);
`CSR_MSTATUS : read_data_r = 32'(csr_mstatus);
`CSR_MISA : read_data_r = `ISA_CODE;
`CSR_MEDELEG : read_data_r = 32'(csr_medeleg);
`CSR_MIDELEG : read_data_r = 32'(csr_mideleg);
`CSR_MIE : read_data_r = 32'(csr_mie);
`CSR_MTVEC : read_data_r = 32'(csr_mtvec);
`CSR_MEPC : read_data_r = 32'(csr_mepc);
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
// Unlisted addresses: anything inside the two 32-entry MPM windows (and,
// with the texture extension, the texture CSR addresses) is accepted as
// valid but read_data_r keeps its 'x default; everything else is flagged
// invalid.
// NOTE(review): when PERF_ENABLE is off, MPM reads therefore return 'x
// rather than 0 - confirm this is intended.
default: begin
if ((read_addr >= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32))
|| (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32))) begin
read_addr_valid_r = 1;
end else
`ifdef EXT_TEX_ENABLE
if ((read_addr == `CSR_TEX_UNIT)
|| (read_addr >= `CSR_TEX_STATE_BEGIN
&& read_addr < `CSR_TEX_STATE_END)) begin
read_addr_valid_r = 1;
end else
`endif
read_addr_valid_r = 0;
end
endcase
end
// A read of an unrecognized address while read_enable is high is reported.
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: %0h (#%0d)", $time, read_addr, read_uuid))
assign read_data = read_data_r;
`ifdef EXT_F_ENABLE
// The FPU reads the rounding mode of the warp it selects via read_wid.
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS];
`endif
endmodule

View file

@ -1,151 +0,0 @@
`include "VX_define.vh"
// CSR execution unit.
//
// Two-stage pipeline around VX_csr_data:
//   stage 0: read the addressed CSR and compute the value that the
//            instruction would write back (RW / RS / RC),
//   stage 1: commit the old value to the register file (csr_commit_if)
//            and, when enabled, write the new value into VX_csr_data.
//
// A bypass forwards the stage-1 write data to a stage-0 read of the same
// CSR by the same warp, so back-to-back read-modify-write sequences observe
// the pending write.
module VX_csr_unit #(
    parameter CORE_ID = 0
) (
    input wire                  clk,
    input wire                  reset,

`ifdef PERF_ENABLE
`ifdef EXT_TEX_ENABLE
    VX_perf_tex_if.slave        perf_tex_if,
`endif
    VX_perf_memsys_if.slave     perf_memsys_if,
    VX_perf_pipeline_if.slave   perf_pipeline_if,
`endif

    VX_cmt_to_csr_if.slave      cmt_to_csr_if,
    VX_fetch_to_csr_if.slave    fetch_to_csr_if,
    VX_csr_req_if.slave         csr_req_if,
    VX_commit_if.master         csr_commit_if,

`ifdef EXT_F_ENABLE
    VX_fpu_to_csr_if.slave      fpu_to_csr_if,
    input wire[`NUM_WARPS-1:0]  fpu_pending,
`endif

`ifdef EXT_TEX_ENABLE
    VX_tex_csr_if.master        tex_csr_if,
`endif

    output wire[`NUM_WARPS-1:0] pending,
    input wire                  busy
);
    // Stage-1 (post pipe-register) copies of the request state.
    wire                        csr_we_s1;
    wire [`CSR_ADDR_BITS-1:0]   csr_addr_s1;
    wire [31:0]                 csr_read_data;
    wire [31:0]                 csr_read_data_s1;
    wire [31:0]                 csr_updated_data_s1;

    // The CSR store is updated whenever the stage-1 entry is valid and flagged
    // as a write; this does not wait for csr_commit_if.ready, so the same
    // value may be written on several consecutive cycles during a stall.
    wire write_enable = csr_commit_if.valid && csr_we_s1;

    // Source operand: immediate (extended to 32 bits) or rs1 register value.
    wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.imm) : csr_req_if.rs1_data;

    VX_csr_data #(
        .CORE_ID(CORE_ID)
    ) csr_data (
        .clk            (clk),
        .reset          (reset),
    `ifdef PERF_ENABLE
    `ifdef EXT_TEX_ENABLE
        .perf_tex_if    (perf_tex_if),
    `endif
        .perf_memsys_if (perf_memsys_if),
        .perf_pipeline_if(perf_pipeline_if),
    `endif
        .cmt_to_csr_if  (cmt_to_csr_if),
        .fetch_to_csr_if(fetch_to_csr_if),
    `ifdef EXT_F_ENABLE
        .fpu_to_csr_if  (fpu_to_csr_if),
    `endif
    `ifdef EXT_TEX_ENABLE
        .tex_csr_if     (tex_csr_if),
    `endif
        .read_enable    (csr_req_if.valid),
        .read_uuid      (csr_req_if.uuid),
        .read_addr      (csr_req_if.addr),
        .read_wid       (csr_req_if.wid),
        .read_data      (csr_read_data),
        .write_enable   (write_enable),
        .write_uuid     (csr_commit_if.uuid),
        .write_addr     (csr_addr_s1),
        .write_wid      (csr_commit_if.wid),
        .write_data     (csr_updated_data_s1),
        .busy           (busy)
    );

    // Read-after-write bypass.
    // Forward only when the stage-1 instruction really writes the CSR
    // (csr_we_s1). Without that qualifier, a non-writing instruction in
    // stage 1 (e.g. CSRRS/CSRRC with a zero operand) would forward its own,
    // older read value, and a following read of a hardware-updated CSR by the
    // same warp would observe stale data instead of the current CSR contents.
    wire write_hazard = csr_commit_if.valid
                     && csr_we_s1
                     && (csr_addr_s1 == csr_req_if.addr)
                     && (csr_commit_if.wid == csr_req_if.wid);

    wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data;

    // Compute the write-back value and the (unqualified) write enable.
    reg [31:0] csr_updated_data;
    reg        csr_we_s0_unqual;

    always @(*) begin
        // RS/RC only need to write when the operand has at least one bit set;
        // a zero mask leaves the CSR value unchanged.
        csr_we_s0_unqual = (csr_req_data != 0);
        case (csr_req_if.op_type)
            `INST_CSR_RW: begin
                // read/write: always writes, regardless of the operand value
                csr_updated_data = csr_req_data;
                csr_we_s0_unqual = 1;
            end
            `INST_CSR_RS: begin
                // read/set bits
                csr_updated_data = csr_read_data_qual | csr_req_data;
            end
            //`INST_CSR_RC
            default: begin
                // read/clear bits
                csr_updated_data = csr_read_data_qual & ~csr_req_data;
            end
        endcase
    end

    // Hold the request while the FPU reports outstanding work for this warp.
`ifdef EXT_F_ENABLE
    wire stall_in = fpu_pending[csr_req_if.wid];
`else
    wire stall_in = 0;
`endif

    wire csr_req_valid = csr_req_if.valid && !stall_in;

    // Back-pressure from the commit stage.
    wire stall_out = ~csr_commit_if.ready && csr_commit_if.valid;

    // Stage 0 -> stage 1 pipeline register (only the valid bit is reset).
    VX_pipe_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall_out),
        .data_in  ({csr_req_valid, csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}),
        .data_out ({csr_commit_if.valid, csr_commit_if.uuid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1, csr_addr_s1, csr_read_data_s1, csr_updated_data_s1})
    );

    // Per-thread result: thread-id CSRs are specialised with the lane index,
    // every other CSR returns the same value to all threads.
    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        assign csr_commit_if.data[i] = (csr_addr_s1 == `CSR_WTID) ? i :
                                       (csr_addr_s1 == `CSR_LTID
                                     || csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
                                                                    csr_read_data_s1;
    end

    assign csr_commit_if.eop = 1'b1;

    // can accept new request?
    assign csr_req_if.ready = ~(stall_out || stall_in);

    // pending request: one bit per warp, set when a request is accepted and
    // cleared when its commit is accepted. The set is written last, so it
    // wins if both events hit the same warp in the same cycle.
    reg [`NUM_WARPS-1:0] pending_r;
    always @(posedge clk) begin
        if (reset) begin
            pending_r <= 0;
        end else begin
            if (csr_commit_if.valid && csr_commit_if.ready) begin
                pending_r[csr_commit_if.wid] <= 0;
            end
            if (csr_req_if.valid && csr_req_if.ready) begin
                pending_r[csr_req_if.wid] <= 1;
            end
        end
    end
    assign pending = pending_r;

endmodule

View file

@ -1,495 +0,0 @@
`include "VX_define.vh"
`ifdef DBG_TRACE_CORE_PIPELINE
`include "VX_trace_instr.vh"
`endif
// Decoder helper macros: `USED_IREG(x) / `USED_FREG(x) assign the decoded
// operand register `x_r` from the raw instruction field `x`.
// With EXT_F_ENABLE the assigned value gets one extra leading bit:
// 1'b0 for USED_IREG and 1'b1 for USED_FREG (presumably selecting the
// integer vs. floating-point half of the register index space).
// Without EXT_F_ENABLE only USED_IREG is defined and the field is copied as is.
`ifdef EXT_F_ENABLE
`define USED_IREG(r) \
r``_r = {1'b0, ``r}
`define USED_FREG(r) \
r``_r = {1'b1, ``r}
`else
`define USED_IREG(r) \
r``_r = ``r
`endif
// Instruction decoder.
//
// Purely combinational: takes a fetched 32-bit instruction from
// ifetch_rsp_if and produces the execution-unit selection (ex_type), the
// unit-specific operation (op_type / op_mod), the operand register indices,
// the immediate and the use_PC / use_imm / wb flags on decode_if.
// It also notifies the warp scheduler through wstall_if (the instruction
// requires the warp to stall) and join_if (the instruction is a JOIN).
module VX_decode #(
    parameter CORE_ID = 0
) (
    input wire              clk,
    input wire              reset,

`ifdef PERF_ENABLE
    VX_perf_pipeline_if.decode perf_decode_if,
`endif

    // inputs
    VX_ifetch_rsp_if.slave  ifetch_rsp_if,

    // outputs
    VX_decode_if.master     decode_if,
    VX_wstall_if.master     wstall_if,
    VX_join_if.master       join_if
);
    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    // Decoded fields (driven by the combinational block below).
    reg [`EX_BITS-1:0]       ex_type;
    reg [`INST_OP_BITS-1:0]  op_type;
    reg [`INST_MOD_BITS-1:0] op_mod;
    reg [`NR_BITS-1:0]       rd_r, rs1_r, rs2_r, rs3_r;
    reg [31:0]               imm;
    reg use_rd, use_PC, use_imm;
    reg is_join, is_wstall;

    // Raw instruction fields.
    wire [31:0] instr  = ifetch_rsp_if.data;
    wire [6:0]  opcode = instr[6:0];
    wire [1:0]  func2  = instr[26:25];
    wire [2:0]  func3  = instr[14:12];
    wire [6:0]  func7  = instr[31:25];
    wire [11:0] u_12   = instr[31:20];

    wire [4:0]  rd     = instr[11:7];
    wire [4:0]  rs1    = instr[19:15];
    wire [4:0]  rs2    = instr[24:20];
    wire [4:0]  rs3    = instr[31:27];

    // Immediate encodings.
    wire [19:0] upper_imm = {func7, rs2, rs1, func3};
    // Shift-immediate forms (func3 == 1 or 5) only use the 5-bit shamt field.
    wire [11:0] alu_imm   = (func3[0] && ~func3[1]) ? {{7{1'b0}}, rs2} : u_12;
    wire [11:0] s_imm     = {func7, rd};
    wire [12:0] b_imm     = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
    wire [20:0] jal_imm   = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};

    `UNUSED_VAR (rs3)

    always @(*) begin

        // Defaults: no unit selected, no operands used. op_type and imm are
        // left as don't-care unless the selected opcode assigns them.
        ex_type   = 0;
        op_type   = 'x;
        op_mod    = 0;
        rd_r      = 0;
        rs1_r     = 0;
        rs2_r     = 0;
        rs3_r     = 0;
        imm       = 'x;
        use_imm   = 0;
        use_PC    = 0;
        use_rd    = 0;
        is_join   = 0;
        is_wstall = 0;

        case (opcode)
            // ALU register-immediate
            `INST_I: begin
                ex_type = `EX_ALU;
                case (func3)
                    3'h0: op_type = `INST_OP_BITS'(`INST_ALU_ADD);
                    3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
                    3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
                    3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
                    3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
                    3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
                    3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
                    3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
                    default:;
                endcase
                use_rd  = 1;
                use_imm = 1;
                imm     = {{20{alu_imm[11]}}, alu_imm};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            // ALU register-register (and integer multiply/divide)
            `INST_R: begin
                ex_type = `EX_ALU;
            // The multiply/divide encodings belong to the M extension, so
            // their decode must follow EXT_M_ENABLE (not EXT_F_ENABLE).
            `ifdef EXT_M_ENABLE
                if (func7[0]) begin
                    case (func3)
                        3'h0: op_type = `INST_OP_BITS'(`INST_MUL_MUL);
                        3'h1: op_type = `INST_OP_BITS'(`INST_MUL_MULH);
                        3'h2: op_type = `INST_OP_BITS'(`INST_MUL_MULHSU);
                        3'h3: op_type = `INST_OP_BITS'(`INST_MUL_MULHU);
                        3'h4: op_type = `INST_OP_BITS'(`INST_MUL_DIV);
                        3'h5: op_type = `INST_OP_BITS'(`INST_MUL_DIVU);
                        3'h6: op_type = `INST_OP_BITS'(`INST_MUL_REM);
                        3'h7: op_type = `INST_OP_BITS'(`INST_MUL_REMU);
                        default:;
                    endcase
                    // op_mod[1] marks a mul/div operation (see INST_ALU_IS_MUL)
                    op_mod = 2;
                end else
            `endif
                begin
                    case (func3)
                        3'h0: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SUB) : `INST_OP_BITS'(`INST_ALU_ADD);
                        3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL);
                        3'h2: op_type = `INST_OP_BITS'(`INST_ALU_SLT);
                        3'h3: op_type = `INST_OP_BITS'(`INST_ALU_SLTU);
                        3'h4: op_type = `INST_OP_BITS'(`INST_ALU_XOR);
                        3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA) : `INST_OP_BITS'(`INST_ALU_SRL);
                        3'h6: op_type = `INST_OP_BITS'(`INST_ALU_OR);
                        3'h7: op_type = `INST_OP_BITS'(`INST_ALU_AND);
                        default:;
                    endcase
                end
                use_rd = 1;
                `USED_IREG (rd);
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
            `INST_LUI: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_ALU_LUI);
                use_rd  = 1;
                use_imm = 1;
                imm     = {upper_imm, 12'(0)};
                `USED_IREG (rd);
                rs1_r   = 0;
            end
            `INST_AUIPC: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_ALU_AUIPC);
                use_rd  = 1;
                use_imm = 1;
                use_PC  = 1;
                imm     = {upper_imm, 12'(0)};
                `USED_IREG (rd);
            end
            // Control flow: op_mod[0] marks a branch-class operation
            // (see INST_ALU_IS_BR); these also request a warp stall.
            `INST_JAL: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_BR_JAL);
                op_mod  = 1;
                use_rd  = 1;
                use_imm = 1;
                use_PC  = 1;
                is_wstall = 1;
                imm     = {{11{jal_imm[20]}}, jal_imm};
                `USED_IREG (rd);
            end
            `INST_JALR: begin
                ex_type = `EX_ALU;
                op_type = `INST_OP_BITS'(`INST_BR_JALR);
                op_mod  = 1;
                use_rd  = 1;
                use_imm = 1;
                is_wstall = 1;
                imm     = {{20{u_12[11]}}, u_12};
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            `INST_B: begin
                ex_type = `EX_ALU;
                case (func3)
                    3'h0: op_type = `INST_OP_BITS'(`INST_BR_EQ);
                    3'h1: op_type = `INST_OP_BITS'(`INST_BR_NE);
                    3'h4: op_type = `INST_OP_BITS'(`INST_BR_LT);
                    3'h5: op_type = `INST_OP_BITS'(`INST_BR_GE);
                    3'h6: op_type = `INST_OP_BITS'(`INST_BR_LTU);
                    3'h7: op_type = `INST_OP_BITS'(`INST_BR_GEU);
                    default:;
                endcase
                op_mod  = 1;
                use_imm = 1;
                use_PC  = 1;
                is_wstall = 1;
                imm     = {{19{b_imm[12]}}, b_imm};
                `USED_IREG (rs1);
                `USED_IREG (rs2);
            end
            `INST_FENCE: begin
                ex_type = `EX_LSU;
                // op_mod == 1 identifies a fence (see INST_LSU_IS_FENCE)
                op_mod  = `INST_MOD_BITS'(1);
            end
            `INST_SYS : begin
                if (func3[1:0] != 0) begin
                    // CSR access: func3[1:0] is the CSR op, func3[2] selects
                    // the immediate form.
                    ex_type = `EX_CSR;
                    op_type = `INST_OP_BITS'(func3[1:0]);
                    use_rd  = 1;
                    use_imm = func3[2];
                    imm[`CSR_ADDR_BITS-1:0] = u_12; // addr
                    `USED_IREG (rd);
                    if (func3[2]) begin
                        imm[`CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
                    end else begin
                        `USED_IREG (rs1);
                    end
                end else begin
                    // ECALL/EBREAK/xRET are dispatched to the ALU as
                    // branch-class operations with an immediate of 4.
                    ex_type = `EX_ALU;
                    case (u_12)
                        12'h000: op_type = `INST_OP_BITS'(`INST_BR_ECALL);
                        12'h001: op_type = `INST_OP_BITS'(`INST_BR_EBREAK);
                        12'h002: op_type = `INST_OP_BITS'(`INST_BR_URET);
                        12'h102: op_type = `INST_OP_BITS'(`INST_BR_SRET);
                        12'h302: op_type = `INST_OP_BITS'(`INST_BR_MRET);
                        default:;
                    endcase
                    op_mod  = 1;
                    use_rd  = 1;
                    use_imm = 1;
                    use_PC  = 1;
                    is_wstall = 1;
                    imm     = 32'd4;
                    `USED_IREG (rd);
                end
            end
            // Loads (integer and, with EXT_F_ENABLE, float)
        `ifdef EXT_F_ENABLE
            `INST_FL,
        `endif
            `INST_L: begin
                ex_type = `EX_LSU;
                op_type = `INST_OP_BITS'({1'b0, func3});
                use_rd  = 1;
                imm     = {{20{u_12[11]}}, u_12};
            `ifdef EXT_F_ENABLE
                // opcode[2] distinguishes INST_FL from INST_L
                if (opcode[2]) begin
                    `USED_FREG (rd);
                end else
            `endif
                `USED_IREG (rd);
                `USED_IREG (rs1);
            end
            // Stores (integer and, with EXT_F_ENABLE, float)
        `ifdef EXT_F_ENABLE
            `INST_FS,
        `endif
            `INST_S: begin
                ex_type = `EX_LSU;
                op_type = `INST_OP_BITS'({1'b1, func3});
                imm     = {{20{s_imm[11]}}, s_imm};
                `USED_IREG (rs1);
            `ifdef EXT_F_ENABLE
                // opcode[2] distinguishes INST_FS from INST_S
                if (opcode[2]) begin
                    `USED_FREG (rs2);
                end else
            `endif
                `USED_IREG (rs2);
            end
        `ifdef EXT_F_ENABLE
            // Fused multiply-add family: the operation is taken from opcode[3:0]
            `INST_FMADD,
            `INST_FMSUB,
            `INST_FNMSUB,
            `INST_FNMADD: begin
                ex_type = `EX_FPU;
                op_type = `INST_OP_BITS'(opcode[3:0]);
                op_mod  = func3;
                use_rd  = 1;
                `USED_FREG (rd);
                `USED_FREG (rs1);
                `USED_FREG (rs2);
                `USED_FREG (rs3);
            end
            // Remaining floating-point operations, selected by func7
            `INST_FCI: begin
                ex_type = `EX_FPU;
                op_mod  = func3;
                use_rd  = 1;
                case (func7)
                    7'h00, // FADD
                    7'h04, // FSUB
                    7'h08, // FMUL
                    7'h0C: begin // FDIV
                        op_type = `INST_OP_BITS'(func7[3:0]);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h2C: begin
                        op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h50: begin
                        op_type = `INST_OP_BITS'(`INST_FPU_CMP);
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h60: begin
                        op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTWUS) : `INST_OP_BITS'(`INST_FPU_CVTWS);
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h68: begin
                        op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTSWU) : `INST_OP_BITS'(`INST_FPU_CVTSW);
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                    7'h10: begin
                        // FSGNJ=0, FSGNJN=1, FSGNJX=2
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = {1'b0, func3[1:0]};
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h14: begin
                        // FMIN=3, FMAX=4
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = func3[0] ? 4 : 3;
                        `USED_FREG (rd);
                        `USED_FREG (rs1);
                        `USED_FREG (rs2);
                    end
                    7'h70: begin
                        if (func3[0]) begin
                            // FCLASS
                            op_type = `INST_OP_BITS'(`INST_FPU_CLASS);
                        end else begin
                            // FMV.X.W=5
                            op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                            op_mod  = 5;
                        end
                        `USED_IREG (rd);
                        `USED_FREG (rs1);
                    end
                    7'h78: begin
                        // FMV.W.X=6
                        op_type = `INST_OP_BITS'(`INST_FPU_MISC);
                        op_mod  = 6;
                        `USED_FREG (rd);
                        `USED_IREG (rs1);
                    end
                    default:;
                endcase
            end
        `endif
            // Warp-control instructions
            `INST_GPGPU: begin
                ex_type = `EX_GPU;
                case (func3)
                    3'h0: begin
                        // rs2[0] selects PRED over TMC
                        op_type = rs2[0] ? `INST_OP_BITS'(`INST_GPU_PRED) : `INST_OP_BITS'(`INST_GPU_TMC);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                    end
                    3'h1: begin
                        op_type = `INST_OP_BITS'(`INST_GPU_WSPAWN);
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                    end
                    3'h2: begin
                        op_type = `INST_OP_BITS'(`INST_GPU_SPLIT);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                    end
                    3'h3: begin
                        op_type = `INST_OP_BITS'(`INST_GPU_JOIN);
                        is_join = 1;
                    end
                    3'h4: begin
                        op_type = `INST_OP_BITS'(`INST_GPU_BAR);
                        is_wstall = 1;
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                    end
                    3'h5: begin
                        // Routed to the LSU as a word load with op_mod == 2
                        // (see INST_LSU_IS_PREFETCH); no destination register.
                        ex_type = `EX_LSU;
                        op_type = `INST_OP_BITS'(`INST_LSU_LW);
                        op_mod  = `INST_MOD_BITS'(2);
                        `USED_IREG (rs1);
                    end
                    default:;
                endcase
            end
            `INST_GPU: begin
                case (func3)
                `ifdef EXT_TEX_ENABLE
                    3'h0: begin
                        ex_type = `EX_GPU;
                        op_type = `INST_OP_BITS'(`INST_GPU_TEX);
                        op_mod  = `INST_MOD_BITS'(func2);
                        use_rd  = 1;
                        `USED_IREG (rd);
                        `USED_IREG (rs1);
                        `USED_IREG (rs2);
                        `USED_IREG (rs3);
                    end
                `endif
                    default:;
                endcase
            end
            default:;
        endcase
    end

    `UNUSED_VAR (func2)

    // disable write to integer register r0
    wire wb = use_rd && (| rd_r);

    assign decode_if.valid   = ifetch_rsp_if.valid;
    assign decode_if.uuid    = ifetch_rsp_if.uuid;
    assign decode_if.wid     = ifetch_rsp_if.wid;
    assign decode_if.tmask   = ifetch_rsp_if.tmask;
    assign decode_if.PC      = ifetch_rsp_if.PC;
    assign decode_if.ex_type = ex_type;
    assign decode_if.op_type = op_type;
    assign decode_if.op_mod  = op_mod;
    assign decode_if.wb      = wb;
    assign decode_if.rd      = rd_r;
    assign decode_if.rs1     = rs1_r;
    assign decode_if.rs2     = rs2_r;
    assign decode_if.rs3     = rs3_r;
    assign decode_if.imm     = imm;
    assign decode_if.use_PC  = use_PC;
    assign decode_if.use_imm = use_imm;

    ///////////////////////////////////////////////////////////////////////////

    // Scheduler notifications are raised only when the instruction is
    // actually accepted downstream.
    wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;

    assign join_if.valid = ifetch_rsp_fire && is_join;
    assign join_if.wid   = ifetch_rsp_if.wid;

    assign wstall_if.valid   = ifetch_rsp_fire;
    assign wstall_if.wid     = ifetch_rsp_if.wid;
    assign wstall_if.stalled = is_wstall;

    assign ifetch_rsp_if.ready = decode_if.ready;

`ifdef PERF_ENABLE
    // Per-thread load/store/branch counters. An LSU memory operation is
    // counted as a load when it writes back and as a store otherwise.
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_loads_per_cycle;
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_stores_per_cycle;
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_branches_per_cycle;

    wire [`NUM_THREADS-1:0] perf_loads_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_LSU && `INST_LSU_IS_MEM(decode_if.op_mod) && decode_if.wb}};
    wire [`NUM_THREADS-1:0] perf_stores_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_LSU && `INST_LSU_IS_MEM(decode_if.op_mod) && ~decode_if.wb}};
    wire [`NUM_THREADS-1:0] perf_branches_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_ALU && `INST_ALU_IS_BR(decode_if.op_mod)}};

    `POP_COUNT(perf_loads_per_cycle, perf_loads_per_mask);
    `POP_COUNT(perf_stores_per_cycle, perf_stores_per_mask);
    `POP_COUNT(perf_branches_per_cycle, perf_branches_per_mask);

    reg [`PERF_CTR_BITS-1:0] perf_loads;
    reg [`PERF_CTR_BITS-1:0] perf_stores;
    reg [`PERF_CTR_BITS-1:0] perf_branches;

    always @(posedge clk) begin
        if (reset) begin
            perf_loads    <= 0;
            perf_stores   <= 0;
            perf_branches <= 0;
        end else begin
            if (decode_if.valid && decode_if.ready) begin
                perf_loads    <= perf_loads + `PERF_CTR_BITS'(perf_loads_per_cycle);
                perf_stores   <= perf_stores + `PERF_CTR_BITS'(perf_stores_per_cycle);
                perf_branches <= perf_branches + `PERF_CTR_BITS'(perf_branches_per_cycle);
            end
        end
    end

    assign perf_decode_if.loads    = perf_loads;
    assign perf_decode_if.stores   = perf_stores;
    assign perf_decode_if.branches = perf_branches;
`endif

`ifdef DBG_TRACE_CORE_PIPELINE
    // Trace every instruction handed to the next stage.
    always @(posedge clk) begin
        if (decode_if.valid && decode_if.ready) begin
            dpi_trace("%d: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC);
            trace_ex_type(decode_if.ex_type);
            dpi_trace(", op=");
            trace_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
            dpi_trace(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b (#%0d)\n",
                decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm, decode_if.uuid);
        end
    end
`endif

endmodule

View file

@ -1,24 +1,40 @@
`ifndef VX_DEFINE
`define VX_DEFINE
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`ifndef VX_DEFINE_VH
`define VX_DEFINE_VH
`include "VX_platform.vh"
`include "VX_config.vh"
`include "VX_types.vh"
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NW_BITS `CLOG2(`NUM_WARPS)
`define NC_WIDTH `UP(`NC_BITS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
`define NT_BITS `CLOG2(`NUM_THREADS)
`define NW_WIDTH `UP(`NW_BITS)
`define NC_BITS `LOG2UP(`NUM_CORES)
`define NC_BITS `CLOG2(`NUM_CORES)
`define NT_WIDTH `UP(`NT_BITS)
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
`define NB_BITS `CLOG2(`NUM_BARRIERS)
`define NB_WIDTH `UP(`NB_BITS)
`define NUM_IREGS 32
`define NRI_BITS `LOG2UP(`NUM_IREGS)
`define NTEX_BITS `LOG2UP(`NUM_TEX_UNITS)
`define NRI_BITS `CLOG2(`NUM_IREGS)
`ifdef EXT_F_ENABLE
`define NUM_REGS (2 * `NUM_IREGS)
@ -26,25 +42,48 @@
`define NUM_REGS `NUM_IREGS
`endif
`define NR_BITS `LOG2UP(`NUM_REGS)
`define NR_BITS `CLOG2(`NUM_REGS)
`define CSR_ADDR_BITS 12
`define CSR_WIDTH 12
`define DV_STACK_SIZE `UP(`NUM_THREADS-1)
`define DV_STACK_SIZEW `UP(`CLOG2(`DV_STACK_SIZE))
`define PERF_CTR_BITS 44
`define UUID_BITS 44
`ifndef NDEBUG
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`ifdef SCOPE
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`define UUID_WIDTH 1
`endif
`endif
`define PC_BITS (`XLEN-1)
`define OFFSET_BITS 12
`define IMM_BITS `XLEN
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
///////////////////////////////////////////////////////////////////////////////
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_FPU 3'h4
`define EX_GPU 3'h5
`define EX_BITS 3
`define EX_ALU 0
`define EX_LSU 1
`define EX_SFU 2
`define EX_FPU (`EX_SFU + `EXT_F_ENABLED)
`define NUM_EX_UNITS (3 + `EXT_F_ENABLED)
`define EX_BITS `CLOG2(`NUM_EX_UNITS)
`define EX_WIDTH `UP(`EX_BITS)
`define SFU_CSRS 0
`define SFU_WCTL 1
`define NUM_SFU_UNITS (2)
`define SFU_BITS `CLOG2(`NUM_SFU_UNITS)
`define SFU_WIDTH `UP(`SFU_BITS)
///////////////////////////////////////////////////////////////////////////////
@ -60,18 +99,27 @@
`define INST_FENCE 7'b0001111 // Fence instructions
`define INST_SYS 7'b1110011 // system instructions
// RV64I instruction specific opcodes (for any W instruction)
`define INST_I_W 7'b0011011 // W type immediate instructions
`define INST_R_W 7'b0111011 // W type register instructions
`define INST_FL 7'b0000111 // float load instruction
`define INST_FS 7'b0100111 // float store instruction
`define INST_FMADD 7'b1000011
`define INST_FMADD 7'b1000011
`define INST_FMSUB 7'b1000111
`define INST_FNMSUB 7'b1001011
`define INST_FNMADD 7'b1001111
`define INST_FNMADD 7'b1001111
`define INST_FCI 7'b1010011 // float common instructions
`define INST_GPGPU 7'b1101011
`define INST_GPU 7'b1011011
// Custom extension opcodes
`define INST_EXT1 7'b0001011 // 0x0B
`define INST_EXT2 7'b0101011 // 0x2B
`define INST_EXT3 7'b1011011 // 0x5B
`define INST_EXT4 7'b1111011 // 0x7B
`define INST_TEX 7'b0101011
// Opcode extensions
`define INST_R_F7_MUL 7'b0000001
`define INST_R_F7_ZICOND 7'b0000111
///////////////////////////////////////////////////////////////////////////////
@ -86,34 +134,45 @@
///////////////////////////////////////////////////////////////////////////////
`define INST_OP_BITS 4
`define INST_MOD_BITS 3
`define INST_ARGS_BITS $bits(op_args_t)
`define INST_FMT_BITS 2
///////////////////////////////////////////////////////////////////////////////
`define INST_ALU_ADD 4'b0000
//`define INST_ALU_UNUSED 4'b0001
`define INST_ALU_LUI 4'b0010
`define INST_ALU_AUIPC 4'b0011
`define INST_ALU_SLTU 4'b0100
`define INST_ALU_SLT 4'b0101
//`define INST_ALU_UNUSED 4'b0110
`define INST_ALU_SUB 4'b0111
`define INST_ALU_SRL 4'b1000
`define INST_ALU_SRA 4'b1001
`define INST_ALU_SUB 4'b1011
`define INST_ALU_CZEQ 4'b1010
`define INST_ALU_CZNE 4'b1011
`define INST_ALU_AND 4'b1100
`define INST_ALU_OR 4'b1101
`define INST_ALU_XOR 4'b1110
`define INST_ALU_SLL 4'b1111
`define INST_ALU_OTHER 4'b0111
`define ALU_TYPE_BITS 2
`define ALU_TYPE_ARITH 0
`define ALU_TYPE_BRANCH 1
`define ALU_TYPE_MULDIV 2
`define ALU_TYPE_OTHER 3
`define INST_ALU_BITS 4
`define INST_ALU_OP(x) x[`INST_ALU_BITS-1:0]
`define INST_ALU_OP_CLASS(x) x[3:2]
`define INST_ALU_SIGNED(x) x[0]
`define INST_ALU_IS_BR(x) x[0]
`define INST_ALU_IS_MUL(x) x[1]
`define INST_ALU_CLASS(op) op[3:2]
`define INST_ALU_SIGNED(op) op[0]
`define INST_ALU_IS_SUB(op) op[1]
`define INST_ALU_IS_CZERO(op) (op[3:1] == 3'b101)
`define INST_BR_EQ 4'b0000
`define INST_BR_NE 4'b0010
`define INST_BR_LTU 4'b0100
`define INST_BR_GEU 4'b0110
`define INST_BR_LTU 4'b0100
`define INST_BR_GEU 4'b0110
`define INST_BR_LT 4'b0101
`define INST_BR_GE 4'b0111
`define INST_BR_JAL 4'b1000
@ -125,292 +184,336 @@
`define INST_BR_MRET 4'b1110
`define INST_BR_OTHER 4'b1111
`define INST_BR_BITS 4
`define INST_BR_NEG(x) x[1]
`define INST_BR_LESS(x) x[2]
`define INST_BR_STATIC(x) x[3]
`define INST_BR_CLASS(op) {1'b0, ~op[3]}
`define INST_BR_IS_NEG(op) op[1]
`define INST_BR_IS_LESS(op) op[2]
`define INST_BR_IS_STATIC(op) op[3]
`define INST_MUL_MUL 3'h0
`define INST_MUL_MULH 3'h1
`define INST_MUL_MULHSU 3'h2
`define INST_MUL_MULHU 3'h3
`define INST_MUL_DIV 3'h4
`define INST_MUL_DIVU 3'h5
`define INST_MUL_REM 3'h6
`define INST_MUL_REMU 3'h7
`define INST_MUL_BITS 3
`define INST_MUL_IS_DIV(x) x[2]
`define INST_M_MUL 3'b000
`define INST_M_MULHU 3'b001
`define INST_M_MULH 3'b010
`define INST_M_MULHSU 3'b011
`define INST_M_DIV 3'b100
`define INST_M_DIVU 3'b101
`define INST_M_REM 3'b110
`define INST_M_REMU 3'b111
`define INST_M_BITS 3
`define INST_M_SIGNED(op) (~op[0])
`define INST_M_IS_MULX(op) (~op[2])
`define INST_M_IS_MULH(op) (op[1:0] != 0)
`define INST_M_SIGNED_A(op) (op[1:0] != 1)
`define INST_M_IS_REM(op) op[1]
`define INST_FMT_B 3'b000
`define INST_FMT_H 3'b001
`define INST_FMT_W 3'b010
`define INST_FMT_D 3'b011
`define INST_FMT_BU 3'b100
`define INST_FMT_HU 3'b101
`define INST_FMT_WU 3'b110
`define INST_LSU_LB 4'b0000
`define INST_LSU_LB 4'b0000
`define INST_LSU_LH 4'b0001
`define INST_LSU_LW 4'b0010
`define INST_LSU_LD 4'b0011 // new for RV64I LD
`define INST_LSU_LBU 4'b0100
`define INST_LSU_LHU 4'b0101
`define INST_LSU_SB 4'b1000
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
`define INST_LSU_SB 4'b1000
`define INST_LSU_SH 4'b1001
`define INST_LSU_SW 4'b1010
`define INST_LSU_SD 4'b1011 // new for RV64I SD
`define INST_LSU_FENCE 4'b1111
`define INST_LSU_BITS 4
`define INST_LSU_FMT(x) x[2:0]
`define INST_LSU_WSIZE(x) x[1:0]
`define INST_LSU_IS_MEM(x) (3'h0 == x)
`define INST_LSU_IS_FENCE(x) (3'h1 == x)
`define INST_LSU_IS_PREFETCH(x) (3'h2 == x)
`define INST_LSU_FMT(op) op[2:0]
`define INST_LSU_WSIZE(op) op[1:0]
`define INST_LSU_IS_FENCE(op) (op[3:2] == 3)
`define INST_FENCE_BITS 1
`define INST_FENCE_D 1'h0
`define INST_FENCE_I 1'h1
`define INST_CSR_RW 2'h1
`define INST_CSR_RS 2'h2
`define INST_CSR_RC 2'h3
`define INST_CSR_OTHER 2'h0
`define INST_CSR_BITS 2
`define INST_FPU_ADD 4'h0
`define INST_FPU_SUB 4'h4
`define INST_FPU_MUL 4'h8
`define INST_FPU_DIV 4'hC
`define INST_FPU_CVTWS 4'h1 // FCVT.W.S
`define INST_FPU_CVTWUS 4'h5 // FCVT.WU.S
`define INST_FPU_CVTSW 4'h9 // FCVT.S.W
`define INST_FPU_CVTSWU 4'hD // FCVT.S.WU
`define INST_FPU_SQRT 4'h2
`define INST_FPU_CLASS 4'h6
`define INST_FPU_CMP 4'hA
`define INST_FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define INST_FPU_MADD 4'h3
`define INST_FPU_MSUB 4'h7
`define INST_FPU_NMSUB 4'hB
`define INST_FPU_NMADD 4'hF
`define INST_FPU_ADD 4'b0000 // SUB=fmt[1]
`define INST_FPU_MUL 4'b0001
`define INST_FPU_MADD 4'b0010 // SUB=fmt[1]
`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1]
`define INST_FPU_DIV 4'b0100
`define INST_FPU_SQRT 4'b0101
`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2
`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1
`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
`define INST_FPU_BITS 4
`define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3)
`define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4)
`define INST_GPU_TMC 4'h0
`define INST_GPU_WSPAWN 4'h1
`define INST_GPU_SPLIT 4'h2
`define INST_GPU_JOIN 4'h3
`define INST_GPU_BAR 4'h4
`define INST_GPU_PRED 4'h5
`define INST_GPU_TEX 4'h6
`define INST_GPU_BITS 4
`define INST_SFU_TMC 4'h0
`define INST_SFU_WSPAWN 4'h1
`define INST_SFU_SPLIT 4'h2
`define INST_SFU_JOIN 4'h3
`define INST_SFU_BAR 4'h4
`define INST_SFU_PRED 4'h5
`define INST_SFU_CSRRW 4'h6
`define INST_SFU_CSRRS 4'h7
`define INST_SFU_CSRRC 4'h8
`define INST_SFU_BITS 4
`define INST_SFU_CSR(f3) (4'h6 + 4'(f3) - 4'h1)
`define INST_SFU_IS_WCTL(op) (op <= 5)
`define INST_SFU_IS_CSR(op) (op >= 6 && op <= 8)
///////////////////////////////////////////////////////////////////////////////
`ifdef EXT_M_ENABLE
`define ISA_EXT_M (1 << 12)
`else
`define ISA_EXT_M 0
`endif
`ifdef EXT_F_ENABLE
`define ISA_EXT_F (1 << 5)
`else
`define ISA_EXT_F 0
`endif
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
| (0 << 2) // C - Compressed extension \
| (0 << 3) // D - Double precsision floating-point extension \
| (0 << 4) // E - RV32E base ISA \
|`ISA_EXT_F // F - Single precsision floating-point extension \
| (0 << 6) // G - Additional standard extensions present \
| (0 << 7) // H - Hypervisor mode implemented \
| (1 << 8) // I - RV32I/64I/128I base ISA \
| (0 << 9) // J - Reserved \
| (0 << 10) // K - Reserved \
| (0 << 11) // L - Tentatively reserved for Bit operations extension \
|`ISA_EXT_M // M - Integer Multiply/Divide extension \
| (0 << 13) // N - User level interrupts supported \
| (0 << 14) // O - Reserved \
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
| (0 << 16) // Q - Quad-precision floating-point extension \
| (0 << 17) // R - Reserved \
| (0 << 18) // S - Supervisor mode implemented \
| (0 << 19) // T - Tentatively reserved for Transactional Memory extension \
| (1 << 20) // U - User mode implemented \
| (0 << 21) // V - Tentatively reserved for Vector extension \
| (0 << 22) // W - Reserved \
| (1 << 23) // X - Non-standard extensions present \
| (0 << 24) // Y - Reserved \
| (0 << 25) // Z - Reserved
`define ARB_SEL_BITS(I, O) ((I > O) ? `CLOG2(`CDIV(I, O)) : 0)
///////////////////////////////////////////////////////////////////////////////
// non-cacheable tag bits
`define NC_TAG_BIT 1
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width) \
(uuid_width + `CLOG2(mshr_size) + `CLOG2(`CDIV(num_banks, mem_ports)))
// texture tag bits
`define TEX_TAG_BIT 1
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width) \
(`CLOG2(`CDIV(num_reqs, mem_ports)) + `CLOG2(line_size / word_size) + tag_width)
// cache address type bits
`define CACHE_ADDR_TYPE_BITS (`NC_TAG_BIT + `SM_ENABLE)
////////////////////////// Icache Configurable Knobs //////////////////////////
// Cache ID
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
// Word size in bytes
`define ICACHE_WORD_SIZE 4
// Block size in bytes
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
// TAG sharing enable
`define ICACHE_CORE_TAG_ID_BITS `NW_BITS
// Core request tag bits
`define ICACHE_CORE_TAG_WIDTH (`UUID_BITS + `ICACHE_CORE_TAG_ID_BITS)
// Memory request data bits
`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
// Memory request address bits
`define ICACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
// Memory request tag bits
`define ICACHE_MEM_TAG_WIDTH `CLOG2(`ICACHE_MSHR_SIZE)
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Cache ID
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Word size in bytes
`define DCACHE_WORD_SIZE 4
// Block size in bytes
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
// Core request tag bits
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
`ifdef EXT_TEX_ENABLE
`define LSU_TAG_ID_BITS `MAX(`LSUQ_ADDR_BITS, 2)
`define LSU_TEX_DCACHE_TAG_BITS (`UUID_BITS + `LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS + `TEX_TAG_BIT)
`else
`define LSU_TAG_ID_BITS `LSUQ_ADDR_BITS
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
`endif
`define DCACHE_CORE_TAG_WIDTH (`UUID_BITS + `DCACHE_CORE_TAG_ID_BITS)
// Memory request data bits
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
// Memory request address bits
`define DCACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
// Memory byte enable bits
`define DCACHE_MEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
// Input request size
`define DCACHE_NUM_REQS `NUM_THREADS
// Memory request tag bits
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DCACHE_WORD_SIZE)
`define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH)
`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_TAG_BIT), `_DNC_MEM_TAG_WIDTH)
// Merged D-cache/I-cache memory tag
`define L1_MEM_TAG_WIDTH (`MAX(`ICACHE_MEM_TAG_WIDTH, `DCACHE_MEM_TAG_WIDTH) + `CLOG2(2))
////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID
`define SMEM_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
// Word size in bytes
`define SMEM_WORD_SIZE 4
// bank address offset
`define SMEM_BANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SMEM_WORD_SIZE)
// Input request size
`define SMEM_NUM_REQS `NUM_THREADS
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Cache ID
// CLUSTER_ID is not a macro; it must be visible in the scope where L2_CACHE_ID is expanded.
`define L2_CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
// Word size in bytes (an L2 word is one D-cache line)
`define L2_WORD_SIZE `DCACHE_LINE_SIZE
// Block size in bytes: `MEM_BLOCK_SIZE when L2 is enabled, otherwise the L2 word size
`define L2_CACHE_LINE_SIZE ((`L2_ENABLE) ? `MEM_BLOCK_SIZE : `L2_WORD_SIZE)
// Input request tag bits: D-cache core tag widened by log2(`NUM_CORES)
`define L2_CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
// Memory request data bits
`define L2_MEM_DATA_WIDTH (`L2_CACHE_LINE_SIZE * 8)
// Memory request address bits (32 minus the line-offset bits)
`define L2_MEM_ADDR_WIDTH (32 - `CLOG2(`L2_CACHE_LINE_SIZE))
// Memory byte enable bits
`define L2_MEM_BYTEEN_WIDTH `L2_CACHE_LINE_SIZE
// Input request size (one request port per core)
`define L2_NUM_REQS `NUM_CORES
// Memory request tag bits
// _L2_MEM_ADDR_RATIO_W : log2 of L2 words per L2 line
// _L2_NC_MEM_TAG_WIDTH : request-index bits + word-in-line bits + L1 memory tag bits
// _L2_MEM_TAG_WIDTH    : the larger of (bank bits + MSHR bits + NC_TAG_BIT) and _L2_NC_MEM_TAG_WIDTH
// L2_MEM_TAG_WIDTH     : _L2_MEM_TAG_WIDTH when L2 is enabled, otherwise the L1 tag widened by
//                        log2(`L2_NUM_REQS)
`define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE)
`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `L1_MEM_TAG_WIDTH)
`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_TAG_BIT), `_L2_NC_MEM_TAG_WIDTH)
`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`L1_MEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS)))
////////////////////////// L3cache Configurable Knobs /////////////////////////
// Cache ID
`define L3_CACHE_ID 0
// Word size in bytes (an L3 word is one L2 line)
`define L3_WORD_SIZE `L2_CACHE_LINE_SIZE
// Block size in bytes: `MEM_BLOCK_SIZE when L3 is enabled, otherwise the L3 word size
`define L3_CACHE_LINE_SIZE ((`L3_ENABLE) ? `MEM_BLOCK_SIZE : `L3_WORD_SIZE)
// Input request tag bits: L2 core tag widened by log2(`NUM_CLUSTERS)
`define L3_CORE_TAG_WIDTH (`L2_CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
// Memory request data bits
`define L3_MEM_DATA_WIDTH (`L3_CACHE_LINE_SIZE * 8)
// Memory request address bits (32 minus the line-offset bits)
`define L3_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_CACHE_LINE_SIZE))
// Memory byte enable bits
`define L3_MEM_BYTEEN_WIDTH `L3_CACHE_LINE_SIZE
// Input request size (one request port per cluster)
`define L3_NUM_REQS `NUM_CLUSTERS
// Memory request tag bits
// _L3_MEM_ADDR_RATIO_W : log2 of L3 words per L3 line
// _L3_NC_MEM_TAG_WIDTH : request-index bits + word-in-line bits + L2 memory tag bits
// _L3_MEM_TAG_WIDTH    : the larger of (bank bits + MSHR bits + NC_TAG_BIT) and _L3_NC_MEM_TAG_WIDTH
// L3_MEM_TAG_WIDTH     : _L3_MEM_TAG_WIDTH when L3 is enabled, otherwise the L2 tag widened by
//                        log2(`L3_NUM_REQS)
`define _L3_MEM_ADDR_RATIO_W $clog2(`L3_CACHE_LINE_SIZE / `L3_WORD_SIZE)
`define _L3_NC_MEM_TAG_WIDTH ($clog2(`L3_NUM_REQS) + `_L3_MEM_ADDR_RATIO_W + `L2_MEM_TAG_WIDTH)
`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_TAG_BIT), `_L3_NC_MEM_TAG_WIDTH)
`define L3_MEM_TAG_WIDTH ((`L3_ENABLE) ? `_L3_MEM_TAG_WIDTH : (`L2_MEM_TAG_WIDTH + `CLOG2(`L3_NUM_REQS)))
// Memory tag width of a cache that also has a bypass path: the larger of the regular cache memory
// tag (`CACHE_MEM_TAG_WIDTH) and the bypass tag (`CACHE_BYPASS_TAG_WIDTH), plus one extra bit.
// (NOTE(review): the extra bit presumably marks bypassed requests - confirm against the cache RTL)
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, uuid_width) \
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width)) + 1)
///////////////////////////////////////////////////////////////////////////////
// Top-level (VX_*) memory-interface widths, aliased to the L3-level values
`define VX_MEM_BYTEEN_WIDTH `L3_MEM_BYTEEN_WIDTH
`define VX_MEM_ADDR_WIDTH `L3_MEM_ADDR_WIDTH
`define VX_MEM_DATA_WIDTH `L3_MEM_DATA_WIDTH
`define VX_MEM_TAG_WIDTH `L3_MEM_TAG_WIDTH
`define VX_CORE_TAG_WIDTH `L3_CORE_TAG_WIDTH
// CSR ID width: enough bits to index every core in every cluster
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
// Core-side tag width of a cache cluster: the input tag plus the arbiter select bits needed to
// route num_inputs requesters onto `UP(num_caches) caches
`define CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches) \
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
// Expands x to a 32-bit value by appending (32 - $bits(x)) zero bits on the LSB side
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
// Memory-side tag width of a cache cluster: the per-cache tag plus the arbiter select bits needed
// to merge `UP(num_caches) caches onto a single output
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
// Cluster memory tag width for the regular (cached) path
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, num_caches, uuid_width) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width), num_caches)
// Cluster memory tag width for the bypass path (core-side arbitration bits are added first)
`define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
// Cluster memory tag width when each cache uses `CACHE_NC_MEM_TAG_WIDTH (cached + bypass paths)
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
///////////////////////////////////////////////////////////////////////////////
`include "VX_fpu_types.vh"
`include "VX_gpu_types.vh"
// L1_ENABLE is defined when either the I-cache or the D-cache is enabled
`ifdef ICACHE_ENABLE
`define L1_ENABLE
`endif
`ifdef DCACHE_ENABLE
`define L1_ENABLE
`endif
// Bit positions of the memory request flags
`define MEM_REQ_FLAG_FLUSH 0
`define MEM_REQ_FLAG_IO 1
`define MEM_REQ_FLAG_LOCAL 2 // should be last since optional
// Flags vector width: the LOCAL bit only exists when `LMEM_ENABLED is non-zero
`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED)
// Top-level (VX_*) memory-interface parameters, derived from the L3-level configuration
`define VX_MEM_PORTS `L3_MEM_PORTS
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
// NOTE(review): L3_MEM_TAG_WIDTH has no backtick here, so it resolves to an identifier (not a
// macro) in the scope where VX_MEM_TAG_WIDTH is expanded - confirm that such a parameter exists.
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
`define VX_DCR_ADDR_WIDTH `VX_DCR_ADDR_BITS
`define VX_DCR_DATA_WIDTH 32
// Expands x to `MEM_ADDR_WIDTH bits by appending zero bits on the LSB side
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
///////////////////////////////////////////////////////////////////////////////
// Instantiates a VX_edge_trigger with POS=0 and INIT=0 that drives dst from src.
// Uses `clk` from the enclosing scope; the trigger's reset input is tied low.
// The instance name is made unique per source line via `__LINE__.
`define NEG_EDGE(dst, src) \
VX_edge_trigger #( \
.POS (0), \
.INIT (0) \
) __neg_edge`__LINE__ ( \
.clk (clk), \
.reset (1'b0), \
.data_in (src), \
.data_out (dst) \
)
// Instantiates a VX_pipe_register between src and dst.
//   ena     : register enable
//   resetw  : value passed to the RESETW parameter
//   latency : value passed to the DEPTH parameter
// The data width is taken from $bits(dst); `clk` and `reset` come from the enclosing scope.
`define BUFFER_EX(dst, src, ena, resetw, latency) \
VX_pipe_register #( \
.DATAW ($bits(dst)), \
.RESETW (resetw), \
.DEPTH (latency) \
) __buffer_ex`__LINE__ ( \
.clk (clk), \
.reset (reset), \
.enable (ena), \
.data_in (src), \
.data_out (dst) \
)
// Shorthand for BUFFER_EX: always enabled, RESETW = full width of dst, DEPTH = 1
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
// Instantiates a VX_popcount over `in` (N = $bits(in)) with the given MODEL, result on `out`
`define POP_COUNT_EX(out, in, model) \
VX_popcount #( \
.N ($bits(in)), \
.MODEL (model) \
) __pop_count_ex`__LINE__ ( \
.data_in (in), \
.data_out (out) \
)
// Shorthand for POP_COUNT_EX with MODEL = 1
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
// Connects a valid/data/ready interface: valid and data flow src -> dst, ready flows dst -> src.
// The last assign has no trailing ';' so the caller supplies it.
`define ASSIGN_VX_IF(dst, src) \
assign dst.valid = src.valid; \
assign dst.data = src.data; \
assign src.ready = dst.ready
// Connects two memory bus interfaces of identical shape.
// Request channel flows src -> dst, response channel flows dst -> src.
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
assign dst.req_valid = src.req_valid; \
assign dst.req_data = src.req_data; \
assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data = dst.rsp_data; \
assign dst.rsp_ready = src.rsp_ready
// Connects a read-only source to a memory bus: rw is forced to 0, write data to all zeros and
// byteen to all ones; addr, flags and tag are forwarded from src. Responses flow dst -> src.
`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \
assign dst.req_valid = src.req_valid; \
assign dst.req_data.rw = 0; \
assign dst.req_data.addr = src.req_data.addr; \
assign dst.req_data.data = '0; \
assign dst.req_data.byteen = '1; \
assign dst.req_data.flags = src.req_data.flags; \
assign dst.req_data.tag = src.req_data.tag; \
assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data.data = dst.rsp_data.data; \
assign src.rsp_data.tag = dst.rsp_data.tag; \
assign dst.rsp_ready = src.rsp_ready
// Connects two memory bus interfaces whose tag widths may differ.
//   TD   : tag width on the dst side
//   TS   : tag width on the src side
//   UUID : UUID width inside the tag; 0 means the tag is treated as a plain bit vector
// Request tag (src -> dst):
//   TD == TS : copied unchanged
//   TD >  TS : zero-extended; with a UUID the zeros are inserted between .uuid and .value
//   TD <  TS : truncated to the low bits; with a UUID only .value is truncated and .uuid is kept
// Response tag (dst -> src): the inverse resize (truncate when TD > TS, zero-extend when TD < TS).
// All other request/response fields and the handshake signals are passed straight through.
// The if/else chains are generate-if constructs, so TD, TS and UUID must be elaboration-time
// constants. The last assign has no trailing ';' so the caller supplies it.
`define ASSIGN_VX_MEM_BUS_IF_EX(dst, src, TD, TS, UUID) \
assign dst.req_valid = src.req_valid; \
assign dst.req_data.rw = src.req_data.rw; \
assign dst.req_data.addr = src.req_data.addr; \
assign dst.req_data.data = src.req_data.data; \
assign dst.req_data.byteen = src.req_data.byteen; \
assign dst.req_data.flags = src.req_data.flags; \
/* verilator lint_off GENUNNAMED */ \
if (TD != TS) begin \
if (UUID != 0) begin \
if (TD > TS) begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, {(TD-TS){1'b0}}, src.req_data.tag.value}; \
end else begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, src.req_data.tag.value[TD-UUID-1:0]}; \
end \
end else begin \
if (TD > TS) begin \
assign dst.req_data.tag = {{(TD-TS){1'b0}}, src.req_data.tag}; \
end else begin \
assign dst.req_data.tag = src.req_data.tag[TD-1:0]; \
end \
end \
end else begin \
assign dst.req_data.tag = src.req_data.tag; \
end \
/* verilator lint_on GENUNNAMED */ \
assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data.data = dst.rsp_data.data; \
/* verilator lint_off GENUNNAMED */ \
if (TD != TS) begin \
if (UUID != 0) begin \
if (TD > TS) begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, dst.rsp_data.tag.value[TS-UUID-1:0]}; \
end else begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, {(TS-TD){1'b0}}, dst.rsp_data.tag.value}; \
end \
end else begin \
if (TD > TS) begin \
assign src.rsp_data.tag = dst.rsp_data.tag[TS-1:0]; \
end else begin \
assign src.rsp_data.tag = {{(TS-TD){1'b0}}, dst.rsp_data.tag}; \
end \
end \
end else begin \
assign src.rsp_data.tag = dst.rsp_data.tag; \
end \
/* verilator lint_on GENUNNAMED */ \
assign dst.rsp_ready = src.rsp_ready
// Ties off the requester side of a memory bus interface: never issues a request
// (req_valid = 0, req_data = 0) and never accepts a response (rsp_ready = 0).
// The signals driven by the other side are marked unused.
// Unlike the ASSIGN_* macros, this one ends with its own ';'.
`define INIT_VX_MEM_BUS_IF(itf) \
assign itf.req_valid = 0; \
assign itf.req_data = '0; \
`UNUSED_VAR (itf.req_ready) \
`UNUSED_VAR (itf.rsp_valid) \
`UNUSED_VAR (itf.rsp_data) \
assign itf.rsp_ready = 0;
// Ties off the responder side of a memory bus interface: never accepts a request
// (req_ready = 0) and never returns a response (rsp_valid = 0, rsp_data = 0).
// The signals driven by the other side are marked unused.
`define UNUSED_VX_MEM_BUS_IF(itf) \
`UNUSED_VAR (itf.req_valid) \
`UNUSED_VAR (itf.req_data) \
assign itf.req_ready = 0; \
assign itf.rsp_valid = 0; \
assign itf.rsp_data = '0; \
`UNUSED_VAR (itf.rsp_ready)
// Forwards a DCR write bus (write_valid, write_addr, write_data) from src to dst.
//   ena     : gates write_valid (dst only sees a write when src.write_valid && ena)
//   latency : number of register stages; 0 selects a purely combinational connection
// When latency != 0 a VX_pipe_register (always enabled, reset tied low) is inserted; it uses
// `clk` from the enclosing scope. The if/else is a generate-if, so latency must be a constant.
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
/* verilator lint_off GENUNNAMED */ \
if (latency != 0) begin \
VX_pipe_register #( \
.DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \
.DEPTH (latency) \
) pipe_reg ( \
.clk (clk), \
.reset (1'b0), \
.enable (1'b1), \
.data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \
.data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
); \
end else begin \
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
end \
/* verilator lint_on GENUNNAMED */
// Sums one performance-counter field across an array of sources.
//   dst        : destination; dst.<field> receives the sum
//   src        : array of sources; src[i].<field> is read for i in [0, count)
//   field      : name of the field to accumulate
//   width      : bit width of the field
//   count      : number of sources
//   reg_enable : when true, the sum is registered (cleared on reset) before driving dst
// For count > 1 the values are added with a VX_reduce_tree (OP "+"); otherwise src[0].<field>
// is passed through directly. Uses `clk` and `reset` from the enclosing scope. The temporaries
// live inside an unnamed generate block, so each expansion gets its own scope.
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
/* verilator lint_off GENUNNAMED */ \
if (count > 1) begin \
wire [count-1:0][width-1:0] __reduce_add_i_field; \
wire [width-1:0] __reduce_add_o_field; \
for (genvar __i = 0; __i < count; ++__i) begin \
assign __reduce_add_i_field[__i] = src[__i].``field; \
end \
VX_reduce_tree #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
__reduce_add_i_field, \
__reduce_add_o_field \
); \
if (reg_enable) begin \
reg [width-1:0] __reduce_add_r_field; \
always @(posedge clk) begin \
if (reset) begin \
__reduce_add_r_field <= '0; \
end else begin \
__reduce_add_r_field <= __reduce_add_o_field; \
end \
end \
assign dst.``field = __reduce_add_r_field; \
end else begin \
assign dst.``field = __reduce_add_o_field; \
end \
end else begin \
assign dst.``field = src[0].``field; \
end \
/* verilator lint_on GENUNNAMED */
// Builds a warp ID (dst) from src and a block index.
//   block_size == 1          : dst = src (unchanged)
//   block_size == `NUM_WARPS : dst = block_idx, cast to `NW_WIDTH bits
//   otherwise                : the low `CLOG2(block_size) bits of dst are block_idx and the
//                              remaining upper bits are taken from the same positions of src
// The if/else is a generate-if, so block_size must be an elaboration-time constant.
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
/* verilator lint_off GENUNNAMED */ \
if (block_size != 1) begin \
if (block_size != `NUM_WARPS) begin \
assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
end else begin \
assign dst = `NW_WIDTH'(block_idx); \
end \
end else begin \
assign dst = src; \
end \
/* verilator lint_on GENUNNAMED */
`endif // VX_DEFINE_VH

View file

@ -1,159 +0,0 @@
`include "VX_define.vh"
// VX_dispatch: routes the instruction at the head of the instruction buffer, together with its
// register operands, to the execution unit selected by ibuffer_if.ex_type. Each unit is fed
// through its own VX_skid_buffer with a registered output.
module VX_dispatch (
    input wire              clk,
    input wire              reset,

    // inputs
    VX_ibuffer_if.slave     ibuffer_if,
    VX_gpr_rsp_if.slave     gpr_rsp_if,

    // outputs
    VX_alu_req_if.master    alu_req_if,
    VX_lsu_req_if.master    lsu_req_if,
    VX_csr_req_if.master    csr_req_if,
`ifdef EXT_F_ENABLE
    VX_fpu_req_if.master    fpu_req_if,
`endif
    VX_gpu_req_if.master    gpu_req_if
);
    // Payload widths of the per-unit skid buffers (must match the concatenations below)
    localparam ALU_DATAW = `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32);
    localparam LSU_DATAW = `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1;
    localparam CSR_DATAW = `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32;
`ifdef EXT_F_ENABLE
    localparam FPU_DATAW = `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32);
`endif
    localparam GPU_DATAW = `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32);

    // Input-side ready of each unit's skid buffer
    wire alu_in_ready;
    wire lsu_in_ready;
    wire csr_in_ready;
`ifdef EXT_F_ENABLE
    wire fpu_in_ready;
`endif
    wire gpu_in_ready;

    // Thread index derived from the thread mask by VX_lzc
    wire [`NT_BITS-1:0] sel_tid;

    VX_lzc #(
        .N (`NUM_THREADS)
    ) tid_select (
        .in_i  (ibuffer_if.tmask),
        .cnt_o (sel_tid),
        `UNUSED_PIN (valid_o)
    );

    // Address of the instruction following the current one
    wire [31:0] pc_plus4;
    assign pc_plus4 = ibuffer_if.PC + 4;

    ///////////////////////////////// ALU /////////////////////////////////////

    wire alu_in_valid;
    assign alu_in_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);

    wire [`INST_ALU_BITS-1:0] alu_op;
    assign alu_op = `INST_ALU_BITS'(ibuffer_if.op_type);

    VX_skid_buffer #(
        .DATAW   (ALU_DATAW),
        .OUT_REG (1)
    ) alu_buffer (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (alu_in_valid),
        .ready_in  (alu_in_ready),
        .data_in   ({ibuffer_if.uuid,
                     ibuffer_if.wid,
                     ibuffer_if.tmask,
                     ibuffer_if.PC,
                     pc_plus4,
                     alu_op,
                     ibuffer_if.op_mod,
                     ibuffer_if.imm,
                     ibuffer_if.use_PC,
                     ibuffer_if.use_imm,
                     ibuffer_if.rd,
                     ibuffer_if.wb,
                     sel_tid,
                     gpr_rsp_if.rs1_data,
                     gpr_rsp_if.rs2_data}),
        .data_out  ({alu_req_if.uuid,
                     alu_req_if.wid,
                     alu_req_if.tmask,
                     alu_req_if.PC,
                     alu_req_if.next_PC,
                     alu_req_if.op_type,
                     alu_req_if.op_mod,
                     alu_req_if.imm,
                     alu_req_if.use_PC,
                     alu_req_if.use_imm,
                     alu_req_if.rd,
                     alu_req_if.wb,
                     alu_req_if.tid,
                     alu_req_if.rs1_data,
                     alu_req_if.rs2_data}),
        .valid_out (alu_req_if.valid),
        .ready_out (alu_req_if.ready)
    );

    ///////////////////////////////// LSU /////////////////////////////////////

    wire lsu_in_valid;
    assign lsu_in_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);

    wire [`INST_LSU_BITS-1:0] lsu_op;
    assign lsu_op = `INST_LSU_BITS'(ibuffer_if.op_type);

    // fence / prefetch are encoded in op_mod
    wire lsu_fence;
    wire lsu_prefetch;
    assign lsu_fence    = `INST_LSU_IS_FENCE(ibuffer_if.op_mod);
    assign lsu_prefetch = `INST_LSU_IS_PREFETCH(ibuffer_if.op_mod);

    VX_skid_buffer #(
        .DATAW   (LSU_DATAW),
        .OUT_REG (1)
    ) lsu_buffer (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (lsu_in_valid),
        .ready_in  (lsu_in_ready),
        .data_in   ({ibuffer_if.uuid,
                     ibuffer_if.wid,
                     ibuffer_if.tmask,
                     ibuffer_if.PC,
                     lsu_op,
                     lsu_fence,
                     ibuffer_if.imm,
                     ibuffer_if.rd,
                     ibuffer_if.wb,
                     gpr_rsp_if.rs1_data,
                     gpr_rsp_if.rs2_data,
                     lsu_prefetch}),
        .data_out  ({lsu_req_if.uuid,
                     lsu_req_if.wid,
                     lsu_req_if.tmask,
                     lsu_req_if.PC,
                     lsu_req_if.op_type,
                     lsu_req_if.is_fence,
                     lsu_req_if.offset,
                     lsu_req_if.rd,
                     lsu_req_if.wb,
                     lsu_req_if.base_addr,
                     lsu_req_if.store_data,
                     lsu_req_if.is_prefetch}),
        .valid_out (lsu_req_if.valid),
        .ready_out (lsu_req_if.ready)
    );

    ///////////////////////////////// CSR /////////////////////////////////////

    wire csr_in_valid;
    assign csr_in_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);

    wire [`INST_CSR_BITS-1:0] csr_op;
    assign csr_op = `INST_CSR_BITS'(ibuffer_if.op_type);

    // CSR address and immediate are both packed into the instruction immediate
    wire [`CSR_ADDR_BITS-1:0] csr_address;
    wire [`NRI_BITS-1:0]      csr_immediate;
    assign csr_address   = ibuffer_if.imm[`CSR_ADDR_BITS-1:0];
    assign csr_immediate = ibuffer_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];

    // CSR operations use the rs1 value of the selected thread only
    wire [31:0] csr_rs1;
    assign csr_rs1 = gpr_rsp_if.rs1_data[sel_tid];

    VX_skid_buffer #(
        .DATAW   (CSR_DATAW),
        .OUT_REG (1)
    ) csr_buffer (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (csr_in_valid),
        .ready_in  (csr_in_ready),
        .data_in   ({ibuffer_if.uuid,
                     ibuffer_if.wid,
                     ibuffer_if.tmask,
                     ibuffer_if.PC,
                     csr_op,
                     csr_address,
                     ibuffer_if.rd,
                     ibuffer_if.wb,
                     ibuffer_if.use_imm,
                     csr_immediate,
                     csr_rs1}),
        .data_out  ({csr_req_if.uuid,
                     csr_req_if.wid,
                     csr_req_if.tmask,
                     csr_req_if.PC,
                     csr_req_if.op_type,
                     csr_req_if.addr,
                     csr_req_if.rd,
                     csr_req_if.wb,
                     csr_req_if.use_imm,
                     csr_req_if.imm,
                     csr_req_if.rs1_data}),
        .valid_out (csr_req_if.valid),
        .ready_out (csr_req_if.ready)
    );

    ///////////////////////////////// FPU /////////////////////////////////////

`ifdef EXT_F_ENABLE
    wire fpu_in_valid;
    assign fpu_in_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);

    wire [`INST_FPU_BITS-1:0] fpu_op;
    assign fpu_op = `INST_FPU_BITS'(ibuffer_if.op_type);

    VX_skid_buffer #(
        .DATAW   (FPU_DATAW),
        .OUT_REG (1)
    ) fpu_buffer (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (fpu_in_valid),
        .ready_in  (fpu_in_ready),
        .data_in   ({ibuffer_if.uuid,
                     ibuffer_if.wid,
                     ibuffer_if.tmask,
                     ibuffer_if.PC,
                     fpu_op,
                     ibuffer_if.op_mod,
                     ibuffer_if.rd,
                     ibuffer_if.wb,
                     gpr_rsp_if.rs1_data,
                     gpr_rsp_if.rs2_data,
                     gpr_rsp_if.rs3_data}),
        .data_out  ({fpu_req_if.uuid,
                     fpu_req_if.wid,
                     fpu_req_if.tmask,
                     fpu_req_if.PC,
                     fpu_req_if.op_type,
                     fpu_req_if.op_mod,
                     fpu_req_if.rd,
                     fpu_req_if.wb,
                     fpu_req_if.rs1_data,
                     fpu_req_if.rs2_data,
                     fpu_req_if.rs3_data}),
        .valid_out (fpu_req_if.valid),
        .ready_out (fpu_req_if.ready)
    );
`else
    `UNUSED_VAR (gpr_rsp_if.rs3_data)
`endif

    ///////////////////////////////// GPU /////////////////////////////////////

    wire gpu_in_valid;
    assign gpu_in_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);

    wire [`INST_GPU_BITS-1:0] gpu_op;
    assign gpu_op = `INST_GPU_BITS'(ibuffer_if.op_type);

    VX_skid_buffer #(
        .DATAW   (GPU_DATAW),
        .OUT_REG (1)
    ) gpu_buffer (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (gpu_in_valid),
        .ready_in  (gpu_in_ready),
        .data_in   ({ibuffer_if.uuid,
                     ibuffer_if.wid,
                     ibuffer_if.tmask,
                     ibuffer_if.PC,
                     pc_plus4,
                     gpu_op,
                     ibuffer_if.op_mod,
                     ibuffer_if.rd,
                     ibuffer_if.wb,
                     sel_tid,
                     gpr_rsp_if.rs1_data,
                     gpr_rsp_if.rs2_data,
                     gpr_rsp_if.rs3_data}),
        .data_out  ({gpu_req_if.uuid,
                     gpu_req_if.wid,
                     gpu_req_if.tmask,
                     gpu_req_if.PC,
                     gpu_req_if.next_PC,
                     gpu_req_if.op_type,
                     gpu_req_if.op_mod,
                     gpu_req_if.rd,
                     gpu_req_if.wb,
                     gpu_req_if.tid,
                     gpu_req_if.rs1_data,
                     gpu_req_if.rs2_data,
                     gpu_req_if.rs3_data}),
        .valid_out (gpu_req_if.valid),
        .ready_out (gpu_req_if.ready)
    );

    ///////////////////////////// back-pressure ///////////////////////////////

    // The instruction buffer may advance when the unit targeted by the current
    // instruction can accept it; any other ex_type is accepted unconditionally.
    reg ibuf_ready;
    always @(*) begin
        case (ibuffer_if.ex_type)
        `EX_ALU: ibuf_ready = alu_in_ready;
        `EX_LSU: ibuf_ready = lsu_in_ready;
        `EX_CSR: ibuf_ready = csr_in_ready;
    `ifdef EXT_F_ENABLE
        `EX_FPU: ibuf_ready = fpu_in_ready;
    `endif
        `EX_GPU: ibuf_ready = gpu_in_ready;
        default: ibuf_ready = 1'b1; // ignore NOPs
        endcase
    end

    assign ibuffer_if.ready = ibuf_ready;

endmodule

View file

@ -1,237 +0,0 @@
`include "VX_define.vh"
// VX_execute: instantiates the execution units (ALU, LSU, CSR, optional FPU, GPU) and wires
// them to their request/commit interfaces. When EXT_TEX_ENABLE is defined, the LSU and the
// texture path inside the GPU unit share the D-cache port through a 2-input VX_cache_arb.
module VX_execute #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_VX_execute

    input wire clk,
    input wire reset,

    // Dcache interface
    VX_dcache_req_if.master dcache_req_if,
    VX_dcache_rsp_if.slave  dcache_rsp_if,

    // commit interface
    VX_cmt_to_csr_if.slave  cmt_to_csr_if,

    // fetch interface
    VX_fetch_to_csr_if.slave fetch_to_csr_if,

`ifdef PERF_ENABLE
    VX_perf_memsys_if.slave perf_memsys_if,
    VX_perf_pipeline_if.slave perf_pipeline_if,
`endif

    // inputs
    VX_alu_req_if.slave     alu_req_if,
    VX_lsu_req_if.slave     lsu_req_if,
    VX_csr_req_if.slave     csr_req_if,
`ifdef EXT_F_ENABLE
    VX_fpu_req_if.slave     fpu_req_if,
`endif
    VX_gpu_req_if.slave     gpu_req_if,

    // outputs
    VX_branch_ctl_if.master branch_ctl_if,
    VX_warp_ctl_if.master   warp_ctl_if,
    VX_commit_if.master     alu_commit_if,
    VX_commit_if.master     ld_commit_if,
    VX_commit_if.master     st_commit_if,
    VX_commit_if.master     csr_commit_if,
`ifdef EXT_F_ENABLE
    VX_commit_if.master     fpu_commit_if,
`endif
    VX_commit_if.master     gpu_commit_if,

    input wire              busy
);

`ifdef EXT_TEX_ENABLE

    // Private D-cache ports of the LSU and of the texture path

    VX_dcache_req_if #(
        .NUM_REQS  (`NUM_THREADS),
        .WORD_SIZE (4),
        .TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
    ) lsu_dcache_req_if();

    VX_dcache_rsp_if #(
        .NUM_REQS  (`NUM_THREADS),
        .WORD_SIZE (4),
        .TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
    ) lsu_dcache_rsp_if();

    VX_dcache_req_if #(
        .NUM_REQS  (`NUM_THREADS),
        .WORD_SIZE (4),
        .TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
    ) tex_dcache_req_if();

    VX_dcache_rsp_if #(
        .NUM_REQS  (`NUM_THREADS),
        .WORD_SIZE (4),
        .TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
    ) tex_dcache_rsp_if();

    VX_tex_csr_if tex_csr_if();

`ifdef PERF_ENABLE
    VX_perf_tex_if perf_tex_if();
`endif

    // Input index 0 is the LSU, index 1 is the texture path
    VX_cache_arb #(
        .NUM_REQS     (2),
        .LANES        (`NUM_THREADS),
        .DATA_SIZE    (4),
        .TAG_IN_WIDTH (`LSU_TEX_DCACHE_TAG_BITS),
        .TAG_SEL_IDX  (`NC_TAG_BIT + `SM_ENABLE)
    ) tex_lsu_arb (
        .clk            (clk),
        .reset          (reset),

        // Tex/LSU request
        .req_valid_in   ({tex_dcache_req_if.valid,  lsu_dcache_req_if.valid}),
        .req_rw_in      ({tex_dcache_req_if.rw,     lsu_dcache_req_if.rw}),
        .req_byteen_in  ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}),
        .req_addr_in    ({tex_dcache_req_if.addr,   lsu_dcache_req_if.addr}),
        .req_data_in    ({tex_dcache_req_if.data,   lsu_dcache_req_if.data}),
        .req_tag_in     ({tex_dcache_req_if.tag,    lsu_dcache_req_if.tag}),
        .req_ready_in   ({tex_dcache_req_if.ready,  lsu_dcache_req_if.ready}),

        // Dcache request
        .req_valid_out  (dcache_req_if.valid),
        .req_rw_out     (dcache_req_if.rw),
        .req_byteen_out (dcache_req_if.byteen),
        .req_addr_out   (dcache_req_if.addr),
        .req_data_out   (dcache_req_if.data),
        .req_tag_out    (dcache_req_if.tag),
        .req_ready_out  (dcache_req_if.ready),

        // Dcache response
        .rsp_valid_in   (dcache_rsp_if.valid),
        .rsp_tmask_in   (dcache_rsp_if.tmask),
        .rsp_tag_in     (dcache_rsp_if.tag),
        .rsp_data_in    (dcache_rsp_if.data),
        .rsp_ready_in   (dcache_rsp_if.ready),

        // Tex/LSU response
        .rsp_valid_out  ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}),
        .rsp_tmask_out  ({tex_dcache_rsp_if.tmask, lsu_dcache_rsp_if.tmask}),
        .rsp_data_out   ({tex_dcache_rsp_if.data,  lsu_dcache_rsp_if.data}),
        .rsp_tag_out    ({tex_dcache_rsp_if.tag,   lsu_dcache_rsp_if.tag}),
        .rsp_ready_out  ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready})
    );

`endif

`ifdef EXT_F_ENABLE
    // Per-warp pending flags exchanged between the CSR unit and the FPU unit
    wire [`NUM_WARPS-1:0] csr_pending;
    wire [`NUM_WARPS-1:0] fpu_pending;
    VX_fpu_to_csr_if fpu_to_csr_if();
`endif

    // Each unit gets its own relayed copy of reset
    `RESET_RELAY (alu_reset);
    `RESET_RELAY (lsu_reset);
    `RESET_RELAY (csr_reset);
    `RESET_RELAY (gpu_reset);

    //////////////////////////////// ALU //////////////////////////////////////

    VX_alu_unit #(
        .CORE_ID(CORE_ID)
    ) alu_unit (
        .clk            (clk),
        .reset          (alu_reset),
        .alu_req_if     (alu_req_if),
        .branch_ctl_if  (branch_ctl_if),
        .alu_commit_if  (alu_commit_if)
    );

    //////////////////////////////// LSU //////////////////////////////////////

    VX_lsu_unit #(
        .CORE_ID(CORE_ID)
    ) lsu_unit (
        `SCOPE_BIND_VX_execute_lsu_unit
        .clk            (clk),
        .reset          (lsu_reset),
    `ifdef EXT_TEX_ENABLE
        .dcache_req_if  (lsu_dcache_req_if),
        .dcache_rsp_if  (lsu_dcache_rsp_if),
    `else
        .dcache_req_if  (dcache_req_if),
        .dcache_rsp_if  (dcache_rsp_if),
    `endif
        .lsu_req_if     (lsu_req_if),
        .ld_commit_if   (ld_commit_if),
        .st_commit_if   (st_commit_if)
    );

    //////////////////////////////// CSR //////////////////////////////////////

    VX_csr_unit #(
        .CORE_ID(CORE_ID)
    ) csr_unit (
        .clk            (clk),
        .reset          (csr_reset),
    `ifdef PERF_ENABLE
    `ifdef EXT_TEX_ENABLE
        .perf_tex_if    (perf_tex_if),
    `endif
        .perf_memsys_if (perf_memsys_if),
        .perf_pipeline_if(perf_pipeline_if),
    `endif
        .cmt_to_csr_if  (cmt_to_csr_if),
        .fetch_to_csr_if(fetch_to_csr_if),
        .csr_req_if     (csr_req_if),
        .csr_commit_if  (csr_commit_if),
    `ifdef EXT_F_ENABLE
        .fpu_to_csr_if  (fpu_to_csr_if),
        .fpu_pending    (fpu_pending),
        .pending        (csr_pending),
    `else
        `UNUSED_PIN     (pending),
    `endif
    `ifdef EXT_TEX_ENABLE
        .tex_csr_if     (tex_csr_if),
    `endif
        .busy           (busy)
    );

    //////////////////////////////// FPU //////////////////////////////////////

`ifdef EXT_F_ENABLE
    `RESET_RELAY (fpu_reset);

    VX_fpu_unit #(
        .CORE_ID(CORE_ID)
    ) fpu_unit (
        .clk            (clk),
        .reset          (fpu_reset),
        .fpu_req_if     (fpu_req_if),
        .fpu_to_csr_if  (fpu_to_csr_if),
        .fpu_commit_if  (fpu_commit_if),
        .csr_pending    (csr_pending),
        .pending        (fpu_pending)
    );
`endif

    //////////////////////////////// GPU //////////////////////////////////////

    VX_gpu_unit #(
        .CORE_ID(CORE_ID)
    ) gpu_unit (
        `SCOPE_BIND_VX_execute_gpu_unit
        .clk            (clk),
        .reset          (gpu_reset),
        .gpu_req_if     (gpu_req_if),
    `ifdef EXT_TEX_ENABLE
    `ifdef PERF_ENABLE
        .perf_tex_if    (perf_tex_if),
    `endif
        .tex_csr_if     (tex_csr_if),
        .dcache_req_if  (tex_dcache_req_if),
        .dcache_rsp_if  (tex_dcache_rsp_if),
    `endif
        .warp_ctl_if    (warp_ctl_if),
        .gpu_commit_if  (gpu_commit_if)
    );

    // special workaround to get RISC-V tests Pass/Fail status:
    // `ebreak` is raised when an accepted ALU request is a branch-class
    // operation whose op_type is EBREAK or ECALL.
    wire ebreak /* verilator public */;

    wire alu_req_fire   = alu_req_if.valid && alu_req_if.ready;
    wire alu_is_branch  = `INST_ALU_IS_BR(alu_req_if.op_mod);
    wire br_is_ebreak   = (`INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_EBREAK);
    wire br_is_ecall    = (`INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_ECALL);

    assign ebreak = alu_req_fire && alu_is_branch && (br_is_ebreak || br_is_ecall);

endmodule

View file

@ -1,68 +0,0 @@
`include "VX_define.vh"
// VX_fetch: front-end wrapper. VX_warp_sched produces fetch requests (ifetch_req_if) and
// VX_icache_stage turns them into I-cache accesses and returns fetch responses.
module VX_fetch #(
    parameter CORE_ID = 0
) (
    `SCOPE_IO_VX_fetch

    input wire clk,
    input wire reset,

    // Icache interface
    VX_icache_req_if.master icache_req_if,
    VX_icache_rsp_if.slave  icache_rsp_if,

    // inputs
    VX_wstall_if.slave      wstall_if,
    VX_join_if.slave        join_if,
    VX_branch_ctl_if.slave  branch_ctl_if,
    VX_warp_ctl_if.slave    warp_ctl_if,

    // outputs
    VX_ifetch_rsp_if.master ifetch_rsp_if,

    // csr interface
    VX_fetch_to_csr_if.master fetch_to_csr_if,

    // busy status
    output wire             busy
);

    // Internal link: warp scheduler -> I-cache stage
    VX_ifetch_req_if ifetch_req_if();

    // Warp scheduler: consumes the control interfaces and drives fetch requests,
    // the CSR side-band and the busy flag
    VX_warp_sched #(
        .CORE_ID (CORE_ID)
    ) warp_sched (
        `SCOPE_BIND_VX_fetch_warp_sched

        .clk             (clk),
        .reset           (reset),

        .branch_ctl_if   (branch_ctl_if),
        .warp_ctl_if     (warp_ctl_if),
        .join_if         (join_if),
        .wstall_if       (wstall_if),

        .ifetch_req_if   (ifetch_req_if),
        .fetch_to_csr_if (fetch_to_csr_if),

        .busy            (busy)
    );

    // I-cache stage: bridges fetch requests/responses to the I-cache interfaces
    VX_icache_stage #(
        .CORE_ID (CORE_ID)
    ) icache_stage (
        `SCOPE_BIND_VX_fetch_icache_stage

        .clk           (clk),
        .reset         (reset),

        .ifetch_req_if (ifetch_req_if),
        .ifetch_rsp_if (ifetch_rsp_if),

        .icache_req_if (icache_req_if),
        .icache_rsp_if (icache_rsp_if)
    );

endmodule

View file

@ -1,219 +0,0 @@
`include "VX_define.vh"
// VX_fpu_unit: front-end of the floating-point unit.
//
// - Request metadata (uuid, wid, tmask, PC, rd, wb) is parked in a VX_index_buffer while the
//   operation executes; the allocated slot index travels through the FPU backend as its tag.
// - The backend is selected at compile time: VX_fpu_dpi (FPU_DPI), VX_fpu_fpnew (FPU_FPNEW)
//   or VX_fpu_fpga (default).
// - Results are registered and presented on fpu_commit_if; exception flags of the active
//   threads are OR-reduced and written back through fpu_to_csr_if.
//
// Ports:
//   fpu_req_if     incoming FPU requests
//   fpu_to_csr_if  rounding-mode read and fflags write side-band to the CSR unit
//   fpu_commit_if  result commit port
//   csr_pending    per-warp flag from the CSR unit; a request is held off while its warp's bit is set
//   pending        per-warp flag, set while the warp has at least one FPU request in flight
module VX_fpu_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    VX_fpu_req_if.slave     fpu_req_if,
    VX_fpu_to_csr_if.master fpu_to_csr_if,
    VX_commit_if.master     fpu_commit_if,

    input wire[`NUM_WARPS-1:0] csr_pending,
    output wire[`NUM_WARPS-1:0] pending
);
    import fpu_types::*;

    `UNUSED_PARAM (CORE_ID)

    localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE);

    // A warp can have up to `FPUQ_SIZE requests parked in the metadata queue plus one more
    // sitting in the output register, so the in-flight counter must represent
    // 0 .. `FPUQ_SIZE + 1.
    localparam PENDING_BITS = $clog2(`FPUQ_SIZE + 2);

    wire ready_in;
    wire valid_out;
    wire ready_out;

    wire [`UUID_BITS-1:0]   rsp_uuid;
    wire [`NW_BITS-1:0]     rsp_wid;
    wire [`NUM_THREADS-1:0] rsp_tmask;
    wire [31:0]             rsp_PC;
    wire [`NR_BITS-1:0]     rsp_rd;
    wire                    rsp_wb;

    wire has_fflags;
    fflags_t [`NUM_THREADS-1:0] fflags;
    wire [`NUM_THREADS-1:0][31:0] result;

    wire [FPUQ_BITS-1:0] tag_in, tag_out;
    wire fpuq_full;

    wire fpuq_push = fpu_req_if.valid && fpu_req_if.ready;
    wire fpuq_pop  = valid_out && ready_out;

    // Request metadata queue, indexed by the tag that accompanies the operation
    VX_index_buffer #(
        .DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
        .SIZE  (`FPUQ_SIZE)
    ) req_metadata (
        .clk          (clk),
        .reset        (reset),
        .acquire_slot (fpuq_push),
        .write_addr   (tag_in),
        .read_addr    (tag_out),
        .release_addr (tag_out),
        .write_data   ({fpu_req_if.uuid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
        .read_data    ({rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
        .release_slot (fpuq_pop),
        .full         (fpuq_full),
        `UNUSED_PIN (empty)
    );

    // can accept new request?
    assign fpu_req_if.ready = ready_in && ~fpuq_full && !csr_pending[fpu_req_if.wid];
    wire valid_in = fpu_req_if.valid && ~fpuq_full && !csr_pending[fpu_req_if.wid];

    // resolve dynamic FRM from CSR
    assign fpu_to_csr_if.read_wid = fpu_req_if.wid;
    wire [`INST_FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `INST_FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;

`ifdef FPU_DPI

    VX_fpu_dpi #(
        .TAGW (FPUQ_BITS)
    ) fpu_dpi (
        .clk        (clk),
        .reset      (reset),

        .valid_in   (valid_in),
        .ready_in   (ready_in),

        .tag_in     (tag_in),

        .op_type    (fpu_req_if.op_type),
        .frm        (fpu_frm),

        .dataa      (fpu_req_if.rs1_data),
        .datab      (fpu_req_if.rs2_data),
        .datac      (fpu_req_if.rs3_data),
        .result     (result),

        .has_fflags (has_fflags),
        .fflags     (fflags),

        .tag_out    (tag_out),

        .ready_out  (ready_out),
        .valid_out  (valid_out)
    );

`elsif FPU_FPNEW

    VX_fpu_fpnew #(
        .FMULADD  (1),
        .FDIVSQRT (1),
        .FNONCOMP (1),
        .FCONV    (1),
        .TAGW     (FPUQ_BITS)
    ) fpu_fpnew (
        .clk        (clk),
        .reset      (reset),

        .valid_in   (valid_in),
        .ready_in   (ready_in),

        .tag_in     (tag_in),

        .op_type    (fpu_req_if.op_type),
        .frm        (fpu_frm),

        .dataa      (fpu_req_if.rs1_data),
        .datab      (fpu_req_if.rs2_data),
        .datac      (fpu_req_if.rs3_data),
        .result     (result),

        .has_fflags (has_fflags),
        .fflags     (fflags),

        .tag_out    (tag_out),

        .ready_out  (ready_out),
        .valid_out  (valid_out)
    );

`else

    VX_fpu_fpga #(
        .TAGW (FPUQ_BITS)
    ) fpu_fpga (
        .clk        (clk),
        .reset      (reset),

        .valid_in   (valid_in),
        .ready_in   (ready_in),

        .tag_in     (tag_in),

        .op_type    (fpu_req_if.op_type),
        .frm        (fpu_frm),

        .dataa      (fpu_req_if.rs1_data),
        .datab      (fpu_req_if.rs2_data),
        .datac      (fpu_req_if.rs3_data),
        .result     (result),

        .has_fflags (has_fflags),
        .fflags     (fflags),

        .tag_out    (tag_out),

        .ready_out  (ready_out),
        .valid_out  (valid_out)
    );

`endif

    reg has_fflags_r;
    fflags_t fflags_r;

    // OR-reduce the exception flags of the threads that are active in the response mask
    fflags_t rsp_fflags;
    always @(*) begin
        rsp_fflags = '0;
        for (integer i = 0; i < `NUM_THREADS; i++) begin
            if (rsp_tmask[i]) begin
                rsp_fflags.NX |= fflags[i].NX;
                rsp_fflags.UF |= fflags[i].UF;
                rsp_fflags.OF |= fflags[i].OF;
                rsp_fflags.DZ |= fflags[i].DZ;
                rsp_fflags.NV |= fflags[i].NV;
            end
        end
    end

    // The output register holds its content while a valid commit is not being accepted
    wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;

    VX_pipe_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall_out),
        .data_in  ({valid_out, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
        .data_out ({fpu_commit_if.valid, fpu_commit_if.uuid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
    );

    assign fpu_commit_if.eop = 1'b1;

    assign ready_out = ~stall_out;

    // CSR fflags Update
    assign fpu_to_csr_if.write_enable = fpu_commit_if.valid && fpu_commit_if.ready && has_fflags_r;
    assign fpu_to_csr_if.write_wid    = fpu_commit_if.wid;
    assign fpu_to_csr_if.write_fflags = fflags_r;

    // pending request
    //
    // Track the number of in-flight requests per warp instead of a single bit. A single bit
    // that is cleared on every commit drops to 0 as soon as the FIRST of several outstanding
    // requests of a warp commits, reporting the warp as idle while later requests are still
    // executing. With a counter, `pending` stays asserted until the last one has committed.
    reg [`NUM_WARPS-1:0][PENDING_BITS-1:0] pending_cnt;

    wire req_fire    = fpu_req_if.valid && fpu_req_if.ready;
    wire commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;

    // issue and commit for the same warp in the same cycle cancel out
    wire same_warp_fire = req_fire && commit_fire && (fpu_req_if.wid == fpu_commit_if.wid);

    always @(posedge clk) begin
        if (reset) begin
            pending_cnt <= '0;
        end else if (!same_warp_fire) begin
            if (commit_fire) begin
                pending_cnt[fpu_commit_if.wid] <= pending_cnt[fpu_commit_if.wid] - PENDING_BITS'(1);
            end
            if (req_fire) begin
                pending_cnt[fpu_req_if.wid] <= pending_cnt[fpu_req_if.wid] + PENDING_BITS'(1);
            end
        end
    end

    // A warp is pending while its in-flight count is non-zero
    reg [`NUM_WARPS-1:0] pending_r;
    always @(*) begin
        for (integer w = 0; w < `NUM_WARPS; w++) begin
            pending_r[w] = (pending_cnt[w] != '0);
        end
    end

    assign pending = pending_r;

endmodule

View file

@ -1,91 +0,0 @@
`include "VX_define.vh"
// VX_gpr_stage: general-purpose register file. Every thread lane owns one dual-port RAM per
// read port (rs1, rs2 and, with EXT_F_ENABLE, rs3); all copies of a lane are written with the
// same data so that each read port can be addressed independently. RAMs are addressed by
// {warp id, register index}.
module VX_gpr_stage #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // inputs
    VX_writeback_if.slave writeback_if,
    VX_gpr_req_if.slave   gpr_req_if,

    // outputs
    VX_gpr_rsp_if.master  gpr_rsp_if
);
    `UNUSED_PARAM (CORE_ID)
    `UNUSED_VAR (reset)

    // One entry per (warp, register) pair
    localparam NUM_ENTRIES = `NUM_WARPS * `NUM_REGS;
    localparam ADDRW       = $clog2(NUM_ENTRIES);

    // ensure r0 never gets written, which can happen before the reset
    wire wb_enable;
    assign wb_enable = writeback_if.valid && (writeback_if.rd != 0);

    // Per-lane write enable: only lanes set in the writeback thread mask are updated
    wire [`NUM_THREADS-1:0] lane_wren;
    for (genvar i = 0; i < `NUM_THREADS; ++i) begin
        assign lane_wren[i] = wb_enable && writeback_if.tmask[i];
    end

    // Write address and the rs1/rs2 read addresses
    wire [ADDRW-1:0] wr_addr;
    wire [ADDRW-1:0] rd_addr1;
    wire [ADDRW-1:0] rd_addr2;

    assign wr_addr  = {writeback_if.wid, writeback_if.rd};
    assign rd_addr1 = {gpr_req_if.wid, gpr_req_if.rs1};
    assign rd_addr2 = {gpr_req_if.wid, gpr_req_if.rs2};

    // rs1 / rs2 storage, one pair of RAMs per lane
    for (genvar i = 0; i < `NUM_THREADS; ++i) begin
        VX_dp_ram #(
            .DATAW       (32),
            .SIZE        (NUM_ENTRIES),
            .INIT_ENABLE (1),
            .INIT_VALUE  (0)
        ) dp_ram1 (
            .clk   (clk),
            .wren  (lane_wren[i]),
            .waddr (wr_addr),
            .wdata (writeback_if.data[i]),
            .raddr (rd_addr1),
            .rdata (gpr_rsp_if.rs1_data[i])
        );

        VX_dp_ram #(
            .DATAW       (32),
            .SIZE        (NUM_ENTRIES),
            .INIT_ENABLE (1),
            .INIT_VALUE  (0)
        ) dp_ram2 (
            .clk   (clk),
            .wren  (lane_wren[i]),
            .waddr (wr_addr),
            .wdata (writeback_if.data[i]),
            .raddr (rd_addr2),
            .rdata (gpr_rsp_if.rs2_data[i])
        );
    end

`ifdef EXT_F_ENABLE
    // Third read port (rs3), only present with the F extension
    wire [ADDRW-1:0] rd_addr3;
    assign rd_addr3 = {gpr_req_if.wid, gpr_req_if.rs3};

    for (genvar i = 0; i < `NUM_THREADS; ++i) begin
        VX_dp_ram #(
            .DATAW       (32),
            .SIZE        (NUM_ENTRIES),
            .INIT_ENABLE (1),
            .INIT_VALUE  (0)
        ) dp_ram3 (
            .clk   (clk),
            .wren  (lane_wren[i]),
            .waddr (wr_addr),
            .wdata (writeback_if.data[i]),
            .raddr (rd_addr3),
            .rdata (gpr_rsp_if.rs3_data[i])
        );
    end
`else
    // No third read port: rs3 is ignored and its data output is left undefined
    `UNUSED_VAR (gpr_req_if.rs3)
    assign gpr_rsp_if.rs3_data = 'x;
`endif

    // Writebacks are always accepted
    assign writeback_if.ready = 1'b1;

endmodule

770
hw/rtl/VX_gpu_pkg.sv Normal file
View file

@ -0,0 +1,770 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`ifndef VX_GPU_PKG_VH
`define VX_GPU_PKG_VH
`include "VX_define.vh"
package VX_gpu_pkg;
// Packed record: a valid bit plus a `NUM_THREADS-wide mask (one bit per thread).
// NOTE(review): presumably the payload of a thread-mask-control operation - confirm at use site.
// Fields of a packed struct are laid out MSB-first in declaration order.
typedef struct packed {
logic valid;
logic [`NUM_THREADS-1:0] tmask;
} tmc_t;
// Packed record: a valid bit, a `NUM_WARPS-wide warp mask (one bit per warp) and a
// `PC_BITS-wide program counter.
// NOTE(review): presumably the payload of a warp-spawn operation - confirm at use site.
typedef struct packed {
logic valid;
logic [`NUM_WARPS-1:0] wmask;
logic [`PC_BITS-1:0] pc;
} wspawn_t;
// Packed record: a valid bit, an is_dvg flag, two `NUM_THREADS-wide thread masks
// (then_tmask / else_tmask) and a `PC_BITS-wide next_pc.
// NOTE(review): presumably describes a divergent-branch split (is_dvg = divergent) - confirm.
typedef struct packed {
logic valid;
logic is_dvg;
logic [`NUM_THREADS-1:0] then_tmask;
logic [`NUM_THREADS-1:0] else_tmask;
logic [`PC_BITS-1:0] next_pc;
} split_t;
// Packed record: a valid bit plus a `DV_STACK_SIZEW-wide stack pointer.
// NOTE(review): presumably the counterpart of split_t for re-convergence - confirm at use site.
typedef struct packed {
logic valid;
logic [`DV_STACK_SIZEW-1:0] stack_ptr;
} join_t;
// Packed barrier record: valid bit, `NB_WIDTH-wide barrier id, is_global flag, size_m1 and
// is_noop flag. The width of size_m1 depends on the configuration: with GBAR_ENABLE it is the
// larger of `NW_WIDTH and `NC_WIDTH, otherwise `NW_WIDTH.
// NOTE(review): "size_m1" presumably holds the participant count minus one - confirm.
typedef struct packed {
logic valid;
logic [`NB_WIDTH-1:0] id;
logic is_global;
`ifdef GBAR_ENABLE
logic [`MAX(`NW_WIDTH, `NC_WIDTH)-1:0] size_m1;
`else
logic [`NW_WIDTH-1:0] size_m1;
`endif
logic is_noop;
} barrier_t;
// Packed record of base configuration values: two `XLEN-wide words (startup_addr, startup_arg)
// and an 8-bit mpm_class.
// NOTE(review): presumably mirrors the base device configuration registers (DCRs) - confirm.
typedef struct packed {
logic [`XLEN-1:0] startup_addr;
logic [`XLEN-1:0] startup_arg;
logic [7:0] mpm_class;
} base_dcrs_t;
//////////////////////////// Perf counter types ///////////////////////////
// Cache performance counters; every field is `PERF_CTR_BITS wide.
// Counts reads, writes, read/write misses and four stall categories (bank, MSHR, memory, crsp).
// NOTE(review): "crsp" presumably stands for core-response - confirm against the cache RTL.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] reads;
logic [`PERF_CTR_BITS-1:0] writes;
logic [`PERF_CTR_BITS-1:0] read_misses;
logic [`PERF_CTR_BITS-1:0] write_misses;
logic [`PERF_CTR_BITS-1:0] bank_stalls;
logic [`PERF_CTR_BITS-1:0] mshr_stalls;
logic [`PERF_CTR_BITS-1:0] mem_stalls;
logic [`PERF_CTR_BITS-1:0] crsp_stalls;
} cache_perf_t;
// Local-memory performance counters; every field is `PERF_CTR_BITS wide.
// A subset of cache_perf_t: there are no miss, MSHR-stall or memory-stall fields.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] reads;
logic [`PERF_CTR_BITS-1:0] writes;
logic [`PERF_CTR_BITS-1:0] bank_stalls;
logic [`PERF_CTR_BITS-1:0] crsp_stalls;
} lmem_perf_t;
// Coalescer performance counters: a single `PERF_CTR_BITS-wide miss counter.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] misses;
} coalescer_perf_t;
// Memory performance counters (reads, writes, latency); every field is `PERF_CTR_BITS wide.
// NOTE(review): "latency" is presumably an accumulated cycle count - confirm at the producer.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] reads;
logic [`PERF_CTR_BITS-1:0] writes;
logic [`PERF_CTR_BITS-1:0] latency;
} mem_perf_t;
// Scheduler performance counters (idles, stalls); every field is `PERF_CTR_BITS wide.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] idles;
logic [`PERF_CTR_BITS-1:0] stalls;
} sched_perf_t;
// Issue-stage performance counters. Three scalar stall counters plus two packed arrays:
// units_uses has one counter per execution unit (`NUM_EX_UNITS entries) and sfu_uses one
// counter per SFU unit (`NUM_SFU_UNITS entries). Every counter is `PERF_CTR_BITS wide.
// NOTE(review): ibf/scb/opd presumably abbreviate instruction buffer / scoreboard / operands - confirm.
typedef struct packed {
logic [`PERF_CTR_BITS-1:0] ibf_stalls;
logic [`PERF_CTR_BITS-1:0] scb_stalls;
logic [`PERF_CTR_BITS-1:0] opd_stalls;
logic [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] units_uses;
logic [`NUM_SFU_UNITS-1:0][`PERF_CTR_BITS-1:0] sfu_uses;
} issue_perf_t;
// Aggregate of all memory-system performance counters: one cache_perf_t per cache level
// (icache, dcache, l2cache, l3cache) plus the local-memory, coalescer and memory counter sets.
typedef struct packed {
cache_perf_t icache;
cache_perf_t dcache;
cache_perf_t l2cache;
cache_perf_t l3cache;
lmem_perf_t lmem;
coalescer_perf_t coalescer;
mem_perf_t mem;
} sysmem_perf_t;
// Aggregate of the core-pipeline performance counters: the scheduler and issue counter sets
// followed by five `PERF_CTR_BITS-wide scalar counters (ifetches, loads, stores and the
// ifetch/load latency counters).
typedef struct packed {
sched_perf_t sched;
issue_perf_t issue;
logic [`PERF_CTR_BITS-1:0] ifetches;
logic [`PERF_CTR_BITS-1:0] loads;
logic [`PERF_CTR_BITS-1:0] stores;
logic [`PERF_CTR_BITS-1:0] ifetch_latency;
logic [`PERF_CTR_BITS-1:0] load_latency;
} pipeline_perf_t;
//////////////////////// instruction arguments ////////////////////////////
// ALU instruction arguments.
// The other *_args_t structs size their __padding field from $bits(alu_args_t),
// so this struct sets the common width of the op_args_t union.
typedef struct packed {
logic use_PC;
logic use_imm;
// selects the "W" mnemonics (ADDW, MULW, ...) in trace_ex_op
logic is_w;
// ALU sub-type; trace_ex_op decodes `ALU_TYPE_ARITH / _BRANCH / _MULDIV
logic [`ALU_TYPE_BITS-1:0] xtype;
logic [`IMM_BITS-1:0] imm;
} alu_args_t;
// FPU instruction arguments.
// __padding fills the upper bits so that $bits(fpu_args_t) == $bits(alu_args_t).
typedef struct packed {
logic [($bits(alu_args_t)-`INST_FRM_BITS-`INST_FMT_BITS)-1:0] __padding;
logic [`INST_FRM_BITS-1:0] frm;
logic [`INST_FMT_BITS-1:0] fmt;
} fpu_args_t;
// LSU instruction arguments.
// __padding fills the upper bits so that $bits(lsu_args_t) == $bits(alu_args_t).
typedef struct packed {
logic [($bits(alu_args_t)-1-1-`OFFSET_BITS)-1:0] __padding;
logic is_store;
// selects the floating-point mnemonics (FLW/FLD/FSW/FSD) in trace_ex_op
logic is_float;
logic [`OFFSET_BITS-1:0] offset;
} lsu_args_t;
// CSR instruction arguments.
// __padding fills the upper bits so that $bits(csr_args_t) == $bits(alu_args_t).
typedef struct packed {
logic [($bits(alu_args_t)-1-`VX_CSR_ADDR_BITS-5)-1:0] __padding;
// selects the immediate mnemonics (CSRRWI/CSRRSI/CSRRCI) in trace_ex_op
logic use_imm;
logic [`VX_CSR_ADDR_BITS-1:0] addr;
// 5-bit immediate
logic [4:0] imm;
} csr_args_t;
// Warp-control instruction arguments.
// __padding fills the upper bits so that $bits(wctl_args_t) == $bits(alu_args_t).
typedef struct packed {
logic [($bits(alu_args_t)-1)-1:0] __padding;
// selects the ".N" mnemonics (SPLIT.N / PRED.N) in trace_ex_op
logic is_neg;
} wctl_args_t;
// Per-instruction argument bundle: one packed view per execution-unit type
// over the same bits. Every member is padded to $bits(alu_args_t), so all
// views have the same width.
typedef union packed {
alu_args_t alu;
fpu_args_t fpu;
lsu_args_t lsu;
csr_args_t csr;
wctl_args_t wctl;
} op_args_t;
`IGNORE_UNUSED_BEGIN
///////////////////////// LSU memory Parameters ///////////////////////////
// LSU word size in bytes (`XLEN bits / 8)
localparam LSU_WORD_SIZE = `XLEN / 8;
// Word-address width: byte-address width minus the byte-offset bits of one word
localparam LSU_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(LSU_WORD_SIZE));
localparam LSU_MEM_BATCHES = 1;
// Tag id bits: `LSUQ_IN_SIZE index bits plus batch index bits
localparam LSU_TAG_ID_BITS = (`CLOG2(`LSUQ_IN_SIZE) + `CLOG2(LSU_MEM_BATCHES));
// Full tag: UUID followed by the tag id
localparam LSU_TAG_WIDTH = (`UUID_WIDTH + LSU_TAG_ID_BITS);
// Total request lanes across all LSU blocks
localparam LSU_NUM_REQS = `NUM_LSU_BLOCKS * `NUM_LSU_LANES;
// Local-memory tag: the LSU tag extended by `CLOG2(`NUM_LSU_BLOCKS) bits
localparam LMEM_TAG_WIDTH = LSU_TAG_WIDTH + `CLOG2(`NUM_LSU_BLOCKS);
////////////////////////// Icache Parameters //////////////////////////////
// Word size in bytes
localparam ICACHE_WORD_SIZE = 4;
// Word-address width: byte-address width minus the byte-offset bits of one word
localparam ICACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(ICACHE_WORD_SIZE));
// Block size in bytes
localparam ICACHE_LINE_SIZE = `L1_LINE_SIZE;
// Core request tag Id bits
localparam ICACHE_TAG_ID_BITS = `NW_WIDTH;
// Core request tag bits
localparam ICACHE_TAG_WIDTH = (`UUID_WIDTH + ICACHE_TAG_ID_BITS);
// Memory request data bits
localparam ICACHE_MEM_DATA_WIDTH = (ICACHE_LINE_SIZE * 8);
// Memory request tag bits
// The formula depends on whether the icache is instantiated (ICACHE_ENABLE)
// or bypassed.
`ifdef ICACHE_ENABLE
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, 1, `NUM_ICACHES, `UUID_WIDTH);
`else
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, 1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
`endif
////////////////////////// Dcache Parameters //////////////////////////////
// Word size in bytes
localparam DCACHE_WORD_SIZE = `LSU_LINE_SIZE;
// Word-address width: byte-address width minus the byte-offset bits of one word
localparam DCACHE_ADDR_WIDTH = (`MEM_ADDR_WIDTH - `CLOG2(DCACHE_WORD_SIZE));
// Block size in bytes
localparam DCACHE_LINE_SIZE = `L1_LINE_SIZE;
// Input request size (using coalesced memory blocks)
localparam DCACHE_CHANNELS = `UP((`NUM_LSU_LANES * LSU_WORD_SIZE) / DCACHE_WORD_SIZE);
localparam DCACHE_NUM_REQS = `NUM_LSU_BLOCKS * DCACHE_CHANNELS;
// Core request tag Id bits
// DCACHE_MERGED_REQS is the same quotient as DCACHE_CHANNELS but without the
// `UP() wrapper; DCACHE_MEM_BATCHES is its ceiling division by the channel count.
localparam DCACHE_MERGED_REQS = (`NUM_LSU_LANES * LSU_WORD_SIZE) / DCACHE_WORD_SIZE;
localparam DCACHE_MEM_BATCHES = `CDIV(DCACHE_MERGED_REQS, DCACHE_CHANNELS);
localparam DCACHE_TAG_ID_BITS = (`CLOG2(`LSUQ_OUT_SIZE) + `CLOG2(DCACHE_MEM_BATCHES));
// Core request tag bits
localparam DCACHE_TAG_WIDTH = (`UUID_WIDTH + DCACHE_TAG_ID_BITS);
// Memory request data bits
localparam DCACHE_MEM_DATA_WIDTH = (DCACHE_LINE_SIZE * 8);
// Memory request tag bits
// The formula depends on whether the dcache is instantiated (DCACHE_ENABLE)
// or bypassed.
`ifdef DCACHE_ENABLE
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, `L1_MEM_PORTS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES, `UUID_WIDTH);
`else
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, `L1_MEM_PORTS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
`endif
/////////////////////////////// L1 Parameters /////////////////////////////
// arbitrate between icache and dcache
// Common L1 memory tag width: the wider of the icache and dcache memory tags
localparam L1_MEM_TAG_WIDTH = `MAX(ICACHE_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
// Tag width after the 2-input arbiter: `CLOG2(2) extra bits for the source select
localparam L1_MEM_ARB_TAG_WIDTH = (L1_MEM_TAG_WIDTH + `CLOG2(2));
/////////////////////////////// L2 Parameters /////////////////////////////
// Arbiter input indices: icache is input 0, dcache is input 1
localparam ICACHE_MEM_ARB_IDX = 0;
localparam DCACHE_MEM_ARB_IDX = ICACHE_MEM_ARB_IDX + 1;
// Word size in bytes
localparam L2_WORD_SIZE = `L1_LINE_SIZE;
// Input request size
localparam L2_NUM_REQS = `NUM_SOCKETS * `L1_MEM_PORTS;
// Core request tag bits
localparam L2_TAG_WIDTH = L1_MEM_ARB_TAG_WIDTH;
// Memory request data bits
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);
// Memory request tag bits
// The formula depends on whether the L2 is instantiated (L2_ENABLE) or bypassed.
`ifdef L2_ENABLE
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_MEM_PORTS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH, `UUID_WIDTH);
`else
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_MEM_PORTS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
`endif
/////////////////////////////// L3 Parameters /////////////////////////////
// Word size in bytes
// L3 word size equals the L2 line size
localparam L3_WORD_SIZE = `L2_LINE_SIZE;
// Input request size
localparam L3_NUM_REQS = `NUM_CLUSTERS * `L2_MEM_PORTS;
// Core request tag bits
localparam L3_TAG_WIDTH = L2_MEM_TAG_WIDTH;
// Memory request data bits
localparam L3_MEM_DATA_WIDTH = (`L3_LINE_SIZE * 8);
// Memory request tag bits
// The formula depends on whether the L3 is instantiated (L3_ENABLE) or bypassed.
`ifdef L3_ENABLE
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_MEM_PORTS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH, `UUID_WIDTH);
`else
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_MEM_PORTS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
`endif
/////////////////////////////// Issue parameters //////////////////////////
// ISW: number of bits needed to index one of `ISSUE_WIDTH issue slices
localparam ISSUE_ISW = `CLOG2(`ISSUE_WIDTH);
// *_W variants pass the bit count through `UP() for use as a signal width
localparam ISSUE_ISW_W = `UP(ISSUE_ISW);
// Warps handled by each issue slice
localparam PER_ISSUE_WARPS = `NUM_WARPS / `ISSUE_WIDTH;
// WIS: number of bits needed to index a warp within one issue slice
localparam ISSUE_WIS = `CLOG2(PER_ISSUE_WARPS);
localparam ISSUE_WIS_W = `UP(ISSUE_WIS);
// Rebuilds a full warp id from its two components:
//   wis - warp index inside an issue slice (upper bits of the warp id)
//   isw - issue slice index               (lower bits of the warp id)
// When one component has zero real bits (ISSUE_WIS == 0 or ISSUE_ISW == 0) the
// warp id is just the other component. When both are zero, isw is returned.
function logic [`NW_WIDTH-1:0] wis_to_wid(
    input logic [ISSUE_WIS_W-1:0] wis,
    input logic [ISSUE_ISW_W-1:0] isw
);
    if ((ISSUE_WIS != 0) && (ISSUE_ISW != 0)) begin
        // both components carry information: concatenate them
        wis_to_wid = `NW_WIDTH'({wis, isw});
    end else if (ISSUE_WIS != 0) begin
        // a single issue slice: the warp id is the in-slice index
        wis_to_wid = `NW_WIDTH'(wis);
    end else begin
        // one warp per slice (or the fully degenerate case)
        wis_to_wid = `NW_WIDTH'(isw);
    end
endfunction
// Extracts the issue slice index from a warp id: the low ISSUE_ISW_W bits of
// wid, or 0 when there are no slice-index bits (ISSUE_ISW == 0).
function logic [ISSUE_ISW_W-1:0] wid_to_isw(
    input logic [`NW_WIDTH-1:0] wid
);
    if (ISSUE_ISW == 0) begin
        wid_to_isw = 0;
    end else begin
        wid_to_isw = wid[ISSUE_ISW_W-1:0];
    end
endfunction
// Extracts the in-slice warp index from a warp id: wid shifted right by the
// ISSUE_ISW slice-index bits and cast to ISSUE_WIS_W bits, or 0 when there are
// no in-slice index bits (ISSUE_WIS == 0).
function logic [ISSUE_WIS_W-1:0] wid_to_wis(
    input logic [`NW_WIDTH-1:0] wid
);
    if (ISSUE_WIS == 0) begin
        wid_to_wis = 0;
    end else begin
        wid_to_wis = ISSUE_WIS_W'(wid >> ISSUE_ISW);
    end
endfunction
///////////////////////// Miscellaneous functions ////////////////////////
// Maps an SFU opcode to the SFU sub-unit that handles it: the three CSR
// opcodes (CSRRW / CSRRS / CSRRC) go to `SFU_CSRS, every other opcode goes to
// `SFU_WCTL.
function logic [`SFU_WIDTH-1:0] op_to_sfu_type(
    input logic [`INST_OP_BITS-1:0] op_type
);
    if ((op_type == `INST_SFU_CSRRW)
     || (op_type == `INST_SFU_CSRRS)
     || (op_type == `INST_SFU_CSRRC)) begin
        op_to_sfu_type = `SFU_CSRS;
    end else begin
        op_to_sfu_type = `SFU_WCTL;
    end
endfunction
`IGNORE_UNUSED_END
////////////////////////////////// Tracing ////////////////////////////////////
`ifdef SIMULATION
`ifdef SV_DPI
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
`endif
// Emits the name of an execution-unit type through the `TRACE macro.
//   level   - passed through to `TRACE as its first argument
//   ex_type - execution-unit selector (`EX_BITS wide)
// Emits "ALU", "LSU", "SFU", or "FPU" (the latter only when EXT_F_ENABLE is
// defined). Any other value emits "?".
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
case (ex_type)
`EX_ALU: `TRACE(level, ("ALU"))
`EX_LSU: `TRACE(level, ("LSU"))
`EX_SFU: `TRACE(level, ("SFU"))
`ifdef EXT_F_ENABLE
`EX_FPU: `TRACE(level, ("FPU"))
`endif
default: `TRACE(level, ("?"))
endcase
endtask
// Emits the assembly mnemonic of an instruction through the `TRACE macro.
//   level   - passed through to `TRACE as its first argument
//   ex_type - execution-unit selector; picks which view of op_args is decoded
//   op_type - opcode within the unit; cast to the unit's own opcode width
//             (`INST_ALU_BITS, `INST_BR_BITS, ...) before matching
//   op_args - instruction arguments, used to refine the mnemonic
//             (immediate / "W" / float / negated / format variants)
// Any combination that is not recognised emits "?".
task trace_ex_op(input int level,
input [`EX_BITS-1:0] ex_type,
input [`INST_OP_BITS-1:0] op_type,
input VX_gpu_pkg::op_args_t op_args
);
case (ex_type)
`EX_ALU: begin
// ALU ops are split by xtype into arithmetic, branch and mul/div groups
case (op_args.alu.xtype)
`ALU_TYPE_ARITH: begin
// is_w selects the "W" mnemonics; use_imm selects the immediate forms
if (op_args.alu.is_w) begin
if (op_args.alu.use_imm) begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADDIW"))
`INST_ALU_SLL: `TRACE(level, ("SLLIW"))
`INST_ALU_SRL: `TRACE(level, ("SRLIW"))
`INST_ALU_SRA: `TRACE(level, ("SRAIW"))
default: `TRACE(level, ("?"))
endcase
end else begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADDW"))
`INST_ALU_SUB: `TRACE(level, ("SUBW"))
`INST_ALU_SLL: `TRACE(level, ("SLLW"))
`INST_ALU_SRL: `TRACE(level, ("SRLW"))
`INST_ALU_SRA: `TRACE(level, ("SRAW"))
default: `TRACE(level, ("?"))
endcase
end
end else begin
if (op_args.alu.use_imm) begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADDI"))
`INST_ALU_SLL: `TRACE(level, ("SLLI"))
`INST_ALU_SRL: `TRACE(level, ("SRLI"))
`INST_ALU_SRA: `TRACE(level, ("SRAI"))
`INST_ALU_SLT: `TRACE(level, ("SLTI"))
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"))
`INST_ALU_XOR: `TRACE(level, ("XORI"))
`INST_ALU_OR: `TRACE(level, ("ORI"))
`INST_ALU_AND: `TRACE(level, ("ANDI"))
`INST_ALU_LUI: `TRACE(level, ("LUI"))
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"))
default: `TRACE(level, ("?"))
endcase
end else begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADD"))
`INST_ALU_SUB: `TRACE(level, ("SUB"))
`INST_ALU_SLL: `TRACE(level, ("SLL"))
`INST_ALU_SRL: `TRACE(level, ("SRL"))
`INST_ALU_SRA: `TRACE(level, ("SRA"))
`INST_ALU_SLT: `TRACE(level, ("SLT"))
`INST_ALU_SLTU: `TRACE(level, ("SLTU"))
`INST_ALU_XOR: `TRACE(level, ("XOR"))
`INST_ALU_OR: `TRACE(level, ("OR"))
`INST_ALU_AND: `TRACE(level, ("AND"))
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"))
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"))
default: `TRACE(level, ("?"))
endcase
end
end
end
`ALU_TYPE_BRANCH: begin
case (`INST_BR_BITS'(op_type))
`INST_BR_EQ: `TRACE(level, ("BEQ"))
`INST_BR_NE: `TRACE(level, ("BNE"))
`INST_BR_LT: `TRACE(level, ("BLT"))
`INST_BR_GE: `TRACE(level, ("BGE"))
`INST_BR_LTU: `TRACE(level, ("BLTU"))
`INST_BR_GEU: `TRACE(level, ("BGEU"))
`INST_BR_JAL: `TRACE(level, ("JAL"))
`INST_BR_JALR: `TRACE(level, ("JALR"))
`INST_BR_ECALL: `TRACE(level, ("ECALL"))
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"))
`INST_BR_URET: `TRACE(level, ("URET"))
`INST_BR_SRET: `TRACE(level, ("SRET"))
`INST_BR_MRET: `TRACE(level, ("MRET"))
default: `TRACE(level, ("?"))
endcase
end
`ALU_TYPE_MULDIV: begin
// the "W" group lists fewer ops than the full-width group (no MULH*)
if (op_args.alu.is_w) begin
case (`INST_M_BITS'(op_type))
`INST_M_MUL: `TRACE(level, ("MULW"))
`INST_M_DIV: `TRACE(level, ("DIVW"))
`INST_M_DIVU: `TRACE(level, ("DIVUW"))
`INST_M_REM: `TRACE(level, ("REMW"))
`INST_M_REMU: `TRACE(level, ("REMUW"))
default: `TRACE(level, ("?"))
endcase
end else begin
case (`INST_M_BITS'(op_type))
`INST_M_MUL: `TRACE(level, ("MUL"))
`INST_M_MULH: `TRACE(level, ("MULH"))
`INST_M_MULHSU:`TRACE(level, ("MULHSU"))
`INST_M_MULHU: `TRACE(level, ("MULHU"))
`INST_M_DIV: `TRACE(level, ("DIV"))
`INST_M_DIVU: `TRACE(level, ("DIVU"))
`INST_M_REM: `TRACE(level, ("REM"))
`INST_M_REMU: `TRACE(level, ("REMU"))
default: `TRACE(level, ("?"))
endcase
end
end
default: `TRACE(level, ("?"))
endcase
end
`EX_LSU: begin
// is_float selects the floating-point load/store mnemonics
if (op_args.lsu.is_float) begin
case (`INST_LSU_BITS'(op_type))
`INST_LSU_LW: `TRACE(level, ("FLW"))
`INST_LSU_LD: `TRACE(level, ("FLD"))
`INST_LSU_SW: `TRACE(level, ("FSW"))
`INST_LSU_SD: `TRACE(level, ("FSD"))
default: `TRACE(level, ("?"))
endcase
end else begin
case (`INST_LSU_BITS'(op_type))
`INST_LSU_LB: `TRACE(level, ("LB"))
`INST_LSU_LH: `TRACE(level, ("LH"))
`INST_LSU_LW: `TRACE(level, ("LW"))
`INST_LSU_LD: `TRACE(level, ("LD"))
`INST_LSU_LBU:`TRACE(level, ("LBU"))
`INST_LSU_LHU:`TRACE(level, ("LHU"))
`INST_LSU_LWU:`TRACE(level, ("LWU"))
`INST_LSU_SB: `TRACE(level, ("SB"))
`INST_LSU_SH: `TRACE(level, ("SH"))
`INST_LSU_SW: `TRACE(level, ("SW"))
`INST_LSU_SD: `TRACE(level, ("SD"))
`INST_LSU_FENCE:`TRACE(level,("FENCE"))
default: `TRACE(level, ("?"))
endcase
end
end
`EX_SFU: begin
// warp-control ops read op_args.wctl; CSR ops read op_args.csr
case (`INST_SFU_BITS'(op_type))
`INST_SFU_TMC: `TRACE(level, ("TMC"))
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"))
`INST_SFU_SPLIT: begin
if (op_args.wctl.is_neg) begin
`TRACE(level, ("SPLIT.N"))
end else begin
`TRACE(level, ("SPLIT"))
end
end
`INST_SFU_JOIN: `TRACE(level, ("JOIN"))
`INST_SFU_BAR: `TRACE(level, ("BAR"))
`INST_SFU_PRED: begin
if (op_args.wctl.is_neg) begin
`TRACE(level, ("PRED.N"))
end else begin
`TRACE(level, ("PRED"))
end
end
`INST_SFU_CSRRW: begin
if (op_args.csr.use_imm) begin
`TRACE(level, ("CSRRWI"))
end else begin
`TRACE(level, ("CSRRW"))
end
end
`INST_SFU_CSRRS: begin
if (op_args.csr.use_imm) begin
`TRACE(level, ("CSRRSI"))
end else begin
`TRACE(level, ("CSRRS"))
end
end
`INST_SFU_CSRRC: begin
if (op_args.csr.use_imm) begin
`TRACE(level, ("CSRRCI"))
end else begin
`TRACE(level, ("CSRRC"))
end
end
default: `TRACE(level, ("?"))
endcase
end
`ifdef EXT_F_ENABLE
`EX_FPU: begin
// In this decoder fmt[0] selects the ".D" (set) or ".S" (clear) mnemonic.
// fmt[1] is a per-op modifier: subtract for ADD/MADD/NMADD, and the
// L/LU (64-bit integer) form for the int<->float conversions.
case (`INST_FPU_BITS'(op_type))
`INST_FPU_ADD: begin
if (op_args.fpu.fmt[1]) begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FSUB.D"))
end else begin
`TRACE(level, ("FSUB.S"))
end
end else begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FADD.D"))
end else begin
`TRACE(level, ("FADD.S"))
end
end
end
`INST_FPU_MADD: begin
if (op_args.fpu.fmt[1]) begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FMSUB.D"))
end else begin
`TRACE(level, ("FMSUB.S"))
end
end else begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FMADD.D"))
end else begin
`TRACE(level, ("FMADD.S"))
end
end
end
`INST_FPU_NMADD: begin
if (op_args.fpu.fmt[1]) begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FNMSUB.D"))
end else begin
`TRACE(level, ("FNMSUB.S"))
end
end else begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FNMADD.D"))
end else begin
`TRACE(level, ("FNMADD.S"))
end
end
end
`INST_FPU_MUL: begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FMUL.D"))
end else begin
`TRACE(level, ("FMUL.S"))
end
end
`INST_FPU_DIV: begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FDIV.D"))
end else begin
`TRACE(level, ("FDIV.S"))
end
end
`INST_FPU_SQRT: begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FSQRT.D"))
end else begin
`TRACE(level, ("FSQRT.S"))
end
end
`INST_FPU_CMP: begin
// the low two bits of frm select the comparison: 0=LE, 1=LT, 2=EQ
if (op_args.fpu.fmt[0]) begin
case (op_args.fpu.frm[1:0])
0: `TRACE(level, ("FLE.D"))
1: `TRACE(level, ("FLT.D"))
2: `TRACE(level, ("FEQ.D"))
default: `TRACE(level, ("?"))
endcase
end else begin
case (op_args.fpu.frm[1:0])
0: `TRACE(level, ("FLE.S"))
1: `TRACE(level, ("FLT.S"))
2: `TRACE(level, ("FEQ.S"))
default: `TRACE(level, ("?"))
endcase
end
end
`INST_FPU_F2F: begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FCVT.D.S"))
end else begin
`TRACE(level, ("FCVT.S.D"))
end
end
`INST_FPU_F2I: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.L.D"))
end else begin
`TRACE(level, ("FCVT.W.D"))
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.L.S"))
end else begin
`TRACE(level, ("FCVT.W.S"))
end
end
end
`INST_FPU_F2U: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.LU.D"))
end else begin
`TRACE(level, ("FCVT.WU.D"))
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.LU.S"))
end else begin
`TRACE(level, ("FCVT.WU.S"))
end
end
end
`INST_FPU_I2F: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.D.L"))
end else begin
`TRACE(level, ("FCVT.D.W"))
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.S.L"))
end else begin
`TRACE(level, ("FCVT.S.W"))
end
end
end
`INST_FPU_U2F: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.D.LU"))
end else begin
`TRACE(level, ("FCVT.D.WU"))
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.S.LU"))
end else begin
`TRACE(level, ("FCVT.S.WU"))
end
end
end
`INST_FPU_MISC: begin
// for MISC ops frm is reused as a sub-opcode; items 0..7 are listed
// and there is no default arm
if (op_args.fpu.fmt[0]) begin
case (op_args.fpu.frm)
0: `TRACE(level, ("FSGNJ.D"))
1: `TRACE(level, ("FSGNJN.D"))
2: `TRACE(level, ("FSGNJX.D"))
3: `TRACE(level, ("FCLASS.D"))
4: `TRACE(level, ("FMV.X.D"))
5: `TRACE(level, ("FMV.D.X"))
6: `TRACE(level, ("FMIN.D"))
7: `TRACE(level, ("FMAX.D"))
endcase
end else begin
case (op_args.fpu.frm)
0: `TRACE(level, ("FSGNJ.S"))
1: `TRACE(level, ("FSGNJN.S"))
2: `TRACE(level, ("FSGNJX.S"))
3: `TRACE(level, ("FCLASS.S"))
4: `TRACE(level, ("FMV.X.S"))
5: `TRACE(level, ("FMV.S.X"))
6: `TRACE(level, ("FMIN.S"))
7: `TRACE(level, ("FMAX.S"))
endcase
end
end
default: `TRACE(level, ("?"))
endcase
end
`endif
default: `TRACE(level, ("?"))
endcase
endtask
// Emits the decoded instruction arguments through the `TRACE macro, as a
// ", name=value" list whose fields depend on the execution unit.
//   level   - passed through to `TRACE as its first argument
//   ex_type - execution-unit selector; picks which view of op_args is printed
//   op_type - opcode; only consulted for SFU ops, to detect CSR instructions
//   op_args - instruction arguments
// Emits nothing for non-CSR SFU ops or for an unrecognised ex_type.
task trace_op_args(input int level,
input [`EX_BITS-1:0] ex_type,
input [`INST_OP_BITS-1:0] op_type,
input VX_gpu_pkg::op_args_t op_args
);
case (ex_type)
`EX_ALU: begin
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm))
end
`EX_LSU: begin
`TRACE(level, (", offset=0x%0h", op_args.lsu.offset))
end
`EX_SFU: begin
// only CSR instructions carry printable arguments
if (`INST_SFU_IS_CSR(op_type)) begin
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm))
end
end
`ifdef EXT_F_ENABLE
`EX_FPU: begin
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm))
end
`endif
default:;
endcase
endtask
// Emits the symbolic name of a base DCR address through the `TRACE macro.
//   level - passed through to `TRACE as its first argument
//   addr  - DCR address (`VX_DCR_ADDR_WIDTH bits)
// Unrecognised addresses emit "?".
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
case (addr)
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"))
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"))
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"))
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"))
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"))
default: `TRACE(level, ("?"))
endcase
endtask
`endif
endpackage
`endif // VX_GPU_PKG_VH

View file

@ -1,43 +0,0 @@
`ifndef VX_GPU_TYPES
`define VX_GPU_TYPES
`include "VX_define.vh"
// Packed payload types for the warp-control results, each followed by a
// GPU_*_BITS macro that expands to the struct's total bit width.
package gpu_types;
// Thread-mask update: a valid flag and one mask bit per thread
typedef struct packed {
logic valid;
logic [`NUM_THREADS-1:0] tmask;
} gpu_tmc_t;
`define GPU_TMC_BITS $bits(gpu_types::gpu_tmc_t)
// Warp spawn: a valid flag, one mask bit per warp, and a 32-bit PC
typedef struct packed {
logic valid;
logic [`NUM_WARPS-1:0] wmask;
logic [31:0] pc;
} gpu_wspawn_t;
`define GPU_WSPAWN_BITS $bits(gpu_types::gpu_wspawn_t)
// Split: a valid flag, a diverged flag, the two per-thread masks and a 32-bit PC
typedef struct packed {
logic valid;
logic diverged;
logic [`NUM_THREADS-1:0] then_tmask;
logic [`NUM_THREADS-1:0] else_tmask;
logic [31:0] pc;
} gpu_split_t;
`define GPU_SPLIT_BITS $bits(gpu_types::gpu_split_t)
// Barrier: a valid flag, a barrier id (`NB_BITS) and size_m1 (`NW_BITS)
typedef struct packed {
logic valid;
logic [`NB_BITS-1:0] id;
logic [`NW_BITS-1:0] size_m1;
} gpu_barrier_t;
`define GPU_BARRIER_BITS $bits(gpu_types::gpu_barrier_t)
endpackage
`endif

View file

@ -1,220 +0,0 @@
`include "VX_define.vh"
// GPU unit: decodes warp-control instructions (TMC, WSPAWN, SPLIT, BAR, PRED)
// from gpu_req_if into the gpu_types structs, registers the result through one
// pipeline register, and drives both the commit interface and the warp-control
// interface from it. When EXT_TEX_ENABLE is defined it also forwards texture
// requests to VX_tex_unit and multiplexes its responses onto the same output.
module VX_gpu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_gpu_unit
input wire clk,
input wire reset,
// Inputs
VX_gpu_req_if.slave gpu_req_if,
`ifdef EXT_TEX_ENABLE
// PERF
`ifdef PERF_ENABLE
VX_perf_tex_if.master perf_tex_if,
`endif
VX_dcache_req_if.master dcache_req_if,
VX_dcache_rsp_if.slave dcache_rsp_if,
VX_tex_csr_if.slave tex_csr_if,
`endif
// Outputs
VX_warp_ctl_if.master warp_ctl_if,
VX_commit_if.master gpu_commit_if
);
import gpu_types::*;
`UNUSED_PARAM (CORE_ID)
// Width of the four packed warp-control structs together
localparam WCTL_DATAW = `GPU_TMC_BITS + `GPU_WSPAWN_BITS + `GPU_SPLIT_BITS + `GPU_BARRIER_BITS;
// The response data bus carries either per-thread 32-bit data or the
// warp-control bundle, so it is sized for the larger of the two
localparam RSP_DATAW = `MAX(`NUM_THREADS * 32, WCTL_DATAW);
wire rsp_valid;
wire [`UUID_BITS-1:0] rsp_uuid;
wire [`NW_BITS-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb;
wire [RSP_DATAW-1:0] rsp_data, rsp_data_r;
gpu_tmc_t tmc;
gpu_wspawn_t wspawn;
gpu_barrier_t barrier;
gpu_split_t split;
wire [WCTL_DATAW-1:0] warp_ctl_data;
wire is_warp_ctl;
wire stall_in, stall_out;
// opcode decode
wire is_wspawn = (gpu_req_if.op_type == `INST_GPU_WSPAWN);
wire is_tmc = (gpu_req_if.op_type == `INST_GPU_TMC);
wire is_split = (gpu_req_if.op_type == `INST_GPU_SPLIT);
wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR);
wire is_pred = (gpu_req_if.op_type == `INST_GPU_PRED);
// scalar operands are taken from the thread selected by gpu_req_if.tid
wire [31:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid];
wire [31:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid];
// per-thread predicate: an active thread is "taken" when its rs1 is non-zero
wire [`NUM_THREADS-1:0] taken_tmask;
wire [`NUM_THREADS-1:0] not_taken_tmask;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire taken = (gpu_req_if.rs1_data[i] != 0);
assign taken_tmask[i] = gpu_req_if.tmask[i] & taken;
assign not_taken_tmask[i] = gpu_req_if.tmask[i] & ~taken;
end
// tmc
// PRED falls back to the current mask when no thread is taken
wire [`NUM_THREADS-1:0] pred_mask = (taken_tmask != 0) ? taken_tmask : gpu_req_if.tmask;
assign tmc.valid = is_tmc || is_pred;
assign tmc.tmask = is_pred ? pred_mask : rs1_data[`NUM_THREADS-1:0];
// wspawn
// rs1 is a warp count: warps with index below it are enabled; rs2 is their PC
wire [31:0] wspawn_pc = rs2_data;
wire [`NUM_WARPS-1:0] wspawn_wmask;
for (genvar i = 0; i < `NUM_WARPS; i++) begin
assign wspawn_wmask[i] = (i < rs1_data);
end
assign wspawn.valid = is_wspawn;
assign wspawn.wmask = wspawn_wmask;
assign wspawn.pc = wspawn_pc;
// split
// diverged only when both the taken and the not-taken sets are non-empty
assign split.valid = is_split;
assign split.diverged = (| taken_tmask) && (| not_taken_tmask);
assign split.then_tmask = taken_tmask;
assign split.else_tmask = not_taken_tmask;
assign split.pc = gpu_req_if.next_PC;
// barrier
// rs1 carries the barrier id; rs2 minus one is truncated to `NW_BITS as size_m1
assign barrier.valid = is_bar;
assign barrier.id = rs1_data[`NB_BITS-1:0];
assign barrier.size_m1 = (`NW_BITS)'(rs2_data - 1);
// pack warp ctl result
assign warp_ctl_data = {tmc, wspawn, split, barrier};
// texture
`ifdef EXT_TEX_ENABLE
`UNUSED_VAR (gpu_req_if.op_mod)
VX_tex_req_if tex_req_if();
VX_tex_rsp_if tex_rsp_if();
wire is_tex = (gpu_req_if.op_type == `INST_GPU_TEX);
assign tex_req_if.valid = gpu_req_if.valid && is_tex;
assign tex_req_if.uuid = gpu_req_if.uuid;
assign tex_req_if.wid = gpu_req_if.wid;
assign tex_req_if.tmask = gpu_req_if.tmask;
assign tex_req_if.PC = gpu_req_if.PC;
assign tex_req_if.rd = gpu_req_if.rd;
assign tex_req_if.wb = gpu_req_if.wb;
assign tex_req_if.unit = gpu_req_if.op_mod[`NTEX_BITS-1:0];
assign tex_req_if.coords[0] = gpu_req_if.rs1_data;
assign tex_req_if.coords[1] = gpu_req_if.rs2_data;
assign tex_req_if.lod = gpu_req_if.rs3_data;
VX_tex_unit #(
.CORE_ID(CORE_ID)
) tex_unit (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.perf_tex_if (perf_tex_if),
`endif
.tex_req_if (tex_req_if),
.tex_csr_if (tex_csr_if),
.tex_rsp_if (tex_rsp_if),
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if)
);
assign tex_rsp_if.ready = !stall_out;
// a texture request stalls until the texture unit accepts it; any other
// request stalls while a texture response occupies the output or the
// output is back-pressured
assign stall_in = (is_tex && ~tex_req_if.ready)
|| (~is_tex && (tex_rsp_if.valid || stall_out));
assign is_warp_ctl = !(is_tex || tex_rsp_if.valid);
// a texture response takes priority over the incoming request on the output
assign rsp_valid = tex_rsp_if.valid || (gpu_req_if.valid && ~is_tex);
assign rsp_uuid = tex_rsp_if.valid ? tex_rsp_if.uuid : gpu_req_if.uuid;
assign rsp_wid = tex_rsp_if.valid ? tex_rsp_if.wid : gpu_req_if.wid;
assign rsp_tmask = tex_rsp_if.valid ? tex_rsp_if.tmask : gpu_req_if.tmask;
assign rsp_PC = tex_rsp_if.valid ? tex_rsp_if.PC : gpu_req_if.PC;
assign rsp_rd = tex_rsp_if.rd;
assign rsp_wb = tex_rsp_if.valid && tex_rsp_if.wb;
assign rsp_data = tex_rsp_if.valid ? RSP_DATAW'(tex_rsp_if.data) : RSP_DATAW'(warp_ctl_data);
`else
`UNUSED_VAR (gpu_req_if.op_mod)
`UNUSED_VAR (gpu_req_if.rs3_data)
`UNUSED_VAR (gpu_req_if.wb)
`UNUSED_VAR (gpu_req_if.rd)
// without the texture unit every request is a warp-control op and never
// writes back a register
assign stall_in = stall_out;
assign is_warp_ctl = 1;
assign rsp_valid = gpu_req_if.valid;
assign rsp_uuid = gpu_req_if.uuid;
assign rsp_wid = gpu_req_if.wid;
assign rsp_tmask = gpu_req_if.tmask;
assign rsp_PC = gpu_req_if.PC;
assign rsp_rd = 0;
assign rsp_wb = 0;
assign rsp_data = RSP_DATAW'(warp_ctl_data);
`endif
wire is_warp_ctl_r;
// output
// the output register holds its value while commit has valid data that is
// not being accepted
assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall_out),
.data_in ({rsp_valid, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data, is_warp_ctl}),
.data_out ({gpu_commit_if.valid, gpu_commit_if.uuid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, rsp_data_r, is_warp_ctl_r})
);
// commit data is the low NUM_THREADS*32 bits of the registered response
assign gpu_commit_if.data = rsp_data_r[(`NUM_THREADS * 32)-1:0];
assign gpu_commit_if.eop = 1'b1;
// warp control response
// unpacked in the same order as warp_ctl_data was packed; only flagged valid
// on the cycle the commit handshake completes for a warp-control op
assign {warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier} = rsp_data_r[WCTL_DATAW-1:0];
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready && is_warp_ctl_r;
assign warp_ctl_if.wid = gpu_commit_if.wid;
// can accept new request?
assign gpu_req_if.ready = ~stall_in;
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
`SCOPE_ASSIGN (gpu_rsp_uuid, gpu_commit_if.uuid);
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc.valid);
`SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn.valid);
`SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split.valid);
`SCOPE_ASSIGN (gpu_rsp_barrier, warp_ctl_if.barrier.valid);
endmodule

View file

@ -1,210 +0,0 @@
`include "VX_define.vh"
// Instruction buffer: one queue per warp between decode and issue.
// Each warp has a VX_elastic_buffer plus a head register (q_data_out); a
// selection stage picks which warp's head instruction is presented on
// ibuffer_if next, and the next-cycle selection is also exported on the *_n
// outputs.
module VX_ibuffer #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// inputs
VX_decode_if.slave decode_if,
// outputs
VX_ibuffer_if.master ibuffer_if
);
`UNUSED_PARAM (CORE_ID)
// Width of one buffered instruction; must match the q_data_in concatenation below
localparam DATAW = `UUID_BITS + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
localparam ADDRW = $clog2(`IBUF_SIZE+1);
localparam NWARPSW = $clog2(`NUM_WARPS+1);
// per-warp occupancy count and status flags
reg [`NUM_WARPS-1:0][ADDRW-1:0] used_r;
reg [`NUM_WARPS-1:0] full_r, empty_r, alm_empty_r;
wire [`NUM_WARPS-1:0] q_full, q_empty, q_alm_empty;
wire [DATAW-1:0] q_data_in;
// q_data_prev: output of each warp's elastic buffer (the entry behind the head)
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
// q_data_out: each warp's head register
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
wire enq_fire = decode_if.valid && decode_if.ready;
wire deq_fire = ibuffer_if.valid && ibuffer_if.ready;
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
wire writing = enq_fire && (i == decode_if.wid);
wire reading = deq_fire && (i == ibuffer_if.wid);
// true when the head register will have nothing behind it to refill from:
// the warp is already empty, or its last-but-one entry is being read
wire going_empty = empty_r[i] || (alm_empty_r[i] && reading);
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`IBUF_SIZE),
.OUT_REG (1)
) queue (
.clk (clk),
.reset (reset),
// when going_empty, the incoming instruction bypasses the queue and is
// written straight into the head register below
.valid_in (writing && !going_empty),
.data_in (q_data_in),
.ready_out(reading),
.data_out (q_data_prev[i]),
`UNUSED_PIN (ready_in),
`UNUSED_PIN (valid_out)
);
always @(posedge clk) begin
if (reset) begin
used_r[i] <= 0;
full_r[i] <= 0;
empty_r[i] <= 1;
alm_empty_r[i] <= 1;
end else begin
// a simultaneous write and read leaves the flags unchanged
if (writing) begin
if (!reading) begin
empty_r[i] <= 0;
if (used_r[i] == 1)
alm_empty_r[i] <= 0;
if (used_r[i] == ADDRW'(`IBUF_SIZE))
full_r[i] <= 1;
end
end else if (reading) begin
full_r[i] <= 0;
if (used_r[i] == ADDRW'(1))
empty_r[i] <= 1;
if (used_r[i] == ADDRW'(2))
alm_empty_r[i] <= 1;
end
// net occupancy change: +1 on write only, -1 on read only, 0 otherwise
used_r[i] <= used_r[i] + ADDRW'($signed(2'(writing) - 2'(reading)));
end
// head register update (not reset): load the bypassed input, or advance to
// the next queued entry on a read
if (writing && going_empty) begin
q_data_out[i] <= q_data_in;
end else if (reading) begin
q_data_out[i] <= q_data_prev[i];
end
end
assign q_full[i] = full_r[i];
assign q_empty[i] = empty_r[i];
assign q_alm_empty[i] = alm_empty_r[i];
end
///////////////////////////////////////////////////////////////////////////
reg [`NUM_WARPS-1:0] valid_table, valid_table_n;
reg [`NW_BITS-1:0] deq_wid, deq_wid_n;
reg [`NW_BITS-1:0] deq_wid_rr, deq_wid_rr_n;
reg deq_valid, deq_valid_n;
reg [DATAW-1:0] deq_instr, deq_instr_n;
// number of warps that currently hold at least one instruction
reg [NWARPSW-1:0] num_warps;
`UNUSED_VAR (deq_instr)
// calculate valid table
// the enqueue update is applied after the dequeue update, so an enqueue to
// the warp being dequeued keeps that warp's bit set
always @(*) begin
valid_table_n = valid_table;
if (deq_fire) begin
valid_table_n[deq_wid] = !q_alm_empty[deq_wid];
end
if (enq_fire) begin
valid_table_n[decode_if.wid] = 1;
end
end
// round-robin warp scheduling
VX_rr_arbiter #(
.NUM_REQS (`NUM_WARPS)
) rr_arbiter (
.clk (clk),
.reset (reset),
.requests (valid_table_n),
.grant_index (deq_wid_rr_n),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot),
`UNUSED_PIN (enable)
);
// schedule the next instruction to issue
//  - more than one warp buffered: take the registered round-robin pick
//  - exactly one warp buffered and it is not draining its last entry: stay
//    on that warp, reading the entry behind the head if the head is leaving
//  - otherwise: forward the instruction arriving from decode, if any
always @(*) begin
if (num_warps > 1) begin
deq_valid_n = 1;
deq_wid_n = deq_wid_rr;
deq_instr_n = q_data_out[deq_wid_rr];
end else if (1 == num_warps && !(deq_fire && q_alm_empty[deq_wid])) begin
deq_valid_n = 1;
deq_wid_n = deq_wid;
deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid];
end else begin
deq_valid_n = enq_fire;
deq_wid_n = decode_if.wid;
deq_instr_n = q_data_in;
end
end
// a warp joins the set when it receives its first instruction, and leaves it
// when its last instruction is dequeued without a same-cycle enqueue to it
wire warp_added = enq_fire && q_empty[decode_if.wid];
wire warp_removed = deq_fire && ~(enq_fire && decode_if.wid == deq_wid) && q_alm_empty[deq_wid];
always @(posedge clk) begin
if (reset) begin
valid_table <= 0;
deq_valid <= 0;
num_warps <= 0;
end else begin
valid_table <= valid_table_n;
deq_valid <= deq_valid_n;
if (warp_added && !warp_removed) begin
num_warps <= num_warps + NWARPSW'(1);
end else if (warp_removed && !warp_added) begin
num_warps <= num_warps - NWARPSW'(1);
end
end
// data-path registers are updated every cycle and are not reset
deq_wid <= deq_wid_n;
deq_wid_rr <= deq_wid_rr_n;
deq_instr <= deq_instr_n;
end
// back-pressure decode when the targeted warp's queue is full
assign decode_if.ready = ~q_full[decode_if.wid];
// buffered instruction layout; the four register ids occupy the low bits
// (rs3 lowest), which the *_n extraction at the bottom relies on
assign q_data_in = {decode_if.uuid,
decode_if.tmask,
decode_if.PC,
decode_if.ex_type,
decode_if.op_type,
decode_if.op_mod,
decode_if.wb,
decode_if.use_PC,
decode_if.use_imm,
decode_if.imm,
decode_if.rd,
decode_if.rs1,
decode_if.rs2,
decode_if.rs3};
assign ibuffer_if.valid = deq_valid;
assign ibuffer_if.wid = deq_wid;
assign {ibuffer_if.uuid,
ibuffer_if.tmask,
ibuffer_if.PC,
ibuffer_if.ex_type,
ibuffer_if.op_type,
ibuffer_if.op_mod,
ibuffer_if.wb,
ibuffer_if.use_PC,
ibuffer_if.use_imm,
ibuffer_if.imm,
ibuffer_if.rd,
ibuffer_if.rs1,
ibuffer_if.rs2,
ibuffer_if.rs3} = deq_instr;
// scoreboard forwarding
// next-cycle warp id and register ids, sliced from the unregistered selection
assign ibuffer_if.wid_n = deq_wid_n;
assign ibuffer_if.rd_n = deq_instr_n[3*`NR_BITS +: `NR_BITS];
assign ibuffer_if.rs1_n = deq_instr_n[2*`NR_BITS +: `NR_BITS];
assign ibuffer_if.rs2_n = deq_instr_n[1*`NR_BITS +: `NR_BITS];
assign ibuffer_if.rs3_n = deq_instr_n[0*`NR_BITS +: `NR_BITS];
endmodule

View file

@ -1,102 +0,0 @@
`include "VX_define.vh"
// Instruction-cache stage: turns fetch requests into icache requests tagged
// with {uuid, warp id}, saves each request's PC / thread mask / uuid in a small
// RAM indexed by warp id, and rebuilds the fetch response by looking that
// metadata up with the warp id carried in the icache response tag.
module VX_icache_stage #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_icache_stage
input wire clk,
input wire reset,
// Icache interface
VX_icache_req_if.master icache_req_if,
VX_icache_rsp_if.slave icache_rsp_if,
// request
VX_ifetch_req_if.slave ifetch_req_if,
// response
VX_ifetch_rsp_if.master ifetch_rsp_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
// depth of the output pipe register; 0 here
localparam OUT_REG = 0;
wire [`NW_BITS-1:0] req_tag, rsp_tag;
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
// the warp id is the request tag; it comes back in the low bits of the
// response tag
assign req_tag = ifetch_req_if.wid;
assign rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0];
wire [`UUID_BITS-1:0] rsp_uuid;
wire [31:0] rsp_PC;
wire [`NUM_THREADS-1:0] rsp_tmask;
// per-warp request metadata: written when a request fires, read by response tag
VX_dp_ram #(
.DATAW (32 + `NUM_THREADS + `UUID_BITS),
.SIZE (`NUM_WARPS),
.LUTRAM (1)
) req_metadata (
.clk (clk),
.wren (icache_req_fire),
.waddr (req_tag),
.wdata ({ifetch_req_if.PC, ifetch_req_if.tmask, ifetch_req_if.uuid}),
.raddr (rsp_tag),
.rdata ({rsp_PC, rsp_tmask, rsp_uuid})
);
// a valid fetch request must not have a PC below `STARTUP_ADDR
`RUNTIME_ASSERT((!ifetch_req_if.valid || ifetch_req_if.PC >= `STARTUP_ADDR),
("%t: *** invalid PC=%0h, wid=%0d, tmask=%b (#%0d)", $time, ifetch_req_if.PC, ifetch_req_if.wid, ifetch_req_if.tmask, ifetch_req_if.uuid))
// Icache Request
// the icache is addressed in 32-bit words, so the two low PC bits are dropped
assign icache_req_if.valid = ifetch_req_if.valid;
assign icache_req_if.addr = ifetch_req_if.PC[31:2];
assign icache_req_if.tag = {ifetch_req_if.uuid, req_tag};
// Can accept new request?
assign ifetch_req_if.ready = icache_req_if.ready;
wire [`NW_BITS-1:0] rsp_wid = rsp_tag;
// the stall term only applies when OUT_REG is 0
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid);
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `UUID_BITS),
.RESETW (1),
.DEPTH (OUT_REG)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall_out),
.data_in ({icache_rsp_if.valid, rsp_wid, rsp_tmask, rsp_PC, icache_rsp_if.data, rsp_uuid}),
.data_out ({ifetch_rsp_if.valid, ifetch_rsp_if.wid, ifetch_rsp_if.tmask, ifetch_rsp_if.PC, ifetch_rsp_if.data, ifetch_rsp_if.uuid})
);
// Can accept new response?
assign icache_rsp_if.ready = ~stall_out;
`SCOPE_ASSIGN (icache_req_fire, icache_req_fire);
`SCOPE_ASSIGN (icache_req_uuid, ifetch_req_if.uuid);
`SCOPE_ASSIGN (icache_req_addr, {icache_req_if.addr, 2'b0});
`SCOPE_ASSIGN (icache_req_tag, req_tag);
`SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready);
`SCOPE_ASSIGN (icache_rsp_uuid, rsp_uuid);
`SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data);
`SCOPE_ASSIGN (icache_rsp_tag, rsp_tag);
// optional trace of every accepted request and delivered response
`ifdef DBG_TRACE_CORE_ICACHE
always @(posedge clk) begin
if (icache_req_fire) begin
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, ifetch_req_if.uuid);
end
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, data=%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.data, ifetch_rsp_if.uuid);
end
end
`endif
endmodule

View file

@ -1,68 +0,0 @@
`include "VX_platform.vh"
// VX_ipdom_stack
//
// Small hardware stack in which every slot holds two WIDTH-bit values
// ({q2, q1}) plus a one-bit per-slot flag (is_part).
//
//  * push : writes {q2, q1} into the next free slot and records ~pair as the
//           slot flag. The freshly written slot becomes the top of stack.
//  * pop  : if the flag of the top slot is 0, the slot is kept, its flag is
//           set to 1 and `d` presented q2 for this access; if the flag is 1,
//           `d` presented q1 and both pointers step back by one slot.
//           A slot pushed with pair=1 is therefore popped twice (q2, then q1),
//           a slot pushed with pair=0 is popped once (q1).
//  * push has priority over pop when both are asserted.
//
// Outputs:
//  index : flag of the current top slot (selects q1 when 1, q2 when 0)
//  d     : value selected from the current top slot
//  empty : next-free pointer is 0
//  full  : next-free pointer is DEPTH-1
//
// The per-slot flags are not cleared by reset; only the pointers are.
module VX_ipdom_stack #(
    parameter WIDTH = 1,
    parameter DEPTH = 1
) (
    input  wire               clk,
    input  wire               reset,
    input  wire               pair,
    input  wire [WIDTH - 1:0] q1,
    input  wire [WIDTH - 1:0] q2,
    output wire [WIDTH - 1:0] d,
    input  wire               push,
    input  wire               pop,
    output wire               index,
    output wire               empty,
    output wire               full
);
    localparam ADDRW = $clog2(DEPTH);

    // per-slot flag: 1 => only q1 is left to be popped from the slot
    reg is_part [DEPTH-1:0];

    // top_ptr  : slot currently at the top of the stack (read address)
    // free_ptr : slot that the next push will write (write address)
    reg [ADDRW-1:0] top_ptr, free_ptr;

    // both halves of the slot addressed by top_ptr
    wire [WIDTH-1:0] slot_q1, slot_q2;

    // flag of the top slot, and the amount the pointers retreat on a pop
    wire             top_is_part = is_part[top_ptr];
    wire [ADDRW-1:0] pop_step    = ADDRW'(top_is_part);

    // pointer management (synchronous reset, push wins over pop)
    always @(posedge clk) begin
        if (reset) begin
            top_ptr  <= '0;
            free_ptr <= '0;
        end else if (push) begin
            top_ptr  <= free_ptr;
            free_ptr <= free_ptr + ADDRW'(1);
        end else if (pop) begin
            free_ptr <= free_ptr - pop_step;
            top_ptr  <= top_ptr - pop_step;
        end
    end

    // slot storage: {q2, q1} per entry
    VX_dp_ram #(
        .DATAW  (WIDTH * 2),
        .SIZE   (DEPTH),
        .LUTRAM (1)
    ) store (
        .clk   (clk),
        .wren  (push),
        .waddr (free_ptr),
        .wdata ({q2, q1}),
        .raddr (top_ptr),
        .rdata ({slot_q2, slot_q1})
    );

    // slot flag bookkeeping (not affected by reset)
    always @(posedge clk) begin
        if (push)
            is_part[free_ptr] <= ~pair;
        else if (pop)
            is_part[top_ptr] <= 1'b1;
    end

    assign index = top_is_part;
    assign d     = index ? slot_q1 : slot_q2;
    assign empty = (free_ptr == ADDRW'(0));
    assign full  = (free_ptr == ADDRW'(DEPTH-1));

endmodule

View file

@ -1,256 +0,0 @@
`include "VX_define.vh"
// VX_issue: issue stage.
//
// Decoded instructions arrive on decode_if and are queued by VX_ibuffer. The
// buffer head (ibuffer_if) is fanned out to three consumers:
//   - VX_gpr_stage  : register read, addressed by wid/rs1/rs2/rs3
//   - VX_scoreboard : hazard tracking (via scoreboard_if)
//   - VX_dispatch   : routing to the ALU/LSU/CSR/FPU/GPU request interfaces
// The head instruction is consumed only in a cycle where both the scoreboard
// and the dispatcher are ready. Writeback traffic (writeback_if) is forwarded
// to the GPR stage and, through a reduced copy, to the scoreboard.
module VX_issue #(
parameter CORE_ID = 0 // forwarded to sub-modules; also printed in trace messages
) (
// NOTE(review): macro defined elsewhere; presumably adds the debug-scope ports — confirm
`SCOPE_IO_VX_issue
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
// stall counters exported by this stage
VX_perf_pipeline_if.issue perf_issue_if,
`endif
// decoded instruction input
VX_decode_if.slave decode_if,
// register writeback observed by the GPR stage and the scoreboard
VX_writeback_if.slave writeback_if,
// per-execution-unit request outputs driven by VX_dispatch
VX_alu_req_if.master alu_req_if,
VX_lsu_req_if.master lsu_req_if,
VX_csr_req_if.master csr_req_if,
`ifdef EXT_F_ENABLE
VX_fpu_req_if.master fpu_req_if,
`endif
VX_gpu_req_if.master gpu_req_if
);
// internal interfaces
VX_ibuffer_if ibuffer_if();      // head of the instruction buffer
VX_gpr_req_if gpr_req_if();      // register read request
VX_gpr_rsp_if gpr_rsp_if();      // register read data
VX_writeback_if sboard_wb_if();  // writeback copy handed to the scoreboard
VX_ibuffer_if scoreboard_if();   // view of the buffer head seen by the scoreboard
VX_ibuffer_if dispatch_if();     // view of the buffer head seen by the dispatcher
// GPR request interface
assign gpr_req_if.wid = ibuffer_if.wid;
assign gpr_req_if.rs1 = ibuffer_if.rs1;
assign gpr_req_if.rs2 = ibuffer_if.rs2;
assign gpr_req_if.rs3 = ibuffer_if.rs3;
// scoreboard writeback interface
// (only valid/uuid/wid/PC/rd/eop are copied; tmask and data are not driven here)
assign sboard_wb_if.valid = writeback_if.valid;
assign sboard_wb_if.uuid = writeback_if.uuid;
assign sboard_wb_if.wid = writeback_if.wid;
assign sboard_wb_if.PC = writeback_if.PC;
assign sboard_wb_if.rd = writeback_if.rd;
assign sboard_wb_if.eop = writeback_if.eop;
// scoreboard interface
// valid is qualified with the dispatcher's ready so the scoreboard only sees
// the instruction in a cycle where the dispatcher can take it
assign scoreboard_if.valid = ibuffer_if.valid && dispatch_if.ready;
assign scoreboard_if.uuid = ibuffer_if.uuid;
assign scoreboard_if.wid = ibuffer_if.wid;
assign scoreboard_if.PC = ibuffer_if.PC;
assign scoreboard_if.wb = ibuffer_if.wb;
assign scoreboard_if.rd = ibuffer_if.rd;
assign scoreboard_if.rd_n = ibuffer_if.rd_n;
assign scoreboard_if.rs1_n = ibuffer_if.rs1_n;
assign scoreboard_if.rs2_n = ibuffer_if.rs2_n;
assign scoreboard_if.rs3_n = ibuffer_if.rs3_n;
assign scoreboard_if.wid_n = ibuffer_if.wid_n;
// dispatch interface
// valid is qualified with the scoreboard's ready (mirror of the gating above)
assign dispatch_if.valid = ibuffer_if.valid && scoreboard_if.ready;
assign dispatch_if.uuid = ibuffer_if.uuid;
assign dispatch_if.wid = ibuffer_if.wid;
assign dispatch_if.tmask = ibuffer_if.tmask;
assign dispatch_if.PC = ibuffer_if.PC;
assign dispatch_if.ex_type = ibuffer_if.ex_type;
assign dispatch_if.op_type = ibuffer_if.op_type;
assign dispatch_if.op_mod = ibuffer_if.op_mod;
assign dispatch_if.wb = ibuffer_if.wb;
assign dispatch_if.rd = ibuffer_if.rd;
assign dispatch_if.rs1 = ibuffer_if.rs1;
assign dispatch_if.imm = ibuffer_if.imm;
assign dispatch_if.use_PC = ibuffer_if.use_PC;
assign dispatch_if.use_imm = ibuffer_if.use_imm;
// issue the instruction
// the buffer head is released only when both consumers accept it
assign ibuffer_if.ready = scoreboard_if.ready && dispatch_if.ready;
// NOTE(review): RESET_RELAY is defined elsewhere; presumably declares a local
// copy of `reset` under the given name for each sub-module — confirm
`RESET_RELAY (ibuf_reset);
`RESET_RELAY (scoreboard_reset);
`RESET_RELAY (gpr_reset);
`RESET_RELAY (dispatch_reset);
// instruction buffer: decode_if in, ibuffer_if out
VX_ibuffer #(
.CORE_ID(CORE_ID)
) ibuffer (
.clk (clk),
.reset (ibuf_reset),
.decode_if (decode_if),
.ibuffer_if (ibuffer_if)
);
// scoreboard: sees the gated buffer head and the reduced writeback copy
VX_scoreboard #(
.CORE_ID(CORE_ID)
) scoreboard (
.clk (clk),
.reset (scoreboard_reset),
.writeback_if(sboard_wb_if),
.ibuffer_if (scoreboard_if)
);
// register file stage: receives the full writeback and the read request
VX_gpr_stage #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.reset (gpr_reset),
.writeback_if (writeback_if),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
);
// dispatcher: combines the gated buffer head with the register read data and
// drives the execution-unit request interfaces
VX_dispatch dispatch (
.clk (clk),
.reset (dispatch_reset),
.ibuffer_if (dispatch_if),
.gpr_rsp_if (gpr_rsp_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
`ifdef EXT_F_ENABLE
.fpu_req_if (fpu_req_if),
`endif
.gpu_req_if (gpu_req_if)
);
// debug-scope taps (SCOPE_ASSIGN is defined elsewhere)
`SCOPE_ASSIGN (issue_fire, ibuffer_if.valid && ibuffer_if.ready);
`SCOPE_ASSIGN (issue_uuid, ibuffer_if.uuid);
`SCOPE_ASSIGN (issue_tmask, ibuffer_if.tmask);
`SCOPE_ASSIGN (issue_ex_type, ibuffer_if.ex_type);
`SCOPE_ASSIGN (issue_op_type, ibuffer_if.op_type);
`SCOPE_ASSIGN (issue_op_mod, ibuffer_if.op_mod);
`SCOPE_ASSIGN (issue_wb, ibuffer_if.wb);
`SCOPE_ASSIGN (issue_rd, ibuffer_if.rd);
`SCOPE_ASSIGN (issue_rs1, ibuffer_if.rs1);
`SCOPE_ASSIGN (issue_rs2, ibuffer_if.rs2);
`SCOPE_ASSIGN (issue_rs3, ibuffer_if.rs3);
`SCOPE_ASSIGN (issue_imm, ibuffer_if.imm);
`SCOPE_ASSIGN (issue_use_pc, ibuffer_if.use_PC);
`SCOPE_ASSIGN (issue_use_imm, ibuffer_if.use_imm);
`SCOPE_ASSIGN (scoreboard_delay, !scoreboard_if.ready);
`SCOPE_ASSIGN (dispatch_delay, !dispatch_if.ready);
`SCOPE_ASSIGN (gpr_rs1, gpr_rsp_if.rs1_data);
`SCOPE_ASSIGN (gpr_rs2, gpr_rsp_if.rs2_data);
`SCOPE_ASSIGN (gpr_rs3, gpr_rsp_if.rs3_data);
`SCOPE_ASSIGN (writeback_valid, writeback_if.valid);
`SCOPE_ASSIGN (writeback_uuid, writeback_if.uuid);
`SCOPE_ASSIGN (writeback_tmask, writeback_if.tmask);
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
`ifdef PERF_ENABLE
// Stall counters: each one increments in every cycle where the corresponding
// interface has valid asserted but ready deasserted. They are cleared by the
// module-level `reset`, not by the relayed resets above.
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
reg [`PERF_CTR_BITS-1:0] perf_scb_stalls;
reg [`PERF_CTR_BITS-1:0] perf_alu_stalls;
reg [`PERF_CTR_BITS-1:0] perf_lsu_stalls;
reg [`PERF_CTR_BITS-1:0] perf_csr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_gpu_stalls;
`ifdef EXT_F_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_fpu_stalls;
`endif
always @(posedge clk) begin
if (reset) begin
perf_ibf_stalls <= 0;
perf_scb_stalls <= 0;
perf_alu_stalls <= 0;
perf_lsu_stalls <= 0;
perf_csr_stalls <= 0;
perf_gpu_stalls <= 0;
`ifdef EXT_F_ENABLE
perf_fpu_stalls <= 0;
`endif
end else begin
// instruction buffer back-pressuring decode
if (decode_if.valid & ~decode_if.ready) begin
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'd1;
end
// scoreboard not ready (note: scoreboard_if.valid already includes dispatch_if.ready)
if (scoreboard_if.valid & ~scoreboard_if.ready) begin
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'd1;
end
// dispatcher not ready: attribute the stall to the targeted execution unit
if (dispatch_if.valid & ~dispatch_if.ready) begin
case (dispatch_if.ex_type)
`EX_ALU: perf_alu_stalls <= perf_alu_stalls + `PERF_CTR_BITS'd1;
`ifdef EXT_F_ENABLE
`EX_FPU: perf_fpu_stalls <= perf_fpu_stalls + `PERF_CTR_BITS'd1;
`endif
`EX_LSU: perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'd1;
`EX_CSR: perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'd1;
//`EX_GPU:
// any ex_type not matched above is counted as a GPU stall
default: perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'd1;
endcase
end
end
end
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
assign perf_issue_if.scb_stalls = perf_scb_stalls;
assign perf_issue_if.alu_stalls = perf_alu_stalls;
assign perf_issue_if.lsu_stalls = perf_lsu_stalls;
assign perf_issue_if.csr_stalls = perf_csr_stalls;
assign perf_issue_if.gpu_stalls = perf_gpu_stalls;
`ifdef EXT_F_ENABLE
assign perf_issue_if.fpu_stalls = perf_fpu_stalls;
`endif
`endif
`ifdef DBG_TRACE_CORE_PIPELINE
// Simulation trace: one message per request accepted (valid && ready) on each
// execution-unit interface, including the per-thread operand arrays.
always @(posedge clk) begin
if (alu_req_if.valid && alu_req_if.ready) begin
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd);
`TRACE_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
dpi_trace(", rs2_data=");
`TRACE_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
dpi_trace(" (#%0d)\n", alu_req_if.uuid);
end
if (lsu_req_if.valid && lsu_req_if.ready) begin
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
$time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.offset);
`TRACE_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
dpi_trace(", data=");
`TRACE_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
dpi_trace(" (#%0d)\n", lsu_req_if.uuid);
end
if (csr_req_if.valid && csr_req_if.ready) begin
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
`TRACE_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
dpi_trace(" (#%0d)\n", csr_req_if.uuid);
end
`ifdef EXT_F_ENABLE
if (fpu_req_if.valid && fpu_req_if.ready) begin
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd);
`TRACE_ARRAY1D(fpu_req_if.rs1_data, `NUM_THREADS);
dpi_trace(", rs2_data=");
`TRACE_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
dpi_trace(", rs3_data=");
`TRACE_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
dpi_trace(" (#%0d)\n", fpu_req_if.uuid);
end
`endif
if (gpu_req_if.valid && gpu_req_if.ready) begin
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=",
$time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd);
`TRACE_ARRAY1D(gpu_req_if.rs1_data, `NUM_THREADS);
dpi_trace(", rs2_data=");
`TRACE_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
dpi_trace(", rs3_data=");
`TRACE_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS);
dpi_trace(" (#%0d)\n", gpu_req_if.uuid);
end
end
`endif
endmodule

View file

@ -1,372 +0,0 @@
`include "VX_define.vh"
// VX_lsu_unit: load/store unit.
//
// Request path:
//   lsu_req_if -> address calculation / classification -> req_pipe_reg ->
//   per-thread dcache requests. A request stays in the pipe register until
//   every enabled thread has been accepted by the dcache (req_sent_mask keeps
//   track of the threads that were already sent).
//
// Loads (req_wb == 1, which also covers software prefetches) acquire a slot in
// the `req_metadata` index buffer on their first submission cycle; the slot
// index travels in the dcache tag and is used to recover the metadata when
// responses come back. `rsp_rem_mask` tracks which threads of each slot are
// still awaiting data; the slot is released when the mask becomes empty.
//
// Stores (req_wb == 0) are committed on st_commit_if once all of their thread
// requests have been accepted by the dcache.
//
// When all active threads target the same address (is_dup), only thread 0
// issues a dcache request and its response data is replicated to all threads.
//
// A fence is held at the input until the pipe register is empty and no load
// is pending in the index buffer.
module VX_lsu_unit #(
    parameter CORE_ID = 0   // used in trace and assertion messages only
) (
    `SCOPE_IO_VX_lsu_unit

    input wire clk,
    input wire reset,

    // Dcache interface
    VX_dcache_req_if.master dcache_req_if,
    VX_dcache_rsp_if.slave  dcache_rsp_if,

    // inputs
    VX_lsu_req_if.slave     lsu_req_if,

    // outputs
    VX_commit_if.master     ld_commit_if,
    VX_commit_if.master     st_commit_if
);
    localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
    localparam MEM_ADDRW  = 32 - MEM_ASHIFT;
    localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE);

    // The address classification below compares block addresses
    // (addr >> MEM_ASHIFT), so the region bases must be aligned to the memory
    // block size. The check is done against the block size itself: taking the
    // remainder by MEM_ASHIFT (the log2 shift amount) does not test alignment.
    `STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
    `STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
    // shared memory size must be a whole number of memory blocks
    `STATIC_ASSERT(`SMEM_SIZE == `MEM_BLOCK_SIZE * (`SMEM_SIZE / `MEM_BLOCK_SIZE), ("invalid parameter"))

    // request currently held in the pipe register
    wire                          req_valid;
    wire [`UUID_BITS-1:0]         req_uuid;
    wire [`NUM_THREADS-1:0]       req_tmask;
    wire [`NUM_THREADS-1:0][31:0] req_addr;
    wire [`INST_LSU_BITS-1:0]     req_type;
    wire [`NUM_THREADS-1:0][31:0] req_data;
    wire [`NR_BITS-1:0]           req_rd;
    wire                          req_wb;           // 1: load/prefetch, 0: store
    wire [`NW_BITS-1:0]           req_wid;
    wire [31:0]                   req_pc;
    wire                          req_is_dup;
    wire                          req_is_prefetch;

    wire mbuf_empty;

    wire [`NUM_THREADS-1:0][`CACHE_ADDR_TYPE_BITS-1:0] lsu_addr_type, req_addr_type;

    // full address calculation
    wire [`NUM_THREADS-1:0][31:0] full_addr;
    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        assign full_addr[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
    end

    // detect duplicate addresses
    // (the offset is common to all threads, so comparing base addresses is
    // equivalent to comparing full addresses; inactive threads always match)
    wire [`NUM_THREADS-2:0] addr_matches;
    for (genvar i = 0; i < (`NUM_THREADS-1); i++) begin
        assign addr_matches[i] = (lsu_req_if.base_addr[i+1] == lsu_req_if.base_addr[0]) || ~lsu_req_if.tmask[i+1];
    end

    wire lsu_is_dup = lsu_req_if.tmask[0] && (& addr_matches);

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        // is non-cacheable address
        wire is_addr_nc = (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT));
        if (`SM_ENABLE) begin
            // is shared memory address
            wire is_addr_sm = (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] >= MEM_ADDRW'((`SMEM_BASE_ADDR - `SMEM_SIZE) >> MEM_ASHIFT))
                            & (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] < MEM_ADDRW'(`SMEM_BASE_ADDR >> MEM_ASHIFT));
            assign lsu_addr_type[i] = {is_addr_nc, is_addr_sm};
        end else begin
            assign lsu_addr_type[i] = is_addr_nc;
        end
    end

    // fence stalls the pipeline until all pending requests are sent
    wire fence_wait = lsu_req_if.is_fence && (req_valid || !mbuf_empty);

    wire ready_in;
    wire stall_in = ~ready_in && req_valid;

    wire lsu_valid = lsu_req_if.valid && ~fence_wait;

    // a prefetch travels through the request path like a load
    wire lsu_wb = lsu_req_if.wb | lsu_req_if.is_prefetch;

    VX_pipe_register #(
        .DATAW  (1 + 1 + 1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
        .RESETW (1)
    ) req_pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!stall_in),
        .data_in  ({lsu_valid, lsu_is_dup, lsu_req_if.is_prefetch, lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_wb, lsu_req_if.store_data}),
        .data_out ({req_valid, req_is_dup, req_is_prefetch, req_uuid, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
    );

    // Can accept new request?
    assign lsu_req_if.ready = ~stall_in && ~fence_wait;

    // metadata of the load whose response is currently being received
    wire [`UUID_BITS-1:0]     rsp_uuid;
    wire [`NW_BITS-1:0]       rsp_wid;
    wire [31:0]               rsp_pc;
    wire [`NR_BITS-1:0]       rsp_rd;
    wire                      rsp_wb;
    wire [`INST_LSU_BITS-1:0] rsp_type;
    wire                      rsp_is_dup;
    wire                      rsp_is_prefetch;

    // per-slot mask of threads that have not received their response yet
    reg  [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
    wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
    wire [`NUM_THREADS-1:0] rsp_tmask;

    // threads of the current request already accepted by the dcache
    reg [`NUM_THREADS-1:0] req_sent_mask;
    // high while no thread of the current request has been sent yet
    reg is_req_start;

    wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
    wire mbuf_full;

    `UNUSED_VAR (rsp_type)
    `UNUSED_VAR (rsp_is_prefetch)

    // byte offset of each thread's access inside its 32-bit word
    wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset;
    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        assign req_offset[i] = req_addr[i][1:0];
    end

    wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;

    wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;

    // for duplicate-address requests only thread 0 is sent to the dcache
    wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};

    wire mbuf_push = ~mbuf_full
                  && (| ({`NUM_THREADS{req_valid}} & req_tmask_dup & dcache_req_if.ready))
                  && is_req_start   // first submission only
                  && req_wb;        // loads only

    // release the slot once the last outstanding thread has responded
    wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);

    assign mbuf_raddr = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: `LSUQ_ADDR_BITS];
    `UNUSED_VAR (dcache_rsp_if.tag)

    // do not writeback from software prefetch
    wire req_wb2 = req_wb && ~req_is_prefetch;

    VX_index_buffer #(
        .DATAW (`UUID_BITS + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1),
        .SIZE  (`LSUQ_SIZE)
    ) req_metadata (
        .clk          (clk),
        .reset        (reset),
        .write_addr   (mbuf_waddr),
        .acquire_slot (mbuf_push),
        .read_addr    (mbuf_raddr),
        .write_data   ({req_uuid, req_wid, req_pc, req_tmask, req_rd, req_wb2, req_type, req_offset, req_is_dup, req_is_prefetch}),
        .read_data    ({rsp_uuid, rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup, rsp_is_prefetch}),
        .release_addr (mbuf_raddr),
        .release_slot (mbuf_pop),
        .full         (mbuf_full),
        .empty        (mbuf_empty)
    );

    // every enabled thread is either accepted this cycle or was sent earlier
    wire dcache_req_ready = &(dcache_req_if.ready | req_sent_mask | ~req_tmask_dup);

    wire [`NUM_THREADS-1:0] req_sent_mask_n = req_sent_mask | dcache_req_fire;

    always @(posedge clk) begin
        if (reset) begin
            req_sent_mask <= 0;
            is_req_start  <= 1;
        end else begin
            if (dcache_req_ready) begin
                // request fully submitted: get ready for the next one
                req_sent_mask <= 0;
                is_req_start  <= 1;
            end else begin
                req_sent_mask <= req_sent_mask_n;
                is_req_start  <= (0 == req_sent_mask_n);
            end
        end
    end

    // need to hold the acquired tag index until the full request is submitted
    reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold;
    wire [`LSUQ_ADDR_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold;
    always @(posedge clk) begin
        if (mbuf_push) begin
            req_tag_hold <= mbuf_waddr;
        end
    end

    assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.tmask;

    always @(posedge clk) begin
        if (mbuf_push) begin
            rsp_rem_mask[mbuf_waddr] <= req_tmask_dup;
        end
        if (dcache_rsp_fire) begin
            rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
        end
    end

    // ensure all dependencies for the requests are resolved
    // loads need a free metadata slot on first submission; stores need the
    // store commit port to be ready
    wire req_dep_ready = (req_wb && ~(mbuf_full && is_req_start))
                      || (~req_wb && st_commit_if.ready);

    // DCache Request
    for (genvar i = 0; i < `NUM_THREADS; i++) begin

        reg [3:0]  mem_req_byteen;
        reg [31:0] mem_req_data;

        // byte enables: loads start from all ones, stores enable only the
        // bytes selected by the access size and offset
        always @(*) begin
            mem_req_byteen = {4{req_wb}};
            case (`INST_LSU_WSIZE(req_type))
                0: mem_req_byteen[req_offset[i]] = 1;
                1: begin
                    mem_req_byteen[req_offset[i]] = 1;
                    mem_req_byteen[{req_offset[i][1], 1'b1}] = 1;
                end
                default : mem_req_byteen = {4{1'b1}};
            endcase
        end

        // shift the store data into the byte lanes selected by the offset
        always @(*) begin
            mem_req_data = req_data[i];
            case (req_offset[i])
                1: mem_req_data[31:8]  = req_data[i][23:0];
                2: mem_req_data[31:16] = req_data[i][15:0];
                3: mem_req_data[31:24] = req_data[i][7:0];
                default:;
            endcase
        end

        assign dcache_req_if.valid[i]  = req_valid && req_dep_ready && req_tmask_dup[i] && !req_sent_mask[i];
        assign dcache_req_if.rw[i]     = ~req_wb;
        assign dcache_req_if.addr[i]   = req_addr[i][31:2];
        assign dcache_req_if.byteen[i] = mem_req_byteen;
        assign dcache_req_if.data[i]   = mem_req_data;
        assign dcache_req_if.tag[i]    = {req_uuid, `LSU_TAG_ID_BITS'(req_tag), req_addr_type[i]};
    end

    assign ready_in = req_dep_ready && dcache_req_ready;

    // send store commit
    wire is_store_rsp = req_valid && ~req_wb && dcache_req_ready;

    assign st_commit_if.valid = is_store_rsp;
    assign st_commit_if.uuid  = req_uuid;
    assign st_commit_if.wid   = req_wid;
    assign st_commit_if.tmask = req_tmask;
    assign st_commit_if.PC    = req_pc;
    assign st_commit_if.rd    = 0;
    assign st_commit_if.wb    = 0;
    assign st_commit_if.eop   = 1'b1;
    assign st_commit_if.data  = 0;

    // load response formatting
    reg [`NUM_THREADS-1:0][31:0] rsp_data;
    wire [`NUM_THREADS-1:0] rsp_tmask_qual;

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        // duplicate-address loads take every thread's data from lane 0
        wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i];
        wire [15:0] rsp_data16 = rsp_offset[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
        wire [7:0]  rsp_data8  = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];

        always @(*) begin
            case (`INST_LSU_FMT(rsp_type))
                `INST_FMT_B:  rsp_data[i] = 32'(signed'(rsp_data8));
                `INST_FMT_H:  rsp_data[i] = 32'(signed'(rsp_data16));
                `INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
                `INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
                default:      rsp_data[i] = rsp_data32;
            endcase
        end
    end

    // duplicate-address loads commit the original thread mask at once
    assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.tmask;

    // send load commit
    wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;

    VX_pipe_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
        .RESETW (1)
    ) rsp_pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (!load_rsp_stall),
        .data_in  ({dcache_rsp_if.valid, rsp_uuid, rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
        .data_out ({ld_commit_if.valid, ld_commit_if.uuid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
    );

    // Can accept new cache response?
    assign dcache_rsp_if.ready = ~load_rsp_stall;

    // scope registration
    `SCOPE_ASSIGN (dcache_req_fire,  dcache_req_fire);
    `SCOPE_ASSIGN (dcache_req_uuid,  req_uuid);
    `SCOPE_ASSIGN (dcache_req_addr,  req_addr);
    `SCOPE_ASSIGN (dcache_req_rw,    ~req_wb);
    `SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen);
    `SCOPE_ASSIGN (dcache_req_data,  dcache_req_if.data);
    `SCOPE_ASSIGN (dcache_req_tag,   req_tag);
    `SCOPE_ASSIGN (dcache_rsp_fire,  dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_fire}});
    `SCOPE_ASSIGN (dcache_rsp_uuid,  rsp_uuid);
    `SCOPE_ASSIGN (dcache_rsp_data,  dcache_rsp_if.data);
    `SCOPE_ASSIGN (dcache_rsp_tag,   mbuf_raddr);

`ifndef SYNTHESIS
    // Simulation-only watchdog: remembers when each pending load acquired its
    // slot and asserts if the response takes longer than delay_timeout.
    // Entry layout (MSB..LSB): {wid, PC, rd, uuid, 64-bit issue time, valid}
    reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + `UUID_BITS + 64 + 1)-1:0] pending_reqs;
    // NOTE(review): `1 ** n` is always 1, so the timeout does not scale with
    // the number of enabled cache levels; a larger base was probably intended.
    // Left unchanged because the intended scaling factor is not known here.
    wire [63:0] delay_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));

    always @(posedge clk) begin
        if (reset) begin
            pending_reqs <= '0;
        end else begin
            // bookkeeping is skipped while in reset so that it cannot
            // override the clear above
            if (mbuf_push) begin
                pending_reqs[mbuf_waddr] <= {req_wid, req_pc, req_rd, req_uuid, $time, 1'b1};
            end
            if (mbuf_pop) begin
                pending_reqs[mbuf_raddr] <= '0;
            end
        end

        for (integer i = 0; i < `LSUQ_SIZE; ++i) begin
            if (pending_reqs[i][0]) begin
                `ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout,
                    ("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d (#%0d)",
                        $time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+`UUID_BITS+`NR_BITS+32 +: `NW_BITS],
                                                         pending_reqs[i][1+64+`UUID_BITS+`NR_BITS +: 32],
                                                         pending_reqs[i][1+64+`UUID_BITS +: `NR_BITS],
                                                         pending_reqs[i][1+64 +: `UUID_BITS]));
            end
        end
    end
`endif

`ifdef DBG_TRACE_CORE_DCACHE
    wire dcache_req_fire_any = (| dcache_req_fire);
    always @(posedge clk) begin
        if (lsu_req_if.valid && fence_wait) begin
            dpi_trace("%d: *** D$%0d fence wait\n", $time, CORE_ID);
        end
        if (dcache_req_fire_any) begin
            if (dcache_req_if.rw[0]) begin
                dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
                `TRACE_ARRAY1D(req_addr, `NUM_THREADS);
                dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
                `TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
                dpi_trace(", data=");
                `TRACE_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
                dpi_trace(", (#%0d)\n", req_uuid);
            end else begin
                dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire);
                `TRACE_ARRAY1D(req_addr, `NUM_THREADS);
                dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
                `TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
                dpi_trace(", rd=%0d, is_dup=%b (#%0d)\n", req_rd, req_is_dup, req_uuid);
            end
        end
        if (dcache_rsp_fire) begin
            dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=",
                $time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd);
            `TRACE_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
            dpi_trace(", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid);
        end
    end
`endif

endmodule

View file

@ -1,146 +0,0 @@
`include "VX_define.vh"
// VX_mem_arb
//
// Funnels NUM_REQS memory request ports into a single request port and routes
// the single response port back to the originating requester.
//
//  * Requests : each input tag is widened by LOG_NUM_REQS bits holding the
//               requester index (inserted at bit TAG_SEL_IDX), then the
//               bundled requests go through VX_stream_arbiter.
//  * Responses: the requester index is read back from the response tag at
//               TAG_SEL_IDX, removed from the tag, and used as the select of
//               VX_stream_demux.
//  * NUM_REQS == 1: the module degenerates to plain wiring.
module VX_mem_arb #(
    parameter NUM_REQS      = 1,
    parameter DATA_WIDTH    = 1,
    parameter ADDR_WIDTH    = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_SEL_IDX   = 0,
    parameter BUFFERED_REQ  = 0,
    parameter BUFFERED_RSP  = 0,
    parameter TYPE          = "P",
    parameter DATA_SIZE     = (DATA_WIDTH / 8),
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS),
    parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS
) (
    input wire clk,
    input wire reset,

    // input requests
    input wire [NUM_REQS-1:0]                    req_valid_in,
    input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]  req_tag_in,
    input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0]    req_addr_in,
    input wire [NUM_REQS-1:0]                    req_rw_in,
    input wire [NUM_REQS-1:0][DATA_SIZE-1:0]     req_byteen_in,
    input wire [NUM_REQS-1:0][DATA_WIDTH-1:0]    req_data_in,
    output wire [NUM_REQS-1:0]                   req_ready_in,

    // output request
    output wire                     req_valid_out,
    output wire [TAG_OUT_WIDTH-1:0] req_tag_out,
    output wire [ADDR_WIDTH-1:0]    req_addr_out,
    output wire                     req_rw_out,
    output wire [DATA_SIZE-1:0]     req_byteen_out,
    output wire [DATA_WIDTH-1:0]    req_data_out,
    input wire                      req_ready_out,

    // input response
    input wire                      rsp_valid_in,
    input wire [TAG_OUT_WIDTH-1:0]  rsp_tag_in,
    input wire [DATA_WIDTH-1:0]     rsp_data_in,
    output wire                     rsp_ready_in,

    // output responses
    output wire [NUM_REQS-1:0]                   rsp_valid_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
    output wire [NUM_REQS-1:0][DATA_WIDTH-1:0]   rsp_data_out,
    input wire [NUM_REQS-1:0]                    rsp_ready_out
);
    // widths of the bundles pushed through the arbiter / demux
    localparam REQ_PKT_W = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    localparam RSP_PKT_W = TAG_IN_WIDTH + DATA_WIDTH;

    if (NUM_REQS > 1) begin

        // ---- request path -------------------------------------------------

        // one bundle per requester: {tag+index, addr, rw, byteen, data}
        wire [NUM_REQS-1:0][REQ_PKT_W-1:0] req_pkt_in;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // requester tag with its port index spliced in at TAG_SEL_IDX
            wire [TAG_OUT_WIDTH-1:0] tag_ext;

            VX_bits_insert #(
                .N   (TAG_IN_WIDTH),
                .S   (LOG_NUM_REQS),
                .POS (TAG_SEL_IDX)
            ) bits_insert (
                .data_in  (req_tag_in[r]),
                .sel_in   (LOG_NUM_REQS'(r)),
                .data_out (tag_ext)
            );

            assign req_pkt_in[r] = {tag_ext, req_addr_in[r], req_rw_in[r], req_byteen_in[r], req_data_in[r]};
        end

        VX_stream_arbiter #(
            .NUM_REQS (NUM_REQS),
            .DATAW    (REQ_PKT_W),
            .BUFFERED (BUFFERED_REQ),
            .TYPE     (TYPE)
        ) req_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (req_valid_in),
            .data_in   (req_pkt_in),
            .ready_in  (req_ready_in),
            .valid_out (req_valid_out),
            .data_out  ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
            .ready_out (req_ready_out)
        );

        // ---- response path ------------------------------------------------

        // one bundle per requester: {tag, data}
        wire [NUM_REQS-1:0][RSP_PKT_W-1:0] rsp_pkt_out;

        // destination requester, recovered from the response tag
        wire [LOG_NUM_REQS-1:0] rsp_dst;
        assign rsp_dst = rsp_tag_in[TAG_SEL_IDX +: LOG_NUM_REQS];

        // response tag with the requester index taken out again
        wire [TAG_IN_WIDTH-1:0] tag_stripped;

        VX_bits_remove #(
            .N   (TAG_OUT_WIDTH),
            .S   (LOG_NUM_REQS),
            .POS (TAG_SEL_IDX)
        ) bits_remove (
            .data_in  (rsp_tag_in),
            .data_out (tag_stripped)
        );

        VX_stream_demux #(
            .NUM_REQS (NUM_REQS),
            .DATAW    (RSP_PKT_W),
            .BUFFERED (BUFFERED_RSP)
        ) rsp_demux (
            .clk       (clk),
            .reset     (reset),
            .sel_in    (rsp_dst),
            .valid_in  (rsp_valid_in),
            .data_in   ({tag_stripped, rsp_data_in}),
            .ready_in  (rsp_ready_in),
            .valid_out (rsp_valid_out),
            .data_out  (rsp_pkt_out),
            .ready_out (rsp_ready_out)
        );

        // unpack {tag, data}: data sits in the low DATA_WIDTH bits
        for (genvar r = 0; r < NUM_REQS; r++) begin
            assign rsp_data_out[r] = rsp_pkt_out[r][0 +: DATA_WIDTH];
            assign rsp_tag_out[r]  = rsp_pkt_out[r][DATA_WIDTH +: TAG_IN_WIDTH];
        end

    end else begin

        // single requester: no arbitration, no tag manipulation
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // request pass-through
        assign req_valid_out  = req_valid_in;
        assign req_tag_out    = req_tag_in;
        assign req_addr_out   = req_addr_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_data_out   = req_data_in;
        assign req_ready_in   = req_ready_out;

        // response pass-through
        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule

View file

@ -1,420 +0,0 @@
`include "VX_define.vh"
module VX_mem_unit # (
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_mem_unit
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_perf_memsys_if.master perf_memsys_if,
`endif
// Core <-> Dcache
VX_dcache_req_if.slave dcache_req_if,
VX_dcache_rsp_if.master dcache_rsp_if,
// Core <-> Icache
VX_icache_req_if.slave icache_req_if,
VX_icache_rsp_if.master icache_rsp_if,
// Memory
VX_mem_req_if.master mem_req_if,
VX_mem_rsp_if.slave mem_rsp_if
);
`ifdef PERF_ENABLE
VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_smem_if();
`endif
VX_mem_req_if #(
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`ICACHE_MEM_ADDR_WIDTH),
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
) icache_mem_req_if();
VX_mem_rsp_if #(
.DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH),
.TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
) icache_mem_rsp_if();
VX_mem_req_if #(
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
) dcache_mem_req_if();
VX_mem_rsp_if #(
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH)
) dcache_mem_rsp_if();
VX_dcache_req_if #(
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) dcache_req_tmp_if();
VX_dcache_rsp_if #(
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) dcache_rsp_tmp_if();
`RESET_RELAY (icache_reset);
`RESET_RELAY (dcache_reset);
`RESET_RELAY (mem_arb_reset);
VX_cache #(
.CACHE_ID (`ICACHE_ID),
.CACHE_SIZE (`ICACHE_SIZE),
.CACHE_LINE_SIZE (`ICACHE_LINE_SIZE),
.NUM_BANKS (1),
.WORD_SIZE (`ICACHE_WORD_SIZE),
.NUM_REQS (1),
.CREQ_SIZE (`ICACHE_CREQ_SIZE),
.CRSQ_SIZE (`ICACHE_CRSQ_SIZE),
.MSHR_SIZE (`ICACHE_MSHR_SIZE),
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICACHE_CORE_TAG_ID_BITS),
.MEM_TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH)
) icache (
`SCOPE_BIND_VX_mem_unit_icache
.clk (clk),
.reset (icache_reset),
// Core request
.core_req_valid (icache_req_if.valid),
.core_req_rw (1'b0),
.core_req_byteen ('b0),
.core_req_addr (icache_req_if.addr),
.core_req_data ('x),
.core_req_tag (icache_req_if.tag),
.core_req_ready (icache_req_if.ready),
// Core response
.core_rsp_valid (icache_rsp_if.valid),
.core_rsp_data (icache_rsp_if.data),
.core_rsp_tag (icache_rsp_if.tag),
.core_rsp_ready (icache_rsp_if.ready),
`UNUSED_PIN (core_rsp_tmask),
`ifdef PERF_ENABLE
.perf_cache_if (perf_icache_if),
`endif
// Memory Request
.mem_req_valid (icache_mem_req_if.valid),
.mem_req_rw (icache_mem_req_if.rw),
.mem_req_byteen (icache_mem_req_if.byteen),
.mem_req_addr (icache_mem_req_if.addr),
.mem_req_data (icache_mem_req_if.data),
.mem_req_tag (icache_mem_req_if.tag),
.mem_req_ready (icache_mem_req_if.ready),
// Memory response
.mem_rsp_valid (icache_mem_rsp_if.valid),
.mem_rsp_data (icache_mem_rsp_if.data),
.mem_rsp_tag (icache_mem_rsp_if.tag),
.mem_rsp_ready (icache_mem_rsp_if.ready)
);
VX_cache #(
.CACHE_ID (`DCACHE_ID),
.CACHE_SIZE (`DCACHE_SIZE),
.CACHE_LINE_SIZE (`DCACHE_LINE_SIZE),
.NUM_BANKS (`DCACHE_NUM_BANKS),
.NUM_PORTS (`DCACHE_NUM_PORTS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.NUM_REQS (`DCACHE_NUM_REQS),
.CREQ_SIZE (`DCACHE_CREQ_SIZE),
.CRSQ_SIZE (`DCACHE_CRSQ_SIZE),
.MSHR_SIZE (`DCACHE_MSHR_SIZE),
.MRSQ_SIZE (`DCACHE_MRSQ_SIZE),
.MREQ_SIZE (`DCACHE_MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
.MEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH),
.NC_ENABLE (1)
) dcache (
`SCOPE_BIND_VX_mem_unit_dcache
.clk (clk),
.reset (dcache_reset),
// Core req
.core_req_valid (dcache_req_tmp_if.valid),
.core_req_rw (dcache_req_tmp_if.rw),
.core_req_byteen (dcache_req_tmp_if.byteen),
.core_req_addr (dcache_req_tmp_if.addr),
.core_req_data (dcache_req_tmp_if.data),
.core_req_tag (dcache_req_tmp_if.tag),
.core_req_ready (dcache_req_tmp_if.ready),
// Core response
.core_rsp_valid (dcache_rsp_tmp_if.valid),
.core_rsp_tmask (dcache_rsp_tmp_if.tmask),
.core_rsp_data (dcache_rsp_tmp_if.data),
.core_rsp_tag (dcache_rsp_tmp_if.tag),
.core_rsp_ready (dcache_rsp_tmp_if.ready),
`ifdef PERF_ENABLE
.perf_cache_if (perf_dcache_if),
`endif
// Memory request
.mem_req_valid (dcache_mem_req_if.valid),
.mem_req_rw (dcache_mem_req_if.rw),
.mem_req_byteen (dcache_mem_req_if.byteen),
.mem_req_addr (dcache_mem_req_if.addr),
.mem_req_data (dcache_mem_req_if.data),
.mem_req_tag (dcache_mem_req_if.tag),
.mem_req_ready (dcache_mem_req_if.ready),
// Memory response
.mem_rsp_valid (dcache_mem_rsp_if.valid),
.mem_rsp_data (dcache_mem_rsp_if.data),
.mem_rsp_tag (dcache_mem_rsp_if.tag),
.mem_rsp_ready (dcache_mem_rsp_if.ready)
);
if (`SM_ENABLE) begin
VX_dcache_req_if #(
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) smem_req_if();
VX_dcache_rsp_if #(
.NUM_REQS (`DCACHE_NUM_REQS),
.WORD_SIZE (`DCACHE_WORD_SIZE),
.TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE)
) smem_rsp_if();
`RESET_RELAY (smem_arb_reset);
`RESET_RELAY (smem_reset);
VX_smem_arb #(
.NUM_REQS (2),
.LANES (`NUM_THREADS),
.DATA_SIZE (4),
.TAG_IN_WIDTH (`DCACHE_CORE_TAG_WIDTH),
.TAG_SEL_IDX (0), // SM flag
.TYPE ("P"),
.BUFFERED_REQ (2),
.BUFFERED_RSP (1)
) smem_arb (
.clk (clk),
.reset (smem_arb_reset),
// input request
.req_valid_in (dcache_req_if.valid),
.req_rw_in (dcache_req_if.rw),
.req_byteen_in (dcache_req_if.byteen),
.req_addr_in (dcache_req_if.addr),
.req_data_in (dcache_req_if.data),
.req_tag_in (dcache_req_if.tag),
.req_ready_in (dcache_req_if.ready),
// output requests
.req_valid_out ({smem_req_if.valid, dcache_req_tmp_if.valid}),
.req_rw_out ({smem_req_if.rw, dcache_req_tmp_if.rw}),
.req_byteen_out ({smem_req_if.byteen, dcache_req_tmp_if.byteen}),
.req_addr_out ({smem_req_if.addr, dcache_req_tmp_if.addr}),
.req_data_out ({smem_req_if.data, dcache_req_tmp_if.data}),
.req_tag_out ({smem_req_if.tag, dcache_req_tmp_if.tag}),
.req_ready_out ({smem_req_if.ready, dcache_req_tmp_if.ready}),
// input responses
.rsp_valid_in ({smem_rsp_if.valid, dcache_rsp_tmp_if.valid}),
.rsp_tmask_in ({smem_rsp_if.tmask, dcache_rsp_tmp_if.tmask}),
.rsp_data_in ({smem_rsp_if.data, dcache_rsp_tmp_if.data}),
.rsp_tag_in ({smem_rsp_if.tag, dcache_rsp_tmp_if.tag}),
.rsp_ready_in ({smem_rsp_if.ready, dcache_rsp_tmp_if.ready}),
// output response
.rsp_valid_out (dcache_rsp_if.valid),
.rsp_tmask_out (dcache_rsp_if.tmask),
.rsp_tag_out (dcache_rsp_if.tag),
.rsp_data_out (dcache_rsp_if.data),
.rsp_ready_out (dcache_rsp_if.ready)
);
VX_shared_mem #(
.CACHE_ID (`SMEM_ID),
.CACHE_SIZE (`SMEM_SIZE),
.NUM_BANKS (`SMEM_NUM_BANKS),
.WORD_SIZE (`SMEM_WORD_SIZE),
.NUM_REQS (`SMEM_NUM_REQS),
.CREQ_SIZE (`SMEM_CREQ_SIZE),
.CRSQ_SIZE (`SMEM_CRSQ_SIZE),
.CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE),
.CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE),
.BANK_ADDR_OFFSET (`SMEM_BANK_ADDR_OFFSET)
) smem (
.clk (clk),
.reset (smem_reset),
`ifdef PERF_ENABLE
.perf_cache_if (perf_smem_if),
`endif
// Core request
.core_req_valid (smem_req_if.valid),
.core_req_rw (smem_req_if.rw),
.core_req_byteen (smem_req_if.byteen),
.core_req_addr (smem_req_if.addr),
.core_req_data (smem_req_if.data),
.core_req_tag (smem_req_if.tag),
.core_req_ready (smem_req_if.ready),
// Core response
.core_rsp_valid (smem_rsp_if.valid),
.core_rsp_tmask (smem_rsp_if.tmask),
.core_rsp_data (smem_rsp_if.data),
.core_rsp_tag (smem_rsp_if.tag),
.core_rsp_ready (smem_rsp_if.ready)
);
end else begin
// core to D-cache request
for (genvar i = 0; i < `DCACHE_NUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW ((32-`CLOG2(`DCACHE_WORD_SIZE)) + 1 + `DCACHE_WORD_SIZE + (8*`DCACHE_WORD_SIZE) + `DCACHE_CORE_TAG_WIDTH)
) req_buf (
.clk (clk),
.reset (reset),
.valid_in (dcache_req_if.valid[i]),
.data_in ({dcache_req_if.addr[i], dcache_req_if.rw[i], dcache_req_if.byteen[i], dcache_req_if.data[i], dcache_req_if.tag[i]}),
.ready_in (dcache_req_if.ready[i]),
.valid_out (dcache_req_tmp_if.valid[i]),
.data_out ({dcache_req_tmp_if.addr[i], dcache_req_tmp_if.rw[i], dcache_req_tmp_if.byteen[i], dcache_req_tmp_if.data[i], dcache_req_tmp_if.tag[i]}),
.ready_out (dcache_req_tmp_if.ready[i])
);
end
// D-cache to core response
assign dcache_rsp_if.valid = dcache_rsp_tmp_if.valid;
assign dcache_rsp_if.tmask = dcache_rsp_tmp_if.tmask;
assign dcache_rsp_if.tag = dcache_rsp_tmp_if.tag;
assign dcache_rsp_if.data = dcache_rsp_tmp_if.data;
assign dcache_rsp_tmp_if.ready = dcache_rsp_if.ready;
end
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DCACHE_MEM_TAG_WIDTH'(icache_mem_req_if.tag);
wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_rsp_tag;
assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`ICACHE_MEM_TAG_WIDTH-1:0];
`UNUSED_VAR (icache_mem_rsp_tag)
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH),
.ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`DCACHE_MEM_TAG_WIDTH),
.TYPE ("R"),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.BUFFERED_REQ (1),
.BUFFERED_RSP (2)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
// Source request
.req_valid_in ({dcache_mem_req_if.valid, icache_mem_req_if.valid}),
.req_rw_in ({dcache_mem_req_if.rw, icache_mem_req_if.rw}),
.req_byteen_in ({dcache_mem_req_if.byteen, icache_mem_req_if.byteen}),
.req_addr_in ({dcache_mem_req_if.addr, icache_mem_req_if.addr}),
.req_data_in ({dcache_mem_req_if.data, icache_mem_req_if.data}),
.req_tag_in ({dcache_mem_req_if.tag, icache_mem_req_tag}),
.req_ready_in ({dcache_mem_req_if.ready, icache_mem_req_if.ready}),
// Memory request
.req_valid_out (mem_req_if.valid),
.req_rw_out (mem_req_if.rw),
.req_byteen_out (mem_req_if.byteen),
.req_addr_out (mem_req_if.addr),
.req_data_out (mem_req_if.data),
.req_tag_out (mem_req_if.tag),
.req_ready_out (mem_req_if.ready),
// Source response
.rsp_valid_out ({dcache_mem_rsp_if.valid, icache_mem_rsp_if.valid}),
.rsp_data_out ({dcache_mem_rsp_if.data, icache_mem_rsp_if.data}),
.rsp_tag_out ({dcache_mem_rsp_if.tag, icache_mem_rsp_tag}),
.rsp_ready_out ({dcache_mem_rsp_if.ready, icache_mem_rsp_if.ready}),
// Memory response
.rsp_valid_in (mem_rsp_if.valid),
.rsp_tag_in (mem_rsp_if.tag),
.rsp_data_in (mem_rsp_if.data),
.rsp_ready_in (mem_rsp_if.ready)
);
`ifdef PERF_ENABLE
`UNUSED_VAR (perf_dcache_if.mem_stalls)
`UNUSED_VAR (perf_dcache_if.crsp_stalls)
assign perf_memsys_if.icache_reads = perf_icache_if.reads;
assign perf_memsys_if.icache_read_misses = perf_icache_if.read_misses;
assign perf_memsys_if.dcache_reads = perf_dcache_if.reads;
assign perf_memsys_if.dcache_writes = perf_dcache_if.writes;
assign perf_memsys_if.dcache_read_misses = perf_dcache_if.read_misses;
assign perf_memsys_if.dcache_write_misses= perf_dcache_if.write_misses;
assign perf_memsys_if.dcache_bank_stalls = perf_dcache_if.bank_stalls;
assign perf_memsys_if.dcache_mshr_stalls = perf_dcache_if.mshr_stalls;
if (`SM_ENABLE) begin
assign perf_memsys_if.smem_reads = perf_smem_if.reads;
assign perf_memsys_if.smem_writes = perf_smem_if.writes;
assign perf_memsys_if.smem_bank_stalls = perf_smem_if.bank_stalls;
end else begin
assign perf_memsys_if.smem_reads = 0;
assign perf_memsys_if.smem_writes = 0;
assign perf_memsys_if.smem_bank_stalls = 0;
end
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
always @(posedge clk) begin
if (reset) begin
perf_mem_pending_reads <= 0;
end else begin
perf_mem_pending_reads <= perf_mem_pending_reads +
`PERF_CTR_BITS'($signed(2'((mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw) && !(mem_rsp_if.valid && mem_rsp_if.ready)) -
2'((mem_rsp_if.valid && mem_rsp_if.ready) && !(mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw))));
end
end
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
reg [`PERF_CTR_BITS-1:0] perf_mem_writes;
reg [`PERF_CTR_BITS-1:0] perf_mem_lat;
always @(posedge clk) begin
if (reset) begin
perf_mem_reads <= 0;
perf_mem_writes <= 0;
perf_mem_lat <= 0;
end else begin
if (mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw) begin
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'd1;
end
if (mem_req_if.valid && mem_req_if.ready && mem_req_if.rw) begin
perf_mem_writes <= perf_mem_writes + `PERF_CTR_BITS'd1;
end
perf_mem_lat <= perf_mem_lat + perf_mem_pending_reads;
end
end
assign perf_memsys_if.mem_reads = perf_mem_reads;
assign perf_memsys_if.mem_writes = perf_mem_writes;
assign perf_memsys_if.mem_latency = perf_mem_lat;
`endif
endmodule

View file

@ -1,226 +0,0 @@
`include "VX_define.vh"
// Integer multiply / divide execution unit.
//
// A request is steered by `alu_op` to one of two internal datapaths:
//   * the multiplier path (requests for which `INST_MUL_IS_DIV(alu_op) is false)
//   * the divider path    (requests for which `INST_MUL_IS_DIV(alu_op) is true)
// Each path carries the request metadata (uuid, wid, tmask, PC, rd, wb)
// alongside the data so that it comes out together with the result.
// Both paths feed a single output pipe register; when both hold a valid
// result in the same cycle the multiplier result is forwarded first.
//
// Handshake:
//   valid_in / ready_in   - request side; ready_in reflects the readiness of
//                           the path selected by the current alu_op.
//   valid_out / ready_out - response side; the output register holds its
//                           value while valid_out is high and ready_out is low.
//
// Build-time options:
//   IMUL_DPI - compute the product through the dpi_imul call instead of
//              VX_multiplier.
//   IDIV_DPI - compute quotient/remainder through the dpi_idiv call instead
//              of VX_serial_div.
module VX_muldiv (
    input wire clk,
    input wire reset,

    // Inputs
    input wire [`INST_MUL_BITS-1:0]      alu_op,     // operation selector
    input wire [`UUID_BITS-1:0]          uuid_in,
    input wire [`NW_BITS-1:0]            wid_in,
    input wire [`NUM_THREADS-1:0]        tmask_in,
    input wire [31:0]                    PC_in,
    input wire [`NR_BITS-1:0]            rd_in,
    input wire                           wb_in,
    input wire [`NUM_THREADS-1:0][31:0]  alu_in1,    // per-thread first operand (numerator for div/rem)
    input wire [`NUM_THREADS-1:0][31:0]  alu_in2,    // per-thread second operand (denominator for div/rem)

    // Outputs
    output wire [`UUID_BITS-1:0]         uuid_out,
    output wire [`NW_BITS-1:0]           wid_out,
    output wire [`NUM_THREADS-1:0]       tmask_out,
    output wire [31:0]                   PC_out,
    output wire [`NR_BITS-1:0]           rd_out,
    output wire                          wb_out,
    output wire [`NUM_THREADS-1:0][31:0] data_out,   // per-thread result

    // handshake
    input wire  valid_in,
    output wire ready_in,
    output wire valid_out,
    input wire  ready_out
);
    // Selects which of the two datapaths the incoming request belongs to.
    wire is_div_op = `INST_MUL_IS_DIV(alu_op);

    ///////////////////////////////////////////////////////////////////////////
    // Multiplier path

    wire [`NUM_THREADS-1:0][31:0] mul_result;
    wire [`UUID_BITS-1:0]   mul_uuid_out;
    wire [`NW_BITS-1:0]     mul_wid_out;
    wire [`NUM_THREADS-1:0] mul_tmask_out;
    wire [31:0]             mul_PC_out;
    wire [`NR_BITS-1:0]     mul_rd_out;
    wire                    mul_wb_out;

    wire stall_out;
    wire mul_valid_out;

    wire mul_valid_in = valid_in && !is_div_op;

    // The multiplier pipeline advances unless it holds a valid result that
    // the output stage cannot take this cycle.
    wire mul_ready_in = ~stall_out || ~mul_valid_out;

    // Any multiply op other than MUL returns the upper 32 bits of the product.
    wire is_mulh_in      = (alu_op != `INST_MUL_MUL);
    // Operand A is treated as unsigned only for MULHU; operand B is treated
    // as unsigned for MULHU and MULHSU.
    wire is_signed_mul_a = (alu_op != `INST_MUL_MULHU);
    wire is_signed_mul_b = (alu_op != `INST_MUL_MULHU && alu_op != `INST_MUL_MULHSU);

`ifdef IMUL_DPI

    wire [`NUM_THREADS-1:0][31:0] mul_result_tmp;

    wire mul_fire_in = mul_valid_in && mul_ready_in;

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        // Written as output arguments of a task call inside a procedural
        // block, so these must be variables rather than nets.
        reg [31:0] mul_resultl, mul_resulth;
        always @(*) begin
            dpi_imul (mul_fire_in, alu_in1[i], alu_in2[i], is_signed_mul_a, is_signed_mul_b, mul_resultl, mul_resulth);
        end
        assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : mul_resultl;
    end

    // The result is computed up front; the shift register delays it together
    // with the request metadata by `LATENCY_IMUL stages.
    VX_shift_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
        .DEPTH  (`LATENCY_IMUL),
        .RESETW (1)
    ) mul_shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (mul_ready_in),
        .data_in  ({mul_valid_in,  uuid_in,      wid_in,      tmask_in,      PC_in,      rd_in,      wb_in,      mul_result_tmp}),
        .data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_result})
    );

`else

    wire is_mulh_out;

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        // Extend each operand to 33 bits: the extra MSB replicates the sign
        // bit for signed operands and is zero for unsigned ones, so a single
        // signed 33x33 multiplier serves every multiply variant.
        wire [32:0] mul_in1 = {is_signed_mul_a & alu_in1[i][31], alu_in1[i]};
        wire [32:0] mul_in2 = {is_signed_mul_b & alu_in2[i][31], alu_in2[i]};
    `IGNORE_UNUSED_BEGIN
        wire [65:0] mul_result_tmp;
    `IGNORE_UNUSED_END

        VX_multiplier #(
            .WIDTHA  (33),
            .WIDTHB  (33),
            .WIDTHP  (66),
            .SIGNED  (1),
            .LATENCY (`LATENCY_IMUL)
        ) multiplier (
            .clk    (clk),
            .enable (mul_ready_in),
            .dataa  (mul_in1),
            .datab  (mul_in2),
            .result (mul_result_tmp)
        );

        // Upper or lower 32 bits of the product, chosen by the delayed op flag.
        assign mul_result[i] = is_mulh_out ? mul_result_tmp[63:32] : mul_result_tmp[31:0];
    end

    // Delays the request metadata (and the high/low select) by the same
    // number of stages as the multiplier, using the same enable.
    VX_shift_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
        .DEPTH  (`LATENCY_IMUL),
        .RESETW (1)
    ) mul_shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (mul_ready_in),
        .data_in  ({mul_valid_in,  uuid_in,      wid_in,      tmask_in,      PC_in,      rd_in,      wb_in,      is_mulh_in}),
        .data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
    );

`endif

    ///////////////////////////////////////////////////////////////////////////
    // Divider path

    wire [`NUM_THREADS-1:0][31:0] div_result;
    wire [`UUID_BITS-1:0]   div_uuid_out;
    wire [`NW_BITS-1:0]     div_wid_out;
    wire [`NUM_THREADS-1:0] div_tmask_out;
    wire [31:0]             div_PC_out;
    wire [`NR_BITS-1:0]     div_rd_out;
    wire                    div_wb_out;

    wire is_rem_op_in  = (alu_op == `INST_MUL_REM) || (alu_op == `INST_MUL_REMU);
    wire is_signed_div = (alu_op == `INST_MUL_DIV) || (alu_op == `INST_MUL_REM);
    wire div_valid_in  = valid_in && is_div_op;
    wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
    wire div_ready_in;
    wire div_valid_out;

`ifdef IDIV_DPI

    wire [`NUM_THREADS-1:0][31:0] div_result_tmp;

    wire div_fire_in = div_valid_in && div_ready_in;

    for (genvar i = 0; i < `NUM_THREADS; i++) begin
        // Written as output arguments of a task call inside a procedural
        // block, so these must be variables rather than nets.
        reg [31:0] div_quotient, div_remainder;
        always @(*) begin
            dpi_idiv (div_fire_in, alu_in1[i], alu_in2[i], is_signed_div, div_quotient, div_remainder);
        end
        assign div_result_tmp[i] = is_rem_op_in ? div_remainder : div_quotient;
    end

    // NOTE(review): the divider delay line depth uses `LATENCY_IMUL, the same
    // macro as the multiplier path; confirm that this is intentional.
    VX_shift_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
        .DEPTH  (`LATENCY_IMUL),
        .RESETW (1)
    ) div_shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (div_ready_in),
        .data_in  ({div_valid_in,  uuid_in,      wid_in,      tmask_in,      PC_in,      rd_in,      wb_in,      div_result_tmp}),
        .data_out ({div_valid_out, div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_result})
    );

    assign div_ready_in = div_ready_out || ~div_valid_out;

`else

    wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp;
    wire is_rem_op_out;

    VX_serial_div #(
        .WIDTHN (32),
        .WIDTHD (32),
        .WIDTHQ (32),
        .WIDTHR (32),
        .LANES  (`NUM_THREADS),
        // Must equal the width of the tag_in / tag_out concatenations below:
        // uuid + wid + tmask + PC + rd + wb + is_rem_op.
        .TAGW   (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1)
    ) divide (
        .clk        (clk),
        .reset      (reset),
        .valid_in   (div_valid_in),
        .ready_in   (div_ready_in),
        .signed_mode(is_signed_div),
        .tag_in     ({uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op_in}),
        .numer      (alu_in1),
        .denom      (alu_in2),
        .quotient   (div_result_tmp),
        .remainder  (rem_result_tmp),
        .ready_out  (div_ready_out),
        .valid_out  (div_valid_out),
        .tag_out    ({div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out})
    );

    // Remainder or quotient, chosen by the op flag carried through the tag.
    assign div_result = is_rem_op_out ? rem_result_tmp : div_result_tmp;

`endif

    ///////////////////////////////////////////////////////////////////////////
    // Response merge: the multiplier result wins whenever it is valid;
    // otherwise the divider result is forwarded.

    wire                    rsp_valid = mul_valid_out || div_valid_out;
    wire [`UUID_BITS-1:0]   rsp_uuid  = mul_valid_out ? mul_uuid_out  : div_uuid_out;
    wire [`NW_BITS-1:0]     rsp_wid   = mul_valid_out ? mul_wid_out   : div_wid_out;
    wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
    wire [31:0]             rsp_PC    = mul_valid_out ? mul_PC_out    : div_PC_out;
    wire [`NR_BITS-1:0]     rsp_rd    = mul_valid_out ? mul_rd_out    : div_rd_out;
    wire                    rsp_wb    = mul_valid_out ? mul_wb_out    : div_wb_out;
    wire [`NUM_THREADS-1:0][31:0] rsp_data = mul_valid_out ? mul_result : div_result;

    // The output register is stalled while it holds a valid response that
    // the consumer has not accepted.
    assign stall_out = ~ready_out && valid_out;

    VX_pipe_register #(
        .DATAW  (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
        .RESETW (1)
    ) pipe_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (~stall_out),
        .data_in  ({rsp_valid, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
        .data_out ({valid_out, uuid_out, wid_out, tmask_out, PC_out, rd_out, wb_out, data_out})
    );

    // can accept new request?
    assign ready_in = is_div_op ? div_ready_in : mul_ready_in;

endmodule

Some files were not shown because too many files have changed in this diff Show more