Compare commits

..

1580 commits

Author SHA1 Message Date
tinebp
332e8eeaf9
Merge pull request #244 from vortexgpgpu/bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
Bug fixes
2025-04-13 20:48:36 -07:00
tinebp
5dbfcecc21 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-04-13 17:28:33 -07:00
tinebp
f19335023f CI migration to ubuntu 22.04 2025-04-13 14:16:05 -07:00
tinebp
6a7e402ab4
Merge pull request #239 from vortexgpgpu/bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
regression fix
2025-03-17 04:33:46 -07:00
tinebp
18687d53b3
Merge branch 'master' into bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-03-17 04:33:12 -07:00
tinebp
a35fb4bf1d regression fix 2025-03-17 04:30:50 -07:00
tinebp
9929c42417
Merge pull request #238 from vortexgpgpu/bug_fixes
workaroud fix for opencl kernel include in POCL
2025-03-17 04:07:52 -07:00
tinebp
06e5e2e859 workaroud fix for opencl kernel include in POCL 2025-03-17 04:04:07 -07:00
tinebp
09e89791e5
Merge pull request #237 from vortexgpgpu/bug_fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
CI versioning
2025-03-12 20:21:51 -07:00
tinebp
b35f69f486 CI versioning
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-03-12 17:38:09 -07:00
tinebp
63b41f21c6 xrt sandbox simulation
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-02-12 00:24:36 -08:00
tinebp
cc7fdf2fbd fixed github actions versioning 2025-02-11 22:03:32 -08:00
tinebp
a9352a3b64 minor update 2025-02-11 21:56:05 -08:00
tinebp
9a2709db08 xrt sandbox synthesis build fix
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-02-11 14:10:29 -08:00
tinebp
4785736e4d minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-26 22:55:22 -08:00
tinebp
38861d9aaf minor updates 2025-01-26 22:40:34 -08:00
tinebp
82b0eeded6 minor update 2025-01-26 19:35:56 -08:00
tinebp
22398c991d ramulator memory addressing bug fix + platform memory refactoring
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-26 06:28:51 -08:00
tinebp
e80ee2c819 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-22 04:56:49 -08:00
tinebp
9dc1d3f688 Merge branch 'bug_fixes' 2025-01-22 02:49:55 -08:00
tinebp
0c1bc17c09
Merge pull request #222 from MichaelJSr/simx-vpu-toggle
Toggle the RISC-V Vector Extension on and off
2025-01-22 02:48:38 -08:00
tinebp
4e83c28d04 minor bug fix 2025-01-21 23:07:41 -08:00
tinebp
2c940cf509 AXI adapter bug fix
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-21 06:14:22 -08:00
tinebp
fb4527fe95 cache repl reset
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-21 01:06:22 -08:00
tinebp
d1f37fc629 minor update 2025-01-20 22:19:42 -08:00
tinebp
001a107395 bram reset bug fix 2025-01-20 22:16:05 -08:00
tinebp
fce24b9535 fixed XRT AFU done handshake
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-17 23:58:23 -08:00
MichaelJSr
6d27575db3 Revert some of "Added ifndef statements for the vector extension anywhere they didn't exist already" 2025-01-14 21:56:39 -08:00
MichaelJSr
a2cfeffcfe Added ifndef statements for the vector extension anywhere they didn't exist already
Added ifndef statements for the vector extension anywhere they didn't exist already

more ifdef statements

more ifdef

Update decode.cpp

Update decode.cpp

Update decode.cpp
2025-01-14 21:29:47 -08:00
MichaelJSr
cb491ddb53 test
Revert "test"

This reverts commit 393e347c2faba260f1469667596e22dc2aa16553.
2025-01-14 21:22:28 -08:00
tinebp
43b143bba6 bug fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-14 03:44:53 -08:00
tinebp
87297e0eca bug fixes 2025-01-14 02:21:17 -08:00
MichaelJSr
929ef1b6e2 Remove unused EXTV code, clean up code, pragma once around vpu.h 2025-01-13 16:45:13 -08:00
tinebp
83ba1cc3dc minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2025-01-11 20:23:26 -08:00
tinebp
347889c504 minor updates
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2025-01-11 03:24:06 -08:00
tinebp
083cf04afd timing optimizations 2025-01-11 03:19:55 -08:00
tinebp
84b1c8a43c BRAM optimizations 2025-01-11 03:18:11 -08:00
tinebp
43d33b942e minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-26 11:32:57 -08:00
tinebp
adf60e7e35 minor update 2024-12-26 10:58:13 -08:00
tinebp
8fda922570 minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-26 10:20:57 -08:00
tinebp
53900bee4f bug fixes 2024-12-26 10:01:36 -08:00
tinebp
704f525fd6 memory mem_coalescer miss perf counter
RTL perf counters refactoring
2024-12-26 08:00:36 -08:00
tinebp
f478bdcf25 memory coalescer misses perf counter
rtl perf interface refactoring
2024-12-26 07:56:28 -08:00
tinebp
01974e124f
Merge pull request #216 from sij814/simx2
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
Simx ICache DCache Changes
2024-12-18 03:17:14 -08:00
tinebp
100e4e3970 multi-ports fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-17 22:38:23 -08:00
tinebp
4819891a5e minor update 2024-12-17 18:06:52 -08:00
tinebp
066ab105eb multiports fixes 2024-12-17 16:23:08 -08:00
tinebp
a98d2e24e5 rtlsim multibanks
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-16 22:10:57 -08:00
sij814
572a397018 changed versions 2024-12-15 15:11:13 -08:00
sij814
cad129c64c added icache dcache overlap 2024-12-15 14:55:21 -08:00
tinebp
bae24e589c minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-14 02:04:50 -08:00
tinebp
461f2cbbc9 Intel Opae AFU support for multiport
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-13 21:20:38 -08:00
tinebp
7975a5a38c fixed AXI adapter 2024-12-12 20:52:45 -08:00
tinebp
f635d71ba4 minor fix
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-11 10:31:03 -08:00
tinebp
70ade222b1 multiport 2024-12-10 23:25:05 -08:00
tinebp
aa6a47eb11 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vector, 32) (push) Has been cancelled
CI / tests (vector, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-05 23:35:15 -08:00
tinebp
115ff2b599 minor fixes 2024-12-05 22:38:04 -08:00
tinebp
896c59306c adding clang-format file 2024-12-05 15:58:04 -08:00
tinebp
6bbcd4ebaf vector updates with clang formatting 2024-12-05 15:55:57 -08:00
tinebp
6b23d290c3 vector ISA updates 2024-12-05 14:43:51 -08:00
tinebp
5d91fe58ad
Merge pull request #211 from MichaelJSr/riscv-vector-isa-simx-clean
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Vector extension simx with fallbacks for testcases and clean history
2024-12-05 10:18:45 -08:00
tinebp
5891a1e592
Merge branch 'master' into riscv-vector-isa-simx-clean 2024-12-05 10:17:05 -08:00
tinebp
18ae57cc7f Merge branch 'bug_fixes'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-04 22:20:52 -08:00
tinebp
a760d909cb minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-12-04 21:36:31 -08:00
tinebp
86f20b27dd SimX multi-ports memory fixes 2024-12-04 21:11:51 -08:00
tinebp
3ace9bbeda minor updates
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-04 06:00:19 -08:00
tinebp
30b0daf050 SimX multiports support fixes
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-03 05:46:33 -08:00
tinebp
24ca4f03aa minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-12-02 19:53:28 -08:00
tinebp
3b454efd56 fixes to SimX's multiports memory support 2024-12-02 17:51:42 -08:00
MichaelJSr
951746badc Commented out some vector testcases that dont pass 2024-11-28 05:13:56 -08:00
MichaelJSr
6c2cbdfec2 made -v a valid option for simx simulator 2024-11-28 02:12:01 -08:00
MichaelJSr
973fcd7845 Merge branch 'riscv-vector-isa-simx-clean' of https://github.com/MichaelJSr/vortex into riscv-vector-isa-simx-clean 2024-11-27 23:53:57 -08:00
MichaelJSr
5eecd0e987 Added case for vector-test due to different exitcode
The vector tests need the cluster exitcodes
2024-11-27 23:50:57 -08:00
MichaelJSr
073e0ddd10 Adds the riscv vector extension into simx
Added vector regression test to ci.yml
2024-11-27 23:22:22 -08:00
MichaelJSr
c05a0571c8 Added vector regression test to ci.yml 2024-11-27 13:10:08 -08:00
MichaelJSr
1e4583ac17 Adds the riscv vector extension into simx 2024-11-26 18:41:01 -08:00
tinebp
3e4bbfc9f0 minor update 2024-11-22 11:12:17 -08:00
tinebp
7c4ce74801 memory unit timing optimization 2024-11-21 16:48:41 -08:00
tinebp
18bf49d1e0 minor update 2024-11-21 16:48:18 -08:00
tinebp
180735c531 fifoqueue area optimization 2024-11-21 16:47:00 -08:00
tinebp
8d8769c710 stream_buffer area optimization
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-20 19:15:51 -08:00
tinebp
b0c48e7a46 stream buffer area optimization 2024-11-20 18:27:52 -08:00
tinebp
320c090613 xilinx asynchronous bram patch fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-19 01:57:33 -08:00
tinebp
b48b605b51 remove deprecared yosys link 2024-11-15 03:42:06 -08:00
tinebp
8230b37411 fixed opae build bug
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-14 11:42:21 -08:00
tinebp
5844de8c4d Merge branch 'rtl_cache'
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-11-13 22:27:11 -08:00
tinebp
dfc7b6178c cleanup old cache test
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-11-13 20:56:06 -08:00
tinebp
bffc6d9610 enabling Vivado's asynchronous bram suppot via direct netlist transformation
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-11-13 16:20:25 -08:00
Hyesoon Kim
6dbbc62b04
Merge pull request #200 from Udit8348/develop-docker-micro
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
Develop Docker Micro
2024-11-09 10:45:16 -05:00
Udit Subramanya
667fa1662d update docker for micro apptainer 2024-11-01 14:46:38 -04:00
Udit Subramanya
e73e1c2bb3 update xilinx fpga steps with environment variable steps 2024-11-01 13:56:01 -04:00
Udit Subramanya
27f3d6dde6 Merge remote-tracking branch 'origin/master' into develop-documentation 2024-10-25 13:16:28 -04:00
Udit Subramanya
d475e9d201 remove duplicate block 2024-10-25 12:59:24 -04:00
Blaise Tine
ce510d78c7 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-24 05:02:46 -07:00
Blaise Tine
eecff10dea minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-24 02:51:08 -07:00
Blaise Tine
98b58606e5 merge fixes
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-24 02:18:00 -07:00
Blaise Tine
8b172d07ec revert xilinx's asynchronous bram workaround 2024-10-24 01:44:55 -07:00
Hyesoon Kim
f68cc95cbe
Merge branch 'master' into develop-documentation 2024-10-23 19:41:29 -04:00
Hyesoon Kim
659ad87f93
Merge pull request #188 from Udit8348/develop-docker
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Vortex Dockerfiles
2024-10-23 19:41:06 -04:00
Blaise Tine
22ade31fd5 minor updates 2024-10-23 15:55:11 -07:00
Hyesoon Kim
2d3f4b6efc
Merge branch 'master' into develop-docker 2024-10-23 18:08:19 -04:00
Blaise Tine
cc5ac8388b minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-23 14:03:19 -07:00
Blaise Tine
ec12b50007 minor udpate 2024-10-23 13:09:34 -07:00
Blaise Tine
e7d09feb4a decode => demux 2024-10-23 13:06:45 -07:00
Blaise Tine
7ab58111d8 minor update 2024-10-23 12:30:39 -07:00
Blaise Tine
1c384c096d minor update 2024-10-23 12:27:44 -07:00
Udit Subramanya
24d018b4c9 documentation updates 2024-10-23 05:18:53 -04:00
Blaise Tine
1fa4603fa2 disable sformatf during synthesis
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-23 01:14:19 -07:00
Blaise Tine
3a3bb7b70a cleanup deleted files
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / tests (vm, 32) (push) Has been cancelled
CI / tests (vm, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-21 22:46:04 -07:00
Blaise Tine
ff50306833 minor update 2024-10-21 22:24:54 -07:00
Udit Subramanya
519023fb2b add citation for MICRO 21 paper 2024-10-21 15:39:10 -04:00
Udit Subramanya
8fdca0e52a correct vitis env 2024-10-21 15:38:53 -04:00
Udit Subramanya
f184b57c24 merge upstream and resolve deleted file conflict 2024-10-21 13:45:32 -04:00
Udit Subramanya
d584e7bac1 intermediate docs update 2024-10-21 13:28:57 -04:00
Blaise Tine
2b3d1f0860 minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-20 23:54:42 -07:00
Blaise Tine
fccbadfe25 minor update 2024-10-20 23:32:22 -07:00
Blaise Tine
1e4f0fa0bd minor update 2024-10-20 21:42:02 -07:00
Blaise Tine
22c3828bf5 minor update 2024-10-20 21:12:49 -07:00
Blaise Tine
acc1e3dfd8 minor update 2024-10-20 20:07:34 -07:00
Blaise Tine
0f380a3d78 minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-20 07:49:27 -07:00
Blaise Tine
9373e21950 minor update 2024-10-20 07:32:32 -07:00
Blaise Tine
2bd22253eb minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-19 22:14:38 -07:00
Blaise Tine
4206ffdb80 minor update 2024-10-19 21:39:34 -07:00
Blaise Tine
b6bd6467ef cache hit timing optimization 2024-10-19 20:04:51 -07:00
Blaise Tine
8f29ad58ae block ram redesign to support synthesizable write-first mode
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-18 23:54:20 -07:00
Blaise Tine
6b1091e08f minor update
Some checks failed
CI / setup (push) Has been cancelled
CI / build (32) (push) Has been cancelled
CI / build (64) (push) Has been cancelled
CI / tests (cache, 32) (push) Has been cancelled
CI / tests (cache, 64) (push) Has been cancelled
CI / tests (config1, 32) (push) Has been cancelled
CI / tests (config1, 64) (push) Has been cancelled
CI / tests (config2, 32) (push) Has been cancelled
CI / tests (config2, 64) (push) Has been cancelled
CI / tests (debug, 32) (push) Has been cancelled
CI / tests (debug, 64) (push) Has been cancelled
CI / tests (opencl, 32) (push) Has been cancelled
CI / tests (opencl, 64) (push) Has been cancelled
CI / tests (regression, 32) (push) Has been cancelled
CI / tests (regression, 64) (push) Has been cancelled
CI / tests (scope, 32) (push) Has been cancelled
CI / tests (scope, 64) (push) Has been cancelled
CI / tests (stress, 32) (push) Has been cancelled
CI / tests (stress, 64) (push) Has been cancelled
CI / tests (synthesis, 32) (push) Has been cancelled
CI / tests (synthesis, 64) (push) Has been cancelled
CI / complete (push) Has been cancelled
2024-10-17 14:07:22 -07:00
Blaise Tine
91fee5da11 minor update 2024-10-17 11:25:17 -07:00
Blaise Tine
077b682d7d minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-17 04:58:29 -07:00
Blaise Tine
5971158f43 minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
2024-10-16 20:22:42 -07:00
Blaise Tine
a7ba377581 minor update 2024-10-16 18:04:11 -07:00
Blaise Tine
f695e4d754 minor update 2024-10-15 14:59:31 -07:00
Blaise Tine
e06333b3c0 minor update 2024-10-15 11:28:33 -07:00
Blaise Tine
645befdce6 minor update 2024-10-15 11:23:29 -07:00
Blaise Tine
e62b638d88 minor update 2024-10-15 10:36:05 -07:00
Blaise Tine
1d5e4f63dd minor update 2024-10-15 03:24:02 -07:00
Blaise Tine
68b78fc42f minor update 2024-10-15 02:32:17 -07:00
Blaise Tine
db98965f56 minor update 2024-10-15 02:27:07 -07:00
Blaise Tine
03a1e25828 adding cache replacement policy 2024-10-15 00:28:09 -07:00
tinebp
5d7e53f7d7
Merge pull request #194 from MichaelJSr/add-back-ecall-ebreak-traps
Add back the "ecall" and "ebreak" instruction traps for riscv-vector test functionality
2024-10-14 20:46:29 -07:00
Blaise Tine
37757fab8f fixed fifo_queue support for BRAM 2024-10-14 15:48:49 -07:00
MichaelJSr
0d04423074 Readded the ecall and ebreak instruction traps so that the riscv-vector tests run properly 2024-10-14 10:12:33 -07:00
Blaise Tine
fe5442dbb3 minor update 2024-10-13 23:34:57 -07:00
Blaise Tine
2a2fc2ae39 minor update 2024-10-13 23:25:41 -07:00
Blaise Tine
26df675e24 minor update 2024-10-13 20:08:38 -07:00
Blaise Tine
f63233334e minor update 2024-10-13 16:22:59 -07:00
Blaise Tine
9e5638c9b0 minor update 2024-10-13 12:06:55 -07:00
Blaise Tine
1d626588ef minor update 2024-10-13 11:49:12 -07:00
Blaise Tine
37f4d05393 minor update 2024-10-13 10:44:04 -07:00
Blaise Tine
9f32e5693c minor update 2024-10-13 10:41:32 -07:00
Blaise Tine
684f2e2d3d minor update 2024-10-13 03:42:51 -07:00
Blaise Tine
28bf27e951 rtl cache redesign to support xilinx bram types 2024-10-13 03:40:45 -07:00
Udit Subramanya
8155173aab add documentation based on intial feedback 2024-10-11 07:40:21 -07:00
Udit Subramanya
d3df61abb0 add initial development and production dockerfiles 2024-10-09 12:32:49 -04:00
Blaise Tine
a5381fd788 async bram optimization 2024-10-09 04:14:15 -07:00
Blaise Tine
f49084b298 improving block rams inference with registered read address. 2024-10-08 23:44:36 -07:00
Blaise Tine
ee96d4334b VX_onehot_encoder update 2024-10-08 23:01:01 -07:00
Blaise Tine
c91f9684fc minor update 2024-10-05 18:35:26 -07:00
Blaise Tine
07ce16e75c minor update 2024-10-05 17:42:26 -07:00
Blaise Tine
2eeb2ac532 fixed memory flags propagation through the cache hierarchy 2024-10-05 13:46:10 -07:00
Hyesoon Kim
91c135ac15
Merge pull request #185 from vortexgpgpu/tensor-core
Merge tensor-core and devel branch into master
2024-10-05 10:46:50 -04:00
jaewon-lee-github
faa3b9a469 Merge branch 'master' into tensor-core 2024-10-04 12:58:51 -04:00
Hyesoon Kim
847562be9e
Merge pull request #187 from vortexgpgpu/revert-181-master
Revert "Initial HBM changes for RTL"
2024-10-04 11:37:06 -04:00
Jaewon Lee
0bf79a0f05
Revert "Initial HBM changes for RTL" 2024-10-04 10:13:31 -04:00
Jaewon Lee
119805a959
Merge branch 'master' into tensor-core 2024-10-04 10:03:00 -04:00
tinebp
bc765d10bd
Merge pull request #181 from sij814/master
Initial HBM changes for RTL
2024-10-04 06:45:44 -07:00
Udit Subramanya
208c5b3804 reorg docs 2024-10-04 08:56:49 -04:00
Udit Subramanya
32b0376b28 remove old artifacts 2024-10-03 17:43:39 -04:00
Udit Subramanya
6a447350b7 remove redundant docs after consolidating 2024-10-03 17:42:47 -04:00
Udit Subramanya
dd16d70515 contributing and fpga docs 2024-10-03 17:29:21 -04:00
jaewon-lee-github
5cf6797bd3 - Change STARTUP_ADDR to use the same 0x80000000 address
- Fix environment variable for vortex kernel directories
2024-10-03 15:19:39 -04:00
jaewon-lee-github
bbc02cc013 merged with master 2024-10-03 13:44:39 -04:00
jaewon-lee-github
b7531c9de1 support 64bit 2024-10-02 17:46:01 -04:00
Jaewon Lee
6c725978b4
Merge pull request #184 from vortexgpgpu/develop
Develop
2024-10-02 15:41:35 -04:00
jaewon-lee-github
d1175a03c9 update the code accessing registers in obsoleted way 2024-10-02 14:16:57 -04:00
Blaise Tine
83badaac86 minor update 2024-10-02 11:10:33 -07:00
Blaise Tine
4b8ca42e85 minor update 2024-10-02 09:27:26 -07:00
Blaise Tine
ad7377c8ba minor udpate 2024-10-02 07:41:29 -07:00
Blaise Tine
5cb033ae13 minor update 2024-10-02 07:12:30 -07:00
Blaise Tine
44ebc12ed4 minor update 2024-10-01 00:55:45 -07:00
Blaise Tine
a3aca502b7 minor update 2024-09-30 14:20:48 -07:00
Jaewon Lee
4a606061d2
Merge branch 'develop' into tensor-core 2024-09-30 16:48:47 -04:00
Blaise Tine
ee69024841 minor update 2024-09-30 09:17:42 -07:00
Blaise Tine
6f81df5edb axi_adapter large tags support 2024-09-30 06:25:50 -07:00
Blaise Tine
1deb13c469 minor update 2024-09-30 03:36:00 -07:00
Blaise Tine
2d00cec9d3 minor update 2024-09-30 02:12:30 -07:00
Blaise Tine
a3031922ce minor update 2024-09-29 09:07:45 -07:00
Blaise Tine
60860ec684 minor update 2024-09-29 09:03:24 -07:00
Blaise Tine
cf3909a910 minor update 2024-09-29 07:52:53 -07:00
Blaise Tine
5c694a997c update scope tap testing 2024-09-29 00:09:25 -07:00
Blaise Tine
30571d716c updated scope CI test 2024-09-28 21:37:48 -07:00
Blaise Tine
b8475c65dc adjusting platform caps 2024-09-28 21:25:55 -07:00
Blaise Tine
4329e3f968 minor update 2024-09-28 20:28:57 -07:00
Blaise Tine
b634f9f47d count_leading_zeros fix 2024-09-28 20:15:03 -07:00
Blaise Tine
87e613d29d fixed XRT AFU deadlock on exit 2024-09-28 05:20:37 -07:00
Blaise Tine
eee037ffcd minor update 2024-09-27 20:59:29 -07:00
Blaise Tine
9027555e6a minor update 2024-09-27 20:30:57 -07:00
Blaise Tine
989341a77d minor udpate 2024-09-27 15:13:42 -07:00
Blaise Tine
ec8cc4c84d minor update 2024-09-27 14:21:09 -07:00
Blaise Tine
6e40162027 extending scope triggering to capture continous firing events 2024-09-27 11:36:31 -07:00
Blaise Tine
f2c970868e minor update 2024-09-27 10:02:59 -07:00
Blaise Tine
533ddffc47 cleanup multi-dimensional array to improve synthesis compatibility 2024-09-27 09:48:05 -07:00
Blaise Tine
e9f19a0bf9 fixed BRAM multi-dimensional array bug on Xilinx Vivado 2024-09-27 09:13:24 -07:00
Blaise Tine
5db1937a5e fixed scope parser array indexing 2024-09-27 07:52:38 -07:00
Blaise Tine
9a3eb74051 adding scope.py support for structs 2024-09-26 09:50:38 -07:00
Blaise Tine
27543e240e minor update 2024-09-25 19:11:40 -07:00
Blaise Tine
4f11278d2c scope_tap bug fixes and improvements 2024-09-25 10:28:19 -07:00
Blaise Tine
0e3206747a scope_tap bug fix 2024-09-24 21:46:26 -07:00
Blaise Tine
ce4f90e843 scope analyzer updates 2024-09-24 01:20:26 -07:00
Blaise Tine
a9a5ded030 bitmanip logceil fix 2024-09-23 23:54:43 -07:00
Hyesoon Kim
b5f541b891
Merge pull request #180 from vortexgpgpu/vortex_vm
Vortex vm
2024-09-24 02:48:46 -04:00
Blaise Tine
2cf483ddf5 xrt afu bug fixes 2024-09-23 21:01:24 -07:00
Blaise Tine
9a6dbdf1a9 xrtsim addressing fix 2024-09-23 08:56:57 -07:00
Blaise Tine
818522f7e4 CI scripts update 2024-09-23 05:57:08 -07:00
Blaise Tine
030071571d test memory bank interleaving 2024-09-23 04:30:28 -07:00
Blaise Tine
e5e9a5c2e9 build fix 2024-09-23 04:03:04 -07:00
Blaise Tine
406583c0bd build fix 2024-09-23 04:00:23 -07:00
Blaise Tine
29ea3041c4 build fix 2024-09-23 03:52:03 -07:00
Blaise Tine
828b8827e7 build error fix 2024-09-23 03:36:35 -07:00
Blaise Tine
a80be895ba fixed compiler errors 2024-09-23 03:05:46 -07:00
Blaise Tine
923d2bb94c mark as executable 2024-09-23 02:30:34 -07:00
Blaise Tine
e38c2c1fba xilinx xrt platforms configuration 2024-09-23 02:12:47 -07:00
Blaise Tine
8bb5e5ab8a build error fix 2024-09-22 22:47:23 -07:00
Blaise Tine
b146fab290 xrt kernel registers update 2024-09-22 22:46:55 -07:00
Blaise Tine
15ead4acf6 xrt with merge memory interface 2024-09-22 22:46:10 -07:00
Blaise Tine
f5eca75311 handling synthesis builds with simulation enabled (e.g xrt with hw_emu) 2024-09-22 22:43:48 -07:00
Blaise Tine
5e123d0507 minor update 2024-09-22 22:31:54 -07:00
Blaise Tine
54f0c8e270 scope analyzer optimization 2024-09-22 22:31:14 -07:00
Blaise Tine
b8199decf4 opaesim and xrtsim multi-bank memory support 2024-09-22 03:54:40 -07:00
Blaise Tine
00feb8b424 scope analyzer bug fixes 2024-09-21 08:39:20 -07:00
Blaise Tine
7938c7be5f synthesis updates 2024-09-20 20:35:58 -07:00
sij814
3bac7eae6a changed fpnew commit 2024-09-20 16:52:12 -07:00
Blaise Tine
a61f97f6c6 minor update 2024-09-20 08:09:46 -07:00
Jaewon Lee
5ab13559e0
Update README.md 2024-09-20 10:08:53 -04:00
jaewon-lee-github
4383631543 Add BARE mode test and print out VM info 2024-09-20 09:58:50 -04:00
jaewon-lee-github
9cc0010835 change verilator path 2024-09-20 09:19:17 -04:00
jaewon-lee-github
9902856221 VERILATOR 2024-09-20 09:05:54 -04:00
jaewon-lee-github
e5f2442353 Update Virtual Memory testing 2024-09-20 08:58:11 -04:00
Udit Subramanya
ff9d52c162 Merge remote-tracking branch 'upstream/master' into develop-documentation 2024-09-20 08:26:08 -04:00
Udit Subramanya
acc8221a7e Merge remote-tracking branch 'origin/master' into develop-documentation
Bring in latest docs, to update in this branch
2024-09-20 08:20:17 -04:00
Blaise Tine
63cce35c1a scope taps annotation 2024-09-19 23:33:23 -07:00
Blaise Tine
d2db612bb4 adding scope support to xrtsim 2024-09-19 22:33:28 -07:00
sij814
e8ce3878bb Merge branch 'master' of github.com:vortexgpgpu/vortex 2024-09-19 13:36:46 -07:00
sij814
380c36d930 merged rtlsim branch 2024-09-19 13:31:25 -07:00
sij814
4fff940e42 two different versions of bypass connection 2024-09-19 13:21:14 -07:00
Blaise Tine
2d7f9eae0a minor update 2024-09-19 04:44:00 -07:00
Blaise Tine
a37309c6b0 xrtsim implementation 2024-09-19 04:24:20 -07:00
sij814
48f86a48f6 changed mem_req_arb in VX_cache_l3.sv to accept data_out 2024-09-18 22:05:40 -07:00
Blaise Tine
f0bff2a4a2 minor update 2024-09-17 20:31:12 -07:00
Blaise Tine
8e3bd5696b xilinx synthesis debugging fixes 2024-09-17 19:52:51 -07:00
sij814
992f8d97d3 sliced the bypass requests 2024-09-17 19:47:13 -07:00
Blaise Tine
8908f3e006 minor update 2024-09-17 10:05:17 -07:00
Blaise Tine
f2c1ad7831 minor update 2024-09-17 09:56:54 -07:00
Blaise Tine
8135f72cc9 configure update 2024-09-17 06:45:22 -07:00
Blaise Tine
50458bbae0 xilinx synthesis debugging foxes 2024-09-17 06:22:07 -07:00
Jaewon Lee
5a2d4e6c26
Merge pull request #179 from vortexgpgpu/jaewon-lee-github-patch-2
Update README.md
2024-09-13 10:50:03 -04:00
Jaewon Lee
0a48d98bc1
Update README.md
It has the instruction about the other branch(Vortex_vm).
2024-09-13 09:39:28 -04:00
Udit Subramanya
dc76101068 contribution stats 2024-09-13 09:09:38 -04:00
Blaise Tine
bbe9c0372f minor update 2024-09-13 00:35:42 -07:00
Blaise Tine
263893eb7c minor update 2024-09-13 00:03:08 -07:00
Blaise Tine
b77fff764e minor update 2024-09-12 22:12:03 -07:00
Blaise Tine
145eacc451 minor update 2024-09-12 21:08:19 -07:00
Blaise Tine
1ddd1ba1cc minor update 2024-09-12 20:15:41 -07:00
Blaise Tine
49ed88e59f minor update 2024-09-12 20:12:18 -07:00
Blaise Tine
7208f251b7 minor update 2024-09-12 20:07:19 -07:00
Blaise Tine
6cf0d9f7b4 fixed generate labels lint warnings to improve hardware debugging 2024-09-12 20:00:50 -07:00
Hyesoon Kim
ccf0135d97
Merge pull request #178 from vortexgpgpu/vortex_vm
Vortex Virtual Memory Support
2024-09-12 14:12:04 -04:00
jaewon-lee-github
daec55ae95 change the ci version 2024-09-12 11:24:37 -04:00
Jaewon Lee
e91eb4aed4 merge from master branch 2024-09-12 10:32:02 -04:00
Blaise Tine
5c72685356 minor update 2024-09-11 17:27:36 -07:00
Blaise Tine
f00f96377b disable tracing on synthesis mode 2024-09-11 17:16:34 -07:00
Blaise Tine
230b29de6f minor update 2024-09-11 06:57:43 -07:00
Blaise Tine
bb9ae8576d adding uuid support to memory transactions 2024-09-11 06:47:33 -07:00
Blaise Tine
ae24264a2a minor update 2024-09-11 05:40:05 -07:00
Blaise Tine
83d65e2cf1 tracing update 2024-09-10 16:22:34 -07:00
Blaise Tine
63840a20da minor update 2024-09-09 06:10:56 -07:00
Blaise Tine
b56aa00f4f reset cleanup 2024-09-08 20:37:28 -07:00
Blaise Tine
202af1e783 rtl bug fix 2024-09-08 20:33:27 -07:00
Blaise Tine
207840a97e minor update 2024-09-08 17:49:28 -07:00
Blaise Tine
b1dc2fba42 cache read byteenable bug fix 2024-09-08 17:47:17 -07:00
Blaise Tine
cc105eaea9 tracing refactoring 2024-09-08 14:54:04 -07:00
Blaise Tine
fa11d4c502 TRACING refactoring to support vivado/quartus simulators 2024-09-08 05:26:00 -07:00
Blaise Tine
6626f9201c minor update 2024-09-08 02:46:32 -07:00
Blaise Tine
7823f5529c minor update 2024-09-08 01:38:48 -07:00
Blaise Tine
7bef62aef8 minor update 2024-09-08 01:37:20 -07:00
Blaise Tine
1a35d3fed1 fixed byteen signal on memory read 2024-09-07 21:33:45 -07:00
Blaise Tine
0cbdc3be9e opae afu x warning fixes 2024-09-07 21:32:11 -07:00
Blaise Tine
aa1489d8eb fixed trace.vcd copy 2024-09-07 03:45:23 -07:00
Blaise Tine
a75ed78bf2 fixed getopt exitcode with invalid parameters 2024-09-07 03:42:46 -07:00
Blaise Tine
2041a4ad4a xrt.ini update 2024-09-07 01:43:30 -07:00
Blaise Tine
bfbe642170 adding RTL uuigen 2024-09-07 01:36:17 -07:00
Blaise Tine
fdc62c5f98 minor update 2024-09-06 01:27:54 -07:00
Blaise Tine
e178eb1330 operands's x-propagation bug fix (caught using vivado simulator) 2024-09-05 21:35:10 -07:00
Blaise Tine
7cbb026a12 minor update 2024-09-05 21:34:44 -07:00
Blaise Tine
efc8834c75 xilinx afu reset refactoring 2024-09-05 21:32:25 -07:00
Blaise Tine
8db77ea1cd minor updates 2024-09-05 21:29:01 -07:00
Blaise Tine
cf9172b8fc minor update 2024-09-04 20:16:54 -07:00
Blaise Tine
fb0cd1c272 minor update 2024-09-04 18:24:42 -07:00
Blaise Tine
0aaca84016 minor update 2024-09-04 18:22:37 -07:00
Blaise Tine
8d1baf677d minor update 2024-09-04 18:17:27 -07:00
Blaise Tine
37555b1208 minor update 2024-09-04 15:18:39 -07:00
Blaise Tine
96fb3566a9 minor update 2024-09-04 13:44:23 -07:00
Blaise Tine
7ca9a5e87e reset relay refactory 2024-09-04 13:39:51 -07:00
Blaise Tine
039e5e2ffc minor update 2024-09-04 03:52:55 -07:00
Blaise Tine
32738e0b74 CI script update 2024-09-04 03:39:29 -07:00
Blaise Tine
fd5903fef1 minor update 2024-09-04 03:34:25 -07:00
Blaise Tine
335b53475a minor updates 2024-09-04 02:01:59 -07:00
Blaise Tine
f9230bdac3 minor update 2024-09-03 06:14:09 -07:00
Blaise Tine
19d6142023 fixed fpu serialization 2024-09-03 04:54:29 -07:00
Blaise Tine
c28449f515 minor update 2024-09-02 21:58:12 -07:00
Blaise Tine
45ed8abf22 minor update 2024-09-02 19:39:28 -07:00
Blaise Tine
d16aee3ecd minor update 2024-09-02 10:37:51 -07:00
Blaise Tine
c4df7221c6 Merge branch 'master' of https://github.com/vortexgpgpu/vortex into develop 2024-09-02 04:13:35 -07:00
Blaise Tine
33bec667c2 minor update 2024-09-02 04:12:58 -07:00
Blaise Tine
a17580375b fpu timing optimization 2024-09-02 03:11:26 -07:00
Blaise Tine
40e04a409e adding PE switch 2024-09-02 02:34:08 -07:00
Blaise Tine
d7eae0c886 minor update 2024-09-02 02:33:30 -07:00
Blaise Tine
32636fac70 minor update 2024-09-01 10:15:02 -07:00
Blaise Tine
8215089194 minor update 2024-09-01 04:03:46 -07:00
Blaise Tine
d979cf277f decoder logic specialization 2024-09-01 04:00:57 -07:00
Blaise Tine
72c63a47f3 adding read-first mode support to block ram 2024-09-01 01:19:24 -07:00
Blaise Tine
431c0cfc46 minor update 2024-08-31 02:14:08 -07:00
Blaise Tine
83ea236b84 minor update 2024-08-31 01:58:21 -07:00
Blaise Tine
01fedb066c minor updates 2024-08-31 01:57:08 -07:00
Blaise Tine
7d0c141129 minor updates 2024-08-31 01:44:41 -07:00
Blaise Tine
6eee0728fb minor update 2024-08-29 03:22:09 -07:00
Blaise Tine
fc5bb387a2 minor update 2024-08-29 03:02:50 -07:00
Blaise Tine
961b9c3d63 minor update 2024-08-29 02:41:36 -07:00
Blaise Tine
5f2bf2418b minor update 2024-08-29 02:40:54 -07:00
Blaise Tine
847dee3473 minor update 2024-08-29 01:30:54 -07:00
Blaise Tine
105f884129 migration from fpnew to latest cvfpu core to resolve fpnew bugs and feature limitations 2024-08-29 00:48:51 -07:00
Blaise Tine
fa1fd39645 minor updates 2024-08-28 21:31:09 -07:00
Blaise Tine
a38960674e SimX split.N fix 2024-08-28 21:10:05 -07:00
Blaise Tine
0f41774fea SimX's decode minor fix 2024-08-28 19:07:15 -07:00
Blaise Tine
41e41c9688 adjust SimX's split/join to match RTL. 2024-08-28 18:46:30 -07:00
Blaise Tine
74a47ebbe4 displatch unit fix 2024-08-28 04:36:13 -07:00
Blaise Tine
6c1e785004 minor update 2024-08-28 03:08:08 -07:00
Blaise Tine
4cc7426c44 minor update 2024-08-28 02:52:20 -07:00
Blaise Tine
cf42025c20 minor update 2024-08-28 01:35:55 -07:00
Blaise Tine
f4426e0127 fpu timing optimization 2024-08-28 01:27:51 -07:00
Blaise Tine
91b8c6e67a fixed xilinx fpu ip dut synthesis 2024-08-28 00:40:28 -07:00
Blaise Tine
c162d04b8f minor update 2024-08-27 03:17:01 -07:00
Blaise Tine
4480ed8b0e minor update 2024-08-27 01:19:02 -07:00
Blaise Tine
5adfd5ec68 minor update 2024-08-26 23:45:00 -07:00
Blaise Tine
6d5e71a062 minor update 2024-08-25 20:12:05 -07:00
Blaise Tine
9718a5b405 fpu timing optimization 2024-08-25 19:20:07 -07:00
Blaise Tine
51719f69bb minor update 2024-08-25 16:51:00 -07:00
Blaise Tine
2ca3439109 xrt runtime update 2024-08-25 15:52:27 -07:00
Blaise Tine
088aed022f minor update 2024-08-25 15:52:17 -07:00
Blaise Tine
df3fc150f4 minor update 2024-08-25 06:06:52 -07:00
Blaise Tine
b40441b68f minor update 2024-08-25 05:12:44 -07:00
Blaise Tine
bdcc5f5991 FPU decode optimization 2024-08-25 05:11:48 -07:00
Blaise Tine
b6879b25e3 switching to python3 dependency 2024-08-24 20:46:25 -07:00
Blaise Tine
592297582e fpu_unit timing optimization 2024-08-24 19:44:03 -07:00
Blaise Tine
e538dfa316 minor update 2024-08-24 19:11:06 -07:00
Blaise Tine
e05fe0d75b dispatch_unit speed up 2024-08-24 18:11:06 -07:00
Blaise Tine
383dc1f6b8 timing optimization 2024-08-24 17:38:01 -07:00
Blaise Tine
3b336d7fb3 register vs combinational signals naming consistency 2024-08-24 16:59:18 -07:00
Blaise Tine
4570a20eee minor update 2024-08-24 12:15:12 -07:00
Blaise Tine
10a8705161 minor update 2024-08-24 10:42:48 -07:00
Blaise Tine
1f5cc53434 minor update 2024-08-24 09:16:23 -07:00
Blaise Tine
0ed589a3bf minor update 2024-08-24 07:49:08 -07:00
Blaise Tine
cd97945d0d minor update 2024-08-24 04:51:27 -07:00
Blaise Tine
31a5ab714e xbar timing optimitzaion 2024-08-24 01:57:45 -07:00
Blaise Tine
370daf1025 fifo refactoring 2024-08-24 01:56:56 -07:00
Blaise Tine
bcf7d9f960 timing optimization 2024-08-24 01:56:14 -07:00
Blaise Tine
ade6b2c985 timing optimization 2024-08-24 01:55:25 -07:00
Blaise Tine
4f9b15d96d minor update 2024-08-24 01:54:17 -07:00
Hanran Wu
f57841608e Merge branch 'vortex_vm_rebased' into vortex_vm 2024-08-23 17:45:59 -04:00
Hanran Wu
35c15f554d Merge branch 'mranduril-vortex_vm_rebased' into vortex_vm 2024-08-23 17:45:03 -04:00
Hanran Wu
ea9560b33b merge 2024-08-23 17:44:24 -04:00
Hanran Wu
86b0bdd93c merge into vortex_vm 2024-08-23 17:20:42 -04:00
Hanran Wu
66fd2d4e2d update ci 2024-08-23 16:42:31 -04:00
Blaise Tine
6eeb8eac0f minor update 2024-08-23 00:54:48 -07:00
Blaise Tine
df99b9da0e minor update 2024-08-22 16:29:27 -07:00
sij814
7ae7ffa007 pulled master and made initial changes 2024-08-22 18:37:34 +02:00
Blaise Tine
e4bfa47895 adding test coverage for xilinx synthesis 2024-08-22 02:51:17 -07:00
Blaise Tine
ca3499f3df minor update 2024-08-21 17:54:30 -07:00
Blaise Tine
811ceb5dc0 minor update 2024-08-21 13:00:05 -07:00
Blaise Tine
177f0efc59 minor update 2024-08-21 03:39:09 -07:00
Blaise Tine
9797c6c48a minor udpate 2024-08-21 03:38:15 -07:00
Blaise Tine
771a10ea0c minor update 2024-08-20 23:31:16 -07:00
Blaise Tine
005d480bb4 minor updates 2024-08-20 23:30:44 -07:00
Blaise Tine
5e241c153c Ci script update 2024-08-19 18:36:37 -07:00
Blaise Tine
693a9f648d Ci script update 2024-08-19 18:25:38 -07:00
Blaise Tine
1814ff6d40 xilinx standalone synthesis fixes 2024-08-18 22:02:37 -07:00
Blaise Tine
2762bd53ff minor updates 2024-08-18 18:56:17 -07:00
Blaise Tine
8e9026524a synthesis of the memory unit and local memory 2024-08-18 16:03:59 -07:00
Blaise Tine
3612ceda80 minor update 2024-08-18 02:13:43 -07:00
Blaise Tine
a2b24b4ed0 xilinx non-xrt synthesis fixes 2024-08-18 02:10:34 -07:00
Blaise Tine
de47307428 minor update 2024-08-18 01:57:36 -07:00
Blaise Tine
06ef53025d minor update 2024-08-17 21:19:10 -07:00
tinebp
6c607d32fe
Merge pull request #169 from sij814/simx
simx HBM initial implementation
2024-08-17 20:24:37 -07:00
Blaise Tine
f6daf9bb84 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-08-17 19:10:29 -07:00
tinebp
adcad92a73 extending OS support 2024-08-17 19:09:02 -07:00
Blaise Tine
51862dbc06 doc update 2024-08-17 19:05:47 -07:00
Blaise Tine
9d3d35c6b4 operands timing optimization 2024-08-17 16:03:02 -07:00
Blaise Tine
b6663eaff9 output register fix 2024-08-17 15:49:49 -07:00
Blaise Tine
a03471837c minor update 2024-08-17 15:21:13 -07:00
Blaise Tine
9638f5a6e6 minor update 2024-08-17 06:05:26 -07:00
Blaise Tine
62a4ee7a3e minor update 2024-08-17 05:32:21 -07:00
Blaise Tine
1f43d4a2fc ASE simulation fixes + docs update 2024-08-17 04:55:32 -07:00
Blaise Tine
8fe02093e2 minor udpate 2024-08-17 04:11:16 -07:00
Blaise Tine
20b82fd34d update configure to deep-copy syn directory tree 2024-08-17 04:09:50 -07:00
Blaise Tine
4b6f8efeaa removing trace_pkg to fix unsupported package dependencies 2024-08-17 04:07:10 -07:00
Blaise Tine
9fc9b43307 OPAE runtime bug fix 2024-08-17 02:18:04 -07:00
Blaise Tine
304761c6fc fixed blackbox temp driver mode with --rebuild=3 2024-08-16 22:32:35 -07:00
sij814
e34e4b790a forced memory bank change in opae 2024-08-16 16:53:18 -07:00
sij814
7a61b67170 added CAPS 2024-08-16 15:47:03 -07:00
Blaise Tine
f6ed49f19c minor update 2024-08-16 08:19:55 -07:00
Blaise Tine
d5fa26350c minor update 2024-08-16 01:35:20 -07:00
sij814
a523afbebe removed jammy 2024-08-15 22:30:32 -07:00
Blaise Tine
b83190c6e1 minor update 2024-08-15 21:29:06 -07:00
Blaise Tine
f4983cb380 core memory unit refactoring 2024-08-15 21:12:28 -07:00
Blaise Tine
65bd9afabb reset relay cleanup 2024-08-15 20:35:07 -07:00
Hanran Wu
54045fa05b skip build and tests ci stages for vm_disable due to verilator dependency 2024-08-15 23:04:08 -04:00
Hanran Wu
bc936c67a3 update ci 2024-08-15 23:02:03 -04:00
Hanran Wu
4a213e7c20 update readme 2024-08-15 23:00:14 -04:00
Hanran Wu
26df47d6e2 add a subset of tests for vm and update ci 2024-08-15 22:55:29 -04:00
sij814
d7e8fd74ff source_id = 0 2024-08-15 19:40:52 -07:00
Blaise Tine
49738672ec minor update 2024-08-15 19:34:50 -07:00
Hanran Wu
48ff4ee4e0 add VM_ENABLE flag to configure&compilation 2024-08-15 16:34:36 -04:00
Blaise Tine
aaff18cca2 bug fix 2024-08-15 05:11:51 -07:00
Blaise Tine
2b22d47dd9 minor update 2024-08-15 05:11:19 -07:00
Blaise Tine
98db249500 minor updates 2024-08-15 01:56:31 -07:00
Blaise Tine
9c346dee86 read-only cache optimization 2024-08-15 01:55:22 -07:00
Blaise Tine
58e5435f0f a priority arbiter performs better than round-robin during commit arbitration 2024-08-13 22:30:54 -07:00
Blaise Tine
cfb5cd5326 arbiter runtime assertion 2024-08-13 21:39:08 -07:00
Blaise Tine
aef1411af5 scoreboard timing optimization 2024-08-13 21:38:33 -07:00
tinebp
e23d569076
Merge pull request #171 from dhy2000/master
Same as #170
2024-08-13 18:48:07 -07:00
Blaise Tine
d6f1393627 memory coalescer timing optimization 2024-08-13 18:34:06 -07:00
sij814
ea34239b43 changes made for initial feedback 2024-08-13 16:52:27 -07:00
Hanran Wu
7528dd9c0f debug and remove travis.yml 2024-08-13 18:18:54 -04:00
Hanran Wu
19b5496f00 modify makefile to only compile simx 2024-08-13 17:54:06 -04:00
Blaise Tine
ee39da74b4 increasing reset delay 2024-08-13 04:14:02 -07:00
donghanyuan
1a9a04ac76 replace local static allocator to global static
Ensure MemoryPool construct before SimPlatform,
thus MemoryPool destruct after SimPlatform.

Avoid use-after-free issue clearing events_ of SimPlatform
after SimPortEvent's allocator is destructed.
2024-08-13 18:13:41 +08:00
Blaise Tine
76f4cd66d3 minor update 2024-08-13 03:08:48 -07:00
Blaise Tine
3ae3afc59b minor update 2024-08-12 21:34:41 -07:00
Blaise Tine
5126a7c472 minor update 2024-08-12 21:32:20 -07:00
Blaise Tine
6c1ee9bfea arbiter fixes 2024-08-12 20:08:08 -07:00
Blaise Tine
14ae4b8c13 minor update 2024-08-12 20:07:50 -07:00
Blaise Tine
2edda834c3 minor update 2024-08-12 18:11:21 -07:00
sij814
47427ab22e regression test with source_id 0 2024-08-12 16:22:30 -07:00
Blaise Tine
d74ee43a66 minor update 2024-08-12 14:19:09 -07:00
Blaise Tine
79362dea4b minor update 2024-08-12 14:01:11 -07:00
Blaise Tine
9053919e92 fixed synthesis warning 2024-08-12 05:24:46 -07:00
Blaise Tine
ed66ee2806 arbitration update 2024-08-12 04:09:56 -07:00
sij814
bab9496117 debugging segmentation fault with 8 clusters 2024-08-12 03:52:48 -07:00
sij814
de81baaabf hbm for vortex 2.2 2024-08-12 02:52:47 -07:00
Blaise Tine
6f3add273d elastic buffer lutram refactoring 2024-08-11 20:28:39 -07:00
Blaise Tine
1fb0691bc7 minor update 2024-08-11 19:50:31 -07:00
sij814
c94c3651ec configure change 22.04 2024-08-11 14:47:43 -07:00
Blaise Tine
8fb73b6da7 fair arbiter optimization 2024-08-10 22:11:49 -07:00
Blaise Tine
32a882e26f arbiters optimization 2024-08-10 18:41:10 -07:00
Blaise Tine
eaa7ed7fe2 rtl arbiter update 2024-08-10 02:38:54 -07:00
Blaise Tine
c8d0357ac6 rtl arbiter fixes 2024-08-10 00:37:56 -07:00
Blaise Tine
229641441f adding static assertion 2024-08-09 18:13:52 -07:00
Blaise Tine
42afa2472f cdiv 2024-08-09 18:11:12 -07:00
Blaise Tine
455fc8389c refactoring priority encoder 2024-08-09 13:58:19 -07:00
Blaise Tine
ab21f76aed minor update 2024-08-07 19:44:24 -07:00
Blaise Tine
f1e79f4c0f fixed toolchain install on centos/7 2024-08-07 19:44:04 -07:00
tinebp
932c435a20
Merge pull request #101 from dhy2000/master
fix #100: change return type to float
2024-08-07 18:09:44 -07:00
tinebp
aad3b26332
Merge branch 'master' into master 2024-08-07 18:09:27 -07:00
Blaise Tine
30ebb65fc3 minor update 2024-08-06 23:36:37 -07:00
Blaise Tine
0d7012e69e minor update 2024-08-06 21:27:08 -07:00
Blaise Tine
bddf276335 memory request flags refactoring 2024-08-06 19:05:22 -07:00
Blaise Tine
e86eeab8ea Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2024-08-06 17:25:35 -07:00
Blaise Tine
7cdfac8ea1 fixed kernel lib dependency 2024-08-06 17:20:01 -07:00
tinebp
32497e19df
Merge pull request #166 from JacobLevinson/stencil3d-fixes
Updated prints and code spacing for stencil3d test
2024-08-06 14:32:18 -07:00
Blaise Tine
df8355ac76 fixed minor typo 2024-08-06 13:11:28 -07:00
Blaise Tine
c265ff97b8 minor updates 2024-08-06 12:58:58 -07:00
Blaise Tine
d276875ab9 fixed memory block size configuration 2024-08-06 12:47:05 -07:00
Blaise Tine
50b12ef754 fixed memory block size configuration 2024-08-06 12:46:19 -07:00
Blaise Tine
0096e60f03 Making LUT optimization optional 2024-08-06 12:38:30 -07:00
Blaise Tine
9dcb377b67 Moving from one-hot to binary muxing optimization
FPGA synthesis is suboptimal with one-hot muxing, particularly Xilinx Vivado.
This change fixed Xilinx synthesis for 256-thread cores.
2024-08-06 12:32:02 -07:00
Jacob Levinson
fbedc567e5 Updated prints and code spacing 2024-08-04 23:39:13 -07:00
Blaise Tine
b81ae8e431 reset network cleanup 2024-08-04 22:50:28 -07:00
Blaise Tine
42c62001ec fair arbiter speed optimization 2024-08-04 22:13:47 -07:00
Blaise Tine
e663db9b5a Merge branch 'develop' 2024-08-04 14:17:08 -07:00
Blaise Tine
9ddb06bf56 Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2024-08-04 14:16:22 -07:00
Blaise Tine
59108525e1 minor update 2024-08-04 14:16:08 -07:00
Blaise Tine
74579fd4dc minor update 2024-08-04 14:13:26 -07:00
Blaise Tine
668b590876 minor update 2024-08-03 18:25:24 -07:00
Blaise Tine
cb1e49d3f6 minor update 2024-08-03 17:08:16 -07:00
Blaise Tine
4b6a48c716 minor update 2024-08-03 13:37:01 -07:00
Blaise Tine
07981a585c minor update 2024-08-03 13:00:34 -07:00
Blaise Tine
4b93c9ffb5 minor updates 2024-08-03 11:49:12 -07:00
Blaise Tine
35fb50f9a6 minor updates 2024-08-03 10:43:08 -07:00
Blaise Tine
fce935f1c4 add debug level to FPGA makefile 2024-08-03 00:54:58 -07:00
Blaise Tine
fc0392e5e3 fixed typo 2024-08-03 00:54:17 -07:00
Blaise Tine
d09bce011b local memory test update 2024-08-03 00:52:41 -07:00
Blaise Tine
4c1b3fd88d local memory area optimization 2024-08-03 00:10:06 -07:00
Blaise Tine
52c5f1ff6b minor update 2024-08-02 23:32:34 -07:00
tinebp
3d51b8bcfd
Merge pull request #165 from JacobLevinson/fix-incomplete-type-error
Fix invalid use of incomplete type error by including <array>
2024-08-02 19:52:23 -07:00
Blaise Tine
76f74b8a59 minor update 2024-08-02 19:50:34 -07:00
Blaise Tine
e8cdae1225 minor fix in VX_local_mem.sv 2024-08-02 19:19:57 -07:00
Blaise Tine
067b7a8726 fixed typo 2024-08-02 18:57:07 -07:00
Jacob Levinson
3b81a32b12 Fix invalid use of incomplete type error by including <array> 2024-08-02 18:25:56 -07:00
Blaise Tine
410c47e2ae adding out_buf to VX_pe_serializer + testing 2024-08-02 18:16:50 -07:00
Blaise Tine
f723e7baf5 registering local memory bram output 2024-08-02 18:15:08 -07:00
Blaise Tine
9c5aee5e25 bram reset fix 2024-08-02 18:13:58 -07:00
Blaise Tine
c1b8ecfd1a block ram reset refactoring 2024-08-02 16:39:40 -07:00
Blaise Tine
16c209ac0c fixed operand collector critical path 2024-08-02 16:23:36 -07:00
tinebp
09028d8cee
Merge pull request #144 from nayannair/tensor-core
dummy commit
2024-08-02 15:45:18 -07:00
tinebp
c00368b7c6
Merge pull request #159 from JacobLevinson/readme_fixes
Fixes readme by removing $ from shell commands + small tweaks
2024-08-02 15:41:17 -07:00
Blaise Tine
3075c1737b fixed bug in VX_onehot_encoder.sv (see issue #126) 2024-08-02 15:12:10 -07:00
Blaise Tine
29c5a28273 minor update 2024-08-02 00:36:10 -07:00
Blaise Tine
e53b295eea writeback cache fixes 2024-07-31 20:53:40 -07:00
Blaise Tine
0a3035e6a7 minor update 2024-07-31 14:51:34 -07:00
Blaise Tine
81251b1af8 minor update 2024-07-31 13:55:44 -07:00
Blaise Tine
ef5d58dc9e cache regression tests 2024-07-31 11:45:51 -07:00
Blaise Tine
4dc34cfd2d hw arbitration update 2024-07-31 10:52:57 -07:00
Blaise Tine
3fe8f963aa writeback cache fixes 2024-07-31 02:20:32 -07:00
Blaise Tine
fc50b66819 regression script update 2024-07-31 00:16:13 -07:00
Blaise Tine
609155e490 fixed CSV trace converter 2024-07-31 00:15:32 -07:00
Blaise Tine
516ce43a5c testing writeback cache 2024-07-30 22:21:10 -07:00
Blaise Tine
2e77c9eec2 writeback cache fixes 2024-07-30 22:14:06 -07:00
Blaise Tine
95ca49a85f writeback cache fixes 2024-07-30 20:38:06 -07:00
Blaise Tine
e1c5b5277e minor update 2024-07-30 17:55:21 -07:00
Blaise Tine
029609b3fd disable atexit() support, not needed for static kernels. 2024-07-30 14:47:08 -07:00
Blaise Tine
2bc8a881b6 fixed trace log formatting 2024-07-30 12:05:36 -07:00
Jaewon Lee
9cc3e0a459
Merge pull request #151 from mranduril/vortex_vm
Add virtual memory allocator for vortex vm
2024-07-30 13:59:14 -04:00
Blaise Tine
abf8d2c51a minor update 2024-07-30 05:59:50 -07:00
Blaise Tine
6e55840a32 minor update 2024-07-30 03:32:49 -07:00
Blaise Tine
047960ac4d minor update 2024-07-30 02:51:12 -07:00
Blaise Tine
54e6421854 minor update 2024-07-30 02:42:25 -07:00
Blaise Tine
f46b764748 minor update 2024-07-30 01:59:50 -07:00
Blaise Tine
99cbae1820 writeback cache deadlock fix 2024-07-30 01:55:32 -07:00
Blaise Tine
edf960d9ed writeback cache fixes 2024-07-30 00:58:31 -07:00
Blaise Tine
22b0525c51 writeback cache fixes 2024-07-30 00:06:44 -07:00
Hanran Wu
34f7e3c982 config ramulator2 2024-07-30 00:18:28 -04:00
Blaise Tine
5600a8dd42 writeback cache fixes 2024-07-29 19:49:12 -07:00
Blaise Tine
bce5226614 minor update 2024-07-29 17:07:01 -07:00
Blaise Tine
2a9895c337 minor update 2024-07-29 16:20:23 -07:00
Blaise Tine
eab1791d46 CI script update 2024-07-29 16:07:46 -07:00
Blaise Tine
724cb40849 minor update 2024-07-29 16:01:08 -07:00
Blaise Tine
a62e651b02 minor update 2024-07-29 15:51:52 -07:00
Blaise Tine
3223a40a76 Verilator optimization flags update 2024-07-29 14:58:35 -07:00
Blaise Tine
8457163114 adding dirty bytes configuration to writeback cache 2024-07-29 14:43:49 -07:00
Blaise Tine
a91dabcc72 minor update 2024-07-29 13:52:04 -07:00
Blaise Tine
0709d656ca writeback cache fixes 2024-07-29 13:32:35 -07:00
Jaewon Lee
30258c04d2 Apply suggestions from code review
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
9db3870309 Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
34ef500910 Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
735b713613 Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Jaewon Lee
8d978f23ce Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-29 16:29:39 -04:00
Hanran Wu
78fc053ad5 save work before pull 2024-07-29 16:29:39 -04:00
Hanran Wu
6add1e16f6 debugged virtual memory allocator 2024-07-29 16:29:39 -04:00
Hanran Wu
49255bfa69 add virtual mem allocator addr spacereservation 2024-07-29 16:29:39 -04:00
Hanran Wu
31133ae6e9 update destructor of vx_device 2024-07-29 16:29:39 -04:00
Hanran Wu
7916684c36 vpn allocator debug complete, now pass demo&vecadd tests 2024-07-29 16:29:39 -04:00
Hanran Wu
aa45f55126 vpn allocator added but doesn't pass any tests 2024-07-29 16:29:39 -04:00
Jaewon Lee
5877cfe8ae Change STARTUP_ADDR from 0x40000000 to 0x80000000(32b) and 0x180000000(64b) 2024-07-29 16:29:39 -04:00
Jaewon Lee
52233fe13a fixed compile error 2024-07-29 16:29:39 -04:00
Jaewon Lee
6d480b3da1 satp_ is not set, then we skip VAT 2024-07-29 16:29:39 -04:00
Jaewon Lee
2e61dad11f Update README.md
Update TOOLDIR to vortex-toolchain-2024-6-14/
2024-07-29 16:29:39 -04:00
Jaewon Lee
c99e4b37b6 Update README.md 2024-07-29 16:29:02 -04:00
Jaewon Lee
3a5278a62e 64bit support 2024-07-29 15:31:47 -04:00
Jaewon Lee
e21bf9afbd Merge Vortex 2.2 2024-07-29 15:31:17 -04:00
Jaewon Lee
9942f251e0 remove # 2024-07-29 15:31:17 -04:00
Jaewon Lee
da9c51aa3f Virtual Memory Support 2024-07-29 15:31:17 -04:00
Jaewon Lee
7b80da2538 Update upload and download function in simx runtime 2024-07-29 15:31:17 -04:00
Jaewon Lee
53c547f9de Change the declaration of set_processor_satp function 2024-07-29 15:31:17 -04:00
Jaewon Lee
43a90071e1 Merge Austin's code (Preliminary) 2024-07-29 15:31:17 -04:00
Jaewon Lee
2662b6bcab Update README.md 2024-07-29 15:31:17 -04:00
Jaewon Lee
da1f4baa5d Update README.md 2024-07-29 15:29:59 -04:00
Hanran Wu
768c966681 expand MemoryUnit class defs and add some tlb-related functions 2024-07-29 15:29:59 -04:00
Jaewon Lee
ae312f9022 Update README.md 2024-07-29 15:29:36 -04:00
Jaewon Lee
e20a610e67 Update README.md 2024-07-29 15:29:20 -04:00
Hanran Wu
e7660b6ffe Merge branch 'vortex_vm' of https://github.com/mranduril/vortex into vortex_vm
add changes from pull request reviews
2024-07-29 14:35:20 -04:00
Hanran Wu
de66a1b861 save work before pull 2024-07-29 14:35:11 -04:00
Blaise Tine
e34f824bf9 minor update 2024-07-29 03:56:08 -07:00
Blaise Tine
75f1f957d4 minor updates 2024-07-29 03:28:51 -07:00
Blaise Tine
96831c8b89 writeback cache fixes 2024-07-29 03:11:33 -07:00
Blaise Tine
03e21924f4 Verilator bug workaround
This was causing a buffer overflow, ignoring range checks
2024-07-29 00:28:07 -07:00
Blaise Tine
2e060faaf4 reverting uuid format to ease file diff 2024-07-29 00:05:51 -07:00
Blaise Tine
48b1ab7494 fixed uuid format 2024-07-28 18:03:34 -07:00
Blaise Tine
382b686d59 reset GRPs only in debug mode 2024-07-28 17:40:03 -07:00
Blaise Tine
160c428ef5 fixed uuid format 2024-07-28 17:29:15 -07:00
Blaise Tine
7f99007568 CI script update 2024-07-28 13:17:14 -07:00
Blaise Tine
bad280ae80 testing writeback cache 2024-07-28 12:48:01 -07:00
Blaise Tine
7cc6df7e7c CI script fix 2024-07-28 12:36:45 -07:00
Blaise Tine
fcf9c13d5f add completion job to CI 2024-07-28 12:24:55 -07:00
Blaise Tine
ca5232b58d writeback cache fix 2024-07-28 11:12:48 -07:00
Blaise Tine
4cd48193da minor update 2024-07-27 22:40:40 -07:00
Blaise Tine
4c09d107c3 writeback cache bug fix 2024-07-27 22:23:02 -07:00
Blaise Tine
69126dfd35 SimX writeback configuration 2024-07-27 17:25:13 -07:00
Blaise Tine
904a6dc136 fixed trace format consistency 2024-07-27 17:24:14 -07:00
Blaise Tine
81b17169ab minor update 2024-07-27 14:35:52 -07:00
Blaise Tine
f5014e8975 Adding support for cache flush and writeback
Crediting Yi-Lin Tsai's original work at https://github.com/richardyilin/GPU_writeback
2024-07-27 13:57:36 -07:00
Blaise Tine
a5bde3693f minor update 2024-07-27 13:41:44 -07:00
Blaise Tine
c8455eb562 minor update 2024-07-27 01:35:07 -07:00
Jacob Levinson
bdbe22ff4d Capitalize S in "set enviroment vairables" 2024-07-25 16:40:19 -07:00
Jacob Levinson
2b481024bb Fixes readme by removing $ from shell commands + small tweaks
Removed $ from all the shell commands so that they can be easily
1-click copy-pasted from github without the dollar sign, as well
as changed "cd Vortex" to "cd vortex" to match the actual directory
spelling. Also removed obsolete travis ci link as the project
has moved to github ci.
2024-07-25 16:35:43 -07:00
Blaise Tine
fe8ab30345 minor update 2024-07-25 13:20:57 -07:00
Blaise Tine
ed0171bcc7 updated documentation for both altera and xilinx FPGA setup 2024-07-25 13:17:09 -07:00
Blaise Tine
20ce870b1b minor updates to opae runtime 2024-07-25 12:38:17 -07:00
Blaise Tine
b9328754bc ramulator build fix 2024-07-24 23:27:16 -07:00
Blaise Tine
b7594edf74 minor update 2024-07-24 20:54:03 -07:00
Blaise Tine
5b9d01a421 Yosys synthesis fixes 2024-07-24 20:16:54 -07:00
Blaise Tine
01187795d0 Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2024-07-24 16:30:41 -07:00
Blaise Tine
3de14dd8bf Verilator crash workaround 2024-07-24 16:09:27 -07:00
Blaise Tine
2773b87ae5 minor update 2024-07-24 15:38:49 -07:00
Blaise Tine
5457cab5d1 fixed memory scheduler bug 2024-07-24 15:37:55 -07:00
tinebp
f4376e2c4a
Merge pull request #155 from JacobLevinson/stencil3d
Added Stencil3d regression test
2024-07-24 08:59:42 -07:00
Jacob Levinson
e42c7c6a82
Merge branch 'master' into stencil3d 2024-07-23 21:01:10 -07:00
Blaise Tine
31663aa7ca Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-07-23 16:01:27 -07:00
Blaise Tine
b859d0c088 Dockerfile update 2024-07-23 16:01:00 -07:00
tinebp
3a5bdd4cf0 CI scritp fixes 2024-07-23 15:43:36 -07:00
Blaise Tine
ab761946e5 CI script fixes 2024-07-23 15:39:05 -07:00
Blaise Tine
aec73aa758 Docker update 2024-07-23 15:23:46 -07:00
Blaise Tine
8df962d6b4 Merge remote-tracking branch 'wb/l1_writeback' into develop 2024-07-23 14:20:09 -07:00
Blaise Tine
c5c4ccdd95 CI workflow optimization 2024-07-23 13:15:34 -07:00
Blaise Tine
b3a4d58825 fixed lsu stats 2024-07-23 13:13:51 -07:00
Blaise Tine
99f114aba3 minor updates 2024-07-23 13:13:24 -07:00
Blaise Tine
7808a0a3e3 change default configure tooldir ~/tools 2024-07-23 13:10:44 -07:00
Blaise Tine
eb92e0bdbe fixed simx lsu-unit bug 2024-07-23 11:29:56 -07:00
Blaise Tine
60f7786e17 BitVector class bug fixes 2024-07-23 09:40:20 -07:00
Blaise Tine
d39f3f688a runtime_assert 2024-07-23 01:13:25 -07:00
Blaise Tine
95f59d23a8 simx memory coalescer bug fix 2024-07-23 00:02:43 -07:00
Jacob Levinson
b489cc7abd Added *.cache to gitignore 2024-07-22 20:46:28 -07:00
Jacob Levinson
cd94288e05 Added all files for stencil3d regression test 2024-07-22 20:46:28 -07:00
Jacob Levinson
2f723c4afd Adjusted regression Makefile to include new test 2024-07-22 20:46:28 -07:00
Blaise Tine
e7b2bb81b4 stream pack optimization 2024-07-22 19:35:40 -07:00
Blaise Tine
d85661420b minor update 2024-07-22 19:35:20 -07:00
Blaise Tine
fbe236037e opencl warning fixes 2024-07-22 10:08:25 -07:00
Blaise Tine
9c43bae333 Regression fixes 2024-07-22 04:45:08 -07:00
Blaise Tine
530ec638a7 regression fixes 2024-07-22 04:29:34 -07:00
Blaise Tine
24e8e91a94 DramSim fix 2024-07-22 03:37:10 -07:00
Blaise Tine
527910b31e regression fixes 2024-07-22 00:29:56 -07:00
Blaise Tine
178f4d67fa minor update 2024-07-21 23:39:47 -07:00
Blaise Tine
a5377d78ca minor update 2024-07-21 14:50:59 -07:00
Blaise Tine
31e31ac528 minor updates 2024-07-21 11:37:34 -07:00
Blaise Tine
0bc459d84e minor update 2024-07-21 11:20:17 -07:00
Blaise Tine
7057103deb minor updates 2024-07-21 10:37:50 -07:00
Blaise Tine
cff6e320b2 CI script fixes 2024-07-21 10:23:01 -07:00
Blaise Tine
01b81830bf CI scripts fixes 2024-07-21 09:04:31 -07:00
Blaise Tine
1981179995 CI scripts fixes 2024-07-21 09:03:37 -07:00
Blaise Tine
e778015ab9 CI script fixes 2024-07-21 08:56:53 -07:00
Blaise Tine
cf3a77ab1b CI script fixes 2024-07-21 08:55:13 -07:00
Blaise Tine
92390187ff minor update 2024-07-21 08:14:47 -07:00
Blaise Tine
920da584af CI script fixes 2024-07-21 08:02:14 -07:00
Blaise Tine
71c2bab682 CI script fixes 2024-07-21 07:40:37 -07:00
Blaise Tine
75782f49da CI script fixes 2024-07-21 07:24:55 -07:00
Blaise Tine
ef38dc9744 CI script load/balancing 2024-07-21 07:14:05 -07:00
Blaise Tine
fb141ae522 Ramulator 2.0 with HBM 2.0 support
Verilator 5.0 support
SimX C++17 requirement
2024-07-21 06:57:13 -07:00
Blaise Tine
ca474c39b9 CI script fixes 2024-07-20 23:25:51 -07:00
Blaise Tine
e7a3b52d2a CI script fixes 2024-07-20 22:16:00 -07:00
Blaise Tine
0cb75c67c0 CI script fixes 2024-07-20 21:55:36 -07:00
Blaise Tine
80b79d4d31 CI script fixes 2024-07-20 21:40:44 -07:00
Blaise Tine
be935420f0 CI script fixes 2024-07-20 21:01:15 -07:00
Blaise Tine
198c3b92ff CI script fixes 2024-07-20 20:36:36 -07:00
Blaise Tine
f637219fc7 CI script fixes 2024-07-20 20:10:50 -07:00
Blaise Tine
5cb8ba5e92 CI script fixes 2024-07-20 19:49:12 -07:00
Blaise Tine
8feb8b191c CI script fixes 2024-07-20 19:28:42 -07:00
Blaise Tine
9f8e02824f CI script fixes 2024-07-20 19:19:39 -07:00
Blaise Tine
e4060fee3a CI script fixes 2024-07-20 18:57:02 -07:00
Blaise Tine
c706d410bf CI debuuging... 2024-07-20 18:39:49 -07:00
Blaise Tine
a9f122bd9c CI script fixes 2024-07-20 18:01:56 -07:00
Blaise Tine
f5956b2eb7 CI script fixes 2024-07-20 14:24:57 -07:00
Jaewon Lee
90b4a16c9b
Apply suggestions from code review
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 11:20:27 -04:00
Jaewon Lee
c3e657f201
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:39:40 -04:00
Jaewon Lee
0f8e5505d3
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:36:58 -04:00
Jaewon Lee
a23fb26a8b
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:34:24 -04:00
Jaewon Lee
a4ee8dfa7f
Update runtime/simx/vortex.cpp
Co-authored-by: Martin Troiber <34752929+troibe@users.noreply.github.com>
2024-07-20 10:25:33 -04:00
Blaise Tine
35f561b0e9 minor update 2024-07-20 04:49:56 -07:00
Blaise Tine
0e15f69838 CI fixes 2024-07-20 02:06:34 -07:00
Blaise Tine
3eaa6e4e55 travis to github workflow migration 2024-07-20 01:14:54 -07:00
Blaise Tine
aac57a5f81 minor updates 2024-07-19 23:36:10 -07:00
Blaise Tine
016f8e830c minor update 2024-07-19 21:50:10 -07:00
Blaise Tine
2fa99b7d17 minor update 2024-07-19 20:49:39 -07:00
Blaise Tine
e33b62f6fc extending csv trace gen to support multiple traces 2024-07-19 20:44:27 -07:00
Blaise Tine
5c73243a4e minor update 2024-07-19 20:34:43 -07:00
Blaise Tine
d07266b1f1 cache mshr replay bug fix 2024-07-19 14:57:58 -07:00
Blaise Tine
aa954a6aa8 operand collector RTL optimization 2024-07-19 09:31:08 -07:00
Blaise Tine
3cba4f4a5f minor update 2024-07-19 02:09:17 -07:00
Blaise Tine
4b73762aea profiling optimizations 2024-07-19 00:18:53 -07:00
Blaise Tine
65036e2d34 RTL optimizations 2024-07-18 10:25:23 -07:00
Blaise Tine
7507e36149 minor update 2024-07-17 21:13:22 -07:00
Blaise Tine
08bd918066 minor update 2024-07-17 20:49:26 -07:00
Blaise Tine
9944331f29 minor update 2024-07-17 20:38:49 -07:00
Blaise Tine
9a018014ff issue pipeline synthesis test 2024-07-17 20:34:44 -07:00
Blaise Tine
068ec6c5ef adding issue_top testbench 2024-07-17 20:29:59 -07:00
Blaise Tine
1b9f0a998b VX_mem_coalescer fix 2024-07-17 15:54:25 -07:00
Blaise Tine
b297c29a10 minor updates 2024-07-16 20:34:26 -07:00
Blaise Tine
6a03882bd2 minor updates 2024-07-16 10:52:07 -07:00
Blaise Tine
578c3d33d2 cumulative fixes 2024-07-15 10:13:57 -07:00
Blaise Tine
0dbcddcb54 minor update 2024-07-14 03:12:30 -07:00
Blaise Tine
f0ebe94253 arbiter refactoring 2024-07-14 00:44:35 -07:00
Blaise Tine
ad36bdbd44 minor updates 2024-07-14 00:13:49 -07:00
Blaise Tine
84da2ff8ff minor updates 2024-07-14 00:13:25 -07:00
Blaise Tine
a2307a28dc perf counters update 2024-07-12 19:02:43 -07:00
Blaise Tine
59ed24dc0b perf update 2024-07-12 18:34:42 -07:00
Blaise Tine
649e15c2b3 minor update 2024-07-12 05:29:00 -07:00
Blaise Tine
42f3d55e15 SimX operands collector optimization 2024-07-12 04:54:44 -07:00
Blaise Tine
7b94c983c9 bug fix 2024-07-12 04:49:55 -07:00
Blaise Tine
79ad8bb51f operands optimization 2024-07-12 04:37:47 -07:00
Blaise Tine
47c33cca66 minor update 2024-07-12 03:33:00 -07:00
Blaise Tine
34023ab814 minor update 2024-07-11 12:31:10 -07:00
Hanran Wu
91a1f41f99 debugged virtual memory allocator 2024-07-11 14:49:00 -04:00
Blaise Tine
2460b9b95b fixed local mem unit critical path 2024-07-11 05:44:49 -07:00
Blaise Tine
69f7213afc minor update 2024-07-11 05:31:46 -07:00
Hanran Wu
b8757c539d add virtual mem allocator addr spacereservation 2024-07-10 22:39:00 -04:00
Blaise Tine
a854e9d25b minor update 2024-07-10 18:04:38 -07:00
Blaise Tine
156707bbec minor update 2024-07-10 16:41:35 -07:00
Blaise Tine
d6770f7adc fixed degenerate writes to R0 2024-07-10 16:40:37 -07:00
Blaise Tine
13d5a9c969 minor update 2024-07-10 09:29:41 -07:00
Blaise Tine
0acf7761ef operands optimization 2024-07-09 14:26:27 -07:00
Blaise Tine
ed0c1a778f scoreboard arbiter optimization 2024-07-09 14:22:08 -07:00
Blaise Tine
2651632884 minor update 2024-07-09 14:19:19 -07:00
Blaise Tine
7b9d2bdff2 minor updates 2024-07-09 13:39:44 -07:00
Blaise Tine
3ab353ab61 Using LUTRAM for elastic buffers 2024-07-09 13:35:58 -07:00
Blaise Tine
3efced37c5 trace INSTANCE_ID refactoring 2024-07-09 13:33:17 -07:00
Blaise Tine
1322499c3f minor update 2024-07-09 13:05:43 -07:00
Blaise Tine
f0a4d2e142 minor update 2024-07-09 11:48:29 -07:00
Hanran Wu
314ad3ff8a update destructor of vx_device 2024-07-09 13:42:57 -04:00
Hanran Wu
31837dd7c3 vpn allocator debug complete, now pass demo&vecadd tests 2024-07-08 17:10:19 -04:00
Hanran Wu
f0ea1acaa2 vpn allocator added but doesn't pass any tests 2024-07-08 17:07:30 -04:00
Jaewon Lee
c13e02b19f Change STARTUP_ADDR from 0x40000000 to 0x80000000(32b) and 0x180000000(64b) 2024-06-30 03:10:36 -04:00
Jaewon Lee
ccbb2243cc fixed compile error 2024-06-30 00:54:22 -04:00
Jaewon Lee
3caeeeea13 satp_ is not set, then we skip VAT 2024-06-30 00:35:26 -04:00
Jaewon Lee
b99cd97622 Merge branch 'vortex_vm' of github.com:vortexgpgpu/vortex into vortex_vm 2024-06-29 17:43:44 -04:00
Jaewon Lee
d531fa6b26 64bit support 2024-06-29 17:43:20 -04:00
Blaise Tine
58c3c63dae issue stage partitioning into slices 2024-06-28 08:22:18 -07:00
Blaise Tine
56e9e19508 gpr_slice redesign to use multi-banking architecture 2024-06-28 08:03:01 -07:00
Blaise Tine
ed11defd71 debug scope environment fix 2024-06-28 07:57:16 -07:00
Blaise Tine
934416d3ec debug scope environment fix 2024-06-28 07:55:58 -07:00
Jaewon Lee
4ab015ddd9
Update README.md
Update TOOLDIR to vortex-toolchain-2024-6-14/
2024-06-28 09:48:04 -04:00
Nayan Sivakumar Nair
5e63b8f35a dummy commit 2024-06-25 23:27:18 -04:00
Nayan Sivakumar Nair
5b0fc8cbd4 Fixes for PR 2024-06-25 03:18:50 -04:00
Jaewon Lee
3d98121ab6
Update README.md 2024-06-23 11:24:10 -04:00
Jaewon Lee
02091f3d44 Merge Vortex 2.2 2024-06-22 23:55:01 -04:00
Nayan Sivakumar Nair
a378aed67c Moved tc_num, tc_size param to makefile args 2024-06-21 22:23:24 -04:00
Blaise Tine
f97ffac7e7 minor update 2024-06-19 22:17:03 -07:00
Blaise Tine
b82a755e44 minor update 2024-06-19 16:39:04 -07:00
Blaise Tine
61df2ca428 minor update 2024-06-18 23:13:48 -07:00
Blaise Tine
deee7cd8b0 fixed tracing support for xilinx simulation 2024-06-18 23:11:58 -07:00
Jaewon Lee
2271d2b286 remove # 2024-06-19 02:04:24 -04:00
Jaewon Lee
862997fc94 Virtual Memory Support 2024-06-19 01:52:22 -04:00
Jaewon Lee
62673b4b72 Update upload and download function in simx runtime 2024-06-19 01:43:11 -04:00
Jaewon Lee
01c7b5e384 Change the declaration of set_processor_satp function 2024-06-19 01:36:26 -04:00
Jaewon Lee
cfcece940e Merge Austin's code (Preliminary) 2024-06-19 01:36:26 -04:00
Jaewon Lee
d8a6ac748a Update README.md 2024-06-19 01:09:56 -04:00
Jaewon Lee
2f2974ee72 Ignore the changed on ramulator 2024-06-19 01:09:56 -04:00
Jaewon Lee
6f0af066e8 Update README.md 2024-06-19 01:09:56 -04:00
Hanran Wu
2b426693f5 expand MemoryUnit class defs and add some tlb-related functions 2024-06-19 01:09:56 -04:00
Jaewon Lee
cf3f2d4f6f Update README.md 2024-06-19 01:09:56 -04:00
Jaewon Lee
54af5eb186 Update README.md 2024-06-19 01:09:56 -04:00
Jaewon Lee
efe12ca6bf Update README.md 2024-06-19 01:09:18 -04:00
Varsha Singhania
0e3badf723 Script checkin and code cleanup 2024-06-18 02:19:57 -04:00
Varsha Singhania
99c6a1af5a Tensor cores in Vortex 2024-06-17 04:28:51 -04:00
Blaise Tine
abdea91120 minor update 2024-06-14 19:15:00 -07:00
Blaise Tine
4adabc3f68 fixed xilinx synthesis build 2024-06-14 19:14:33 -07:00
Blaise Tine
4a11c1ec0f FpNew RTL fix 2024-06-14 16:29:52 -07:00
Blaise Tine
c5e57ce5d5 minor update 2024-06-14 08:38:17 -07:00
Blaise Tine
896eb09ec3 docs update 2024-06-14 07:46:25 -07:00
Blaise Tine
cf3413c824 minor update 2024-06-14 07:45:37 -07:00
Blaise Tine
3f52964a94 Merge branch 'develop' 2024-06-13 23:31:47 -07:00
Blaise Tine
8b53f01d1c minor update 2024-06-13 23:29:41 -07:00
Blaise Tine
e603c302b2 minor update 2024-06-13 10:12:40 -07:00
Blaise Tine
3611ad5b4c travis update for pocl 64-bit testing 2024-06-13 09:41:58 -07:00
Blaise Tine
5bcf24ed55 64-bit rtl fix 2024-06-13 06:26:45 -07:00
Blaise Tine
78b6e0638c Docker update 2024-06-13 02:14:54 -07:00
Blaise Tine
0e2e09b2ea minor update 2024-06-12 05:43:02 -07:00
Blaise Tine
3393522b54 minor update 2024-06-12 03:59:31 -07:00
Blaise Tine
4ced61790a minor update 2024-06-12 02:32:27 -07:00
Blaise Tine
04b967217d minor update 2024-06-11 13:42:05 -07:00
Blaise Tine
7887e6b432 merge fix 2024-06-11 09:31:13 -07:00
Blaise Tine
267521a1cb Merge branch 'develop' 2024-06-11 05:54:12 -07:00
Blaise Tine
023dc477a6 travis version update 2024-06-11 03:05:04 -07:00
Blaise Tine
250a5741f7 Migrating all tests to new kernel launch API 2024-06-11 02:53:36 -07:00
Blaise Tine
8b63305201 minor update 2024-06-08 21:36:28 -07:00
Blaise Tine
e38187acb5 minor update 2024-06-08 02:47:31 -07:00
Blaise Tine
c0044bc303 minor update 2024-06-08 01:58:21 -07:00
Blaise Tine
99eaaf6189 uuid_gen cleanup 2024-06-08 01:57:38 -07:00
Blaise Tine
fa5590bbf7 minor update 2024-06-08 01:05:41 -07:00
Blaise Tine
96cb381885 vx_spawn_threads implementation 2024-06-07 07:52:15 -07:00
Blaise Tine
8c5a783477 minor update 2024-06-07 07:48:33 -07:00
Blaise Tine
e79f905d87 minor update 2024-06-07 07:47:40 -07:00
Blaise Tine
01cca01511 TLS alignment fix 2024-06-07 05:38:54 -07:00
Blaise Tine
f992f5bae6 fully disable dynamic linking for kernels 2024-06-07 02:11:15 -07:00
Blaise Tine
b21ea24815 removing dynamic link hooks in linker script to fix TLS support in kernels 2024-06-06 22:07:56 -07:00
Blaise Tine
9e20e6edb6 minor update 2024-06-06 00:32:58 -07:00
Blaise Tine
6c56edf65d minor update 2024-06-04 14:28:30 -07:00
Blaise Tine
6b800f2054 minor update 2024-06-03 20:39:16 -07:00
Blaise Tine
a7fac99fd9 minor update 2024-06-03 20:35:26 -07:00
Blaise Tine
0235da5798 minor update 2024-06-03 19:56:32 -07:00
Blaise Tine
bc280e2703 minor update 2024-06-03 19:55:03 -07:00
Blaise Tine
2b9628c73b minor update 2024-06-03 19:54:21 -07:00
Blaise Tine
681646d27a minor update 2024-05-29 17:02:16 -07:00
Blaise Tine
5b56f76289 minor update 2024-05-28 23:15:26 -07:00
Blaise Tine
a38d47c0df riscv-tests update 2024-05-28 22:34:00 -07:00
Blaise Tine
68d2ac6f5e 32-bit/64-bit address space compatibility 2024-05-28 22:30:59 -07:00
Blaise Tine
364136d66f minor update 2024-05-28 11:14:50 -07:00
Blaise Tine
f8ef570778 riscv tests refactoring 2024-05-28 10:46:31 -07:00
Blaise Tine
0426856ab4 minor update 2024-05-27 20:21:37 -07:00
Blaise Tine
9dbff0e77c makefile clean-all 2024-05-27 20:03:16 -07:00
Blaise Tine
f0253a5f80 minor update 2024-05-27 19:12:32 -07:00
Blaise Tine
47d578c4d2 runtime refactoring 2024-05-27 18:55:42 -07:00
Blaise Tine
319c18158a minor update 2024-05-27 16:01:24 -07:00
Blaise Tine
c1000f6a3b runtime refactoring 2024-05-27 15:59:41 -07:00
Blaise Tine
405d6b468f minor update 2024-05-27 02:15:03 -07:00
Blaise Tine
32f39264ef runtime dynamic loading for driver-specific implementations 2024-05-26 19:05:17 -07:00
Blaise Tine
d413786b9e minor update 2024-05-26 17:14:11 -07:00
Blaise Tine
f35e4266ed minor update 2024-05-25 17:21:28 -07:00
Kumar Saunack
bd33f1edc3
Add explicit imports cpp (#131)
* add imports for array and unordered_map

* add auto
2024-05-24 15:21:21 -04:00
Blaise Tine
a72c68acf4 minor update 2024-05-23 22:33:12 -07:00
Blaise Tine
f6663d6618 memory fence update 2024-05-23 22:24:30 -07:00
Blaise Tine
b1ae82bae5 minor update 2024-05-23 15:11:59 -07:00
Blaise Tine
e4fdc740ba minor update 2024-05-23 02:06:08 -07:00
Blaise Tine
f3c0b7d186 minor update 2024-05-21 17:23:28 -07:00
Blaise Tine
dc5cbfe932 adding support for build install target 2024-05-21 17:02:32 -07:00
Blaise Tine
e1c8ff02be minor update 2024-05-21 12:46:15 -07:00
Blaise Tine
210e4a8e8f minor update 2024-05-21 12:45:03 -07:00
Blaise Tine
9b79d60507 minor update 2024-05-21 05:39:35 -07:00
Blaise Tine
d99aaf3933 fixed spawn_task_groups bug 2024-05-21 04:18:04 -07:00
Blaise Tine
c7c1dddeac minor update 2024-05-20 08:42:12 -07:00
Blaise Tine
94cadb69d2 minor update 2024-05-20 08:00:43 -07:00
Blaise Tine
b3f96e288a + support for ZICOND RISC-V extension
+ RTL decode refactoring
2024-05-20 00:17:24 -07:00
Blaise Tine
8d97d2c998 minor update 2024-05-17 18:30:49 -07:00
Blaise Tine
0aaf010a62 doc update 2024-05-17 13:30:26 -07:00
Blaise Tine
ab56cc6d4a minor update 2024-05-13 23:57:33 -07:00
Blaise Tine
15ca8290d0 updated configure script usage 2024-05-13 08:19:37 -07:00
Blaise Tine
0fbe22dafa minor update 2024-05-13 06:59:46 -07:00
Blaise Tine
ae11df3e6a minor update 2024-05-12 20:28:49 -07:00
Blaise Tine
19beb0728e minor update 2024-05-12 20:21:23 -07:00
Blaise Tine
60107cf2b6 XRT runtime and simulation support for Vortex AFU (incomplete) 2024-05-11 17:43:49 -07:00
Blaise Tine
98f080340a perf counters profiling refactoring 2024-05-11 17:10:08 -07:00
Blaise Tine
dc27d3c014 minor update 2024-05-11 13:10:03 -07:00
Blaise Tine
bb53658ce7 minor update 2024-05-11 06:51:32 -07:00
Blaise Tine
112e8235ee minor update 2024-05-11 03:45:24 -07:00
Blaise Tine
4e7bc9654b wspawn fix 2024-05-10 21:42:20 -07:00
Blaise Tine
d1ba02681e minor update 2024-05-10 03:46:22 -07:00
Blaise Tine
3cceed1e0b lock CSR unit only for FPU CSRs 2024-05-10 03:45:24 -07:00
Blaise Tine
c1fa2bbc38 minor update 2024-05-10 01:27:54 -07:00
Blaise Tine
82a417f1f0 minor update 2024-05-09 22:59:47 -07:00
Blaise Tine
df95c7c4c6 minor update 2024-05-09 22:51:07 -07:00
Blaise Tine
599edfbbeb minor update 2024-05-09 21:42:44 -07:00
Blaise Tine
aaba10a133 minor update 2024-05-09 21:27:38 -07:00
Blaise Tine
4bb31e63e2 minor update 2024-05-09 21:21:28 -07:00
Blaise Tine
ca11ccee1e minor udpate 2024-05-09 16:37:53 -07:00
Blaise Tine
a1ddddf929 minor update 2024-05-09 16:34:30 -07:00
Blaise Tine
ad5d82d430 minor update 2024-05-09 13:23:49 -07:00
Blaise Tine
b9da621c09 minor update to ci/toolchain_install.sh.in 2024-05-09 12:37:24 -07:00
Blaise Tine
da65e964ed minor update 2024-05-09 03:37:03 -07:00
Blaise Tine
4a2b984710 minor update 2024-05-09 03:23:48 -07:00
Blaise Tine
13e9e6d9f9 minor update 2024-05-09 02:47:10 -07:00
Blaise Tine
1864e46c14 minor update 2024-05-08 22:39:03 -07:00
Blaise Tine
b9c0082cb8 minor update 2024-05-08 22:01:37 -07:00
Blaise Tine
717b2e9ba1 enable barrier and spawn skip mode if N=1 2024-05-08 04:23:38 -07:00
Blaise Tine
b6aa44f39f spawn_tasks_ex optimization 2024-05-07 23:40:38 -07:00
Blaise Tine
0003926d01 checking workgroup occupancy 2024-05-07 23:38:51 -07:00
Blaise Tine
98ead77405 querying num_barriers device caps 2024-05-07 23:35:50 -07:00
Blaise Tine
82908a3026 documentation 2024-05-07 23:33:45 -07:00
Blaise Tine
9e7074b871 minor update 2024-05-07 04:04:57 -07:00
Blaise Tine
a569d54104 minor update 2024-05-07 03:58:44 -07:00
Blaise Tine
04e72a2341 minor update 2024-05-07 03:50:00 -07:00
Blaise Tine
ce26a0a3cc minor update to travis and regression script 2024-05-06 23:20:42 -07:00
Blaise Tine
bae6cd2d86 minor update 2024-05-06 19:02:16 -07:00
Blaise Tine
a94d868cd5 minor update 2024-05-06 18:54:44 -07:00
Blaise Tine
faacace2a4 minor update 2024-05-06 07:54:50 -07:00
Blaise Tine
6519d356ed travis udpate 2024-05-06 02:08:41 -07:00
Blaise Tine
da272064b5 minor update 2024-05-06 01:43:59 -07:00
Blaise Tine
009861433a minor update 2024-05-06 01:28:03 -07:00
Blaise Tine
79f5824c74 adding work groups support to spawntasks API 2024-05-06 01:25:13 -07:00
Blaise Tine
0cabd24f08 opencl tests update 2024-05-06 01:23:25 -07:00
Blaise Tine
311799b423 Passing LLC flags to POCL 2024-05-06 00:55:00 -07:00
Blaise Tine
189990e351 minor update 2024-05-06 00:54:07 -07:00
Blaise Tine
badfb24e01 CSRs update 2024-05-06 00:51:38 -07:00
Blaise Tine
c8bae13448 minor update 2024-05-03 19:51:09 -07:00
Blaise Tine
f4202868bc MINOR UPDATE 2024-05-02 18:44:43 -07:00
Blaise Tine
42202e2940 minor update 2024-05-02 18:25:10 -07:00
Blaise Tine
b68d32b83c minor update 2024-05-02 18:00:42 -07:00
Blaise Tine
7d82212fb1 minor update 2024-05-02 13:20:33 -07:00
Blaise Tine
ef5aa6d610 minor update 2024-05-02 11:57:56 -07:00
Blaise Tine
68a3664a04 minor update 2024-05-01 23:57:14 -07:00
Blaise Tine
1b2d9ed538 minor udpate 2024-05-01 21:01:20 -07:00
Blaise Tine
b5ca7a999c SIMT stack fix 2024-05-01 20:50:21 -07:00
Blaise Tine
896aca0c62 mpm counters query fix 2024-05-01 17:21:37 -07:00
Blaise Tine
4737cdabbd minor update 2024-05-01 08:06:45 -07:00
Blaise Tine
06896f272c minor update 2024-05-01 02:13:52 -07:00
Blaise Tine
27a9e30857 minor update 2024-05-01 00:52:35 -07:00
Blaise Tine
e84f978502 minor update 2024-05-01 00:02:52 -07:00
Blaise Tine
5ea10fd872 minor update 2024-04-30 22:47:59 -07:00
Blaise Tine
aea1d2c8eb minor updates to the build system 2024-04-30 16:27:20 -07:00
Blaise Tine
19484a531a minor update 2024-04-30 04:19:59 -07:00
Blaise Tine
284d438acd minor update 2024-04-30 02:20:40 -07:00
Blaise Tine
ca79e69355 SIMT Tack compression 2024-04-30 02:19:32 -07:00
Blaise Tine
9df25ff48f minor update 2024-04-28 04:42:22 -07:00
Blaise Tine
604c41fc54 minor update 2024-04-28 04:30:31 -07:00
Blaise Tine
a167c07e7d adding wait cycles to wspawn 2024-04-28 04:27:47 -07:00
Blaise Tine
db0f0fd353 runtime API refactoring to support memory reservation and protection 2024-04-28 04:23:00 -07:00
Blaise Tine
c554f53e44 minor update 2024-04-26 18:03:30 -07:00
Blaise Tine
daf1360d83 minor single-thread fix 2024-04-20 22:32:28 -07:00
Blaise Tine
0cd2ea458f minor update 2024-04-18 13:25:11 -07:00
Blaise Tine
0098d197c9 minor update 2024-04-18 02:40:49 -07:00
Blaise Tine
efc7a971dc minor update 2024-04-17 22:52:12 -07:00
Blaise Tine
dbe052594d minor update 2024-04-17 19:26:27 -07:00
Blaise Tine
95b23fa97b minor update 2024-04-17 18:34:16 -07:00
Blaise Tine
f369006956 minor update 2024-04-17 04:17:13 -07:00
Blaise Tine
0c746d93bb minor update 2024-04-17 04:13:05 -07:00
Blaise Tine
e3d06e0d9c minor update 2024-04-17 03:36:28 -07:00
Blaise Tine
57a5aead4c minor update 2024-04-17 01:05:04 -07:00
Blaise Tine
8fa28bfca1 location independent kernel loading support/fixes 2024-04-17 01:00:38 -07:00
Blaise Tine
69fdb4bd04 mino rupdate 2024-04-15 19:23:20 -07:00
Blaise Tine
ac669a30ca UUID refactoring 2024-04-14 22:01:03 -07:00
Blaise Tine
8a933520f0 minor update 2024-04-14 22:00:39 -07:00
Blaise Tine
dfed5b29c0 minor update 2024-04-13 23:28:31 -07:00
Blaise Tine
1f8b3dcc5c minor update 2024-04-13 22:56:50 -07:00
Blaise Tine
75255269a1 minor update 2024-04-13 22:49:10 -07:00
Blaise Tine
15dc9afe93 position-independent kernel fix 2024-04-13 22:26:59 -07:00
Blaise Tine
2488e4736c minor update 2024-04-12 14:45:37 -07:00
Blaise Tine
9380fd3d72 minor update 2024-04-12 14:09:16 -07:00
Blaise Tine
83a7deb5da minor update 2024-04-12 13:46:23 -07:00
Blaise Tine
25d0c76d14 enabling explicit kernel address and arguments allocation 2024-04-12 06:58:42 -07:00
Blaise Tine
b32ea5b750 fixed compiler relocation issue with R_RISCV_GOT_HI20 2024-04-11 15:35:51 -07:00
Blaise Tine
9feb46387e minor update 2024-04-10 21:56:01 -07:00
Blaise Tine
1963ae04b2 minor update 2024-04-09 22:20:52 -07:00
Blaise Tine
7f61f0b015 minor update 2024-04-09 18:59:44 -07:00
Blaise Tine
bb47a14388 remove codecov 2024-04-09 18:31:33 -07:00
Blaise Tine
3de8075636 opencl tests GPU support 2024-04-09 18:20:24 -07:00
Blaise Tine
de66d2ec3e minor fix 2024-04-09 15:27:31 -07:00
Blaise Tine
5231bb8576 minor update 2024-04-09 15:22:59 -07:00
Blaise Tine
e58c12fc02 minor update 2024-04-09 14:49:23 -07:00
Blaise Tine
b54e85113d adding trigonomitry etst to dogfood 2024-04-09 04:14:28 -07:00
Blaise Tine
cef05d3110 minor update 2024-04-09 04:13:41 -07:00
Blaise Tine
bdcf2a0af0 syscalls update 2024-04-09 03:40:18 -07:00
Blaise Tine
dd461468d3 enabling MSCRATCH CSR 2024-04-09 02:01:17 -07:00
Blaise Tine
7784dfe9b7 CSR 32-bit/64-bit refactoring 2024-04-09 02:00:34 -07:00
Blaise Tine
135cc4f5a7 minor update 2024-04-09 01:58:04 -07:00
Blaise Tine
db35f5d768 simx decode bug fix. 2024-04-09 01:34:14 -07:00
Blaise Tine
0e4501aecd minor update 2024-04-07 06:09:15 -07:00
Blaise Tine
135445ce9c minor update 2024-04-07 05:15:08 -07:00
Blaise Tine
d96e2fa56b minor update 2024-04-07 04:34:02 -07:00
Blaise Tine
e9b66d5a1c minor update 2024-04-07 03:20:19 -07:00
Blaise Tine
6400e73c42 minor updates 2024-04-06 22:05:46 -07:00
Blaise Tine
ca1bbdf415 minor update 2024-04-06 09:11:25 -07:00
Blaise Tine
b02746fb0d minor update 2024-04-06 08:32:16 -07:00
Blaise Tine
8d252bb6f8 minor update 2024-04-06 07:39:00 -07:00
Blaise Tine
04314cefed using Vortex custom libc library 2024-04-06 06:42:47 -07:00
Blaise Tine
70717fb42b tests update 2024-04-06 03:21:49 -07:00
Blaise Tine
351aa48f6e per-workgroup local memory fix 2024-04-06 02:05:51 -07:00
Blaise Tine
3534175d43 build configuration update 2024-04-06 01:44:39 -07:00
Blaise Tine
b4f5616814 minor update 2024-03-31 05:36:15 -07:00
Blaise Tine
7df4f1ba03 minor update 2024-03-31 03:50:50 -07:00
Blaise Tine
c39b8e1112 minor update 2024-03-31 03:38:15 -07:00
Blaise Tine
d743e2ba22 minor update 2024-03-31 01:22:01 -07:00
Blaise Tine
87bff732ac minor update 2024-03-31 00:31:47 -07:00
Blaise Tine
486cd0b866 minor update 2024-03-30 22:45:31 -07:00
Blaise Tine
da8608d702 minor update 2024-03-30 18:07:32 -07:00
Blaise Tine
d0c441519d minor update 2024-03-30 12:29:27 -07:00
Blaise Tine
2a38ef0db8 minor update 2024-03-30 12:17:11 -07:00
Blaise Tine
914044b57b minor update 2024-03-30 11:02:09 -07:00
Blaise Tine
d1ff95eb9f minor update 2024-03-30 10:35:15 -07:00
Blaise Tine
beebc2adf0 minor update 2024-03-30 10:03:40 -07:00
Blaise Tine
e4e0ee8fef minor update 2024-03-30 09:32:12 -07:00
Blaise Tine
299657d693 minor update 2024-03-30 05:49:45 -07:00
Blaise Tine
03cf694238 minor update 2024-03-30 05:33:57 -07:00
Blaise Tine
6b81b26ffc enabling Makefile configuration with build folder support 2024-03-30 02:28:39 -07:00
Blaise Tine
99c91987fb opencl benchmarks fixes 2024-03-28 06:40:09 -07:00
Blaise Tine
697e2fffbb adding dogfood fclamp test 2024-03-27 06:40:13 -07:00
Blaise Tine
70f2f58ac9 minor update 2024-03-26 16:31:24 -07:00
Blaise Tine
8ab4c53e27 new conv3x regression test 2024-03-26 16:30:47 -07:00
Blaise Tine
c8dd0aafb0 minor update 2024-03-26 16:22:02 -07:00
Blaise Tine
ae12b45f77 minor update 2024-03-25 15:01:40 -07:00
Blaise Tine
896aa6b2a1 minor update 2024-03-25 14:53:03 -07:00
Blaise Tine
74368ab65a minor update 2024-03-24 23:40:00 -07:00
Blaise Tine
afc0b2056d minor update 2024-03-24 20:59:32 -07:00
Blaise Tine
c6e09d40ff minor update 2024-03-24 20:32:33 -07:00
Blaise Tine
402c911991 simx mem_coalescer 2024-03-24 20:31:36 -07:00
Blaise Tine
86055335ee minor update 2024-03-24 19:39:53 -07:00
Blaise Tine
d27656819b regression update 2024-03-24 14:44:19 -07:00
Blaise Tine
7324900c57 minor updates 2024-03-24 14:19:23 -07:00
Blaise Tine
459abdef21 minor update 2024-03-24 03:26:28 -07:00
Blaise Tine
830c43517b FPU area reduction via time-multiplexing 2024-03-24 02:39:17 -07:00
Blaise Tine
19ba4c7bd4 minor update 2024-03-23 20:25:30 -07:00
Blaise Tine
178e1a6b2a minor update 2024-03-23 20:10:01 -07:00
Blaise Tine
d0a53ff53e minor update 2024-03-23 19:52:04 -07:00
Blaise Tine
231eb4e78b SOA to AOS conversion 2024-03-23 19:50:57 -07:00
Blaise Tine
04de44d280 dogfood update 2024-03-23 19:49:44 -07:00
Blaise Tine
3cd7f41012 minor update 2024-03-21 09:36:38 -07:00
Blaise Tine
35a782a7ba local memory runtime refactoring 2024-03-21 09:31:55 -07:00
Blaise Tine
2776f2cdf0 minor update 2024-03-20 14:31:00 -07:00
Blaise Tine
6d0e345073 minor update 2024-03-20 14:19:13 -07:00
Blaise Tine
91135bb855 minor update 2024-03-20 13:12:55 -07:00
Blaise Tine
fbc49b1455 m 2024-03-19 01:04:15 -07:00
Blaise Tine
354d3663e1 minor update 2024-03-19 00:08:12 -07:00
Blaise Tine
45e791437c minor update 2024-03-18 21:51:08 -07:00
Blaise Tine
17cdc32eee minor update 2024-03-18 21:33:16 -07:00
Blaise Tine
c175e11a18 Using packed LSU memory requests within the code 2024-03-18 21:22:02 -07:00
Blaise Tine
df38cc00f5 adding gpr_slice 2024-03-18 01:43:44 -07:00
Blaise Tine
d65f6e064a adding lsu_slice 2024-03-18 01:11:03 -07:00
Blaise Tine
6556e8c66d extending memory interface with address type 2024-03-18 00:35:03 -07:00
Blaise Tine
dc19d25bcc fixed scoreboard critical path 2024-03-17 23:05:02 -07:00
Blaise Tine
787f02e4c6 minor update 2024-03-15 00:03:05 -07:00
Blaise Tine
d9426d5789 minor update 2024-03-14 21:36:39 -07:00
Blaise Tine
100eb49201 minor update 2024-03-14 12:57:02 -07:00
Blaise Tine
f1522e68f8 simx memory coalescing support 2024-03-14 12:20:39 -07:00
Blaise Tine
07c063031f tabs cleanup 2024-03-13 23:19:54 -07:00
Blaise Tine
a8f2bb30da minor update 2024-03-13 21:06:26 -07:00
Blaise Tine
454b9e7444 removed dup address detection, replaced with coalescing 2024-03-13 13:56:07 -07:00
Blaise Tine
4766787478 minor update 2024-03-13 13:15:15 -07:00
Blaise Tine
a8e892593e maxfanout update 2024-03-12 01:46:42 -07:00
Blaise Tine
840ced22a9 simx refactoring - emulation vs simulation discrete separation 2024-03-12 00:23:42 -07:00
Blaise Tine
ff6f33acff simx refactoring: simobject::push(), instr_trace, FUtype, pending_instrs_ 2024-03-11 15:39:49 -07:00
Blaise Tine
3ec37c6c40 minor update 2024-03-09 00:28:37 -08:00
Blaise Tine
c1e639bd44 minor update 2024-03-08 01:15:04 -08:00
Blaise Tine
489738751f minor update 2024-03-07 02:01:59 -08:00
Blaise Tine
33406d2e83 dispatch/commit refactoring 2024-03-06 17:35:33 -08:00
Blaise Tine
27db94b20d minor update 2024-03-06 15:48:37 -08:00
Blaise Tine
72f5976dd6 minor update 2024-03-06 15:32:32 -08:00
Blaise Tine
44e685f8af adding num_lsu_blocks 2024-03-06 15:31:50 -08:00
Blaise Tine
4d7b2b9ea5 minor update 2024-03-06 08:10:22 -08:00
Blaise Tine
de0f4dda44 minor update 2024-03-05 10:12:05 -08:00
Blaise Tine
b0f3e91006 minor update 2024-03-04 22:19:52 -08:00
Blaise Tine
288147ac4f memory coalescing RTL implementation 2024-03-04 22:18:39 -08:00
Blaise Tine
274e6a4c52 removed NULL local_group to prevent OpenCL runtime automatic allocation of moving global group into local group which will be inefficient on Vortex. 2024-03-04 20:30:32 -08:00
Blaise Tine
de8453d0be wspawn thread index reordering 2024-03-04 20:28:45 -08:00
Blaise Tine
589e351832 minor update 2024-03-04 20:28:02 -08:00
Blaise Tine
34324bb768 minor update 2024-03-04 15:00:12 -08:00
Blaise Tine
fe3c712d66 minor update 2024-03-02 23:16:09 -08:00
Blaise Tine
badb0c8300 minor update 2024-03-02 21:33:54 -08:00
Blaise Tine
c344e28476 minor update 2024-03-01 08:55:25 -08:00
Blaise Tine
9c900394fa minor update 2024-03-01 08:19:26 -08:00
Blaise Tine
cce517b02b minor update 2024-02-29 01:24:32 -08:00
Blaise Tine
26d45ed9db renamed shared to local memory 2024-02-29 01:04:52 -08:00
Blaise Tine
1b9c39283e minor update 2024-02-29 00:10:29 -08:00
Blaise Tine
dd40e9c754 cache subsystem refactoring 2024-02-29 00:08:14 -08:00
Blaise Tine
59497e52df minor update 2024-02-28 16:36:26 -08:00
Blaise Tine
51ae0b71f3 minor update 2024-02-28 16:35:22 -08:00
Blaise Tine
041f573815 cleaned up vector code from simx 2024-02-21 18:27:52 -08:00
Blaise Tine
fc0f5e2ca4 minor update 2024-02-16 21:02:30 -08:00
Blaise Tine
76a828cf50 minor update 2024-02-16 21:02:12 -08:00
Blaise Tine
bb3a49f95b minor update 2024-02-15 04:31:13 -08:00
Blaise Tine
413e933b8a OUT_BUF / OUT_REG refactoring 2024-02-15 04:08:50 -08:00
Blaise Tine
bb4c150aaf per-warp ibuffer scoreboard scheduling optimization 2024-02-14 14:24:47 -08:00
Blaise Tine
a78ac7a246 minor update 2024-02-14 14:24:29 -08:00
Blaise Tine
f13a885815 minor update 2024-02-14 12:53:58 -08:00
Blaise Tine
422bcdee0f renamed convolution test => conv3 2024-02-14 12:53:13 -08:00
Blaise Tine
08c06be601 minor update 2024-02-14 10:02:10 -08:00
Blaise Tine
de90863333 minor update 2024-02-13 08:41:02 -08:00
Blaise Tine
f83094e0d9 minor update 2024-02-12 04:19:31 -08:00
Blaise Tine
21b54761e3 minor update 2024-02-11 22:13:16 -08:00
Blaise Tine
a01f3a0370 cache_bypass refactoring 2024-02-11 21:41:33 -08:00
Blaise Tine
1e5550ccda Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-11 08:03:22 -08:00
Blaise Tine
9e54ccde6d adding support for top-module parameter replacement during synthesis tests 2024-02-10 21:54:35 -08:00
Blaise Tine
7f778000ea docs update 2024-02-10 18:20:34 -08:00
Blaise Tine
b8ccff7ade Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-10 17:31:36 -08:00
Blaise Tine
5f2b10b8a6 minor update 2024-02-09 21:20:23 -08:00
Blaise Tine
3fee1a6193 minor update 2024-02-09 20:34:44 -08:00
Blaise Tine
ae7b01405c CI minor update 2024-02-08 14:10:00 -08:00
Blaise Tine
0823c71b2e Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-04 20:32:33 -08:00
Blaise Tine
be0db6e1a5 minor update 2024-02-04 20:32:05 -08:00
Blaise Tine
50028c1a33 Merge remote-tracking branch 'origin' into develop 2024-02-04 20:19:30 -08:00
Blaise Tine
e06e6646a9 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-02-04 20:18:21 -08:00
Blaise Tine
8d4b6c804f minor update 2024-02-04 20:17:12 -08:00
Blaise Tine
6f7a389a1f arbiters unlock refactoring 2024-02-04 20:16:18 -08:00
Blaise Tine
fe15647f98 minor update 2024-02-04 02:11:53 -08:00
Blaise Tine
b0b7cd2b1e minor updates 2024-02-03 19:09:53 -08:00
Blaise Tine
61da7f609d Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 13:36:20 -08:00
Blaise Tine
f9cd8be19e minor update 2024-01-31 13:35:43 -08:00
Blaise Tine
0a38312527 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 12:14:49 -08:00
Blaise Tine
dab262e4f7 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 12:03:50 -08:00
Blaise Tine
8ab7c590fd disabling fetch's deadlock check when L1 caches are present 2024-01-31 06:16:54 -08:00
Blaise Tine
e3090930c0 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-31 03:37:08 -08:00
Blaise Tine
e2d1387df8 elastic buffers classification 2024-01-31 00:39:37 -08:00
Shinnung Jeong
fd65ed95eb fix bug to access memory address in simx 2024-01-30 20:45:47 -05:00
Blaise Tine
597e3b0e35 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-29 04:49:42 -08:00
Blaise Tine
b31d868a27 Merge branch 'develop' 2024-01-28 17:34:46 -08:00
Blaise Tine
b6919d19a7 minor update 2024-01-28 17:34:07 -08:00
Blaise Tine
eac6a485fa Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-28 00:36:41 -08:00
Blaise Tine
6045597ad0 Merge branch 'develop' 2024-01-28 00:25:55 -08:00
Blaise Tine
1c1140d517 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-28 00:25:16 -08:00
Blaise Tine
38b92ad592 - using SV_DPI defines to disable DPI in synthesis-based simulations
- fixed Intel ASE run script: run_ase.sh
2024-01-28 00:22:21 -08:00
dhy2000
b08c7403f6
fix #100: change return type to float 2024-01-26 19:56:50 +08:00
lpc97667
a9d578f3ab Docs update 2024-01-10 15:56:22 -05:00
Blaise Tine
f04ee15f94 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-03 19:11:33 -08:00
Blaise Tine
f0e6a435f8 Merge branch 'develop' 2024-01-03 19:09:49 -08:00
Blaise Tine
648bf75b0b minor update 2024-01-03 19:09:18 -08:00
Blaise Tine
665907355e Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2024-01-03 19:02:00 -08:00
Blaise Tine
3b75418ea9 Merge branch 'develop' 2024-01-03 10:24:48 -08:00
Blaise Tine
f2e8317412 updated documentation 2024-01-03 10:23:38 -08:00
Blaise Tine
cc042a4098 Merge branch 'develop' 2023-12-31 15:30:20 -08:00
Blaise Tine
bd18b03cc3 minor update 2023-12-31 15:29:04 -08:00
Blaise Tine
e7f8b40d93 minor update 2023-12-31 11:46:41 -08:00
Blaise Tine
ec2a35def9 Merge branch 'develop' 2023-12-31 11:26:48 -08:00
Blaise Tine
29c15dc9c4 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-30 00:54:07 -08:00
Blaise Tine
031d24e695 minor updates 2023-12-30 00:52:44 -08:00
Blaise Tine
645ca62c91 Merge branch 'develop' 2023-12-29 15:14:23 -08:00
Blaise Tine
7425446b15 fixed DESTDIR support in simumation Makefiles 2023-12-29 14:11:16 -08:00
Blaise Tine
db5db20800 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-28 20:12:17 -08:00
Blaise Tine
a7548db5ec Merge branch 'develop' 2023-12-28 20:08:12 -08:00
Blaise Tine
e62d122c9b enabling temporary build directory for blackbox multiple instances 2023-12-28 20:06:10 -08:00
Blaise Tine
e8cbfb4a72 Merge branch 'develop' 2023-12-28 16:11:29 -08:00
Blaise Tine
51e621cdf1 minor update 2023-12-28 16:08:26 -08:00
Blaise Tine
83d55da2d5 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-28 13:00:00 -08:00
Blaise Tine
afea903332 Merge branch 'develop' 2023-12-28 12:33:58 -08:00
Blaise Tine
36f5dd87fe minor update 2023-12-28 12:22:22 -08:00
Blaise Tine
e217bc2c23 adding tracking for SFU stalls 2023-12-28 12:12:11 -08:00
Blaise Tine
57ecb5e530 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop
Conflicts:
	hw/rtl/interfaces/VX_pipeline_perf_if.sv
2023-12-20 18:33:17 -08:00
Blaise Tine
c7a81d1493 adding sockets support to simx and cache subsystem refactoring
minor update

minor update

minor updates
2023-12-20 15:16:12 -08:00
Blaise Tine
914b680aed operands optimization
minor updates

minor updates

minor update

operands optimization

minor updates

minor updates
2023-12-20 15:07:23 -08:00
Blaise Tine
c94fd8e83b Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-20 14:58:34 -08:00
Blaise Tine
d567d67caa minor update 2023-12-20 14:47:27 -08:00
Blaise Tine
7842520848 minor update 2023-12-20 14:04:31 -08:00
Blaise Tine
d105b91438 adding sockets support to simx and cache subsystem refactoring 2023-12-20 11:57:44 -08:00
Blaise Tine
cf51770c2c Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop
Conflicts:
	hw/rtl/core/VX_lsu_unit.sv
2023-12-18 13:01:13 -08:00
Blaise Tine
2c6d84bac9 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-18 12:54:13 -08:00
Blaise Tine
39e6f95c2b operands optimization
minor updates

minor updates

minor update
2023-12-18 12:53:34 -08:00
Blaise Tine
5a2bc88d20 operands optimization
minor updates

minor updates
2023-12-18 04:44:01 -08:00
Blaise Tine
e04e026a14 profiling update
minor updates
2023-12-18 04:43:44 -08:00
Blaise Tine
c6845a4c8d profiling timing optimization
minor update

minor update

minor update
2023-12-18 04:43:10 -08:00
Blaise Tine
f5f9e3dfdb profiling timing optimization 2023-12-18 04:43:10 -08:00
Blaise Tine
6c7ac35054 profiling optimizations
minor updates
2023-12-18 04:43:00 -08:00
Blaise Tine
1be3778731 erge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-18 03:17:05 -08:00
Blaise Tine
caef3c5990 minor updates 2023-12-18 03:16:41 -08:00
Blaise Tine
2c1d858a2d Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-17 23:21:31 -08:00
Blaise Tine
5a6d98a2e2 minor updates 2023-12-17 23:20:43 -08:00
Blaise Tine
d79ff077b7 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-15 14:11:38 -08:00
Blaise Tine
f506ae6cea operands optimization 2023-12-15 14:09:51 -08:00
Blaise Tine
0468577b0b minor updates 2023-12-15 14:09:38 -08:00
Blaise Tine
4e51544402 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-15 01:14:21 -08:00
Blaise Tine
144d4d629b profiling update 2023-12-15 01:01:39 -08:00
Blaise Tine
0bdbbd2667 minor update 2023-12-14 15:55:19 -08:00
Blaise Tine
7fbd253d3f minor update 2023-12-14 03:01:36 -08:00
Blaise Tine
b001eb43f8 minor update 2023-12-14 02:57:30 -08:00
Blaise Tine
7afc557ba8 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-13 18:06:31 -08:00
Blaise Tine
7cae30076a profiling timing optimization 2023-12-13 18:04:12 -08:00
Blaise Tine
3c3bdc08ad Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 17:11:27 -08:00
Blaise Tine
100d4459cd profiling timing optimization 2023-12-05 17:10:30 -08:00
Blaise Tine
664a58b742 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 06:19:51 -08:00
Blaise Tine
c397fb5f4b minor updates 2023-12-05 06:15:15 -08:00
Blaise Tine
22fef445ff profiling optimizations 2023-12-05 05:12:13 -08:00
Blaise Tine
e5b41bcd66 wctl unit bug fix 2023-12-05 04:57:52 -08:00
Blaise Tine
1912f52bee profiling bug fix 2023-12-05 04:56:46 -08:00
root
900a1efaca BUFFER_EX refactoring 2023-12-05 04:55:50 -08:00
root
d288fb360c Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 04:50:20 -08:00
Blaise Tine
e44d38bb02 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-12-05 04:00:33 -08:00
Hyesoon Kim
63a4ccef16
Merge pull request #95 from Udit8348/develop-documentation
Documentation for Testing and Contributing
2023-12-01 09:20:21 -05:00
Udit Subramanya
0d5887b938 Merge branch 'develop' into develop-documentation
Attempted to directly push to develop, but permission was denied.
Therefore, I moved my changes to my development branch located on my fork.
I have permission to commit changes to my fork, and I can open a PR to bring those changes into main repo
2023-12-01 08:56:17 -05:00
Udit Subramanya
a43b7432a0 add environment setup readme 2023-12-01 08:55:01 -05:00
Udit Subramanya
af94d24963 Merge branch 'develop' into develop-documentation 2023-12-01 08:49:46 -05:00
Udit Subramanya
247f91a296
Merge branch 'vortexgpgpu:master' into master 2023-12-01 08:39:18 -05:00
Udit Subramanya
b20320236d adding documemtation for contributing and documentation 2023-12-01 08:22:44 -05:00
Blaise Tine
9c2916f3fc minor update 2023-11-28 12:03:48 -08:00
Blaise Tine
e8d56dc013 minor update 2023-11-27 22:16:36 -08:00
Blaise Tine
621d6de6ce Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-11-27 08:07:52 -08:00
Blaise Tine
24973ffca0 scoreboard optimization & profiling 2023-11-27 05:53:36 -08:00
Blaise Tine
4b68235389 fixed simx dispatcher bug 2023-11-27 04:50:55 -08:00
Blaise Tine
9dc5793046 minor udpate 2023-11-27 02:21:47 -08:00
Blaise Tine
1271c9c03f minor update 2023-11-27 02:12:12 -08:00
Blaise Tine
ebec982434 minor update 2023-11-27 02:04:53 -08:00
Blaise Tine
2f1171ca76 minor update 2023-11-27 02:04:22 -08:00
Blaise Tine
11752b2562 Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop 2023-11-18 00:27:46 -08:00
Blaise Tine
88f99e9525 Merge branch 'master' of https://github.com/vortexgpgpu/vortex into develop 2023-11-17 01:20:10 -08:00
Blaise Tine
43154cf738 minor updates 2023-11-16 23:41:59 -08:00
Blaise Tine
d65cc61df5 minor update 2023-11-16 12:00:37 -08:00
Blaise Tine
547d916ae2 minor update 2023-11-15 13:00:06 -08:00
Blaise Tine
2c94e358b8 perf counter bug fix 2023-11-15 00:52:39 -08:00
Blaise Tine
ede5e1c311 minor update 2023-11-15 00:28:26 -08:00
Blaise Tine
61e3442ef8 adding opencl convolution benchmark 2023-11-14 22:31:30 -08:00
Blaise Tine
4e7a536918 adding tensor regression test. 2023-11-14 05:37:46 -08:00
Blaise Tine
ecf546bc4a minor update 2023-11-13 20:00:39 -08:00
Blaise Tine
b274b8cc21 minor updates 2023-11-13 00:23:15 -08:00
Blaise Tine
a08d3ebd42 minor update 2023-11-12 23:40:59 -08:00
Blaise Tine
62cdd8e993 minor update 2023-11-11 15:49:39 -08:00
Blaise Tine
64dc5e1667 Merge branch 'develop' 2023-11-10 02:57:42 -08:00
Blaise Tine
c1e168fdbe Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes

minor update

minor update

minor update

minor update

minor update

minor update

cleanup

cleanup

cache bindings and memory perf refactory

minor update

minor update

hw unit tests fixes

minor update

minor update

minor update

minor update

minor update

minor udpate

minor update

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor update

minor update

minor update

minor update

minor update

minor update

minor updates

minor updates

minor updates

minor updates

minor update

minor update
2023-11-10 02:47:05 -08:00
Blaise Tine
6e93787e59 minor update 2023-11-06 00:16:24 -08:00
Blaise Tine
e0becb1599 minor update 2023-11-05 20:03:31 -08:00
Blaise Tine
d13c5f2986 hw unit tests fixes 2023-11-05 18:51:31 -08:00
Blaise Tine
1fd5a95f5a minor update 2023-11-03 18:04:05 -04:00
Blaise Tine
9f1f1ecaa3 minor update 2023-11-03 08:36:28 -04:00
Blaise Tine
c9e6518e05 cache bindings and memory perf refactory 2023-11-03 08:18:18 -04:00
Blaise Tine
69f9ae778d cleanup 2023-11-03 08:12:03 -04:00
Blaise Tine
970cbf066a cleanup 2023-11-03 08:09:59 -04:00
Blaise Tine
1c100c4cf5 minor update 2023-10-22 23:31:58 -07:00
Blaise Tine
cb7d6b964c minor update 2023-10-22 02:25:34 -07:00
Blaise Tine
8cf833b7eb minor update 2023-10-21 19:12:07 -07:00
Blaise Tine
8fe373891f minor update 2023-10-21 17:55:29 -07:00
Blaise Tine
3cacb4f80f minor update 2023-10-20 02:21:20 -07:00
Blaise Tine
65ca0fff3a minor update 2023-10-20 00:48:05 -07:00
Blaise Tine
d47cccc157 Vortex 2.0 changes:
+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
2023-10-19 20:51:22 -07:00
Blaise Tine
d69a64c32c minor update 2023-05-16 04:59:01 -04:00
Blaise Tine
b9cda8fca7 minor update 2023-05-15 20:19:14 -04:00
Blaise Tine
1136c664f1 minor update 2023-05-15 19:51:20 -04:00
Blaise Tine
1243848963 minor update 2023-05-15 19:13:45 -04:00
Blaise Tine
dce5e79f65 toolchain update 2023-05-15 18:53:24 -04:00
Nicholas Ade
afa9e4003c adding mul and divide to bfloat 2023-04-13 04:20:23 -04:00
Nicholas Ade
1b6d9bd3a5 Making the bfloat files 2023-04-12 15:01:09 -04:00
tinebp
88ed687557
Update .travis.yml 2022-09-30 04:19:53 -04:00
tinebp
ebff870d54
Update .travis.yml 2022-09-30 03:09:35 -04:00
tinebp
d802defd6c
Update .travis.yml 2022-09-30 01:44:54 -04:00
tinebp
ff4c24657e
Update README.md 2022-09-29 21:05:35 -04:00
Blaise Tine
e1b666cb93 minor update 2022-07-14 08:55:09 -04:00
Blaise Tine
da834a28df adding support for TLS global variables 2022-07-14 06:03:02 -04:00
Blaise Tine
77002dd06a minor updates 2022-02-05 20:52:23 -05:00
Blaise Tine
76481bc794 minor update 2022-02-05 20:41:44 -05:00
Blaise Tine
5a5f1ad3fe minor update 2022-02-05 18:03:38 -05:00
Blaise Tine
2277e3c878 minor update 2022-02-05 17:59:58 -05:00
Blaise Tine
1bd25acb0b cmov 2022-02-05 17:58:12 -05:00
Blaise Tine
d297351211 simx64 bug fix 2022-02-05 17:13:16 -05:00
Blaise Tine
2fd93e1d89 Merge branch 'staging' of https://github.com/vortexgpgpu/vortex 2022-02-05 16:12:52 -05:00
tinebp
a5ab68d9df
Merge pull request #42 from SantoshSrivatsan24/staging
Staging
2022-02-05 14:55:49 -05:00
Santosh Srivatsan
1b5b7a3cba Modified travis.yml 2022-02-05 14:45:21 -05:00
Santosh Srivatsan
6dd6a88c12 Modified regression64.sh 2022-02-05 14:37:05 -05:00
Santosh Srivatsan
09833fdfb1 Fixed merge conflict 2022-02-05 14:34:36 -05:00
tinebp
eacdb63454
Merge pull request #41 from SantoshSrivatsan24/staging
Staging
2022-02-05 14:25:17 -05:00
Santosh Srivatsan
1c781c78c0 Minor bug fix 2022-02-05 14:17:56 -05:00
Santosh Srivatsan
b7e5a83ba3 Merged branch xlen-parameterization into staging 2022-02-05 13:47:42 -05:00
Blaise Tine
3179541efe minor update 2022-02-05 12:24:16 -05:00
Blaise Tine
bda77760c8 addition bug fixes 2022-02-05 09:14:35 -05:00
Blaise Tine
140124b423 additional bug fixes 2022-02-05 07:42:50 -05:00
Blaise Tine
703d3faf27 minor bug fixes 2022-02-05 06:37:54 -05:00
Blaise Tine
a2342cfd82 toolchain install bug fix 2022-02-05 05:19:53 -05:00
Blaise Tine
5fbace9fa0 fixed several bugs and refactor memory access 2022-02-04 17:50:19 -05:00
Blaise Tine
cf2a0a5f39 code refactoring 2022-02-04 00:07:24 -05:00
Santosh Srivatsan
212ee21b54 Updated excluded 32-bit tests 2022-02-03 22:33:47 -05:00
Santosh Srivatsan
71e8a30186 Added vx_link32.ld 2022-02-03 22:31:47 -05:00
Santosh Srivatsan
8a525039af Added 64 bit regression script 2022-02-03 15:20:15 -05:00
Santosh Srivatsan
836c777680 XLEN parameterization for simx 2022-02-03 15:19:31 -05:00
Blaise Tine
a3472da181 Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2022-02-02 01:24:00 -05:00
Blaise Tine
a06812f93f minor updates 2022-02-01 22:51:33 -05:00
Santosh Srivatsan
54dd2cfe1d Added xlen parameterization to types.h instead of xlen.h 2022-02-01 14:02:46 -05:00
Santosh Srivatsan
01d183c6a9 Removed xlen.h 2022-02-01 13:59:39 -05:00
Santosh Srivatsan
3eb2b71955 removed traces of xlen. Overloaded sext 2022-02-01 13:54:51 -05:00
Santosh Srivatsan
b23a9c76e7 Changed run-simx-32imf to run-simx to comply with the rest of the code 2022-02-01 13:26:41 -05:00
Santosh Srivatsan
0ce51df108 Removed simx64 comments 2022-02-01 13:25:26 -05:00
Blaise Tine
d48f1c1c5f minor updates 2022-02-01 06:53:31 -05:00
Santosh Srivatsan
a73f656d06 Minor bug fixes 2022-01-31 17:01:14 -05:00
Santosh Srivatsan
4cf596338d Minor bug fixes 2022-01-31 15:53:49 -05:00
Blaise Tine
e3e2609f7e adding unit test for vx_malloc 2022-01-30 05:57:18 -05:00
Blaise Tine
3750c672a7 Makefiles update 2022-01-30 00:26:55 -05:00
Blaise Tine
f7887d8720 refactoring device memory allocation and cleanup 2022-01-28 21:57:16 -05:00
Santosh Srivatsan
7e3a2fdb0f Modifications to allow 64-bit riscv tests to run on travis CI 2022-01-27 15:55:19 -05:00
Santosh Srivatsan
7aa93a735d Added FLEN parameterization for RV32/64 F and D instructions 2022-01-24 15:42:15 -05:00
Santosh Srivatsan
ad92c09f5b Changed all instances of DWord to XWord and DWordI to XWordI. Added XLEN parameterization to the simx Makefile 2022-01-22 13:47:44 -05:00
Santosh Srivatsan
91c22a2592 Fixed some riscv-tests 2022-01-22 12:54:10 -05:00
tinebp
d3c65edcf5
Update README.md 2022-01-21 13:18:19 -05:00
Santosh Srivatsan
d762d401cd Added 64-bit linker script 2022-01-11 17:22:16 -05:00
Blaise Tine
29df0da8b5 minor warning fixes 2022-01-10 20:33:37 -05:00
Santosh Srivatsan
d7e2a6b3b1 Minor update 2021-12-18 16:27:29 -05:00
Santosh Srivatsan
a9e3104ce1 Removed ramulator log from tests/riscv/isa 2021-12-15 17:30:12 -05:00
Santosh Srivatsan
f93303bac7 Minor update 2021-12-15 17:21:38 -05:00
Santosh Srivatsan
71acf4eadb Changed instruction size from wsize() * 4 to wsize() * 8 2021-12-13 20:42:44 -05:00
Santosh Srivatsan
d8796efd89 Minor update 2021-12-13 20:39:40 -05:00
Santosh Srivatsan
b1e82223ee Renamed rv_f* functions to rvf*_s to follow the naming convention between single and double precision floating point 2021-12-13 20:37:29 -05:00
Santosh Srivatsan
039f5eb733 Moved 64-bit riscv-tests to tests/riscv/isa from tests/riscv/isa64 2021-12-13 20:21:51 -05:00
Santosh Srivatsan
d14e05e748 Removed all instances of my username \'ssrivatsan8\' and un-did the changes to vx_start.S 2021-12-13 20:01:11 -05:00
Santosh Srivatsan
76eb79d7fa Removed pipeline.cpp 2021-12-13 19:57:47 -05:00
Santosh Srivatsan
4abfca4cb2 Replaced all instanced of DoubleWord to DWord and DoubleWordI to DWordI 2021-12-13 19:55:02 -05:00
Santosh Srivatsan
e82d5fe48f Removed all comments labelled \'simx64\' 2021-12-13 19:52:13 -05:00
Santosh Srivatsan
67daa6e616 Minor update 2021-12-11 17:58:31 -05:00
Santosh Srivatsan
427146d59b Removed 64-bit runtime and regression tests 2021-12-11 17:20:40 -05:00
Santosh Srivatsan
885bb58ca9 Merged RV64IMFD extensions to master branch 2021-12-11 17:06:29 -05:00
Santosh Srivatsan
3324b32a29 Moved Dockerfile to miscs 2021-12-10 21:54:41 -05:00
Santosh Srivatsan
5edb9098ce Merge branch 'simx64' 2021-12-10 21:48:29 -05:00
Santosh Srivatsan
e7bc436b52 Renamed simX to simx 2021-12-10 16:57:29 -05:00
Santosh Srivatsan
be499d6f38 Renamed simX to simx and added 64-bit riscv-tests 2021-12-10 16:56:12 -05:00
Santosh Srivatsan
b8b3405efe Merge branch 'vortexgpgpu-master'
Merged updated codebase with master branch.
2021-12-10 16:41:43 -05:00
Santosh Srivatsan
0e38c39b62 Merge branch 'master' of https://github.com/vortexgpgpu/vortex into vortexgpgpu-master 2021-12-10 16:38:00 -05:00
Santosh Raghav Srivatsan
bde789b320 Added support for RV32D and RV64D instructions 2021-12-10 16:30:24 -05:00
Blaise Tine
d7737542e4 cache uuid support 2021-12-09 20:43:22 -05:00
Blaise Tine
0e2de4f13a prefetch test fixes 2021-12-09 04:54:10 -05:00
Blaise Tine
fb6106267c Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2021-12-09 00:00:28 -05:00
Blaise Tine
5825b7c15a dram simulator fix 2021-12-07 22:44:06 -05:00
Santosh Raghav Srivatsan
e6eda67d0c Modified RV32F instructions to support 64-bit register file and added RV64F ISA extension 2021-12-06 18:55:13 -05:00
Blaise Tine
a9ec1c08a7 minor update 2021-12-06 15:44:25 -05:00
Blaise Tine
9811740ead minor update 2021-12-06 14:34:20 -05:00
Blaise Tine
71ce58500a minor update 2021-12-06 14:09:31 -05:00
Blaise Tine
30d9d3e956 minor update 2021-12-06 13:17:51 -05:00
Blaise Tine
b741807f8c using ramulator dram simulator 2021-12-06 01:22:45 -05:00
Santosh Raghav Srivatsan
30a0d34151 Fixed Makefile 2021-12-05 15:55:43 -05:00
Blaise Tine
59232642c4 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-12-02 10:34:11 -08:00
Blaise Tine
38f166f090 texture unit hardware optimizations 2021-12-02 10:22:21 -08:00
Santosh Raghav Srivatsan
3784da0d2f riscv-tests work on simx 2021-12-01 19:41:16 -05:00
Blaise Tine
189cec3ca2 minor update 2021-12-01 10:36:50 -05:00
Santosh Raghav Srivatsan
f0dc04ad04 Added tests to commit. 64 bit simx still not working 2021-12-01 02:44:14 -05:00
Blaise Tine
092ff42ab4 simx multicore fix 2021-12-01 00:12:16 -05:00
Blaise Tine
4477cbeed1 blackbox caching fix 2021-11-30 15:36:59 -05:00
Blaise Tine
7c4b3cab29 adding cocogfx 2021-11-30 14:26:54 -05:00
Blaise Tine
d4cb3b8410 removed cocogfx 2021-11-30 14:24:34 -05:00
Blaise Tine
d4addc65ab minor update 2021-11-30 08:55:40 -05:00
Blaise Tine
2a7a4df342 simx directory name fix 2021-11-30 07:17:58 -05:00
Blaise Tine
41d7e6c63a cummulative fixes, RTL uuid trace, texture unit fixes, simx timing fixes 2021-11-30 07:08:15 -05:00
Santosh Raghav Srivatsan
28ab94e925 Added isa tests 2021-11-27 12:37:29 -05:00
Santosh Raghav Srivatsan
a48a78088c Added 64 bit basic test 2021-11-27 12:36:26 -05:00
Santosh Raghav Srivatsan
64d47f3637 Added support for RV64I instructions 2021-11-27 12:33:30 -05:00
Blaise Tine
b995843a5b cocogfx fixes and refactoring 2021-11-25 13:58:09 -05:00
Blaise Tine
a671e1a05d moving submodules into third_party folder 2021-11-24 18:10:00 -05:00
Blaise Tine
d25fa21a59 adding cocogfx submodule 2021-11-24 18:03:09 -05:00
Blaise Tine
18762dffce fixes: texture unit mem access sometimes going to smem, bilinear texture filtering; new: cache req_id, 2021-11-24 00:00:17 -05:00
Blaise Tine
1501360f4b minor update 2021-11-14 09:06:13 -05:00
Blaise Tine
27a65fdee7 driver refactoring 2021-11-14 09:05:15 -05:00
Blaise Tine
808bddb586 simx timing simulation refactoring 2021-11-14 08:52:34 -05:00
Blaise Tine
9656779d48 minor update 2021-11-14 04:45:06 -05:00
Blaise Tine
bd70afa688 cache multi-porting fix - ensure per-bank uniform rw 2021-11-14 04:44:25 -05:00
Blaise Tine
c2721fd545 SimX timing simulation 2021-11-13 01:41:12 -05:00
Santosh Raghav Srivatsan
d1892bd6ec Added support for a few RV64I instructions 2021-11-11 13:35:14 -05:00
Santosh Raghav Srivatsan
9cd8dec397 Extended register file to 64 bits 2021-11-08 17:54:16 -05:00
Santosh Raghav Srivatsan
8a550b625c Merge branch 'master' of https://github.com/SantoshSrivatsan24/vortex 2021-11-08 16:35:32 -05:00
Santosh Raghav Srivatsan
ad633aa639 Added prefetch test to commit 2021-11-08 16:32:52 -05:00
tinebp
d64bc880b2
Merge pull request #26 from SantoshSrivatsan24/assignment5
Added tests for prefetch
2021-10-19 18:56:38 -04:00
tinebp
007b10e79c
Merge branch 'master' into assignment5 2021-10-19 18:54:09 -04:00
Santosh Raghav Srivatsan
620ec78cc0 Updated Makefile to include tests for software prefetch 2021-10-19 18:47:37 -04:00
Santosh Raghav Srivatsan
d219e29f0c Added tests for prefetch 2021-10-19 17:56:58 -04:00
Blaise Tine
009e897cab minor update 2021-10-19 17:12:40 -04:00
Blaise Tine
956f3d1880 docs update 2021-10-19 16:32:31 -04:00
Blaise Tine
fe862f64b1 dispatch refactoring 2021-10-19 15:16:00 -04:00
Blaise Tine
5423958366 docs update 2021-10-19 12:56:36 -04:00
Blaise Tine
e248f744d5 Merge branch 'master' of https://github.com/vortexgpgpu/vortex 2021-10-19 03:07:13 -04:00
Blaise Tine
67a76155b1 minor update 2021-10-19 01:46:36 -04:00
Blaise Tine
6edf38548f text_unit merge fixes 2021-10-19 00:16:22 -04:00
Blaise Tine
0b23d8e935 minor update 2021-10-18 14:32:39 -04:00
Hyesoon Kim
66e253ab95
Merge pull request #25 from SantoshSrivatsan24/assignment5
Solution for Vortex tutorial assignment 5
2021-10-18 08:17:54 -04:00
Blaise Tine
456f1df332 sort test fix. 2021-10-18 00:41:11 -04:00
Blaise Tine
0d62129d32 merge fixes 2021-10-17 18:33:02 -07:00
Blaise Tine
bf72800676 debug tracing refactoring 2021-10-17 13:42:16 -07:00
Blaise Tine
af6d9e7a8b fixed build warnings 2021-10-17 12:01:31 -07:00
Blaise Tine
5d90439a51 travis update 2021-10-17 11:25:57 -07:00
Blaise Tine
b529f538b8 Makefile updates 2021-10-17 10:52:07 -07:00
RobinHan
73d249fc56 add docs for executing OpenCL on Vortex 2021-10-16 11:37:10 -04:00
Blaise Tine
78345d8bdf fixed warning messages 2021-10-15 21:30:47 -07:00
Blaise Tine
58a2140b92 merge update 2021-10-15 19:58:13 -07:00
Blaise Tine
e380ded5e1 Merge branch 'master' into graphics 2021-10-15 19:32:11 -07:00
Santosh Raghav Srivatsan
dd12d3f848 vortex tutorial assignment 5 solution 2021-10-15 18:25:54 -04:00
Blaise Tine
668cfb5da4 text_addr optimization 2021-10-14 13:00:17 -07:00
Blaise Tine
e2b5799a01 minor updates 2021-10-13 15:55:35 -04:00
Blaise Tine
1cd833d2c4 minor fixes 2021-10-11 19:02:13 -07:00
Blaise Tine
4a4498cd53 minor update 2021-10-11 18:32:17 -04:00
Blaise Tine
549629440d minor update 2021-10-11 17:11:36 -04:00
Blaise Tine
e324c9a90a opencl kernels update to use Vortex llvm compiler 2021-10-11 17:11:07 -04:00
Blaise Tine
d94dc37cfe ipdom bug fix 2021-10-11 16:25:54 -04:00
Blaise Tine
7102ca5394 minor update 2021-10-10 20:00:56 -04:00
Blaise Tine
48b2238a1f adding rvfloats library 2021-10-10 13:26:11 -07:00
Blaise Tine
b8682f56ac softfloat library integration 2021-10-10 13:20:50 -07:00
Blaise Tine
28e26f3130 minor update 2021-10-09 13:19:46 -07:00
Blaise Tine
ca1d97a3c2 test sources refactoring 2021-10-09 10:51:43 -04:00
Blaise Tine
54bddeee9c simulation framework refactoring 2021-10-09 10:20:42 -04:00
Blaise Tine
51673665b5 simX updates 2021-10-07 13:59:26 -04:00
Blaise Tine
cfed87f416 runtime update 2021-10-07 13:58:21 -04:00
Blaise Tine
efb70b21df dpi bug fix 2021-10-07 13:35:35 -04:00
Blaise Tine
5a45dcdc02 minor update 2021-10-07 10:05:32 -04:00
Blaise Tine
764b3e194c predication testing 2021-10-03 02:51:48 -04:00
Blaise Tine
1e6ad235c9 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-30 09:16:45 -04:00
Blaise Tine
29aba92bf1 minor update 2021-09-30 06:14:05 -07:00
Blaise Tine
d3b9c43dd8 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-29 23:59:55 -04:00
Blaise Tine
fe14a9933d device caps fix 2021-09-29 09:32:47 -07:00
Blaise Tine
8e82ee00a0 minor update 2021-09-29 09:32:21 -07:00
Blaise Tine
bbcb50ba81 minor update 2021-09-29 04:49:36 -04:00
Blaise Tine
04249c3ee9 code refactoring for Vivado compatibility 2021-09-29 04:48:53 -04:00
Blaise Tine
a45261b530 code refactoring for Vivado compatibility 2021-09-29 03:24:17 -04:00
Blaise Tine
18c1dc2f0e fixed interface modports 2021-09-28 02:42:04 -07:00
Blaise Tine
eb5e21d803 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-27 09:24:26 -04:00
Blaise Tine
132260d84c minor update 2021-09-27 09:23:58 -04:00
Blaise Tine
d84241aad0 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-27 09:22:13 -04:00
Blaise Tine
9f34b2944c code refactoring for Vivado, sv2v, and yosys compatibility 2021-09-27 08:55:10 -04:00
Blaise Tine
b8cfc0525e minor update 2021-09-23 03:56:36 -04:00
Blaise Tine
9b04f3d9d6 Updated README and synthesis scripts 2021-09-22 07:50:47 -07:00
Blaise Tine
feca2db24e critical path optimizations 2021-09-15 04:50:45 -07:00
Blaise Tine
73d102afed minor fix 2021-09-15 04:49:24 -07:00
Blaise Tine
142cbb8f3b minor update 2021-09-14 09:17:08 -04:00
Blaise Tine
83d80c061f Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-14 05:11:28 -04:00
Blaise Tine
4e8293c3e3 cache bank pipeline optimization 2021-09-14 02:09:35 -07:00
Blaise Tine
3d7baf1640 block ram read enable fix 2021-09-14 01:45:01 -07:00
Blaise Tine
6652e2f0e9 minor update 2021-09-11 18:16:08 -07:00
Blaise Tine
7a0b4d7895 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-11 19:53:00 -04:00
Blaise Tine
12704f9929 minor update 2021-09-11 16:47:29 -07:00
Blaise Tine
6f09fb8ba5 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-11 18:26:08 -04:00
Blaise Tine
182811697e minor update 2021-09-11 15:14:55 -07:00
Blaise Tine
95287980af minor update 2021-09-11 15:14:17 -07:00
Blaise Tine
f98e26e0f2 AXI interface update 2021-09-11 15:12:36 -07:00
Blaise Tine
0dfdf6cd4d Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-10 06:03:32 -04:00
Blaise Tine
5192846c72 minor updates 2021-09-10 02:57:05 -07:00
Blaise Tine
18172fa611 AXI memory bus support 2021-09-10 01:36:01 -07:00
Blaise Tine
ca46b0a0be OUTPUT_REG => OUT_REG renaming 2021-09-09 03:05:38 -07:00
Blaise Tine
170c5d0c8a regression script update 2021-09-08 23:22:50 -04:00
Blaise Tine
a46c32ed4b Adding Vortex Yosys build support 2021-09-08 23:04:33 -04:00
Blaise Tine
a25076b9c1 regression fix. 2021-09-08 12:02:22 -07:00
Blaise Tine
81bee3ac45 Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-09-08 02:27:53 -07:00
Blaise Tine
3d052e9428 fmax optimization bundle (250 MHz). 2021-09-08 02:26:39 -07:00
Blaise Tine
05bc970900 minor update 2021-09-07 23:57:14 -07:00
Blaise Tine
aeeb3ca616 ALU unit critical path optimization 2021-09-07 23:54:10 -07:00
Blaise Tine
c06efbf480 minor update 2021-09-07 23:47:41 -07:00
Blaise Tine
134cbcfc5a optimize critical path inside cache bank 2021-09-07 23:44:51 -07:00
Blaise Tine
0d91f8771e Workaround fix for Verilator bug with array indexing 2021-09-07 23:28:54 -07:00
Blaise Tine
c8d705158c lsu unit fmax optimization 2021-09-06 23:47:09 -07:00
Blaise Tine
105a24d65e shared memory optimization 2021-09-06 23:46:36 -07:00
Blaise Tine
d42baf34ff minor update 2021-09-06 23:44:31 -07:00
Blaise Tine
af1cecae07 stream arbiter update 2021-09-06 23:38:20 -07:00
Blaise Tine
3e014c8285 fmax optimizations bundles 2021-09-06 01:36:57 -07:00
Blaise Tine
b52ace5142 area optimization bundle 2021-09-05 23:35:44 -07:00
Blaise Tine
fe5112b6c1 minor updates 2021-09-05 23:05:21 -07:00
Blaise Tine
377466ed1c fpu area optimization 2021-09-05 21:01:52 -07:00
Blaise Tine
33a83cc733 adding fpu_core synthesis build 2021-09-05 20:27:55 -07:00
Blaise Tine
d3c3d551ff Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-08-31 03:36:37 -04:00
Blaise Tine
c162ce526f adding predicate instruction 2021-08-31 03:23:59 -04:00
Blaise Tine
53c8cddccf LKG build - minor update 2021-08-30 10:25:52 -07:00
Blaise Tine
a801a16062 instruction decode refactoring fixing naming collision 2021-08-29 20:07:34 -07:00
Blaise Tine
90b50277d0 cache multi-porting fixes + optimization 2021-08-29 18:33:49 -07:00
Blaise Tine
e26cfab04d bank area optimization 2021-08-29 02:25:55 -07:00
Blaise Tine
5392395fba minor update 2021-08-28 23:13:50 -07:00
Blaise Tine
6674e8c44a cache bank area optimization + multi-porting fix for l2/l3 caches 2021-08-28 21:34:06 -07:00
Blaise Tine
f3ba27b138 GPRs optimization - disabling BRAM's read-during-write bypass block. 2021-08-28 15:34:36 -07:00
Blaise Tine
12b8b4af24 minor updates 2021-08-28 15:21:40 -07:00
Blaise Tine
28eb3cfdb2 minor update 2021-08-26 14:49:57 -07:00
Blaise Tine
26e94dde44 cache area optimization by disabling BRAM read-during-write bypassing for tag/data stores 2021-08-26 12:27:38 -07:00
Blaise Tine
74a45e2772 stream arbiter optimization (using indexing instead of onehot mux) 2021-08-26 09:52:13 -07:00
Blaise Tine
d3d82de29e minor update 2021-08-26 09:49:57 -07:00
Blaise Tine
d91d56d126 block ram refactoring (multi-porting supporting and simulation support) 2021-08-26 08:19:44 -07:00
Blaise Tine
06a6857508 using lzc instead of priority encoder 2021-08-26 08:05:54 -07:00
Blaise Tine
e494860f38 using lzc instead of priority_encoder 2021-08-26 07:29:47 -07:00
Blaise Tine
2a27bfbfd5 LKG Build (reset network update -fmax=236 mhz 4c) 2021-08-23 01:59:22 -07:00
Blaise Tine
6caf674163 minor update 2021-08-16 04:47:08 -07:00
Blaise Tine
97b2709132 minor update 2021-08-16 04:46:53 -07:00
Blaise Tine
7aea9357f3 minor update 2021-08-16 04:43:59 -07:00
Blaise Tine
b00dea6d05 minor update 2021-08-15 14:27:04 -04:00
Blaise Tine
1472375fb5 updating opencl kernel after wspawn barrier fix 2021-08-15 08:19:10 -04:00
Blaise Tine
bc3fa0bb23 fixed wspawn's warp synchronization 2021-08-15 05:12:27 -07:00
Blaise Tine
a60bfc5e01 extending tracing feature for advanced debugging 2021-08-15 05:10:46 -07:00
Blaise Tine
640c98a4e8 Reverting Verilator versionb support to v4.200 2021-08-14 00:45:56 -07:00
Blaise Tine
221417b8b4 minor update 2021-08-13 20:23:20 -07:00
Blaise Tine
eef3dda81d fixed Verilator error 2021-08-13 19:33:12 -07:00
Blaise Tine
646371f9e9 bram block optimization 2021-08-13 19:31:55 -07:00
Blaise Tine
4336dcb2a8 minor scope analyzer fix 2021-08-13 19:23:57 -07:00
Blaise Tine
36d95fd892 automatic perf dump fix 2021-08-13 19:23:13 -07:00
Blaise Tine
3c43308e71 Makfile fixes for latest version of Verilator 2021-08-13 04:35:40 -07:00
Blaise Tine
f12be56d7c fixed Verilator warnings 2021-08-13 05:52:43 -04:00
Blaise Tine
4976a8c4f2 enabling automatic device cleanup for bad applications not releasing the device on exit 2021-08-12 20:06:51 -07:00
Blaise Tine
c2b3aaa7d1 enabling delayed tracing 2021-08-12 20:05:43 -07:00
Blaise Tine
7961cf7474 Disabling tracing on library 2021-08-12 01:55:52 -07:00
Blaise Tine
7202bdf977 minor update 2021-08-12 01:51:46 -07:00
Blaise Tine
9098495153 MSHR Redesign: removed fifo replay constraints and overheads 2021-08-12 01:49:32 -07:00
Blaise Tine
ac454eee79 warp scheduler optimization 2021-08-12 01:34:35 -07:00
Blaise Tine
cc259f60f6 minor update 2021-08-11 15:39:21 -07:00
Blaise Tine
90fa9eee7d minor update 2021-08-08 18:35:05 -07:00
Blaise Tine
4e4aa33a50 minor update 2021-08-08 03:09:28 -07:00
Blaise Tine
0debdd3fe7 minor update 2021-08-08 02:59:30 -07:00
Blaise Tine
5f487c899b Merge branch 'master' of https://github.gatech.edu/casl/Vortex 2021-08-08 01:27:35 -07:00
Blaise Tine
b1eef0fb7c warp scheduler optimization 2021-08-07 23:45:01 -07:00
Blaise Tine
5b8e58e15e warp scheduler optimization 2021-08-07 14:44:37 -07:00
Blaise Tine
b5af2065ee fetch optimization 2021-08-07 12:57:14 -07:00
Blaise Tine
7ae5da914e minor update 2021-08-07 12:56:26 -07:00
Blaise Tine
9935768280 vx_wsapwn.c fix 2021-08-07 15:13:07 -04:00
Blaise Tine
dd984ceb45 minor update 2021-08-07 01:10:02 -04:00
Blaise Tine
bf8f249754 runtime test update 2021-08-06 18:05:04 -07:00
Blaise Tine
d7948a1ce6 software updaet for new thread mask design 2021-08-05 22:39:39 -04:00
Blaise Tine
e4d9fd8a00 thread mask redesign 2021-08-05 17:32:58 -07:00
Blaise Tine
e7aa93614b minor update 2021-08-05 11:49:05 -07:00
Blaise Tine
43ad188ccb texture unit critical path optimization 2021-08-05 02:47:26 -07:00
Blaise Tine
7b8fe11e6a unused variables refactoring 2021-08-05 01:46:26 -07:00
Blaise Tine
deed327890 using png images in tex_unit test 2021-08-04 01:07:21 -07:00
Blaise Tine
b3aaac4903 minor update 2021-08-03 12:22:15 -07:00
Blaise Tine
c35434bd8f minor update 2021-08-03 11:01:52 -07:00
Blaise Tine
bd433b55b0 minor update 2021-08-03 01:13:18 -07:00
Blaise Tine
7b921387bc Merge branch 'master' into graphics 2021-08-02 23:57:53 -07:00
Blaise Tine
07f6667b66 minor update 2021-08-02 22:29:05 -07:00
Blaise Tine
16f1f24a62 minor update 2021-08-02 20:35:28 -07:00
Blaise Tine
80f62e8a41 instruction buffer optimization 2021-08-02 19:54:28 -07:00
Blaise Tine
26f553b75b minor update 2021-08-02 16:37:12 -07:00
Blaise Tine
591522f15a minor update 2021-08-02 16:30:41 -07:00
Blaise Tine
f52f1b1df0 removing test kernel binaries 2021-08-02 16:30:26 -07:00
Blaise Tine
69940feac0 registering execute units inputs 2021-08-02 16:08:11 -07:00
Blaise Tine
74227f8cdf optimize startup routine 2021-08-02 16:06:54 -07:00
Blaise Tine
91d4419fae new regression tests 2021-08-02 16:05:33 -07:00
Blaise Tine
6525dff158 fixed no shared memory bug, fixed cache debug log 2021-08-02 15:59:33 -07:00
Blaise Tine
dc322894cd bug fixes - lkg build 2021-08-01 19:21:37 -07:00
Blaise Tine
fd0d908a68 tex_unit critical path refactoring 2021-08-01 00:54:47 -07:00
Blaise Tine
3b7da61245 minor update 2021-07-31 03:30:35 -07:00
Blaise Tine
ee46bc8a48 minor update 2021-07-31 03:02:25 -07:00
Blaise Tine
94ad34768b Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics
Conflicts:
	hw/rtl/VX_core.v
	hw/rtl/VX_csr_data.v
	hw/rtl/VX_csr_unit.v
	hw/rtl/VX_decode.v
	hw/rtl/VX_define.vh
	hw/rtl/VX_execute.v
	hw/rtl/VX_gpu_unit.v
	hw/rtl/VX_instr_demux.v
	hw/rtl/VX_lsu_unit.v
	hw/rtl/VX_mem_unit.v
	hw/rtl/VX_pipeline.v
	hw/rtl/VX_platform.vh
	hw/rtl/VX_smem_arb.v
	hw/rtl/VX_writeback.v
	hw/rtl/cache/VX_bank.v
	hw/rtl/interfaces/VX_cache_mem_req_if.v
	hw/rtl/interfaces/VX_cache_mem_rsp_if.v
	hw/rtl/interfaces/VX_dcache_core_rsp_if.v
	hw/rtl/interfaces/VX_dcache_req_if.v
	hw/rtl/interfaces/VX_icache_core_rsp_if.v
	hw/rtl/tex_unit/VX_tex_memory.v
	hw/rtl/tex_unit/VX_tex_unit.v
	hw/simulate/Makefile
	hw/syn/quartus/pipeline/Makefile
	hw/unit_tests/tex_unit/tex_sampler/main.cpp
	hw/unit_tests/tex_unit/tex_sampler/vl_simulator.h
	runtime/include/vx_intrinsics.h
2021-07-30 21:12:55 -07:00
Blaise Tine
12f6930486 minor update 2021-07-30 21:05:32 -07:00
Blaise Tine
bb1ceffadd rebase master update 2021-07-30 21:03:14 -07:00
Blaise Tine
79fd92a1b4 minor update 2021-07-30 17:43:15 -07:00
Blaise Tine
160ff94a22 minor update 2021-07-30 16:01:22 -07:00
Blaise Tine
3d19588e57 bus arbiters refactoring 2021-07-30 16:00:09 -07:00
Blaise Tine
b6596494ff minor update 2021-07-30 15:50:04 -07:00
Blaise Tine
8b9d5e772e elete junk files 2021-07-28 12:38:08 -07:00
Blaise Tine
8030750b07 delete junk files 2021-07-28 12:02:59 -07:00
Blaise Tine
1a65f89575 minor update 2021-06-13 13:12:54 -07:00
Blaise Tine
e41a16cb58 tex_unit refactoring 2021-05-28 13:29:47 -07:00
Blaise Tine
1326538e5b adding larger tex_demo images 2021-05-27 20:08:57 -07:00
Blaise Tine
30a39afbf6 master merge fixes 2021-05-27 16:50:03 -07:00
Blaise Tine
432d694455 master merge fixes 2021-05-27 14:59:03 -07:00
Blaise Tine
d42171d2ed Merge branch 'master' into graphics 2021-05-26 23:33:06 -07:00
Blaise Tine
f4c52a41fa minor update 2021-04-16 02:45:37 -07:00
Blaise Tine
90a9325d6d minor update 2021-04-15 23:46:33 -07:00
Blaise Tine
0920d1e745 minor update 2021-04-16 02:04:04 -04:00
Blaise Tine
e1e4c8bc03 Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics
Conflicts:
	driver/tests/tex_demo/main.cpp
2021-04-15 22:37:29 -07:00
Blaise Tine
a9f91b7acd text sw emulation 2021-04-15 22:34:04 -07:00
Blaise Tine
de72a23fc6 bug fixes 2021-04-16 01:26:47 -04:00
Blaise Tine
0541433028 Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-04-15 23:56:50 -04:00
Blaise Tine
97bf6d8046 Merge branch 'master' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-04-15 23:56:40 -04:00
Krishna Yalamarthy
10182ebb7e minor update 2021-04-13 06:46:21 -04:00
Krishna Yalamarthy
951c516615 minor format change ; tex demo copy image added 2021-04-13 06:29:20 -04:00
Blaise Tine
66ff74eb97 minor update 2021-04-05 11:57:46 -07:00
Blaise Tine
0f05efbcf4 minor fix 2021-04-05 04:25:57 -04:00
Blaise Tine
bbd65a19b5 minor update 2021-04-05 04:25:40 -04:00
Blaise Tine
d38d512d42 Merge remote-tracking branch 'origin/master' into graphics 2021-04-04 23:48:31 -07:00
Blaise Tine
0c2dc580d8 Merge remote-tracking branch 'origin/master' into graphics 2021-04-04 22:58:04 -07:00
Blaise Tine
87888a9a93 master merge fixes 2021-04-04 21:12:12 -07:00
Blaise Tine
c23c0fbe4c Merge remote-tracking branch 'origin/master' into graphics 2021-04-04 18:26:05 -07:00
Blaise Tine
eda784da40 minor update 2021-04-04 18:25:26 -07:00
Blaise Tine
4683def6dd timimg fixes 2021-04-04 07:59:43 -07:00
Blaise Tine
03a1a4b9f1 synthesis makefile update 2021-04-04 04:03:22 -07:00
Blaise Tine
3c7754dcf3 minor updates 2021-04-04 03:48:01 -07:00
Blaise Tine
bc091d52c4 fpga build fixes 2021-04-02 23:34:58 -07:00
Blaise Tine
005a86f551 Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-04-03 01:09:34 -04:00
Blaise Tine
0c60680e9a minor update 2021-04-03 01:09:00 -04:00
Blaise Tine
62cfeef61d minor update 2021-04-02 14:15:58 -07:00
Blaise Tine
5d1071bc9a minor update 2021-04-02 04:32:32 -04:00
Blaise Tine
f6d210b67a minor update 2021-04-02 04:23:30 -04:00
Blaise Tine
8add5efe66 minor update 2021-04-01 19:42:53 -04:00
Blaise Tine
387fe55610 adding format conversion support 2021-04-01 17:54:55 -04:00
Blaise Tine
ac8104e27e minor update 2021-04-01 05:46:04 -04:00
Blaise Tine
3751c895fc minor update 2021-04-01 05:45:48 -04:00
Blaise Tine
3ec653cf38 minor update 2021-04-01 05:45:24 -04:00
Blaise Tine
d0186344e8 minor update 2021-04-01 05:34:59 -04:00
Blaise Tine
aa53a5a70f minor update 2021-04-01 05:27:09 -04:00
Blaise Tine
ff75cc9874 minor update 2021-04-01 05:20:23 -04:00
Blaise Tine
fff3fb18fb minor update 2021-04-01 03:20:30 -04:00
Blaise Tine
fa2d84831e bilinear fixes + refactoring 2021-03-31 23:30:36 -04:00
Blaise Tine
99ca04ce8c minor update 2021-03-31 18:56:03 -04:00
Blaise Tine
e6cb148ce1 Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-03-31 18:36:07 -04:00
Krishna Yalamarthy
a30e9fbcbc minor format fix 2021-03-31 17:45:20 -04:00
Krishna Yalamarthy
81391bc084 pt sampling / bilerp format support 2021-03-31 17:38:54 -04:00
Blaise Tine
90e4a800d5 minor update 2021-03-31 14:28:12 -04:00
Blaise Tine
1332fd1b67 minor update 2021-03-31 14:07:12 -04:00
Blaise Tine
7b2f96bc6d tex_unit update 2021-03-31 05:43:44 -04:00
Blaise Tine
79fcdf7a28 tex_unit update 2021-03-30 07:01:45 -04:00
Blaise Tine
28ee19779c Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-03-28 21:21:39 -04:00
Blaise Tine
3f5382a317 minor updates 2021-03-28 21:21:26 -04:00
Blaise Tine
f968dbccd3 minor updates 2021-03-28 18:08:04 -04:00
Blaise Tine
6514a3b782 minor updates 2021-03-28 04:00:20 -04:00
Blaise Tine
0b2f2a27ac minor updates 2021-03-27 21:27:08 -04:00
Blaise Tine
39a8579c27 many fixes 2021-03-27 20:58:12 -04:00
Krishna Yalamarthy
c1e25135fd tex sampler unit test pass - pt sampling, rgba8 2021-03-27 19:35:14 -04:00
Krishna Yalamarthy
02e093d407 partial update sampler unit test 2021-03-26 11:26:20 -04:00
Blaise Tine
2d48fe13c8 a few fixes... 2021-03-25 14:23:33 -04:00
Blaise Tine
994cc25ec4 minor update 2021-03-25 09:50:04 -04:00
Blaise Tine
147f70fa3e minor update 2021-03-25 09:47:37 -04:00
Blaise Tine
eee74a25cd Verilator testbench for unit tests 2021-03-24 09:53:10 -04:00
Blaise Tine
4de75fad31 tex_unit compiler fixes 2021-03-22 21:56:06 -04:00
Krishna Yalamarthy
8f0198149a lerp unused warning fixees 2021-03-22 13:46:11 -04:00
Krishna Yalamarthy
938b66f232 sampler bug fixes 2021-03-22 13:39:30 -04:00
Blaise Tine
79fdde3c0c tex_unit compiler fixes 2021-03-22 12:53:41 -04:00
Blaise Tine
6320cf778c tex_unit compiler fixes 2021-03-22 12:23:46 -04:00
Blaise Tine
79cbea0a13 tex_unit compiler fixes 2021-03-22 12:20:01 -04:00
Blaise Tine
40e3e0105a minor update 2021-03-22 10:48:26 -04:00
Blaise Tine
a6c489b553 tex_unit code refactoring 2021-03-22 10:44:11 -04:00
Blaise Tine
ebea51a8a8 tex_unit code refactoring 2021-03-22 10:42:22 -04:00
Blaise Tine
97041e8e82 tex_unit code refactoring 2021-03-22 10:41:50 -04:00
Blaise Tine
82a9aee417 Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-03-22 10:27:58 -04:00
Blaise Tine
f499ce6c95 tex_unit memory partial 2021-03-22 10:25:17 -04:00
Krishna Yalamarthy
c0fdee5787 bilinear sampling 2021-03-21 22:55:24 -04:00
Blaise Tine
0909ad3201 minor update 2021-03-20 21:57:06 -04:00
Blaise Tine
e1b36b78e3 minor update 2021-03-20 21:21:51 -04:00
Blaise Tine
8fa78ef059 minor update 2021-03-20 20:59:11 -04:00
Blaise Tine
96a03a3edf minor update 2021-03-20 19:25:11 -04:00
Blaise Tine
8c0c4e2b6e Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-03-20 19:22:11 -04:00
Blaise Tine
5747d12530 minor update 2021-03-20 19:18:22 -04:00
Krishna Yalamarthy
3c8469008f texture format and sampler pt partial 2021-03-20 19:07:11 -04:00
Blaise Tine
7ff5c082bc tex_unit address generation complete 2021-03-20 18:56:34 -04:00
Blaise Tine
1431ef9bc0 texunit tex_wrap 2021-03-20 13:40:42 -04:00
Blaise Tine
20ae993e51 texunit partial update 2021-03-20 10:50:54 -04:00
Blaise Tine
859877a00d tex_unit partial update 2021-03-20 08:40:57 -04:00
Blaise Tine
50f5bdcfe3 remobing VX_config.h from repor 2021-03-18 18:09:13 -04:00
Krishna Yalamarthy
41d7d8e7d5 Point Sampling Tex Unit Update 2021-03-18 16:57:14 -04:00
Blaise Tine
b8022822bb minor update 2021-03-18 14:28:58 -04:00
Blaise Tine
9abccbfda5 minor update 2021-03-18 14:28:27 -04:00
Blaise Tine
124acfbf12 texture unit dcache arbitration 2021-03-18 14:23:53 -04:00
Krishna Yalamarthy
6febdf7399 pt sampling - dcache arb; pt address compute setup 2021-03-17 12:07:25 -04:00
Blaise Tine
2cbc1c4161 adding texture test to tex_demo 2021-03-17 09:52:33 -04:00
Blaise Tine
676a13f30d tex refactoring and bug fixes 2021-03-16 09:25:57 -04:00
Blaise Tine
17424ad554 Merge remote-tracking branch 'origin/master' into graphics 2021-03-15 18:23:28 -04:00
Blaise Tine
fb46c65d25 Merge branch 'graphics' of https://github.com/vortexgpgpu/vortex-dev into graphics 2021-03-15 18:02:02 -04:00
Blaise Tine
6a9a279a32 build fixes and cleanup 2021-03-15 17:59:53 -04:00
Krishna Yalamarthy
72e06ef4fe Tex CSRs write support added 2021-03-15 16:41:29 -04:00
Krishna Yalamarthy
7587876820 Texture Instruction - Fixed Color 2021-03-15 16:41:28 -04:00
Krishna Yalamarthy
a953fe4e1e Tex CSRs write support added 2021-03-13 22:01:25 -05:00
Krishna Yalamarthy
f3f62e9e7b Texture Instruction - Fixed Color 2021-03-12 18:33:04 -05:00
2134 changed files with 2037122 additions and 1119062 deletions

8
.clang-format Normal file
View file

@ -0,0 +1,8 @@
Language: Cpp
BasedOnStyle: LLVM
IndentWidth: 2
TabWidth: 2
ColumnLimit: 0
UseTab: Never
BreakBeforeBraces: Attach
AlwaysBreakTemplateDeclarations: true

View file

@ -1,3 +0,0 @@
ignore:
- "./examples/*"
- "./tests/*"

175
.github/workflows/ci.yml vendored Normal file
View file

@ -0,0 +1,175 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: CI
on: [push, pull_request]
jobs:
setup:
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
submodules: recursive
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Install Dependencies
if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true'
run: |
sudo bash ./ci/install_dependencies.sh
- name: Setup Toolchain
if: steps.cache-toolchain.outputs.cache-hit != 'true'
run: |
TOOLDIR=$PWD/tools
mkdir -p build
cd build
../configure --tooldir=$TOOLDIR
ci/toolchain_install.sh --all
- name: Setup Third Party
if: steps.cache-thirdparty.outputs.cache-hit != 'true'
run: |
make -C third_party > /dev/null
build:
runs-on: ubuntu-22.04
needs: setup
strategy:
matrix:
xlen: [32, 64]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install Dependencies
run: |
sudo bash ./ci/install_dependencies.sh
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Run Build
run: |
TOOLDIR=$PWD/tools
mkdir -p build${{ matrix.xlen }}
cd build${{ matrix.xlen }}
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
source ci/toolchain_env.sh
make software -s > /dev/null
make tests -s > /dev/null
- name: Upload Build Artifact
uses: actions/upload-artifact@v4
with:
name: build-${{ matrix.xlen }}
path: build${{ matrix.xlen }}
tests:
runs-on: ubuntu-22.04
needs: build
strategy:
fail-fast: false
matrix:
name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm, vector]
xlen: [32, 64]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install Dependencies
run: |
sudo bash ./ci/install_dependencies.sh
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
restore-keys: |
${{ runner.os }}-toolchain-
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
restore-keys: |
${{ runner.os }}-thirdparty-
- name: Download Build Artifact
uses: actions/download-artifact@v4
with:
name: build-${{ matrix.xlen }}
path: build${{ matrix.xlen }}
- name: Run tests
run: |
cd build${{ matrix.xlen }}
source ci/toolchain_env.sh
chmod -R +x . # Ensure all files have executable permissions
if [ "${{ matrix.name }}" == "regression" ]; then
./ci/regression.sh --unittest
./ci/regression.sh --isa
./ci/regression.sh --kernel
./ci/regression.sh --regression
else
./ci/regression.sh --${{ matrix.name }}
fi
complete:
runs-on: ubuntu-22.04
needs: tests
steps:
- name: Check Completion
run: echo "All matrix jobs passed"

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
/build*
/.vscode
*.cache
*.code-workspace

12
.gitmodules vendored
View file

@ -1,3 +1,9 @@
[submodule "hw/rtl/fp_cores/fpnew"]
path = hw/rtl/fp_cores/fpnew
url = https://github.com/pulp-platform/fpnew.git
[submodule "third_party/softfloat"]
path = third_party/softfloat
url = https://github.com/ucb-bar/berkeley-softfloat-3.git
[submodule "third_party/ramulator"]
path = third_party/ramulator
url = https://github.com/CMU-SAFARI/ramulator2.git
[submodule "third_party/cvfpu"]
path = third_party/cvfpu
url = https://github.com/openhwgroup/cvfpu.git

View file

@ -1,58 +0,0 @@
language: cpp
dist: bionic
os: linux
compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- build-essential
- valgrind
- verilator
- yosys
install:
# Set environments
- export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
- export VERILATOR_ROOT=/opt/verilator
- export PATH=$VERILATOR_ROOT/bin:$PATH
# Install toolchain
- ci/toolchain_install.sh -all
# clone build directory
- make -s
# stages ordering
stages:
- test
jobs:
include:
- stage: test
name: coverage
script: cp -r $PWD ../build1 && cd ../build1 && ./ci/regression.sh -coverage
- stage: test
name: cluster
script: cp -r $PWD ../build2 && cd ../build2 && ./ci/regression.sh -cluster
- stage: test
name: debug
script: cp -r $PWD ../build3 && cd ../build3 && ./ci/regression.sh -debug
- stage: test
name: config
script: cp -r $PWD ../build4 && cd ../build4 && ./ci/regression.sh -config
- stage: test
name: stress
script: cp -r $PWD ../build5 && cd ../build5 && ./ci/regression.sh -stress
- stage: test
name: compiler
script: cp -r $PWD ../build6 && cd ../build6 && ./ci/test_compiler.sh
after_success:
# Gather code coverage
- lcov --directory driver --capture --output-file driver.cov # capture trace
- lcov --directory simx --capture --output-file simx.cov # capture trace
- lcov --list driver.cov # output coverage data for debugging
- lcov --list simx.cov # output coverage data for debugging
# Upload coverage report
- bash <(curl -s https://codecov.io/bash) -f driver.cov
- bash <(curl -s https://codecov.io/bash) -f simx.cov

20
Dockerfile.dev Normal file
View file

@ -0,0 +1,20 @@
FROM ubuntu:20.04
LABEL "Udit Subramanya"="usubramanya3@gatech.edu"
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y build-essential valgrind git wget libpng-dev libboost-all-dev uuid-dev ccache cmake
# Third-Party Repository to Install g++11 on Ubuntu 18.04
RUN apt-get install -y manpages-dev software-properties-common
RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
RUN apt-get install -y gcc-11 g++-11
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11
# create a directory for mounting the volume
WORKDIR /root/vortex

221
LICENSE
View file

@ -1,24 +1,201 @@
Copyright (c) <2020>, <Georgia Institute of Technology>
All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Georgia Institute of Technology nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -1,14 +0,0 @@
all:
$(MAKE) -C hw
$(MAKE) -C driver
$(MAKE) -C runtime
$(MAKE) -C simX
$(MAKE) -C tests
clean:
$(MAKE) -C hw clean
$(MAKE) -C driver clean
$(MAKE) -C simX clean
$(MAKE) -C runtime clean
$(MAKE) -C tests clean

74
Makefile.in Normal file
View file

@ -0,0 +1,74 @@
include config.mk
.PHONY: build software tests
all:
$(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw
$(MAKE) -C sim
$(MAKE) -C kernel
$(MAKE) -C runtime
$(MAKE) -C tests
build:
$(MAKE) -C hw
$(MAKE) -C sim
$(MAKE) -C kernel
$(MAKE) -C runtime
$(MAKE) -C tests
software:
$(MAKE) -C hw
$(MAKE) -C kernel
$(MAKE) -C runtime/stub
tests:
$(MAKE) -C tests
clean-build:
$(MAKE) -C hw clean
$(MAKE) -C sim clean
$(MAKE) -C kernel clean
$(MAKE) -C runtime clean
$(MAKE) -C tests clean
clean: clean-build
$(MAKE) -C $(VORTEX_HOME)/third_party clean
# Install setup
KERNEL_INC_DST = $(INSTALLDIR)/kernel/include
KERNEL_LIB_DST = $(INSTALLDIR)/kernel/lib$(XLEN)
RUNTIME_INC_DST = $(INSTALLDIR)/runtime/include
RUNTIME_LIB_DST = $(INSTALLDIR)/runtime/lib
KERNEL_HEADERS = $(wildcard $(VORTEX_HOME)/kernel/include/*.h)
KERNEL_LIBS = $(wildcard kernel/*.a)
RUNTIME_HEADERS = $(wildcard $(VORTEX_HOME)/runtime/include/*.h)
RUNTIME_LIBS = $(wildcard runtime/*.so)
INSTALL_DIRS = $(KERNEL_LIB_DST) $(RUNTIME_LIB_DST) $(KERNEL_INC_DST) $(RUNTIME_INC_DST)
$(INSTALL_DIRS):
mkdir -p $@
$(KERNEL_INC_DST)/VX_types.h: hw/VX_types.h | $(KERNEL_INC_DST)
cp $< $@
$(KERNEL_INC_DST)/%.h: $(VORTEX_HOME)/kernel/include/%.h | $(KERNEL_INC_DST)
cp $< $@
$(RUNTIME_INC_DST)/%.h: $(VORTEX_HOME)/runtime/include/%.h | $(RUNTIME_INC_DST)
cp $< $@
$(KERNEL_LIB_DST)/%.a: kernel/%.a | $(KERNEL_LIB_DST)
cp $< $@
$(RUNTIME_LIB_DST)/%.so: runtime/%.so | $(RUNTIME_LIB_DST)
cp $< $@
install: $(INSTALL_DIRS) \
$(KERNEL_INC_DST)/VX_types.h \
$(KERNEL_HEADERS:$(VORTEX_HOME)/kernel/include/%=$(KERNEL_INC_DST)/%) \
$(RUNTIME_HEADERS:$(VORTEX_HOME)/runtime/include/%=$(RUNTIME_INC_DST)/%) \
$(KERNEL_LIBS:kernel/%=$(KERNEL_LIB_DST)/%) \
$(RUNTIME_LIBS:runtime/%=$(RUNTIME_LIB_DST)/%)

177
README.md
View file

@ -1,75 +1,134 @@
[![Build Status](https://travis-ci.com/vortexgpgpu/vortex.svg?branch=master)](https://travis-ci.com/vortexgpgpu/vortex)
[![codecov](https://codecov.io/gh/vortexgpgpu/vortex/branch/master/graph/badge.svg)](https://codecov.io/gh/vortexgpgpu/vortex)
# Vortex GPGPU
# Vortex RISC-V GPGPU
Vortex is a full-stack open-source RISC-V GPGPU. Vortex supports multiple **backend drivers**, including our C++ simulator (simx), an RTL simulator, and physical Xilinx and Altera FPGAs-- all controlled by a single driver script. The chosen driver determines the corresponding code invoked to run Vortex. Generally, developers will prototype their intended design in simx, before completing going forward with an RTL implementation. Alternatively, you can get up and running by selecting a driver of your choice and running a demo program.
Vortex is a full-system RISCV-based GPGPU processor.
## Website
Vortex news can be found on its [website](https://vortex.cc.gatech.edu/)
## Citation
```
@inproceedings{10.1145/3466752.3480128,
author = {Tine, Blaise and Yalamarthy, Krishna Praveen and Elsabbagh, Fares and Hyesoon, Kim},
title = {Vortex: Extending the RISC-V ISA for GPGPU and 3D-Graphics},
year = {2021},
isbn = {9781450385572},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3466752.3480128},
doi = {10.1145/3466752.3480128},
abstract = {The importance of open-source hardware and software has been increasing. However, despite GPUs being one of the more popular accelerators across various applications, there is very little open-source GPU infrastructure in the public domain. We argue that one of the reasons for the lack of open-source infrastructure for GPUs is rooted in the complexity of their ISA and software stacks. In this work, we first propose an ISA extension to RISC-V that supports GPGPUs and graphics. The main goal of the ISA extension proposal is to minimize the ISA changes so that the corresponding changes to the open-source ecosystem are also minimal, which makes for a sustainable development ecosystem. To demonstrate the feasibility of the minimally extended RISC-V ISA, we implemented the complete software and hardware stacks of Vortex on FPGA. Vortex is a PCIe-based soft GPU that supports OpenCL and OpenGL. Vortex can be used in a variety of applications, including machine learning, graph analytics, and graphics rendering. Vortex can scale up to 32 cores on an Altera Stratix 10 FPGA, delivering a peak performance of 25.6 GFlops at 200 Mhz.},
booktitle = {MICRO-54: 54th Annual IEEE/ACM International Symposium on Microarchitecture},
pages = {754766},
numpages = {13},
keywords = {reconfigurable computing, memory systems., computer graphics},
location = {Virtual Event, Greece},
series = {MICRO '21}
}
```
## Specifications
- Support RISC-V RV32IMF ISA
- Scalability: 1 to 32 cores with optional L2 and L3 caches
- Software: OpenCL 1.2 Support
- Supported FPGAs:
- Intel Arria 10
- Intel Stratix 10
- Support RISC-V RV32IMAF and RV64IMAFD
- Microarchitecture:
- configurable number of cores, warps, and threads.
- configurable number of ALU, FPU, LSU, and SFU units per core.
- configurable pipeline issue width.
- optional local memory, L1, L2, and L3 caches.
- Software:
- OpenCL 1.2 Support.
- Supported FPGAs:
- Altera Arria 10
- Altera Stratix 10
- Xilinx Alveo U50, U250, U280
- Xilinx Versal VCK5000
## Directory structure
- `doc`: [Documentation](doc/Vortex.md).
- `doc`: [Documentation](docs/index.md).
- `hw`: Hardware sources.
- `driver`: Host driver software.
- `driver`: Host drivers repository.
- `runtime`: Kernel Runtime software.
- `simX`: Cycle-approximate simulator.
- `sim`: Simulators repository.
- `tests`: Tests repository.
- `ci`: Continuous integration scripts.
- `miscs`: Miscellaneous resources.
## Basic Installation
## Quick Start
If you are interested in a stable release of Vortex, you can download the latest release [here](https://github.com/vortexgpgpu/vortex/releases/latest). Otherwise, you can pull the most recent, but (potentially) unstable version as shown below. The following steps demonstrate how to build and run Vortex with the default driver: SimX. If you are interested in a different backend, look [here](docs/simulation.md).
### Install development tools
### Supported OS Platforms
- Ubuntu 18.04, 20.04, 22.04, 24.04
- Centos 7
### Toolchain Dependencies
The following dependencies will be fetched prebuilt by `toolchain_install.sh`.
- [POCL](http://portablecl.org/)
- [LLVM](https://llvm.org/)
- [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain)
- [Verilator](https://www.veripool.org/verilator)
- [cvfpu](https://github.com/openhwgroup/cvfpu.git)
- [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git)
- [Ramulator](https://github.com/CMU-SAFARI/ramulator.git)
- [Yosys](https://github.com/YosysHQ/yosys)
- [Sv2v](https://github.com/zachjs/sv2v)
### Install Vortex codebase
```sh
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
cd vortex
```
### Install system dependencies
```sh
# ensure dependent libraries are present
sudo ./ci/install_dependencies.sh
```
### Configure your build folder
```sh
mkdir build
cd build
# for 32bit
../configure --xlen=32 --tooldir=$HOME/tools
# for 64bit
../configure --xlen=64 --tooldir=$HOME/tools
```
### Install prebuilt toolchain
```sh
./ci/toolchain_install.sh --all
```
### set environment variables
```sh
# should always run before using the toolchain!
source ./ci/toolchain_env.sh
```
### Building Vortex
```sh
make -s
```
### Quick demo running vecadd OpenCL kernel on 2 cores
```sh
./ci/blackbox.sh --cores=2 --app=vecadd
```
$ sudo apt-get install build-essential
$ sudo apt-get install git
### Install gnu-riscv-tools
$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
$ sudo apt-get -y install \
binutils build-essential libtool texinfo \
gzip zip unzip patchutils curl git \
make cmake ninja-build automake bison flex gperf \
grep sed gawk python bc \
zlib1g-dev libexpat1-dev libmpc-dev \
libglib2.0-dev libfdt-dev libpixman-1-dev
$ git clone https://github.com/riscv/riscv-gnu-toolchain
$ cd riscv-gnu-toolchain
$ git submodule update --init --recursive
$ mkdir build
$ cd build
$ ../configure --prefix=$RISCV_TOOLCHAIN_PATH --with-arch=rv32im --with-abi=ilp32
$ make -j`nproc`
$ make -j`nproc` build-qemu
### Install Verilator
You need into build the latest version using the instructions on their website
$ https://www.veripool.org/projects/verilator/wiki/Installing
### Install Vortex
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
$ cd Vortex
$ make
### Quick Test running OpenCL vecadd sample on 2 cores
$ ./ci/blackbox.sh --cores=2 --app=vecadd
### Common Developer Tips
- Installing Vortex kernel and runtime libraries to use with external tools requires passing --prefix=<install-path> to the configure script.
```sh
../configure --xlen=32 --tooldir=$HOME/tools --prefix=<install-path>
make -s
make install
```
- Building Vortex 64-bit requires setting --xlen=64 configure option.
```sh
../configure --xlen=64 --tooldir=$HOME/tools
```
- Sourcing "./ci/toolchain_env.sh" is required everytime you start a new terminal. we recommend adding "source <build-path>/ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login.
```sh
echo "source <build-path>/ci/toolchain_env.sh" >> ~/.bashrc
```
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder.
```sh
../configure
```
- To debug the GPU, the simulation can generate a runtime trace for analysis. See /docs/debugging.md for more information.
```sh
./ci/blackbox.sh --app=demo --debug=3
```
- For additional information, check out the [documentation](docs/index.md)

View file

@ -1,4 +0,0 @@
Release Notes!
* 07/01/2020 - LKG FPGA build - Passed basic, demo, vecadd kernels.

23
TODO
View file

@ -1,23 +0,0 @@
Functionality:
1) vx_cl_warpSpawn()
-> To be used by pocl->ops->run
2) newlib Integration (LoadFile(""))
-> To be used by the Rhinio benchmarks
3) POCL OPS Vortex Suite
Performance:
1) Icache doesn't need SEND_MEM_REQUEST Stage
-> Blocks are never dirty, so why not evict right away
2) Branch not taken speculation
3) Runtime -02 not running on RTL, and -03 not running on RTL and Emulator
Vector:
1) Cycle accurate simulator (would require Cache Simulator)

View file

@ -1,179 +1,205 @@
#!/bin/sh
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SCRIPT_DIR=$(dirname "$0")
ROOT_DIR=$SCRIPT_DIR/..
show_usage()
{
echo "Vortex BlackBox Test Driver v1.0"
echo "Usage: [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=rtlsim|vlsim|simx] [--debug] [--scope] [--perf] [--app=vecadd|sgemm|basic|demo|dogfood] [--args=<args>] [--help]]"
echo "Usage: $0 [[--clusters=#n] [--cores=#n] [--warps=#n] [--threads=#n] [--l2cache] [--l3cache] [[--driver=#name] [--app=#app] [--args=#args] [--debug=#level] [--scope] [--perf=#class] [--rebuild=#n] [--log=logfile] [--help]]"
}
SCRIPT_DIR=$(dirname "$0")
VORTEX_HOME=$SCRIPT_DIR/..
show_help()
{
show_usage
echo " where"
echo "--driver: gpu, simx, rtlsim, oape, xrt"
echo "--app: any subfolder test under regression or opencl"
echo "--class: 0=disable, 1=pipeline, 2=memsys"
echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp"
}
DRIVER=vlsim
APP=sgemm
CLUSTERS=1
CORES=2
WARPS=4
THREADS=4
L2=0
L3=0
DEBUG=0
SCOPE=0
HAS_ARGS=0
for i in "$@"
do
case $i in
--driver=*)
DRIVER=${i#*=}
shift
;;
--app=*)
APP=${i#*=}
shift
;;
--clusters=*)
CLUSTERS=${i#*=}
shift
;;
--cores=*)
CORES=${i#*=}
shift
;;
--warps=*)
WARPS=${i#*=}
shift
;;
--threads=*)
THREADS=${i#*=}
shift
;;
--l2cache)
L2=1
shift
;;
--l3cache)
L3=1
shift
;;
--debug)
DEBUG=1
shift
;;
--scope)
SCOPE=1
CORES=1
shift
;;
--perf)
PERF_FLAG=-DPERF_ENABLE
shift
;;
--args=*)
ARGS=${i#*=}
HAS_ARGS=1
shift
;;
--help)
show_usage
exit 0
;;
*)
show_usage
exit -1
;;
esac
done
case $DRIVER in
rtlsim)
DRIVER_PATH=$VORTEX_HOME/driver/rtlsim
DRIVER_EXTRA=
CLEAN_TOKEN=clean
;;
vlsim)
DRIVER_PATH=$VORTEX_HOME/driver/opae
DRIVER_EXTRA=vlsim
CLEAN_TOKEN=clean-vlsim
;;
asesim)
DRIVER_PATH=$VORTEX_HOME/driver/opae
DRIVER_EXTRA=asesim
CLEAN_TOKEN=clean-asesim
;;
fpga)
DRIVER_PATH=$VORTEX_HOME/driver/opae
DRIVER_EXTRA=fpga
CLEAN_TOKEN=clean-fpga
;;
simx)
DRIVER_PATH=$VORTEX_HOME/driver/simx
DRIVER_EXTRA=
CLEAN_TOKEN=clean
;;
*)
echo "invalid driver: $DRIVER"
exit -1
;;
esac
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
then
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
then
APP_PATH=$VORTEX_HOME/tests/regression/$APP
else
echo "Application folder found: $APP"
exit -1
fi
CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS -DL2_ENABLE=$L2 -DL3_ENABLE=$L3 $PERF_FLAG $CONFIGS"
echo "CONFIGS=$CONFIGS"
make -C $DRIVER_PATH $CLEAN_TOKEN
status=0
if [ $DEBUG -eq 1 ]
then
if [ $SCOPE -eq 1 ]
then
DEBUG=1 SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
add_option() {
if [ -n "$1" ]; then
echo "$1 $2"
else
DEBUG=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
fi
if [ $HAS_ARGS -eq 1 ]
then
OPTS=$ARGS make -C $APP_PATH run-$DRIVER > run.log 2>&1
status=$?
else
make -C $APP_PATH run-$DRIVER > run.log 2>&1
status=$?
echo "$2"
fi
if [ -f "$APP_PATH/trace.vcd" ]
then
}
DEFAULTS() {
DRIVER=simx
APP=sgemm
DEBUG=0
DEBUG_LEVEL=0
SCOPE=0
HAS_ARGS=0
PERF_CLASS=0
CONFIGS="$CONFIGS"
REBUILD=2
TEMPBUILD=0
LOGFILE=run.log
}
parse_args() {
DEFAULTS
for i in "$@"; do
case $i in
--driver=*) DRIVER=${i#*=} ;;
--app=*) APP=${i#*=} ;;
--clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;;
--cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;;
--warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;;
--threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;;
--l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;;
--l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;;
--perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;;
--debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;;
--scope) SCOPE=1; ;;
--args=*) HAS_ARGS=1; ARGS=${i#*=} ;;
--rebuild=*) REBUILD=${i#*=} ;;
--log=*) LOGFILE=${i#*=} ;;
--help) show_help; exit 0 ;;
*) show_usage; exit 1 ;;
esac
done
if [ $REBUILD -eq 3 ];
then
REBUILD=1
TEMPBUILD=1
fi
}
set_driver_path() {
case $DRIVER in
gpu) DRIVER_PATH="" ;;
simx|rtlsim|opae|xrt) DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER" ;;
*) echo "Invalid driver: $DRIVER"; exit 1 ;;
esac
}
set_app_path() {
if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; then
APP_PATH="$ROOT_DIR/tests/opencl/$APP"
elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; then
APP_PATH="$ROOT_DIR/tests/regression/$APP"
else
echo "Application folder not found: $APP"
exit 1
fi
}
build_driver() {
local cmd_opts=""
[ $DEBUG -ne 0 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=$DEBUG_LEVEL")
[ $SCOPE -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "SCOPE=1")
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DESTDIR=\"$TEMPDIR\"")
[ -n "$CONFIGS" ] && cmd_opts=$(add_option "$cmd_opts" "CONFIGS=\"$CONFIGS\"")
if [ -n "$cmd_opts" ]; then
echo "Running: $cmd_opts make -C $DRIVER_PATH > /dev/null"
eval "$cmd_opts make -C $DRIVER_PATH > /dev/null"
else
echo "Running: make -C $DRIVER_PATH > /dev/null"
make -C $DRIVER_PATH > /dev/null
fi
}
run_app() {
local cmd_opts=""
[ $DEBUG -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=1")
[ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"")
[ $HAS_ARGS -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "OPTS=\"$ARGS\"")
if [ $DEBUG -ne 0 ]; then
if [ -n "$cmd_opts" ]; then
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
eval "$cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
else
echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1"
make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1
fi
else
if [ -n "$cmd_opts" ]; then
echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER"
eval "$cmd_opts make -C $APP_PATH run-$DRIVER"
else
echo "Running: make -C $APP_PATH run-$DRIVER"
make -C $APP_PATH run-$DRIVER
fi
fi
status=$?
return $status
}
main() {
parse_args "$@"
set_driver_path
set_app_path
# execute on default installed GPU
if [ "$DRIVER" = "gpu" ]; then
run_app
exit $?
fi
if [ -n "$CONFIGS" ]; then
echo "CONFIGS=$CONFIGS"
fi
if [ $REBUILD -ne 0 ]; then
BLACKBOX_CACHE=blackbox.$DRIVER.cache
LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "")
if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then
make -C $DRIVER_PATH clean-driver > /dev/null
echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE"
fi
fi
export VORTEX_PROFILING=$PERF_CLASS
make -C "$ROOT_DIR/hw" config > /dev/null
make -C "$ROOT_DIR/runtime/stub" > /dev/null
if [ $TEMPBUILD -eq 1 ]; then
# setup temp directory
TEMPDIR=$(mktemp -d)
mkdir -p "$TEMPDIR"
# build stub driver
echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub"
DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null
# register tempdir cleanup on exit
trap "rm -rf $TEMPDIR" EXIT
fi
build_driver
run_app
status=$?
if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then
mv -f $APP_PATH/trace.vcd .
fi
else
if [ $SCOPE -eq 1 ]
then
SCOPE=1 CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
else
CONFIGS="$CONFIGS" make -s -C $DRIVER_PATH $DRIVER_EXTRA
fi
if [ $HAS_ARGS -eq 1 ]
then
OPTS=$ARGS make -C $APP_PATH run-$DRIVER
status=$?
else
make -C $APP_PATH run-$DRIVER
status=$?
fi
fi
exit $status
if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then
mv -f $APP_PATH/scope.vcd .
fi
exit $status
}
main "$@"

41
ci/datagen.py Executable file
View file

@ -0,0 +1,41 @@
#!/usr/bin/env python3
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import struct
import random
import sys
def create_binary_file(n, filename):
# Open the file in binary write mode
with open(filename, 'wb') as f:
# Write the integer N as 4 bytes
f.write(struct.pack('i', n))
# Generate and write N floating-point numbers
for _ in range(n):
# Generate a random float between 0 and 1
num = random.random()
# Write the float in IEEE 754 format (4 bytes)
f.write(struct.pack('f', num))
if __name__ == "__main__":
if len(sys.argv) != 3:
print("Usage: script.py N filename")
sys.exit(1)
n = int(sys.argv[1])
filename = sys.argv[2]
create_binary_file(n, filename)
print(f"Created binary file '{filename}' containing {n} floats.")

46
ci/install_dependencies.sh Executable file
View file

@ -0,0 +1,46 @@
#!/bin/sh
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Function to check if GCC version is less than 11
check_gcc_version() {
local gcc_version
gcc_version=$(gcc -dumpversion)
if dpkg --compare-versions "$gcc_version" lt 11; then
return 0 # GCC version is less than 11
else
return 1 # GCC version is 11 or greater
fi
}
# Update package list
apt-get update -y
# install system dependencies
apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache cmake libffi7
# Check and install GCC 11 if necessary
if check_gcc_version; then
echo "GCC version is less than 11. Installing GCC 11..."
add-apt-repository -y ppa:ubuntu-toolchain-r/test
apt-get update
apt-get install -y g++-11 gcc-11
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100
else
echo "GCC version is 11 or greater. No need to install GCC 11."
fi

View file

@ -1,69 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
OS_DIR=ubuntu/bionic
SRCDIR=/opt
DESTDIR=.
riscv()
{
echo "prebuilt riscv-gnu-toolchain..."
tar -C $SRCDIR -cvjf riscv-gnu-toolchain.tar.bz2 riscv-gnu-toolchain
split -b 50M riscv-gnu-toolchain.tar.bz2 "riscv-gnu-toolchain.tar.bz2.part"
mv riscv-gnu-toolchain.tar.bz2.part* $DESTDIR/riscv-gnu-toolchain/$OS_DIR
rm riscv-gnu-toolchain.tar.bz2
}
llvm()
{
echo "prebuilt llvm-riscv..."
tar -C $SRCDIR -cvjf llvm-riscv.tar.bz2 llvm-riscv
split -b 50M llvm-riscv.tar.bz2 "llvm-riscv.tar.bz2.part"
mv llvm-riscv.tar.bz2.part* $DESTDIR/llvm-riscv/$OS_DIR
rm llvm-riscv.tar.bz2
}
pocl()
{
echo "prebuilt pocl..."
tar -C $SRCDIR -cvjf pocl.tar.bz2 pocl
mv pocl.tar.bz2 $DESTDIR/pocl/$OS_DIR
}
verilator()
{
echo "prebuilt verilator..."
tar -C $SRCDIR -cvjf verilator.tar.bz2 verilator
mv verilator.tar.bz2 $DESTDIR/verilator/$OS_DIR
}
usage()
{
echo "usage: prebuilt [[-riscv] [-llvm] [-pocl] [-verilator] [-all] [-h|--help]]"
}
while [ "$1" != "" ]; do
case $1 in
-pocl ) pocl
;;
-verilator ) verilator
;;
-riscv ) riscv
;;
-llvm ) llvm
;;
-all ) riscv
llvm
pocl
verilator
;;
-h | --help ) usage
exit
;;
* ) usage
exit 1
esac
shift
done

View file

@ -1,127 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
# build sources
make -s
coverage()
{
# coverage tests
make -C tests/runtime run-rtlsim
make -C tests/riscv/isa run-rtlsim
make -C tests/regression run-vlsim
make -C tests/opencl run-vlsim
make -C tests/runtime run-simx
make -C tests/riscv/isa run-simx
make -C tests/regression run-simx
make -C tests/opencl run-simx
}
cluster()
{
# warp/threads configurations
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=8 --threads=2 --app=demo
# cores clustering
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --clusters=1 --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=demo --args="-n1"
# L2/L3
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
}
debug()
{
# debugging
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
}
config()
{
# disabling M extension
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
# disabling F extension
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
# disable shared memory
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
# using Default FPU core
FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# test cache banking
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# test cache multi-porting
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# test 128-bit MEM block
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test 128-bit MEM and DRAM block
CONFIGS="-DMEM_BLOCK_SIZE=16 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test 27-bit DRAM address
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test 128-bit DRAM block
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
# test long memory latency
CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
}
stress()
{
# test pipeline stress
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 --l3cache --app=sgemm --args="-n256"
}
usage()
{
echo "usage: regression [-coverage] [-cluster] [-debug] [-config] [-stress] [-all] [-h|--help]"
}
while [ "$1" != "" ]; do
case $1 in
-coverage ) coverage
;;
-cluster ) cluster
;;
-debug ) debug
;;
-config ) config
;;
-stress ) stress
;;
-all ) coverage
cluster
debug
config
stress
;;
-h | --help ) usage
exit
;;
* ) usage
exit 1
esac
shift
done

505
ci/regression.sh.in Executable file
View file

@ -0,0 +1,505 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# exit when any command fails
set -e
# clear blackbox cache
rm -f blackbox.*.cache
# HW: add a test "VM Test" to make sure VM feature is enabled
XLEN=${XLEN:=@XLEN@}
XSIZE=$((XLEN / 8))
echo "Vortex Regression Test: XLEN=$XLEN"
unittest()
{
make -C tests/unittest run
make -C hw/unittest > /dev/null
}
isa()
{
echo "begin isa tests..."
make -C sim/simx
make -C sim/rtlsim
make -C tests/riscv/isa run-simx
make -C tests/riscv/isa run-rtlsim
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
if [ "$XLEN" == "64" ]
then
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64fx
fi
# clean build
make -C sim/rtlsim clean
echo "isa tests done!"
}
kernel()
{
echo "begin kernel tests..."
make -C sim/simx
make -C sim/rtlsim
make -C tests/kernel run-simx
make -C tests/kernel run-rtlsim
echo "kernel tests done!"
}
regression()
{
echo "begin regression tests..."
make -C runtime/simx
make -C runtime/rtlsim
make -C tests/regression run-simx
make -C tests/regression run-rtlsim
# test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tgbar" --cores=2
# test local barrier
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar"
./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tbar"
# test temp driver mode for
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3
# test for matmul
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
echo "regression tests done!"
}
opencl()
{
echo "begin opencl tests..."
make -C runtime/simx
make -C runtime/rtlsim
make -C tests/opencl run-simx
make -C tests/opencl run-rtlsim
./ci/blackbox.sh --driver=simx --app=lbm --warps=8
./ci/blackbox.sh --driver=rtlsim --app=lbm --warps=8
echo "opencl tests done!"
}
vm(){
echo "begin vm tests..."
make -C sim/simx clean && CONFIGS="-DVM_ENABLE" make -C sim/simx
make -C runtime/simx clean && CONFIGS="-DVM_ENABLE" make -C runtime/simx
make -C tests/opencl run-simx
make -C tests/regression run-simx
make -C sim/simx clean && CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C sim/simx
make -C runtime/simx clean && CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C runtime/simx
make -C tests/opencl run-simx
make -C tests/regression run-simx
echo "vm tests done!"
}
cache()
{
echo "begin cache tests..."
# disable local memory
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo --perf=1
CONFIGS="-DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=demo --perf=1
# disable L1 cache
CONFIGS="-DL1_DISABLE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
# reduce l1 line size
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DDISABLE_L1" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache ways
CONFIGS="-DICACHE_NUM_WAYS=1 -DDCACHE_NUM_WAYS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache banking
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=simx --app=sgemmx --threads=8
# replacement policy
CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_REPL_POLICY=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_REPL_POLICY=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
# test writeback
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=0 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
# cache clustering
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
# L2/L3
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=4 --l2cache --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=diverge --args="-n1"
echo "begin cache tests..."
}
config1()
{
echo "begin configuration-1 tests..."
# warp/threads
./ci/blackbox.sh --driver=rtlsim --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=2 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=2 --threads=8 --app=diverge
./ci/blackbox.sh --driver=rtlsim --warps=8 --threads=2 --app=diverge
./ci/blackbox.sh --driver=simx --warps=1 --threads=1 --app=diverge
./ci/blackbox.sh --driver=simx --warps=8 --threads=16 --app=diverge
# cores clustering
./ci/blackbox.sh --driver=rtlsim --cores=4 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=4 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --app=diverge --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --app=diverge --args="-n1"
# issue width
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=4" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=2" ./ci/blackbox.sh --driver=simx --app=diverge
CONFIGS="-DISSUE_WIDTH=4" ./ci/blackbox.sh --driver=simx --app=diverge
# ALU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=4 -DNUM_ALU_BLOCK=4 -DNUM_ALU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=diverge
CONFIGS="-DISSUE_WIDTH=2 -DNUM_ALU_BLOCK=1 -DNUM_ALU_LANES=2" ./ci/blackbox.sh --driver=simx --app=diverge
CONFIGS="-DISSUE_WIDTH=4 -DNUM_ALU_BLOCK=4 -DNUM_ALU_LANES=4" ./ci/blackbox.sh --driver=simx --app=diverge
# FPU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
# FPU's PE scaling
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
# LSU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
echo "configuration-1 tests done!"
}
config2()
{
echo "begin configuration-2 tests..."
# test opaesim
./ci/blackbox.sh --driver=opae --app=printf
./ci/blackbox.sh --driver=opae --app=diverge
./ci/blackbox.sh --driver=xrt --app=diverge
# disable DPI
if [ "$XLEN" == "64" ]; then
# need to disable trig on 64-bit due to a bug inside fpnew's sqrt core.
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar"
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar"
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar"
else
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood
CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood
fi
# custom program startup address
make -C tests/regression/dogfood clean-kernel
STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood
./ci/blackbox.sh --driver=simx --app=dogfood
./ci/blackbox.sh --driver=rtlsim --app=dogfood
make -C tests/regression/dogfood clean-kernel
# disabling M & F extensions
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i
make -C sim/rtlsim clean
# disabling ZICOND extension
CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo
# test 128-bit memory block
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress
# test XLEN-bit memory block
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress
# test memory coalescing
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
# test single-bank memory
if [ "$XLEN" == "64" ]; then
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
else
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_NUM_BANKS=1" ./ci/blackbox.sh --driver=xrt --app=mstress
fi
# test larger memory address
if [ "$XLEN" == "64" ]; then
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress
else
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress
fi
# test memory banks interleaving
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress
# test memory ports
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8
echo "configuration-2 tests done!"
}
test_csv_trace()
{
# test CSV trace generation
make -C sim/simx clean && DEBUG=3 make -C sim/simx > /dev/null
make -C sim/rtlsim clean && DEBUG=3 CONFIGS="-DGPR_RESET" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-simx-32im > run_simx.log
make -C tests/riscv/isa run-rtlsim-32im > run_rtlsim.log
./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
diff trace_rtlsim.csv trace_simx.csv
# clean build
make -C sim/simx clean
make -C sim/rtlsim clean
}
debug()
{
echo "begin debugging tests..."
test_csv_trace
CONFIGS="-O0" ./ci/blackbox.sh --driver=opae --app=demo --args="-n1"
CONFIGS="-O0" ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1"
echo "debugging tests done!"
}
scope()
{
echo "begin scope tests..."
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
echo "debugging scope done!"
}
stress()
{
echo "begin stress tests..."
# test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache
echo "stress tests done!"
}
synthesis()
{
echo "begin synthesis tests..."
PREFIX=build_base make -C hw/syn/yosys clean
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" make -C hw/syn/yosys synthesis
echo "synthesis tests done!"
}
vector()
{
echo "begin vector tests..."
make -C sim/simx clean && CONFIGS="-DEXT_V_ENABLE" make -C sim/simx
TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 ./tests/riscv/riscv-vector-tests/run-test.sh
echo "vector tests done!"
}
show_usage()
{
echo "Vortex Regression Test"
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--all] [--h|--help]"
}
declare -a tests=()
clean=0
while [ "$1" != "" ]; do
case $1 in
--clean )
clean=1
;;
--unittest )
tests+=("unittest")
;;
--isa )
tests+=("isa")
;;
--kernel )
tests+=("kernel")
;;
--regression )
tests+=("regression")
;;
--opencl )
tests+=("opencl")
;;
--cache )
tests+=("cache")
;;
--vm )
tests+=("vm")
;;
--config1 )
tests+=("config1")
;;
--config2 )
tests+=("config2")
;;
--debug )
tests+=("debug")
;;
--scope )
tests+=("scope")
;;
--stress )
tests+=("stress")
;;
--synthesis )
tests+=("synthesis")
;;
--vector )
tests+=("vector")
;;
--all )
tests=()
tests+=("unittest")
tests+=("isa")
tests+=("kernel")
tests+=("regression")
tests+=("opencl")
tests+=("cache")
tests+=("vm")
tests+=("config1")
tests+=("config2")
tests+=("debug")
tests+=("scope")
tests+=("stress")
tests+=("synthesis")
tests+=("vector")
;;
-h | --help )
show_usage
exit
;;
* )
show_usage
exit 1
esac
shift
done
if [ $clean -eq 1 ];
then
make clean
make -s
fi
start=$SECONDS
for test in "${tests[@]}"; do
$test
done
echo "Regression completed!"
duration=$(( SECONDS - start ))
awk -v t=$duration 'BEGIN{t=int(t*1000); printf "Elapsed Time: %d:%02d:%02d\n", t/3600000, t/60000%60, t/1000%60}'

View file

@ -1,27 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
# clear POCL cache
rm -rf ~/.cache/pocl
# rebuild runtime
make -C runtime clean
make -C runtime
# rebuild drivers
make -C driver clean
make -C driver
# rebuild runtime tests
make -C tests/runtime clean
make -C tests/runtime
# rebuild regression tests
make -C tests/regression clean-all
make -C tests/regression
# rebuild opencl tests
make -C tests/opencl clean-all
make -C tests/opencl

24
ci/toolchain_env.sh.in Executable file
View file

@ -0,0 +1,24 @@
#!/bin/sh
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
export PATH=$TOOLDIR/verilator/bin:$PATH
export SV2V_PATH=$TOOLDIR/sv2v
export PATH=$SV2V_PATH/bin:$PATH
export YOSYS_PATH=$TOOLDIR/yosys
export PATH=$YOSYS_PATH/bin:$PATH

View file

@ -1,81 +0,0 @@
#!/bin/bash
# exit when any command fails
set -e
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
DESTDIR="${DESTDIR:=/opt}"
riscv()
{
for x in {a..o}
do
wget $REPOSITORY/riscv-gnu-toolchain/ubuntu/bionic/riscv-gnu-toolchain.tar.bz2.parta$x
done
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
tar -xvf riscv-gnu-toolchain.tar.bz2
rm -f riscv-gnu-toolchain.tar.bz2*
cp -r riscv-gnu-toolchain $DESTDIR
rm -rf riscv-gnu-toolchain
}
llvm()
{
for x in {a..b}
do
wget $REPOSITORY/llvm-riscv/ubuntu/bionic/llvm-riscv.tar.bz2.parta$x
done
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
tar -xvf llvm-riscv.tar.bz2
rm -f llvm-riscv.tar.bz2*
cp -r llvm-riscv $DESTDIR
rm -rf llvm-riscv
}
pocl()
{
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
tar -xvf pocl.tar.bz2
rm -f pocl.tar.bz2
cp -r pocl $DESTDIR
rm -rf pocl
}
verilator()
{
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
tar -xvf verilator.tar.bz2
rm -f verilator.tar.bz2
cp -r verilator $DESTDIR
rm -rf verilator
}
usage()
{
echo "usage: toolchain_install [[-riscv] [-llvm] [-pocl] [-verilator] [-all] [-h|--help]]"
}
while [ "$1" != "" ]; do
case $1 in
-pocl ) pocl
;;
-verilator ) verilator
;;
-riscv ) riscv
;;
-llvm ) llvm
;;
-all ) riscv
llvm
pocl
verilator
;;
-h | --help ) usage
exit
;;
* ) usage
exit 1
esac
shift
done

199
ci/toolchain_install.sh.in Executable file
View file

@ -0,0 +1,199 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# exit when any command fails
set -e
REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
OSVERSION=${OSVERSION:=@OSVERSION@}
riscv32()
{
case $OSVERSION in
"centos/7") parts=$(eval echo {a..l}) ;;
"ubuntu/bionic") parts=$(eval echo {a..j}) ;;
*) parts=$(eval echo {a..k}) ;;
esac
rm -f riscv32-gnu-toolchain.tar.bz2.parta*
for x in $parts
do
wget $REPOSITORY/riscv32-gnu-toolchain/$OSVERSION/riscv32-gnu-toolchain.tar.bz2.parta$x
done
cat riscv32-gnu-toolchain.tar.bz2.parta* > riscv32-gnu-toolchain.tar.bz2
tar -xvf riscv32-gnu-toolchain.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/riscv32-gnu-toolchain && mv riscv32-gnu-toolchain $TOOLDIR
rm -rf riscv32-gnu-toolchain.tar.bz2*
}
riscv64()
{
case $OSVERSION in
"centos/7") parts=$(eval echo {a..l}) ;;
*) parts=$(eval echo {a..j}) ;;
esac
rm -f riscv64-gnu-toolchain.tar.bz2.parta*
for x in $parts
do
wget $REPOSITORY/riscv64-gnu-toolchain/$OSVERSION/riscv64-gnu-toolchain.tar.bz2.parta$x
done
cat riscv64-gnu-toolchain.tar.bz2.parta* > riscv64-gnu-toolchain.tar.bz2
tar -xvf riscv64-gnu-toolchain.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/riscv64-gnu-toolchain && mv riscv64-gnu-toolchain $TOOLDIR
rm -rf riscv64-gnu-toolchain riscv64-gnu-toolchain.tar.bz2*
}
llvm()
{
case $OSVERSION in
"centos/7") parts=$(eval echo {a..b}) ;;
*) parts=$(eval echo {a..b}) ;;
esac
echo $parts
rm -f llvm-vortex2.tar.bz2.parta*
for x in $parts
do
wget $REPOSITORY/llvm-vortex/$OSVERSION/llvm-vortex2.tar.bz2.parta$x
done
cat llvm-vortex2.tar.bz2.parta* > llvm-vortex2.tar.bz2
tar -xvf llvm-vortex2.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/llvm-vortex && mv llvm-vortex $TOOLDIR
rm -rf llvm-vortex llvm-vortex2.tar.bz2*
}
libcrt32()
{
wget $REPOSITORY/libcrt32/libcrt32.tar.bz2
tar -xvf libcrt32.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/libcrt32 && mv libcrt32 $TOOLDIR
rm -rf libcrt32 libcrt32.tar.bz2
}
libcrt64()
{
wget $REPOSITORY/libcrt64/libcrt64.tar.bz2
tar -xvf libcrt64.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/libcrt64 && mv libcrt64 $TOOLDIR
rm -rf libcrt64 libcrt64.tar.bz2
}
libc32()
{
wget $REPOSITORY/libc32/libc32.tar.bz2
tar -xvf libc32.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/libc32 && mv libc32 $TOOLDIR
rm -rf libc32 libc32.tar.bz2
}
libc64()
{
wget $REPOSITORY/libc64/libc64.tar.bz2
tar -xvf libc64.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/libc64 && mv libc64 $TOOLDIR
rm -rf libc64 libc64.tar.bz2
}
pocl()
{
wget $REPOSITORY/pocl/$OSVERSION/pocl2.tar.bz2
tar -xvf pocl2.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/pocl && mv pocl $TOOLDIR
rm -rf pocl2 pocl2.tar.bz2
}
verilator()
{
wget $REPOSITORY/verilator/$OSVERSION/verilator.tar.bz2
tar -xvf verilator.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/verilator && mv verilator $TOOLDIR
rm -rf verilator verilator.tar.bz2
}
sv2v()
{
wget $REPOSITORY/sv2v/$OSVERSION/sv2v.tar.bz2
tar -xvf sv2v.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/sv2v && mv sv2v $TOOLDIR
rm -rf sv2v sv2v.tar.bz2
}
yosys()
{
case $OSVERSION in
"centos/7") parts=$(eval echo {a..c}) ;;
*) parts=$(eval echo {a..c}) ;;
esac
echo $parts
rm -f yosys.tar.bz2.parta*
for x in $parts
do
wget $REPOSITORY/yosys/$OSVERSION/yosys.tar.bz2.parta$x
done
cat yosys.tar.bz2.parta* > yosys.tar.bz2
tar -xvf yosys.tar.bz2
mkdir -p $TOOLDIR && rm -rf $TOOLDIR/yosys && mv yosys $TOOLDIR
rm -rf yosys yosys.tar.bz2* yosys
}
show_usage()
{
echo "Install Pre-built Vortex Toolchain"
echo "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [--yosys] [--all] [-h|--help]"
}
while [ "$1" != "" ]; do
case $1 in
--pocl ) pocl
;;
--verilator ) verilator
;;
--riscv32 ) riscv32
;;
--riscv64 ) riscv64
;;
--llvm ) llvm
;;
--libcrt32 ) libcrt32
;;
--libcrt64 ) libcrt64
;;
--libc32 ) libc32
;;
--libc64 ) libc64
;;
--sv2v ) sv2v
;;
--yosys ) yosys
;;
--all ) pocl
verilator
llvm
libcrt32
libcrt64
libc32
libc64
riscv32
riscv64
sv2v
yosys
;;
-h | --help ) show_usage
exit
;;
* ) show_usage
exit 1
esac
shift
done

167
ci/toolchain_prebuilt.sh.in Executable file
View file

@ -0,0 +1,167 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# exit when any command fails
set -e
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
OSVERSION=${OSVERSION:=@OSVERSION@}
riscv32()
{
echo "prebuilt riscv32-gnu-toolchain..."
tar -C $TOOLDIR -cvjf riscv32-gnu-toolchain.tar.bz2 riscv32-gnu-toolchain
split -b 50M riscv32-gnu-toolchain.tar.bz2 "riscv32-gnu-toolchain.tar.bz2.part"
mkdir -p ./riscv32-gnu-toolchain/$OSVERSION
mv riscv32-gnu-toolchain.tar.bz2.part* ./riscv32-gnu-toolchain/$OSVERSION
rm riscv32-gnu-toolchain.tar.bz2
}
riscv64()
{
echo "prebuilt riscv64-gnu-toolchain..."
tar -C $TOOLDIR -cvjf riscv64-gnu-toolchain.tar.bz2 riscv64-gnu-toolchain
split -b 50M riscv64-gnu-toolchain.tar.bz2 "riscv64-gnu-toolchain.tar.bz2.part"
mkdir -p ./riscv64-gnu-toolchain/$OSVERSION
mv riscv64-gnu-toolchain.tar.bz2.part* ./riscv64-gnu-toolchain/$OSVERSION
rm riscv64-gnu-toolchain.tar.bz2
}
llvm()
{
echo "prebuilt llvm-vortex2..."
tar -C $TOOLDIR -cvjf llvm-vortex2.tar.bz2 llvm-vortex
split -b 50M llvm-vortex2.tar.bz2 "llvm-vortex2.tar.bz2.part"
mkdir -p ./llvm-vortex/$OSVERSION
mv llvm-vortex2.tar.bz2.part* ./llvm-vortex/$OSVERSION
rm llvm-vortex2.tar.bz2
}
libcrt32()
{
echo "prebuilt libcrt32..."
tar -C $TOOLDIR -cvjf libcrt32.tar.bz2 libcrt32
mkdir -p ./libcrt32
mv libcrt32.tar.bz2 ./libcrt32
}
libcrt64()
{
echo "prebuilt libcrt64..."
tar -C $TOOLDIR -cvjf libcrt64.tar.bz2 libcrt64
mkdir -p ./libcrt64
mv libcrt64.tar.bz2 ./libcrt64
}
libc32()
{
echo "prebuilt libc32..."
tar -C $TOOLDIR -cvjf libc32.tar.bz2 libc32
mkdir -p ./libc32
mv libc32.tar.bz2 ./libc32
}
libc64()
{
echo "prebuilt libc64..."
tar -C $TOOLDIR -cvjf libc64.tar.bz2 libc64
mkdir -p ./libc64
mv libc64.tar.bz2 ./libc64
}
pocl()
{
echo "prebuilt pocl..."
tar -C $TOOLDIR -cvjf pocl2.tar.bz2 pocl
mkdir -p ./pocl/$OSVERSION
mv pocl2.tar.bz2 ./pocl/$OSVERSION
}
verilator()
{
echo "prebuilt verilator..."
tar -C $TOOLDIR -cvjf verilator.tar.bz2 verilator
mkdir -p ./verilator/$OSVERSION
mv verilator.tar.bz2 ./verilator/$OSVERSION
}
sv2v()
{
echo "prebuilt sv2v..."
tar -C $TOOLDIR -cvjf sv2v.tar.bz2 sv2v
mkdir -p ./sv2v/$OSVERSION
mv sv2v.tar.bz2 ./sv2v/$OSVERSION
}
yosys()
{
echo "prebuilt yosys..."
tar -C $TOOLDIR -cvjf yosys.tar.bz2 yosys
split -b 50M yosys.tar.bz2 "yosys.tar.bz2.part"
mkdir -p ./yosys/$OSVERSION
mv yosys.tar.bz2.part* ./yosys/$OSVERSION
rm yosys.tar.bz2
}
show_usage()
{
echo "Setup Pre-built Vortex Toolchain"
echo "Usage: $0 [--pocl] [--verilator] [--riscv32] [--riscv64] [--llvm] [--libcrt32] [--libcrt64] [--libc32] [--libc64] [--sv2v] [-yosys] [--all] [-h|--help]"
}
while [ "$1" != "" ]; do
case $1 in
--pocl ) pocl
;;
--verilator ) verilator
;;
--riscv32 ) riscv32
;;
--riscv64 ) riscv64
;;
--llvm ) llvm
;;
--libcrt32 ) libcrt32
;;
--libcrt64 ) libcrt64
;;
--libc32 ) libc32
;;
--libc64 ) libc64
;;
--sv2v ) sv2v
;;
--yosys ) yosys
;;
--all ) pocl
verilator
riscv32
riscv64
llvm
libcrt32
libcrt64
libc32
libc64
sv2v
yosys
;;
-h | --help ) show_usage
exit
;;
* ) show_usage
exit 1
esac
shift
done

291
ci/trace_csv.py Executable file
View file

@ -0,0 +1,291 @@
#!/usr/bin/env python3
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import argparse
import csv
import re
import inspect
configs = None
def parse_args():
parser = argparse.ArgumentParser(description='CPU trace log to CSV format converter.')
parser.add_argument('-t', '--type', default='simx', help='log type (rtlsim or simx)')
parser.add_argument('-o', '--csv', default='trace.csv', help='Output CSV file')
parser.add_argument('log', help='Input log file')
return parser.parse_args()
def load_config(filename):
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=0x([0-9a-fA-F]+), num_barriers=(\d+)"
with open(filename, 'r') as file:
for line in file:
config_match = re.search(config_pattern, line)
if config_match:
config = {
'num_threads': int(config_match.group(1)),
'num_warps': int(config_match.group(2)),
'num_cores': int(config_match.group(3)),
'num_clusters': int(config_match.group(4)),
'socket_size': int(config_match.group(5)),
'local_mem_base': int(config_match.group(6), 16),
'num_barriers': int(config_match.group(7)),
}
return config
print("Error: missing CONFIGS: header")
sys.exit(1)
def parse_simx(log_lines):
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"Instr (0x[0-9a-fA-F]+):"
opcode_pattern = r"Instr 0x[0-9a-fA-F]+: ([0-9a-zA-Z_\.]+)"
core_id_pattern = r"cid=(\d+)"
warp_id_pattern = r"wid=(\d+)"
tmask_pattern = r"tmask=(\d+)"
operands_pattern = r"Src\d+ Reg: (.+)"
destination_pattern = r"Dest Reg: (.+)"
uuid_pattern = r"#(\d+)"
entries = []
instr_data = None
for lineno, line in enumerate(log_lines, start=1):
try:
if line.startswith("DEBUG Fetch:"):
if instr_data:
entries.append(instr_data)
instr_data = {}
instr_data["lineno"] = lineno
instr_data["PC"] = re.search(pc_pattern, line).group(1)
instr_data["core_id"] = int(re.search(core_id_pattern, line).group(1))
instr_data["warp_id"] = int(re.search(warp_id_pattern, line).group(1))
instr_data["tmask"] = re.search(tmask_pattern, line).group(1)
instr_data["uuid"] = int(re.search(uuid_pattern, line).group(1))
elif line.startswith("DEBUG Instr"):
instr_data["instr"] = re.search(instr_pattern, line).group(1)
instr_data["opcode"] = re.search(opcode_pattern, line).group(1)
elif line.startswith("DEBUG Src"):
src_reg = re.search(operands_pattern, line).group(1)
instr_data["operands"] = (instr_data["operands"] + ', ' + src_reg) if 'operands' in instr_data else src_reg
elif line.startswith("DEBUG Dest"):
instr_data["destination"] = re.search(destination_pattern, line).group(1)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
instr_data = None
if instr_data:
entries.append(instr_data)
return entries
def reverse_binary(bin_str):
return bin_str[::-1]
def bin_to_array(bin_str):
return [int(bit) for bit in bin_str]
def append_reg(text, value, sep):
if sep:
text += ", "
ivalue = int(value)
if (ivalue >= 32):
text += "f" + str(ivalue % 32)
else:
text += "x" + value
sep = True
return text, sep
def append_value(text, reg, value, tmask_arr, sep):
text, sep = append_reg(text, reg, sep)
text += "={"
for i in range(len(tmask_arr)):
if i != 0:
text += ", "
if tmask_arr[i]:
text += value[i]
else:
text +="-"
text += "}"
return text, sep
def parse_rtlsim(log_lines):
global configs
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
ex_pattern = r"ex=([a-zA-Z]+)"
op_pattern = r"op=([\?0-9a-zA-Z_\.]+)"
warp_id_pattern = r"wid=(\d+)"
tmask_pattern = r"tmask=(\d+)"
wb_pattern = r"wb=(\d)"
opds_pattern = r"opds=(\d+)"
rd_pattern = r"rd=(\d+)"
rs1_pattern = r"rs1=(\d+)"
rs2_pattern = r"rs2=(\d+)"
rs3_pattern = r"rs3=(\d+)"
rs1_data_pattern = r"rs1_data=\{(.+?)\}"
rs2_data_pattern = r"rs2_data=\{(.+?)\}"
rs3_data_pattern = r"rs3_data=\{(.+?)\}"
rd_data_pattern = r"data=\{(.+?)\}"
eop_pattern = r"eop=(\d)"
uuid_pattern = r"#(\d+)"
entries = []
instr_data = {}
num_cores = configs['num_cores']
socket_size = configs['socket_size']
num_sockets = (num_cores + socket_size - 1) // socket_size
for lineno, line in enumerate(log_lines, start=1):
try:
line_match = re.search(line_pattern, line)
if line_match:
PC = re.search(pc_pattern, line).group(1)
warp_id = int(re.search(warp_id_pattern, line).group(1))
tmask = re.search(tmask_pattern, line).group(1)
uuid = int(re.search(uuid_pattern, line).group(1))
cluster_id = int(line_match.group(1))
socket_id = int(line_match.group(2))
core_id = int(line_match.group(3))
stage = line_match.group(4)
if stage == "decode":
trace = {}
trace["uuid"] = uuid
trace["PC"] = PC
trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
trace["warp_id"] = warp_id
trace["tmask"] = reverse_binary(tmask)
trace["instr"] = re.search(instr_pattern, line).group(1)
trace["opcode"] = re.search(op_pattern, line).group(1)
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
trace["rd"] = re.search(rd_pattern, line).group(1)
trace["rs1"] = re.search(rs1_pattern, line).group(1)
trace["rs2"] = re.search(rs2_pattern, line).group(1)
trace["rs3"] = re.search(rs3_pattern, line).group(1)
instr_data[uuid] = trace
elif stage == "issue":
if uuid in instr_data:
trace = instr_data[uuid]
trace["lineno"] = lineno
opds = trace["opds"]
if opds[1]:
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
if opds[2]:
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
if opds[3]:
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
trace["issued"] = True
instr_data[uuid] = trace
elif stage == "commit":
if uuid in instr_data:
trace = instr_data[uuid]
if "issued" in trace:
opds = trace["opds"]
dst_tmask_arr = bin_to_array(tmask)[::-1]
wb = re.search(wb_pattern, line).group(1) == "1"
if wb:
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
if 'rd_data' in trace:
merged_rd_data = trace['rd_data']
for i in range(len(dst_tmask_arr)):
if dst_tmask_arr[i] == 1:
merged_rd_data[i] = rd_data[i]
trace['rd_data'] = merged_rd_data
else:
trace['rd_data'] = rd_data
instr_data[uuid] = trace
eop = re.search(eop_pattern, line).group(1) == "1"
if eop:
tmask_arr = bin_to_array(trace["tmask"])
destination = ''
if wb:
destination, sep = append_value(destination, trace["rd"], trace['rd_data'], tmask_arr, False)
del trace['rd_data']
trace["destination"] = destination
operands = ''
sep = False
if opds[1]:
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
del trace["rs1_data"]
if opds[2]:
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
del trace["rs2_data"]
if opds[3]:
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
del trace["rs3_data"]
trace["operands"] = operands
del trace["opds"]
del trace["rd"]
del trace["rs1"]
del trace["rs2"]
del trace["rs3"]
del trace["issued"]
del instr_data[uuid]
entries.append(trace)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
return entries
def write_csv(sublogs, csv_filename, log_type):
with open(csv_filename, 'w', newline='') as csv_file:
fieldnames = ["uuid", "PC", "opcode", "instr", "core_id", "warp_id", "tmask", "destination", "operands"]
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for sublog in sublogs:
entries = None
# parse sublog
if log_type == "rtlsim":
entries = parse_rtlsim(sublog)
elif log_type == "simx":
entries = parse_simx(sublog)
else:
print('Error: invalid log type')
sys.exit()
# sort entries by uuid
entries.sort(key=lambda x: (int(x['uuid'])))
for entry in entries:
del entry['lineno']
for entry in entries:
writer.writerow(entry)
def split_log_file(log_filename):
with open(log_filename, 'r') as log_file:
log_lines = log_file.readlines()
sublogs = []
current_sublog = None
for line in log_lines:
if line.startswith("[VXDRV] START"):
if current_sublog is not None:
sublogs.append(current_sublog)
current_sublog = [line]
elif current_sublog is not None:
current_sublog.append(line)
if current_sublog is not None:
sublogs.append(current_sublog)
else:
sublogs.append(log_lines)
return sublogs
def main():
global configs
args = parse_args()
configs = load_config(args.log)
sublogs = split_log_file(args.log)
write_csv(sublogs, args.csv, args.type)
if __name__ == "__main__":
main()

View file

@ -1,41 +1,73 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# Copyright 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import time
import threading
import subprocess
PingInterval = 15
# This script executes a long-running command while printing "still running ..." periodically
# to notify Travis build system that the program has not hanged
def PingCallback(stop):
wait_time = 0
while True:
time.sleep(PingInterval)
wait_time += PingInterval
print(" + still running (" + str(wait_time) + "s) ...")
sys.stdout.flush()
if stop():
break
PING_INTERVAL=300 # 5 minutes
SLEEP_INTERVAL=1 # 1 second
def run_command(command):
process = subprocess.Popen(command, stdout=subprocess.PIPE)
def monitor(stop_event):
wait_time = 0
elapsed_time = 0
while not stop_event.is_set():
time.sleep(SLEEP_INTERVAL)
elapsed_time += SLEEP_INTERVAL
if elapsed_time >= PING_INTERVAL:
wait_time += elapsed_time
print(" + still running (" + str(wait_time) + "s) ...")
sys.stdout.flush()
elapsed_time = 0
def execute(command):
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
while True:
output = process.stdout.readline()
if output == '' and process.poll() is not None:
break
if output:
print output.strip()
return process.returncode
try:
line = output.decode('utf-8').rstrip()
except UnicodeDecodeError:
line = repr(output) # Safely print raw binary data
print(">>> " + line)
process.stdout.flush()
ret = process.poll()
if ret is not None:
return ret
return -1
def main(argv):
if not argv:
print("Usage: travis_run.py <command>")
sys.exit(1)
stop_threads = False
t = threading.Thread(target = PingCallback, args =(lambda : stop_threads, ))
# start monitoring thread
stop_event = threading.Event()
t = threading.Thread(target=monitor, args=(stop_event,))
t.start()
exitcode = run_command(argv)
# execute command
exitcode = execute(argv)
print(" + exitcode="+str(exitcode))
stop_threads = True
# terminate monitoring thread
stop_event.set()
t.join()
sys.exit(exitcode)

34
config.mk.in Normal file
View file

@ -0,0 +1,34 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
VORTEX_HOME ?= @VORTEX_HOME@
XLEN ?= @XLEN@
TOOLDIR ?= @TOOLDIR@
OSVERSION ?= @OSVERSION@
INSTALLDIR ?= @INSTALLDIR@
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
LIBC_VORTEX ?= $(TOOLDIR)/libc$(XLEN)
LIBCRT_VORTEX ?= $(TOOLDIR)/libcrt$(XLEN)
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party

176
configure vendored Executable file
View file

@ -0,0 +1,176 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Determine the current working directory
CURRENT_DIR=$(pwd)
# Function to detect current OS
detect_osversion() {
local osversion="unsupported"
if [ -f /etc/os-release ]; then
. /etc/os-release # Source the os-release file to get OS information
case "$ID" in
ubuntu)
case "$VERSION_CODENAME" in
bionic) osversion="ubuntu/bionic";;
focal) osversion="ubuntu/focal";;
jammy) osversion="ubuntu/focal";;
noble) osversion="ubuntu/focal";;
# Add new versions as needed
esac
;;
centos)
case "$VERSION_ID" in
7) osversion="centos/7";;
# Add new versions as needed
esac
;;
esac
fi
echo "$osversion"
}
# Function to recursively copy files, skipping the current directory
copy_files() {
local source_dir="$1"
local target_dir="$2"
#echo "source_dir=$source_dir, target_dir=$target_dir"
local same_dir=0
if [ "$(realpath "$source_dir")" == "$(realpath "$target_dir")" ]; then
same_dir=1
fi
# Function to copy and update file
copy_and_update() {
local src_pattern="$1"
local dest_dir="$2"
for file in $src_pattern; do
#echo "*** $file > $dest_dir"
if [ -f "$file" ]; then
if [[ "$file" == *.in ]]; then
filename=$(basename -- "$file")
filename_no_ext="${filename%.in}"
dest_file="$dest_dir/$filename_no_ext"
mkdir -p "$dest_dir"
sed "s|@VORTEX_HOME@|$SOURCE_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@BUILDDIR@|$CURRENT_DIR|g" "$file" > "$dest_file"
# apply permissions to bash scripts
read -r firstline < "$dest_file"
if [[ "$firstline" =~ ^#!.*bash ]]; then
chmod +x "$dest_file"
fi
else
if [ $same_dir -eq 0 ]; then
mkdir -p "$dest_dir"
cp -p "$file" "$dest_dir"
fi
fi
fi
done
}
for pattern in "${SUBDIRS[@]}"; do
local full_copy=0
if [[ "$pattern" == !* ]]; then
full_copy=1
pattern=${pattern:1}
fi
local source_pattern="$source_dir/$pattern"
if [[ "$pattern" == "." ]]; then
source_pattern=$source_dir
fi
find "$source_dir" -type d -path "$source_pattern" 2>/dev/null | while read dir; do
# Compute the relative path of the directory
local rel_path="${dir#$source_dir}"
rel_path="${rel_path#/}" # Remove leading slash, if present
local full_target_dir="$target_dir/$rel_path"
# Copy and update Makefile and common.mk if they exist
if [ $full_copy -eq 1 ]; then
copy_and_update "$dir/*" "$full_target_dir"
else
copy_and_update "$dir/Makefile" "$full_target_dir"
copy_and_update "$dir/common.mk" "$full_target_dir"
copy_and_update "$dir/*.in" "$full_target_dir"
fi
done
done
}
###############################################################################
# default configuration parameters
default_xlen=32
default_tooldir=$HOME/tools
default_osversion=$(detect_osversion)
default_prefix=$CURRENT_DIR
# load default configuration parameters from existing config.mk
if [ -f "config.mk" ]; then
while IFS='=' read -r key value; do
value=${value//[@]/} # Remove placeholder characters
value="${value#"${value%%[![:space:]]*}"}" # Remove leading whitespace
value="${value%"${value##*[![:space:]]}"}" # Remove trailing whitespace
case $key in
XLEN\ ?*) default_xlen=${value//\?=/} ;;
TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
PREFIX\ ?*) default_prefix=${value//\?=/} ;;
esac
done < config.mk
fi
# set configuration parameters
XLEN=${XLEN:=$default_xlen}
TOOLDIR=${TOOLDIR:=$default_tooldir}
OSVERSION=${OSVERSION:=$default_osversion}
PREFIX=${PREFIX:=$default_prefix}
# parse command line arguments
usage() {
echo "Usage: $0 [--xlen=<value>] [--tooldir=<path>] [--osversion=<version>]"
echo " --xlen=<value> Set the XLEN value (default: 32)"
echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
echo " --prefix=<path> Set installation directory"
exit 1
}
while [[ "$#" -gt 0 ]]; do
case $1 in
--xlen=*) XLEN="${1#*=}" ;;
--tooldir=*) TOOLDIR="${1#*=}" ;;
--osversion=*) OSVERSION="${1#*=}" ;;
--prefix=*) PREFIX="${1#*=}" ;;
-h|--help) usage ;;
*) echo "Unknown parameter passed: $1"; usage ;;
esac
shift
done
# check OS
if [ "$OSVERSION" == "unsupported" ]; then
echo "Error: Unsupported OS."
exit -1
fi
# project subdirectories to build
SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*")
# Get the directory of the script
SOURCE_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
THIRD_PARTY_DIR=$SOURCE_DIR/third_party
copy_files "$SOURCE_DIR" "$CURRENT_DIR"

View file

@ -1,30 +0,0 @@
# Vortex Documentation
## Table of Contents
- [Codebase Layout](codebase.md)
- [Microarchitecture](microarchitecture.md)
- [Cache Subsystem](cache_subsystem.md)
- [Software](software.md)
- [Simulation](simulation.md)
- [FPGA Setup Guide](fpga_setup.md)
- [Debugging](debugging.md)
- [Useful Links](references.md)
## Installation
- Refer to the install instructions in [README](../README.md).
## Quick Start Scenarios
Running Vortex simulators with different configurations:
- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads
$ ./ci/blackbox.sh --clusters=2 --cores=2 --warps=2 --threads=4 --driver=rtlsim --app=basic
- Run demo driver test with vlsim driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=2 --driver=vlsim --app=demo
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
$ ./ci/blackbox.sh --clusters=4 --cores=4 --warps=8 --threads=6 --driver=simx --app=dogfood

View file

@ -1,70 +0,0 @@
# Vortex Cache Subsystem
The Vortex Cache Sub-system has the following main properties:
- High-bandwidth with bank parallelism
- Snoop protocol to flush data for CPU access
- Generic design: Dcache, Icache, Shared Memory, L2 cache, L3 cache
### Cache Hierarchy
![Image of Cache Hierarchy](./images/cache_hierarchy.png)
- Cache can be configured to be any level in the hierarchy
- Caches communicate via snooping
- Cache flush from AFU is passed down the hierarchy
### VX_cache.v (Top Module)
VX.cache.v is the top module of the cache verilog code located in the `/hw/rtl/cache` directory.
![Image of Vortex Cache](./images/vortex_cache_top_module.png)
- Configurable (Cache size, number of banks, bank line size, etc.)
- I/O signals
- Core Request
- Core Rsp
- DRAM Req
- DRAM Rsp
- Snoop Rsp
- Snoop Rsp
- Snoop Forwarding Out
- Snoop Forwarding In
- Bank Select
- Assigns valid and ready signals for each bank
- Snoop Forwarder
- DRAM Request Arbiter
- Prepares cache response for communication with DRAM
- Snoop Response Arbiter
- Sends snoop response
- Core Response Merge
- Cache accesses one line at a time. As a result, each request may not come back in the same response. This module tries to recombine the responses by thread ID.
### VX_bank.v
VX_bank.v is the verilog code that handles cache bank functionality and is located in the `/hw/rtl/cache` directory.
![Image of Vortex Cache Bank](./images/vortex_bank.png)
- Allows for high throughput
- Each bank contains queues to hold requests to the cache
- I/O signals
- Core request
- Core Response
- DRAM Fill Requests
- DRAM Fill Response
- DRAM WB Requests
- Snp Request
- Snp Response
- Request Priority: DRAM fill, miss reserve, core request, snoop request
- Snoop Request Queue
- DRAM Fill Queue
- Core Req Arbiter
- Requests to be processed by the bank
- Tag Data Store
- Registers for valid, dirty, dirtyb, tag, and data
- Length of registers determined by lines in the bank
- Tag Data Access:
- I/O: stall, snoop info, force request miss
- Writes to cache or sends read response; hit or miss determined here
- A missed request goes to the miss reserve if it is not a snoop request or DRAM fill

View file

@ -1,35 +0,0 @@
# Vortex Codebase
The directory/file layout of the Vortex codebase is as followed:
- `hw`:
- `rtl`: hardware rtl sources
- `cache`: cache subsystem code
- `fp_cores`: floating point unit code
- `interfaces`: interfaces for inter-module communication
- `libs`: general-purpose modules (i.e., encoder, arbiter, ...)
- `syn`: synthesis directory
- `opae`: OPAE synthesis scripts
- `quartus`: Quartus synthesis scripts
- `synopsys`: Synopsys synthesis scripts
- `yosys`: Yosys synthesis scripts
- `simulate`: baseline RTL simulator (used by RTLSIM)
- `unit_tests`: unit tests for some hardware components
- `driver`: Host driver software
- `include`: Vortex driver public headers
- `opae`: software driver that uses Intel OPAE
- `vlsim`: software driver that simulates Full RTL (include AFU)
- `rtlsim`: software driver that simulates processor RTL
- `simx`: software driver that uses simX simulator
- `runtime`: Kernel runtime software
- `include`: Vortex runtime public headers
- `linker`: linker file for compiling kernels
- `src`: runtime implementation
- `simX`: cycle approximate simulator for vortex
- `tests`: tests repository.
- `runtime`: runtime tests
- `regression`: regression tests
- `riscv`: RISC-V standard tests
- `opencl`: opencl benchmarks and tests
- `ci`: continuous integration scripts
- `miscs`: miscellaneous resources.

View file

@ -1,43 +0,0 @@
# Debugging Vortex Hardware
## SimX Debugging
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
// Running demo program on SimX in debug mode
$ ./ci/blackbox.sh --driver=simx --app=demo --debug
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (decoded instruction, register states, pipeline states, etc..). You can increase the verbosity level of the trace by changing the `DEBUG_LEVEL` variable to a value [1-5] (default is 3).
// Using SimX in debug mode with verbose level 4
$ CONFIGS=-DDEBUG_LEVEL=4 ./ci/blackbox.sh --driver=simx --app=demo --debug
## RTL Debugging
To debug the processor RTL, you need to use VLSIM or RTLSIM driver. VLSIM simulates the full processor including the AFU command processor (using `/rtl/afu/vortex_afu.sv` as top module). RTLSIM simulates the Vortex processor only (using `/rtl/Vortex.v` as top module).
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
// Running demo program on vlsim in debug mode
$ ./ci/blackbox.sh --driver=vlsim --app=demo --debug
// Running demo program on rtlsim in debug mode
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc..). A waveform trace `trace.vcd` is also generated in the current directory during the program execution. You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc..). [GTKwave] (http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.
## FPGA Debugging
Debugging the FPGA directly may be necessary to investigate runtime bugs that the RTL simulation cannot catch. We have implemented an in-house scope analyzer for Vortex that works when the FPGA is running. To enable the FPGA scope analyzer, the FPGA bitstream should be built using `SCOPE=1` flag
& cd /hw/syn/opae
$ CONFIGS=-DSCOPE=1 make fpga-4c
When running the program on the FPGA, you need to pass the `--scope` flag to the `blackbox` tool.
// Running demo program on FPGA with scope enabled
$ ./ci/blackbox.sh --driver=fpga --app=demo --scope
A waveform trace `trace.vcd` will be generated in the current directory during the program execution. This trace includes a limited set of signals that are defined in `/hw/scripts/scope.json`. You can expand your signals' selection by updating the json file.

View file

@ -1,80 +0,0 @@
# FPGA Startup and Configuration Guide
OPAE Environment Setup
----------------------
$ source /opt/inteldevstack/init_env_user.sh
$ export OPAE_HOME=/opt/opae/1.1.2
$ export PATH=$OPAE_HOME/bin:$PATH
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
$ export PATH=:/opt/verilator/bin:$PATH
$ export VERILATOR_ROOT=/opt/verilator
OPAE Build Configuration
------------------------
Within the `/hw/syn/opae` directory, there are source text files for each core-option for the fpga build (the 32 and 64 core options are not currently implemented) which have the following parameters that can be configured:
- NUM_CORES: the number of cores per cluster
- NUM_CLUSTERS: the number of clusters alotted to the processor
- L3_ENABLE: enable the use of the L3 cache
- PERF_ENABLE: enable the use of all profile counters
To enable L3 cache and profile counters for a build, simply uncomment the definition within the respective source file.
OPAE Build
------------------
The FPGA has to following configuration options:
- 1 core fpga (fpga-1c)
- 2 cores fpga (fpga-2c)
- 4 cores fpga (fpga-4c)
- 8 cores fpga (fpga-8c)
- 16 cores fpga (fpga-16c)
$ cd hw/syn/opae
$ make fpga- *# of cores* c
Example: `make fpga-4c`
A new folder (ex: `build_fpga_4c`) will be created and the build will start and take ~30-45 min to complete.
OPAE Build Progress
-------------------
You could check the last 10 lines in the build log for possible errors until build completion.
$ tail -n 10 ./build_fpga_4c/build.log
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
$ ps -u *username*
If the build fails and you need to restart it, clean up the build folder using the following command:
$ make clean-fpga- *# of cores* c
Example: `make clean-fpga-4c`
The file `vortex_afu.gbs` should exist when the build is done:
$ ls -lsa ./build_fpga_ *# of cores* c/vortex_afu.gbs
Signing the bitstream and Programming the FPGA
----------------------------------------------
$ cd ./build_fpga_`# of cores`c/
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
FPGA sample test running OpenCL sgemm kernel
--------------------------------------------
Run the following from the Vortex root directory
$ ./ci/blackbox.sh --driver=fpga --app=sgemm --args="-n64"

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 517 KiB

View file

@ -1,94 +0,0 @@
# Vortex Microarchitecture
### Vortex GPGPU Execution Model
Vortex uses the SIMT (Single Instruction, Multiple Threads) execution model with a single warp issued per cycle.
- **Threads**
- Smallest unit of computation
- Each thread has its own register file (32 int + 32 fp registers)
- Threads execute in parallel
- **Warps**
- A logical clster of threads
- Each thread in a warp execute the same instruction
- The PC is shared; maintain thread mask for Writeback
- Warp's execution is time-multiplexed at log steps
- Ex. warp 0 executes at cycle 0, warp 1 executes at cycle 1
### Vortex RISC-V ISA Extension
- **Thread Mask Control**
- Control the number of warps to activate during execution
- `TMC` *count*: activate count threads
- **Warp Scheduling**
- Control the number of warps to activate during execution
- `WSPAWN` *count, addr*: activate count warps and jump to addr location
- **Control-Flow Divergence**
- Control threads to activate when a branch diverges
- `SPLIT` *predicate*: apply 'taken' predicate thread mask adn save 'not-taken' into IPDOM stack
- `JOIN`: restore 'not-taken' thread mask
- **Warp Synchronization**
- `BAR` *id, count*: stall warps entering barrier *id* until count is reached
### Vortex Pipeline/Datapath
![Image of Vortex Microarchitecture](./images/vortex_microarchitecture_v2.png)
Vortex has a 5-stage pipeline: FI | ID | Issue | EX | WB.
- **Fetch**
- Warp Scheduler
- Track stalled & active warps, resolve branches and barriers, maintain split/join IPDOM stack
- Instruction Cache
- Retrieve instruction from cache, issue I-cache requests/responses
- **Decode**
- Decode fetched instructions, notify warp scheduler when the following instructions are decoded:
- Branch, tmc, split/join, wspawn
- Precompute used_regs mask (needed for Issue stage)
- **Issue**
- Scheduling
- In-order issue (operands/execute unit ready), out-of-order commit
- IBuffer
- Store fetched instructions, separate queues per-warp, selects next warp through round-robin scheduling
- Scoreboard
- Track in-use registers
- GPRs (General-Purpose Registers) stage
- Fetch issued instruction operands and send operands to execute unit
- **Execute**
- ALU Unit
- Single-cycle operations (+,-,>>,<<,&,|,^), Branch instructions (Share ALU resources)
- MULDIV Unit
- Multiplier - done in 2 cycles
- Divider - division and remainder, done in 32 cycles
- Implements serial alogrithm (Stalls the pipeline)
- FPU Unit
- Multi-cycle operations, uses `FPnew` Library on ASIC, uses hard DSPs on FPGA
- CSR Unit
- Store constant status registers - device caps, FPU status flags, performance counters
- Handle external CSR requests (requests from host CPU)
- LSU Unit
- Handle load/store operations, issue D-cache requests, handle D-cache responses
- Commit load responses - saves storage, Scoreboard tracks completion
- GPGPU Unit
- Handle GPGPU instructions
- TMC, WSPAWN, SPLIT, BAR
- JOIN is handled by Warp Scheduler (upon SPLIT response)
- **Commit**
- Commit
- Update CSR flags, update performance counters
- Writeback
- Write result back to GPRs, notify Scoreboard (release in-use register), select candidate instruction (ALU unit has highest priority)
- **Clustering**
- Group mulitple cores into clusters (optionally share L2 cache)
- Group multiple clusters (optionally share L3 cache)
- Configurable at build time
- Default configuration:
- #Clusters = 1
- #Cores = 4
- #Warps = 4
- #Threads = 4
- **FPGA AFU Interface**
- Manage CPU-GPU comunication
- Query devices caps, load kernel instructions and resource buffers, start kernel execution, read destination buffers
- Local Memory - GPU access to local DRAM
- Reserved I/O addresses - redirect to host CPU, console output

Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 463 KiB

27
docs/cache_subsystem.md Normal file
View file

@ -0,0 +1,27 @@
# Vortex Cache Subsystem
The Vortex Cache Sub-system has the following main properties:
- High-bandwidth transfer with Multi-bank parallelism
- Non-blocking pipelined write-through cache architecture with per-bank MSHR
- Configurable design: Dcache, Icache, L2 cache, L3 cache
### Cache Microarchitecture
![Image of Cache Hierarchy](./assets/img/cache_microarchitecture.png)
The Vortex cache is comprised of multiple parallel banks. It is comprised of the following modules:
- **Bank request dispatch crossbar**: assigns a bank to incoming requests and resolve collision using stalls.
- **Bank response merge crossbar**: merges result from banks and forward to the core response.
- **Memory request multiplexer**: arbitrates bank memory requests
- **Memory response demultiplexer**: forwards memory response to the corresponding bank.
- **Flush Unit**: performs tag memory initialization.
Incoming requests entering the cache are sent to a dispatch crossbar that select the corresponding bank for each request, resolving bank collisions with stalls. The result output of each bank is merge back into outgoing response port via merger crossbar. Each bank intergates a non-blocking pipeline with a local Miss Status Holding Register (MSHR) to reduce the miss rate. The bank pipeline consists of the following stages:
- **Schedule**: Selects the next request into the pipeline from the incoming core request, memory fill, or the MSHR entry, with priority given to the latter.
- **Tag Access**: single-port read/write access to the tag store.
- **Data Access**: Single-port read/write access to the data store.
- **Response Handling**: Core response back to the core.
Deadlocks inside the cache can occur when the MSHR is full and a new request is already in the pipeline. It can also occur when the memory request queue is full, and there is an incoming memory response. The cache mitigates MSHR deadlocks by using an early full signal before a new request is issued and similarly mitigates memory deadlocks by ensuring that its request queue never fills up.

41
docs/codebase.md Normal file
View file

@ -0,0 +1,41 @@
# Vortex Codebase
The directory/file layout of the Vortex codebase is as followed:
- `hw`:
- `rtl`: hardware rtl sources
- `core`: core pipeline
- `cache`: cache subsystem
- `mem`: memory subsystem
- `fpu`: floating point unit
- `interfaces`: interfaces for inter-module communication
- `libs`: general-purpose RTL modules
- `syn`: synthesis directory
- `altera`: Altera synthesis scripts
- `xilinx`: Xilinx synthesis scripts
- `synopsys`: Synopsys synthesis scripts
- `modelsim`: Modelsim synthesis scripts
- `yosys`: Yosys synthesis scripts
- `unit_tests`: unit tests for some hardware components
- `runtime`: host runtime software APIs
- `include`: Vortex driver public headers
- `stub`: Vortex stub driver library
- `opae`: software driver that uses Intel OPAE API with device targets=fpga|asesim|opaesim
- `xrt`: software driver that uses Xilinx XRT API with device targets=hw|hw_emu|sw_emu
- `rtlsim`: software driver that uses rtlsim simulator
- `simx`: software driver that uses simX simulator
- `kernel`: GPU kernel software APIs
- `include`: Vortex runtime public headers
- `linker`: linker file for compiling kernels
- `src`: runtime implementation
- `sim`:
- `opaesim`: Intel OPAE AFU RTL simulator
- `rtlsim`: processor RTL simulator
- `simX`: cycle approximate simulator for vortex
- `tests`: tests repository.
- `riscv`: RISC-V conformance tests
- `kernel`: kernel tests
- `regression`: regression tests
- `opencl`: opencl benchmarks and tests
- `ci`: continuous integration scripts
- `miscs`: miscellaneous resources.

View file

@ -0,0 +1,36 @@
# Continuous Integration
- Each time you push to the repo, the Continuous Integration pipeline will run
- This pipeline consists of creating the correct development environment, building your code, and running all tests
- This is an extensive pipeline so it might take some time to complete
## Protecting Master Branch
Navigate to your Repository:
Open your repository on GitHub.
Click on "Settings":
In the upper-right corner of your repository page, click on the "Settings" tab.
Select "Branches" in the left sidebar:
On the left sidebar, look for the "Branches" option and click on it.
Choose the Branch:
Under "Branch protection rules," select the branch you want to protect. In this case, choose the main branch.
Enable Branch Protection:``
Check the box that says "Protect this branch."
Configure Protection Settings:
You can configure various protection settings. Some common settings include:
Require pull request reviews before merging: This ensures that changes are reviewed before being merged.
Require status checks to pass before merging: This ensures that automated tests and checks are passing.
Require signed commits: This enforces that commits are signed with a verified signature.
Restrict Who Can Push:
You can further restrict who can push directly to the branch. You might want to limit this privilege to specific people or teams.
Save Changes:
Once you've configured the protection settings, scroll down and click on the "Save changes" button.
Now, your main branch is protected, and certain criteria must be met before changes can be pushed directly to it. Contributors will need to create pull requests, have their changes reviewed, and meet other specified criteria before the changes can be merged into the main branch.

37
docs/contributing.md Normal file
View file

@ -0,0 +1,37 @@
# Contributing to Vortex
## Github
Vortex uses Github to host its git repositories.
There are a lot of ways to use the features on Github for collaboration.
Therefore, this documentation details the standard procedure for contributing to Vortex.
Development of Vortex is consolidated to this repo, `vortex` and any associated forks.
Previously, there was active work done on a private repo named `vortex-dev`.
`vortex-dev` has officially been deprecated and fully merged into this public repo, `vortex`.
If you are returning to this project and have legacy versions of Vortex, you can use the releases branches to access older versions.
## Contribution Process
In an effort to keep `vortex` organized, permissions to directly create branches and push code has been limited to admins.
However, contributions are strongly encouraged and keep the project moving forward! Here is the procedure for contributing:
1. Create a fork of `vortex`
2. In your fork, create a branch from `master` that briefly explains the work you are adding (ie: `develop-documentation`)
3. Make your changes on the new branch in your fork. You may create as many commits as you need, which might be common if you are making multiple iterations
4. Since you are the owner of your fork, you have full permissions to push commits to your fork
4. When you are satisfied with the changes on your fork, you can open a PR from your fork using the online interface
5. If you recently made a push, you will get automatically get a prompt on Github online to create a PR, which you can press
6. Otherwise, you can go to your fork on Github online and manually create a PR (todo)
(todo): how to name and format your PR, what information you should add to the PR, does not need to be too strict if you are attending the weekly meetings*
7. Github uses the following semantics: `base repository` gets the changes from your `head repository`
8. Therefore, you should set the `base repository` to `vortexgpgpu/vortex` and the `base` branch to `master` since the master branch is protected by reviewed PRs.
9. And you should assign the `head repository` to `<your-github-username>/vortex` (which represents your fork of vortex) and the `base` branch to the one created in step 2
10. Now that your intended PR has been specified, you should review the status. Check for merge conflicts, if all your commits are present, and all the modified files make sense
11. You can still make a PR if there are issues in step 10, just make sure the structure is correct according to steps 7-9
12. Once the PR is made, the CI pipeline will run automatically, testing your changes
13. Remember, a PR is flexible if you need to make changes to the code you can go back to your branch of the fork to commit and push any updates
14. As long as the `head repository`'s `base` branch is the one you edited, the PR will automatically get the most recent changes
15. When all merge conflicts are resolved, changes are made, and tests pass you can have an admin merge your PR
## What Makes a Good Contribution?
- If you are contributing code changes, then review [testing.md](./testing.md) to ensure your tests are integrated into the [CI pipeline](continuous_integration.md)
- During a PR, you should consider the advice you are provided by your reviewers. Remember you keep adding commits to an open PR!
- If your change aims to fix an issue opened on Github, please tag that issue in the PR itself

74
docs/debugging.md Normal file
View file

@ -0,0 +1,74 @@
# Debugging Vortex GPU
## Testing changes to the RTL or simulator GPU driver.
The Blackbox utility script will not pick up your changes if the h/w configuration is the same as during teh last run.
To force the utility to build the driver, you need pass the --rebuild=1 option when running tests.
Using --rebuild=0 will prevent the rebuild even if the h/w configuration is different from last run.
$ ./ci/blackbox.sh --driver=simx --app=demo --rebuild=1
## SimX Debugging
SimX cycle-approximate simulator allows faster debugging of Vortex kernels' execution.
The recommended method to enable debugging is to pass the `--debug=<level>` flag to `blackbox` tool when running a program.
// Running demo program on SimX in debug mode
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=1
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (decoded instruction, register states, pipeline states, etc..). You can increase the verbosity of the trace by changing the debug level.
// Using SimX in debug mode with verbose level 3
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3
## RTL Debugging
To debug the processor RTL, you need to use VLSIM or RTLSIM driver. VLSIM simulates the full processor including the AFU command processor (using `/rtl/afu/opae/vortex_afu.sv` as top module). RTLSIM simulates the Vortex processor only (using `/rtl/Vortex.v` as top module).
The recommended method to enable debugging is to pass the `--debug` flag to `blackbox` tool when running a program.
// Running demo program on the opae simulator in debug mode
$ TARGET=opaesim ./ci/blackbox.sh --driver=opae --app=demo --debug=1
// Running demo program on rtlsim in debug mode
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc..). A waveform trace `trace.vcd` is also generated in the current directory during the program execution.
By default all library modules unde the /libs/ folder are excluded from the trace to reduce the waveform file size, you can chnage that behavoir by either explicitly commenting out `TRACING_OFF`/`TRACING_ON` inside a lib module source (e.g. VX_stream_buffer.sv) or simply enabling a full trace using the following command.
// Debugging the demo program with rtlsim in full tracing mode
$ CONFIGS="-DTRACING_ALL" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1
You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc..). [GTKwave] (http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.
## FPGA Debugging
Debugging the FPGA directly may be necessary to investigate runtime bugs that the RTL simulation cannot catch. We have implemented an in-house scope analyzer for Vortex that works when the FPGA is running. To enable the FPGA scope analyzer, the FPGA bitstream should be built using `SCOPE=1` flag
& cd /hw/syn/opae
$ CONFIGS="-DSCOPE=1" TARGET=fpga make
When running the program on the FPGA, you need to pass the `--scope` flag to the `blackbox` tool.
// Running demo program on FPGA with scope enabled
$ ./ci/blackbox.sh --driver=fpga --app=demo --scope
A waveform trace `trace.vcd` will be generated in the current directory during the program execution. This trace includes a limited set of signals that are defined in `/hw/scripts/scope.json`. You can expand your signals' selection by updating the json file.
## Analyzing Vortex trace log
When debugging Vortex RTL or SimX Simulator, reading the trace run.log file can be overwhelming when the trace gets really large.
We provide a trace sanitizer tool under ./hw/scripts/trace_csv.py that you can use to convert the large trace into a CSV file containing all the instructions that executed with their source and destination operands.
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=3 --log=run_rtlsim.log
$ ./ci/trace_csv.py -trtlsim run_rtlsim.log -otrace_rtlsim.csv
$ ./ci/blackbox.sh --driver=simx --app=demo --debug=3 --log=run_simx.log
$ ./ci/trace_csv.py -tsimx run_simx.log -otrace_simx.csv
$ diff trace_rtlsim.csv trace_simx.csv
The first column in the CSV trace is UUID (universal unique identifier) of the instruction and the content is sorted by the UUID.
You can use the UUID to trace the same instruction running on either the RTL hw or SimX simulator.
This can be very effective if you want to use SimX to debugging your RTL hardware by comparing CSV traces.

51
docs/environment_setup.md Normal file
View file

@ -0,0 +1,51 @@
# Environment Setup
These instructions apply to the development vortex repo using the updated toolchain. The updated toolchain is considered to be any commit of `master` pulled from July 2, 2023 onwards. The toolchain update in question can be viewed in this [commit](https://github.com/vortexgpgpu/vortex-dev/commit/0048496ba28d7b9a209a0e569d52d60f2b68fc04). Therefore, if you are unsure whether you are using the new toolchain or not, then you should check the `ci` folder for the existence of the `toolchain_prebuilt.sh` script. Furthermore, you should notice that the `toolchain_install.sh` script has the legacy `llvm()` split into `llvm-vortex()` and `llvm-pocl()`.
## Set Up on Your Own System
The toolchain binaries provided with Vortex are built on Ubuntu-based systems. To install Vortex on your own system, [follow these instructions](install_vortex.md).
## Servers for Georgia Tech Students and Collaborators
### Volvo
Volvo is a 64-core server provided by HPArch. You need valid credentials to access it. If you don't already have access, you can get in contact with your mentor to ask about setting your account up.
Setup on Volvo:
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
2. `ssh volvo.cc.gatech.edu`
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
4. `source /nethome/software/set_vortex_env.sh` to set up the necessary environment variables.
5. `make -s` in the `vortex` root directory
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
### Nio
Nio is a 20-core desktop server provided by HPArch. If you have access to Volvo, you also have access to Nio.
Setup on Nio:
1. Connect to Georgia Tech's VPN or ssh into another machine on campus
2. `ssh nio.cc.gatech.edu`
3. Clone Vortex to your home directory: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
4. `source /opt/set_vortex_env_dev.sh` to set up the necessary environment variables.
5. `make -s` in the `vortex` root directory
6. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
## Docker (Experimental)
Docker allows for isolated pre-built environments to be created, shared and used. The emulation mode required for ARM-based processors will incur a decrease in performance. Currently, the dockerfile is not included with the official vortex repository and is not actively maintained or supported.
### Setup with Docker
1. Clone repo recursively onto your local machine: `git clone --recursive https://github.com/vortexgpgpu/vortex.git`
2. Download the dockerfile from [here](https://github.gatech.edu/gist/usubramanya3/f1bf3e953faa38a6372e1292ffd0b65c) and place it in the root of the repo.
3. Build the Dockerfile into an image: `docker build --platform=linux/amd64 -t vortex -f dockerfile .`
4. Run a container based on the image: `docker run --rm -v ./:/root/vortex/ -it --name vtx-dev --privileged=true --platform=linux/amd64 vortex`
5. Install the toolchain `./ci/toolchain_install.sh --all` (once per container)
6. `make -s` in `vortex` root directory
7. Run a test program: `./ci/blackbox.sh --cores=2 --app=dogfood`
You may exit from a container and resume a container you have exited or start a second terminal session `docker exec -it <container-name> bash`

217
docs/fpga_setup.md Normal file
View file

@ -0,0 +1,217 @@
# FPGA Startup and Configuration Guide
## Gaining Access to FPGA's with CRNCH
If you are associated with Georgia Tech (or related workshops) you can use CRNCH's server to gain remote access to FPGA's. Otherwise, you can skip to the Xilinx or Intel (Altera) synthesis steps below.
## What is CRNCH?
**C**enter for **R**esearch into **N**ovel **C**omputing **H**ierarchies
## What does CRNCH Offer?
**The Rogues Gallery (RG)**: new concept focused on developing our understanding of next-generation hardware with a focus on unorthodox and uncommon technologies. **RG** will acquire new and unique hardware (ie, the aforementioned “*rogues*”) from vendors, research labs, and startups and make this hardware available to students, faculty, and industry collaborators within a managed data center environment
## Why are the Rouges Important?
By exposing students and researchers to this set of unique hardware, we hope to foster cross-cutting discussions about hardware designs that will drive future *performance improvements in computing long after the Moores Law era of “cheap transistors” ends*. Specifically, the Rouges Gallery contains FPGA's which can be synthesized into Vortex hardware.
## How is the Rouges Gallery Funded?
Rogues Gallery testbed is primarily supported by the National Science Foundation (NSF) under NSF Award Number [#2016701](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2016701&HistoricalAwards=false)
## Rouges Gallery Documentation
You can read about RG in more detail on its official documentation [page](https://gt-crnch-rg.readthedocs.io/en/main/index.html#).
You can listen to a talk about RG [here](https://mediaspace.gatech.edu/media/Jeff%20Young%20-%20Rogues%20Gallery%20-%20CRNCH%20Summit%202021/1_lqlgr0jj)
[CRNCH Summit 2023](https://github.com/gt-crnch/crnch-summit-2023/tree/main)
## Request Access for Rouges Gallery
You should use [this form](https://crnch-rg.cc.gatech.edu/request-rogues-gallery-access/) to request access to RGs reconfigurable computing (vortex fpga) resources. You should receive an email with your ticket item being created. Once it gets processed, you should get an email confirmed your access has been granted. It might take some time to get processed.
## How to Access Rouges Gallery?
There are two methods of accessing CRNCH's Rouges Gallery
1) Web-based GUI: [rg-ood.crnch.gatech.edu](http://rg-ood.crnch.gatech.edu/)
2) SSH: `ssh <your-gt-username>@rg-login.crnch.gatech.edu`
## Where should I keep my files?
The CRNCH servers have a folder called `USERSCRATCH` which can be found in your home directory: `echo $HOME`. You should keep all your files in this folder since it is available across all the Rouges Gallery Nodes.
## **What Machines are Available in the Rogues Gallery?**
Complete list of machines can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/rg-hardware.html). Furthermore, you can find detailed information about the FPGA hardware [here](https://gt-crnch-rg.readthedocs.io/en/main/reconfig/xilinx/xilinx-getting-started.html).
## Allocate an FPGA Node
Once youve connected to the CRNCH login node, you can use the Slurm scheduler to request an interactive job using `salloc`. This [page](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm.html) explains why we use Slurm to request resources. Documentation for `salloc` can be found [here](https://gt-crnch-rg.readthedocs.io/en/main/general/using-slurm-examples.html). And here.
To request 16 cores and 64GB of RAM for 6 hours on flubber9, a fpga dev node:
```bash
salloc -p rg-fpga --nodes=1 --ntasks-per-node=16 --mem=64G --nodelist flubber1 --time=06:00:00
```
Synthesis for Xilinx Boards
----------------------
Once you are logged in, you will need to complete some first time configurations. If you are interested in the Intel (Altera) synthesis steps, scroll down below.
### Source Configuration Scripts
```
# From any directory
$ source /opt/xilinx/xrt/setup.sh
$ source /tools/reconfig/xilinx/Vitis/2023.1/settings64.sh
```
### Check Installed FPGA Platforms
`platforminfo -l` which tells us the correct name of the platform installed on the current fpga node. It should be used for the `PLATFORM` variable below. Otherwise, if there is an error then there was an issue with the previous two commands.
### Install Vortex Toolchain
The Xilinx synthesis process requires verilator to generate the bitstream. Eventually, you will need the whole toolchain to run the bitstream on the FPGA. Therefore, the Vortex toolchain and can be installed as follows. If you complete these steps properly, you should only need to complete them once and you can skip to `Activate Vortex Toolchain`
```
# Make a build directory from root and configure scripts for your environment
mkdir build && cd build && ../configure --tooldir=$HOME/tools
# Install the whole prebuilt toolchain
./ci/toolchain_install.sh --all
# Add environment variables to bashrc
echo "source <full-path-to-vortex-root>/vortex/build/ci/toolchain_env.sh" >> ~/.bashrc
```
### Activate Vortex Toolchain
```
# From any directory
source ~/.bashrc
# Check environment setup
verilator --version
```
### Build the FPGA Bitstream
The root directory contains the path `hw/syn/xilinx/xrt` which has the makefile used to generate the Vortex bitstream.
```
$ cd hw/syn/xilinx/xrt
$ PREFIX=test1 PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 TARGET=hw NUM_CORES=1 make > build_u250_hw_1c.log 2>&1 &
```
Will run the synthesis under new build directory: BUILD_DIR := "\<PREFIX>\_\<PLATFORM>\_\<TARGET>"
The generated bitstream will be located under <BUILD_DIR>/bin/vortex_afu.xclbin
For long-running jobs, invocation of this makefile can be made of the following form:
`[CONFIGS=<vortex macros>] [PREFIX=<prefix directory name>] [NUM_CORES=<#>] TARGET=hw|hw_emu PLATFORM=<platform baseName> nohup make > <log filename> 2>&1 &`
For example:
```bash
CONFIGS="-DL2_ENABLE -DDCACHE_SIZE=8192" PREFIX=build_4c_u280 NUM_CORES=4 TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202310_1 nohup make > build_u250_hw_4c.log 2>&1 &
```
The build is complete when the bitstream file `vortex_afu.xclbin` exists in `<prefix directory name><platform baseName>hw|hw_emu/bin`.
### Running a Program on Xilinx FPGA
The [blackbox.sh](./simulation.md) script within the build directory can be used to run a test with Vortexs xrt driver using the following command:
`FPGA_BIN_DIR=<path to bitstream directory> TARGET=hw|hw_emu PLATFORM=<platform baseName> ./ci/blackbox.sh --driver=xrt --app=<test name>`
For example:
```FPGA_BIN_DIR=<realpath> hw/syn/xilinx/xrt/build_4c_u280_xilinx_u280_gen3x16_xdma_1_202211_1_hw/bin TARGET=hw PLATFORM=xilinx_u280_gen3x16_xdma_1_202211_1 ./ci/blackbox.sh --driver=xrt --app=demo```
Synthesis for Intel (Altera) Boards
----------------------
### OPAE Environment Setup
$ source /opt/inteldevstack/init_env_user.sh
$ export OPAE_HOME=/opt/opae/1.1.2
$ export PATH=$OPAE_HOME/bin:$PATH
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
### OPAE Build
The FPGA has to following configuration options:
- DEVICE_FAMILY=arria10 | stratix10
- NUM_CORES=#n
Command line:
$ cd hw/syn/altera/opae
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete.
Setting TARGET=ase will build the project for simulation using Intel ASE.
### OPAE Build Configuration
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when build the processor.For example, have the following parameters that can be configured:
- `NUM_WARPS`: Number of warps per cores
- `NUM_THREADS`: Number of threads per warps
- `PERF_ENABLE`: enable the use of all profile counters
You configure the syntesis build from the command line:
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
### OPAE Build Progress
You could check the last 10 lines in the build log for possible errors until build completion.
$ tail -n 10 <build_dir>/build.log
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
$ ps -u <username>
If the build fails and you need to restart it, clean up the build folder using the following command:
$ make clean
The file `vortex_afu.gbs` should exist when the build is done:
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
### Signing the bitstream and Programming the FPGA
$ cd <build_dir>
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
### Sample FPGA Run Test
Ensure you have the correct opae runtime for the FPGA target
```
$ TARGET=FPGA make -C runtime/opae
```
Run the [blackbox.sh](./simulation.md) from your Vortex build directory
```
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
```
### FPGA sample test running OpenCL sgemm kernel
You can use the `blackbox.sh` script to run the following from your Vortex build directory
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
### Testing Vortex using OPAE with Intel ASE Simulation
Building ASE synthesis
```$ TARGET=asesim make -C runtime/opae```
Building ASE runtime
```$ TARGET=asesim make -C runtime/opae```
Running ASE simulation
```$ ASE_LOG=0 ASE_WORKDIR=<build_dir>/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n16"```

9
docs/index.md Normal file
View file

@ -0,0 +1,9 @@
# Vortex Documentation
## Table of Contents
- [Codebase Layout](codebase.md): Summary of repo file tree
- [Microarchitecture](microarchitecture.md): Vortex Pipeline and cache microarchitectural details and reconfigurability
- [Simulation](simulation.md): Details for building and running each simulation driver
- [Contributing](contributing.md): Process for contributing your own features including repo semantics and testing
- [Debugging](debugging.md): Debugging configurations for each Vortex driver

81
docs/install_vortex.md Normal file
View file

@ -0,0 +1,81 @@
# Installing and Setting Up the Vortex Environment
## Ubuntu 18.04, 20.04
1. Install the following dependencies:
```
sudo apt-get install build-essential zlib1g-dev libtinfo-dev libncurses5 uuid-dev libboost-serialization-dev libpng-dev libhwloc-dev
```
2. Upgrade GCC to 11:
```
sudo apt-get install gcc-11 g++-11
```
Multiple gcc versions on Ubuntu can be managed with update-alternatives, e.g.:
```
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11
```
3. Download the Vortex codebase:
```
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
```
4. Build Vortex
```
$ cd vortex
$ mkdir -p build
$ cd build
$ ../configure --xlen=32 --tooldir=$HOME/tools
$ ./ci/toolchain_install.sh --all
$ source ./ci/toolchain_env.sh
$ make -s
```
## RHEL 8
Note: depending on the system, some of the toolchain may need to be recompiled for non-Ubuntu Linux. The source for the tools can be found [here](https://github.com/vortexgpgpu/).
1. Install the following dependencies:
```
sudo yum install libpng-devel boost boost-devel boost-serialization libuuid-devel opencl-headers hwloc hwloc-devel gmp-devel compat-hwloc1
```
2. Upgrade GCC to 11:
```
sudo yum install gcc-toolset-11
```
Multiple gcc versions on Red Hat can be managed with scl
3. Install MPFR 4.2.0:
Download [the source](https://ftp.gnu.org/gnu/mpfr/) and follow [the installation documentation](https://www.mpfr.org/mpfr-current/mpfr.html#How-to-Install).
4. Download the Vortex codebase:
```
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
```
5. Build Vortex
```
$ cd vortex
$ mkdir -p build
$ cd build
$ ../configure --xlen=32 --tooldir=$HOME/tools
$ ./ci/toolchain_install.sh --all
$ source ./ci/toolchain_env.sh
$ make -s
```

83
docs/microarchitecture.md Normal file
View file

@ -0,0 +1,83 @@
# Vortex Microarchitecture
### Vortex GPGPU Execution Model
Vortex uses the SIMT (Single Instruction, Multiple Threads) execution model with a single warp issued per cycle.
- **Threads**
- Smallest unit of computation
- Each thread has its own register file (32 int + 32 fp registers)
- Threads execute in parallel
- **Warps**
- A logical clster of threads
- Each thread in a warp execute the same instruction
- The PC is shared; maintain thread mask for Writeback
- Warp's execution is time-multiplexed at log steps
- Ex. warp 0 executes at cycle 0, warp 1 executes at cycle 1
### Vortex RISC-V ISA Extension
- **Thread Mask Control**
- Control the number of warps to activate during execution
- `TMC` *count*: activate count threads
- **Warp Scheduling**
- Control the number of warps to activate during execution
- `WSPAWN` *count, addr*: activate count warps and jump to addr location
- **Control-Flow Divergence**
- Control threads activation when a branch diverges
- `SPLIT` *taken, predicate*: apply predicate thread mask and save current state into IPDOM stack
- `JOIN`: pop IPDOM stack to restore thread mask
- `PRED` *predicate, restore_mask*: thread predicate instruction
- **Warp Synchronization**
- `BAR` *id, count*: stall warps entering barrier *id* until count is reached
### Vortex Pipeline/Datapath
![Image of Vortex Microarchitecture](./assets/img/vortex_microarchitecture.png)
Vortex has a 6-stage pipeline:
- **Schedule**
- Warp Scheduler
- Schedule the next PC into the pipeline
- Track stalled, active warps
- IPDOM Stack
- Save split/join states for divergent threads
- Inflight Tracker
- Track in-flight instructions
- **Fetch**
- Retrieve instructions from memory
- Handle I-cache requests/responses
- **Decode**
- Decode fetched instructions
- Notify warp scheduler on control instructions
- **Issue**
- IBuffer
- Store decoded instructions in separate per-warp queues
- Scoreboard
- Track in-use registers
- Check register use for decoded instructions
- Operands Collector
- Fetch the operands for issued instructions from the register file
- **Execute**
- ALU Unit
- Handle arithmetic and branch operations
- FPU Unit
- Handle floating-point operations
- LSU Unit
- Handle load/store operations
- SFU Unit
- Handle warp control operations
- Handle Control Status Registers (CSRs) operations
- **Commit**
- Write result back to the register file and update the Scoreboard.
### Vortex clustering architecture
- Sockets
- Grouping multiple cores sharing L1 cache
- Clusters
- Grouping of sockets sharing L2 cache
### Vortex Cache Subsystem
More details about the cache subsystem are provided [here](./cache_subsystem.md).

View file

@ -6,13 +6,16 @@
### Cycle-Approximate Simulation
SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simX` folder.
SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant files are located in the `simx` folder. The [readme](README.md) has the most detailed instructions for building and running simX.
- To install on your own system, [follow this document](install_vortex.md).
- For the different Georgia Tech environments Vortex supports, [read this document](environment_setup.md).
### FGPA Simulation
The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/FPGA_Startup_Guide.md)
The guide to build the fpga with specific configurations is located [here.](fpga_setup.md) You can find instructions for both Xilinx and Altera based FPGAs.
### How to Test
### How to Test (using `blackbox.sh`)
Running tests under specific drivers (rtlsim,simx,fpga) is done using the script named `blackbox.sh` located in the `ci` folder. Running command `./ci/blackbox.sh --help` from the Vortex root directory will display the following command line arguments for `blackbox.sh`:
@ -20,17 +23,17 @@ Running tests under specific drivers (rtlsim,simx,fpga) is done using the script
- *Cores* - used to specify the number of cores (processing element containing multiple warps) within a configuration.
- *Warps* - used to specify the number of warps (collection of concurrent hardware threads) within a configuration.
- *Threads* - used to specify the number of threads (smallest unit of computation) within a configuration.
- *L2cache* - used to enable the shard l2cache among the Vortex cores.
- *L2cache* - used to enable the shared l2cache among the Vortex cores.
- *L3cache* - used to enable the shared l3cache among the Vortex clusters.
- *Driver* - used to specify which driver to run the Vortex simulation (either rtlsim, vlsim, fpga, or simx).
- *Driver* - used to specify which driver to run the Vortex simulation (either rtlsim, opae, xrt, simx).
- *Debug* - used to enable debug mode for the Vortex simulation.
- *Perf* - used to enable the detailed performance counters within the Vortex simulation.
- *App* - used to specify which test/benchmark to run in the Vortex simulation. The main choices are vecadd, sgemm, basic, demo, and dogfood. Other tests/benchmarks are located in the `/benchmarks/opencl` folder though not all of them work wit the current version of Vortex.
- *Args* - used to pass additional arguments to the application.
Example use of command line arguments: Run the sgemm benchmark using the vlsim driver with a Vortex configuration of 1 cluster, 4 cores, 4 warps, and 4 threads.
Example use of command line arguments: Run the sgemm benchmark using the opae driver with a Vortex configuration of 1 cluster, 4 cores, 4 warps, and 4 threads.
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=4 --driver=vlsim --app=sgemm
$ ./ci/blackbox.sh --clusters=1 --cores=4 --warps=4 --threads=4 --driver=opae --app=sgemm
Output from terminal:
```
@ -47,4 +50,20 @@ PERF: core1: instrs=90693, cycles=53108, IPC=1.707709
PERF: core2: instrs=90849, cycles=53107, IPC=1.710678
PERF: core3: instrs=90836, cycles=50347, IPC=1.804199
PERF: instrs=363180, cycles=53108, IPC=6.838518
```
```
## Additional Quick Start Scenarios
Running Vortex simulators with different configurations and drivers is supported. For example:
- Run basic driver test with rtlsim driver and Vortex config of 2 clusters, 2 cores, 2 warps, 4 threads
$ ./ci/blackbox.sh --driver=rtlsim --clusters=2 --cores=2 --warps=2 --threads=4 --app=basic
- Run demo driver test with opae driver and Vortex config of 1 clusters, 4 cores, 4 warps, 2 threads
$ ./ci/blackbox.sh --driver=opae --clusters=1 --cores=4 --warps=4 --threads=2 --app=demo
- Run dogfood driver test with simx driver and Vortex config of 4 cluster, 4 cores, 8 warps, 6 threads
$ ./ci/blackbox.sh --driver=simx --clusters=4 --cores=4 --warps=8 --threads=6 --app=dogfood

52
docs/testing.md Normal file
View file

@ -0,0 +1,52 @@
# Testing
## Running a Vortex application
The framework provides a utility script: blackbox.sh under the /ci/ folder for executing applications in the tests tree. It gets copied into the `build` directory with all the environment variables resolved, so you should run it from the `build` directory as follows:
You can query the commandline options of the tool using:
$ ./ci/blackbox.sh --help
To execute sgemm test program on the simx driver and passing "-n10" as argument to sgemm:
$ ./ci/blackbox.sh --driver=simx --app=sgemm --args="-n10"
You can execute the same application of a GPU architecture with 2 cores:
$ ./ci/blackbox.sh --core=2 --driver=simx --app=sgemm --args="-n10"
When excuting, Blackbox needs to recompile the driver if the desired architecture changes.
It tracks the latest configuration in a file under the current directory blackbox.<driver>.cache.
To avoid having to rebuild the driver all the time, Blackbox checks if the latest cached configuration matches the current.
## Running Benchmarks
The Vortex test suite is located under the /test/ folder
You can execute the default regression suite by running the following commands at the root folder.
$ make -C tests/regression run-simx
$ make -C tests/regression run-rtlsim
You can execute the default opncl suite by running the following commands at the root folder.
$ make -C tests/opencl run-simx
$ make -C tests/opencl run-rtlsim
## Creating Your Own Regression Test
Inside `tests/regression` you will find a series of folders which are named based on what they test.
You can view the tests to see which ones have tests similar to what you are trying to create new tests for.
Once you have found a similar baseline, you can copy the folder and rename it to what you are planning to test.
A regression test typically implements the following files:
- ***kernel.cpp*** contains the GPU kernel code.
- ***main.cpp*** contains the host CPU code.
- ***Makefile*** defines the compiler build commands for the CPU and GPU binaries.
Sync your build folder: `$ ../configure`
Compile your test: `$ make -C tests/regression/<test-name>`
Run your test: `$ ./ci/blackbox.sh --driver=simx --app=<test-name> --debug`
## Adding Your Tests to the CI Pipeline
If you are a contributor, then you will need to add tests that integrate into the continuous integration pipeline. Remember, Pull Requests cannot be merged unless new code has tests and existing tests do not regress. Furthermore, if you are contributing a new feature, it is recommended that you add the ability to enable / disable the new feature that you are adding. See more at [contributing.md](contributing.md) and [continuous_integration.md](continuous_integration.md).

View file

@ -1,21 +0,0 @@
all: stub rtlsim simx opae
stub:
$(MAKE) -C stub
opae:
$(MAKE) -C opae
rtlsim:
$(MAKE) -C rtlsim
simx:
$(MAKE) -C simx
clean:
$(MAKE) clean -C stub
$(MAKE) clean -C opae
$(MAKE) clean -C rtlsim
$(MAKE) clean -C simx
.PHONY: all stub opae rtlsim simx clean

View file

@ -1,317 +0,0 @@
#include <iostream>
#include <fstream>
#include <cstring>
#include <vortex.h>
#include <VX_config.h>
extern int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) {
int err = 0;
if (NULL == content || 0 == size)
return -1;
uint32_t buffer_transfer_size = 65536;
unsigned kernel_base_addr;
err = vx_dev_caps(device, VX_CAPS_KERNEL_BASE_ADDR, &kernel_base_addr);
if (err != 0)
return -1;
// allocate device buffer
vx_buffer_h buffer;
err = vx_alloc_shared_mem(device, buffer_transfer_size, &buffer);
if (err != 0)
return -1;
// get buffer address
auto buf_ptr = (uint8_t*)vx_host_ptr(buffer);
//
// upload content
//
size_t offset = 0;
while (offset < size) {
auto chunk_size = std::min<size_t>(buffer_transfer_size, size - offset);
std::memcpy(buf_ptr, (uint8_t*)content + offset, chunk_size);
/*printf("*** Upload Kernel to 0x%0x: data=", kernel_base_addr + offset);
for (int i = 0, n = ((chunk_size+7)/8); i < n; ++i) {
printf("%08x", ((uint64_t*)((uint8_t*)content + offset))[n-1-i]);
}
printf("\n");*/
err = vx_copy_to_dev(buffer, kernel_base_addr + offset, chunk_size, 0);
if (err != 0) {
vx_buf_release(buffer);
return err;
}
offset += chunk_size;
}
vx_buf_release(buffer);
return 0;
}
extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
std::ifstream ifs(filename);
if (!ifs) {
std::cout << "error: " << filename << " not found" << std::endl;
return -1;
}
// read file content
ifs.seekg(0, ifs.end);
auto size = ifs.tellg();
auto content = new char [size];
ifs.seekg(0, ifs.beg);
ifs.read(content, size);
// upload
int err = vx_upload_kernel_bytes(device, content, size);
// release buffer
delete[] content;
return err;
}
/*static uint32_t get_csr_32(const uint32_t* buffer, int addr) {
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
return value_lo;
}*/
static uint64_t get_csr_64(const uint32_t* buffer, int addr) {
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
uint32_t value_hi = buffer[addr - CSR_MPM_BASE + 32];
return (uint64_t(value_hi) << 32) | value_lo;
}
extern int vx_dump_perf(vx_device_h device, FILE* stream) {
int ret = 0;
uint64_t instrs = 0;
uint64_t cycles = 0;
#ifdef PERF_ENABLE
// PERF: pipeline stalls
uint64_t ibuffer_stalls = 0;
uint64_t scoreboard_stalls = 0;
uint64_t lsu_stalls = 0;
uint64_t fpu_stalls = 0;
uint64_t csr_stalls = 0;
uint64_t alu_stalls = 0;
uint64_t gpu_stalls = 0;
// PERF: Icache
uint64_t icache_reads = 0;
uint64_t icache_read_misses = 0;
uint64_t icache_pipe_stalls = 0;
uint64_t icache_rsp_stalls = 0;
// PERF: Dcache
uint64_t dcache_reads = 0;
uint64_t dcache_writes = 0;
uint64_t dcache_read_misses = 0;
uint64_t dcache_write_misses = 0;
uint64_t dcache_bank_stalls = 0;
uint64_t dcache_mshr_stalls = 0;
uint64_t dcache_pipe_stalls = 0;
uint64_t dcache_rsp_stalls = 0;
// PERF: SMEM
uint64_t smem_reads = 0;
uint64_t smem_writes = 0;
uint64_t smem_bank_stalls = 0;
// PERF: memory
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_stalls = 0;
uint64_t mem_lat = 0;
#endif
unsigned num_cores;
ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
if (ret != 0)
return ret;
vx_buffer_h staging_buf;
ret = vx_alloc_shared_mem(device, 64 * sizeof(uint32_t), &staging_buf);
if (ret != 0)
return ret;
auto staging_ptr = (uint32_t*)vx_host_ptr(staging_buf);
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
ret = vx_copy_from_dev(staging_buf, IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0);
if (ret != 0) {
vx_buf_release(staging_buf);
return ret;
}
uint64_t instrs_per_core = get_csr_64(staging_ptr, CSR_MINSTRET);
uint64_t cycles_per_core = get_csr_64(staging_ptr, CSR_MCYCLE);
float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
instrs += instrs_per_core;
cycles = std::max<uint64_t>(cycles_per_core, cycles);
#ifdef PERF_ENABLE
// PERF: pipeline
// ibuffer_stall
uint64_t ibuffer_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_IBUF_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: ibuffer stalls=%ld\n", core_id, ibuffer_stalls_per_core);
ibuffer_stalls += ibuffer_stalls_per_core;
// scoreboard_stall
uint64_t scoreboard_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_SCRB_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: scoreboard stalls=%ld\n", core_id, scoreboard_stalls_per_core);
scoreboard_stalls += scoreboard_stalls_per_core;
// alu_stall
uint64_t alu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_ALU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: alu unit stalls=%ld\n", core_id, alu_stalls_per_core);
alu_stalls += alu_stalls_per_core;
// lsu_stall
uint64_t lsu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_LSU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu unit stalls=%ld\n", core_id, lsu_stalls_per_core);
lsu_stalls += lsu_stalls_per_core;
// csr_stall
uint64_t csr_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_CSR_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
csr_stalls += csr_stalls_per_core;
// fpu_stall
uint64_t fpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_FPU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu unit stalls=%ld\n", core_id, fpu_stalls_per_core);
fpu_stalls += fpu_stalls_per_core;
// gpu_stall
uint64_t gpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_GPU_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core);
gpu_stalls += gpu_stalls_per_core;
// PERF: Icache
// total reads
uint64_t icache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_READS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reads=%ld\n", core_id, icache_reads_per_core);
icache_reads += icache_reads_per_core;
// read misses
uint64_t icache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_MISS_R);
int icache_read_hit_ratio = (int)((1.0 - (double(icache_miss_r_per_core) / double(icache_reads_per_core))) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache read misses=%ld (hit ratio=%d%%)\n", core_id, icache_miss_r_per_core, icache_read_hit_ratio);
icache_read_misses += icache_miss_r_per_core;
// pipeline stalls
uint64_t icache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_PIPE_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache pipeline stalls=%ld\n", core_id, icache_pipe_st_per_core);
icache_pipe_stalls += icache_pipe_st_per_core;
// response stalls
uint64_t icache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_CRSP_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reponse stalls=%ld\n", core_id, icache_crsp_st_per_core);
icache_rsp_stalls += icache_crsp_st_per_core;
// PERF: Dcache
// total reads
uint64_t dcache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_READS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reads=%ld\n", core_id, dcache_reads_per_core);
dcache_reads += dcache_reads_per_core;
// total write
uint64_t dcache_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_WRITES);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache writes=%ld\n", core_id, dcache_writes_per_core);
dcache_writes += dcache_writes_per_core;
// read misses
uint64_t dcache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_R);
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_miss_r_per_core) / double(dcache_reads_per_core))) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_r_per_core, dcache_read_hit_ratio);
dcache_read_misses += dcache_miss_r_per_core;
// read misses
uint64_t dcache_miss_w_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_W);
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_miss_w_per_core) / double(dcache_writes_per_core))) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache write misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio);
dcache_write_misses += dcache_miss_w_per_core;
// bank_stalls
uint64_t dcache_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_BANK_ST);
int dcache_bank_utilization = (int)((double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core)) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache bank stalls=%ld (utilization=%d%%)\n", core_id, dcache_bank_st_per_core, dcache_bank_utilization);
dcache_bank_stalls += dcache_bank_st_per_core;
// mshr_stalls
uint64_t dcache_mshr_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MSHR_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache mshr stalls=%ld\n", core_id, dcache_mshr_st_per_core);
dcache_mshr_stalls += dcache_mshr_st_per_core;
// pipeline stalls
uint64_t dcache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_PIPE_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache pipeline stalls=%ld\n", core_id, dcache_pipe_st_per_core);
dcache_pipe_stalls += dcache_pipe_st_per_core;
// response stalls
uint64_t dcache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_CRSP_ST);
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reponse stalls=%ld\n", core_id, dcache_crsp_st_per_core);
dcache_rsp_stalls += dcache_crsp_st_per_core;
// PERF: SMEM
// total reads
uint64_t smem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_READS);
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem reads=%ld\n", core_id, smem_reads_per_core);
smem_reads += smem_reads_per_core;
// total write
uint64_t smem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_WRITES);
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem writes=%ld\n", core_id, smem_writes_per_core);
smem_writes += smem_writes_per_core;
// bank_stalls
uint64_t smem_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_BANK_ST);
int smem_bank_utilization = (int)((double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core)) * 100);
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization);
smem_bank_stalls += smem_bank_st_per_core;
// PERF: memory
uint64_t mem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_READS);
uint64_t mem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_WRITES);
uint64_t mem_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_ST);
uint64_t mem_lat_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_LAT);
int mem_utilization = (int)((double(mem_reads_per_core + mem_writes_per_core) / double(mem_reads_per_core + mem_writes_per_core + mem_stalls_per_core)) * 100);
int mem_avg_lat = (int)(double(mem_lat_per_core) / double(mem_reads_per_core));
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory requests=%ld (reads=%ld, writes=%ld)\n", core_id, (mem_reads_per_core + mem_writes_per_core), mem_reads_per_core, mem_writes_per_core);
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory stalls=%ld (utilization=%d%%)\n", core_id, mem_stalls_per_core, mem_utilization);
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory average latency=%d cycles\n", core_id, mem_avg_lat);
mem_reads += mem_reads_per_core;
mem_writes += mem_writes_per_core;
mem_stalls += mem_stalls_per_core;
mem_lat += mem_lat_per_core;
#endif
}
float IPC = (float)(double(instrs) / double(cycles));
fprintf(stream, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
#ifdef PERF_ENABLE
int icache_read_hit_ratio = (int)((1.0 - (double(icache_read_misses) / double(icache_reads))) * 100);
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_read_misses) / double(dcache_reads))) * 100);
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_write_misses) / double(dcache_writes))) * 100);
int dcache_bank_utilization = (int)((double(dcache_reads + dcache_writes) / double(dcache_reads + dcache_writes + dcache_bank_stalls)) * 100);
int smem_bank_utilization = (int)((double(smem_reads + smem_writes) / double(smem_reads + smem_writes + smem_bank_stalls)) * 100);
int mem_utilization = (int)((double(mem_reads + mem_writes) / double(mem_reads + mem_writes + mem_stalls)) * 100);
int mem_avg_lat = (int)(double(mem_lat) / double(mem_reads));
fprintf(stream, "PERF: ibuffer stalls=%ld\n", ibuffer_stalls);
fprintf(stream, "PERF: scoreboard stalls=%ld\n", scoreboard_stalls);
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
fprintf(stream, "PERF: lsu unit stalls=%ld\n", lsu_stalls);
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
fprintf(stream, "PERF: icache read misses=%ld (hit ratio=%d%%)\n", icache_read_misses, icache_read_hit_ratio);
fprintf(stream, "PERF: icache pipeline stalls=%ld\n", icache_pipe_stalls);
fprintf(stream, "PERF: icache reponse stalls=%ld\n", icache_rsp_stalls);
fprintf(stream, "PERF: dcache reads=%ld\n", dcache_reads);
fprintf(stream, "PERF: dcache writes=%ld\n", dcache_writes);
fprintf(stream, "PERF: dcache read misses=%ld (hit ratio=%d%%)\n", dcache_read_misses, dcache_read_hit_ratio);
fprintf(stream, "PERF: dcache write misses=%ld (hit ratio=%d%%)\n", dcache_write_misses, dcache_write_hit_ratio);
fprintf(stream, "PERF: dcache bank stalls=%ld (utilization=%d%%)\n", dcache_bank_stalls, dcache_bank_utilization);
fprintf(stream, "PERF: dcache mshr stalls=%ld\n", dcache_mshr_stalls);
fprintf(stream, "PERF: dcache pipeline stalls=%ld\n", dcache_pipe_stalls);
fprintf(stream, "PERF: dcache reponse stalls=%ld\n", dcache_rsp_stalls);
fprintf(stream, "PERF: smem reads=%ld\n", smem_reads);
fprintf(stream, "PERF: smem writes=%ld\n", smem_writes);
fprintf(stream, "PERF: smem bank stalls=%ld (utilization=%d%%)\n", smem_bank_stalls, smem_bank_utilization);
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory stalls=%ld (utilization=%d%%)\n", mem_stalls, mem_utilization);
fprintf(stream, "PERF: memory average latency=%d cycles\n", mem_avg_lat);
#endif
// release allocated resources
vx_buf_release(staging_buf);
return ret;
}

View file

@ -1,77 +0,0 @@
#ifndef __VX_DRIVER_H__
#define __VX_DRIVER_H__
#include <stddef.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef void* vx_device_h;
typedef void* vx_buffer_h;
// device caps ids
#define VX_CAPS_VERSION 0x0
#define VX_CAPS_MAX_CORES 0x1
#define VX_CAPS_MAX_WARPS 0x2
#define VX_CAPS_MAX_THREADS 0x3
#define VX_CAPS_CACHE_LINE_SIZE 0x4
#define VX_CAPS_LOCAL_MEM_SIZE 0x5
#define VX_CAPS_ALLOC_BASE_ADDR 0x6
#define VX_CAPS_KERNEL_BASE_ADDR 0x7
#define CACHE_BLOCK_SIZE 64
#define ALLOC_BASE_ADDR 0x00000000
#define LOCAL_MEM_SIZE 0xffffffff
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);
// Close the device when all the operations are done
int vx_dev_close(vx_device_h hdevice);
// return device configurations
int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value);
// Allocate shared buffer with device
int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer);
// Get host pointer address
void* vx_host_ptr(vx_buffer_h hbuffer);
// release buffer
int vx_buf_release(vx_buffer_h hbuffer);
// allocate device memory and return address
int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr);
// Copy bytes from buffer to device local memory
int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset);
// Copy bytes from device local memory to buffer
int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dst_offset);
// Start device execution
int vx_start(vx_device_h hdevice);
// Wait for device ready with milliseconds timeout
int vx_ready_wait(vx_device_h hdevice, long long timeout);
////////////////////////////// UTILITY FUNCIONS ///////////////////////////////
// upload kernel bytes to device
int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size);
// upload kernel file to device
int vx_upload_kernel_file(vx_device_h device, const char* filename);
// dump performance counters
int vx_dump_perf(vx_device_h device, FILE* stream);
#ifdef __cplusplus
}
#endif
#endif // __VX_DRIVER_H__

View file

@ -1,116 +0,0 @@
OPAE_HOME ?= /tools/opae/1.4.0
CXXFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
LDFLAGS += -L$(OPAE_HOME)/lib
#SCOPE=1
# stack execution protection
LDFLAGS +=-z noexecstack
# data relocation and projection
LDFLAGS +=-z relro -z now
# stack buffer overrun detection
CXXFLAGS +=-fstack-protector
# Position independent code
CXXFLAGS += -fPIC
# Add external configuration
CXXFLAGS += $(CONFIGS)
# Dump perf stats
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared
FPGA_LIBS += -luuid -lopae-c
ASE_LIBS += -luuid -lopae-c-ase
VLSIM_LIBS += -lopae-c-vlsim
ASE_DIR = ase
VLSIM_DIR = vlsim
RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
PROJECT = libvortex.so
PROJECT_ASE = $(ASE_DIR)/libvortex.so
PROJECT_VLSIM = $(VLSIM_DIR)/libvortex.so
AFU_JSON_INFO = vortex_afu.h
SRCS = vortex.cpp ../common/vx_utils.cpp
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += vx_scope.cpp
SCOPE_ENABLE = SCOPE=1
SCOPE_H = scope-defs.h
endif
# Enable perf counters
ifdef PERF
CXXFLAGS += -DPERF_ENABLE
PERF_ENABLE = PERF=1
endif
all: vlsim
# AFU info from JSON file, including AFU UUID
json: ../../hw/opae/vortex_afu.json
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
scope-defs.h: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
# generate scope data
scope: scope-defs.h
vlsim-hw: $(SCOPE_H)
$(SCOPE_ENABLE) $(PERF_ENABLE) $(MAKE) -C vlsim
fpga: $(SRCS) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
vlsim: $(SRCS) vlsim-hw
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
vortex.o: vortex.cpp
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
$(ASE_DIR):
mkdir -p ase
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $(SRCS) > .depend;
clean-fpga:
rm -rf $(PROJECT) *.o .depend
clean-asesim:
rm -rf $(PROJECT_ASE) *.o .depend
clean-vlsim:
$(MAKE) -C vlsim clean
clean: clean-fpga clean-asesim clean-vlsim
ifneq ($(MAKECMDGOALS),clean)
-include .depend
endif

View file

@ -1 +0,0 @@
/obj_dir/*

View file

@ -1,98 +0,0 @@
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -DUSE_VLSIM -fPIC -Wno-maybe-uninitialized
CFLAGS += -I../../../../hw
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CFLAGS += $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
# LDFLAGS += -dynamiclib -pthread
TOP = vortex_afu_shim
RTL_DIR=../../../hw/rtl
DPI_DIR=../../../hw/dpi
SRCS = fpga.cpp opae_sim.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += $(CONFIGS)
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CFLAGS += -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
VL_FLAGS += -DSCOPE
CFLAGS += -DSCOPE
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CFLAGS += -DPERF_ENABLE
endif
# use our OPAE shim
VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
PROJECT = libopae-c-vlsim.so
all: $(PROJECT)
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
../../../hw/scripts/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
$(PROJECT): $(SRCS) vortex_afu.h
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(TOP).mk
clean:
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh vortex_afu.h

View file

@ -1,84 +0,0 @@
#include <stdint.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <unistd.h>
#include <assert.h>
#include "fpga.h"
#include "opae_sim.h"
#include <VX_config.h>
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
if (NULL == handle || flags != 0)
return FPGA_INVALID_PARAM;
auto sim = new opae_sim();
*handle = reinterpret_cast<fpga_handle>(sim);
return FPGA_OK;
}
extern fpga_result fpgaClose(fpga_handle handle) {
if (NULL == handle)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
delete sim;
return FPGA_OK;
}
extern fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
if (NULL == handle || len == 0 || buf_addr == NULL || wsid == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
int ret = sim->prepare_buffer(len, buf_addr, wsid, flags);
if (ret != 0)
return FPGA_NO_MEMORY;
return FPGA_OK;
}
extern fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid) {
if (NULL == handle)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->release_buffer(wsid);
return FPGA_OK;
}
extern fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr) {
if (NULL == handle || ioaddr == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->get_io_address(wsid, ioaddr);
return FPGA_OK;
}
extern fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value) {
if (NULL == handle || mmio_num != 0)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->write_mmio64(mmio_num, offset, value);
return FPGA_OK;
}
extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value) {
if (NULL == handle || mmio_num != 0 || value == NULL)
return FPGA_INVALID_PARAM;
auto sim = reinterpret_cast<opae_sim*>(handle);
sim->read_mmio64(mmio_num, offset, value);
return FPGA_OK;
}
extern const char *fpgaErrStr(fpga_result e) {
return "";
}

View file

@ -1,372 +0,0 @@
#include "opae_sim.h"
#include <iostream>
#include <fstream>
#include <iomanip>
#define CCI_LATENCY 8
#define CCI_RAND_MOD 8
#define CCI_RQ_SIZE 16
#define CCI_WQ_SIZE 16
#define ENABLE_MEM_STALLS
#ifndef MEM_LATENCY
#define MEM_LATENCY 24
#endif
#ifndef MEM_RQ_SIZE
#define MEM_RQ_SIZE 16
#endif
#ifndef MEM_STALLS_MODULO
#define MEM_STALLS_MODULO 16
#endif
#ifndef VERILATOR_RESET_VALUE
#define VERILATOR_RESET_VALUE 2
#endif
uint64_t timestamp = 0;
double sc_time_stamp() {
return timestamp;
}
static void *__aligned_malloc(size_t alignment, size_t size) {
// reserve margin for alignment and storing of unaligned address
size_t margin = (alignment-1) + sizeof(void*);
void *unaligned_addr = malloc(size + margin);
void **aligned_addr = (void**)((uintptr_t)(((uint8_t*)unaligned_addr) + margin) & ~(alignment-1));
aligned_addr[-1] = unaligned_addr;
return aligned_addr;
}
static void __aligned_free(void *ptr) {
// retreive the stored unaligned address and use it to free the allocation
void* unaligned_addr = ((void**)ptr)[-1];
free(unaligned_addr);
}
///////////////////////////////////////////////////////////////////////////////
opae_sim::opae_sim()
: stop_(false)
, host_buffer_ids_(0)
{
// force random values for unitialized signals
Verilated::randReset(VERILATOR_RESET_VALUE);
Verilated::randSeed(50);
// Turn off assertion before reset
Verilated::assertOn(false);
vortex_afu_ = new Vvortex_afu_shim();
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC();
vortex_afu_->trace(trace_, 99);
trace_->open("trace.vcd");
#endif
// reset the device
this->reset();
// launch execution thread
future_ = std::async(std::launch::async, [&]{
while (!stop_) {
std::lock_guard<std::mutex> guard(mutex_);
this->step();
}
});
}
opae_sim::~opae_sim() {
stop_ = true;
if (future_.valid()) {
future_.wait();
}
#ifdef VCD_OUTPUT
trace_->close();
#endif
for (auto& buffer : host_buffers_) {
__aligned_free(buffer.second.data);
}
delete vortex_afu_;
}
int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len);
if (alloc == NULL)
return -1;
host_buffer_t buffer;
buffer.data = (uint64_t*)alloc;
buffer.size = len;
buffer.ioaddr = uintptr_t(alloc);
auto buffer_id = host_buffer_ids_++;
host_buffers_.emplace(buffer_id, buffer);
*buf_addr = alloc;
*wsid = buffer_id;
return 0;
}
void opae_sim::release_buffer(uint64_t wsid) {
auto it = host_buffers_.find(wsid);
if (it != host_buffers_.end()) {
__aligned_free(it->second.data);
host_buffers_.erase(it);
}
}
void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
*ioaddr = host_buffers_[wsid].ioaddr;
}
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
this->step();
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
*value = vortex_afu_->af2cp_sTxPort_c2_data;
}
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
this->step();
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
}
///////////////////////////////////////////////////////////////////////////////
void opae_sim::reset() {
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
for (int b = 0; b < MEMORY_BANKS; ++b) {
mem_reads_[b].clear();
vortex_afu_->avs_readdatavalid[b] = 0;
vortex_afu_->avs_waitrequest[b] = 0;
}
vortex_afu_->reset = 1;
for (int i = 0; i < RESET_DELAY; ++i) {
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
}
vortex_afu_->reset = 0;
// Turn on assertion after reset
Verilated::assertOn(true);
}
void opae_sim::step() {
this->sRxPort_bus();
this->sTxPort_bus();
this->avs_bus();
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
#ifndef NDEBUG
fflush(stdout);
#endif
}
void opae_sim::eval() {
vortex_afu_->eval();
#ifdef VCD_OUTPUT
trace_->dump(timestamp);
#endif
++timestamp;
}
void opae_sim::sRxPort_bus() {
// check mmio request
bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid
|| vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid;
// schedule CCI read responses
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
cci_rd_it = it;
}
}
// schedule CCI write responses
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
cci_wr_it = it;
}
}
// send CCI write response
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
if (cci_wr_it != cci_writes_.end()) {
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
vortex_afu_->vcp2af_sRxPort_c1_hdr_resp_type = 0;
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
cci_writes_.erase(cci_wr_it);
}
// send CCI read response (ensure mmio disabled)
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (!mmio_req_enabled
&& (cci_rd_it != cci_reads_.end())) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_hdr_resp_type = 0;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
/*printf("%0ld: [sim] CCI Rd Rsp: addr=%ld, mdata=%d, data=", timestamp, cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i)
printf("%02x", cci_rd_it->data[CACHE_BLOCK_SIZE-1-i]);
printf("\n");*/
cci_reads_.erase(cci_rd_it);
}
}
void opae_sim::sTxPort_bus() {
// process read requests
if (vortex_afu_->af2cp_sTxPort_c0_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull);
cci_rd_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.data.data(), host_ptr, CACHE_BLOCK_SIZE);
//printf("%0ld: [sim] CCI Rd Req: addr=%ld, mdata=%d\n", timestamp, vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
cci_reads_.emplace_back(cci_req);
}
// process write requests
if (vortex_afu_->af2cp_sTxPort_c1_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull);
cci_wr_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
cci_writes_.emplace_back(cci_req);
}
// check queues overflow
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
}
void opae_sim::avs_bus() {
for (int b = 0; b < MEMORY_BANKS; ++b) {
// update memory responses schedule
for (auto& rsp : mem_reads_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
// schedule memory responses in FIFO order
std::list<mem_rd_req_t>::iterator mem_rd_it(mem_reads_[b].end());
if (!mem_reads_[b].empty()
&& (0 == mem_reads_[b].begin()->cycles_left)) {
mem_rd_it = mem_reads_[b].begin();
}
// send memory response
vortex_afu_->avs_readdatavalid[b] = 0;
if (mem_rd_it != mem_reads_[b].end()) {
vortex_afu_->avs_readdatavalid[b] = 1;
memcpy(vortex_afu_->avs_readdata[b], mem_rd_it->data.data(), MEM_BLOCK_SIZE);
uint32_t addr = mem_rd_it->addr;
mem_reads_[b].erase(mem_rd_it);
/*printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%x, pending={", timestamp, b, addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_reads_[b].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process memory requests
if (!mem_stalled) {
assert(!vortex_afu_->avs_read[b] || !vortex_afu_->avs_write[b]);
if (vortex_afu_->avs_write[b]) {
uint64_t byteen = vortex_afu_->avs_byteenable[b];
unsigned base_addr = vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE;
uint8_t* data = (uint8_t*)(vortex_afu_->avs_writedata[b]);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
ram_[base_addr + i] = data[i];
}
}
/*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]);
}
printf("\n");*/
}
if (vortex_afu_->avs_read[b]) {
mem_rd_req_t mem_req;
mem_req.addr = vortex_afu_->avs_address[b];
ram_.read(vortex_afu_->avs_address[b] * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_reads_[b]) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_reads_[b].emplace_back(mem_req);
/*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE);
for (auto& req : mem_reads_[b]) {
if (req.cycles_left != 0)
printf(" !%0x", req.addr * MEM_BLOCK_SIZE);
else
printf(" %0x", req.addr * MEM_BLOCK_SIZE);
}
printf("}\n");*/
}
}
vortex_afu_->avs_waitrequest[b] = mem_stalled;
}
}

View file

@ -1,105 +0,0 @@
#pragma once
#include "verilated.h"
//#include "verilated_stub.h"
#include "Vvortex_afu_shim.h"
#include "Vvortex_afu_shim__Syms.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#endif
#include <VX_config.h>
#include "vortex_afu.h"
#include "ram.h"
#include <ostream>
#include <future>
#include <list>
#include <unordered_map>
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
#undef MEM_BLOCK_SIZE
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
#define CACHE_BLOCK_SIZE 64
class opae_sim {
public:
opae_sim();
virtual ~opae_sim();
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
void release_buffer(uint64_t wsid);
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
private:
typedef struct {
int cycles_left;
std::array<uint8_t, MEM_BLOCK_SIZE> data;
uint32_t addr;
} mem_rd_req_t;
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> data;
uint64_t addr;
uint32_t mdata;
} cci_rd_req_t;
typedef struct {
int cycles_left;
uint32_t mdata;
} cci_wr_req_t;
typedef struct {
uint64_t* data;
size_t size;
uint64_t ioaddr;
} host_buffer_t;
void reset();
void eval();
void step();
void sRxPort_bus();
void sTxPort_bus();
void avs_bus();
std::future<void> future_;
bool stop_;
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
int64_t host_buffer_ids_;
std::list<mem_rd_req_t> mem_reads_ [MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;
std::list<cci_wr_req_t> cci_writes_;
std::mutex mutex_;
RAM ram_;
Vvortex_afu_shim *vortex_afu_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
#endif
};

View file

@ -1,64 +0,0 @@
#pragma once
#include <stdio.h>
#include <stdint.h>
class RAM {
private:
mutable uint8_t *mem_[(1 << 12)];
uint8_t *get(uint32_t address) const {
uint32_t block_addr = address >> 20;
uint32_t block_offset = address & 0x000FFFFF;
if (mem_[block_addr] == NULL) {
mem_[block_addr] = new uint8_t[(1 << 20)];
}
return mem_[block_addr] + block_offset;
}
public:
RAM() {
for (uint32_t i = 0; i < (1 << 12); i++) {
mem_[i] = NULL;
}
}
~RAM() {
this->clear();
}
size_t size() const {
return (1ull << 32);
}
void clear() {
for (uint32_t i = 0; i < (1 << 12); i++) {
if (mem_[i]) {
delete [] mem_[i];
mem_[i] = NULL;
}
}
}
void read(uint32_t address, uint32_t length, uint8_t *data) const {
for (unsigned i = 0; i < length; i++) {
data[i] = *this->get(address + i);
}
}
void write(uint32_t address, uint32_t length, const uint8_t *data) {
for (unsigned i = 0; i < length; i++) {
*this->get(address + i) = data[i];
}
}
uint8_t& operator[](uint32_t address) {
return *get(address);
}
const uint8_t& operator[](uint32_t address) const {
return *get(address);
}
};

View file

@ -1,10 +0,0 @@
`verilator_config
lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
lint_off -file "../rtl/fp_cores/fpnew/*"

View file

@ -1,495 +0,0 @@
#include <stdint.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <unistd.h>
#include <assert.h>
#include <cmath>
#include <sstream>
#include <unordered_map>
#if defined(USE_FPGA) || defined(USE_ASE)
#include <opae/fpga.h>
#include <uuid/uuid.h>
#elif defined(USE_VLSIM)
#include "vlsim/fpga.h"
#endif
#include <vortex.h>
#include <VX_config.h>
#include "vortex_afu.h"
#ifdef SCOPE
#include "vx_scope.h"
#endif
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("[VXDRV] Error: '%s' returned %d, %s!\n", \
#_expr, (int)res, fpgaErrStr(res)); \
return -1; \
} while (false)
///////////////////////////////////////////////////////////////////////////////
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
#define CMD_RUN AFU_IMAGE_CMD_RUN
#define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4)
#define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4)
#define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4)
#define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4)
#define MMIO_DEV_CAPS (AFU_IMAGE_MMIO_DEV_CAPS * 4)
#define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4)
///////////////////////////////////////////////////////////////////////////////
typedef struct vx_device_ {
fpga_handle fpga;
size_t mem_allocation;
unsigned version;
unsigned num_cores;
unsigned num_warps;
unsigned num_threads;
} vx_device_t;
typedef struct vx_buffer_ {
uint64_t wsid;
void* host_ptr;
uint64_t io_addr;
vx_device_h hdevice;
size_t size;
} vx_buffer_t;
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
inline bool is_aligned(size_t addr, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return 0 == (addr & (alignment - 1));
}
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
switch (caps_id) {
case VX_CAPS_VERSION:
*value = device->version;
break;
case VX_CAPS_MAX_CORES:
*value = device->num_cores;
break;
case VX_CAPS_MAX_WARPS:
*value = device->num_warps;
break;
case VX_CAPS_MAX_THREADS:
*value = device->num_threads;
break;
case VX_CAPS_CACHE_LINE_SIZE:
*value = CACHE_BLOCK_SIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = LOCAL_MEM_SIZE;
break;
case VX_CAPS_ALLOC_BASE_ADDR:
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = STARTUP_ADDR;
break;
default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort();
return -1;
}
return 0;
}
extern int vx_dev_open(vx_device_h* hdevice) {
if (nullptr == hdevice)
return -1;
fpga_handle accel_handle;
vx_device_t* device;
#ifndef USE_VLSIM
fpga_result res;
fpga_token accel_token;
fpga_properties filter = nullptr;
fpga_guid guid;
uint32_t num_matches;
// Set up a filter that will search for an accelerator
CHECK_RES(fpgaGetProperties(nullptr, &filter));
res = fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR);
if (res != FPGA_OK) {
fprintf(stderr, "[VXDRV] Error: fpgaGetProperties() returned %d, %s!\n", (int)res, fpgaErrStr(res));
fpgaDestroyProperties(&filter);
return -1;
}
// Add the desired UUID to the filter
uuid_parse(AFU_ACCEL_UUID, guid);
res = fpgaPropertiesSetGUID(filter, guid);
if (res != FPGA_OK) {
fprintf(stderr, "[VXDRV] Error: fpgaPropertiesSetGUID() returned %d, %s!\n", (int)res, fpgaErrStr(res));
fpgaDestroyProperties(&filter);
return -1;
}
// Do the search across the available FPGA contexts
num_matches = 1;
res = fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches);
if (res != FPGA_OK) {
fprintf(stderr, "[VXDRV] Error: fpgaEnumerate() returned %d, %s!\n", (int)res, fpgaErrStr(res));
fpgaDestroyProperties(&filter);
return -1;
}
// Not needed anymore
fpgaDestroyProperties(&filter);
if (num_matches < 1) {
fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID);
fpgaDestroyToken(&accel_token);
return -1;
}
// Open accelerator
res = fpgaOpen(accel_token, &accel_handle, 0);
if (res != FPGA_OK) {
fprintf(stderr, "[VXDRV] Error: fpgaOpen() returned %d, %s!\n", (int)res, fpgaErrStr(res));
fpgaDestroyToken(&accel_token);
return -1;
}
// Done with token
fpgaDestroyToken(&accel_token);
#else
// Open accelerator
CHECK_RES(fpgaOpen(NULL, &accel_handle, 0));
#endif
// allocate device object
device = (vx_device_t*)malloc(sizeof(vx_device_t));
if (nullptr == device) {
fpgaClose(accel_handle);
return -1;
}
device->fpga = accel_handle;
device->mem_allocation = ALLOC_BASE_ADDR;
{
// Load device CAPS
uint64_t dev_caps;
int ret = fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps);
if (ret != FPGA_OK) {
fpgaClose(accel_handle);
return ret;
}
device->version = (dev_caps >> 0) & 0xffff;
device->num_cores = (dev_caps >> 16) & 0xffff;
device->num_warps = (dev_caps >> 32) & 0xffff;
device->num_threads = (dev_caps >> 48) & 0xffff;
#ifndef NDEBUG
fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
device->version, device->num_cores, device->num_warps, device->num_threads);
#endif
}
#ifdef SCOPE
{
int ret = vx_scope_start(accel_handle, 0, -1);
if (ret != 0) {
fpgaClose(accel_handle);
return ret;
}
}
#endif
*hdevice = device;
return 0;
}
extern int vx_dev_close(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
#ifdef SCOPE
vx_scope_stop(device->fpga);
#endif
#ifdef DUMP_PERF_STATS
vx_dump_perf(device, stdout);
#endif
fpgaClose(device->fpga);
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
if (device->mem_allocation + asize > dev_mem_size)
return -1;
*dev_maddr = device->mem_allocation;
device->mem_allocation += asize;
return 0;
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
fpga_result res;
void* host_ptr;
uint64_t wsid;
uint64_t io_addr;
vx_buffer_t* buffer;
if (nullptr == hdevice
|| 0 >= size
|| nullptr == hbuffer)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
res = fpgaPrepareBuffer(device->fpga, asize, &host_ptr, &wsid, 0);
if (FPGA_OK != res) {
return -1;
}
// Get the physical address of the buffer in the accelerator
res = fpgaGetIOAddress(device->fpga, wsid, &io_addr);
if (FPGA_OK != res) {
fpgaReleaseBuffer(device->fpga, wsid);
return -1;
}
// allocate buffer object
buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t));
if (nullptr == buffer) {
fpgaReleaseBuffer(device->fpga, wsid);
return -1;
}
buffer->wsid = wsid;
buffer->host_ptr = host_ptr;
buffer->io_addr = io_addr;
buffer->hdevice = hdevice;
buffer->size = asize;
*hbuffer = buffer;
return 0;
}
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return nullptr;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
return buffer->host_ptr;
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return -1;
vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
fpgaReleaseBuffer(device->fpga, buffer->wsid);
free(buffer);
return 0;
}
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
if (nullptr == hdevice)
return -1;
std::unordered_map<int, std::stringstream> print_bufs;
vx_device_t *device = ((vx_device_t*)hdevice);
struct timespec sleep_time;
#if defined(USE_ASE)
sleep_time.tv_sec = 1;
sleep_time.tv_nsec = 0;
#else
sleep_time.tv_sec = 0;
sleep_time.tv_nsec = 1000000;
#endif
// to milliseconds
long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000);
for (;;) {
uint64_t status;
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
uint16_t cout_data = (status >> 8) & 0xffff;
if (cout_data & 0x0001) {
do {
char cout_char = (cout_data >> 1) & 0xff;
int cout_tid = (cout_data >> 9) & 0xff;
auto& ss_buf = print_bufs[cout_tid];
ss_buf << cout_char;
if (cout_char == '\n') {
std::cout << std::dec << "#" << cout_tid << ": " << ss_buf.str() << std::flush;
ss_buf.str("");
}
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &status));
cout_data = (status >> 8) & 0xffff;
} while (cout_data & 0x0001);
}
uint8_t state = status & 0xff;
if (0 == state || 0 == timeout) {
for (auto& buf : print_bufs) {
auto str = buf.second.str();
if (!str.empty()) {
std::cout << "#" << buf.first << ": " << str << std::endl;
}
}
if (state != 0) {
fprintf(stdout, "[VXDRV] ready-wait timed out: state=%d\n", state);
}
break;
}
nanosleep(&sleep_time, nullptr);
timeout -= sleep_time_ms;
};
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
// check alignment
if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE))
return -1;
if (!is_aligned(buffer->io_addr + src_offset, CACHE_BLOCK_SIZE))
return -1;
// bound checking
if (src_offset + asize > buffer->size)
return -1;
if (dev_maddr + asize > dev_mem_size)
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_WRITE));
// Wait for the write operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
return 0;
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
vx_buffer_t *buffer = ((vx_buffer_t*)hbuffer);
vx_device_t *device = ((vx_device_t*)buffer->hdevice);
size_t dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
// check alignment
if (!is_aligned(dev_maddr, CACHE_BLOCK_SIZE))
return -1;
if (!is_aligned(buffer->io_addr + dest_offset, CACHE_BLOCK_SIZE))
return -1;
// bound checking
if (dest_offset + asize > buffer->size)
return -1;
if (dev_maddr + asize > dev_mem_size)
return -1;
// Ensure ready for new command
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
auto ls_shift = (int)std::log2(CACHE_BLOCK_SIZE);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_MEM_READ));
// Wait for the write operation to finish
if (vx_ready_wait(buffer->hdevice, -1) != 0)
return -1;
return 0;
}
extern int vx_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device_t *device = ((vx_device_t*)hdevice);
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
return -1;
// start execution
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
return 0;
}

View file

@ -1,24 +0,0 @@
//
// Generated by afu_json_mgr from ../../hw/opae/vortex_afu.json
//
#ifndef __AFU_JSON_INFO__
#define __AFU_JSON_INFO__
#define AFU_ACCEL_NAME "vortex_afu"
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
#define AFU_IMAGE_CMD_MEM_READ 1
#define AFU_IMAGE_CMD_MEM_WRITE 2
#define AFU_IMAGE_CMD_RUN 3
#define AFU_IMAGE_MMIO_CMD_TYPE 10
#define AFU_IMAGE_MMIO_DATA_SIZE 16
#define AFU_IMAGE_MMIO_IO_ADDR 12
#define AFU_IMAGE_MMIO_MEM_ADDR 14
#define AFU_IMAGE_MMIO_SCOPE_READ 20
#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
#define AFU_IMAGE_MMIO_DEV_CAPS 24
#define AFU_IMAGE_MMIO_STATUS 18
#define AFU_IMAGE_POWER 0
#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
#endif // __AFU_JSON_INFO__

View file

@ -1,257 +0,0 @@
#include <iostream>
#include <fstream>
#include <thread>
#include <chrono>
#include <vector>
#include <assert.h>
#include <chrono>
#include <thread>
#include <mutex>
#ifdef USE_VLSIM
#include "vlsim/fpga.h"
#else
#include <opae/fpga.h>
#endif
#include <VX_config.h>
#include "vx_scope.h"
#include "vortex_afu.h"
#include "scope-defs.h"
#define FRAME_FLUSH_SIZE 100
#define CHECK_RES(_expr) \
do { \
fpga_result res = _expr; \
if (res == FPGA_OK) \
break; \
printf("OPAE Error: '%s' returned %d, %s!\n", \
#_expr, (int)res, fpgaErrStr(res)); \
return -1; \
} while (false)
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
#define CMD_GET_VALID 0
#define CMD_GET_DATA 1
#define CMD_GET_WIDTH 2
#define CMD_GET_COUNT 3
#define CMD_SET_START 4
#define CMD_SET_STOP 5
#define CMD_GET_OFFSET 6
static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t);
static constexpr int num_taps = sizeof(scope_taps) / sizeof(scope_tap_t);
constexpr int calcFrameWidth(int index = 0) {
return (index < num_taps) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0;
}
static constexpr int fwidth = calcFrameWidth();
#ifdef HANG_TIMEOUT
static std::thread g_timeout_thread;
static std::mutex g_timeout_mutex;
static void timeout_callback(fpga_handle fpga) {
std::this_thread::sleep_for(std::chrono::seconds{HANG_TIMEOUT});
vx_scope_stop(fpga);
fpgaClose(fpga);
exit(0);
}
#endif
uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
while (delta != 0) {
ofs << '#' << timestamp++ << std::endl;
ofs << "b0 0" << std::endl;
ofs << '#' << timestamp++ << std::endl;
ofs << "b1 0" << std::endl;
--delta;
}
return timestamp;
}
void dump_taps(std::ofstream& ofs, int module) {
for (int i = 0; i < num_taps; ++i) {
auto& tap = scope_taps[i];
if (tap.module != module)
continue;
ofs << "$var reg " << tap.width << " " << (i + 1) << " " << tap.name << " $end" << std::endl;
}
}
void dump_module(std::ofstream& ofs, int parent) {
for (auto& module : scope_modules) {
if (module.parent != parent)
continue;
if (module.name[0] == '*') {
ofs << "$var reg 1 0 clk $end" << std::endl;
} else {
ofs << "$scope module " << module.name << " $end" << std::endl;
}
dump_module(ofs, module.index);
dump_taps(ofs, module.index);
if (module.name[0] != '*') {
ofs << "$upscope $end" << std::endl;
}
}
}
int vx_scope_start(fpga_handle hfpga, uint64_t start_time, uint64_t stop_time) {
if (nullptr == hfpga)
return -1;
if (stop_time != uint64_t(-1)) {
// set stop time
uint64_t cmd_stop = ((stop_time << 3) | CMD_SET_STOP);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
std::cout << "scope stop time: " << std::dec << stop_time << "s" << std::endl;
}
// start recording
uint64_t cmd_delay = ((start_time << 3) | CMD_SET_START);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay));
std::cout << "scope start time: " << std::dec << start_time << "s" << std::endl;
#ifdef HANG_TIMEOUT
g_timeout_thread = std::thread(timeout_callback, hfpga);
g_timeout_thread.detach();
#endif
return 0;
}
int vx_scope_stop(fpga_handle hfpga) {
#ifdef HANG_TIMEOUT
if (!g_timeout_mutex.try_lock())
return 0;
#endif
if (nullptr == hfpga)
return -1;
// forced stop
uint64_t cmd_stop = ((0 << 3) | CMD_SET_STOP);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
std::cout << "scope trace dump begin..." << std::endl;
std::ofstream ofs("trace.vcd");
ofs << "$version Generated by Vortex Scope $end" << std::endl;
ofs << "$timescale 1 ns $end" << std::endl;
ofs << "$scope module TOP $end" << std::endl;
dump_module(ofs, -1);
dump_taps(ofs, -1);
ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl;
uint64_t frame_width, max_frames, data_valid, offset, delta;
uint64_t timestamp = 0;
uint64_t frame_offset = 0;
uint64_t frame_no = 0;
int signal_id = 0;
int signal_offset = 0;
// wait for recording to terminate
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
do {
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
if (data_valid)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
} while (true);
// get frame width
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &frame_width));
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
if (fwidth != (int)frame_width) {
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
std::abort();
}
// get max frames
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &max_frames));
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
// get offset
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_OFFSET));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &offset));
// get data
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
// print clock header
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
timestamp = print_clock(ofs, offset + delta + 2, timestamp);
signal_id = num_taps;
std::vector<char> signal_data(frame_width+1);
do {
if (frame_no == (max_frames-1)) {
// verify last frame is valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 1);
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA));
}
// read next data words
uint64_t word;
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &word));
do {
int signal_width = scope_taps[signal_id-1].width;
int word_offset = frame_offset % 64;
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
++signal_offset;
++frame_offset;
if (signal_offset == signal_width) {
signal_data[signal_width] = 0; // string null termination
ofs << 'b' << signal_data.data() << ' ' << signal_id << std::endl;
signal_offset = 0;
--signal_id;
}
if (frame_offset == frame_width) {
assert(0 == signal_offset);
frame_offset = 0;
++frame_no;
if (frame_no != max_frames) {
// print clock header
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &delta));
timestamp = print_clock(ofs, delta + 1, timestamp);
signal_id = num_taps;
if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
ofs << std::flush;
std::cout << "*** " << frame_no << "/" << max_frames << " frames" << std::endl;
}
}
}
} while ((frame_offset % 64) != 0);
} while (frame_no != max_frames);
std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;
// verify data not valid
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID));
CHECK_RES(fpgaReadMMIO64(hfpga, 0, MMIO_SCOPE_READ, &data_valid));
assert(data_valid == 0);
return 0;
}

View file

@ -1,11 +0,0 @@
#pragma once
#if defined(USE_FPGA)
#define HANG_TIMEOUT 60
#else
#define HANG_TIMEOUT (30*60)
#endif
int vx_scope_start(fpga_handle hfpga, uint64_t start_time = 0, uint64_t stop_time = -1);
int vx_scope_stop(fpga_handle hfpga);

View file

@ -1,2 +0,0 @@
obj_dir
*.so

View file

@ -1,85 +0,0 @@
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -DUSE_RTLSIM -fPIC -Wno-maybe-uninitialized
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CFLAGS += $(CONFIGS)
CFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
TOP = Vortex
RTL_DIR = ../../hw/rtl
DPI_DIR = ../../hw/dpi
SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE)
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += $(CONFIGS)
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --trace --trace-structs $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CFLAGS += -DNDEBUG
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CFLAGS += -DPERF_ENABLE
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)
PROJECT = libvortex.so
# PROJECT = libvortex.dylib
all: $(PROJECT)
$(PROJECT): $(SRCS)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(TOP).mk
clean:
rm -rf $(PROJECT) obj_dir

View file

@ -1,64 +0,0 @@
#pragma once
#include <stdio.h>
#include <stdint.h>
class RAM {
private:
mutable uint8_t *mem_[(1 << 12)];
uint8_t *get(uint32_t address) const {
uint32_t block_addr = address >> 20;
uint32_t block_offset = address & 0x000FFFFF;
if (mem_[block_addr] == NULL) {
mem_[block_addr] = new uint8_t[(1 << 20)];
}
return mem_[block_addr] + block_offset;
}
public:
RAM() {
for (uint32_t i = 0; i < (1 << 12); i++) {
mem_[i] = NULL;
}
}
~RAM() {
this->clear();
}
size_t size() const {
return (1ull << 32);
}
void clear() {
for (uint32_t i = 0; i < (1 << 12); i++) {
if (mem_[i]) {
delete [] mem_[i];
mem_[i] = NULL;
}
}
}
void read(uint32_t address, uint32_t length, uint8_t *data) const {
for (unsigned i = 0; i < length; i++) {
data[i] = *this->get(address + i);
}
}
void write(uint32_t address, uint32_t length, const uint8_t *data) {
for (unsigned i = 0; i < length; i++) {
*this->get(address + i) = data[i];
}
}
uint8_t& operator[](uint32_t address) {
return *get(address);
}
const uint8_t& operator[](uint32_t address) const {
return *get(address);
}
};

View file

@ -1,10 +0,0 @@
`verilator_config
lint_off -rule BLKANDNBLK -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNOPTFLAT -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule WIDTH -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule UNUSED -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule LITENDIAN -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule IMPORTSTAR -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -rule PINCONNECTEMPTY -file "../../hw/rtl/fp_cores/fpnew/*"
lint_off -file "../rtl/fp_cores/fpnew/*"

View file

@ -1,311 +0,0 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <future>
#include <chrono>
#include <vortex.h>
#include <VX_config.h>
#include <ram.h>
#include <simulator.h>
///////////////////////////////////////////////////////////////////////////////
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size, CACHE_BLOCK_SIZE);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
size_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
size_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
class vx_device {
public:
vx_device() {
mem_allocation_ = ALLOC_BASE_ADDR;
}
~vx_device() {
if (future_.valid()) {
future_.wait();
}
}
int alloc_local_mem(size_t size, size_t* dev_maddr) {
auto dev_mem_size = LOCAL_MEM_SIZE;
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
mem_allocation_ += asize;
return 0;
}
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
if (dest_addr + asize > ram_.size())
return -1;
/*printf("VXDRV: upload %d bytes from 0x%lx to 0x%lx", size, (uint8_t*)src + src_offset, dest_addr);
if (size <= 1024) {
printf(": ");
for (int i = asize-1; i >= 0; --i) {
printf("%x", *((uint8_t*)src + src_offset + i));
}
}
printf("\n");*/
ram_.write(dest_addr, asize, (const uint8_t*)src + src_offset);
return 0;
}
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
if (src_addr + asize > ram_.size())
return -1;
ram_.read(src_addr, asize, (uint8_t*)dest + dest_offset);
/*printf("VXDRV: download %d bytes from 0x%lx to 0x%lx", size, src_addr, (uint8_t*)dest + dest_offset);
if (size <= 1024) {
printf(": ");
for (int i = asize-1; i >= 0; --i) {
printf("%x", *((uint8_t*)dest + dest_offset + i));
}
}
printf("\n");*/
return 0;
}
int start() {
if (future_.valid()) {
future_.wait(); // ensure prior run completed
}
simulator_.attach_ram(&ram_);
future_ = std::async(std::launch::async, [&]{
simulator_.reset();
while (simulator_.is_busy()) {
simulator_.step();
}
});
return 0;
}
int wait(long long timeout) {
if (!future_.valid())
return 0;
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
std::chrono::seconds wait_time(1);
for (;;) {
auto status = future_.wait_for(wait_time); // wait for 1 sec and check status
if (status == std::future_status::ready
|| 0 == timeout_sec--)
break;
}
return 0;
}
private:
size_t mem_allocation_;
RAM ram_;
Simulator simulator_;
std::future<void> future_;
};
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
if (nullptr == hdevice)
return -1;
switch (caps_id) {
case VX_CAPS_VERSION:
*value = IMPLEMENTATION_ID;
break;
case VX_CAPS_MAX_CORES:
*value = NUM_CORES * NUM_CLUSTERS;
break;
case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS;
break;
case VX_CAPS_MAX_THREADS:
*value = NUM_THREADS;
break;
case VX_CAPS_CACHE_LINE_SIZE:
*value = CACHE_BLOCK_SIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = 0xffffffff;
break;
case VX_CAPS_ALLOC_BASE_ADDR:
*value = 0x10000000;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = STARTUP_ADDR;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();
return -1;
}
return 0;
}
extern int vx_dev_open(vx_device_h* hdevice) {
if (nullptr == hdevice)
return -1;
*hdevice = new vx_device();
return 0;
}
extern int vx_dev_close(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device *device = ((vx_device*)hdevice);
#ifdef DUMP_PERF_STATS
vx_dump_perf(device, stdout);
#endif
delete device;
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->alloc_local_mem(size, dev_maddr);
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| nullptr == hbuffer)
return -1;
vx_device *device = ((vx_device*)hdevice);
auto buffer = new vx_buffer(size, device);
if (nullptr == buffer->data()) {
delete buffer;
return -1;
}
*hbuffer = buffer;
return 0;
}
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return nullptr;
vx_buffer* buffer = ((vx_buffer*)hbuffer);
return buffer->data();
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return -1;
vx_buffer* buffer = ((vx_buffer*)hbuffer);
delete buffer;
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
auto buffer = (vx_buffer*)hbuffer;
if (size + src_offset > buffer->size())
return -1;
return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
auto buffer = (vx_buffer*)hbuffer;
if (size + dest_offset > buffer->size())
return -1;
return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
extern int vx_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->start();
}
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
if (nullptr == hdevice)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->wait(timeout);
}

View file

@ -1,2 +0,0 @@
obj_dir
libvortex.so

View file

@ -1,37 +0,0 @@
PROJECT = libvortex.so
#PROJECT = libvortex.dylib
SIMX_DIR = ../../simX
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -DUSE_SIMX -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../../hw -I$(SIMX_DIR)
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
CXXFLAGS += $(CONFIGS)
CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
#LDFLAGS += -dynamiclib -pthread
SRCS = vortex.cpp ../common/vx_utils.cpp
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/pipeline.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
# Debugigng
ifndef DEBUG
CXXFLAGS += -DNDEBUG
endif
all: $(PROJECT)
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
clean:
rm -rf $(PROJECT) *.o .depend

View file

@ -1,379 +0,0 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
#include <vortex.h>
#include <core.h>
#include <VX_config.h>
#define PAGE_SIZE 4096
///////////////////////////////////////////////////////////////////////////////
inline size_t align_size(size_t size, size_t alignment) {
assert(0 == (alignment & (alignment - 1)));
return (size + alignment - 1) & ~(alignment - 1);
}
///////////////////////////////////////////////////////////////////////////////
class vx_device;
class vx_buffer {
public:
vx_buffer(size_t size, vx_device* device)
: size_(size)
, device_(device) {
auto aligned_asize = align_size(size, CACHE_BLOCK_SIZE);
data_ = malloc(aligned_asize);
}
~vx_buffer() {
if (data_) {
free(data_);
}
}
void* data() const {
return data_;
}
size_t size() const {
return size_;
}
vx_device* device() const {
return device_;
}
private:
size_t size_;
vx_device* device_;
void* data_;
};
///////////////////////////////////////////////////////////////////////////////
class vx_device {
public:
vx_device()
: arch_("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS)
, decoder_(arch_)
, mmu_(PAGE_SIZE, arch_.wsize(), true)
, cores_(arch_.num_cores())
, is_done_(false)
, is_running_(false)
, thread_(__thread_proc__, this)
, ram_((1<<12), (1<<20)) {
mem_allocation_ = ALLOC_BASE_ADDR;
mmu_.attach(ram_, 0, 0xffffffff);
for (int i = 0; i < arch_.num_cores(); ++i) {
cores_[i] = std::make_shared<vortex::Core>(arch_, decoder_, mmu_, i);
}
}
~vx_device() {
mutex_.lock();
is_done_ = true;
mutex_.unlock();
thread_.join();
}
int alloc_local_mem(size_t size, size_t* dev_maddr) {
auto dev_mem_size = LOCAL_MEM_SIZE;
auto asize = align_size(size, CACHE_BLOCK_SIZE);
if (mem_allocation_ + asize > dev_mem_size)
return -1;
*dev_maddr = mem_allocation_;
mem_allocation_ += asize;
return 0;
}
int upload(const void* src, size_t dest_addr, size_t size, size_t src_offset) {
auto asize = align_size(size, CACHE_BLOCK_SIZE);
if (dest_addr + asize > ram_.size())
return -1;
ram_.write(dest_addr, (const uint8_t*)src + src_offset, asize);
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
for (int i = 0; i < size; i += 4) {
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
}*/
return 0;
}
int download(void* dest, size_t src_addr, size_t size, size_t dest_offset) {
size_t asize = align_size(size, CACHE_BLOCK_SIZE);
if (src_addr + asize > ram_.size())
return -1;
ram_.read(src_addr, (uint8_t*)dest + dest_offset, asize);
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
for (int i = 0; i < size; i += 4) {
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
}*/
return 0;
}
int start() {
mutex_.lock();
for (int i = 0; i < arch_.num_cores(); ++i) {
cores_[i]->clear();
}
is_running_ = true;
mutex_.unlock();
return 0;
}
int wait(long long timeout) {
auto timeout_sec = (timeout < 0) ? timeout : (timeout / 1000);
for (;;) {
mutex_.lock();
bool is_running = is_running_;
mutex_.unlock();
if (!is_running || 0 == timeout_sec--)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
}
return 0;
}
int get_csr(int core_id, int addr, unsigned *value) {
*value = cores_.at(core_id)->get_csr(addr, 0, 0);
return 0;
}
int set_csr(int core_id, int addr, unsigned value) {
cores_.at(core_id)->set_csr(addr, value, 0, 0);
return 0;
}
private:
void run() {
bool running;
do {
running = false;
for (auto& core : cores_) {
core->step();
if (core->running())
running = true;
}
} while (running);
}
void thread_proc() {
std::cout << "Device ready..." << std::flush << std::endl;
for (;;) {
mutex_.lock();
bool is_done = is_done_;
bool is_running = is_running_;
mutex_.unlock();
if (is_done)
break;
if (is_running) {
std::cout << "Device running..." << std::flush << std::endl;
this->run();
mutex_.lock();
is_running_ = false;
mutex_.unlock();
std::cout << "Device ready..." << std::flush << std::endl;
}
}
std::cout << "Device shutdown..." << std::flush << std::endl;
}
static void __thread_proc__(vx_device* device) {
device->thread_proc();
}
vortex::ArchDef arch_;
vortex::Decoder decoder_;
vortex::MemoryUnit mmu_;
std::vector<std::shared_ptr<vortex::Core>> cores_;
bool is_done_;
bool is_running_;
size_t mem_allocation_;
std::thread thread_;
vortex::RAM ram_;
std::mutex mutex_;
};
///////////////////////////////////////////////////////////////////////////////
extern int vx_dev_open(vx_device_h* hdevice) {
if (nullptr == hdevice)
return -1;
*hdevice = new vx_device();
return 0;
}
extern int vx_dev_close(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device *device = ((vx_device*)hdevice);
#ifdef DUMP_PERF_STATS
vx_dump_perf(device, stdout);
#endif
delete device;
return 0;
}
extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
if (nullptr == hdevice)
return -1;
switch (caps_id) {
case VX_CAPS_VERSION:
*value = IMPLEMENTATION_ID;
break;
case VX_CAPS_MAX_CORES:
*value = NUM_CORES * NUM_CLUSTERS;
break;
case VX_CAPS_MAX_WARPS:
*value = NUM_WARPS;
break;
case VX_CAPS_MAX_THREADS:
*value = NUM_THREADS;
break;
case VX_CAPS_CACHE_LINE_SIZE:
*value = CACHE_BLOCK_SIZE;
break;
case VX_CAPS_LOCAL_MEM_SIZE:
*value = LOCAL_MEM_SIZE;
break;
case VX_CAPS_ALLOC_BASE_ADDR:
*value = ALLOC_BASE_ADDR;
break;
case VX_CAPS_KERNEL_BASE_ADDR:
*value = STARTUP_ADDR;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();
return -1;
}
return 0;
}
extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) {
if (nullptr == hdevice
|| nullptr == dev_maddr
|| 0 >= size)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->alloc_local_mem(size, dev_maddr);
}
extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) {
if (nullptr == hdevice
|| 0 >= size
|| nullptr == hbuffer)
return -1;
vx_device *device = ((vx_device*)hdevice);
auto buffer = new vx_buffer(size, device);
if (nullptr == buffer->data()) {
delete buffer;
return -1;
}
*hbuffer = buffer;
return 0;
}
extern void* vx_host_ptr(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return nullptr;
vx_buffer* buffer = ((vx_buffer*)hbuffer);
return buffer->data();
}
extern int vx_buf_release(vx_buffer_h hbuffer) {
if (nullptr == hbuffer)
return -1;
vx_buffer* buffer = ((vx_buffer*)hbuffer);
delete buffer;
return 0;
}
extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
auto buffer = (vx_buffer*)hbuffer;
if (size + src_offset > buffer->size())
return -1;
return buffer->device()->upload(buffer->data(), dev_maddr, size, src_offset);
}
extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) {
if (nullptr == hbuffer
|| 0 >= size)
return -1;
auto buffer = (vx_buffer*)hbuffer;
if (size + dest_offset > buffer->size())
return -1;
return buffer->device()->download(buffer->data(), dev_maddr, size, dest_offset);
}
extern int vx_start(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->start();
}
extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
if (nullptr == hdevice)
return -1;
vx_device *device = ((vx_device*)hdevice);
return device->wait(timeout);
}

View file

@ -1,20 +0,0 @@
CXXFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors
#CXXFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../../runtime -I../../hw
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread
SRCS = vortex.cpp ../common/vx_utils.cpp
PROJECT = libvortex.so
all: $(PROJECT)
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
clean:
rm -rf $(PROJECT) obj_dir

View file

@ -1,45 +0,0 @@
#include <vortex.h>
extern int vx_dev_open(vx_device_h* /*hdevice*/) {
return -1;
}
extern int vx_dev_close(vx_device_h /*hdevice*/) {
return -1;
}
extern int vx_dev_caps(vx_device_h /*hdevice*/, unsigned /*caps_id*/, unsigned* /*value*/) {
return -1;
}
extern int vx_alloc_dev_mem(vx_device_h /*hdevice*/, size_t /*size*/, size_t* /*dev_maddr*/) {
return -1;
}
extern int vx_alloc_shared_mem(vx_device_h /*hdevice*/, size_t /*size*/, vx_buffer_h* /*hbuffer*/) {
return -1;
}
extern void* vx_host_ptr(vx_buffer_h /*hbuffer*/) {
return nullptr;
}
extern int vx_buf_release(vx_buffer_h /*hbuffer*/) {
return -1;
}
extern int vx_copy_to_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*src_offset*/) {
return -1;
}
extern int vx_copy_from_dev(vx_buffer_h /*hbuffer*/, size_t /*dev_maddr*/, size_t /*size*/, size_t /*dest_offset*/) {
return -1;
}
extern int vx_start(vx_device_h /*hdevice*/) {
return -1;
}
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
return -1;
}

1
hw/.gitignore vendored
View file

@ -1 +0,0 @@
obj_dir/*

View file

@ -1,9 +1,22 @@
.PHONY: build_config
ROOT_DIR := $(realpath ..)
include $(ROOT_DIR)/config.mk
build_config: ./rtl/VX_config.vh
./scripts/gen_config.py -i ./rtl/VX_config.vh -o ./VX_config.h
$(MAKE) -C simulate
HW_DIR := $(VORTEX_HOME)/hw
SCRIPT_DIR := $(HW_DIR)/scripts
RTL_DIR := $(HW_DIR)/rtl
all: config
config: VX_config.h VX_types.h
VX_config.h: $(RTL_DIR)/VX_config.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_config.vh -o VX_config.h
VX_types.h: $(RTL_DIR)/VX_types.vh
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/VX_types.vh -o VX_types.h
clean:
rm -f ./VX_config.h
$(MAKE) -C simulate clean
$(MAKE) -C unittest clean
rm -f VX_config.h VX_types.h
.PHONY: VX_config.h VX_types.h

View file

@ -1,2 +0,0 @@
*.v
*.sh

View file

View file

@ -1,296 +1,339 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <math.h>
#include <unordered_map>
#include <vector>
#include <mutex>
#include <iostream>
#include <rvfloats.h>
#include <util.h>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
extern "C" {
void dpi_fadd(int a, int b, int frm, int* result, int* fflags);
void dpi_fsub(int a, int b, int frm, int* result, int* fflags);
void dpi_fmul(int a, int b, int frm, int* result, int* fflags);
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags);
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags);
void dpi_fsqrt(int a, int frm, int* result, int* fflags);
void dpi_ftoi(int a, int frm, int* result, int* fflags);
void dpi_ftou(int a, int frm, int* result, int* fflags);
void dpi_itof(int a, int frm, int* result, int* fflags);
void dpi_utof(int a, int frm, int* result, int* fflags);
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_fclss(int a, int* result);
void dpi_fsgnj(int a, int b, int* result);
void dpi_fsgnjn(int a, int b, int* result);
void dpi_fsgnjx(int a, int b, int* result);
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags);
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_flt(int a, int b, int* result, int* fflags);
void dpi_fle(int a, int b, int* result, int* fflags);
void dpi_feq(int a, int b, int* result, int* fflags);
void dpi_fmin(int a, int b, int* result, int* fflags);
void dpi_fmax(int a, int b, int* result, int* fflags);
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result);
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result);
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags);
}
union Float_t {
float f;
int i;
struct {
uint32_t man : 23;
uint32_t exp : 8;
uint32_t sign : 1;
} parts;
};
void dpi_fadd(int a, int b, int frm, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f + fb.f;
*result = fr.i;
*fflags = 0;
inline uint64_t nan_box(uint32_t value) {
#ifdef XLEN_64
return value | 0xffffffff00000000;
#else
return value;
#endif
}
void dpi_fsub(int a, int b, int frm, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f - fb.f;
*result = fr.i;
*fflags = 0;
inline bool is_nan_boxed(uint64_t value) {
#ifdef XLEN_64
return (uint32_t(value >> 32) == 0xffffffff);
#else
__unused (value);
return true;
#endif
}
void dpi_fmul(int a, int b, int frm, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f * fb.f;
*result = fr.i;
*fflags = 0;
inline int64_t check_boxing(int64_t a) {
if (is_nan_boxed(a))
return a;
return nan_box(0x7fc00000); // NaN
}
void dpi_fmadd(int a, int b, int c, int frm, int* result, int* fflags) {
Float_t fa, fb, fc, fr;
fa.i = a;
fb.i = b;
fc.i = c;
fr.f = fa.f * fb.f + fc.f;
*result = fr.i;
*fflags = 0;
void dpi_fadd(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fadd_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fadd_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fmsub(int a, int b, int c, int frm, int* result, int* fflags) {
Float_t fa, fb, fc, fr;
fa.i = a;
fb.i = b;
fc.i = c;
fr.f = fa.f * fb.f - fc.f;
*result = fr.i;
*fflags = 0;
void dpi_fsub(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsub_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fsub_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fnmadd(int a, int b, int c, int frm, int* result, int* fflags) {
Float_t fa, fb, fc, fr;
fa.i = a;
fb.i = b;
fc.i = c;
fr.f = -(fa.f * fb.f + fc.f);
*result = fr.i;
*fflags = 0;
void dpi_fmul(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmul_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmul_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_fnmsub(int a, int b, int c, int frm, int* result, int* fflags) {
Float_t fa, fb, fc, fr;
fa.i = a;
fb.i = b;
fc.i = c;
fr.f = -(fa.f * fb.f - fc.f);
*result = fr.i;
*fflags = 0;
void dpi_fmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_fdiv(int a, int b, int frm, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f / fb.f;
*result = fr.i;
*fflags = 0;
void dpi_fmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_fsqrt(int a, int frm, int* result, int* fflags) {
Float_t fa, fr;
fa.i = a;
fr.f = sqrtf(fa.f);
*result = fr.i;
*fflags = 0;
void dpi_fnmadd(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fnmadd_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmadd_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_ftoi(int a, int frm, int* result, int* fflags) {
Float_t fa, fr;
fa.i = a;
fr.i = int(fa.f);
*result = fr.i;
*fflags = 0;
void dpi_fnmsub(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t c, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fnmsub_d(a, b, c, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fnmsub_s(check_boxing(a), check_boxing(b), check_boxing(c), (*frm & 0x7), fflags));
}
}
void dpi_ftou(int a, int frm, int* result, int* fflags) {
Float_t fa, fr;
fa.i = a;
fr.i = unsigned(fa.f);
*result = fr.i;
*fflags = 0;
void dpi_fdiv(bool enable, int dst_fmt, int64_t a, int64_t b, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fdiv_d(a, b, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fdiv_s(check_boxing(a), check_boxing(b), (*frm & 0x7), fflags));
}
}
void dpi_itof(int a, int frm, int* result, int* fflags) {
Float_t fa, fr;
fr.f = (float)a;
*result = fr.i;
*fflags = 0;
void dpi_fsqrt(bool enable, int dst_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsqrt_d(a, (*frm & 0x7), fflags);
} else {
*result = nan_box(rv_fsqrt_s(check_boxing(a), (*frm & 0x7), fflags));
}
}
void dpi_utof(int a, int frm, int* result, int* fflags) {
Float_t fa, fr;
unsigned ua = a;
fr.f = (float)ua;
*result = fr.i;
*fflags = 0;
}
void dpi_flt(int a, int b, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f < fb.f;
*result = fr.i;
*fflags = 0;
}
void dpi_fle(int a, int b, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f <= fb.f;
*result = fr.i;
*fflags = 0;
}
void dpi_feq(int a, int b, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = fa.f == fb.f;
*result = fr.i;
*fflags = 0;
}
void dpi_fmin(int a, int b, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = std::min<float>(fa.f, fb.f);
*result = fr.i;
*fflags = 0;
}
void dpi_fmax(int a, int b, int* result, int* fflags) {
Float_t fa, fb, fr;
fa.i = a;
fb.i = b;
fr.f = std::max<float>(fa.f, fb.f);
*result = fr.i;
*fflags = 0;
}
void dpi_fclss(int a, int* result) {
int r = 0; // clear all bits
bool fsign = (a >> 31);
uint32_t expo = (a >> 23) & 0xFF;
uint32_t fraction = a & 0x7FFFFF;
if ((expo == 0) && (fraction == 0)) {
r = fsign ? (1 << 3) : (1 << 4); // +/- 0
} else if ((expo == 0) && (fraction != 0)) {
r = fsign ? (1 << 2) : (1 << 5); // +/- subnormal
} else if ((expo == 0xFF) && (fraction == 0)) {
r = fsign ? (1<<0) : (1<<7); // +/- infinity
} else if ((expo == 0xFF ) && (fraction != 0)) {
if (!fsign && (fraction == 0x00400000)) {
r = (1 << 9); // quiet NaN
} else {
r = (1 << 8); // signaling NaN
void dpi_ftoi(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
*result = rv_ftol_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftol_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
r = fsign ? (1 << 1) : (1 << 6); // +/- normal
if (src_fmt) {
*result = sext<uint64_t>(rv_ftoi_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftoi_s(check_boxing(a), (*frm & 0x7), fflags), 32);
}
}
*result = r;
}
void dpi_fsgnj(int a, int b, int* result) {
int sign = b & 0x80000000;
int r = sign | (a & 0x7FFFFFFF);
*result = r;
void dpi_ftou(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
*result = rv_ftolu_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_ftolu_s(check_boxing(a), (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = sext<uint64_t>(rv_ftou_d(a, (*frm & 0x7), fflags), 32);
} else {
*result = sext<uint64_t>(rv_ftou_s(check_boxing(a), (*frm & 0x7), fflags), 32);
}
}
}
void dpi_fsgnjn(int a, int b, int* result) {
int sign = ~b & 0x80000000;
int r = sign | (a & 0x7FFFFFFF);
*result = r;
void dpi_itof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
*result = rv_ltof_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_itof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = nan_box(rv_ltof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_itof_s(a, (*frm & 0x7), fflags));
}
}
}
void dpi_fsgnjx(int a, int b, int* result) {
int sign1 = a & 0x80000000;
int sign2 = b & 0x80000000;
int r = (sign1 ^ sign2) | (a & 0x7FFFFFFF);
void dpi_utof(bool enable, int dst_fmt, int src_fmt, int64_t a, const svBitVecVal* frm, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
if (src_fmt) {
*result = rv_lutof_d(a, (*frm & 0x7), fflags);
} else {
*result = rv_utof_d(a, (*frm & 0x7), fflags);
}
} else {
if (src_fmt) {
*result = nan_box(rv_lutof_s(a, (*frm & 0x7), fflags));
} else {
*result = nan_box(rv_utof_s(a, (*frm & 0x7), fflags));
}
}
}
*result = r;
void dpi_f2f(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_ftod((int32_t)check_boxing(a));
} else {
*result = nan_box(rv_dtof(a));
}
}
void dpi_fclss(bool enable, int dst_fmt, int64_t a, int64_t* result) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fclss_d(a);
} else {
*result = rv_fclss_s(check_boxing(a));
}
}
void dpi_fsgnj(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnj_d(a, b);
} else {
*result = nan_box(rv_fsgnj_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_fsgnjn(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnjn_d(a, b);
} else {
*result = nan_box(rv_fsgnjn_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_fsgnjx(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fsgnjx_d(a, b);
} else {
*result = nan_box(rv_fsgnjx_s(check_boxing(a), check_boxing(b)));
}
}
void dpi_flt(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_flt_d(a, b, fflags);
} else {
*result = rv_flt_s(check_boxing(a), check_boxing(b), fflags);
}
}
void dpi_fle(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fle_d(a, b, fflags);
} else {
*result = rv_fle_s(check_boxing(a), check_boxing(b), fflags);
}
}
void dpi_feq(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_feq_d(a, b, fflags);
} else {
*result = rv_feq_s(check_boxing(a), check_boxing(b), fflags);
}
}
void dpi_fmin(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmin_d(a, b, fflags);
} else {
*result = nan_box(rv_fmin_s(check_boxing(a), check_boxing(b), fflags));
}
}
void dpi_fmax(bool enable, int dst_fmt, int64_t a, int64_t b, int64_t* result, svBitVecVal* fflags) {
if (!enable)
return;
if (dst_fmt) {
*result = rv_fmax_d(a, b, fflags);
} else {
*result = nan_box(rv_fmax_s(check_boxing(a), check_boxing(b), fflags));
}
}

View file

@ -1,31 +1,45 @@
`ifndef FLOAT_DPI
`define FLOAT_DPI
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import "DPI-C" context function void dpi_fadd(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fsub(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmul(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fnmadd(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fnmsub(input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
`ifndef FLOAT_DPI_VH
`define FLOAT_DPI_VH
import "DPI-C" context function void dpi_fdiv(input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fsqrt(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fadd(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsub(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmul(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmadd(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmsub(input logic enable, input int dst_fmt, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_ftoi(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_ftou(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_itof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_utof(input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fdiv(input logic enable, input int dst_fmt, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsqrt(input logic enable, input int dst_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fclss(input int a, output int result);
import "DPI-C" context function void dpi_fsgnj(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjn(input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsgnjx(input int a, input int b, output int result);
import "DPI-C" function void dpi_ftoi(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftou(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_itof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_utof(input logic enable, input int dst_fmt, input int src_fmt, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_f2f(input logic enable, input int dst_fmt, input longint a, output longint result);
import "DPI-C" context function void dpi_flt(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fle(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_feq(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmin(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" context function void dpi_fmax(input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fclss(input logic enable, input int dst_fmt, input longint a, output longint result);
import "DPI-C" function void dpi_fsgnj(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result);
`endif
import "DPI-C" function void dpi_flt(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fle(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_feq(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmin(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmax(input logic enable, input int dst_fmt, input longint a, input longint b, output longint result, output bit[4:0] fflags);
`endif

View file

@ -1,21 +1,57 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <math.h>
#include <unordered_map>
#include <vector>
#include <mutex>
#include <iostream>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
#ifdef XLEN_64
#define iword_t int64_t
#define uword_t uint64_t
#define idword_t __int128_t
#define udword_t __uint128_t
#else
#define iword_t int32_t
#define uword_t uint32_t
#define idword_t int64_t
#define udword_t uint64_t
#endif
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 3
#endif
extern "C" {
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder);
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth);
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder);
int dpi_register();
void dpi_assert(int inst, bool cond, int delay);
void dpi_trace(int level, const char* format, ...);
void dpi_trace_start();
void dpi_trace_stop();
}
bool sim_trace_enabled();
void sim_trace_enable(bool enable);
class ShiftRegister {
public:
ShiftRegister() : init_(false), depth_(0) {}
@ -30,7 +66,7 @@ public:
void push(int value, bool enable) {
if (!enable)
return;
return;
for (unsigned i = 0; i < depth_-1; ++i) {
buffer_[i] = buffer_[i+1];
}
@ -45,7 +81,7 @@ private:
std::vector<int> buffer_;
bool init_;
unsigned depth_;
unsigned depth_;
};
class Instances {
@ -55,9 +91,9 @@ public:
}
int allocate() {
mutex_.lock();
mutex_.lock();
int inst = instances_.size();
instances_.resize(inst + 1);
instances_.resize(inst + 1);
mutex_.unlock();
return inst;
}
@ -86,45 +122,56 @@ void dpi_assert(int inst, bool cond, int delay) {
}
}
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
uint64_t first = a;
uint64_t second = b;
if (is_signed_a && (a & 0x80000000)) {
first |= 0xFFFFFFFF00000000;
///////////////////////////////////////////////////////////////////////////////
void dpi_imul(bool enable, bool is_signed_a, bool is_signed_b, iword_t a, iword_t b, iword_t* resultl, iword_t* resulth) {
if (!enable)
return;
udword_t first = *(uword_t*)&a;
udword_t second = *(uword_t*)&b;
udword_t mask = udword_t(-1) << (8 * sizeof(iword_t));
if (is_signed_a && a < 0) {
first |= mask;
}
if (is_signed_b && (b & 0x80000000)) {
second |= 0xFFFFFFFF00000000;
if (is_signed_b && b < 0) {
second |= mask;
}
uint64_t result;
udword_t result;
if (is_signed_a || is_signed_b) {
result = (int64_t)first * (int64_t)second;
result = idword_t(first) * idword_t(second);
} else {
result = first * second;
}
*resultl = result & 0xFFFFFFFF;
*resulth = (result >> 32) & 0xFFFFFFFF;
}
*resultl = iword_t(result);
*resulth = iword_t(result >> (8 * sizeof(iword_t)));
}
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
uint32_t dividen = a;
uint32_t divisor = b;
void dpi_idiv(bool enable, bool is_signed, iword_t a, iword_t b, iword_t* quotient, iword_t* remainder) {
if (!enable)
return;
uword_t dividen = a;
uword_t divisor = b;
auto inf_neg = uword_t(1) << (8 * sizeof(iword_t) - 1);
if (is_signed) {
if (b == 0) {
*quotient = -1;
*remainder = dividen;
} else if (dividen == 0x80000000 && divisor == 0xffffffff) {
} else if (dividen == inf_neg && divisor == -1) {
*remainder = 0;
*quotient = dividen;
} else {
*quotient = (int32_t)dividen / (int32_t)divisor;
*remainder = (int32_t)dividen % (int32_t)divisor;
} else {
*quotient = (iword_t)dividen / (iword_t)divisor;
*remainder = (iword_t)dividen % (iword_t)divisor;
}
} else {
} else {
if (b == 0) {
*quotient = -1;
*remainder = dividen;
@ -133,4 +180,25 @@ void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
*remainder = dividen % divisor;
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
void dpi_trace(int level, const char* format, ...) {
if (level > DEBUG_LEVEL)
return;
if (!sim_trace_enabled())
return;
va_list va;
va_start(va, format);
vprintf(format, va);
va_end(va);
}
void dpi_trace_start() {
sim_trace_enable(true);
}
void dpi_trace_stop() {
sim_trace_enable(false);
}

View file

@ -1,10 +1,33 @@
`ifndef UTIL_DPI
`define UTIL_DPI
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import "DPI-C" context function void dpi_imul(input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
import "DPI-C" context function void dpi_idiv(input int a, input int b, input logic is_signed, output int quotient, output int remainder);
`ifndef UTIL_DPI_VH
`define UTIL_DPI_VH
import "DPI-C" context function int dpi_register();
import "DPI-C" context function void dpi_assert(int inst, input logic cond, input int delay);
`ifdef XLEN_64
`define INT_TYPE longint
`else
`define INT_TYPE int
`endif
`endif
import "DPI-C" function void dpi_imul(input logic enable, input logic is_signed_a, input logic is_signed_b, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE resultl, output `INT_TYPE resulth);
import "DPI-C" function void dpi_idiv(input logic enable, input logic is_signed, input `INT_TYPE a, input `INT_TYPE b, output `INT_TYPE quotient, output `INT_TYPE remainder);
import "DPI-C" function int dpi_register();
import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay);
import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/);
import "DPI-C" function void dpi_trace_start();
import "DPI-C" function void dpi_trace_stop();
`endif

View file

@ -1,19 +0,0 @@
#include "Vrf2_32x128_wm1_rtl.h"
#include "verilated.h"
int main()
{
Vrf2_32x128_wm1_rtl module;
for (int i = 0; i < 10; i++)
{
// module.clk = 0;
module.eval();
// module.clk = 1;
module.eval();
}
return 0;
}

View file

@ -1,13 +0,0 @@
set SOURCE_FILES [glob *.lib]
foreach FILE ${SOURCE_FILES} {
read_lib $FILE
redirect -variable CURR_LIB {get_lib}
set CURR_LIB [string range $CURR_LIB 2 end-3]
set CURR_LIB [lindex $CURR_LIB 0]
set FILENAME [string range $FILE 0 end-4]
write_lib $CURR_LIB -output ${FILENAME}.db
remove_lib $CURR_LIB
}
exit

1
hw/rtl/.gitignore vendored
View file

@ -1 +0,0 @@
/VX_user_config.vh

View file

@ -1,213 +0,0 @@
`include "VX_define.vh"
module VX_alu_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// Inputs
VX_alu_req_if alu_req_if,
// Outputs
VX_branch_ctl_if branch_ctl_if,
VX_commit_if alu_commit_if
);
`UNUSED_PARAM (CORE_ID)
reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][31:0] add_result;
wire [`NUM_THREADS-1:0][32:0] sub_result;
wire [`NUM_THREADS-1:0][31:0] shr_result;
reg [`NUM_THREADS-1:0][31:0] msc_result;
wire stall_in, stall_out;
`UNUSED_VAR (alu_req_if.op_mod)
wire is_br_op = `ALU_IS_BR(alu_req_if.op_mod);
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
wire alu_signed = `ALU_SIGNED(alu_op);
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
wire is_sub = (alu_op == `ALU_SUB);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
assign sub_result[i] = sub_in1 - sub_in2;
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
assign shr_result[i] = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op)
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
endcase
end
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op_class)
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
: shr_result[i]; // SRL, SRA
// 2'b11,
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
endcase
end
end
// branch
wire is_jal = is_br_op && (br_op == `BR_JAL || br_op == `BR_JALR);
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
wire [31:0] br_dest = add_result[alu_req_if.tid];
wire [32:0] cmp_result = sub_result[alu_req_if.tid];
wire is_less = cmp_result[32];
wire is_equal = ~(| cmp_result[31:0]);
wire br_neg = `BR_NEG(br_op);
wire br_less = `BR_LESS(br_op);
wire br_static = `BR_STATIC(br_op);
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
// output
wire result_valid;
wire [`NW_BITS-1:0] result_wid;
wire [`NUM_THREADS-1:0] result_tmask;
wire [31:0] result_PC;
wire [`NR_BITS-1:0] result_rd;
wire result_wb;
wire [`NUM_THREADS-1:0][31:0] result_data;
wire result_is_br;
`ifdef EXT_M_ENABLE
wire mul_ready_in;
wire mul_valid_out;
wire mul_ready_out;
wire [`NW_BITS-1:0] mul_wid;
wire [`NUM_THREADS-1:0] mul_tmask;
wire [31:0] mul_PC;
wire [`NR_BITS-1:0] mul_rd;
wire mul_wb;
wire [`NUM_THREADS-1:0][31:0] mul_data;
wire is_mul_op = `ALU_IS_MUL(alu_req_if.op_mod);
VX_muldiv muldiv (
.clk (clk),
.reset (reset),
// Inputs
.alu_op (`MUL_OP(alu_req_if.op_type)),
.wid_in (alu_req_if.wid),
.tmask_in (alu_req_if.tmask),
.PC_in (alu_req_if.PC),
.rd_in (alu_req_if.rd),
.wb_in (alu_req_if.wb),
.alu_in1 (alu_req_if.rs1_data),
.alu_in2 (alu_req_if.rs2_data),
// Outputs
.wid_out (mul_wid),
.tmask_out (mul_tmask),
.PC_out (mul_PC),
.rd_out (mul_rd),
.wb_out (mul_wb),
.data_out (mul_data),
// handshake
.valid_in (alu_req_if.valid && is_mul_op),
.ready_in (mul_ready_in),
.valid_out (mul_valid_out),
.ready_out (mul_ready_out)
);
assign stall_in = (is_mul_op && ~mul_ready_in)
|| (~is_mul_op && (mul_valid_out || stall_out));
assign mul_ready_out = ~stall_out;
assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op);
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
assign result_rd = mul_valid_out ? mul_rd : alu_req_if.rd;
assign result_wb = mul_valid_out ? mul_wb : alu_req_if.wb;
assign result_data = mul_valid_out ? mul_data : alu_jal_result;
assign result_is_br = ~mul_valid_out && is_br_op;
`else
assign stall_in = 0;
assign result_valid = alu_req_if.valid;
assign result_wid = alu_req_if.wid;
assign result_tmask = alu_req_if.tmask;
assign result_PC = alu_req_if.PC;
assign result_rd = alu_req_if.rd;
assign result_wb = alu_req_if.wb;
assign result_data = alu_jal_result;
assign result_is_br = is_br_op;
`endif
wire is_br_op_r;
assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall_out),
.data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_taken, br_dest}),
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
);
assign alu_commit_if.eop = 1'b1;
assign branch_ctl_if.valid = alu_commit_if.valid && alu_commit_if.ready && is_br_op_r;
assign branch_ctl_if.wid = alu_commit_if.wid;
// can accept new request?
assign alu_req_if.ready = ~stall_in;
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (branch_ctl_if.valid) begin
$display("%t: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h", $time, CORE_ID,
branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest);
end
end
`endif
endmodule

160
hw/rtl/VX_cluster.sv Normal file
View file

@ -0,0 +1,160 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_cluster import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
// Clock
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
input sysmem_perf_t sysmem_perf,
`endif
// DCRs
VX_dcr_bus_if.slave dcr_bus_if,
// Memory
VX_mem_bus_if.master mem_bus_if [`L2_MEM_PORTS],
// Status
output wire busy
);
`ifdef SCOPE
localparam scope_socket = 0;
`SCOPE_IO_SWITCH (`NUM_SOCKETS);
`endif
`ifdef PERF_ENABLE
cache_perf_t l2_perf;
sysmem_perf_t sysmem_perf_tmp;
always @(*) begin
sysmem_perf_tmp = sysmem_perf;
sysmem_perf_tmp.l2cache = l2_perf;
end
`endif
`ifdef GBAR_ENABLE
VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS]();
VX_gbar_bus_if gbar_bus_if();
VX_gbar_arb #(
.NUM_REQS (`NUM_SOCKETS),
.OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure
) gbar_arb (
.clk (clk),
.reset (reset),
.bus_in_if (per_socket_gbar_bus_if),
.bus_out_if (gbar_bus_if)
);
VX_gbar_unit #(
.INSTANCE_ID (`SFORMATF(("gbar%0d", CLUSTER_ID)))
) gbar_unit (
.clk (clk),
.reset (reset),
.gbar_bus_if (gbar_bus_if)
);
`endif
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (L1_MEM_ARB_TAG_WIDTH)
) per_socket_mem_bus_if[`NUM_SOCKETS * `L1_MEM_PORTS]();
`RESET_RELAY (l2_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID (`SFORMATF(("%s-l2cache", INSTANCE_ID))),
.CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
.NUM_WAYS (`L2_NUM_WAYS),
.WORD_SIZE (L2_WORD_SIZE),
.NUM_REQS (L2_NUM_REQS),
.MEM_PORTS (`L2_MEM_PORTS),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_WRITEBACK ? `L2_MSHR_SIZE : `L2_MREQ_SIZE),
.TAG_WIDTH (L2_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_DIRTYBYTES),
.REPL_POLICY (`L2_REPL_POLICY),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (3),
.NC_ENABLE (1),
.PASSTHRU (!`L2_ENABLED)
) l2cache (
.clk (clk),
.reset (l2_reset),
`ifdef PERF_ENABLE
.cache_perf (l2_perf),
`endif
.core_bus_if (per_socket_mem_bus_if),
.mem_bus_if (mem_bus_if)
);
///////////////////////////////////////////////////////////////////////////
wire [`NUM_SOCKETS-1:0] per_socket_busy;
// Generate all sockets
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets
`RESET_RELAY (socket_reset, reset);
VX_dcr_bus_if socket_dcr_bus_if();
wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END);
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS > 1))
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
.INSTANCE_ID (`SFORMATF(("%s-socket%0d", INSTANCE_ID, socket_id)))
) socket (
`SCOPE_IO_BIND (scope_socket+socket_id)
.clk (clk),
.reset (socket_reset),
`ifdef PERF_ENABLE
.sysmem_perf (sysmem_perf_tmp),
`endif
.dcr_bus_if (socket_dcr_bus_if),
.mem_bus_if (per_socket_mem_bus_if[socket_id * `L1_MEM_PORTS +: `L1_MEM_PORTS]),
`ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
`endif
.busy (per_socket_busy[socket_id])
);
end
`BUFFER_EX(busy, (| per_socket_busy), 1'b1, 1, (`NUM_SOCKETS > 1));
endmodule

View file

@ -1,190 +0,0 @@
`include "VX_define.vh"
module VX_cluster #(
parameter CLUSTER_ID = 0
) (
`SCOPE_IO_VX_cluster
// Clock
input wire clk,
input wire reset,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`L2MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`L2MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`L2MEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`L2MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`L2MEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`L2MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// Status
output wire busy
);
`STATIC_ASSERT((`L2_ENABLE == 0 || `NUM_CORES > 1), ("invalid parameter"))
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
wire [`NUM_CORES-1:0][`DMEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
wire [`NUM_CORES-1:0][`DMEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_req_data;
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_rsp_data;
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
wire [`NUM_CORES-1:0] per_core_busy;
for (genvar i = 0; i < `NUM_CORES; i++) begin
`RESET_RELAY (core_reset);
VX_core #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) core (
`SCOPE_BIND_VX_cluster_core(i)
.clk (clk),
.reset (core_reset),
.mem_req_valid (per_core_mem_req_valid[i]),
.mem_req_rw (per_core_mem_req_rw [i]),
.mem_req_byteen (per_core_mem_req_byteen[i]),
.mem_req_addr (per_core_mem_req_addr [i]),
.mem_req_data (per_core_mem_req_data [i]),
.mem_req_tag (per_core_mem_req_tag [i]),
.mem_req_ready (per_core_mem_req_ready[i]),
.mem_rsp_valid (per_core_mem_rsp_valid[i]),
.mem_rsp_data (per_core_mem_rsp_data [i]),
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
.busy (per_core_busy [i])
);
end
assign busy = (| per_core_busy);
if (`L2_ENABLE) begin
`ifdef PERF_ENABLE
VX_perf_cache_if perf_l2cache_if();
`endif
`RESET_RELAY (l2_reset);
VX_cache #(
.CACHE_ID (`L2CACHE_ID),
.CACHE_SIZE (`L2CACHE_SIZE),
.CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE),
.NUM_BANKS (`L2NUM_BANKS),
.WORD_SIZE (`L2WORD_SIZE),
.NUM_REQS (`L2NUM_REQS),
.CREQ_SIZE (`L2CREQ_SIZE),
.CRSQ_SIZE (`L2CRSQ_SIZE),
.MSHR_SIZE (`L2MSHR_SIZE),
.MRSQ_SIZE (`L2MRSQ_SIZE),
.MREQ_SIZE (`L2MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`XMEM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH),
.NC_ENABLE (1)
) l2cache (
`SCOPE_BIND_VX_cluster_l2cache
.clk (clk),
.reset (l2_reset),
`ifdef PERF_ENABLE
.perf_cache_if (perf_l2cache_if),
`endif
// Core request
.core_req_valid (per_core_mem_req_valid),
.core_req_rw (per_core_mem_req_rw),
.core_req_byteen (per_core_mem_req_byteen),
.core_req_addr (per_core_mem_req_addr),
.core_req_data (per_core_mem_req_data),
.core_req_tag (per_core_mem_req_tag),
.core_req_ready (per_core_mem_req_ready),
// Core response
.core_rsp_valid (per_core_mem_rsp_valid),
.core_rsp_data (per_core_mem_rsp_data),
.core_rsp_tag (per_core_mem_rsp_tag),
.core_rsp_ready (per_core_mem_rsp_ready),
`UNUSED_PIN (core_rsp_tmask),
// Memory request
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
// Memory response
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_ready (mem_rsp_ready)
);
end else begin
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`L2MEM_DATA_WIDTH),
.ADDR_WIDTH (`L2MEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (reset),
// Core request
.req_valid_in (per_core_mem_req_valid),
.req_rw_in (per_core_mem_req_rw),
.req_byteen_in (per_core_mem_req_byteen),
.req_addr_in (per_core_mem_req_addr),
.req_data_in (per_core_mem_req_data),
.req_tag_in (per_core_mem_req_tag),
.req_ready_in (per_core_mem_req_ready),
// Memory request
.req_valid_out (mem_req_valid),
.req_rw_out (mem_req_rw),
.req_byteen_out (mem_req_byteen),
.req_addr_out (mem_req_addr),
.req_data_out (mem_req_data),
.req_tag_out (mem_req_tag),
.req_ready_out (mem_req_ready),
// Core response
.rsp_valid_out (per_core_mem_rsp_valid),
.rsp_data_out (per_core_mem_rsp_data),
.rsp_tag_out (per_core_mem_rsp_tag),
.rsp_ready_out (per_core_mem_rsp_ready),
// Memory response
.rsp_valid_in (mem_rsp_valid),
.rsp_tag_in (mem_rsp_tag),
.rsp_data_in (mem_rsp_data),
.rsp_ready_in (mem_rsp_ready)
);
end
endmodule

View file

@ -1,116 +0,0 @@
`include "VX_define.vh"
module VX_commit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// inputs
VX_commit_if alu_commit_if,
VX_commit_if ld_commit_if,
VX_commit_if st_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
// outputs
VX_writeback_if writeback_if,
VX_cmt_to_csr_if cmt_to_csr_if
);
localparam CMTW = $clog2(3*`NUM_THREADS+1);
// CSRs update
wire alu_commit_fire = alu_commit_if.valid && alu_commit_if.ready;
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
wire commit_fire = alu_commit_fire
|| ld_commit_fire
|| st_commit_fire
|| csr_commit_fire
|| fpu_commit_fire
|| gpu_commit_fire;
wire [`NUM_THREADS-1:0] commit_tmask1, commit_tmask2, commit_tmask3;
assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask:
ld_commit_fire ? ld_commit_if.tmask:
csr_commit_fire ? csr_commit_if.tmask:
fpu_commit_fire ? fpu_commit_if.tmask:
0;
assign commit_tmask2 = st_commit_fire ? st_commit_if.tmask : 0;
assign commit_tmask3 = gpu_commit_fire ? gpu_commit_if.tmask : 0;
wire [CMTW-1:0] commit_size;
assign commit_size = $countones({commit_tmask3, commit_tmask2, commit_tmask1});
VX_pipe_register #(
.DATAW (1 + CMTW),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({commit_fire, commit_size}),
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
);
// Writeback
VX_writeback #(
.CORE_ID(CORE_ID)
) writeback (
.clk (clk),
.reset (reset),
.alu_commit_if (alu_commit_if),
.ld_commit_if (ld_commit_if),
.csr_commit_if (csr_commit_if),
.fpu_commit_if (fpu_commit_if),
.writeback_if (writeback_if)
);
// store and gpu commits don't writeback
assign st_commit_if.ready = 1'b1;
assign gpu_commit_if.ready = 1'b1;
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (alu_commit_if.valid && alu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
end
if (ld_commit_if.valid && ld_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd, ld_commit_if.data);
end
if (st_commit_if.valid && st_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd, st_commit_if.data);
end
if (csr_commit_if.valid && csr_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
end
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
end
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
end
end
`else
`UNUSED_VAR (fpu_commit_if.PC)
`endif
endmodule

File diff suppressed because it is too large Load diff

View file

@ -1,156 +0,0 @@
`include "VX_define.vh"
module VX_core #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_core
// Clock
input wire clk,
input wire reset,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`DMEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`DMEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`DMEM_DATA_WIDTH-1:0] mem_req_data,
output wire [`XMEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory reponse
input wire mem_rsp_valid,
input wire [`DMEM_DATA_WIDTH-1:0] mem_rsp_data,
input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// Status
output wire busy
);
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if();
`endif
VX_mem_req_if #(
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.TAG_WIDTH (`XMEM_TAG_WIDTH)
) mem_req_if();
VX_mem_rsp_if #(
.DATA_WIDTH (`DMEM_DATA_WIDTH),
.TAG_WIDTH (`XMEM_TAG_WIDTH)
) mem_rsp_if();
assign mem_req_valid = mem_req_if.valid;
assign mem_req_rw = mem_req_if.rw;
assign mem_req_byteen= mem_req_if.byteen;
assign mem_req_addr = mem_req_if.addr;
assign mem_req_data = mem_req_if.data;
assign mem_req_tag = mem_req_if.tag;
assign mem_req_if.ready = mem_req_ready;
assign mem_rsp_if.valid = mem_rsp_valid;
assign mem_rsp_if.data = mem_rsp_data;
assign mem_rsp_if.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_rsp_if.ready;
//--
VX_dcache_req_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH)
) dcache_req_if();
VX_dcache_rsp_if #(
.NUM_REQS (`DNUM_REQS),
.WORD_SIZE (`DWORD_SIZE),
.TAG_WIDTH (`DCORE_TAG_WIDTH)
) dcache_rsp_if();
VX_icache_req_if #(
.WORD_SIZE (`IWORD_SIZE),
.TAG_WIDTH (`ICORE_TAG_WIDTH)
) icache_req_if();
VX_icache_rsp_if #(
.WORD_SIZE (`IWORD_SIZE),
.TAG_WIDTH (`ICORE_TAG_WIDTH)
) icache_rsp_if();
VX_pipeline #(
.CORE_ID(CORE_ID)
) pipeline (
`SCOPE_BIND_VX_core_pipeline
`ifdef PERF_ENABLE
.perf_memsys_if (perf_memsys_if),
`endif
.clk(clk),
.reset(reset),
// Dcache core request
.dcache_req_valid (dcache_req_if.valid),
.dcache_req_rw (dcache_req_if.rw),
.dcache_req_byteen (dcache_req_if.byteen),
.dcache_req_addr (dcache_req_if.addr),
.dcache_req_data (dcache_req_if.data),
.dcache_req_tag (dcache_req_if.tag),
.dcache_req_ready (dcache_req_if.ready),
// Dcache core reponse
.dcache_rsp_valid (dcache_rsp_if.valid),
.dcache_rsp_tmask (dcache_rsp_if.tmask),
.dcache_rsp_data (dcache_rsp_if.data),
.dcache_rsp_tag (dcache_rsp_if.tag),
.dcache_rsp_ready (dcache_rsp_if.ready),
// Icache core request
.icache_req_valid (icache_req_if.valid),
.icache_req_addr (icache_req_if.addr),
.icache_req_tag (icache_req_if.tag),
.icache_req_ready (icache_req_if.ready),
// Icache core reponse
.icache_rsp_valid (icache_rsp_if.valid),
.icache_rsp_data (icache_rsp_if.data),
.icache_rsp_tag (icache_rsp_if.tag),
.icache_rsp_ready (icache_rsp_if.ready),
// Status
.busy(busy)
);
//--
VX_mem_unit #(
.CORE_ID(CORE_ID)
) mem_unit (
`SCOPE_BIND_VX_core_mem_unit
`ifdef PERF_ENABLE
.perf_memsys_if (perf_memsys_if),
`endif
.clk (clk),
.reset (reset),
// Core <-> Dcache
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
// Core <-> Icache
.icache_req_if (icache_req_if),
.icache_rsp_if (icache_rsp_if),
// Memory
.mem_req_if (mem_req_if),
.mem_rsp_if (mem_rsp_if)
);
endmodule

View file

@ -1,217 +0,0 @@
`include "VX_define.vh"
module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_pipeline_if perf_pipeline_if,
`endif
VX_cmt_to_csr_if cmt_to_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
input wire read_enable,
input wire[`CSR_ADDR_BITS-1:0] read_addr,
input wire[`NW_BITS-1:0] read_wid,
output wire[31:0] read_data,
input wire write_enable,
input wire[`CSR_ADDR_BITS-1:0] write_addr,
input wire[`NW_BITS-1:0] write_wid,
input wire[`CSR_WIDTH-1:0] write_data,
input wire busy
);
reg [`CSR_WIDTH-1:0] csr_satp;
reg [`CSR_WIDTH-1:0] csr_mstatus;
reg [`CSR_WIDTH-1:0] csr_medeleg;
reg [`CSR_WIDTH-1:0] csr_mideleg;
reg [`CSR_WIDTH-1:0] csr_mie;
reg [`CSR_WIDTH-1:0] csr_mtvec;
reg [`CSR_WIDTH-1:0] csr_mepc;
reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];
reg [63:0] csr_cycle;
reg [63:0] csr_instret;
reg [`NUM_WARPS-1:0][`FRM_BITS+`FFG_BITS-1:0] fcsr;
always @(posedge clk) begin
if (reset) begin
fcsr <= '0;
end
if (fpu_to_csr_if.write_enable) begin
fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]
| fpu_to_csr_if.write_fflags;
end
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: fcsr[write_wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
`CSR_FRM: fcsr[write_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
`CSR_SATP: csr_satp <= write_data;
`CSR_MSTATUS: csr_mstatus <= write_data;
`CSR_MEDELEG: csr_medeleg <= write_data;
`CSR_MIDELEG: csr_mideleg <= write_data;
`CSR_MIE: csr_mie <= write_data;
`CSR_MTVEC: csr_mtvec <= write_data;
`CSR_MEPC: csr_mepc <= write_data;
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
default: begin
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
end
endcase
end
end
always @(posedge clk) begin
if (reset) begin
csr_cycle <= 0;
csr_instret <= 0;
end else begin
if (busy) begin
csr_cycle <= csr_cycle + 1;
end
if (cmt_to_csr_if.valid) begin
csr_instret <= csr_instret + 64'(cmt_to_csr_if.commit_size);
end
end
end
reg [31:0] read_data_r;
reg read_addr_valid_r;
always @(*) begin
read_data_r = 'x;
read_addr_valid_r = 1;
case (read_addr)
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFG_BITS-1:0]);
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS]);
`CSR_FCSR : read_data_r = 32'(fcsr[read_wid]);
`CSR_WTID ,
`CSR_LTID ,
`CSR_LWID : read_data_r = 32'(read_wid);
`CSR_GTID ,
/*`CSR_MHARTID ,*/
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
`CSR_GCID : read_data_r = CORE_ID;
`CSR_NT : read_data_r = `NUM_THREADS;
`CSR_NW : read_data_r = `NUM_WARPS;
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
`CSR_MCYCLE : read_data_r = csr_cycle[31:0];
`CSR_MCYCLE_H : read_data_r = 32'(csr_cycle[`PERF_CTR_BITS-1:32]);
`CSR_MINSTRET : read_data_r = csr_instret[31:0];
`CSR_MINSTRET_H : read_data_r = 32'(csr_instret[`PERF_CTR_BITS-1:32]);
`ifdef PERF_ENABLE
// PERF: pipeline
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scb_stalls[31:0];
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
// PERF: icache
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[`PERF_CTR_BITS-1:32]);
// PERF: dcache
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_BANK_ST : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[`PERF_CTR_BITS-1:32]);
// PERF: smem
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
// PERF: MEM
`CSR_MPM_MEM_READS : read_data_r = perf_memsys_if.mem_reads[31:0];
`CSR_MPM_MEM_READS_H : read_data_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_WRITES : read_data_r = perf_memsys_if.mem_writes[31:0];
`CSR_MPM_MEM_WRITES_H : read_data_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_ST : read_data_r = perf_memsys_if.mem_stalls[31:0];
`CSR_MPM_MEM_ST_H : read_data_r = 32'(perf_memsys_if.mem_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_LAT : read_data_r = perf_memsys_if.mem_latency[31:0];
`CSR_MPM_MEM_LAT_H : read_data_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
// PERF: reserved
`CSR_MPM_RESERVED : read_data_r = '0;
`CSR_MPM_RESERVED_H : read_data_r = '0;
`endif
`CSR_SATP : read_data_r = 32'(csr_satp);
`CSR_MSTATUS : read_data_r = 32'(csr_mstatus);
`CSR_MISA : read_data_r = `ISA_CODE;
`CSR_MEDELEG : read_data_r = 32'(csr_medeleg);
`CSR_MIDELEG : read_data_r = 32'(csr_mideleg);
`CSR_MIE : read_data_r = 32'(csr_mie);
`CSR_MTVEC : read_data_r = 32'(csr_mtvec);
`CSR_MEPC : read_data_r = 32'(csr_mepc);
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
default: begin
if (!((read_addr >= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32))
| (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32)))) begin
read_addr_valid_r = 0;
end
end
endcase
end
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("invalid CSR read address: %0h", read_addr))
assign read_data = read_data_r;
assign fpu_to_csr_if.read_frm = fcsr[fpu_to_csr_if.read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS];
endmodule

View file

@ -1,127 +0,0 @@
`include "VX_define.vh"
module VX_csr_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_pipeline_if perf_pipeline_if,
`endif
VX_cmt_to_csr_if cmt_to_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
VX_csr_req_if csr_req_if,
VX_commit_if csr_commit_if,
input wire busy,
input wire[`NUM_WARPS-1:0] fpu_pending,
output wire[`NUM_WARPS-1:0] pending
);
wire csr_we_s1;
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
wire [31:0] csr_read_data, csr_read_data_s1;
wire [31:0] csr_updated_data_s1;
wire write_enable = csr_commit_if.valid && csr_we_s1;
wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.rs1) : csr_req_if.rs1_data;
VX_csr_data #(
.CORE_ID(CORE_ID)
) csr_data (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
.perf_memsys_if (perf_memsys_if),
.perf_pipeline_if (perf_pipeline_if),
`endif
.cmt_to_csr_if (cmt_to_csr_if),
.fpu_to_csr_if (fpu_to_csr_if),
.read_enable (csr_req_if.valid),
.read_addr (csr_req_if.addr),
.read_wid (csr_req_if.wid),
.read_data (csr_read_data),
.write_enable (write_enable),
.write_addr (csr_addr_s1),
.write_wid (csr_commit_if.wid),
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
.busy (busy)
);
wire write_hazard = (csr_addr_s1 == csr_req_if.addr)
&& (csr_commit_if.wid == csr_req_if.wid)
&& csr_commit_if.valid;
wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data;
reg [31:0] csr_updated_data;
reg csr_we_s0_unqual;
always @(*) begin
csr_we_s0_unqual = (csr_req_data != 0);
case (csr_req_if.op_type)
`CSR_RW: begin
csr_updated_data = csr_req_data;
csr_we_s0_unqual = 1;
end
`CSR_RS: begin
csr_updated_data = csr_read_data_qual | csr_req_data;
end
//`CSR_RC
default: begin
csr_updated_data = csr_read_data_qual & ~csr_req_data;
end
endcase
end
wire stall_in = fpu_pending[csr_req_if.wid];
wire csr_req_valid = csr_req_if.valid && !stall_in;
wire stall_out = ~csr_commit_if.ready && csr_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall_out),
.data_in ({csr_req_valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}),
.data_out ({csr_commit_if.valid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1, csr_addr_s1, csr_read_data_s1, csr_updated_data_s1})
);
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign csr_commit_if.data[i] = (csr_addr_s1 == `CSR_WTID) ? i :
(csr_addr_s1 == `CSR_LTID
|| csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
csr_read_data_s1;
end
assign csr_commit_if.eop = 1'b1;
// can accept new request?
assign csr_req_if.ready = ~(stall_out || stall_in);
// pending request
reg [`NUM_WARPS-1:0] pending_r;
always @(posedge clk) begin
if (reset) begin
pending_r <= 0;
end else begin
if (csr_commit_if.valid && csr_commit_if.ready) begin
pending_r[csr_commit_if.wid] <= 0;
end
if (csr_req_if.valid && csr_req_if.ready) begin
pending_r[csr_req_if.wid] <= 1;
end
end
end
assign pending = pending_r;
endmodule

View file

@ -1,426 +0,0 @@
`include "VX_define.vh"
`include "VX_print_instr.vh"
`ifdef EXT_F_ENABLE
`define USED_IREG(r) \
used_regs[{1'b0, r}] = 1
`define USED_FREG(r) \
r``_r[5] = 1; \
used_regs[{1'b1, r}] = 1
`else
`define USED_IREG(r) \
used_regs[r] = 1
`endif
module VX_decode #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// inputs
VX_ifetch_rsp_if ifetch_rsp_if,
// outputs
VX_decode_if decode_if,
VX_wstall_if wstall_if,
VX_join_if join_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
reg [`EX_BITS-1:0] ex_type;
reg [`OP_BITS-1:0] op_type;
reg [`MOD_BITS-1:0] op_mod;
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [31:0] imm;
reg use_rd, use_PC, use_imm;
reg is_join, is_wstall;
reg [`NUM_REGS-1:0] used_regs;
wire [31:0] instr = ifetch_rsp_if.data;
wire [6:0] opcode = instr[6:0];
wire [2:0] func3 = instr[14:12];
wire [6:0] func7 = instr[31:25];
wire [11:0] u_12 = instr[31:20];
wire [4:0] rd = instr[11:7];
wire [4:0] rs1 = instr[19:15];
wire [4:0] rs2 = instr[24:20];
wire [4:0] rs3 = instr[31:27];
wire [19:0] upper_imm = {func7, rs2, rs1, func3};
wire [11:0] alu_imm = (func3[0] && ~func3[1]) ? {{7{1'b0}}, rs2} : u_12;
wire [11:0] s_imm = {func7, rd};
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
wire [11:0] jalr_imm = {func7, rs2};
always @(*) begin
ex_type = 0;
op_type = 'x;
op_mod = 0;
rd_r = `NR_BITS'(rd);
rs1_r = `NR_BITS'(rs1);
rs2_r = `NR_BITS'(rs2);
rs3_r = `NR_BITS'(rs3);
imm = 'x;
use_imm = 0;
use_PC = 0;
use_rd = 0;
is_join = 0;
is_wstall = 0;
used_regs = 0;
case (opcode)
`INST_I: begin
ex_type = `EX_ALU;
case (func3)
3'h0: op_type = `OP_BITS'(`ALU_ADD);
3'h1: op_type = `OP_BITS'(`ALU_SLL);
3'h2: op_type = `OP_BITS'(`ALU_SLT);
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
3'h4: op_type = `OP_BITS'(`ALU_XOR);
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
3'h6: op_type = `OP_BITS'(`ALU_OR);
3'h7: op_type = `OP_BITS'(`ALU_AND);
default:;
endcase
use_rd = 1;
use_imm = 1;
imm = {{20{alu_imm[11]}}, alu_imm};
`USED_IREG (rd);
`USED_IREG (rs1);
end
`INST_R: begin
ex_type = `EX_ALU;
`ifdef EXT_F_ENABLE
if (func7[0]) begin
case (func3)
3'h0: op_type = `OP_BITS'(`MUL_MUL);
3'h1: op_type = `OP_BITS'(`MUL_MULH);
3'h2: op_type = `OP_BITS'(`MUL_MULHSU);
3'h3: op_type = `OP_BITS'(`MUL_MULHU);
3'h4: op_type = `OP_BITS'(`MUL_DIV);
3'h5: op_type = `OP_BITS'(`MUL_DIVU);
3'h6: op_type = `OP_BITS'(`MUL_REM);
3'h7: op_type = `OP_BITS'(`MUL_REMU);
default:;
endcase
op_mod = 2;
end else
`endif
begin
case (func3)
3'h0: op_type = (func7[5]) ? `OP_BITS'(`ALU_SUB) : `OP_BITS'(`ALU_ADD);
3'h1: op_type = `OP_BITS'(`ALU_SLL);
3'h2: op_type = `OP_BITS'(`ALU_SLT);
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
3'h4: op_type = `OP_BITS'(`ALU_XOR);
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
3'h6: op_type = `OP_BITS'(`ALU_OR);
3'h7: op_type = `OP_BITS'(`ALU_AND);
default:;
endcase
end
use_rd = 1;
`USED_IREG (rd);
`USED_IREG (rs1);
`USED_IREG (rs2);
end
`INST_LUI: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_LUI);
use_rd = 1;
use_imm = 1;
imm = {upper_imm, 12'(0)};
`USED_IREG (rd);
rs1_r = 0;
end
`INST_AUIPC: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_AUIPC);
use_rd = 1;
use_imm = 1;
use_PC = 1;
imm = {upper_imm, 12'(0)};
`USED_IREG (rd);
end
`INST_JAL: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`BR_JAL);
op_mod = 1;
use_rd = 1;
use_imm = 1;
use_PC = 1;
is_wstall = 1;
imm = {{11{jal_imm[20]}}, jal_imm};
`USED_IREG (rd);
end
`INST_JALR: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`BR_JALR);
op_mod = 1;
use_rd = 1;
use_imm = 1;
is_wstall = 1;
imm = {{20{jalr_imm[11]}}, jalr_imm};
`USED_IREG (rd);
`USED_IREG (rs1);
end
`INST_B: begin
ex_type = `EX_ALU;
case (func3)
3'h0: op_type = `OP_BITS'(`BR_EQ);
3'h1: op_type = `OP_BITS'(`BR_NE);
3'h4: op_type = `OP_BITS'(`BR_LT);
3'h5: op_type = `OP_BITS'(`BR_GE);
3'h6: op_type = `OP_BITS'(`BR_LTU);
3'h7: op_type = `OP_BITS'(`BR_GEU);
default:;
endcase
op_mod = 1;
use_imm = 1;
use_PC = 1;
is_wstall = 1;
imm = {{19{b_imm[12]}}, b_imm};
`USED_IREG (rs1);
`USED_IREG (rs2);
end
`INST_F: begin
ex_type = `EX_LSU;
op_mod = `MOD_BITS'(!func3[0]); // data fence
end
`INST_SYS : begin
if (func3[1:0] != 0) begin
ex_type = `EX_CSR;
op_type = `OP_BITS'(func3[1:0]);
use_rd = 1;
use_imm = func3[2];
imm = 32'(u_12); // addr
`USED_IREG (rd);
if (func3[2]) begin
rs1_r = `NR_BITS'(rs1); // imm
end else begin
`USED_IREG (rs1);
end
end else begin
ex_type = `EX_ALU;
case (u_12)
12'h000: op_type = `OP_BITS'(`BR_ECALL);
12'h001: op_type = `OP_BITS'(`BR_EBREAK);
12'h302: op_type = `OP_BITS'(`BR_MRET);
12'h102: op_type = `OP_BITS'(`BR_SRET);
12'h7B2: op_type = `OP_BITS'(`BR_DRET);
default:;
endcase
op_mod = 1;
use_rd = 1;
use_imm = 1;
use_PC = 1;
imm = 32'd4;
`USED_IREG (rd);
end
end
`ifdef EXT_F_ENABLE
`INST_FL,
`endif
`INST_L: begin
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b0, func3});
use_rd = 1;
imm = {{20{u_12[11]}}, u_12};
`ifdef EXT_F_ENABLE
if (opcode[2]) begin
`USED_FREG (rd);
end else
`endif
`USED_IREG (rd);
`USED_IREG (rs1);
end
`ifdef EXT_F_ENABLE
`INST_FS,
`endif
`INST_S: begin
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b1, func3});
imm = {{20{s_imm[11]}}, s_imm};
`USED_IREG (rs1);
`ifdef EXT_F_ENABLE
if (opcode[2]) begin
`USED_FREG (rs2);
end else
`endif
`USED_IREG (rs2);
end
`ifdef EXT_F_ENABLE
`INST_FMADD,
`INST_FMSUB,
`INST_FNMSUB,
`INST_FNMADD: begin
ex_type = `EX_FPU;
op_type = `OP_BITS'(opcode[3:0]);
op_mod = func3;
use_rd = 1;
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
`USED_FREG (rs3);
end
`INST_FCI: begin
ex_type = `EX_FPU;
op_mod = func3;
use_rd = 1;
case (func7)
7'h00, // FADD
7'h04, // FSUB
7'h08, // FMUL
7'h0C: begin // FDIV
op_type = `OP_BITS'(func7[3:0]);
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h2C: begin
op_type = `OP_BITS'(`FPU_SQRT);
`USED_FREG (rd);
`USED_FREG (rs1);
end
7'h50: begin
op_type = `OP_BITS'(`FPU_CMP);
`USED_IREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h60: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
`USED_IREG (rd);
`USED_FREG (rs1);
end
7'h68: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
`USED_FREG (rd);
`USED_IREG (rs1);
end
7'h10: begin
// FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `OP_BITS'(`FPU_MISC);
op_mod = {1'b0, func3[1:0]};
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h14: begin
// FMIN=3, FMAX=4
op_type = `OP_BITS'(`FPU_MISC);
op_mod = func3[0] ? 4 : 3;
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h70: begin
if (func3[0]) begin
// FCLASS
op_type = `OP_BITS'(`FPU_CLASS);
end else begin
// FMV.X.W=5
op_type = `OP_BITS'(`FPU_MISC);
op_mod = 5;
end
`USED_IREG (rd);
`USED_FREG (rs1);
end
7'h78: begin
// FMV.W.X=6
op_type = `OP_BITS'(`FPU_MISC);
op_mod = 6;
`USED_FREG (rd);
`USED_IREG (rs1);
end
default:;
endcase
end
`endif
`INST_GPU: begin
ex_type = `EX_GPU;
case (func3)
3'h0: begin
op_type = `OP_BITS'(`GPU_TMC);
is_wstall = 1;
`USED_IREG (rs1);
end
3'h1: begin
op_type = `OP_BITS'(`GPU_WSPAWN);
`USED_IREG (rs1);
`USED_IREG (rs2);
end
3'h2: begin
op_type = `OP_BITS'(`GPU_SPLIT);
is_wstall = 1;
`USED_IREG (rs1);
end
3'h3: begin
op_type = `OP_BITS'(`GPU_JOIN);
is_join = 1;
end
3'h4: begin
op_type = `OP_BITS'(`GPU_BAR);
is_wstall = 1;
`USED_IREG (rs1);
`USED_IREG (rs2);
end
default:;
endcase
end
default:;
endcase
end
// disable write to integer register r0
wire wb = use_rd && (| rd_r);
assign decode_if.valid = ifetch_rsp_if.valid;
assign decode_if.wid = ifetch_rsp_if.wid;
assign decode_if.tmask = ifetch_rsp_if.tmask;
assign decode_if.PC = ifetch_rsp_if.PC;
assign decode_if.ex_type = ex_type;
assign decode_if.op_type = op_type;
assign decode_if.op_mod = op_mod;
assign decode_if.wb = wb;
assign decode_if.rd = rd_r;
assign decode_if.rs1 = rs1_r;
assign decode_if.rs2 = rs2_r;
assign decode_if.rs3 = rs3_r;
assign decode_if.imm = imm;
assign decode_if.use_PC = use_PC;
assign decode_if.use_imm = use_imm;
assign decode_if.used_regs = used_regs;
///////////////////////////////////////////////////////////////////////////
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
assign join_if.valid = ifetch_rsp_fire && is_join;
assign join_if.wid = ifetch_rsp_if.wid;
assign wstall_if.valid = ifetch_rsp_fire && is_wstall;
assign wstall_if.wid = ifetch_rsp_if.wid;
assign ifetch_rsp_if.ready = decode_if.ready;
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (decode_if.valid && decode_if.ready) begin
$write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.PC);
print_ex_type(decode_if.ex_type);
$write(", op=");
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
$write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_regs=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm, decode_if.used_regs);
end
end
`endif
endmodule

View file

@ -1,35 +1,90 @@
`ifndef VX_DEFINE
`define VX_DEFINE
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`ifndef VX_DEFINE_VH
`define VX_DEFINE_VH
`include "VX_platform.vh"
`include "VX_config.vh"
`include "VX_types.vh"
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NW_BITS `CLOG2(`NUM_WARPS)
`define NC_WIDTH `UP(`NC_BITS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
`define NT_BITS `CLOG2(`NUM_THREADS)
`define NW_WIDTH `UP(`NW_BITS)
`define NC_BITS `LOG2UP(`NUM_CORES)
`define NC_BITS `CLOG2(`NUM_CORES)
`define NT_WIDTH `UP(`NT_BITS)
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
`define NB_BITS `CLOG2(`NUM_BARRIERS)
`define NB_WIDTH `UP(`NB_BITS)
`define REQS_BITS `LOG2UP(NUM_REQS)
`define NUM_IREGS 32
`define NRI_BITS `CLOG2(`NUM_IREGS)
`ifdef EXT_F_ENABLE
`define NUM_REGS 64
`define NUM_REGS (2 * `NUM_IREGS)
`else
`define NUM_REGS 32
`define NUM_REGS `NUM_IREGS
`endif
`define NR_BITS `LOG2UP(`NUM_REGS)
`define NR_BITS `CLOG2(`NUM_REGS)
`define CSR_ADDR_BITS 12
`define CSR_WIDTH 12
`define DV_STACK_SIZE `UP(`NUM_THREADS-1)
`define DV_STACK_SIZEW `UP(`CLOG2(`DV_STACK_SIZE))
`define PERF_CTR_BITS 44
`ifndef NDEBUG
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`ifdef SCOPE
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`define UUID_WIDTH 1
`endif
`endif
`define PC_BITS (`XLEN-1)
`define OFFSET_BITS 12
`define IMM_BITS `XLEN
`define NUM_SOCKETS `UP(`NUM_CORES / `SOCKET_SIZE)
///////////////////////////////////////////////////////////////////////////////
`define EX_ALU 0
`define EX_LSU 1
`define EX_SFU 2
`define EX_FPU (`EX_SFU + `EXT_F_ENABLED)
`define NUM_EX_UNITS (3 + `EXT_F_ENABLED)
`define EX_BITS `CLOG2(`NUM_EX_UNITS)
`define EX_WIDTH `UP(`EX_BITS)
`define SFU_CSRS 0
`define SFU_WCTL 1
`define NUM_SFU_UNITS (2)
`define SFU_BITS `CLOG2(`NUM_SFU_UNITS)
`define SFU_WIDTH `UP(`SFU_BITS)
///////////////////////////////////////////////////////////////////////////////
`define INST_LUI 7'b0110111
@ -41,374 +96,424 @@
`define INST_S 7'b0100011 // store instructions
`define INST_I 7'b0010011 // immediate instructions
`define INST_R 7'b0110011 // register instructions
`define INST_F 7'b0001111 // Fence instructions
`define INST_FENCE 7'b0001111 // Fence instructions
`define INST_SYS 7'b1110011 // system instructions
// RV64I instruction specific opcodes (for any W instruction)
`define INST_I_W 7'b0011011 // W type immediate instructions
`define INST_R_W 7'b0111011 // W type register instructions
`define INST_FL 7'b0000111 // float load instruction
`define INST_FS 7'b0100111 // float store instruction
`define INST_FMADD 7'b1000011
`define INST_FMADD 7'b1000011
`define INST_FMSUB 7'b1000111
`define INST_FNMSUB 7'b1001011
`define INST_FNMADD 7'b1001111
`define INST_FNMADD 7'b1001111
`define INST_FCI 7'b1010011 // float common instructions
`define INST_GPU 7'b1101011
// Custom extension opcodes
`define INST_EXT1 7'b0001011 // 0x0B
`define INST_EXT2 7'b0101011 // 0x2B
`define INST_EXT3 7'b1011011 // 0x5B
`define INST_EXT4 7'b1111011 // 0x7B
// Opcode extensions
`define INST_R_F7_MUL 7'b0000001
`define INST_R_F7_ZICOND 7'b0000111
///////////////////////////////////////////////////////////////////////////////
`define FRM_RNE 3'b000 // round to nearest even
`define FRM_RTZ 3'b001 // round to zero
`define FRM_RDN 3'b010 // round to -inf
`define FRM_RUP 3'b011 // round to +inf
`define FRM_RMM 3'b100 // round to nearest max magnitude
`define FRM_DYN 3'b111 // dynamic mode
`define FRM_BITS 3
`define INST_FRM_RNE 3'b000 // round to nearest even
`define INST_FRM_RTZ 3'b001 // round to zero
`define INST_FRM_RDN 3'b010 // round to -inf
`define INST_FRM_RUP 3'b011 // round to +inf
`define INST_FRM_RMM 3'b100 // round to nearest max magnitude
`define INST_FRM_DYN 3'b111 // dynamic mode
`define INST_FRM_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_FPU 3'h4
`define EX_GPU 3'h5
`define EX_BITS 3
`define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS)
`define INST_OP_BITS 4
`define INST_ARGS_BITS $bits(op_args_t)
`define INST_FMT_BITS 2
///////////////////////////////////////////////////////////////////////////////
`define OP_BITS 4
`define MOD_BITS 3
`define INST_ALU_ADD 4'b0000
//`define INST_ALU_UNUSED 4'b0001
`define INST_ALU_LUI 4'b0010
`define INST_ALU_AUIPC 4'b0011
`define INST_ALU_SLTU 4'b0100
`define INST_ALU_SLT 4'b0101
//`define INST_ALU_UNUSED 4'b0110
`define INST_ALU_SUB 4'b0111
`define INST_ALU_SRL 4'b1000
`define INST_ALU_SRA 4'b1001
`define INST_ALU_CZEQ 4'b1010
`define INST_ALU_CZNE 4'b1011
`define INST_ALU_AND 4'b1100
`define INST_ALU_OR 4'b1101
`define INST_ALU_XOR 4'b1110
`define INST_ALU_SLL 4'b1111
`define ALU_ADD 4'b0000
`define ALU_LUI 4'b0010
`define ALU_AUIPC 4'b0011
`define ALU_SLTU 4'b0100
`define ALU_SLT 4'b0101
`define ALU_SRL 4'b1000
`define ALU_SRA 4'b1001
`define ALU_SUB 4'b1011
`define ALU_AND 4'b1100
`define ALU_OR 4'b1101
`define ALU_XOR 4'b1110
`define ALU_SLL 4'b1111
`define ALU_OTHER 4'b0111
`define ALU_BITS 4
`define ALU_OP(x) x[`ALU_BITS-1:0]
`define ALU_OP_CLASS(x) x[3:2]
`define ALU_SIGNED(x) x[0]
`define ALU_IS_BR(x) x[0]
`define ALU_IS_MUL(x) x[1]
`define BR_EQ 4'b0000
`define BR_NE 4'b0010
`define BR_LTU 4'b0100
`define BR_GEU 4'b0110
`define BR_LT 4'b0101
`define BR_GE 4'b0111
`define BR_JAL 4'b1000
`define BR_JALR 4'b1001
`define BR_ECALL 4'b1010
`define BR_EBREAK 4'b1011
`define BR_MRET 4'b1100
`define BR_SRET 4'b1101
`define BR_DRET 4'b1110
`define BR_OTHER 4'b1111
`define BR_BITS 4
`define BR_OP(x) x[`BR_BITS-1:0]
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
`define ALU_TYPE_BITS 2
`define ALU_TYPE_ARITH 0
`define ALU_TYPE_BRANCH 1
`define ALU_TYPE_MULDIV 2
`define ALU_TYPE_OTHER 3
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define MUL_IS_DIV(x) x[2]
`define INST_ALU_BITS 4
`define INST_ALU_CLASS(op) op[3:2]
`define INST_ALU_SIGNED(op) op[0]
`define INST_ALU_IS_SUB(op) op[1]
`define INST_ALU_IS_CZERO(op) (op[3:1] == 3'b101)
`define FMT_B 3'b000
`define FMT_H 3'b001
`define FMT_W 3'b010
`define FMT_BU 3'b100
`define FMT_HU 3'b101
`define INST_BR_EQ 4'b0000
`define INST_BR_NE 4'b0010
`define INST_BR_LTU 4'b0100
`define INST_BR_GEU 4'b0110
`define INST_BR_LT 4'b0101
`define INST_BR_GE 4'b0111
`define INST_BR_JAL 4'b1000
`define INST_BR_JALR 4'b1001
`define INST_BR_ECALL 4'b1010
`define INST_BR_EBREAK 4'b1011
`define INST_BR_URET 4'b1100
`define INST_BR_SRET 4'b1101
`define INST_BR_MRET 4'b1110
`define INST_BR_OTHER 4'b1111
`define INST_BR_BITS 4
`define INST_BR_CLASS(op) {1'b0, ~op[3]}
`define INST_BR_IS_NEG(op) op[1]
`define INST_BR_IS_LESS(op) op[2]
`define INST_BR_IS_STATIC(op) op[3]
`define LSU_LB 4'b0000
`define LSU_LH 4'b0001
`define LSU_LW 4'b0010
`define LSU_LBU 4'b0100
`define LSU_LHU 4'b0101
`define LSU_SB 4'b1000
`define LSU_SH 4'b1001
`define LSU_SW 4'b1010
`define LSU_BITS 4
`define LSU_FMT(x) x[2:0]
`define LSU_WSIZE(x) x[1:0]
`define LSU_OP(x) x[`LSU_BITS-1:0]
`define LSU_IS_FENCE(x) x[0]
`define INST_M_MUL 3'b000
`define INST_M_MULHU 3'b001
`define INST_M_MULH 3'b010
`define INST_M_MULHSU 3'b011
`define INST_M_DIV 3'b100
`define INST_M_DIVU 3'b101
`define INST_M_REM 3'b110
`define INST_M_REMU 3'b111
`define INST_M_BITS 3
`define INST_M_SIGNED(op) (~op[0])
`define INST_M_IS_MULX(op) (~op[2])
`define INST_M_IS_MULH(op) (op[1:0] != 0)
`define INST_M_SIGNED_A(op) (op[1:0] != 1)
`define INST_M_IS_REM(op) op[1]
`define CSR_RW 2'h1
`define CSR_RS 2'h2
`define CSR_RC 2'h3
`define CSR_OTHER 2'h0
`define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0]
`define INST_FMT_B 3'b000
`define INST_FMT_H 3'b001
`define INST_FMT_W 3'b010
`define INST_FMT_D 3'b011
`define INST_FMT_BU 3'b100
`define INST_FMT_HU 3'b101
`define INST_FMT_WU 3'b110
`define FPU_ADD 4'h0
`define FPU_SUB 4'h4
`define FPU_MUL 4'h8
`define FPU_DIV 4'hC
`define FPU_CVTWS 4'h1 // FCVT.W.S
`define FPU_CVTWUS 4'h5 // FCVT.WU.S
`define FPU_CVTSW 4'h9 // FCVT.S.W
`define FPU_CVTSWU 4'hD // FCVT.S.WU
`define FPU_SQRT 4'h2
`define FPU_CLASS 4'h6
`define FPU_CMP 4'hA
`define FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define FPU_MADD 4'h3
`define FPU_MSUB 4'h7
`define FPU_NMSUB 4'hB
`define FPU_NMADD 4'hF
`define FPU_BITS 4
`define FPU_OP(x) x[`FPU_BITS-1:0]
`define INST_LSU_LB 4'b0000
`define INST_LSU_LH 4'b0001
`define INST_LSU_LW 4'b0010
`define INST_LSU_LD 4'b0011 // new for RV64I LD
`define INST_LSU_LBU 4'b0100
`define INST_LSU_LHU 4'b0101
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
`define INST_LSU_SB 4'b1000
`define INST_LSU_SH 4'b1001
`define INST_LSU_SW 4'b1010
`define INST_LSU_SD 4'b1011 // new for RV64I SD
`define INST_LSU_FENCE 4'b1111
`define INST_LSU_BITS 4
`define INST_LSU_FMT(op) op[2:0]
`define INST_LSU_WSIZE(op) op[1:0]
`define INST_LSU_IS_FENCE(op) (op[3:2] == 3)
`define GPU_TMC 3'h0
`define GPU_WSPAWN 3'h1
`define GPU_SPLIT 3'h2
`define GPU_JOIN 3'h3
`define GPU_BAR 3'h4
`define GPU_OTHER 3'h7
`define GPU_BITS 3
`define GPU_OP(x) x[`GPU_BITS-1:0]
`define INST_FENCE_BITS 1
`define INST_FENCE_D 1'h0
`define INST_FENCE_I 1'h1
`define INST_FPU_ADD 4'b0000 // SUB=fmt[1]
`define INST_FPU_MUL 4'b0001
`define INST_FPU_MADD 4'b0010 // SUB=fmt[1]
`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1]
`define INST_FPU_DIV 4'b0100
`define INST_FPU_SQRT 4'b0101
`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1
`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2
`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1
`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7
`define INST_FPU_BITS 4
`define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3)
`define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4)
`define INST_SFU_TMC 4'h0
`define INST_SFU_WSPAWN 4'h1
`define INST_SFU_SPLIT 4'h2
`define INST_SFU_JOIN 4'h3
`define INST_SFU_BAR 4'h4
`define INST_SFU_PRED 4'h5
`define INST_SFU_CSRRW 4'h6
`define INST_SFU_CSRRS 4'h7
`define INST_SFU_CSRRC 4'h8
`define INST_SFU_BITS 4
`define INST_SFU_CSR(f3) (4'h6 + 4'(f3) - 4'h1)
`define INST_SFU_IS_WCTL(op) (op <= 5)
`define INST_SFU_IS_CSR(op) (op >= 6 && op <= 8)
///////////////////////////////////////////////////////////////////////////////
`ifdef EXT_M_ENABLE
`define ISA_EXT_M (1 << 12)
`else
`define ISA_EXT_M 0
`define ARB_SEL_BITS(I, O) ((I > O) ? `CLOG2(`CDIV(I, O)) : 0)
///////////////////////////////////////////////////////////////////////////////
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width) \
(uuid_width + `CLOG2(mshr_size) + `CLOG2(`CDIV(num_banks, mem_ports)))
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width) \
(`CLOG2(`CDIV(num_reqs, mem_ports)) + `CLOG2(line_size / word_size) + tag_width)
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, uuid_width) \
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width)) + 1)
///////////////////////////////////////////////////////////////////////////////
`define CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches) \
(tag_width + `ARB_SEL_BITS(num_inputs, `UP(num_caches)))
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, num_caches, uuid_width) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, mem_ports, uuid_width), num_caches)
`define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
///////////////////////////////////////////////////////////////////////////////
`ifdef ICACHE_ENABLE
`define L1_ENABLE
`endif
`ifdef EXT_F_ENABLE
`define ISA_EXT_F (1 << 5)
`else
`define ISA_EXT_F 0
`ifdef DCACHE_ENABLE
`define L1_ENABLE
`endif
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
| (0 << 1) // B - Tentatively reserved for Bit operations extension \
| (0 << 2) // C - Compressed extension \
| (0 << 3) // D - Double precsision floating-point extension \
| (0 << 4) // E - RV32E base ISA \
|`ISA_EXT_F // F - Single precsision floating-point extension \
| (0 << 6) // G - Additional standard extensions present \
| (0 << 7) // H - Hypervisor mode implemented \
| (1 << 8) // I - RV32I/64I/128I base ISA \
| (0 << 9) // J - Reserved \
| (0 << 10) // K - Reserved \
| (0 << 11) // L - Tentatively reserved for Bit operations extension \
|`ISA_EXT_M // M - Integer Multiply/Divide extension \
| (0 << 13) // N - User level interrupts supported \
| (0 << 14) // O - Reserved \
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
| (0 << 16) // Q - Quad-precision floating-point extension \
| (0 << 17) // R - Reserved \
| (0 << 18) // S - Supervisor mode implemented \
| (0 << 19) // T - Tentatively reserved for Transactional Memory extension \
| (1 << 20) // U - User mode implemented \
| (0 << 21) // V - Tentatively reserved for Vector extension \
| (0 << 22) // W - Reserved \
| (1 << 23) // X - Non-standard extensions present \
| (0 << 24) // Y - Reserved \
| (0 << 25) // Z - Reserved
`define MEM_REQ_FLAG_FLUSH 0
`define MEM_REQ_FLAG_IO 1
`define MEM_REQ_FLAG_LOCAL 2 // shoud be last since optional
`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED)
`define VX_MEM_PORTS `L3_MEM_PORTS
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
`define VX_DCR_ADDR_WIDTH `VX_DCR_ADDR_BITS
`define VX_DCR_DATA_WIDTH 32
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CACHE_REQ_INFO // wid PC
`define DBG_CACHE_REQ_MDATAW (`NW_BITS + 32)
`else
`define DBG_CACHE_REQ_MDATAW 0
`endif
`define NEG_EDGE(dst, src) \
VX_edge_trigger #( \
.POS (0), \
.INIT (0) \
) __neg_edge`__LINE__ ( \
.clk (clk), \
.reset (1'b0), \
.data_in (src), \
.data_out (dst) \
)
// non-cacheable address bit
`define NC_ADDR_BITS 1
`define BUFFER_EX(dst, src, ena, resetw, latency) \
VX_pipe_register #( \
.DATAW ($bits(dst)), \
.RESETW (resetw), \
.DEPTH (latency) \
) __buffer_ex`__LINE__ ( \
.clk (clk), \
.reset (reset), \
.enable (ena), \
.data_in (src), \
.data_out (dst) \
)
////////////////////////// Icache Configurable Knobs //////////////////////////
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
// Cache ID
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
`define POP_COUNT_EX(out, in, model) \
VX_popcount #( \
.N ($bits(in)), \
.MODEL (model) \
) __pop_count_ex`__LINE__ ( \
.data_in (in), \
.data_out (out) \
)
// Block size in bytes
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
// Word size in bytes
`define IWORD_SIZE 4
`define ASSIGN_VX_IF(dst, src) \
assign dst.valid = src.valid; \
assign dst.data = src.data; \
assign src.ready = dst.ready
// Number of banks
`define INUM_BANKS 1
`define ASSIGN_VX_MEM_BUS_IF(dst, src) \
assign dst.req_valid = src.req_valid; \
assign dst.req_data = src.req_data; \
assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data = dst.rsp_data; \
assign dst.rsp_ready = src.rsp_ready
// Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \
assign dst.req_valid = src.req_valid; \
assign dst.req_data.rw = 0; \
assign dst.req_data.addr = src.req_data.addr; \
assign dst.req_data.data = '0; \
assign dst.req_data.byteen = '1; \
assign dst.req_data.flags = src.req_data.flags; \
assign dst.req_data.tag = src.req_data.tag; \
assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data.data = dst.rsp_data.data; \
assign src.rsp_data.tag = dst.rsp_data.tag; \
assign dst.rsp_ready = src.rsp_ready
// Core request byte enable bits
`define ICORE_BYTEEN_WIDTH `DWORD_SIZE
`define ASSIGN_VX_MEM_BUS_IF_EX(dst, src, TD, TS, UUID) \
assign dst.req_valid = src.req_valid; \
assign dst.req_data.rw = src.req_data.rw; \
assign dst.req_data.addr = src.req_data.addr; \
assign dst.req_data.data = src.req_data.data; \
assign dst.req_data.byteen = src.req_data.byteen; \
assign dst.req_data.flags = src.req_data.flags; \
/* verilator lint_off GENUNNAMED */ \
if (TD != TS) begin \
if (UUID != 0) begin \
if (TD > TS) begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, {(TD-TS){1'b0}}, src.req_data.tag.value}; \
end else begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, src.req_data.tag.value[TD-UUID-1:0]}; \
end \
end else begin \
if (TD > TS) begin \
assign dst.req_data.tag = {{(TD-TS){1'b0}}, src.req_data.tag}; \
end else begin \
assign dst.req_data.tag = src.req_data.tag[TD-1:0]; \
end \
end \
end else begin \
assign dst.req_data.tag = src.req_data.tag; \
end \
/* verilator lint_on GENUNNAMED */ \
assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data.data = dst.rsp_data.data; \
/* verilator lint_off GENUNNAMED */ \
if (TD != TS) begin \
if (UUID != 0) begin \
if (TD > TS) begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, dst.rsp_data.tag.value[TS-UUID-1:0]}; \
end else begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, {(TS-TD){1'b0}}, dst.rsp_data.tag.value}; \
end \
end else begin \
if (TD > TS) begin \
assign src.rsp_data.tag = dst.rsp_data.tag[TS-1:0]; \
end else begin \
assign src.rsp_data.tag = {{(TS-TD){1'b0}}, dst.rsp_data.tag}; \
end \
end \
end else begin \
assign src.rsp_data.tag = dst.rsp_data.tag; \
end \
/* verilator lint_on GENUNNAMED */ \
assign dst.rsp_ready = src.rsp_ready
// TAG sharing enable
`define ICORE_TAG_ID_BITS `NW_BITS
`define INIT_VX_MEM_BUS_IF(itf) \
assign itf.req_valid = 0; \
assign itf.req_data = '0; \
`UNUSED_VAR (itf.req_ready) \
`UNUSED_VAR (itf.rsp_valid) \
`UNUSED_VAR (itf.rsp_data) \
assign itf.rsp_ready = 0;
// Core request tag bits
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
`define UNUSED_VX_MEM_BUS_IF(itf) \
`UNUSED_VAR (itf.req_valid) \
`UNUSED_VAR (itf.req_data) \
assign itf.req_ready = 0; \
assign itf.rsp_valid = 0; \
assign itf.rsp_data = '0; \
`UNUSED_VAR (itf.rsp_ready)
// Memory request data bits
`define IMEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
// Memory request address bits
`define IMEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE))
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
/* verilator lint_off GENUNNAMED */ \
if (latency != 0) begin \
VX_pipe_register #( \
.DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \
.DEPTH (latency) \
) pipe_reg ( \
.clk (clk), \
.reset (1'b0), \
.enable (1'b1), \
.data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \
.data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
); \
end else begin \
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
end \
/* verilator lint_on GENUNNAMED */
// Memory byte enable bits
`define IMEM_BYTEEN_WIDTH `ICACHE_LINE_SIZE
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
/* verilator lint_off GENUNNAMED */ \
if (count > 1) begin \
wire [count-1:0][width-1:0] __reduce_add_i_field; \
wire [width-1:0] __reduce_add_o_field; \
for (genvar __i = 0; __i < count; ++__i) begin \
assign __reduce_add_i_field[__i] = src[__i].``field; \
end \
VX_reduce_tree #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
__reduce_add_i_field, \
__reduce_add_o_field \
); \
if (reg_enable) begin \
reg [width-1:0] __reduce_add_r_field; \
always @(posedge clk) begin \
if (reset) begin \
__reduce_add_r_field <= '0; \
end else begin \
__reduce_add_r_field <= __reduce_add_o_field; \
end \
end \
assign dst.``field = __reduce_add_r_field; \
end else begin \
assign dst.``field = __reduce_add_o_field; \
end \
end else begin \
assign dst.``field = src[0].``field; \
end \
/* verilator lint_on GENUNNAMED */
// Memory request tag bits
`define IMEM_TAG_WIDTH `IMEM_ADDR_WIDTH
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
/* verilator lint_off GENUNNAMED */ \
if (block_size != 1) begin \
if (block_size != `NUM_WARPS) begin \
assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
end else begin \
assign dst = `NW_WIDTH'(block_idx); \
end \
end else begin \
assign dst = src; \
end \
/* verilator lint_on GENUNNAMED */
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Cache ID
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Block size in bytes
`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
// Word size in bytes
`define DWORD_SIZE 4
// Core request address bits
`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE))
// TAG sharing enable
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE)
// Input request tag bits
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
// Memory request data bits
`define DMEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
// Memory request address bits
`define DMEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
// Memory byte enable bits
`define DMEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
// Input request size
`define DNUM_REQS `NUM_THREADS
// Memory request tag bits
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE)
`define _DNC_MEM_TAG_WIDTH ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH)
`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `NC_ADDR_BITS), `_DNC_MEM_TAG_WIDTH)
////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
// Word size in bytes
`define SWORD_SIZE 4
// bank address offset
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE)
// Input request size
`define SNUM_REQS `NUM_THREADS
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Cache ID
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
// Block size in bytes
`define L2CACHE_LINE_SIZE `MEM_BLOCK_SIZE
// Word size in bytes
`define L2WORD_SIZE `DCACHE_LINE_SIZE
// Input request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
// Memory request data bits
`define L2MEM_DATA_WIDTH (`L2CACHE_LINE_SIZE * 8)
// Memory request address bits
`define L2MEM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE))
// Memory byte enable bits
`define L2MEM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE
// Input request size
`define L2NUM_REQS `NUM_CORES
// Memory request tag bits
`define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE)
`define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L2NC_MEM_TAG_WIDTH)
`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS)))
////////////////////////// L3cache Configurable Knobs /////////////////////////
// Cache ID
`define L3CACHE_ID 0
// Block size in bytes
`define L3CACHE_LINE_SIZE `MEM_BLOCK_SIZE
// Word size in bytes
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
// Input request tag bits
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
// Memory request data bits
`define L3MEM_DATA_WIDTH (`L3CACHE_LINE_SIZE * 8)
// Memory request address bits
`define L3MEM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE))
// Memory byte enable bits
`define L3MEM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE
// Input request size
`define L3NUM_REQS `NUM_CLUSTERS
// Memory request tag bits
`define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE)
`define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH)
`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `NC_ADDR_BITS), `_L3NC_MEM_TAG_WIDTH)
`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS)))
///////////////////////////////////////////////////////////////////////////////
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH
`define VX_MEM_DATA_WIDTH `L3MEM_DATA_WIDTH
`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
// Merged D-cache/I-cache memory tag
`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH + `CLOG2(2))
`include "VX_types.vh"
`endif
`endif // VX_DEFINE_VH

Some files were not shown because too many files have changed in this diff Show more