mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
merging error fixed
This commit is contained in:
parent
a9970ae62f
commit
72361b3afe
2 changed files with 11 additions and 11 deletions
|
@ -53,8 +53,6 @@ int main()
|
|||
for (int i = 0; i < w; i=+4) {
|
||||
for (int m = 0; m < d; m++) {
|
||||
vx_vec_sgemm_nn(i, m, n, a1, b1, c1, ldc, vsize);
|
||||
//d1[i+n*ldc] += a1[m+n*ldc]*b1[m*ldc+i];
|
||||
vx_vec_sgemm_nn(i, r, c, a1, b1, c1, ldc, vsize);
|
||||
i = i + vsize;
|
||||
}
|
||||
}
|
||||
|
@ -77,10 +75,10 @@ int main()
|
|||
}
|
||||
|
||||
#if 1
|
||||
printf("\n\nc[%d]:\n", m*n);
|
||||
for(int i = 0; i < m; ++i) {
|
||||
for(int j = 0; j < n; ++j) {
|
||||
printf("%d ", d1[i*m+j]);
|
||||
printf("\n\nc[%d]:\n", w*d);
|
||||
for(int i = 0; i < w; ++i) {
|
||||
for(int j = 0; j < d; ++j) {
|
||||
printf("%d ", d1[i*w+j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
|
|
@ -16,17 +16,19 @@ vx_vec_sgemm_nn:
|
|||
vsetvli t0, a7, e32 # <--- vsize
|
||||
mul x11, a6, a2 # n*ldc
|
||||
add x12, x11, a1 # i + (n*ldc)
|
||||
slli x12, x12, 2
|
||||
add a3, x12, a3 # a[i+ n*ldc]
|
||||
lw x13, (a3)
|
||||
|
||||
mul x14, a1, a6 # m*ldc
|
||||
add x15, a0, x14 # i + m*ldc
|
||||
slli x15, x15, 2
|
||||
add a4, x15, a4 # b[i + m*ldc]
|
||||
vlw.v v0, (a4)
|
||||
vmul.vx v2, v1, x13
|
||||
## lw x6, (a4)
|
||||
# lw x10, (a4) # b
|
||||
# mul x11, x3, x10
|
||||
## lw x10, (a4) # b
|
||||
## mul x11, x3, x10
|
||||
|
||||
mul x6, a2, a6 # n*ldc
|
||||
add x7, a0, x6 # i + n*ldc
|
||||
|
@ -34,7 +36,7 @@ vx_vec_sgemm_nn:
|
|||
vlw.v v3, (a5) # c
|
||||
vadd.vv v3, v3, v2
|
||||
vsw.v v3, (a5)
|
||||
# lw x12, (a5)
|
||||
# add x12, x12, x11
|
||||
# sw x12, (a5)
|
||||
## lw x12, (a5)
|
||||
## add x12, x12, x11
|
||||
## sw x12, (a5)
|
||||
ret
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue