vortex/tests/opencl/sgemm2/kernel.cl
2024-05-12 20:21:23 -07:00

38 lines
No EOL
1.1 KiB
Common Lisp

#include "common.h"
__kernel void sgemm2(__global TYPE *A,
__global TYPE *B,
__global TYPE *C,
const unsigned int N)
{
int globalRow = get_global_id(1);
int globalCol = get_global_id(0);
int localRow = get_local_id(1);
int localCol = get_local_id(0);
// Static local memory declaration
__local TYPE localA[TILE_SIZE][TILE_SIZE];
__local TYPE localB[TILE_SIZE][TILE_SIZE];
TYPE sum = 0;
// Iterate over blocks
for (int k = 0; k < N; k += TILE_SIZE) {
// Load block of matrix A & B to local memory
localA[localRow][localCol] = A[globalRow * N + (k + localCol)];
localB[localRow][localCol] = B[(k + localRow) * N + globalCol];
// Ensure the entire block is loaded
barrier(CLK_LOCAL_MEM_FENCE);
// Compute multiplication for this block
for (int j = 0; j < TILE_SIZE; j++) {
sum += localA[localRow][j] * localB[j][localCol];
}
// Ensure computation is done before loading next block
barrier(CLK_LOCAL_MEM_FENCE);
}
C[globalRow * N + globalCol] = sum;
}