mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
update git ignore
This commit is contained in:
parent
81e8c9da24
commit
2e7ab99d28
3 changed files with 157 additions and 68 deletions
7
.gitignore
vendored
7
.gitignore
vendored
|
@ -59,3 +59,10 @@ tests/regression/rop/
|
|||
tests/regression/tex/
|
||||
tests/opencl/matmul/matmul
|
||||
tests/opencl/oclprintf/oclprintf
|
||||
tests/opencl/nw/nw
|
||||
tests/opencl/lud/lud
|
||||
tests/opencl/particlefilter/particlefilter
|
||||
tests/opencl/hybridsort/hybridsort
|
||||
tests/opencl/convolution/convolution
|
||||
tests/opencl/cfd/cfd
|
||||
tests/opencl/lavaMD/lavaMD
|
||||
|
|
84
tests/opencl/backprop/kernel.cc
Normal file
84
tests/opencl/backprop/kernel.cc
Normal file
|
@ -0,0 +1,84 @@
|
|||
#define THREADS 256
|
||||
#define WIDTH 16
|
||||
#define HEIGHT 16
|
||||
#define ETA 0.3f
|
||||
#define MOMENTUM 0.3f
|
||||
|
||||
#ifndef _BACKPROP_CUDA_KERNEL_H_
|
||||
#define _BACKPROP_CUDA_KERNEL_H_
|
||||
#define WM(i, j) weight_matrix[(j) + (i)*WIDTH]
|
||||
|
||||
__kernel void
|
||||
bpnn_layerforward_ocl(__global float* input_cuda,
|
||||
__global float* output_hidden_cuda,
|
||||
__global float* input_hidden_cuda,
|
||||
__global float* hidden_partial_sum,
|
||||
__local float* input_node,
|
||||
__local float* weight_matrix,
|
||||
int in,
|
||||
int hid)
|
||||
{
|
||||
|
||||
int by = get_group_id(1);
|
||||
int tx = get_local_id(0);
|
||||
int ty = get_local_id(1);
|
||||
|
||||
int index = (hid + 1) * HEIGHT * by + (hid + 1) * ty + tx + 1 + (hid + 1);
|
||||
|
||||
int index_in = HEIGHT * by + ty + 1;
|
||||
|
||||
if (tx == 0)
|
||||
input_node[ty] = input_cuda[index_in];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] * input_node[ty];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int i = 1; i <= HEIGHT; i = i * 2) {
|
||||
int power_two = i;
|
||||
|
||||
if (ty % power_two == 0)
|
||||
weight_matrix[ty * WIDTH + tx] = weight_matrix[ty * WIDTH + tx] + weight_matrix[(ty + power_two / 2) * WIDTH + tx];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tx == 0) {
|
||||
hidden_partial_sum[by * hid + ty] = weight_matrix[tx * WIDTH + ty];
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void bpnn_adjust_weights_ocl(__global float* delta,
|
||||
int hid,
|
||||
__global float* ly,
|
||||
int in,
|
||||
__global float* w,
|
||||
__global float* oldw)
|
||||
{
|
||||
|
||||
int by = get_group_id(1);
|
||||
int tx = get_local_id(0);
|
||||
int ty = get_local_id(1);
|
||||
|
||||
int index = (hid + 1) * HEIGHT * by + (hid + 1) * ty + tx + 1 + (hid + 1);
|
||||
int index_y = HEIGHT * by + ty + 1;
|
||||
int index_x = tx + 1;
|
||||
|
||||
w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index]));
|
||||
oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index]));
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (ty == 0 && by == 0) {
|
||||
w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x]));
|
||||
oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x]));
|
||||
}
|
||||
}
|
||||
#endif
|
|
@ -1,90 +1,88 @@
|
|||
#define THREADS 256
|
||||
#define WIDTH 16
|
||||
#define HEIGHT 16
|
||||
#define ETA 0.3f
|
||||
#define MOMENTUM 0.3f
|
||||
#define WIDTH 16
|
||||
#define HEIGHT 16
|
||||
#define ETA 0.3f
|
||||
#define MOMENTUM 0.3f
|
||||
|
||||
#ifndef _BACKPROP_CUDA_KERNEL_H_
|
||||
#define _BACKPROP_CUDA_KERNEL_H_
|
||||
#define WM(i, j) weight_matrix[(j) + (i) * WIDTH]
|
||||
|
||||
__kernel void
|
||||
__kernel void
|
||||
bpnn_layerforward_ocl(__global float *input_cuda,
|
||||
__global float *output_hidden_cuda,
|
||||
__global float *input_hidden_cuda,
|
||||
__global float *hidden_partial_sum,
|
||||
__local float *input_node,
|
||||
__local float *weight_matrix,
|
||||
int in,
|
||||
int hid)
|
||||
__global float *output_hidden_cuda,
|
||||
__global float *input_hidden_cuda,
|
||||
__global float *hidden_partial_sum,
|
||||
__local float *input_node,
|
||||
__local float *weight_matrix,
|
||||
int in,
|
||||
int hid)
|
||||
{
|
||||
|
||||
int by = get_group_id(1);
|
||||
int tx = get_local_id(0);
|
||||
int ty = get_local_id(1);
|
||||
int by = get_group_id(1);
|
||||
int tx = get_local_id(0);
|
||||
int ty = get_local_id(1);
|
||||
|
||||
int index = ( hid + 1 ) * HEIGHT * by + ( hid + 1 ) * ty + tx + 1 + ( hid + 1 ) ;
|
||||
int index = ( hid + 1 ) * HEIGHT * by + ( hid + 1 ) * ty + tx + 1 + ( hid + 1 ) ;
|
||||
|
||||
int index_in = HEIGHT * by + ty + 1;
|
||||
|
||||
if ( tx == 0 )
|
||||
input_node[ty] = input_cuda[index_in] ;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int index_in = HEIGHT * by + ty + 1;
|
||||
|
||||
weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
weight_matrix[ty * WIDTH + tx]= weight_matrix[ty * WIDTH + tx] * input_node[ty];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for ( int i = 1 ; i <= HEIGHT ; i=i*2){
|
||||
//for ( int i = 1 ; i <= 4 ; i++){
|
||||
int power_two = i;
|
||||
//int power_two = 2 << (i - 1);
|
||||
if ( tx == 0 )
|
||||
input_node[ty] = input_cuda[index_in] ;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if( ty % power_two == 0 )
|
||||
weight_matrix[ty * WIDTH + tx]= weight_matrix[ty * WIDTH + tx] + weight_matrix[(ty + power_two/2)* WIDTH + tx];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
weight_matrix[ty * WIDTH + tx] = input_hidden_cuda[index];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
}
|
||||
|
||||
input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
weight_matrix[ty * WIDTH + tx]= weight_matrix[ty * WIDTH + tx] * input_node[ty];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if ( tx == 0 ) {
|
||||
hidden_partial_sum[by * hid + ty] = weight_matrix[tx* WIDTH + ty];
|
||||
}
|
||||
for ( int i = 1 ; i <= HEIGHT ; i=i*2){
|
||||
int power_two = i;
|
||||
|
||||
}
|
||||
if( ty % power_two == 0 )
|
||||
weight_matrix[ty * WIDTH + tx]= weight_matrix[ty * WIDTH + tx] + weight_matrix[(ty + power_two/2)* WIDTH + tx];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
}
|
||||
|
||||
input_hidden_cuda[index] = weight_matrix[ty * WIDTH + tx];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if ( tx == 0 ) {
|
||||
hidden_partial_sum[by * hid + ty] = weight_matrix[tx* WIDTH + ty];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
__kernel void bpnn_adjust_weights_ocl( __global float * delta,
|
||||
int hid,
|
||||
__global float * ly,
|
||||
int in,
|
||||
__global float * w,
|
||||
__global float * oldw)
|
||||
{
|
||||
|
||||
int by = get_group_id(1);
|
||||
int tx = get_local_id(0);
|
||||
int ty = get_local_id(1);
|
||||
|
||||
int index = ( hid + 1 ) * HEIGHT * by + ( hid + 1 ) * ty + tx + 1 + ( hid + 1 ) ;
|
||||
int index_y = HEIGHT * by + ty + 1;
|
||||
int index_x = tx + 1;
|
||||
__kernel void bpnn_adjust_weights_ocl( __global float * delta,
|
||||
int hid,
|
||||
__global float * ly,
|
||||
int in,
|
||||
__global float * w,
|
||||
__global float * oldw)
|
||||
{
|
||||
|
||||
w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index]));
|
||||
oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index]));
|
||||
int by = get_group_id(1);
|
||||
int tx = get_local_id(0);
|
||||
int ty = get_local_id(1);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int index = ( hid + 1 ) * HEIGHT * by + ( hid + 1 ) * ty + tx + 1 + ( hid + 1 ) ;
|
||||
int index_y = HEIGHT * by + ty + 1;
|
||||
int index_x = tx + 1;
|
||||
|
||||
if (ty == 0 && by ==0){
|
||||
w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x]));
|
||||
oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x]));
|
||||
}
|
||||
w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index]));
|
||||
oldw[index] = ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index]));
|
||||
|
||||
}
|
||||
#endif
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (ty == 0 && by ==0){
|
||||
w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x]));
|
||||
oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x]));
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue