vortex/tests/opencl/lbm/kernel.cl
2021-06-13 17:42:04 -07:00

424 lines
14 KiB
Common Lisp

/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef LBM_KERNEL_CL
#define LBM_KERNEL_CL
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
/*############################################################################*/
#ifndef _LAYOUT_CONFIG_H_
#define _LAYOUT_CONFIG_H_
/*############################################################################*/
//Unchangeable settings: volume simulation size for the given example
#define SIZE_X (32)
#define SIZE_Y (32)
#define SIZE_Z (32)
//Changeable settings
//Padding in each dimension
#define PADDING_X (8)
#define PADDING_Y (0)
#define PADDING_Z (4)
//Pitch in each dimension
#define PADDED_X (SIZE_X+PADDING_X)
#define PADDED_Y (SIZE_Y+PADDING_Y)
#define PADDED_Z (SIZE_Z+PADDING_Z)
#define TOTAL_CELLS (SIZE_X*SIZE_Y*SIZE_Z)
#define TOTAL_PADDED_CELLS (PADDED_X*PADDED_Y*PADDED_Z)
//Flattening function
// This macro will be used to map a 3-D index and element to a value
// The macro below implements the equivalent of a 3-D array of
// 20-element structures in C standard layout.
#define CALC_INDEX(x,y,z,e) ( e + N_CELL_ENTRIES*\
((x)+(y)*PADDED_X+(z)*PADDED_X*PADDED_Y) )
#define MARGIN (CALC_INDEX(0, 0, 2, 0) - CALC_INDEX(0,0,0,0))
// Set this value to 1 for GATHER, or 0 for SCATTER
#if 1
#define GATHER
#else
#define SCATTER
#endif
//OpenCL block size (not trivially changeable here)
#define BLOCK_SIZE SIZE_X
/*############################################################################*/
typedef enum {C = 0,
N, S, E, W, T, B,
NE, NW, SE, SW,
NT, NB, ST, SB,
ET, EB, WT, WB,
FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
#define N_DISTR_FUNCS FLAGS
typedef enum {OBSTACLE = 1 << 0,
ACCEL = 1 << 1,
IN_OUT_FLOW = 1 << 2} CELL_FLAGS;
#endif /* _CONFIG_H_ */
#ifndef _LBM_MARCOS_H
#define _LBM_MACROS_H_
#define OMEGA (1.95f)
#define OUTPUT_PRECISION float
#define BOOL int
#define TRUE (-1)
#define FALSE (0)
#define DFL1 (1.0f/ 3.0f)
#define DFL2 (1.0f/18.0f)
#define DFL3 (1.0f/36.0f)
/*############################################################################*/
typedef float* LBM_Grid;//float LBM_Grid[PADDED_Z*PADDED_Y*PADDED_X*N_CELL_ENTRIES];
typedef LBM_Grid* LBM_GridPtr;
/*############################################################################*/
#define SWEEP_X __temp_x__
#define SWEEP_Y __temp_y__
#define SWEEP_Z __temp_z__
#define SWEEP_VAR int __temp_x__, __temp_y__, __temp_z__;
#define SWEEP_START(x1,y1,z1,x2,y2,z2) \
for( __temp_z__ = z1; \
__temp_z__ < z2; \
__temp_z__++) { \
for( __temp_y__ = 0; \
__temp_y__ < SIZE_Y; \
__temp_y__++) { \
for(__temp_x__ = 0; \
__temp_x__ < SIZE_X; \
__temp_x__++) { \
#define SWEEP_END }}}
#define GRID_ENTRY(g,x,y,z,e) ((g)[CALC_INDEX( x, y, z, e)])
#define GRID_ENTRY_SWEEP(g,dx,dy,dz,e) ((g)[CALC_INDEX((dx)+SWEEP_X, (dy)+SWEEP_Y, (dz)+SWEEP_Z, e)])
#define LOCAL(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, 0, e ))
#define NEIGHBOR_C(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, 0, e ))
#define NEIGHBOR_N(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, 0, e ))
#define NEIGHBOR_S(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, 0, e ))
#define NEIGHBOR_E(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, 0, e ))
#define NEIGHBOR_W(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, 0, e ))
#define NEIGHBOR_T(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, +1, e ))
#define NEIGHBOR_B(g,e) (GRID_ENTRY_SWEEP( g, 0, 0, -1, e ))
#define NEIGHBOR_NE(g,e) (GRID_ENTRY_SWEEP( g, +1, +1, 0, e ))
#define NEIGHBOR_NW(g,e) (GRID_ENTRY_SWEEP( g, -1, +1, 0, e ))
#define NEIGHBOR_SE(g,e) (GRID_ENTRY_SWEEP( g, +1, -1, 0, e ))
#define NEIGHBOR_SW(g,e) (GRID_ENTRY_SWEEP( g, -1, -1, 0, e ))
#define NEIGHBOR_NT(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, +1, e ))
#define NEIGHBOR_NB(g,e) (GRID_ENTRY_SWEEP( g, 0, +1, -1, e ))
#define NEIGHBOR_ST(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, +1, e ))
#define NEIGHBOR_SB(g,e) (GRID_ENTRY_SWEEP( g, 0, -1, -1, e ))
#define NEIGHBOR_ET(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, +1, e ))
#define NEIGHBOR_EB(g,e) (GRID_ENTRY_SWEEP( g, +1, 0, -1, e ))
#define NEIGHBOR_WT(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, +1, e ))
#define NEIGHBOR_WB(g,e) (GRID_ENTRY_SWEEP( g, -1, 0, -1, e ))
#ifdef SCATTER
#define SRC_C(g) (LOCAL( g, C ))
#define SRC_N(g) (LOCAL( g, N ))
#define SRC_S(g) (LOCAL( g, S ))
#define SRC_E(g) (LOCAL( g, E ))
#define SRC_W(g) (LOCAL( g, W ))
#define SRC_T(g) (LOCAL( g, T ))
#define SRC_B(g) (LOCAL( g, B ))
#define SRC_NE(g) (LOCAL( g, NE ))
#define SRC_NW(g) (LOCAL( g, NW ))
#define SRC_SE(g) (LOCAL( g, SE ))
#define SRC_SW(g) (LOCAL( g, SW ))
#define SRC_NT(g) (LOCAL( g, NT ))
#define SRC_NB(g) (LOCAL( g, NB ))
#define SRC_ST(g) (LOCAL( g, ST ))
#define SRC_SB(g) (LOCAL( g, SB ))
#define SRC_ET(g) (LOCAL( g, ET ))
#define SRC_EB(g) (LOCAL( g, EB ))
#define SRC_WT(g) (LOCAL( g, WT ))
#define SRC_WB(g) (LOCAL( g, WB ))
#define DST_C(g) (NEIGHBOR_C ( g, C ))
#define DST_N(g) (NEIGHBOR_N ( g, N ))
#define DST_S(g) (NEIGHBOR_S ( g, S ))
#define DST_E(g) (NEIGHBOR_E ( g, E ))
#define DST_W(g) (NEIGHBOR_W ( g, W ))
#define DST_T(g) (NEIGHBOR_T ( g, T ))
#define DST_B(g) (NEIGHBOR_B ( g, B ))
#define DST_NE(g) (NEIGHBOR_NE( g, NE ))
#define DST_NW(g) (NEIGHBOR_NW( g, NW ))
#define DST_SE(g) (NEIGHBOR_SE( g, SE ))
#define DST_SW(g) (NEIGHBOR_SW( g, SW ))
#define DST_NT(g) (NEIGHBOR_NT( g, NT ))
#define DST_NB(g) (NEIGHBOR_NB( g, NB ))
#define DST_ST(g) (NEIGHBOR_ST( g, ST ))
#define DST_SB(g) (NEIGHBOR_SB( g, SB ))
#define DST_ET(g) (NEIGHBOR_ET( g, ET ))
#define DST_EB(g) (NEIGHBOR_EB( g, EB ))
#define DST_WT(g) (NEIGHBOR_WT( g, WT ))
#define DST_WB(g) (NEIGHBOR_WB( g, WB ))
#else /* GATHER */
#define SRC_C(g) (NEIGHBOR_C ( g, C ))
#define SRC_N(g) (NEIGHBOR_S ( g, N ))
#define SRC_S(g) (NEIGHBOR_N ( g, S ))
#define SRC_E(g) (NEIGHBOR_W ( g, E ))
#define SRC_W(g) (NEIGHBOR_E ( g, W ))
#define SRC_T(g) (NEIGHBOR_B ( g, T ))
#define SRC_B(g) (NEIGHBOR_T ( g, B ))
#define SRC_NE(g) (NEIGHBOR_SW( g, NE ))
#define SRC_NW(g) (NEIGHBOR_SE( g, NW ))
#define SRC_SE(g) (NEIGHBOR_NW( g, SE ))
#define SRC_SW(g) (NEIGHBOR_NE( g, SW ))
#define SRC_NT(g) (NEIGHBOR_SB( g, NT ))
#define SRC_NB(g) (NEIGHBOR_ST( g, NB ))
#define SRC_ST(g) (NEIGHBOR_NB( g, ST ))
#define SRC_SB(g) (NEIGHBOR_NT( g, SB ))
#define SRC_ET(g) (NEIGHBOR_WB( g, ET ))
#define SRC_EB(g) (NEIGHBOR_WT( g, EB ))
#define SRC_WT(g) (NEIGHBOR_EB( g, WT ))
#define SRC_WB(g) (NEIGHBOR_ET( g, WB ))
#define DST_C(g) (LOCAL( g, C ))
#define DST_N(g) (LOCAL( g, N ))
#define DST_S(g) (LOCAL( g, S ))
#define DST_E(g) (LOCAL( g, E ))
#define DST_W(g) (LOCAL( g, W ))
#define DST_T(g) (LOCAL( g, T ))
#define DST_B(g) (LOCAL( g, B ))
#define DST_NE(g) (LOCAL( g, NE ))
#define DST_NW(g) (LOCAL( g, NW ))
#define DST_SE(g) (LOCAL( g, SE ))
#define DST_SW(g) (LOCAL( g, SW ))
#define DST_NT(g) (LOCAL( g, NT ))
#define DST_NB(g) (LOCAL( g, NB ))
#define DST_ST(g) (LOCAL( g, ST ))
#define DST_SB(g) (LOCAL( g, SB ))
#define DST_ET(g) (LOCAL( g, ET ))
#define DST_EB(g) (LOCAL( g, EB ))
#define DST_WT(g) (LOCAL( g, WT ))
#define DST_WB(g) (LOCAL( g, WB ))
#endif /* GATHER */
#define MAGIC_CAST(v) ((unsigned int*) ((void*) (&(v))))
#define FLAG_VAR(v) unsigned int* _aux_ = MAGIC_CAST(v)
#define TEST_FLAG_SWEEP(g,f) ((*MAGIC_CAST(LOCAL(g, FLAGS))) & (f))
#define SET_FLAG_SWEEP(g,f) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) |= (f);}
#define CLEAR_FLAG_SWEEP(g,f) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) &= ~(f);}
#define CLEAR_ALL_FLAGS_SWEEP(g) {FLAG_VAR(LOCAL(g, FLAGS)); (*_aux_) = 0;}
#define TEST_FLAG(g,x,y,z,f) ((*MAGIC_CAST(GRID_ENTRY(g, x, y, z, FLAGS))) & (f))
#define SET_FLAG(g,x,y,z,f) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) |= (f);}
#define CLEAR_FLAG(g,x,y,z,f) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) &= ~(f);}
#define CLEAR_ALL_FLAGS(g,x,y,z) {FLAG_VAR(GRID_ENTRY(g, x, y, z, FLAGS)); (*_aux_) = 0;}
/*############################################################################*/
#endif /* _CONFIG_H_ */
/******************************************************************************/
__kernel void performStreamCollide_kernel( __global float* srcGrid, __global float* dstGrid )
{
srcGrid += MARGIN;
dstGrid += MARGIN;
//Using some predefined macros here. Consider this the declaration
// and initialization of the variables SWEEP_X, SWEEP_Y and SWEEP_Z
SWEEP_VAR
SWEEP_X = get_local_id(0);
SWEEP_Y = get_group_id(0);
SWEEP_Z = get_group_id(1);
float temp_swp, tempC, tempN, tempS, tempE, tempW, tempT, tempB;
float tempNE, tempNW, tempSE, tempSW, tempNT, tempNB, tempST ;
float tempSB, tempET, tempEB, tempWT, tempWB ;
//Load all of the input fields
//This is a gather operation of the SCATTER preprocessor variable
// is undefined in layout_config.h, or a "local" read otherwise
tempC = SRC_C(srcGrid);
tempN = SRC_N(srcGrid);
tempS = SRC_S(srcGrid);
tempE = SRC_E(srcGrid);
tempW = SRC_W(srcGrid);
tempT = SRC_T(srcGrid);
tempB = SRC_B(srcGrid);
tempNE = SRC_NE(srcGrid);
tempNW = SRC_NW(srcGrid);
tempSE = SRC_SE(srcGrid);
tempSW = SRC_SW(srcGrid);
tempNT = SRC_NT(srcGrid);
tempNB = SRC_NB(srcGrid);
tempST = SRC_ST(srcGrid);
tempSB = SRC_SB(srcGrid);
tempET = SRC_ET(srcGrid);
tempEB = SRC_EB(srcGrid);
tempWT = SRC_WT(srcGrid);
tempWB = SRC_WB(srcGrid);
//Test whether the cell is fluid or obstacle
if(as_uint(LOCAL(srcGrid,FLAGS)) & (OBSTACLE)) {
//Swizzle the inputs: reflect any fluid coming into this cell
// back to where it came from
temp_swp = tempN ; tempN = tempS ; tempS = temp_swp ;
temp_swp = tempE ; tempE = tempW ; tempW = temp_swp;
temp_swp = tempT ; tempT = tempB ; tempB = temp_swp;
temp_swp = tempNE; tempNE = tempSW ; tempSW = temp_swp;
temp_swp = tempNW; tempNW = tempSE ; tempSE = temp_swp;
temp_swp = tempNT ; tempNT = tempSB ; tempSB = temp_swp;
temp_swp = tempNB ; tempNB = tempST ; tempST = temp_swp;
temp_swp = tempET ; tempET= tempWB ; tempWB = temp_swp;
temp_swp = tempEB ; tempEB = tempWT ; tempWT = temp_swp;
}
else {
//The math meat of LBM: ignore for optimization
float ux, uy, uz, rho, u2;
float temp1, temp2, temp_base;
rho = tempC + tempN
+ tempS + tempE
+ tempW + tempT
+ tempB + tempNE
+ tempNW + tempSE
+ tempSW + tempNT
+ tempNB + tempST
+ tempSB + tempET
+ tempEB + tempWT
+ tempWB;
ux = + tempE - tempW
+ tempNE - tempNW
+ tempSE - tempSW
+ tempET + tempEB
- tempWT - tempWB;
uy = + tempN - tempS
+ tempNE + tempNW
- tempSE - tempSW
+ tempNT + tempNB
- tempST - tempSB;
uz = + tempT - tempB
+ tempNT - tempNB
+ tempST - tempSB
+ tempET - tempEB
+ tempWT - tempWB;
ux /= rho;
uy /= rho;
uz /= rho;
if(as_uint(LOCAL(srcGrid,FLAGS)) & (ACCEL)) {
ux = 0.005f;
uy = 0.002f;
uz = 0.000f;
}
u2 = 1.5f * (ux*ux + uy*uy + uz*uz) - 1.0f;
temp_base = OMEGA*rho;
temp1 = DFL1*temp_base;
//Put the output values for this cell in the shared memory
temp_base = OMEGA*rho;
temp1 = DFL1*temp_base;
temp2 = 1.0f-OMEGA;
tempC = temp2*tempC + temp1*( - u2);
temp1 = DFL2*temp_base;
tempN = temp2*tempN + temp1*( uy*(4.5f*uy + 3.0f) - u2);
tempS = temp2*tempS + temp1*( uy*(4.5f*uy - 3.0f) - u2);
tempT = temp2*tempT + temp1*( uz*(4.5f*uz + 3.0f) - u2);
tempB = temp2*tempB + temp1*( uz*(4.5f*uz - 3.0f) - u2);
tempE = temp2*tempE + temp1*( ux*(4.5f*ux + 3.0f) - u2);
tempW = temp2*tempW + temp1*( ux*(4.5f*ux - 3.0f) - u2);
temp1 = DFL3*temp_base;
tempNT= temp2*tempNT + temp1 *( (+uy+uz)*(4.5f*(+uy+uz) + 3.0f) - u2);
tempNB= temp2*tempNB + temp1 *( (+uy-uz)*(4.5f*(+uy-uz) + 3.0f) - u2);
tempST= temp2*tempST + temp1 *( (-uy+uz)*(4.5f*(-uy+uz) + 3.0f) - u2);
tempSB= temp2*tempSB + temp1 *( (-uy-uz)*(4.5f*(-uy-uz) + 3.0f) - u2);
tempNE = temp2*tempNE + temp1 *( (+ux+uy)*(4.5f*(+ux+uy) + 3.0f) - u2);
tempSE = temp2*tempSE + temp1 *((+ux-uy)*(4.5f*(+ux-uy) + 3.0f) - u2);
tempET = temp2*tempET + temp1 *( (+ux+uz)*(4.5f*(+ux+uz) + 3.0f) - u2);
tempEB = temp2*tempEB + temp1 *( (+ux-uz)*(4.5f*(+ux-uz) + 3.0f) - u2);
tempNW = temp2*tempNW + temp1 *( (-ux+uy)*(4.5f*(-ux+uy) + 3.0f) - u2);
tempSW = temp2*tempSW + temp1 *( (-ux-uy)*(4.5f*(-ux-uy) + 3.0f) - u2);
tempWT = temp2*tempWT + temp1 *( (-ux+uz)*(4.5f*(-ux+uz) + 3.0f) - u2);
tempWB = temp2*tempWB + temp1 *( (-ux-uz)*(4.5f*(-ux-uz) + 3.0f) - u2);
}
//Write the results computed above
//This is a scatter operation of the SCATTER preprocessor variable
// is defined in layout_config.h, or a "local" write otherwise
DST_C ( dstGrid ) = tempC;
DST_N ( dstGrid ) = tempN;
DST_S ( dstGrid ) = tempS;
DST_E ( dstGrid ) = tempE;
DST_W ( dstGrid ) = tempW;
DST_T ( dstGrid ) = tempT;
DST_B ( dstGrid ) = tempB;
DST_NE( dstGrid ) = tempNE;
DST_NW( dstGrid ) = tempNW;
DST_SE( dstGrid ) = tempSE;
DST_SW( dstGrid ) = tempSW;
DST_NT( dstGrid ) = tempNT;
DST_NB( dstGrid ) = tempNB;
DST_ST( dstGrid ) = tempST;
DST_SB( dstGrid ) = tempSB;
DST_ET( dstGrid ) = tempET;
DST_EB( dstGrid ) = tempEB;
DST_WT( dstGrid ) = tempWT;
DST_WB( dstGrid ) = tempWB;
}
#endif // LBM_KERNEL_CL