adding opencl runtime and compiler tools

This commit is contained in:
Blaise Tine 2020-03-28 00:35:54 -04:00
parent 96e960fa69
commit cff762c435
50 changed files with 18689 additions and 35 deletions

View file

@ -1,6 +1,6 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
LLVM_LIB_PATH ?= $(wildcard ../compiler/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
@ -16,7 +16,7 @@ SRCS = main.cc
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,193 @@
/* pocl/_kernel_renames.h - Rename OpenCL builtin functions to avoid name
clashes with libm functions which are called in implementation.
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef _KERNEL_RENAMES_H
#define _KERNEL_RENAMES_H
/* Move built-in declarations and libm functions out of the way.
(There should be a better way of doing so. These functions are
built-in math functions for OpenCL (see Clang's "Builtins.def").
Functions defined in libc or libm may also
interfere with OpenCL's functions, since their prototypes will be
wrong. */
#define abs _cl_abs
#define abs_diff _cl_abs_diff
#define acos _cl_acos
#define acosh _cl_acosh
#define acospi _cl_acospi
#define add_sat _cl_add_sat
#define all _cl_all
#define any _cl_any
#define asin _cl_asin
#define asinh _cl_asinh
#define asinpi _cl_asinpi
#define atan _cl_atan
#define atan2 _cl_atan2
#define atan2pi _cl_atan2pi
#define atanh _cl_atanh
#define atanpi _cl_atanpi
#define bitselect _cl_bitselect
#define cbrt _cl_cbrt
#define ceil _cl_ceil
#define clamp _cl_clamp
#define clz _cl_clz
#define copysign _cl_copysign
#define cos _cl_cos
#define cosh _cl_cosh
#define cospi _cl_cospi
#define cross _cl_cross
#define degrees _cl_degrees
#define distance _cl_distance
#define dot _cl_dot
#define erf _cl_erf
#define erfc _cl_erfc
#define exp _cl_exp
#define exp10 _cl_exp10
#define exp2 _cl_exp2
#define expm1 _cl_expm1
#define fabs _cl_fabs
#define fast_distance _cl_fast_distance
#define fast_length _cl_fast_length
#define fast_normalize _cl_fast_normalize
#define fdim _cl_fdim
#define floor _cl_floor
#define fma _cl_fma
#define fmax _cl_fmax
#define fmin _cl_fmin
#define fmod _cl_fmod
#define fract _cl_fract
#define frexp _cl_frexp
#define hadd _cl_hadd
#define half_cos _cl_half_cos
#define half_divide _cl_half_divide
#define half_exp _cl_half_exp
#define half_exp10 _cl_half_exp10
#define half_exp2 _cl_half_exp2
#define half_log _cl_half_log
#define half_log10 _cl_half_log10
#define half_log2 _cl_half_log2
#define half_powr _cl_half_powr
#define half_recip _cl_half_recip
#define half_rsqrt _cl_half_rsqrt
#define half_sin _cl_half_sin
#define half_sqrt _cl_half_sqrt
#define half_tan _cl_half_tan
#define hypot _cl_hypot
#define ilogb _cl_ilogb
#define isequal _cl_isequal
#define isfinite _cl_isfinite
#define isgreater _cl_isgreater
#define isgreaterequal _cl_isgreaterequal
#define isinf _cl_isinf
#define isless _cl_isless
#define islessequal _cl_islessequal
#define islessgreater _cl_islessgreater
#define isnan _cl_isnan
#define isnormal _cl_isnormal
#define isnotequal _cl_isnotequal
#define isordered _cl_isordered
#define isunordered _cl_isunordered
#define ldexp _cl_ldexp
#define length _cl_length
#define lgamma _cl_lgamma
#define lgamma_r _cl_lgamma_r
#define log _cl_log
#define log10 _cl_log10
#define log1p _cl_log1p
#define log2 _cl_log2
#define logb _cl_logb
#define mad _cl_mad
#define mad24 _cl_mad24
#define mad_hi _cl_mad_hi
#define mad_sat _cl_mad_sat
#define max _cl_max
#define maxmag _cl_maxmag
#define min _cl_min
#define minmag _cl_minmag
#define mix _cl_mix
#define modf _cl_modf
#define mul24 _cl_mul24
#define mul_hi _cl_mul_hi
#define nan _cl_nan
#define native_cos _cl_native_cos
#define native_divide _cl_native_divide
#define native_exp _cl_native_exp
#define native_exp10 _cl_native_exp10
#define native_exp2 _cl_native_exp2
#define native_log _cl_native_log
#define native_log10 _cl_native_log10
#define native_log2 _cl_native_log2
#define native_powr _cl_native_powr
#define native_recip _cl_native_recip
#define native_rsqrt _cl_native_rsqrt
#define native_sin _cl_native_sin
#define native_sqrt _cl_native_sqrt
#define native_tan _cl_native_tan
#define nextafter _cl_nextafter
#define normalize _cl_normalize
#define popcount _cl_popcount
#define pow _cl_pow
#define pown _cl_pown
#define powr _cl_powr
#define radians _cl_radians
#define remainder _cl_remainder
#define remquo _cl_remquo
#define rhadd _cl_rhadd
#define rint _cl_rint
#define rootn _cl_rootn
#define rotate _cl_rotate
#define round _cl_round
#define rsqrt _cl_rsqrt
#define select _cl_select
#define sign _cl_sign
#define signbit _cl_signbit
#define sin _cl_sin
#define sincos _cl_sincos
#define sinh _cl_sinh
#define sinpi _cl_sinpi
#define smoothstep _cl_smoothstep
#define sqrt _cl_sqrt
#define step _cl_step
#define sub_sat _cl_sub_sat
#define tan _cl_tan
#define tanh _cl_tanh
#define tanpi _cl_tanpi
#define tgamma _cl_tgamma
#define trunc _cl_trunc
#define upsample _cl_upsample
#define atom_add atomic_add
#define atom_sub atomic_sub
#define atom_xchg atomic_xchg
#define atom_inc atomic_inc
#define atom_dec atomic_dec
#define atom_cmpxchg atomic_cmpxchg
#define atom_min atomic_min
#define atom_max atomic_max
#define atom_and atomic_and
#define atom_or atomic_or
#define atom_xor atomic_xor
#endif

View file

@ -0,0 +1,91 @@
/* This file includes opencl-c.h from Clang and fixes a few pocl extras.
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Copyright (c) 2017 Michal Babej / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef _OPENCL_H_
/* Use the declarations shipped with Clang. */
/* Check for _OPENCL_H already here because the kernel compiler loads the
header beforehand, but cannot find the file due to include paths not
set up. */
#include <opencl-c.h>
/* Missing declarations from opencl-c.h. Some of the geometric builtins are
defined only up to 4 vectors, but we implement them all: */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
half _CL_OVERLOADABLE _CL_READNONE length (half8 p);
half _CL_OVERLOADABLE _CL_READNONE length (half16 p);
half _CL_OVERLOADABLE _CL_READNONE fast_length (half8 p);
half _CL_OVERLOADABLE _CL_READNONE fast_length (half16 p);
half8 _CL_OVERLOADABLE _CL_READNONE normalize (half8 p);
half16 _CL_OVERLOADABLE _CL_READNONE normalize (half16 p);
half8 _CL_OVERLOADABLE _CL_READNONE fast_normalize (half8 p);
half16 _CL_OVERLOADABLE _CL_READNONE fast_normalize (half16 p);
half _CL_OVERLOADABLE _CL_READNONE dot (half8 p0, half8 p1);
half _CL_OVERLOADABLE _CL_READNONE dot (half16 p0, half16 p1);
#endif
float _CL_OVERLOADABLE _CL_READNONE length (float8 p);
float _CL_OVERLOADABLE _CL_READNONE length (float16 p);
float _CL_OVERLOADABLE _CL_READNONE fast_length (float8 p);
float _CL_OVERLOADABLE _CL_READNONE fast_length (float16 p);
float8 _CL_OVERLOADABLE _CL_READNONE normalize (float8 p);
float16 _CL_OVERLOADABLE _CL_READNONE normalize (float16 p);
float8 _CL_OVERLOADABLE _CL_READNONE fast_normalize (float8 p);
float16 _CL_OVERLOADABLE _CL_READNONE fast_normalize (float16 p);
float _CL_OVERLOADABLE _CL_READNONE dot (float8 p0, float8 p1);
float _CL_OVERLOADABLE _CL_READNONE dot (float16 p0, float16 p1);
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
double _CL_OVERLOADABLE _CL_READNONE length (double8 p);
double _CL_OVERLOADABLE _CL_READNONE length (double16 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double2 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double3 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double4 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double8 p);
double _CL_OVERLOADABLE _CL_READNONE fast_length (double16 p);
double8 _CL_OVERLOADABLE _CL_READNONE normalize (double8 p);
double16 _CL_OVERLOADABLE _CL_READNONE normalize (double16 p);
double8 _CL_OVERLOADABLE _CL_READNONE fast_normalize (double8 p);
double16 _CL_OVERLOADABLE _CL_READNONE fast_normalize (double16 p);
double _CL_OVERLOADABLE _CL_READNONE dot (double8 p0, double8 p1);
double _CL_OVERLOADABLE _CL_READNONE dot (double16 p0, double16 p1);
#endif
#endif

View file

@ -0,0 +1,58 @@
/* Enable all extensions known to pocl, which a device supports.
* This is required at the start of include/_kernel.h for prototypes,
* then at kernel lib compilation phase (because _kernel.h disables
* everything at the end).
*/
/* OpenCL 1.0-only extensions */
#if (__OPENCL_C_VERSION__ < 110)
#ifdef cl_khr_global_int32_base_atomics
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
#ifdef cl_khr_global_int32_extended_atomics
# pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
#endif
#ifdef cl_khr_local_int32_base_atomics
# pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
#endif
#ifdef cl_khr_local_int32_extended_atomics
# pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
#endif
#ifdef cl_khr_byte_addressable_store
# pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#endif
#endif
/* all versions */
#ifdef cl_khr_fp16
# pragma OPENCL EXTENSION cl_khr_fp16: enable
#endif
#ifdef cl_khr_fp64
# pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
#ifdef cl_khr_int64_base_atomics
# pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif
#ifdef cl_khr_int64_extended_atomics
# pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
#endif
#if (__clang_major__ > 4)
#ifdef cl_khr_3d_image_writes
# pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
#endif
#endif

View file

@ -0,0 +1,233 @@
/* pocl/_kernel.h - OpenCL types and runtime library
functions declarations. This should be included only from OpenCL C files.
Copyright (c) 2011 Universidad Rey Juan Carlos
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* If the -cl-std build option is not specified, the highest OpenCL C 1.x
* language version supported by each device is used as the version of
* OpenCL C when compiling the program for each device.
*/
#ifndef __OPENCL_C_VERSION__
#define __OPENCL_C_VERSION__ 120
#endif
#if (__OPENCL_C_VERSION__ > 99)
#define CL_VERSION_1_0 100
#endif
#if (__OPENCL_C_VERSION__ > 109)
#define CL_VERSION_1_1 110
#endif
#if (__OPENCL_C_VERSION__ > 119)
#define CL_VERSION_1_2 120
#endif
#if (__OPENCL_C_VERSION__ > 199)
#define CL_VERSION_2_0 200
#endif
#include "_enable_all_exts.h"
#include "_builtin_renames.h"
/* Define some feature test macros to help write generic code. These are used
* mostly in _pocl_opencl.h header + some .cl files in kernel library */
#ifdef cl_khr_int64
# define __IF_INT64(x) x
#else
# define __IF_INT64(x)
#endif
#ifdef cl_khr_fp16
# define __IF_FP16(x) x
#else
# define __IF_FP16(x)
#endif
#ifdef cl_khr_fp64
# define __IF_FP64(x) x
#else
# define __IF_FP64(x)
#endif
#ifdef cl_khr_int64_base_atomics
#define __IF_BA64(x) x
#else
#define __IF_BA64(x)
#endif
#ifdef cl_khr_int64_extended_atomics
#define __IF_EA64(x) x
#else
#define __IF_EA64(x)
#endif
/****************************************************************************/
/* Function/type attributes supported by Clang/SPIR */
#if __has_attribute(__always_inline__)
# define _CL_ALWAYSINLINE __attribute__((__always_inline__))
#else
# define _CL_ALWAYSINLINE
#endif
#if __has_attribute(__noinline__)
# define _CL_NOINLINE __attribute__((__noinline__))
#else
# define _CL_NOINLINE
#endif
#if __has_attribute(__overloadable__)
# define _CL_OVERLOADABLE __attribute__((__overloadable__))
#else
# define _CL_OVERLOADABLE
#endif
#if __has_attribute(__pure__)
# define _CL_READONLY __attribute__((__pure__))
#else
# define _CL_READONLY
#endif
#if __has_attribute(__const__)
# define _CL_READNONE __attribute__((__const__))
#else
# define _CL_READNONE
#endif
#if __has_attribute(convergent)
# define _CL_CONVERGENT __attribute__((convergent))
#else
# define _CL_CONVERGENT
#endif
/************************ setup Clang version macros ******************/
#if (__clang_major__ == 6)
# undef LLVM_6_0
# define LLVM_6_0
#elif (__clang_major__ == 7)
# undef LLVM_7_0
# define LLVM_7_0
#elif (__clang_major__ == 8)
# undef LLVM_8_0
# define LLVM_8_0
#elif (__clang_major__ == 9)
# undef LLVM_9_0
# define LLVM_9_0
#elif (__clang_major__ == 10)
# undef LLVM_10_0
# define LLVM_10_0
#else
#error Unsupported Clang/LLVM version.
#endif
#ifndef LLVM_10_0
#define LLVM_OLDER_THAN_10_0 1
#ifndef LLVM_9_0
#define LLVM_OLDER_THAN_9_0 1
#ifndef LLVM_8_0
#define LLVM_OLDER_THAN_8_0 1
#ifndef LLVM_7_0
#define LLVM_OLDER_THAN_7_0 1
#ifndef LLVM_6_0
#define LLVM_OLDER_THAN_6_0 1
#endif
#endif
#endif
#endif
#endif
/****************************************************************************/
/* A static assert statement to catch inconsistencies at build time */
#if __has_extension(__c_static_assert__)
# define _CL_STATIC_ASSERT(_t, _x) _Static_assert(_x, #_t)
#else
# define _CL_STATIC_ASSERT(_t, _x) typedef int __cl_ai##_t[(x) ? 1 : -1];
#endif
/****************************************************************************/
#define IMG_RO_AQ __read_only
#define IMG_WO_AQ __write_only
#if (__OPENCL_C_VERSION__ > 199)
#define CLANG_HAS_RW_IMAGES
#define IMG_RW_AQ __read_write
#else
#undef CLANG_HAS_RW_IMAGES
#define IMG_RW_AQ __RW_IMAGES_UNSUPPORTED_BEFORE_CL_20
#endif
/****************************************************************************/
/* use Clang opencl header for definitions. */
#ifdef POCL_DEVICE_ADDRESS_BITS
/* If we wish to override the Clang set __SIZE_TYPE__ for this target,
let's do it here so the opencl-c.h sets size_t to the wanted type. */
#ifdef __SIZE_TYPE__
#undef __SIZE_TYPE__
#endif
#if POCL_DEVICE_ADDRESS_BITS == 32
#define __SIZE_TYPE__ uint
#elif POCL_DEVICE_ADDRESS_BITS == 64
#define __SIZE_TYPE__ ulong
#else
#error Unsupported POCL_DEVICE_ADDRESS_BITS value.
#endif
#endif
#include "_clang_opencl.h"
/****************************************************************************/
/* GNU's libm seems to use INT_MIN here while the Clang's header uses
INT_MAX. Both are allowed by the OpenCL specs, but we want them to
be unified to avoid failing tests. */
#undef FP_ILOGBNAN
#undef FP_ILOGB0
#define FP_ILOGBNAN INT_MIN
#define FP_ILOGB0 INT_MIN
/****************************************************************************/
#include "pocl_image_types.h"
#pragma OPENCL EXTENSION all : disable

View file

@ -0,0 +1,189 @@
/* pocl/_kernel_c.h - C compatible OpenCL types and runtime library
functions declarations for kernel builtin implementations using C.
Copyright (c) 2011 Universidad Rey Juan Carlos
Copyright (c) 2011-2017 Pekka Jääskeläinen / TUT
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* Header that can be implemented in C compiled implementations of
* built-in functions to introduce the OpenCL C compatible types etc.
*/
#ifndef _KERNEL_C_H
#define _KERNEL_C_H
#include "pocl_types.h"
#include "_kernel_constants.h"
/* Function/type attributes supported by Clang/SPIR */
#if __has_attribute(__always_inline__)
# define _CL_ALWAYSINLINE __attribute__((__always_inline__))
#else
# define _CL_ALWAYSINLINE
#endif
#if __has_attribute(__noinline__)
# define _CL_NOINLINE __attribute__((__noinline__))
#else
# define _CL_NOINLINE
#endif
#if __has_attribute(__overloadable__)
# define _CL_OVERLOADABLE __attribute__((__overloadable__))
#else
# define _CL_OVERLOADABLE
#endif
#if __has_attribute(__pure__)
# define _CL_READONLY __attribute__((__pure__))
#else
# define _CL_READONLY
#endif
#if __has_attribute(__const__)
# define _CL_READNONE __attribute__((__const__))
#else
# define _CL_READNONE
#endif
#if __has_attribute(convergent)
# define _CL_CONVERGENT __attribute__((convergent))
#else
# define _CL_CONVERGENT
#endif
typedef char char2 __attribute__((__ext_vector_type__(2)));
typedef char char3 __attribute__((__ext_vector_type__(3)));
typedef char char4 __attribute__((__ext_vector_type__(4)));
typedef char char8 __attribute__((__ext_vector_type__(8)));
typedef char char16 __attribute__((__ext_vector_type__(16)));
typedef uchar uchar2 __attribute__((__ext_vector_type__(2)));
typedef uchar uchar3 __attribute__((__ext_vector_type__(3)));
typedef uchar uchar4 __attribute__((__ext_vector_type__(4)));
typedef uchar uchar8 __attribute__((__ext_vector_type__(8)));
typedef uchar uchar16 __attribute__((__ext_vector_type__(16)));
typedef short short2 __attribute__((__ext_vector_type__(2)));
typedef short short3 __attribute__((__ext_vector_type__(3)));
typedef short short4 __attribute__((__ext_vector_type__(4)));
typedef short short8 __attribute__((__ext_vector_type__(8)));
typedef short short16 __attribute__((__ext_vector_type__(16)));
typedef ushort ushort2 __attribute__((__ext_vector_type__(2)));
typedef ushort ushort3 __attribute__((__ext_vector_type__(3)));
typedef ushort ushort4 __attribute__((__ext_vector_type__(4)));
typedef ushort ushort8 __attribute__((__ext_vector_type__(8)));
typedef ushort ushort16 __attribute__((__ext_vector_type__(16)));
typedef int int2 __attribute__((__ext_vector_type__(2)));
typedef int int3 __attribute__((__ext_vector_type__(3)));
typedef int int4 __attribute__((__ext_vector_type__(4)));
typedef int int8 __attribute__((__ext_vector_type__(8)));
typedef int int16 __attribute__((__ext_vector_type__(16)));
typedef uint uint2 __attribute__((__ext_vector_type__(2)));
typedef uint uint3 __attribute__((__ext_vector_type__(3)));
typedef uint uint4 __attribute__((__ext_vector_type__(4)));
typedef uint uint8 __attribute__((__ext_vector_type__(8)));
typedef uint uint16 __attribute__((__ext_vector_type__(16)));
#if defined(__CBUILD__) && defined(cl_khr_fp16)
/* NOTE: the Clang's __fp16 does not work robustly in C mode,
it might produce invalid code at least with half vectors.
Using the native 'half' type in OpenCL C mode works better. */
typedef __fp16 half;
#endif
typedef half half2 __attribute__((__ext_vector_type__(2)));
typedef half half3 __attribute__((__ext_vector_type__(3)));
typedef half half4 __attribute__((__ext_vector_type__(4)));
typedef half half8 __attribute__((__ext_vector_type__(8)));
typedef half half16 __attribute__((__ext_vector_type__(16)));
typedef float float2 __attribute__((__ext_vector_type__(2)));
typedef float float3 __attribute__((__ext_vector_type__(3)));
typedef float float4 __attribute__((__ext_vector_type__(4)));
typedef float float8 __attribute__((__ext_vector_type__(8)));
typedef float float16 __attribute__((__ext_vector_type__(16)));
#ifdef cl_khr_fp64
# ifndef __CBUILD__
# pragma OPENCL EXTENSION cl_khr_fp64 : enable
# endif
typedef double double2 __attribute__((__ext_vector_type__(2)));
typedef double double3 __attribute__((__ext_vector_type__(3)));
typedef double double4 __attribute__((__ext_vector_type__(4)));
typedef double double8 __attribute__((__ext_vector_type__(8)));
typedef double double16 __attribute__((__ext_vector_type__(16)));
#endif
#ifdef cl_khr_int64
typedef long long2 __attribute__((__ext_vector_type__(2)));
typedef long long3 __attribute__((__ext_vector_type__(3)));
typedef long long4 __attribute__((__ext_vector_type__(4)));
typedef long long8 __attribute__((__ext_vector_type__(8)));
typedef long long16 __attribute__((__ext_vector_type__(16)));
typedef ulong ulong2 __attribute__((__ext_vector_type__(2)));
typedef ulong ulong3 __attribute__((__ext_vector_type__(3)));
typedef ulong ulong4 __attribute__((__ext_vector_type__(4)));
typedef ulong ulong8 __attribute__((__ext_vector_type__(8)));
typedef ulong ulong16 __attribute__((__ext_vector_type__(16)));
#endif
#if defined(__TCE__)
#define POCL_ADDRESS_SPACE_PRIVATE 0
#define POCL_ADDRESS_SPACE_GLOBAL 1
#define POCL_ADDRESS_SPACE_LOCAL 3
#define POCL_ADDRESS_SPACE_CONSTANT 2
#define POCL_ADDRESS_SPACE_GENERIC 6
#endif
typedef uint cl_mem_fence_flags;
/* Integer Constants */
#if defined(__CBUILD__)
#define CHAR_BIT 8
#define CHAR_MAX SCHAR_MAX
#define CHAR_MIN SCHAR_MIN
#define INT_MAX 2147483647
#define INT_MIN (-2147483647 - 1)
#ifdef cl_khr_int64
#define LONG_MAX 0x7fffffffffffffffL
#define LONG_MIN (-0x7fffffffffffffffL - 1)
#endif
#define SCHAR_MAX 127
#define SCHAR_MIN (-127 - 1)
#define SHRT_MAX 32767
#define SHRT_MIN (-32767 - 1)
#define UCHAR_MAX 255
#define USHRT_MAX 65535
#define UINT_MAX 0xffffffff
#ifdef cl_khr_int64
#define ULONG_MAX 0xffffffffffffffffUL
#endif
#endif /* __CBUILD__ */
#endif

View file

@ -0,0 +1,93 @@
/* pocl/_kernel_constants.h - C compatible OpenCL types and runtime library
constants declarations.
Copyright (c) 2011 Universidad Rey Juan Carlos
Copyright (c) 2011-2013 Pekka Jääskeläinen / TUT
Copyright (c) 2011-2013 Erik Schnetter <eschnetter@perimeterinstitute.ca>
Perimeter Institute for Theoretical Physics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* Header that can be implemented in C compiled implementations of
* built-in functions to introduce the OpenCL C compatible constants.
*/
#ifndef _KERNEL_CONSTANTS_H
#define _KERNEL_CONSTANTS_H
/* clang's header defines these */
#ifndef _OPENCL_H_
/* cl_channel_order */
#define CLK_R 0x10B0
#define CLK_A 0x10B1
#define CLK_RG 0x10B2
#define CLK_RA 0x10B3
#define CLK_RGB 0x10B4
#define CLK_RGBA 0x10B5
#define CLK_BGRA 0x10B6
#define CLK_ARGB 0x10B7
#define CLK_INTENSITY 0x10B8
#define CLK_LUMINANCE 0x10B9
#define CLK_Rx 0x10BA
#define CLK_RGx 0x10BB
#define CLK_RGBx 0x10BC
#define CLK_DEPTH 0x10BD
#define CLK_DEPTH_STENCIL 0x10BE
/* cl_channel_type */
#define CLK_SNORM_INT8 0x10D0
#define CLK_SNORM_INT16 0x10D1
#define CLK_UNORM_INT8 0x10D2
#define CLK_UNORM_INT16 0x10D3
#define CLK_UNORM_SHORT_565 0x10D4
#define CLK_UNORM_SHORT_555 0x10D5
#define CLK_UNORM_INT_101010 0x10D6
#define CLK_SIGNED_INT8 0x10D7
#define CLK_SIGNED_INT16 0x10D8
#define CLK_SIGNED_INT32 0x10D9
#define CLK_UNSIGNED_INT8 0x10DA
#define CLK_UNSIGNED_INT16 0x10DB
#define CLK_UNSIGNED_INT32 0x10DC
#define CLK_HALF_FLOAT 0x10DD
#define CLK_FLOAT 0x10DE
#define CLK_UNORM_INT24 0x10DF
/* cl_addressing _mode */
#define CLK_ADDRESS_NONE 0x00
#define CLK_ADDRESS_CLAMP_TO_EDGE 0x02
#define CLK_ADDRESS_CLAMP 0x04
#define CLK_ADDRESS_REPEAT 0x06
#define CLK_ADDRESS_MIRRORED_REPEAT 0x08
/* cl_sampler_info */
#define CLK_NORMALIZED_COORDS_FALSE 0x00
#define CLK_NORMALIZED_COORDS_TRUE 0x01
/* filter_mode */
#define CLK_FILTER_NEAREST 0x10
#define CLK_FILTER_LINEAR 0x20
/* barrier() flags */
#define CLK_LOCAL_MEM_FENCE 0x01
#define CLK_GLOBAL_MEM_FENCE 0x02
#endif
#endif

View file

@ -0,0 +1,571 @@
//===----- opencl-c-base.h - OpenCL C language base definitions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _OPENCL_BASE_H_
#define _OPENCL_BASE_H_
// built-in scalar data types:
/**
* An unsigned 8-bit integer.
*/
typedef unsigned char uchar;
/**
* An unsigned 16-bit integer.
*/
typedef unsigned short ushort;
/**
* An unsigned 32-bit integer.
*/
typedef unsigned int uint;
/**
* An unsigned 64-bit integer.
*/
typedef unsigned long ulong;
/**
* The unsigned integer type of the result of the sizeof operator. This
* is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS
* defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if
* CL_DEVICE_ADDRESS_BITS is 64-bits.
*/
typedef __SIZE_TYPE__ size_t;
/**
* A signed integer type that is the result of subtracting two pointers.
* This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS
* defined in table 4.3 is 32-bits and is a 64-bit signed integer if
* CL_DEVICE_ADDRESS_BITS is 64-bits.
*/
typedef __PTRDIFF_TYPE__ ptrdiff_t;
/**
* A signed integer type with the property that any valid pointer to
* void can be converted to this type, then converted back to pointer
* to void, and the result will compare equal to the original pointer.
*/
typedef __INTPTR_TYPE__ intptr_t;
/**
* An unsigned integer type with the property that any valid pointer to
* void can be converted to this type, then converted back to pointer
* to void, and the result will compare equal to the original pointer.
*/
typedef __UINTPTR_TYPE__ uintptr_t;
// built-in vector data types:
typedef char char2 __attribute__((ext_vector_type(2)));
typedef char char3 __attribute__((ext_vector_type(3)));
typedef char char4 __attribute__((ext_vector_type(4)));
typedef char char8 __attribute__((ext_vector_type(8)));
typedef char char16 __attribute__((ext_vector_type(16)));
typedef uchar uchar2 __attribute__((ext_vector_type(2)));
typedef uchar uchar3 __attribute__((ext_vector_type(3)));
typedef uchar uchar4 __attribute__((ext_vector_type(4)));
typedef uchar uchar8 __attribute__((ext_vector_type(8)));
typedef uchar uchar16 __attribute__((ext_vector_type(16)));
typedef short short2 __attribute__((ext_vector_type(2)));
typedef short short3 __attribute__((ext_vector_type(3)));
typedef short short4 __attribute__((ext_vector_type(4)));
typedef short short8 __attribute__((ext_vector_type(8)));
typedef short short16 __attribute__((ext_vector_type(16)));
typedef ushort ushort2 __attribute__((ext_vector_type(2)));
typedef ushort ushort3 __attribute__((ext_vector_type(3)));
typedef ushort ushort4 __attribute__((ext_vector_type(4)));
typedef ushort ushort8 __attribute__((ext_vector_type(8)));
typedef ushort ushort16 __attribute__((ext_vector_type(16)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef int int8 __attribute__((ext_vector_type(8)));
typedef int int16 __attribute__((ext_vector_type(16)));
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint3 __attribute__((ext_vector_type(3)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
typedef uint uint8 __attribute__((ext_vector_type(8)));
typedef uint uint16 __attribute__((ext_vector_type(16)));
typedef long long2 __attribute__((ext_vector_type(2)));
typedef long long3 __attribute__((ext_vector_type(3)));
typedef long long4 __attribute__((ext_vector_type(4)));
typedef long long8 __attribute__((ext_vector_type(8)));
typedef long long16 __attribute__((ext_vector_type(16)));
typedef ulong ulong2 __attribute__((ext_vector_type(2)));
typedef ulong ulong3 __attribute__((ext_vector_type(3)));
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
typedef ulong ulong8 __attribute__((ext_vector_type(8)));
typedef ulong ulong16 __attribute__((ext_vector_type(16)));
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float8 __attribute__((ext_vector_type(8)));
typedef float float16 __attribute__((ext_vector_type(16)));
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
typedef half half2 __attribute__((ext_vector_type(2)));
typedef half half3 __attribute__((ext_vector_type(3)));
typedef half half4 __attribute__((ext_vector_type(4)));
typedef half half8 __attribute__((ext_vector_type(8)));
typedef half half16 __attribute__((ext_vector_type(16)));
#endif
#ifdef cl_khr_fp64
#if __OPENCL_C_VERSION__ < CL_VERSION_1_2
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
typedef double double2 __attribute__((ext_vector_type(2)));
typedef double double3 __attribute__((ext_vector_type(3)));
typedef double double4 __attribute__((ext_vector_type(4)));
typedef double double8 __attribute__((ext_vector_type(8)));
typedef double double16 __attribute__((ext_vector_type(16)));
#endif
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#define NULL ((void*)0)
#endif
/**
* Value of maximum non-infinite single-precision floating-point
* number.
*/
#define MAXFLOAT 0x1.fffffep127f
/**
* A positive float constant expression. HUGE_VALF evaluates
* to +infinity. Used as an error value returned by the built-in
* math functions.
*/
#define HUGE_VALF (__builtin_huge_valf())
/**
* A positive double constant expression. HUGE_VAL evaluates
* to +infinity. Used as an error value returned by the built-in
* math functions.
*/
#define HUGE_VAL (__builtin_huge_val())
/**
* A constant expression of type float representing positive or
* unsigned infinity.
*/
#define INFINITY (__builtin_inff())
/**
* A constant expression of type float representing a quiet NaN.
*/
#define NAN as_float(INT_MAX)
#define FP_ILOGB0 INT_MIN
#define FP_ILOGBNAN INT_MAX
#define FLT_DIG 6
#define FLT_MANT_DIG 24
#define FLT_MAX_10_EXP +38
#define FLT_MAX_EXP +128
#define FLT_MIN_10_EXP -37
#define FLT_MIN_EXP -125
#define FLT_RADIX 2
#define FLT_MAX 0x1.fffffep127f
#define FLT_MIN 0x1.0p-126f
#define FLT_EPSILON 0x1.0p-23f
#define M_E_F 2.71828182845904523536028747135266250f
#define M_LOG2E_F 1.44269504088896340735992468100189214f
#define M_LOG10E_F 0.434294481903251827651128918916605082f
#define M_LN2_F 0.693147180559945309417232121458176568f
#define M_LN10_F 2.30258509299404568401799145468436421f
#define M_PI_F 3.14159265358979323846264338327950288f
#define M_PI_2_F 1.57079632679489661923132169163975144f
#define M_PI_4_F 0.785398163397448309615660845819875721f
#define M_1_PI_F 0.318309886183790671537767526745028724f
#define M_2_PI_F 0.636619772367581343075535053490057448f
#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f
#define M_SQRT2_F 1.41421356237309504880168872420969808f
#define M_SQRT1_2_F 0.707106781186547524400844362104849039f
#define DBL_DIG 15
#define DBL_MANT_DIG 53
#define DBL_MAX_10_EXP +308
#define DBL_MAX_EXP +1024
#define DBL_MIN_10_EXP -307
#define DBL_MIN_EXP -1021
#define DBL_RADIX 2
#define DBL_MAX 0x1.fffffffffffffp1023
#define DBL_MIN 0x1.0p-1022
#define DBL_EPSILON 0x1.0p-52
#define M_E 0x1.5bf0a8b145769p+1
#define M_LOG2E 0x1.71547652b82fep+0
#define M_LOG10E 0x1.bcb7b1526e50ep-2
#define M_LN2 0x1.62e42fefa39efp-1
#define M_LN10 0x1.26bb1bbb55516p+1
#define M_PI 0x1.921fb54442d18p+1
#define M_PI_2 0x1.921fb54442d18p+0
#define M_PI_4 0x1.921fb54442d18p-1
#define M_1_PI 0x1.45f306dc9c883p-2
#define M_2_PI 0x1.45f306dc9c883p-1
#define M_2_SQRTPI 0x1.20dd750429b6dp+0
#define M_SQRT2 0x1.6a09e667f3bcdp+0
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
#ifdef cl_khr_fp16
#define HALF_DIG 3
#define HALF_MANT_DIG 11
#define HALF_MAX_10_EXP +4
#define HALF_MAX_EXP +16
#define HALF_MIN_10_EXP -4
#define HALF_MIN_EXP -13
#define HALF_RADIX 2
#define HALF_MAX ((0x1.ffcp15h))
#define HALF_MIN ((0x1.0p-14h))
#define HALF_EPSILON ((0x1.0p-10h))
#define M_E_H 2.71828182845904523536028747135266250h
#define M_LOG2E_H 1.44269504088896340735992468100189214h
#define M_LOG10E_H 0.434294481903251827651128918916605082h
#define M_LN2_H 0.693147180559945309417232121458176568h
#define M_LN10_H 2.30258509299404568401799145468436421h
#define M_PI_H 3.14159265358979323846264338327950288h
#define M_PI_2_H 1.57079632679489661923132169163975144h
#define M_PI_4_H 0.785398163397448309615660845819875721h
#define M_1_PI_H 0.318309886183790671537767526745028724h
#define M_2_PI_H 0.636619772367581343075535053490057448h
#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h
#define M_SQRT2_H 1.41421356237309504880168872420969808h
#define M_SQRT1_2_H 0.707106781186547524400844362104849039h
#endif //cl_khr_fp16
#define CHAR_BIT 8
#define SCHAR_MAX 127
#define SCHAR_MIN (-128)
#define UCHAR_MAX 255
#define CHAR_MAX SCHAR_MAX
#define CHAR_MIN SCHAR_MIN
#define USHRT_MAX 65535
#define SHRT_MAX 32767
#define SHRT_MIN (-32768)
#define UINT_MAX 0xffffffff
#define INT_MAX 2147483647
#define INT_MIN (-2147483647-1)
#define ULONG_MAX 0xffffffffffffffffUL
#define LONG_MAX 0x7fffffffffffffffL
#define LONG_MIN (-0x7fffffffffffffffL-1)
// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence
typedef uint cl_mem_fence_flags;
/**
* Queue a memory fence to ensure correct
* ordering of memory operations to local memory
*/
#define CLK_LOCAL_MEM_FENCE 0x01
/**
* Queue a memory fence to ensure correct
* ordering of memory operations to global memory
*/
#define CLK_GLOBAL_MEM_FENCE 0x02
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
/**
* Queue a memory fence to ensure correct ordering of memory
* operations between work-items of a work-group to
* image memory.
*/
#define CLK_IMAGE_MEM_FENCE 0x04
#ifndef ATOMIC_VAR_INIT
#define ATOMIC_VAR_INIT(x) (x)
#endif //ATOMIC_VAR_INIT
#define ATOMIC_FLAG_INIT 0
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum memory_order
{
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions
// These values need to match the runtime equivalent
//
// Addressing Mode.
//
#define CLK_ADDRESS_NONE 0
#define CLK_ADDRESS_CLAMP_TO_EDGE 2
#define CLK_ADDRESS_CLAMP 4
#define CLK_ADDRESS_REPEAT 6
#define CLK_ADDRESS_MIRRORED_REPEAT 8
//
// Coordination Normalization
//
#define CLK_NORMALIZED_COORDS_FALSE 0
#define CLK_NORMALIZED_COORDS_TRUE 1
//
// Filtering Mode.
//
#define CLK_FILTER_NEAREST 0x10
#define CLK_FILTER_LINEAR 0x20
#ifdef cl_khr_gl_msaa_sharing
#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable
#endif //cl_khr_gl_msaa_sharing
//
// Channel Datatype.
//
#define CLK_SNORM_INT8 0x10D0
#define CLK_SNORM_INT16 0x10D1
#define CLK_UNORM_INT8 0x10D2
#define CLK_UNORM_INT16 0x10D3
#define CLK_UNORM_SHORT_565 0x10D4
#define CLK_UNORM_SHORT_555 0x10D5
#define CLK_UNORM_INT_101010 0x10D6
#define CLK_SIGNED_INT8 0x10D7
#define CLK_SIGNED_INT16 0x10D8
#define CLK_SIGNED_INT32 0x10D9
#define CLK_UNSIGNED_INT8 0x10DA
#define CLK_UNSIGNED_INT16 0x10DB
#define CLK_UNSIGNED_INT32 0x10DC
#define CLK_HALF_FLOAT 0x10DD
#define CLK_FLOAT 0x10DE
#define CLK_UNORM_INT24 0x10DF
// Channel order, numbering must be aligned with cl_channel_order in cl.h
//
#define CLK_R 0x10B0
#define CLK_A 0x10B1
#define CLK_RG 0x10B2
#define CLK_RA 0x10B3
#define CLK_RGB 0x10B4
#define CLK_RGBA 0x10B5
#define CLK_BGRA 0x10B6
#define CLK_ARGB 0x10B7
#define CLK_INTENSITY 0x10B8
#define CLK_LUMINANCE 0x10B9
#define CLK_Rx 0x10BA
#define CLK_RGx 0x10BB
#define CLK_RGBx 0x10BC
#define CLK_DEPTH 0x10BD
#define CLK_DEPTH_STENCIL 0x10BE
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CLK_sRGB 0x10BF
#define CLK_sRGBx 0x10C0
#define CLK_sRGBA 0x10C1
#define CLK_sBGRA 0x10C2
#define CLK_ABGR 0x10C3
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
// OpenCL v2.0 s6.13.16 - Pipe Functions
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
// OpenCL v2.0 s6.13.17 - Enqueue Kernels
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3
#define CLK_SUCCESS 0
#define CLK_ENQUEUE_FAILURE -101
#define CLK_INVALID_QUEUE -102
#define CLK_INVALID_NDRANGE -160
#define CLK_INVALID_EVENT_WAIT_LIST -57
#define CLK_DEVICE_QUEUE_FULL -161
#define CLK_INVALID_ARG_SIZE -51
#define CLK_EVENT_ALLOCATION_FAILURE -100
#define CLK_OUT_OF_RESOURCES -5
#define CLK_NULL_QUEUE 0
#define CLK_NULL_EVENT (__builtin_astype(((__SIZE_MAX__)), clk_event_t))
// execution model related definitions
#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0
#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1
#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2
typedef int kernel_enqueue_flags_t;
typedef int clk_profiling_info;
// Profiling info name (see capture_event_profiling_info)
#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1
#define MAX_WORK_DIM 3
typedef struct {
unsigned int workDimension;
size_t globalWorkOffset[MAX_WORK_DIM];
size_t globalWorkSize[MAX_WORK_DIM];
size_t localWorkSize[MAX_WORK_DIM];
} ndrange_t;
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
#ifdef cl_intel_device_side_avc_motion_estimation
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0
#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1
#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2
#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3
#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0
#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1
#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30)
#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30)
#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3
#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
#define CLK_AVC_ME_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0
#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end
#endif // cl_intel_device_side_avc_motion_estimation
#endif //_OPENCL_BASE_H_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,395 @@
/* pocl.h - global pocl declarations for the host side runtime.
Copyright (c) 2011 Universidad Rey Juan Carlos
2011-2019 Pekka Jääskeläinen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @file pocl.h
*
* The declarations in this file are such that are used both in the
* libpocl implementation CL and the kernel compiler. Others should be
* moved to pocl_cl.h of lib/CL or under the kernel compiler dir.
* @todo Check if there are extra declarations here that could be moved.
*/
#ifndef POCL_H
#define POCL_H
#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 220
#endif
#include <CL/opencl.h>
#include "config.h"
#include "pocl_context.h"
/* detects restrict, variadic macros etc */
#include "pocl_compiler_features.h"
#define POCL_FILENAME_LENGTH 1024
#define WORKGROUP_STRING_LENGTH 1024
typedef struct _mem_mapping mem_mapping_t;
/* represents a single buffer to host memory mapping */
struct _mem_mapping {
void *host_ptr; /* the location of the mapped buffer chunk in the host memory */
size_t offset; /* offset to the beginning of the buffer */
size_t size;
mem_mapping_t *prev, *next;
/* This is required, because two clEnqueueMap() with the same buffer+size+offset,
will create two identical mappings in the buffer->mappings LL.
Without this flag, both corresponding clEnqUnmap()s will find
the same mapping (the first one in mappings LL), which will lead
to memory double-free corruption later. */
long unmap_requested;
cl_map_flags map_flags;
/* image mapping data */
size_t origin[3];
size_t region[3];
size_t row_pitch;
size_t slice_pitch;
};
/* memory identifier: id to point the global memory where memory resides
+ pointer to actual data */
typedef struct _pocl_mem_identifier
{
int available; /* ... in this mem objs context */
int global_mem_id;
void *mem_ptr;
void *image_data;
} pocl_mem_identifier;
typedef struct _mem_destructor_callback mem_destructor_callback_t;
/* represents a memory object destructor callback */
struct _mem_destructor_callback
{
void (CL_CALLBACK * pfn_notify) (cl_mem, void*); /* callback function */
void *user_data; /* user supplied data passed to callback function */
mem_destructor_callback_t *next;
};
typedef struct _build_program_callback build_program_callback_t;
struct _build_program_callback
{
void (CL_CALLBACK * callback_function) (cl_program, void*); /* callback function */
void *user_data; /* user supplied data passed to callback function */
};
// Command Queue datatypes
#define POCL_KERNEL_DIGEST_SIZE 16
typedef uint8_t pocl_kernel_hash_t[POCL_KERNEL_DIGEST_SIZE];
// clEnqueueNDRangeKernel
typedef struct
{
void *hash;
void *wg; /* The work group function ptr. Device specific. */
cl_kernel kernel;
/* The launch data that can be passed to the kernel execution environment. */
struct pocl_context pc;
struct pocl_argument *arguments;
/* Can be used to store/cache arbitrary device-specific data. */
void *device_data;
/* If set to 1, disallow any work-group function specialization. */
int force_generic_wg_func;
/* If set to 1, disallow "small grid" WG function specialization. */
int force_large_grid_wg_func;
unsigned device_i;
} _cl_command_run;
// clEnqueueNativeKernel
typedef struct
{
void *args;
size_t cb_args;
void (*user_func)(void *);
} _cl_command_native;
// clEnqueueReadBuffer
typedef struct
{
void *__restrict__ dst_host_ptr;
pocl_mem_identifier *src_mem_id;
size_t offset;
size_t size;
} _cl_command_read;
// clEnqueueWriteBuffer
typedef struct
{
const void *__restrict__ src_host_ptr;
pocl_mem_identifier *dst_mem_id;
size_t offset;
size_t size;
} _cl_command_write;
// clEnqueueCopyBuffer
typedef struct
{
pocl_mem_identifier *src_mem_id;
pocl_mem_identifier *dst_mem_id;
size_t src_offset;
size_t dst_offset;
size_t size;
} _cl_command_copy;
// clEnqueueReadBufferRect
typedef struct
{
void *__restrict__ dst_host_ptr;
pocl_mem_identifier *src_mem_id;
size_t buffer_origin[3];
size_t host_origin[3];
size_t region[3];
size_t buffer_row_pitch;
size_t buffer_slice_pitch;
size_t host_row_pitch;
size_t host_slice_pitch;
} _cl_command_read_rect;
// clEnqueueWriteBufferRect
typedef struct
{
const void *__restrict__ src_host_ptr;
pocl_mem_identifier *dst_mem_id;
size_t buffer_origin[3];
size_t host_origin[3];
size_t region[3];
size_t buffer_row_pitch;
size_t buffer_slice_pitch;
size_t host_row_pitch;
size_t host_slice_pitch;
} _cl_command_write_rect;
// clEnqueueCopyBufferRect
typedef struct
{
pocl_mem_identifier *src_mem_id;
pocl_mem_identifier *dst_mem_id;
size_t dst_origin[3];
size_t src_origin[3];
size_t region[3];
size_t src_row_pitch;
size_t src_slice_pitch;
size_t dst_row_pitch;
size_t dst_slice_pitch;
} _cl_command_copy_rect;
// clEnqueueMapBuffer
typedef struct
{
pocl_mem_identifier *mem_id;
mem_mapping_t *mapping;
} _cl_command_map;
/* clEnqueueUnMapMemObject */
typedef struct
{
pocl_mem_identifier *mem_id;
mem_mapping_t *mapping;
} _cl_command_unmap;
/* clEnqueueFillBuffer */
typedef struct
{
pocl_mem_identifier *dst_mem_id;
size_t size;
size_t offset;
void *__restrict__ pattern;
size_t pattern_size;
} _cl_command_fill_mem;
/* clEnqueue(Write/Read)Image */
typedef struct
{
pocl_mem_identifier *src_mem_id;
void *__restrict__ dst_host_ptr;
pocl_mem_identifier *dst_mem_id;
size_t dst_offset;
size_t origin[3];
size_t region[3];
size_t dst_row_pitch;
size_t dst_slice_pitch;
} _cl_command_read_image;
typedef struct
{
pocl_mem_identifier *dst_mem_id;
const void *__restrict__ src_host_ptr;
pocl_mem_identifier *src_mem_id;
size_t src_offset;
size_t origin[3];
size_t region[3];
size_t src_row_pitch;
size_t src_slice_pitch;
} _cl_command_write_image;
typedef struct
{
pocl_mem_identifier *src_mem_id;
pocl_mem_identifier *dst_mem_id;
size_t dst_origin[3];
size_t src_origin[3];
size_t region[3];
} _cl_command_copy_image;
/* clEnqueueFillImage */
typedef struct
{
pocl_mem_identifier *mem_id;
size_t origin[3];
size_t region[3];
void *__restrict__ fill_pixel;
size_t pixel_size;
} _cl_command_fill_image;
/* clEnqueueMarkerWithWaitlist */
typedef struct
{
void *data;
int has_wait_list;
} _cl_command_marker;
/* clEnqueueBarrierWithWaitlist */
typedef _cl_command_marker _cl_command_barrier;
/* clEnqueueMigrateMemObjects */
typedef struct
{
void *data;
size_t num_mem_objects;
cl_mem *mem_objects;
cl_device_id *source_devices;
} _cl_command_migrate;
typedef struct
{
void* data;
void* queue;
unsigned num_svm_pointers;
void **svm_pointers;
void (CL_CALLBACK *pfn_free_func) ( cl_command_queue queue,
cl_uint num_svm_pointers,
void *svm_pointers[],
void *user_data);
} _cl_command_svm_free;
typedef struct
{
void* svm_ptr;
size_t size;
cl_map_flags flags;
} _cl_command_svm_map;
typedef struct
{
void* svm_ptr;
} _cl_command_svm_unmap;
typedef struct
{
const void *__restrict__ src;
void *__restrict__ dst;
size_t size;
} _cl_command_svm_cpy;
typedef struct
{
void *__restrict__ svm_ptr;
size_t size;
void *__restrict__ pattern;
size_t pattern_size;
} _cl_command_svm_fill;
typedef union
{
_cl_command_run run;
_cl_command_native native;
_cl_command_read read;
_cl_command_write write;
_cl_command_copy copy;
_cl_command_read_rect read_rect;
_cl_command_write_rect write_rect;
_cl_command_copy_rect copy_rect;
_cl_command_fill_mem memfill;
_cl_command_read_image read_image;
_cl_command_write_image write_image;
_cl_command_copy_image copy_image;
_cl_command_fill_image fill_image;
_cl_command_map map;
_cl_command_unmap unmap;
_cl_command_marker marker;
_cl_command_barrier barrier;
_cl_command_migrate migrate;
_cl_command_svm_free svm_free;
_cl_command_svm_map svm_map;
_cl_command_svm_unmap svm_unmap;
_cl_command_svm_cpy svm_memcpy;
_cl_command_svm_fill svm_fill;
} _cl_command_t;
// one item in the command queue
typedef struct _cl_command_node _cl_command_node;
struct _cl_command_node
{
_cl_command_t command;
cl_command_type type;
_cl_command_node *next; // for linked-list storage
_cl_command_node *prev;
cl_event event;
const cl_event *event_wait_list;
cl_device_id device;
/* The index of the targeted device in the platform's device list. */
unsigned device_i;
cl_int ready;
};
#ifndef LLVM_10_0
#define LLVM_OLDER_THAN_10_0 1
#ifndef LLVM_9_0
#define LLVM_OLDER_THAN_9_0 1
#ifndef LLVM_8_0
#define LLVM_OLDER_THAN_8_0 1
#ifndef LLVM_7_0
#define LLVM_OLDER_THAN_7_0 1
#ifndef LLVM_6_0
#define LLVM_OLDER_THAN_6_0 1
#endif
#endif
#endif
#endif
#endif
#endif /* POCL_H */

View file

@ -0,0 +1,80 @@
/* pocl_device.h - global pocl declarations to be used in the device binaries in
case applicable by the target
Copyright (c) 2012-2018 Pekka Jääskeläinen / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef POCL_DEVICE_H
#define POCL_DEVICE_H
#include "pocl_types.h"
#define MAX_KERNEL_ARGS 64
#define MAX_KERNEL_NAME_LENGTH 64
/* Metadata of a single kernel stored in the device.*/
typedef struct {
const uchar name[MAX_KERNEL_NAME_LENGTH];
ushort num_args;
ushort num_locals;
void *work_group_func;
} __kernel_metadata;
#ifdef _MSC_VER
#define ALIGN4(x) __declspec(align(4)) x
#define ALIGN8(x) __declspec(align(4)) x
#else
#define ALIGN4(x) x __attribute__ ((aligned (4)))
#define ALIGN8(x) x __attribute__ ((aligned (8)))
#endif
/* A kernel invocation command. */
typedef struct {
/* The execution status of this queue slot. */
ALIGN8(uint status);
/* The kernel to execute. Points to the metadata in the device global
memory. It will be casted to a __kernel_metadata* */
ALIGN8(uint kernel);
/* Pointers to the kernel arguments in the global memory. Will be
casted to 32 bit void* */
ALIGN8(uint args[MAX_KERNEL_ARGS]);
/* Sizes of the dynamically allocated local buffers. */
/* uint32_t dynamic_local_arg_sizes[MAX_KERNEL_ARGS] ALIGN4; */
/* Number of dimensions in the work space. */
ALIGN4(uint work_dim);
ALIGN4(uint num_groups[3]);
ALIGN4(uint global_offset[3]);
} __kernel_exec_cmd;
/* Kernel execution statuses. */
/* The invocation entry is free to use. */
#define POCL_KST_FREE 1
/* The kernel structure has been populated and is waiting to be
executed. */
#define POCL_KST_READY 2
/* The kernel is currently running in the device. */
#define POCL_KST_RUNNING 3
/* The kernel has finished execution. The results can be collected and the
execution entry be freed (by writing POCL_KST_FREE to the status). */
#define POCL_KST_FINISHED 4
#endif

View file

@ -0,0 +1,52 @@
/* pocl_image_types.h - image data structure used by device implementations
Copyright (c) 2013 Ville Korhonen
Copyright (c) 2017 Michal Babej / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef __X86_IMAGE_H__
#define __X86_IMAGE_H__
#ifdef __CBUILD__
#define INTTYPE cl_int
#else
#define INTTYPE int
#endif
typedef uintptr_t dev_sampler_t;
typedef struct dev_image_t {
void *_data;
INTTYPE _width;
INTTYPE _height;
INTTYPE _depth;
INTTYPE _image_array_size;
INTTYPE _row_pitch;
INTTYPE _slice_pitch;
INTTYPE _num_mip_levels; /* maybe not needed */
INTTYPE _num_samples; /* maybe not needed */
INTTYPE _order;
INTTYPE _data_type;
INTTYPE _num_channels;
INTTYPE _elem_size;
} dev_image_t;
#endif

View file

@ -0,0 +1,33 @@
/* pocl-spir.h - global pocl declarations for the SPIR support.
Copyright (c) 2018-2019 Pekka Jääskeläinen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef POCL_SPIR_H
#define POCL_SPIR_H
#define SPIR_ADDRESS_SPACE_PRIVATE 0
#define SPIR_ADDRESS_SPACE_GLOBAL 1
#define SPIR_ADDRESS_SPACE_CONSTANT 2
#define SPIR_ADDRESS_SPACE_LOCAL 3
#define SPIR_ADDRESS_SPACE_GENERIC 4
#endif

View file

@ -0,0 +1,171 @@
/* pocl_types.h - The basic OpenCL C device side scalar data types.
Copyright (c) 2018 Pekka Jääskeläinen / Tampere University of Technology
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* This header is designed to be included both from the device and the host.
In case compiling OpenCL C sources, __OPENCL_VERSION__ should be set.
In case compiling in the host, all but the device-specific types are
defined (size_t and others). Devices should avoid including the C
stdint.h instead of this one as OpenCL C size_t et al. is allowed to
be of different width than when targeting C.
TODO: replace this header (partially) with Clang's opencl-c.h
*/
#ifndef POCL_DEVICE_TYPES_H
#define POCL_DEVICE_TYPES_H
#ifdef __OPENCL_VERSION__
#ifdef __USE_CLANG_OPENCL_C_H
/* Minimal definitions, only the target specific macro overrides,
just in case Clang export the C ones which might differ for
OpenCL C. */
#ifdef __INTPTR_TYPE__
#undef __INTPTR_TYPE__
#endif
#ifdef __UINTPTR_TYPE__
#undef __UINTPTR_TYPE__
#endif
#ifdef __SIZE_TYPE__
#undef __SIZE_TYPE__
#endif
#ifdef __SIZE_MAX__
#undef __SIZE_MAX__
#endif
#if defined(POCL_DEVICE_ADDRESS_BITS) && POCL_DEVICE_ADDRESS_BITS == 32
#define __SIZE_TYPE__ uint
#define __SIZE_MAX__ UINT_MAX
#else
#define __SIZE_TYPE__ ulong
#define __SIZE_MAX__ ULONG_MAX
#endif
#define __INTPTR_TYPE__ __SIZE_TYPE__
#define __UINTPTR_TYPE__ __INTPTR_TYPE__
#else
/* Compiling Device-specific OpenCL C or builtin library C. */
#if defined cl_khr_fp64 && !defined cl_khr_int64
#error "cl_khr_fp64 requires cl_khr_int64"
#endif
/* TODO FIXME We should not use these in OpenCL library's C code at all.
* The problem is that 1) these are predefined by glibc, 2) while we can
* re-define "ulong", we cannot control the size of "long" at all.
* which can lead to "ulong" being 64bit and "long" 32bit, resulting in
* mysterious errors and bugs. Therefore OpenCL library's C code should
* use the fixed size C types where integer size matters. */
#ifdef __CBUILD__
/* Builtin library C code definitions. */
#define size_t csize_t
#define uintptr_t cuintptr_t
#include <stdint.h>
#undef size_t
#undef uintptr_t
typedef uint8_t uchar;
typedef uint16_t ushort;
typedef uint32_t uint;
#ifdef cl_khr_int64
typedef uint64_t ulong;
#else
typedef uint32_t ulong;
#endif
#ifndef cl_khr_fp16
typedef short half;
#endif
#endif
/* The definitions below intentionally lead to errors if these types
are used when they are not available in the language. This prevents
accidentally using them if the compiler does not disable these
types, but only e.g. defines them with an incorrect size.*/
#ifndef cl_khr_fp64
typedef struct error_undefined_type_double error_undefined_type_double;
#define double error_undefined_type_double
#endif
#ifdef __SIZE_TYPE__
#undef __SIZE_TYPE__
#endif
#ifdef __SIZE_MAX__
#undef __SIZE_MAX__
#endif
#if defined(POCL_DEVICE_ADDRESS_BITS) && POCL_DEVICE_ADDRESS_BITS == 32
#define __SIZE_TYPE__ uint
#define __SIZE_MAX__ UINT_MAX
#else
#define __SIZE_TYPE__ ulong
#define __SIZE_MAX__ ULONG_MAX
#endif
typedef __SIZE_TYPE__ size_t;
typedef __PTRDIFF_TYPE__ ptrdiff_t;
typedef ptrdiff_t intptr_t;
typedef size_t uintptr_t;
#endif /* #ifdef __USE_CLANG_OPENCL_C_H */
#else /* #ifdef __OPENCL_VERSION__ */
/* Including from a host source (runtime API implementation). Introduce
the fixed width datatypes, but do not override C's size_t and other
target specific datatypes. */
typedef unsigned char uchar;
/* FIXME see the above TODO about these types. */
#if !(defined(_SYS_TYPES_H) && defined(__USE_MISC))
/* glibc, when including sys/types.h, typedefs these. */
typedef unsigned long int ulong;
typedef unsigned short int ushort;
typedef unsigned int uint;
#endif
#include <stdint.h>
#endif
#endif

View file

@ -1,6 +1,6 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
LLVM_LIB_PATH ?= $(wildcard ../compiler/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
@ -16,7 +16,7 @@ SRCS = main.cc clutils.cpp utils.cpp
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

View file

@ -1,6 +1,6 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
LLVM_LIB_PATH ?= $(wildcard ../compiler/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
@ -16,7 +16,7 @@ SRCS = main.cc clutils.cpp utils.cpp
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

View file

@ -1,7 +0,0 @@
# Dynamic Instructions: -1
# of total cycles: 2519
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: -2519
# time to simulate: 4.94066e-323 milliseconds
# GRADE: Failed on test: 0

View file

@ -1,6 +1,6 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
LLVM_LIB_PATH ?= $(wildcard ../compiler/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
@ -16,7 +16,7 @@ SRCS = main.cc
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

View file

@ -1,6 +1,6 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
LLVM_LIB_PATH ?= $(wildcard ../compiler/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
@ -16,7 +16,7 @@ SRCS = main.cc
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

View file

@ -1,6 +1,6 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
LLVM_LIB_PATH ?= $(wildcard ../compiler/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
@ -16,7 +16,7 @@ SRCS = main.cc
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

Binary file not shown.

View file

@ -1,6 +1,6 @@
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCLCC_PATH ?= $(wildcard ~/dev/pocl/drops_vortex_cc)
POCLRT_PATH ?= $(wildcard ..)
LLVM_LIB_PATH ?= $(wildcard ../compiler/lib)
POCLCC_PATH ?= $(wildcard ../compiler)
POCLRT_PATH ?= $(wildcard ../runtime)
DRIVER_PATH ?= $(wildcard ../../../driver/sw)
CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-errors
@ -16,7 +16,7 @@ SRCS = main.cc
all: $(PROJECT)
kernel.pocl: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(LLVM_LIB_PATH):$(POCLCC_PATH)/lib:$(DRIVER_PATH)/simx $(POCLCC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
$(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@

Binary file not shown.