mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-19 04:45:07 -04:00
Fix vec_caps to test for OS support too (on x64) (#126911)
On x64, we are testing if we support vector capabilities (1 = "basic" = AVX2, 2 = "advanced" = AVX-512) in order to enable and choose a native implementation for some vector functions, using CPUID. However, under some circumstances, this is not sufficient: the OS on which we are running also needs to support AVX/AVX2 etc; basically, it needs to acknowledge it knows about the additional register and that it is able to handle them e.g. in context switches. To do that we need to a) test if the CPU has xsave feature and b) use the xgetbv to test if the OS set it (declaring it supports AVX/AVX2/etc). In most cases this is not needed, as all modern OSes do that, but for some virtualized situations (hypervisors, emulators, etc.) all the component along the chain must support it, and in some cases this is not a given. This PR introduces a change to the x64 version of vec_caps to check for OS support too, and a warning on the Java side in case the CPU supports vector capabilities but those are not enabled at OS level. Tested by passing noxsave to my linux box kernel boot options, and ensuring that the avx flags "disappear" from /proc/cpuinfo, and we fall back to the "no native vector" case. Fixes #126809
This commit is contained in:
parent
e74c237059
commit
115062c643
6 changed files with 52 additions and 7 deletions
6
docs/changelog/126911.yaml
Normal file
6
docs/changelog/126911.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
pr: 126911
|
||||
summary: Fix `vec_caps` to test for OS support too (on x64)
|
||||
area: Vector Search
|
||||
type: bug
|
||||
issues:
|
||||
- 126809
|
|
@ -19,7 +19,7 @@ configurations {
|
|||
}
|
||||
|
||||
var zstdVersion = "1.5.5"
|
||||
var vecVersion = "1.0.10"
|
||||
var vecVersion = "1.0.11"
|
||||
|
||||
repositories {
|
||||
exclusiveContent {
|
||||
|
|
|
@ -41,7 +41,7 @@ public final class JdkVectorLibrary implements VectorLibrary {
|
|||
try {
|
||||
int caps = (int) vecCaps$mh.invokeExact();
|
||||
logger.info("vec_caps=" + caps);
|
||||
if (caps != 0) {
|
||||
if (caps > 0) {
|
||||
if (caps == 2) {
|
||||
dot7u$mh = downcallHandle(
|
||||
"dot7u_2",
|
||||
|
@ -67,6 +67,11 @@ public final class JdkVectorLibrary implements VectorLibrary {
|
|||
}
|
||||
INSTANCE = new JdkVectorSimilarityFunctions();
|
||||
} else {
|
||||
if (caps < 0) {
|
||||
logger.warn("""
|
||||
Your CPU supports vector capabilities, but they are disabled at OS level. For optimal performance, \
|
||||
enable them in your OS/Hypervisor/VM/container""");
|
||||
}
|
||||
dot7u$mh = null;
|
||||
sqr7u$mh = null;
|
||||
INSTANCE = null;
|
||||
|
|
|
@ -11,7 +11,7 @@ apply plugin: 'cpp'
|
|||
|
||||
var os = org.gradle.internal.os.OperatingSystem.current()
|
||||
|
||||
// To update this library run publish_vec_binaries.sh ( or ./gradlew vecSharedLibrary )
|
||||
// To update this library run publish_vec_binaries.sh ( or ./gradlew buildSharedLibrary )
|
||||
// Or
|
||||
// For local development, build the docker image with:
|
||||
// docker build --platform linux/arm64 --progress=plain --file=Dockerfile.aarch64 . (for aarch64)
|
||||
|
|
|
@ -20,7 +20,7 @@ if [ -z "$ARTIFACTORY_API_KEY" ]; then
|
|||
exit 1;
|
||||
fi
|
||||
|
||||
VERSION="1.0.10"
|
||||
VERSION="1.0.11"
|
||||
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
|
||||
TEMP=$(mktemp -d)
|
||||
|
||||
|
|
|
@ -46,6 +46,23 @@ static inline void cpuid(int output[4], int functionNumber) {
|
|||
#endif
|
||||
}
|
||||
|
||||
// Multi-platform XGETBV "intrinsic"
|
||||
static inline int64_t xgetbv(int ctr) {
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
// use inline assembly, Gnu/AT&T syntax
|
||||
uint32_t a, d;
|
||||
__asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : );
|
||||
return a | (((uint64_t) d) << 32);
|
||||
|
||||
#elif (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
|
||||
// Microsoft or Intel compiler supporting _xgetbv intrinsic
|
||||
return _xgetbv(ctr);
|
||||
|
||||
#else
|
||||
#error Unsupported compiler
|
||||
#endif
|
||||
}
|
||||
|
||||
// Utility function to horizontally add 8 32-bit integers
|
||||
static inline int hsum_i32_8(const __m256i a) {
|
||||
const __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
|
||||
|
@ -57,11 +74,20 @@ static inline int hsum_i32_8(const __m256i a) {
|
|||
|
||||
EXPORT int vec_caps() {
|
||||
int cpuInfo[4] = {-1};
|
||||
// Calling __cpuid with 0x0 as the function_id argument
|
||||
// Calling CPUID function 0x0 as the function_id argument
|
||||
// gets the number of the highest valid function ID.
|
||||
cpuid(cpuInfo, 0);
|
||||
int functionIds = cpuInfo[0];
|
||||
if (functionIds == 0) {
|
||||
// No CPUID functions
|
||||
return 0;
|
||||
}
|
||||
// call CPUID function 0x1 for feature flags
|
||||
cpuid(cpuInfo, 1);
|
||||
int hasOsXsave = (cpuInfo[2] & (1 << 27)) != 0;
|
||||
int avxEnabledInOS = hasOsXsave && ((xgetbv(0) & 6) == 6);
|
||||
if (functionIds >= 7) {
|
||||
// call CPUID function 0x7 for AVX2/512 flags
|
||||
cpuid(cpuInfo, 7);
|
||||
int ebx = cpuInfo[1];
|
||||
int ecx = cpuInfo[2];
|
||||
|
@ -72,10 +98,18 @@ EXPORT int vec_caps() {
|
|||
// int avx512_vnni = (ecx & 0x00000800) != 0;
|
||||
// if (avx512 && avx512_vnni) {
|
||||
if (avx512) {
|
||||
return 2;
|
||||
if (avxEnabledInOS) {
|
||||
return 2;
|
||||
} else {
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
if (avx2) {
|
||||
return 1;
|
||||
if (avxEnabledInOS) {
|
||||
return 1;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
|
Loading…
Add table
Reference in a new issue