Fix vec_caps to test for OS support too (on x64) (#126911)

On x64, we are testing if we support vector capabilities (1 = "basic" = AVX2, 2 = "advanced" = AVX-512) in order to enable and choose a native implementation for some vector functions, using CPUID.

However, under some circumstances, this is not sufficient: the OS on which we are running also needs to support AVX/AVX2 etc; basically, it needs to acknowledge it knows about the additional register and that it is able to handle them e.g. in context switches. To do that we need to a) test if the CPU has xsave feature and b) use the xgetbv to test if the OS set it (declaring it supports AVX/AVX2/etc).

In most cases this is not needed, as all modern OSes do that, but for some virtualized situations (hypervisors, emulators, etc.) all the component along the chain must support it, and in some cases this is not a given.

This PR introduces a change to the x64 version of vec_caps to check for OS support too, and a warning on the Java side in case the CPU supports vector capabilities but those are not enabled at OS level.

Tested by passing noxsave to my linux box kernel boot options, and ensuring that the avx flags "disappear" from /proc/cpuinfo, and we fall back to the "no native vector" case.

Fixes #126809
This commit is contained in:
Lorenzo Dematté 2025-04-16 16:06:46 +02:00 committed by GitHub
parent e74c237059
commit 115062c643
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 52 additions and 7 deletions

View file

@ -0,0 +1,6 @@
pr: 126911
summary: Fix `vec_caps` to test for OS support too (on x64)
area: Vector Search
type: bug
issues:
- 126809

View file

@ -19,7 +19,7 @@ configurations {
}
var zstdVersion = "1.5.5"
var vecVersion = "1.0.10"
var vecVersion = "1.0.11"
repositories {
exclusiveContent {

View file

@ -41,7 +41,7 @@ public final class JdkVectorLibrary implements VectorLibrary {
try {
int caps = (int) vecCaps$mh.invokeExact();
logger.info("vec_caps=" + caps);
if (caps != 0) {
if (caps > 0) {
if (caps == 2) {
dot7u$mh = downcallHandle(
"dot7u_2",
@ -67,6 +67,11 @@ public final class JdkVectorLibrary implements VectorLibrary {
}
INSTANCE = new JdkVectorSimilarityFunctions();
} else {
if (caps < 0) {
logger.warn("""
Your CPU supports vector capabilities, but they are disabled at OS level. For optimal performance, \
enable them in your OS/Hypervisor/VM/container""");
}
dot7u$mh = null;
sqr7u$mh = null;
INSTANCE = null;

View file

@ -11,7 +11,7 @@ apply plugin: 'cpp'
var os = org.gradle.internal.os.OperatingSystem.current()
// To update this library run publish_vec_binaries.sh ( or ./gradlew vecSharedLibrary )
// To update this library run publish_vec_binaries.sh ( or ./gradlew buildSharedLibrary )
// Or
// For local development, build the docker image with:
// docker build --platform linux/arm64 --progress=plain --file=Dockerfile.aarch64 . (for aarch64)

View file

@ -20,7 +20,7 @@ if [ -z "$ARTIFACTORY_API_KEY" ]; then
exit 1;
fi
VERSION="1.0.10"
VERSION="1.0.11"
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
TEMP=$(mktemp -d)

View file

@ -46,6 +46,23 @@ static inline void cpuid(int output[4], int functionNumber) {
#endif
}
// Multi-platform XGETBV "intrinsic"
static inline int64_t xgetbv(int ctr) {
#if defined(__GNUC__) || defined(__clang__)
// use inline assembly, Gnu/AT&T syntax
uint32_t a, d;
__asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : );
return a | (((uint64_t) d) << 32);
#elif (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
// Microsoft or Intel compiler supporting _xgetbv intrinsic
return _xgetbv(ctr);
#else
#error Unsupported compiler
#endif
}
// Utility function to horizontally add 8 32-bit integers
static inline int hsum_i32_8(const __m256i a) {
const __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
@ -57,11 +74,20 @@ static inline int hsum_i32_8(const __m256i a) {
EXPORT int vec_caps() {
int cpuInfo[4] = {-1};
// Calling __cpuid with 0x0 as the function_id argument
// Calling CPUID function 0x0 as the function_id argument
// gets the number of the highest valid function ID.
cpuid(cpuInfo, 0);
int functionIds = cpuInfo[0];
if (functionIds == 0) {
// No CPUID functions
return 0;
}
// call CPUID function 0x1 for feature flags
cpuid(cpuInfo, 1);
int hasOsXsave = (cpuInfo[2] & (1 << 27)) != 0;
int avxEnabledInOS = hasOsXsave && ((xgetbv(0) & 6) == 6);
if (functionIds >= 7) {
// call CPUID function 0x7 for AVX2/512 flags
cpuid(cpuInfo, 7);
int ebx = cpuInfo[1];
int ecx = cpuInfo[2];
@ -72,10 +98,18 @@ EXPORT int vec_caps() {
// int avx512_vnni = (ecx & 0x00000800) != 0;
// if (avx512 && avx512_vnni) {
if (avx512) {
return 2;
if (avxEnabledInOS) {
return 2;
} else {
return -2;
}
}
if (avx2) {
return 1;
if (avxEnabledInOS) {
return 1;
} else {
return -1;
}
}
}
return 0;