Disallow vectors whose magnitudes will not fit in a float (#100519)

While we check that a vector's magnitude is not `0f`, we don't verify that it
actually fits within a `float` value.

This commit returns a failure and rejects `float` vectors whose
magnitudes don't fit within a 32-bit `float` value.

We don't support `float64` (aka `double`) values for vector search and
should fail when a user attempts to index a vector that requires storing
as `double`.

closes: https://github.com/elastic/elasticsearch/issues/100471
This commit is contained in:
Benjamin Trent 2023-10-11 06:56:54 -07:00 committed by GitHub
parent e411b57baf
commit 18c5246f1a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 1 deletions

View file

@ -0,0 +1,5 @@
pr: 100519
summary: Disallow vectors whose magnitudes will not fit in a float
area: Vector Search
type: bug
issues: []

View file

@ -458,6 +458,15 @@ public class DenseVectorFieldMapper extends FieldMapper {
) {
StringBuilder errorBuilder = null;
if (Float.isNaN(squaredMagnitude) || Float.isInfinite(squaredMagnitude)) {
errorBuilder = new StringBuilder(
"NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float."
);
}
if (errorBuilder != null) {
throw new IllegalArgumentException(appender.apply(errorBuilder).toString());
}
if (similarity == VectorSimilarity.DOT_PRODUCT && Math.abs(squaredMagnitude - 1.0f) > 1e-4f) {
errorBuilder = new StringBuilder(
"The [" + VectorSimilarity.DOT_PRODUCT + "] similarity can only be used with unit-length vectors."
@ -886,7 +895,9 @@ public class DenseVectorFieldMapper extends FieldMapper {
}
elementType.checkVectorBounds(queryVector);
if (similarity == VectorSimilarity.DOT_PRODUCT || similarity == VectorSimilarity.COSINE) {
if (similarity == VectorSimilarity.DOT_PRODUCT
|| similarity == VectorSimilarity.COSINE
|| similarity == VectorSimilarity.MAX_INNER_PRODUCT) {
float squaredMagnitude = VectorUtil.dotProduct(queryVector, queryVector);
elementType.checkVectorMagnitude(similarity, ElementType.errorFloatElementsAppender(queryVector), squaredMagnitude);
}

View file

@ -413,6 +413,40 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
);
}
/**
 * Parses a document holding a three-dimensional vector into an indexed {@code dense_vector} field
 * configured with the {@code MAX_INNER_PRODUCT} similarity. The test passes as long as parsing
 * completes without throwing.
 */
public void testMaxInnerProductWithValidNorm() throws Exception {
    final float[] validVector = new float[] { -12.1f, 2.7f, -4 };
    final DocumentMapper documentMapper = createDocumentMapper(fieldMapping(mapping -> {
        mapping.field("type", "dense_vector");
        mapping.field("dims", 3);
        mapping.field("index", true);
        mapping.field("similarity", VectorSimilarity.MAX_INNER_PRODUCT);
    }));
    // No assertion needed: an exception from parse() is what would fail this test.
    documentMapper.parse(source(doc -> doc.array("field", validVector)));
}
/**
 * Verifies that a document is rejected when its vector has finite components but a magnitude
 * that cannot be represented as a {@code float}.
 *
 * <p>The check is repeated for the {@code COSINE}, {@code DOT_PRODUCT} and
 * {@code MAX_INNER_PRODUCT} similarities. In every case parsing is expected to fail with a
 * {@link DocumentParsingException} whose cause carries the "NaN or Infinite magnitude" message.
 */
public void testWithExtremeFloatVector() throws Exception {
    // Every component is a finite float, but (-1.7255947E30f)^2 is roughly 3E60, which is far above
    // Float.MAX_VALUE (about 3.4E38), so a squared magnitude held in a float becomes infinite.
    final float[] vector = { 0.07247924f, -4.310546E-11f, -1.7255947E30f };
    final List<VectorSimilarity> similarities = List.of(
        VectorSimilarity.COSINE,
        VectorSimilarity.DOT_PRODUCT,
        VectorSimilarity.MAX_INNER_PRODUCT
    );
    for (VectorSimilarity vs : similarities) {
        DocumentMapper mapper = createDocumentMapper(
            fieldMapping(b -> b.field("type", "dense_vector").field("dims", 3).field("index", true).field("similarity", vs))
        );
        DocumentParsingException e = expectThrows(
            DocumentParsingException.class,
            () -> mapper.parse(source(b -> b.array("field", vector)))
        );
        // The parsing exception wraps the underlying validation failure; the message lives on the cause.
        assertNotNull(e.getCause());
        assertThat(
            e.getCause().getMessage(),
            containsString(
                "NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float."
            )
        );
    }
}
public void testInvalidParameters() {
MapperParsingException e = expectThrows(
MapperParsingException.class,