mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 17:34:17 -04:00
Disallow vectors whose magnitudes will not fit in a float (#100519)
While we check that a vector's magnitude is not `0f`, we don't verify that it actually fits within a `float` value. This commit rejects `float` vectors whose magnitude doesn't fit within a 32-bit `float` value and returns a failure. We don't support `float64` (aka `double`) values for vector search, so we should fail when a user attempts to index a vector that would require storing as `double`. closes: https://github.com/elastic/elasticsearch/issues/100471
This commit is contained in:
parent
e411b57baf
commit
18c5246f1a
3 changed files with 51 additions and 1 deletions
5
docs/changelog/100519.yaml
Normal file
5
docs/changelog/100519.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 100519
|
||||
summary: Disallow vectors whose magnitudes will not fit in a float
|
||||
area: Vector Search
|
||||
type: bug
|
||||
issues: []
|
|
@ -458,6 +458,15 @@ public class DenseVectorFieldMapper extends FieldMapper {
|
|||
) {
|
||||
StringBuilder errorBuilder = null;
|
||||
|
||||
if (Float.isNaN(squaredMagnitude) || Float.isInfinite(squaredMagnitude)) {
|
||||
errorBuilder = new StringBuilder(
|
||||
"NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float."
|
||||
);
|
||||
}
|
||||
if (errorBuilder != null) {
|
||||
throw new IllegalArgumentException(appender.apply(errorBuilder).toString());
|
||||
}
|
||||
|
||||
if (similarity == VectorSimilarity.DOT_PRODUCT && Math.abs(squaredMagnitude - 1.0f) > 1e-4f) {
|
||||
errorBuilder = new StringBuilder(
|
||||
"The [" + VectorSimilarity.DOT_PRODUCT + "] similarity can only be used with unit-length vectors."
|
||||
|
@ -886,7 +895,9 @@ public class DenseVectorFieldMapper extends FieldMapper {
|
|||
}
|
||||
elementType.checkVectorBounds(queryVector);
|
||||
|
||||
if (similarity == VectorSimilarity.DOT_PRODUCT || similarity == VectorSimilarity.COSINE) {
|
||||
if (similarity == VectorSimilarity.DOT_PRODUCT
|
||||
|| similarity == VectorSimilarity.COSINE
|
||||
|| similarity == VectorSimilarity.MAX_INNER_PRODUCT) {
|
||||
float squaredMagnitude = VectorUtil.dotProduct(queryVector, queryVector);
|
||||
elementType.checkVectorMagnitude(similarity, ElementType.errorFloatElementsAppender(queryVector), squaredMagnitude);
|
||||
}
|
||||
|
|
|
@ -413,6 +413,40 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
|
|||
);
|
||||
}
|
||||
|
||||
public void testMaxInnerProductWithValidNorm() throws Exception {
|
||||
DocumentMapper mapper = createDocumentMapper(
|
||||
fieldMapping(
|
||||
b -> b.field("type", "dense_vector")
|
||||
.field("dims", 3)
|
||||
.field("index", true)
|
||||
.field("similarity", VectorSimilarity.MAX_INNER_PRODUCT)
|
||||
)
|
||||
);
|
||||
float[] vector = { -12.1f, 2.7f, -4 };
|
||||
// Shouldn't throw
|
||||
mapper.parse(source(b -> b.array("field", vector)));
|
||||
}
|
||||
|
||||
public void testWithExtremeFloatVector() throws Exception {
|
||||
for (VectorSimilarity vs : List.of(VectorSimilarity.COSINE, VectorSimilarity.DOT_PRODUCT, VectorSimilarity.COSINE)) {
|
||||
DocumentMapper mapper = createDocumentMapper(
|
||||
fieldMapping(b -> b.field("type", "dense_vector").field("dims", 3).field("index", true).field("similarity", vs))
|
||||
);
|
||||
float[] vector = { 0.07247924f, -4.310546E-11f, -1.7255947E30f };
|
||||
DocumentParsingException e = expectThrows(
|
||||
DocumentParsingException.class,
|
||||
() -> mapper.parse(source(b -> b.array("field", vector)))
|
||||
);
|
||||
assertNotNull(e.getCause());
|
||||
assertThat(
|
||||
e.getCause().getMessage(),
|
||||
containsString(
|
||||
"NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float."
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public void testInvalidParameters() {
|
||||
MapperParsingException e = expectThrows(
|
||||
MapperParsingException.class,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue