Add an error margin when comparing floats. (#129721)

We add a margin of error when comparing floats to the DynamicFieldMatcher to account for a small loss of accuracy when loading fields from synthetic source.
This commit is contained in:
Mary Gouseti 2025-06-23 18:46:46 +03:00 committed by GitHub
parent 816caf70fc
commit d859366d4b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 37 additions and 26 deletions

View file

@ -17,14 +17,14 @@ import org.elasticsearch.xcontent.XContentBuilder;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.function.Supplier;
import static org.elasticsearch.datageneration.matchers.Messages.formatErrorMessage;
import static org.elasticsearch.datageneration.matchers.Messages.prettyPrintCollections;
class DynamicFieldMatcher {
private static final double FLOAT_ERROR_MARGIN = 1e-8;
private final XContentBuilder actualMappings;
private final Settings.Builder actualSettings;
private final XContentBuilder expectedMappings;
@ -62,31 +62,51 @@ class DynamicFieldMatcher {
var normalizedActual = normalizeDoubles(actual);
var normalizedExpected = normalizeDoubles(expected);
Supplier<MatchResult> noMatchSupplier = () -> MatchResult.noMatch(
formatErrorMessage(
actualMappings,
actualSettings,
expectedMappings,
expectedSettings,
"Values of dynamically mapped field containing double values don't match after normalization, normalized "
+ prettyPrintCollections(normalizedActual, normalizedExpected)
)
);
return normalizedActual.equals(normalizedExpected)
? MatchResult.match()
: MatchResult.noMatch(
formatErrorMessage(
actualMappings,
actualSettings,
expectedMappings,
expectedSettings,
"Values of dynamically mapped field containing double values don't match after normalization, normalized "
+ prettyPrintCollections(normalizedActual, normalizedExpected)
)
);
if (normalizedActual.size() != normalizedExpected.size()) {
return noMatchSupplier.get();
}
for (int i = 0; i < normalizedActual.size(); i++) {
if (floatEquals(normalizedActual.get(i), normalizedExpected.get(i)) == false) {
return noMatchSupplier.get();
}
}
return MatchResult.match();
}
return matchWithGenericMatcher(actual, expected);
}
private static Set<Float> normalizeDoubles(List<Object> values) {
/**
* We make the normalisation of double values stricter than {@link SourceTransforms#normalizeValues} to facilitate the equality of the
* values within a margin of error. Synthetic source does support duplicate values and preserves the order, but it loses some accuracy,
* this is why the margin of error is very important. In the future, we can make {@link SourceTransforms#normalizeValues} also stricter.
*/
private static List<Float> normalizeDoubles(List<Object> values) {
if (values == null) {
return Set.of();
return List.of();
}
Function<Object, Float> toFloat = (o) -> o instanceof Number n ? n.floatValue() : Float.parseFloat((String) o);
return values.stream().filter(Objects::nonNull).map(toFloat).collect(Collectors.toSet());
// We skip nulls because they trip the pretty print collections.
return values.stream().filter(Objects::nonNull).map(toFloat).toList();
}
private static boolean floatEquals(Float actual, Float expected) {
return Math.abs(actual - expected) < FLOAT_ERROR_MARGIN;
}
private MatchResult matchWithGenericMatcher(List<Object> actualValues, List<Object> expectedValues) {