ESQL: Fix precision of scaled_float field values retrieved from stored source (#122586)

This commit is contained in:
kanoshiou 2025-02-21 06:01:34 +08:00 committed by GitHub
parent 521f8554c3
commit de41d5704b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 35 additions and 27 deletions

View file

@ -0,0 +1,6 @@
pr: 122586
summary: "ESQL: Fix inconsistent results in using scaled_float field"
area: ES|QL
type: bug
issues:
- 122547

View file

@ -321,8 +321,7 @@ public class ScaledFloatFieldMapper extends FieldMapper {
return BlockLoader.CONSTANT_NULLS; return BlockLoader.CONSTANT_NULLS;
} }
if (hasDocValues()) { if (hasDocValues()) {
double scalingFactorInverse = 1d / scalingFactor; return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l / scalingFactor);
return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l * scalingFactorInverse);
} }
if (isSyntheticSource) { if (isSyntheticSource) {
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) { return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {

View file

@ -26,16 +26,6 @@ public class ScaledFloatFieldBlockLoaderTests extends NumberFieldBlockLoaderTest
protected Double convert(Number value, Map<String, Object> fieldMapping) { protected Double convert(Number value, Map<String, Object> fieldMapping) {
var scalingFactor = ((Number) fieldMapping.get("scaling_factor")).doubleValue(); var scalingFactor = ((Number) fieldMapping.get("scaling_factor")).doubleValue();
var docValues = (boolean) fieldMapping.getOrDefault("doc_values", false);
// There is a slight inconsistency between values that are read from doc_values and from source.
// Due to how precision reduction is applied to source values so that they are consistent with doc_values.
// See #122547.
if (docValues) {
var reverseScalingFactor = 1d / scalingFactor;
return Math.round(value.doubleValue() * scalingFactor) * reverseScalingFactor;
}
// Adjust values coming from source to the way they are stored in doc_values. // Adjust values coming from source to the way they are stored in doc_values.
// See mapper implementation. // See mapper implementation.
return Math.round(value.doubleValue() * scalingFactor) / scalingFactor; return Math.round(value.doubleValue() * scalingFactor) / scalingFactor;

View file

@ -660,8 +660,6 @@ public final class CsvTestUtils {
private static double scaledFloat(String value, String factor) { private static double scaledFloat(String value, String factor) {
double scalingFactor = Double.parseDouble(factor); double scalingFactor = Double.parseDouble(factor);
// this extra division introduces extra imprecision in the following multiplication, but this is how ScaledFloatFieldMapper works. return new BigDecimal(value).multiply(BigDecimal.valueOf(scalingFactor)).longValue() / scalingFactor;
double scalingFactorInverse = 1d / scalingFactor;
return new BigDecimal(value).multiply(BigDecimal.valueOf(scalingFactor)).longValue() * scalingFactorInverse;
} }
} }

View file

@ -8,35 +8,39 @@ a:double | b:double
; ;
inDouble inDouble
required_capability: fix_precision_of_scaled_float_fields
from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no; from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double
10001 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10001 |2.03 |2.0299999713897705 |2.029296875 |2.03
10090 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10090 |2.03 |2.0299999713897705 |2.029296875 |2.03
; ;
inFloat inFloat
required_capability: fix_precision_of_scaled_float_fields
from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no; from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double
10001 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10001 |2.03 |2.0299999713897705 |2.029296875 |2.03
10090 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10090 |2.03 |2.0299999713897705 |2.029296875 |2.03
; ;
inHalfFloat inHalfFloat
required_capability: fix_precision_of_scaled_float_fields
from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.half_float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no; from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.half_float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double
10001 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10001 |2.03 |2.0299999713897705 |2.029296875 |2.03
10090 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10090 |2.03 |2.0299999713897705 |2.029296875 |2.03
; ;
inScaledFloat inScaledFloat
required_capability: fix_precision_of_scaled_float_fields
from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.scaled_float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no; from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height.scaled_float in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no;
emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double emp_no:integer |height:double |height.float:double |height.half_float:double |height.scaled_float:double
10001 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10001 |2.03 |2.0299999713897705 |2.029296875 |2.03
10090 |2.03 |2.0299999713897705 |2.029296875 |2.0300000000000002 10090 |2.03 |2.0299999713897705 |2.029296875 |2.03
; ;
convertFromDouble convertFromDouble

View file

@ -266,10 +266,11 @@ height:double | languages.long:long | still_hired:boolean
; ;
simpleEvalWithSortAndLimitOne simpleEvalWithSortAndLimitOne
required_capability: fix_precision_of_scaled_float_fields
from employees | eval x = languages + 7 | sort x, avg_worked_seconds | limit 1; from employees | eval x = languages + 7 | sort x, avg_worked_seconds | limit 1;
avg_worked_seconds:long | birth_date:date | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date | is_rehired:boolean | job_positions:keyword | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword |salary_change.long:long | still_hired:boolean | x:integer avg_worked_seconds:long | birth_date:date | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date | is_rehired:boolean | job_positions:keyword | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword |salary_change.long:long | still_hired:boolean | x:integer
208374744 |1956-11-14T00:00:00.000Z|10033 |null |M |1.63 |1.6299999952316284|1.6298828125 |1.6300000000000001 |1987-03-18T00:00:00.000Z|true |null |1 |1 |1 |1 |Merlo |70011 |null |null |null |null |false |8 208374744 |1956-11-14T00:00:00.000Z|10033 |null |M |1.63 |1.6299999952316284|1.6298828125 |1.63 |1987-03-18T00:00:00.000Z|true |null |1 |1 |1 |1 |Merlo |70011 |null |null |null |null |false |8
; ;
evalOfAverageValue evalOfAverageValue

View file

@ -319,10 +319,11 @@ a | a
//see https://github.com/elastic/elasticsearch/issues/103331 //see https://github.com/elastic/elasticsearch/issues/103331
keepStarMvExpand#[skip:-8.12.99] keepStarMvExpand#[skip:-8.12.99]
required_capability: fix_precision_of_scaled_float_fields
from employees | where emp_no == 10001 | keep * | mv_expand first_name; from employees | where emp_no == 10001 | keep * | mv_expand first_name;
avg_worked_seconds:long | birth_date:date | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date | is_rehired:boolean | job_positions:keyword | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword | salary_change.long:long | still_hired:boolean avg_worked_seconds:long | birth_date:date | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date | is_rehired:boolean | job_positions:keyword | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword | salary_change.long:long | still_hired:boolean
268728049 | 1953-09-02T00:00:00.000Z | 10001 | Georgi | M | 2.03 | 2.0299999713897705 | 2.029296875 | 2.0300000000000002 | 1986-06-26T00:00:00.000Z | [false, true] | [Accountant, Senior Python Developer] | 2 | 2 | 2 | 2 | Facello | 57305 | 1.19 | 1 | 1.19 | 1 | true 268728049 | 1953-09-02T00:00:00.000Z | 10001 | Georgi | M | 2.03 | 2.0299999713897705 | 2.029296875 | 2.03 | 1986-06-26T00:00:00.000Z | [false, true] | [Accountant, Senior Python Developer] | 2 | 2 | 2 | 2 | Facello | 57305 | 1.19 | 1 | 1.19 | 1 | true
; ;

View file

@ -159,11 +159,12 @@ y:integer | x:date
; ;
renameIntertwinedWithSort renameIntertwinedWithSort
required_capability: fix_precision_of_scaled_float_fields
FROM employees | eval x = salary | rename x as y | rename y as x | sort x | rename x as y | limit 10; FROM employees | eval x = salary | rename x as y | rename y as x | sort x | rename x as y | limit 10;
avg_worked_seconds:l | birth_date:date | emp_no:i | first_name:s | gender:s | height:d | height.float:d | height.half_float:d | height.scaled_float:d| hire_date:date | is_rehired:bool | job_positions:s | languages:i | languages.byte:i | languages.long:l | languages.short:i | last_name:s | salary:i | salary_change:d | salary_change.int:i | salary_change.keyword:s | salary_change.long:l | still_hired:bool | y:i avg_worked_seconds:l | birth_date:date | emp_no:i | first_name:s | gender:s | height:d | height.float:d | height.half_float:d | height.scaled_float:d| hire_date:date | is_rehired:bool | job_positions:s | languages:i | languages.byte:i | languages.long:l | languages.short:i | last_name:s | salary:i | salary_change:d | salary_change.int:i | salary_change.keyword:s | salary_change.long:l | still_hired:bool | y:i
390266432 | 1959-08-19T00:00:00.000Z | 10015 | Guoxiang | null | 1.66 | 1.659999966621399 | 1.66015625 | 1.6600000000000001 | 1987-07-02T00:00:00.000Z | [false, false, false, true]| [Head Human Resources, Junior Developer, Principal Support Engineer, Support Engineer] | 5 | 5 | 5 | 5 | Nooteboom | 25324 | [12.4, 14.25] | [12, 14] | [12.40, 14.25] | [12, 14] | true | 25324 390266432 | 1959-08-19T00:00:00.000Z | 10015 | Guoxiang | null | 1.66 | 1.659999966621399 | 1.66015625 | 1.66 | 1987-07-02T00:00:00.000Z | [false, false, false, true]| [Head Human Resources, Junior Developer, Principal Support Engineer, Support Engineer] | 5 | 5 | 5 | 5 | Nooteboom | 25324 | [12.4, 14.25] | [12, 14] | [12.40, 14.25] | [12, 14] | true | 25324
203838153 | 1953-02-08T00:00:00.000Z | 10035 | null | M | 1.81 | 1.809999942779541 | 1.8095703125 | 1.81 | 1988-09-05T00:00:00.000Z | false | [Data Scientist, Senior Python Developer] | 5 | 5 | 5 | 5 | Chappelet | 25945 | [-6.58, -2.54] | [-6, -2] | [-2.54, -6.58] | [-6, -2] | false | 25945 203838153 | 1953-02-08T00:00:00.000Z | 10035 | null | M | 1.81 | 1.809999942779541 | 1.8095703125 | 1.81 | 1988-09-05T00:00:00.000Z | false | [Data Scientist, Senior Python Developer] | 5 | 5 | 5 | 5 | Chappelet | 25945 | [-6.58, -2.54] | [-6, -2] | [-2.54, -6.58] | [-6, -2] | false | 25945
313407352 | 1964-10-18T00:00:00.000Z | 10092 | Valdiodio | F | 1.75 | 1.75 | 1.75 | 1.75 | 1989-09-22T00:00:00.000Z | [false, false, true, true] | [Accountant, Junior Developer] | 1 | 1 | 1 | 1 | Niizuma | 25976 | [-6.77, 0.39, 8.3, 8.78] | [-6, 0, 8, 8] | [-6.77, 0.39, 8.30,8.78] | [-6, 0, 8, 8] | false | 25976 313407352 | 1964-10-18T00:00:00.000Z | 10092 | Valdiodio | F | 1.75 | 1.75 | 1.75 | 1.75 | 1989-09-22T00:00:00.000Z | [false, false, true, true] | [Accountant, Junior Developer] | 1 | 1 | 1 | 1 | Niizuma | 25976 | [-6.77, 0.39, 8.3, 8.78] | [-6, 0, 8, 8] | [-6.77, 0.39, 8.30,8.78] | [-6, 0, 8, 8] | false | 25976
248451647 | null | 10048 | Florian | M | 2.0 | 2.0 | 2.0 | 2.0 | 1985-02-24T00:00:00.000Z | [true, true] | Internship | 3 | 3 | 3 | 3 | Syrotiuk | 26436 | null | null | null | null | false | 26436 248451647 | null | 10048 | Florian | M | 2.0 | 2.0 | 2.0 | 2.0 | 1985-02-24T00:00:00.000Z | [true, true] | Internship | 3 | 3 | 3 | 3 | Syrotiuk | 26436 | null | null | null | null | false | 26436

View file

@ -52,6 +52,7 @@ h:long
; ;
countDistinctOfScaledFloat countDistinctOfScaledFloat
required_capability: fix_precision_of_scaled_float_fields
// float becomes double until https://github.com/elastic/elasticsearch-internal/issues/724 // float becomes double until https://github.com/elastic/elasticsearch-internal/issues/724
from employees | stats h = count_distinct(height.scaled_float); from employees | stats h = count_distinct(height.scaled_float);

View file

@ -734,10 +734,11 @@ null
; ;
convertFromFloats convertFromFloats
required_capability: fix_precision_of_scaled_float_fields
from employees | sort emp_no| eval double = to_string(height), float = to_string(height.float), scaled_float = to_string(height.scaled_float), half_float = to_string(height.half_float) | keep emp_no, double, float, scaled_float, half_float, height | limit 2; from employees | sort emp_no| eval double = to_string(height), float = to_string(height.float), scaled_float = to_string(height.scaled_float), half_float = to_string(height.half_float) | keep emp_no, double, float, scaled_float, half_float, height | limit 2;
emp_no:integer |double:keyword |float:keyword |scaled_float:keyword |half_float:keyword |height:double emp_no:integer |double:keyword |float:keyword |scaled_float:keyword |half_float:keyword |height:double
10001 |2.03 |2.0299999713897705|2.0300000000000002 |2.029296875 |2.03 10001 |2.03 |2.0299999713897705|2.03 |2.029296875 |2.03
10002 |2.08 |2.0799999237060547|2.08 |2.080078125 |2.08 10002 |2.08 |2.0799999237060547|2.08 |2.080078125 |2.08
; ;

View file

@ -408,6 +408,12 @@ public class EsqlCapabilities {
*/ */
FIX_PARSING_LARGE_NEGATIVE_NUMBERS, FIX_PARSING_LARGE_NEGATIVE_NUMBERS,
/**
* Fix precision of scaled_float field values retrieved from stored source
* see <a href="https://github.com/elastic/elasticsearch/issues/122547"> Slight inconsistency in ESQL using scaled_float field #122547 </a>
*/
FIX_PRECISION_OF_SCALED_FLOAT_FIELDS,
/** /**
* Fix the status code returned when trying to run count_distinct on the _source type (which is not supported). * Fix the status code returned when trying to run count_distinct on the _source type (which is not supported).
* see <a href="https://github.com/elastic/elasticsearch/issues/105240">count_distinct(_source) returns a 500 response</a> * see <a href="https://github.com/elastic/elasticsearch/issues/105240">count_distinct(_source) returns a 500 response</a>