[ES|QL] Add improved support for Elasticsearch sub-types in AST for both validation and autocomplete (#188600)

## Summary

This PR addresses https://github.com/elastic/kibana/issues/174710 and
leverages sub-types in AST for both validation and autocomplete. Changes
include:
- Expanded the automatically generated function signatures to not longer
cast to `string` or `number`. Instead, it expands out to `keyword`,
`text`, `double`, `long`, `float`, etc.
- Expanded math function signatures to reflect the true return types
(e.g double + interger => double)
- Expanded function signatures for `bucket`
- Expanded builtin functions to better support types
-  Literal support: 
- Added a `decimal` literal context → Which will be accepted by function
signatures for 'double', 'long', 'float', etc.
- Added and `integer` literal type → Which will be accepted by function
signatures for 'interger'
- Updated tests

After:
<img width="1181" alt="Screenshot 2024-07-24 at 13 06 23"
src="https://github.com/user-attachments/assets/42f32f3b-2e6e-4dff-8b20-dd60f8c45750">
<img width="1726" alt="Screenshot 2024-07-24 at 13 06 47"
src="https://github.com/user-attachments/assets/3c6f1eac-600e-4b2c-9e57-f71f79ad9ce9">
<img width="1726" alt="Screenshot 2024-07-24 at 13 07 09"
src="https://github.com/user-attachments/assets/e8b5817e-d4de-446b-8b41-c6c7b8612420">

### Checklist

Delete any items that are not applicable to this PR.

- [ ] Any text added follows [EUI's writing
guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses
sentence case text and includes [i18n
support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)
- [ ]
[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)
was added for features that require explanation or tutorials
- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed
- [ ] Any UI touched in this PR is usable by keyboard only (learn more
about [keyboard accessibility](https://webaim.org/techniques/keyboard/))
- [ ] Any UI touched in this PR does not create any new axe failures
(run axe in browser:
[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),
[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))
- [ ] If a plugin configuration key changed, check if it needs to be
allowlisted in the cloud and added to the [docker
list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)
- [ ] This renders correctly on smaller devices using a responsive
layout. (You can test this [in your
browser](https://www.browserstack.com/guide/responsive-testing-on-local-server))
- [ ] This was checked for [cross-browser
compatibility](https://www.elastic.co/support/matrix#matrix_browsers)


### Risk Matrix

Delete this section if it is not applicable to this PR.

Before closing this PR, invite QA, stakeholders, and other developers to
identify risks that should be tested prior to the change/feature
release.

When forming the risk matrix, consider some of the following examples
and how they may potentially impact the change:

| Risk | Probability | Severity | Mitigation/Notes |

|---------------------------|-------------|----------|-------------------------|
| Multiple Spaces&mdash;unexpected behavior in non-default Kibana Space.
| Low | High | Integration tests will verify that all features are still
supported in non-default Kibana Space and when user switches between
spaces. |
| Multiple nodes&mdash;Elasticsearch polling might have race conditions
when multiple Kibana nodes are polling for the same tasks. | High | Low
| Tasks are idempotent, so executing them multiple times will not result
in logical error, but will degrade performance. To test for this case we
add plenty of unit tests around this logic and document manual testing
procedure. |
| Code should gracefully handle cases when feature X or plugin Y are
disabled. | Medium | High | Unit tests will verify that any feature flag
or plugin combination still results in our service operational. |
| [See more potential risk
examples](https://github.com/elastic/kibana/blob/main/RISK_MATRIX.mdx) |


### For maintainers

- [ ] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
Quynh Nguyen (Quinn) 2024-07-31 17:08:11 -05:00 committed by GitHub
parent 0be8295ed0
commit d9282a6e6e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
37 changed files with 30729 additions and 14403 deletions

View file

@ -25,6 +25,7 @@ import {
} from '../src/definitions/types';
import { FUNCTION_DESCRIBE_BLOCK_NAME } from '../src/validation/function_describe_block_name';
import { getMaxMinNumberOfParams } from '../src/validation/helpers';
import { ESQL_NUMBER_TYPES, isNumericType, isStringType } from '../src/shared/esql_types';
export const fieldNameFromType = (type: SupportedFieldType) => `${camelCase(type)}Field`;
@ -141,8 +142,8 @@ function generateImplicitDateCastingTestsForFunction(
const allSignaturesWithDateParams = definition.signatures.filter((signature) =>
signature.params.some(
(param, i) =>
param.type === 'date' &&
!definition.signatures.some((def) => getParamAtPosition(def, i)?.type === 'string') // don't count parameters that already accept a string
(param.type === 'date' || param.type === 'date_period') &&
!definition.signatures.some((def) => isStringType(getParamAtPosition(def, i)?.type)) // don't count parameters that already accept a string
)
);
@ -300,8 +301,8 @@ function generateWhereCommandTestsForEvalFunction(
// TODO: not sure why there's this constraint...
const supportedFunction = signatures.some(
({ returnType, params }) =>
['number', 'string'].includes(returnType) &&
params.every(({ type }) => ['number', 'string'].includes(type))
[...ESQL_NUMBER_TYPES, 'string'].includes(returnType) &&
params.every(({ type }) => [...ESQL_NUMBER_TYPES, 'string'].includes(type))
);
if (!supportedFunction) {
@ -311,12 +312,12 @@ function generateWhereCommandTestsForEvalFunction(
const supportedSignatures = signatures.filter(({ returnType }) =>
// TODO — not sure why the tests have this limitation... seems like any type
// that can be part of a boolean expression should be allowed in a where clause
['number', 'string'].includes(returnType)
[...ESQL_NUMBER_TYPES, 'string'].includes(returnType)
);
for (const { params, returnType, ...restSign } of supportedSignatures) {
const correctMapping = getFieldMapping(params);
testCases.set(
`from a_index | where ${returnType !== 'number' ? 'length(' : ''}${
`from a_index | where ${!isNumericType(returnType) ? 'length(' : ''}${
// hijacking a bit this function to produce a function call
getFunctionSignatures(
{
@ -326,7 +327,7 @@ function generateWhereCommandTestsForEvalFunction(
},
{ withTypes: false }
)[0].declaration
}${returnType !== 'number' ? ')' : ''} > 0`,
}${!isNumericType(returnType) ? ')' : ''} > 0`,
[]
);
@ -337,7 +338,7 @@ function generateWhereCommandTestsForEvalFunction(
supportedTypesAndFieldNames
);
testCases.set(
`from a_index | where ${returnType !== 'number' ? 'length(' : ''}${
`from a_index | where ${!isNumericType(returnType) ? 'length(' : ''}${
// hijacking a bit this function to produce a function call
getFunctionSignatures(
{
@ -347,7 +348,7 @@ function generateWhereCommandTestsForEvalFunction(
},
{ withTypes: false }
)[0].declaration
}${returnType !== 'number' ? ')' : ''} > 0`,
}${!isNumericType(returnType) ? ')' : ''} > 0`,
expectedErrors
);
}
@ -357,7 +358,7 @@ function generateWhereCommandTestsForAggFunction(
{ name, alias, signatures, ...defRest }: FunctionDefinition,
testCases: Map<string, string[]>
) {
// statsSignatures.some(({ returnType, params }) => ['number'].includes(returnType))
// statsSignatures.some(({ returnType, params }) => [...ESQL_NUMBER_TYPES].includes(returnType))
for (const { params, ...signRest } of signatures) {
const fieldMapping = getFieldMapping(params);
@ -542,7 +543,7 @@ function generateEvalCommandTestsForEvalFunction(
signatureWithGreatestNumberOfParams.params
).concat({
name: 'extraArg',
type: 'number',
type: 'integer',
});
// get the expected args from the first signature in case of errors
@ -660,7 +661,7 @@ function generateStatsCommandTestsForAggFunction(
testCases.set(`from a_index | stats var = ${correctSignature}`, []);
testCases.set(`from a_index | stats ${correctSignature}`, []);
if (signRest.returnType === 'number') {
if (isNumericType(signRest.returnType)) {
testCases.set(`from a_index | stats var = round(${correctSignature})`, []);
testCases.set(`from a_index | stats round(${correctSignature})`, []);
testCases.set(
@ -713,8 +714,8 @@ function generateStatsCommandTestsForAggFunction(
}
// test only numeric functions for now
if (params[0].type === 'number') {
const nestedBuiltin = 'numberField / 2';
if (isNumericType(params[0].type)) {
const nestedBuiltin = 'doubleField / 2';
const fieldMappingWithNestedBuiltinFunctions = getFieldMapping(params);
fieldMappingWithNestedBuiltinFunctions[0].name = nestedBuiltin;
@ -726,16 +727,16 @@ function generateStatsCommandTestsForAggFunction(
},
{ withTypes: false }
)[0].declaration;
// from a_index | STATS aggFn( numberField / 2 )
// from a_index | STATS aggFn( doubleField / 2 )
testCases.set(`from a_index | stats ${fnSignatureWithBuiltinString}`, []);
testCases.set(`from a_index | stats var0 = ${fnSignatureWithBuiltinString}`, []);
testCases.set(`from a_index | stats avg(numberField), ${fnSignatureWithBuiltinString}`, []);
testCases.set(`from a_index | stats avg(doubleField), ${fnSignatureWithBuiltinString}`, []);
testCases.set(
`from a_index | stats avg(numberField), var0 = ${fnSignatureWithBuiltinString}`,
`from a_index | stats avg(doubleField), var0 = ${fnSignatureWithBuiltinString}`,
[]
);
const nestedEvalAndBuiltin = 'round(numberField / 2)';
const nestedEvalAndBuiltin = 'round(doubleField / 2)';
const fieldMappingWithNestedEvalAndBuiltinFunctions = getFieldMapping(params);
fieldMappingWithNestedBuiltinFunctions[0].name = nestedEvalAndBuiltin;
@ -747,18 +748,18 @@ function generateStatsCommandTestsForAggFunction(
},
{ withTypes: false }
)[0].declaration;
// from a_index | STATS aggFn( round(numberField / 2) )
// from a_index | STATS aggFn( round(doubleField / 2) )
testCases.set(`from a_index | stats ${fnSignatureWithEvalAndBuiltinString}`, []);
testCases.set(`from a_index | stats var0 = ${fnSignatureWithEvalAndBuiltinString}`, []);
testCases.set(
`from a_index | stats avg(numberField), ${fnSignatureWithEvalAndBuiltinString}`,
`from a_index | stats avg(doubleField), ${fnSignatureWithEvalAndBuiltinString}`,
[]
);
testCases.set(
`from a_index | stats avg(numberField), var0 = ${fnSignatureWithEvalAndBuiltinString}`,
`from a_index | stats avg(doubleField), var0 = ${fnSignatureWithEvalAndBuiltinString}`,
[]
);
// from a_index | STATS aggFn(round(numberField / 2) ) BY round(numberField / 2)
// from a_index | STATS aggFn(round(doubleField / 2) ) BY round(doubleField / 2)
testCases.set(
`from a_index | stats ${fnSignatureWithEvalAndBuiltinString} by ${nestedEvalAndBuiltin}`,
[]
@ -768,19 +769,19 @@ function generateStatsCommandTestsForAggFunction(
[]
);
testCases.set(
`from a_index | stats avg(numberField), ${fnSignatureWithEvalAndBuiltinString} by ${nestedEvalAndBuiltin}, ipField`,
`from a_index | stats avg(doubleField), ${fnSignatureWithEvalAndBuiltinString} by ${nestedEvalAndBuiltin}, ipField`,
[]
);
testCases.set(
`from a_index | stats avg(numberField), var0 = ${fnSignatureWithEvalAndBuiltinString} by var1 = ${nestedEvalAndBuiltin}, ipField`,
`from a_index | stats avg(doubleField), var0 = ${fnSignatureWithEvalAndBuiltinString} by var1 = ${nestedEvalAndBuiltin}, ipField`,
[]
);
testCases.set(
`from a_index | stats avg(numberField), ${fnSignatureWithEvalAndBuiltinString} by ${nestedEvalAndBuiltin}, ${nestedBuiltin}`,
`from a_index | stats avg(doubleField), ${fnSignatureWithEvalAndBuiltinString} by ${nestedEvalAndBuiltin}, ${nestedBuiltin}`,
[]
);
testCases.set(
`from a_index | stats avg(numberField), var0 = ${fnSignatureWithEvalAndBuiltinString} by var1 = ${nestedEvalAndBuiltin}, ${nestedBuiltin}`,
`from a_index | stats avg(doubleField), var0 = ${fnSignatureWithEvalAndBuiltinString} by var1 = ${nestedEvalAndBuiltin}, ${nestedBuiltin}`,
[]
);
}
@ -798,7 +799,7 @@ function generateStatsCommandTestsForAggFunction(
.filter(({ constantOnly }) => !constantOnly)
.map(
(_) =>
`Aggregate function's parameters must be an attribute, literal or a non-aggregation function; found [avg(numberField)] of type [number]`
`Aggregate function's parameters must be an attribute, literal or a non-aggregation function; found [avg(doubleField)] of type [double]`
);
testCases.set(
`from a_index | stats var = ${
@ -965,9 +966,17 @@ function generateSortCommandTestsForAggFunction(
const generateSortCommandTestsForGroupingFunction = generateSortCommandTestsForAggFunction;
const fieldTypesToConstants: Record<SupportedFieldType, string> = {
string: '"a"',
number: '5',
date: 'now()',
text: '"a"',
keyword: '"a"',
double: '5.5',
integer: '5',
long: '5',
unsigned_long: '5',
counter_integer: '5',
counter_long: '5',
counter_double: '5.5',
date: 'to_datetime("2021-01-01T00:00:00Z")',
date_period: 'to_date_period("2021-01-01/2021-01-02")',
boolean: 'true',
version: 'to_version("1.0.0")',
ip: 'to_ip("127.0.0.1")',
@ -1003,8 +1012,8 @@ function prepareNestedFunction(fnSignature: FunctionDefinition): string {
}
const toAvgSignature = statsAggregationFunctionDefinitions.find(({ name }) => name === 'avg')!;
const toInteger = evalFunctionDefinitions.find(({ name }) => name === 'to_integer')!;
const toDoubleSignature = evalFunctionDefinitions.find(({ name }) => name === 'to_double')!;
const toStringSignature = evalFunctionDefinitions.find(({ name }) => name === 'to_string')!;
const toDateSignature = evalFunctionDefinitions.find(({ name }) => name === 'to_datetime')!;
const toBooleanSignature = evalFunctionDefinitions.find(({ name }) => name === 'to_boolean')!;
@ -1019,10 +1028,12 @@ const toCartesianShapeSignature = evalFunctionDefinitions.find(
)!;
const toVersionSignature = evalFunctionDefinitions.find(({ name }) => name === 'to_version')!;
// We don't have full list for long, unsigned_long, etc.
const nestedFunctions: Record<SupportedFieldType, string> = {
number: prepareNestedFunction(toInteger),
string: prepareNestedFunction(toStringSignature),
date: prepareNestedFunction(toDateSignature),
double: prepareNestedFunction(toDoubleSignature),
integer: prepareNestedFunction(toInteger),
text: prepareNestedFunction(toStringSignature),
keyword: prepareNestedFunction(toStringSignature),
boolean: prepareNestedFunction(toBooleanSignature),
ip: prepareNestedFunction(toIpSignature),
version: prepareNestedFunction(toVersionSignature),
@ -1030,6 +1041,8 @@ const nestedFunctions: Record<SupportedFieldType, string> = {
geo_shape: prepareNestedFunction(toGeoShapeSignature),
cartesian_point: prepareNestedFunction(toCartesianPointSignature),
cartesian_shape: prepareNestedFunction(toCartesianShapeSignature),
// @ts-expect-error
datetime: prepareNestedFunction(toDateSignature),
};
function getFieldName(
@ -1086,6 +1099,7 @@ function getFieldMapping(
number: '5',
date: 'now()',
};
return params.map(({ name: _name, type, constantOnly, literalOptions, ...rest }) => {
const typeString: string = type;
if (isSupportedFieldType(typeString)) {
@ -1124,7 +1138,7 @@ function getFieldMapping(
...rest,
};
}
return { name: 'stringField', type, ...rest };
return { name: 'textField', type, ...rest };
});
}
@ -1225,8 +1239,12 @@ function generateIncorrectlyTypedParameters(
}
const fieldName = wrongFieldMapping[i].name;
if (
fieldName === 'numberField' &&
signatures.every((signature) => getParamAtPosition(signature, i)?.type !== 'string')
fieldName === 'doubleField' &&
signatures.every(
(signature) =>
getParamAtPosition(signature, i)?.type !== 'keyword' ||
getParamAtPosition(signature, i)?.type !== 'text'
)
) {
return;
}