[ES|QL] Function AST node subtypes (#189268)

## Summary

Closes https://github.com/elastic/kibana/issues/189259

- Introduces `subtype` property for *function* AST node types. This
allows to discriminate between real functions an various expression
types.


### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios



### For maintainers

- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
Vadim Kibana 2024-07-26 21:47:48 +02:00 committed by GitHub
parent 5817a9c9d3
commit 6c093b643b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 329 additions and 27 deletions

View file

@ -0,0 +1,227 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { getAstAndSyntaxErrors as parse } from '../ast_parser';
import { Walker } from '../walker';
describe('function AST nodes', () => {
describe('"variadic-call"', () => {
it('function call with a single argument', () => {
const query = 'ROW fn(1)';
const { ast, errors } = parse(query);
expect(errors.length).toBe(0);
expect(ast).toMatchObject([
{
type: 'command',
name: 'row',
args: [
{
type: 'function',
name: 'fn',
args: [
{
type: 'literal',
value: 1,
},
],
},
],
},
]);
});
it('function call with multiple argument', () => {
const query = 'ROW fn(1, 2, 3)';
const { ast, errors } = parse(query);
expect(errors.length).toBe(0);
expect(ast).toMatchObject([
{
type: 'command',
name: 'row',
args: [
{
type: 'function',
name: 'fn',
args: [
{
type: 'literal',
value: 1,
},
{
type: 'literal',
value: 2,
},
{
type: 'literal',
value: 3,
},
],
},
],
},
]);
});
});
describe('"unary-expression"', () => {
it('logical NOT', () => {
const query = 'FROM a | STATS NOT b';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === 'not');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'unary-expression',
name: 'not',
args: [expect.any(Object)],
});
});
// Currently arithmetic unary expressions, like "-x", are transformed to
// binary expressions: "-1 * x". Enable this test once unary expressions
// are supported.
it.skip('arithmetic', () => {
const query = 'FROM a | STATS -a';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === '*');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'unary-expression',
name: '-',
args: [expect.any(Object)],
});
});
});
describe('"postfix-unary-expression"', () => {
it('IS [NOT] NULL', () => {
const query = 'FROM a | STATS a IS NOT NULL';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === 'is not null');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'postfix-unary-expression',
name: 'is not null',
args: [expect.any(Object)],
});
});
});
describe('"binary-expression"', () => {
it('arithmetic and logical operations', () => {
const ops = ['+', '-', '*', '/', '%', 'and', 'or', '>', '>=', '<', '<=', '==', '!='];
for (const op of ops) {
const query = `ROW 1 ${op} 2`;
const { ast, errors } = parse(query);
expect(errors.length).toBe(0);
expect(ast).toMatchObject([
{
type: 'command',
name: 'row',
args: [
{
type: 'function',
subtype: 'binary-expression',
name: op,
args: [
{
type: 'literal',
value: 1,
},
{
type: 'literal',
value: 2,
},
],
},
],
},
]);
}
});
it('logical IN', () => {
const query = 'FROM a | STATS a IN (1, 2, 3)';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === 'in');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'binary-expression',
name: 'in',
args: [expect.any(Object), expect.any(Object)],
});
});
it('logical NOT IN', () => {
const query = 'FROM a | STATS a NOT IN (1, 2, 3)';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === 'not_in');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'binary-expression',
name: 'not_in',
args: [expect.any(Object), expect.any(Object)],
});
});
it('regex expression', () => {
const query = 'FROM a | STATS a LIKE "adsf"';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === 'like');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'binary-expression',
name: 'like',
args: [expect.any(Object), expect.any(Object)],
});
});
it('assignment in ENRICH .. WITH clause', () => {
const query = 'FROM a | ENRICH b ON c WITH d = e';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === '=');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'binary-expression',
name: '=',
args: [expect.any(Object), expect.any(Object)],
});
});
it('assignment in STATS', () => {
const query = 'FROM a | STATS b = c';
const { ast, errors } = parse(query);
const fn = Walker.findFunction(ast, ({ name }) => name === '=');
expect(errors.length).toBe(0);
expect(fn).toMatchObject({
type: 'function',
subtype: 'binary-expression',
name: '=',
args: [expect.any(Object), expect.any(Object)],
});
});
});
});

View file

@ -76,6 +76,7 @@ export class AstListener implements ESQLParserListener {
this.ast.push(commandAst);
commandAst.text = ctx.getText();
if (textExistsAndIsValid(ctx.INFO().getText())) {
// TODO: these probably should not be functions, instead use "column", like: INFO <identifier>?
commandAst?.args.push(createFunction('info', ctx, getPosition(ctx.INFO().symbol)));
}
}

View file

@ -35,6 +35,7 @@ import type {
ESQLCommandMode,
ESQLInlineCast,
ESQLUnknownItem,
FunctionSubtype,
} from './types';
export function nonNullable<T>(v: T): v is NonNullable<T> {
@ -187,12 +188,13 @@ export function createTimeUnit(ctx: QualifiedIntegerLiteralContext): ESQLTimeInt
};
}
export function createFunction(
export function createFunction<Subtype extends FunctionSubtype>(
name: string,
ctx: ParserRuleContext,
customPosition?: ESQLLocation
): ESQLFunction {
return {
customPosition?: ESQLLocation,
subtype?: Subtype
): ESQLFunction<Subtype> {
const node: ESQLFunction<Subtype> = {
type: 'function',
name,
text: ctx.getText(),
@ -200,6 +202,10 @@ export function createFunction(
args: [],
incomplete: Boolean(ctx.exception),
};
if (subtype) {
node.subtype = subtype;
}
return node;
}
function walkFunctionStructure(

View file

@ -194,7 +194,7 @@ export function getEnrichClauses(ctx: EnrichCommandContext) {
}
}
if (args.length) {
const fn = createFunction('=', clause);
const fn = createFunction('=', clause, undefined, 'binary-expression');
fn.args.push(args[0], args[1] ? [args[1]] : []);
option.args.push(fn);
}
@ -207,7 +207,7 @@ export function getEnrichClauses(ctx: EnrichCommandContext) {
}
function visitLogicalNot(ctx: LogicalNotContext) {
const fn = createFunction('not', ctx);
const fn = createFunction('not', ctx, undefined, 'unary-expression');
fn.args.push(...collectBooleanExpression(ctx.booleanExpression()));
// update the location of the assign based on arguments
const argsLocationExtends = computeLocationExtends(fn);
@ -216,7 +216,7 @@ function visitLogicalNot(ctx: LogicalNotContext) {
}
function visitLogicalAndsOrs(ctx: LogicalBinaryContext) {
const fn = createFunction(ctx.AND() ? 'and' : 'or', ctx);
const fn = createFunction(ctx.AND() ? 'and' : 'or', ctx, undefined, 'binary-expression');
fn.args.push(...collectBooleanExpression(ctx._left), ...collectBooleanExpression(ctx._right));
// update the location of the assign based on arguments
const argsLocationExtends = computeLocationExtends(fn);
@ -225,7 +225,7 @@ function visitLogicalAndsOrs(ctx: LogicalBinaryContext) {
}
function visitLogicalIns(ctx: LogicalInContext) {
const fn = createFunction(ctx.NOT() ? 'not_in' : 'in', ctx);
const fn = createFunction(ctx.NOT() ? 'not_in' : 'in', ctx, undefined, 'binary-expression');
const [left, ...list] = ctx.valueExpression_list();
const leftArg = visitValueExpression(left);
if (leftArg) {
@ -264,7 +264,12 @@ function visitValueExpression(ctx: ValueExpressionContext) {
}
if (ctx instanceof ComparisonContext) {
const comparisonNode = ctx.comparisonOperator();
const comparisonFn = createFunction(getComparisonName(comparisonNode), comparisonNode);
const comparisonFn = createFunction(
getComparisonName(comparisonNode),
comparisonNode,
undefined,
'binary-expression'
);
comparisonFn.args.push(
visitOperatorExpression(ctx._left)!,
visitOperatorExpression(ctx._right)!
@ -283,7 +288,7 @@ function visitOperatorExpression(
if (ctx instanceof ArithmeticUnaryContext) {
const arg = visitOperatorExpression(ctx.operatorExpression());
// this is a number sign thing
const fn = createFunction('*', ctx);
const fn = createFunction('*', ctx, undefined, 'binary-expression');
fn.args.push(createFakeMultiplyLiteral(ctx));
if (arg) {
fn.args.push(arg);
@ -291,7 +296,7 @@ function visitOperatorExpression(
return fn;
}
if (ctx instanceof ArithmeticBinaryContext) {
const fn = createFunction(getMathOperation(ctx), ctx);
const fn = createFunction(getMathOperation(ctx), ctx, undefined, 'binary-expression');
const args = [visitOperatorExpression(ctx._left), visitOperatorExpression(ctx._right)];
for (const arg of args) {
if (arg) {
@ -443,7 +448,12 @@ export function visitPrimaryExpression(ctx: PrimaryExpressionContext): ESQLAstIt
}
if (ctx instanceof FunctionContext) {
const functionExpressionCtx = ctx.functionExpression();
const fn = createFunction(functionExpressionCtx.identifier().getText().toLowerCase(), ctx);
const fn = createFunction(
functionExpressionCtx.identifier().getText().toLowerCase(),
ctx,
undefined,
'variadic-call'
);
const asteriskArg = functionExpressionCtx.ASTERISK()
? createColumnStar(functionExpressionCtx.ASTERISK()!)
: undefined;
@ -494,7 +504,7 @@ function collectRegexExpression(ctx: BooleanExpressionContext): ESQLFunction[] {
const negate = regex.NOT();
const likeType = regex._kind.text?.toLowerCase() || '';
const fnName = `${negate ? 'not_' : ''}${likeType}`;
const fn = createFunction(fnName, regex);
const fn = createFunction(fnName, regex, undefined, 'binary-expression');
const arg = visitValueExpression(regex.valueExpression());
if (arg) {
fn.args.push(arg);
@ -514,7 +524,7 @@ function collectIsNullExpression(ctx: BooleanExpressionContext) {
}
const negate = ctx.NOT();
const fnName = `is${negate ? ' not ' : ' '}null`;
const fn = createFunction(fnName, ctx);
const fn = createFunction(fnName, ctx, undefined, 'postfix-unary-expression');
const arg = visitValueExpression(ctx.valueExpression());
if (arg) {
fn.args.push(arg);
@ -547,7 +557,7 @@ export function collectBooleanExpression(ctx: BooleanExpressionContext | undefin
export function visitField(ctx: FieldContext) {
if (ctx.qualifiedName() && ctx.ASSIGN()) {
const fn = createFunction(ctx.ASSIGN()!.getText(), ctx);
const fn = createFunction(ctx.ASSIGN()!.getText(), ctx, undefined, 'binary-expression');
fn.args.push(
createColumn(ctx.qualifiedName()!),
collectBooleanExpression(ctx.booleanExpression())

View file

@ -67,11 +67,67 @@ export interface ESQLCommandMode extends ESQLAstBaseItem {
type: 'mode';
}
export interface ESQLFunction extends ESQLAstBaseItem {
/**
* We coalesce all function calls and expressions into a single "function"
* node type. This subtype is used to distinguish between different types
* of function calls and expressions.
*
* - `variadic-call` is a function call with any number of arguments: fn(a, b, c, ...)
* - `unary-expression` is a unary expression: -a, +a, NOT a, ...
* - `binary-expression` is a binary expression: a + b, a - b, a * b, ...
*/
export type FunctionSubtype =
| 'variadic-call' // fn(a, b, c, ...)
| 'unary-expression' // -a, +a, NOT a, ...
| 'postfix-unary-expression' // a IS NULL, a IS NOT NULL, ...
| 'binary-expression'; // a + b, a - b, a * b, ...
export interface ESQLFunction<
Subtype extends FunctionSubtype = FunctionSubtype,
Name extends string = string
> extends ESQLAstBaseItem<Name> {
type: 'function';
/**
* Default is 'variadic-call'.
*/
subtype?: Subtype;
args: ESQLAstItem[];
}
export interface ESQLFunctionCallExpression extends ESQLFunction<'variadic-call'> {
subtype: 'variadic-call';
args: ESQLAstItem[];
}
export interface ESQLUnaryExpression extends ESQLFunction<'unary-expression'> {
subtype: 'unary-expression';
args: [ESQLAstItem];
}
export interface ESQLPostfixUnaryExpression extends ESQLFunction<'postfix-unary-expression'> {
subtype: 'postfix-unary-expression';
args: [ESQLAstItem];
}
export interface ESQLBinaryExpression
extends ESQLFunction<'binary-expression', BinaryExpressionOperator> {
subtype: 'binary-expression';
args: [ESQLAstItem, ESQLAstItem];
}
export type BinaryExpressionOperator =
| BinaryExpressionArithmeticOperator
| BinaryExpressionAssignmentOperator
| BinaryExpressionComparisonOperator
| BinaryExpressionRegexOperator;
export type BinaryExpressionArithmeticOperator = '+' | '-' | '*' | '/' | '%';
export type BinaryExpressionAssignmentOperator = '=';
export type BinaryExpressionComparisonOperator = '==' | '=~' | '!=' | '<' | '<=' | '>' | '>=';
export type BinaryExpressionRegexOperator = 'like' | 'not_like' | 'rlike' | 'not_rlike';
export interface ESQLInlineCast<ValueType = ESQLAstItem> extends ESQLAstBaseItem {
type: 'inlineCast';
value: ValueType;

View file

@ -42,6 +42,8 @@ export interface WalkerOptions {
visitUnknown?: (node: ESQLUnknownItem) => void;
}
export type WalkerAstNode = ESQLAstNode | ESQLAstNode[];
/**
* Iterates over all nodes in the AST and calls the appropriate visitor
* functions.
@ -64,7 +66,7 @@ export class Walker {
/**
* Walks the AST and calls the appropriate visitor functions.
*/
public static readonly walk = (node: Node, options: WalkerOptions): Walker => {
public static readonly walk = (node: WalkerAstNode, options: WalkerOptions): Walker => {
const walker = new Walker(options);
walker.walk(node);
return walker;
@ -88,7 +90,7 @@ export class Walker {
*
* @param node AST node to extract parameters from.
*/
public static readonly params = (node: Node): ESQLParamLiteral[] => {
public static readonly params = (node: WalkerAstNode): ESQLParamLiteral[] => {
const params: ESQLParamLiteral[] = [];
Walker.walk(node, {
visitLiteral: (param) => {
@ -101,21 +103,21 @@ export class Walker {
};
/**
* Returns the first function that matches the predicate.
* Finds the first function that matches the predicate.
*
* @param node AST subtree to search in.
* @param predicate Function to test each function with.
* @returns The first function that matches the predicate.
* @param node AST node from which to search for a function
* @param predicate Callback function to determine if the function is found
* @returns The first function that matches the predicate
*/
public static readonly findFunction = (
node: Node,
predicate: (fn: ESQLFunction) => boolean
node: WalkerAstNode,
predicate: (node: ESQLFunction) => boolean
): ESQLFunction | undefined => {
let found: ESQLFunction | undefined;
Walker.walk(node, {
visitFunction: (fn) => {
if (!found && predicate(fn)) {
found = fn;
visitFunction: (func) => {
if (!found && predicate(func)) {
found = func;
}
},
});