[ES|QL] Support all AST node types in Walker (#188712)

## Summary

Partially addresses https://github.com/elastic/kibana/issues/182255

- This PR adds support for all ES|QL AST node types in the `Walker`
class, which means that all nodes of any query will now be visited.


### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

### For maintainers

- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
Vadim Kibana 2024-07-22 10:10:07 +02:00 committed by GitHub
parent d5b9af1910
commit cc65a510db
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 723 additions and 81 deletions

View file

@ -12,20 +12,27 @@ export type ESQLAstCommand = ESQLCommand | ESQLAstMetricsCommand;
export type ESQLAstNode = ESQLAstCommand | ESQLAstItem;
/**
* Represents an *expression* in the AST.
*/
export type ESQLSingleAstItem =
| ESQLFunction
| ESQLFunction // "function call expression"
| ESQLCommandOption
| ESQLSource
| ESQLColumn
| ESQLSource // "source identifier expression"
| ESQLColumn // "field identifier expression"
| ESQLTimeInterval
| ESQLList
| ESQLLiteral
| ESQLList // "list expression"
| ESQLLiteral // "literal expression"
| ESQLCommandMode
| ESQLInlineCast
| ESQLInlineCast // "inline cast expression"
| ESQLUnknownItem;
export type ESQLAstField = ESQLFunction | ESQLColumn;
/**
* An array of AST nodes represents different things in different contexts.
* For example, in command top level arguments it is treated as an "assignment expression".
*/
export type ESQLAstItem = ESQLSingleAstItem | ESQLAstItem[];
export interface ESQLLocation {

View file

@ -0,0 +1,41 @@
# ES|QL AST Walker
The ES|QL AST Walker is a utility that traverses the ES|QL AST and provides a
set of callbacks that can be used to perform introspection of the AST.
To start a new *walk* you create a `Walker` instance and call the `walk()` method
with the AST node to start the walk from.
```ts
import { Walker, getAstAndSyntaxErrors } from '@kbn/esql-ast';
const walker = new Walker({
// Called every time a function node is visited.
visitFunction: (fn) => {
console.log('Function:', fn.name);
},
// Called every time a source identifier node is visited.
visitSource: (source) => {
console.log('Source:', source.name);
},
});
const { ast } = getAstAndSyntaxErrors('FROM source | STATS fn()');
walker.walk(ast);
```
Conceptual structure of an ES|QL AST:
- A single ES|QL query is composed of one or more source commands and zero or
more transformation commands.
- Each command is represented by a `command` node.
- Each command contains a list of expressions, which the ES|QL AST calls *AST items*.
- `function` — function call expression.
- `option` — a list of expressions with a specific role in the command.
- `source` — a source identifier expression.
- `column` — a field identifier expression.
- `timeInterval` — a time interval expression.
- `list` — a list literal expression.
- `literal` — a literal expression.
- `inlineCast` — an inline cast expression.

View file

@ -6,7 +6,20 @@
* Side Public License, v 1.
*/
import { ESQLColumn, ESQLFunction, ESQLLiteral, getAstAndSyntaxErrors } from '../..';
import { getAstAndSyntaxErrors } from '../ast_parser';
import {
ESQLColumn,
ESQLCommand,
ESQLCommandMode,
ESQLCommandOption,
ESQLFunction,
ESQLLiteral,
ESQLSource,
ESQLList,
ESQLTimeInterval,
ESQLInlineCast,
ESQLUnknownItem,
} from '../types';
import { walk, Walker } from './walker';
test('can walk all functions', () => {
@ -20,54 +33,6 @@ test('can walk all functions', () => {
expect(functions.sort()).toStrictEqual(['a', 'b', 'c']);
});
test('can walk "columns"', () => {
const query = 'ROW x = 1';
const { ast } = getAstAndSyntaxErrors(query);
const columns: ESQLColumn[] = [];
walk(ast, {
visitColumn: (node) => columns.push(node),
});
expect(columns).toMatchObject([
{
type: 'column',
name: 'x',
},
]);
});
test('can walk literals', () => {
const query = 'ROW x = 1';
const { ast } = getAstAndSyntaxErrors(query);
const columns: ESQLLiteral[] = [];
walk(ast, {
visitLiteral: (node) => columns.push(node),
});
expect(columns).toMatchObject([
{
type: 'literal',
name: '1',
},
]);
});
test('can collect all params', () => {
const query = 'ROW x = ?';
const { ast } = getAstAndSyntaxErrors(query);
const params = Walker.params(ast);
expect(params).toMatchObject([
{
type: 'literal',
literalType: 'param',
paramType: 'unnamed',
},
]);
});
test('can find assignment expression', () => {
const query = 'METRICS source var0 = bucket(bytes, 1 hour)';
const { ast } = getAstAndSyntaxErrors(query);
@ -87,26 +52,566 @@ test('can find assignment expression', () => {
expect((functions[0].args[0] as any).name).toBe('var0');
});
test('can collect all params from grouping functions', () => {
const query =
'ROW x=1, time=2024-07-10 | stats z = avg(x) by bucket(time, 20, ?earliest,?latest)';
const { ast } = getAstAndSyntaxErrors(query);
const params = Walker.params(ast);
describe('structurally can walk all nodes', () => {
describe('commands', () => {
test('can visit a single source command', () => {
const { ast } = getAstAndSyntaxErrors('FROM index');
const commands: ESQLCommand[] = [];
expect(params).toMatchObject([
{
type: 'literal',
literalType: 'param',
paramType: 'named',
value: 'earliest',
},
{
type: 'literal',
literalType: 'param',
paramType: 'named',
value: 'latest',
},
]);
walk(ast, {
visitCommand: (cmd) => commands.push(cmd),
});
expect(commands.map(({ name }) => name).sort()).toStrictEqual(['from']);
});
test('can visit all commands', () => {
const { ast } = getAstAndSyntaxErrors('FROM index | STATS a = 123 | WHERE 123 | LIMIT 10');
const commands: ESQLCommand[] = [];
walk(ast, {
visitCommand: (cmd) => commands.push(cmd),
});
expect(commands.map(({ name }) => name).sort()).toStrictEqual([
'from',
'limit',
'stats',
'where',
]);
});
describe('command options', () => {
test('can visit command options', () => {
const { ast } = getAstAndSyntaxErrors('FROM index METADATA _index');
const options: ESQLCommandOption[] = [];
walk(ast, {
visitCommandOption: (opt) => options.push(opt),
});
expect(options.length).toBe(1);
expect(options[0].name).toBe('metadata');
});
});
describe('command mode', () => {
test('visits "mode" nodes', () => {
const { ast } = getAstAndSyntaxErrors('FROM index | ENRICH a:b');
const options: ESQLCommandMode[] = [];
walk(ast, {
visitCommandMode: (opt) => options.push(opt),
});
expect(options.length).toBe(1);
expect(options[0].name).toBe('a');
});
});
describe('expressions', () => {
describe('sources', () => {
test('iterates through a single source', () => {
const { ast } = getAstAndSyntaxErrors('FROM index');
const sources: ESQLSource[] = [];
walk(ast, {
visitSource: (opt) => sources.push(opt),
});
expect(sources.length).toBe(1);
expect(sources[0].name).toBe('index');
});
test('iterates through all sources', () => {
const { ast } = getAstAndSyntaxErrors('METRICS index, index2, index3, index4');
const sources: ESQLSource[] = [];
walk(ast, {
visitSource: (opt) => sources.push(opt),
});
expect(sources.length).toBe(4);
expect(sources.map(({ name }) => name).sort()).toEqual([
'index',
'index2',
'index3',
'index4',
]);
});
});
describe('columns', () => {
test('can through a single column', () => {
const query = 'ROW x = 1';
const { ast } = getAstAndSyntaxErrors(query);
const columns: ESQLColumn[] = [];
walk(ast, {
visitColumn: (node) => columns.push(node),
});
expect(columns).toMatchObject([
{
type: 'column',
name: 'x',
},
]);
});
test('can walk through multiple columns', () => {
const query = 'FROM index | STATS a = 123, b = 456';
const { ast } = getAstAndSyntaxErrors(query);
const columns: ESQLColumn[] = [];
walk(ast, {
visitColumn: (node) => columns.push(node),
});
expect(columns).toMatchObject([
{
type: 'column',
name: 'a',
},
{
type: 'column',
name: 'b',
},
]);
});
});
describe('literals', () => {
test('can walk a single literal', () => {
const query = 'ROW x = 1';
const { ast } = getAstAndSyntaxErrors(query);
const columns: ESQLLiteral[] = [];
walk(ast, {
visitLiteral: (node) => columns.push(node),
});
expect(columns).toMatchObject([
{
type: 'literal',
name: '1',
},
]);
});
test('can walk through all literals', () => {
const query = 'FROM index | STATS a = 123, b = "foo", c = true AND false';
const { ast } = getAstAndSyntaxErrors(query);
const columns: ESQLLiteral[] = [];
walk(ast, {
visitLiteral: (node) => columns.push(node),
});
expect(columns).toMatchObject([
{
type: 'literal',
literalType: 'number',
name: '123',
},
{
type: 'literal',
literalType: 'string',
name: '"foo"',
},
{
type: 'literal',
literalType: 'boolean',
name: 'true',
},
{
type: 'literal',
literalType: 'boolean',
name: 'false',
},
]);
});
test('can walk through literals inside functions', () => {
const query = 'FROM index | STATS f(1, "2", g(true) + false, h(j(k(3.14))))';
const { ast } = getAstAndSyntaxErrors(query);
const columns: ESQLLiteral[] = [];
walk(ast, {
visitLiteral: (node) => columns.push(node),
});
expect(columns).toMatchObject([
{
type: 'literal',
literalType: 'number',
name: '1',
},
{
type: 'literal',
literalType: 'string',
name: '"2"',
},
{
type: 'literal',
literalType: 'boolean',
name: 'true',
},
{
type: 'literal',
literalType: 'boolean',
name: 'false',
},
{
type: 'literal',
literalType: 'number',
name: '3.14',
},
]);
});
});
describe('list literals', () => {
describe('numeric', () => {
test('can walk a single numeric list literal', () => {
const query = 'ROW x = [1, 2]';
const { ast } = getAstAndSyntaxErrors(query);
const lists: ESQLList[] = [];
walk(ast, {
visitListLiteral: (node) => lists.push(node),
});
expect(lists).toMatchObject([
{
type: 'list',
values: [
{
type: 'literal',
literalType: 'number',
name: '1',
},
{
type: 'literal',
literalType: 'number',
name: '2',
},
],
},
]);
});
test('can walk plain literals inside list literal', () => {
const query = 'ROW x = [1, 2] + [3.3]';
const { ast } = getAstAndSyntaxErrors(query);
const lists: ESQLList[] = [];
const literals: ESQLLiteral[] = [];
walk(ast, {
visitListLiteral: (node) => lists.push(node),
visitLiteral: (node) => literals.push(node),
});
expect(lists).toMatchObject([
{
type: 'list',
values: [
{
type: 'literal',
literalType: 'number',
name: '1',
},
{
type: 'literal',
literalType: 'number',
name: '2',
},
],
},
{
type: 'list',
values: [
{
type: 'literal',
literalType: 'number',
name: '3.3',
},
],
},
]);
expect(literals).toMatchObject([
{
type: 'literal',
literalType: 'number',
name: '1',
},
{
type: 'literal',
literalType: 'number',
name: '2',
},
{
type: 'literal',
literalType: 'number',
name: '3.3',
},
]);
});
});
describe('boolean', () => {
test('can walk a single numeric list literal', () => {
const query = 'ROW x = [true, false]';
const { ast } = getAstAndSyntaxErrors(query);
const lists: ESQLList[] = [];
walk(ast, {
visitListLiteral: (node) => lists.push(node),
});
expect(lists).toMatchObject([
{
type: 'list',
values: [
{
type: 'literal',
literalType: 'boolean',
name: 'true',
},
{
type: 'literal',
literalType: 'boolean',
name: 'false',
},
],
},
]);
});
test('can walk plain literals inside list literal', () => {
const query = 'ROW x = [false, false], b([true, true, true])';
const { ast } = getAstAndSyntaxErrors(query);
const lists: ESQLList[] = [];
const literals: ESQLLiteral[] = [];
walk(ast, {
visitListLiteral: (node) => lists.push(node),
visitLiteral: (node) => literals.push(node),
});
expect(lists).toMatchObject([
{
type: 'list',
},
{
type: 'list',
},
]);
expect(literals).toMatchObject([
{
type: 'literal',
literalType: 'boolean',
name: 'false',
},
{
type: 'literal',
literalType: 'boolean',
name: 'false',
},
{
type: 'literal',
literalType: 'boolean',
name: 'true',
},
{
type: 'literal',
literalType: 'boolean',
name: 'true',
},
{
type: 'literal',
literalType: 'boolean',
name: 'true',
},
]);
});
});
describe('string', () => {
test('can walk string literals', () => {
const query = 'ROW x = ["a", "b"], b(["c", "d", "e"])';
const { ast } = getAstAndSyntaxErrors(query);
const lists: ESQLList[] = [];
const literals: ESQLLiteral[] = [];
walk(ast, {
visitListLiteral: (node) => lists.push(node),
visitLiteral: (node) => literals.push(node),
});
expect(lists).toMatchObject([
{
type: 'list',
},
{
type: 'list',
},
]);
expect(literals).toMatchObject([
{
type: 'literal',
literalType: 'string',
name: '"a"',
},
{
type: 'literal',
literalType: 'string',
name: '"b"',
},
{
type: 'literal',
literalType: 'string',
name: '"c"',
},
{
type: 'literal',
literalType: 'string',
name: '"d"',
},
{
type: 'literal',
literalType: 'string',
name: '"e"',
},
]);
});
});
});
describe('time interval', () => {
test('can visit time interval nodes', () => {
const query = 'FROM index | STATS a = 123 BY 1h';
const { ast } = getAstAndSyntaxErrors(query);
const intervals: ESQLTimeInterval[] = [];
walk(ast, {
visitTimeIntervalLiteral: (node) => intervals.push(node),
});
expect(intervals).toMatchObject([
{
type: 'timeInterval',
quantity: 1,
unit: 'h',
},
]);
});
});
describe('cast expression', () => {
test('can visit cast expression', () => {
const query = 'FROM index | STATS a = 123::number';
const { ast } = getAstAndSyntaxErrors(query);
const casts: ESQLInlineCast[] = [];
walk(ast, {
visitInlineCast: (node) => casts.push(node),
});
expect(casts).toMatchObject([
{
type: 'inlineCast',
castType: 'number',
value: {
type: 'literal',
literalType: 'number',
value: 123,
},
},
]);
});
});
});
});
describe('unknown nodes', () => {
test('can iterate through "unknown" nodes', () => {
const { ast } = getAstAndSyntaxErrors('FROM index');
let source: ESQLSource | undefined;
walk(ast, {
visitSource: (src) => (source = src),
});
(source! as any).type = 'unknown';
const unknowns: ESQLUnknownItem[] = [];
walk(ast, {
visitUnknown: (node) => unknowns.push(node),
});
expect(unknowns).toMatchObject([
{
type: 'unknown',
},
]);
});
});
});
// Verifies that the static `Walker.commands()` helper returns every command
// of a multi-command query.
describe('Walker.commands()', () => {
  test('can collect all commands', () => {
    const query = 'FROM index | STATS a = 123 | WHERE 123 | LIMIT 10';
    const parsed = getAstAndSyntaxErrors(query);

    // Sort the names so the assertion does not depend on visiting order.
    const names = Walker.commands(parsed.ast).map((command) => command.name);
    names.sort();

    expect(names).toStrictEqual(['from', 'limit', 'stats', 'where']);
  });
});
// Verifies that the static `Walker.params()` helper collects parameter
// literals, both unnamed (`?`) and named (`?name`).
describe('Walker.params', () => {
  test('can collect all params', () => {
    const { ast } = getAstAndSyntaxErrors('ROW x = ?');
    const expected = [
      {
        type: 'literal',
        literalType: 'param',
        paramType: 'unnamed',
      },
    ];

    expect(Walker.params(ast)).toMatchObject(expected);
  });

  test('can collect all params from grouping functions', () => {
    const { ast } = getAstAndSyntaxErrors(
      'ROW x=1, time=2024-07-10 | stats z = avg(x) by bucket(time, 20, ?earliest,?latest)'
    );
    // Both named params appear as arguments of the `bucket()` grouping function.
    const expected = ['earliest', 'latest'].map((value) => ({
      type: 'literal',
      literalType: 'param',
      paramType: 'named',
      value,
    }));

    expect(Walker.params(ast)).toMatchObject(expected);
  });
});
describe('Walker.hasFunction()', () => {

View file

@ -11,38 +11,84 @@ import type {
ESQLAstItem,
ESQLAstNode,
ESQLColumn,
ESQLCommand,
ESQLCommandMode,
ESQLCommandOption,
ESQLFunction,
ESQLInlineCast,
ESQLList,
ESQLLiteral,
ESQLParamLiteral,
ESQLSingleAstItem,
ESQLSource,
ESQLTimeInterval,
ESQLUnknownItem,
} from '../types';
type Node = ESQLAstNode | ESQLAstNode[];
/**
 * Optional visitor callbacks invoked by the `Walker` while it traverses the
 * AST. Every callback is optional; a missing callback is simply not called.
 */
export interface WalkerOptions {
/** Called for each command node, before the command's arguments are walked. */
visitCommand?: (node: ESQLCommand) => void;
/** Called for each `option` node, before the option's arguments are walked. */
visitCommandOption?: (node: ESQLCommandOption) => void;
/** Called for each `mode` node. */
visitCommandMode?: (node: ESQLCommandMode) => void;
/**
 * Called for every single (non-array) AST item, regardless of its type,
 * before the type-specific visitor is invoked.
 */
visitSingleAstItem?: (node: ESQLSingleAstItem) => void;
/** Called for each `source` (source identifier) node. */
visitSource?: (node: ESQLSource) => void;
/** Called for each function node. */
visitFunction?: (node: ESQLFunction) => void;
/** Called for each `column` (field identifier) node. */
visitColumn?: (node: ESQLColumn) => void;
/** Called for each `literal` node. */
visitLiteral?: (node: ESQLLiteral) => void;
/** Called for each `list` node, before the list's values are walked. */
visitListLiteral?: (node: ESQLList) => void;
/** Called for each `timeInterval` node. */
visitTimeIntervalLiteral?: (node: ESQLTimeInterval) => void;
/** Called for each `inlineCast` node. */
visitInlineCast?: (node: ESQLInlineCast) => void;
/** Called for each node whose type is `unknown`. */
visitUnknown?: (node: ESQLUnknownItem) => void;
}
/**
* Iterates over all nodes in the AST and calls the appropriate visitor
* functions.
*
* AST nodes supported:
*
* - [x] command
* - [x] option
* - [x] mode
* - [x] function
* - [x] source
* - [x] column
* - [x] literal
* - [x] list literal
* - [x] timeInterval
* - [x] inlineCast
* - [x] unknown
*/
export class Walker {
/**
* Walks the AST and calls the appropriate visitor functions.
*/
public static readonly walk = (
node: ESQLAstNode | ESQLAstNode[],
options: WalkerOptions
): Walker => {
public static readonly walk = (node: Node, options: WalkerOptions): Walker => {
const walker = new Walker(options);
walker.walk(node);
return walker;
};
/**
 * Walks the AST and collects every command node that is visited.
 *
 * @param node AST node (or array of AST nodes) to extract commands from.
 * @returns The visited command nodes, in the order they were visited.
 */
public static readonly commands = (node: Node): ESQLCommand[] => {
  const commands: ESQLCommand[] = [];
  // Go through the class's own static entry point, as `Walker.params` does,
  // instead of relying on a separately declared module-level alias.
  Walker.walk(node, {
    visitCommand: (cmd) => {
      // Block body so the visitor returns `void` rather than the new array
      // length returned by `push`.
      commands.push(cmd);
    },
  });
  return commands;
};
/**
* Walks the AST and extracts all parameter literals.
*
* @param node AST node to extract parameters from.
*/
public static readonly params = (node: ESQLAstNode | ESQLAstNode[]): ESQLParamLiteral[] => {
public static readonly params = (node: Node): ESQLParamLiteral[] => {
const params: ESQLParamLiteral[] = [];
Walker.walk(node, {
visitLiteral: (param) => {
@ -62,7 +108,7 @@ export class Walker {
* @returns The first function that matches the predicate.
*/
public static readonly findFunction = (
node: ESQLAstNode | ESQLAstNode[],
node: Node,
predicate: (fn: ESQLFunction) => boolean
): ESQLFunction | undefined => {
let found: ESQLFunction | undefined;
@ -113,6 +159,7 @@ export class Walker {
}
public walkCommand(node: ESQLAstCommand): void {
this.options.visitCommand?.(node);
switch (node.name) {
default: {
this.walk(node.args);
@ -121,6 +168,13 @@ export class Walker {
}
}
/**
 * Visits a command option node: first reports the option itself through the
 * `visitCommandOption` callback (when one was supplied), then walks each of
 * the option's arguments.
 *
 * @param node Command option node to walk.
 */
public walkOption(node: ESQLCommandOption): void {
  const { options } = this;
  options.visitCommandOption?.(node);
  for (const arg of node.args) {
    this.walkAstItem(arg);
  }
}
public walkAstItem(node: ESQLAstItem): void {
if (node instanceof Array) {
const list = node as ESQLAstItem[];
@ -131,6 +185,17 @@ export class Walker {
}
}
/**
 * Visits a command mode node by reporting it through the `visitCommandMode`
 * callback, when one was supplied. Nothing below the node is walked.
 *
 * @param node Command mode node to visit.
 */
public walkMode(node: ESQLCommandMode): void {
  const { options } = this;
  options.visitCommandMode?.(node);
}
/**
 * Visits a list literal node: first reports the list itself through the
 * `visitListLiteral` callback (when one was supplied), then walks each of
 * the list's values.
 *
 * @param node List literal node to walk.
 */
public walkListLiteral(node: ESQLList): void {
  const { options } = this;
  options.visitListLiteral?.(node);
  for (const item of node.values) {
    this.walkAstItem(item);
  }
}
public walkSingleAstItem(node: ESQLSingleAstItem): void {
const { options } = this;
options.visitSingleAstItem?.(node);
@ -140,7 +205,15 @@ export class Walker {
break;
}
case 'option': {
this.walkAstItem(node.args);
this.walkOption(node);
break;
}
case 'mode': {
this.walkMode(node);
break;
}
case 'source': {
options.visitSource?.(node);
break;
}
case 'column': {
@ -151,6 +224,22 @@ export class Walker {
options.visitLiteral?.(node);
break;
}
case 'list': {
this.walkListLiteral(node);
break;
}
case 'timeInterval': {
options.visitTimeIntervalLiteral?.(node);
break;
}
case 'inlineCast': {
options.visitInlineCast?.(node);
break;
}
case 'unknown': {
options.visitUnknown?.(node);
break;
}
}
}