[ES|QL] Finalize string parsing (unquote and unescape strings) (#203610)

## Summary

Closes https://github.com/elastic/kibana/issues/203445

- Un-escapes and un-quotes all strings when parsing, so that strings can
be compared, and string nodes can be constructed using `Builder` and then
correctly formatted by the pretty-printer.
- Introduces `valueUnquoted` field to string literal nodes.
- Refactors `GROK` and `DISSECT` command parsing into their separate
files.


### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
This commit is contained in:
Vadim Kibana 2024-12-16 17:00:09 +01:00 committed by GitHub
parent 7370cc712e
commit 64630ab11f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 340 additions and 143 deletions

View file

@ -224,12 +224,13 @@ describe('literal', () => {
const node = Builder.expression.literal.string('abc');
const text = BasicPrettyPrinter.expression(node);
expect(text).toBe('"""abc"""');
expect(text).toBe('"abc"');
expect(node).toMatchObject({
type: 'literal',
literalType: 'keyword',
name: '"""abc"""',
value: '"""abc"""',
name: '"abc"',
value: '"abc"',
valueUnquoted: 'abc',
});
});
});
@ -260,7 +261,7 @@ describe('literal', () => {
});
const text = BasicPrettyPrinter.expression(node);
expect(text).toBe('["""a""", """b""", """c"""]');
expect(text).toBe('["a", "b", "c"]');
});
test('integer list', () => {

View file

@ -32,10 +32,10 @@ import {
ESQLParamLiteral,
ESQLFunction,
ESQLAstItem,
ESQLStringLiteral,
ESQLBinaryExpression,
ESQLUnaryExpression,
ESQLTimeInterval,
ESQLStringLiteral,
ESQLBooleanLiteral,
ESQLNullLiteral,
} from '../types';
@ -368,26 +368,6 @@ export namespace Builder {
) as ESQLDecimalLiteral;
};
export const string = (
value: string,
template?: Omit<AstNodeTemplate<ESQLStringLiteral>, 'name' | 'literalType'>,
fromParser?: Partial<AstNodeParserFields>
): ESQLStringLiteral => {
// TODO: Once (https://github.com/elastic/kibana/issues/203445) do not use
// triple quotes and escape the string.
const quotedValue = '"""' + value + '"""';
const node: ESQLStringLiteral = {
...template,
...Builder.parserFields(fromParser),
type: 'literal',
literalType: 'keyword',
name: quotedValue,
value: quotedValue,
};
return node;
};
/**
* Constructs "time interval" literal node.
*
@ -407,6 +387,38 @@ export namespace Builder {
};
};
/**
 * Constructs a string ("keyword") literal node from raw, unquoted text.
 *
 * The quoted form stored in `value` is produced by wrapping the text in
 * double quotes and escaping backslash, double quote, newline, carriage
 * return and tab characters.
 *
 * @param valueUnquoted The raw string contents, without quotes or escaping.
 * @param template Optional node fields to merge in. If it carries a `name`,
 *     that name is used instead of the generated quoted value.
 * @param fromParser Optional parser-provided fields, passed through
 *     `Builder.parserFields`.
 * @returns A string literal node with `name`, `value` and `valueUnquoted` set.
 */
export const string = (
  valueUnquoted: string,
  template?: Omit<
    AstNodeTemplate<ESQLStringLiteral>,
    'name' | 'literalType' | 'value' | 'valueUnquoted'
  > &
    Partial<Pick<ESQLStringLiteral, 'name'>>,
  fromParser?: Partial<AstNodeParserFields>
): ESQLStringLiteral => {
  // Characters that must be escaped inside a double-quoted string.
  const escapes: Record<string, string> = {
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
  };
  const escaped = valueUnquoted.replace(/[\\"\n\r\t]/g, (ch) => escapes[ch] ?? ch);
  const value = `"${escaped}"`;

  return {
    ...template,
    ...Builder.parserFields(fromParser),
    type: 'literal',
    literalType: 'keyword',
    name: template?.name ?? value,
    value,
    valueUnquoted,
  };
};
export const list = (
template: Omit<AstNodeTemplate<ESQLList>, 'name'>,
fromParser?: Partial<AstNodeParserFields>

View file

@ -286,7 +286,9 @@ describe('commands', () => {
},
{
type: 'literal',
value: '"b"',
literalType: 'keyword',
name: '"b"',
valueUnquoted: 'b',
},
{
type: 'option',
@ -294,7 +296,9 @@ describe('commands', () => {
args: [
{
type: 'literal',
value: '"c"',
literalType: 'keyword',
name: '"c"',
valueUnquoted: 'c',
},
],
},
@ -303,6 +307,31 @@ describe('commands', () => {
]);
});
it('DISSECT (no options)', () => {
const query = 'FROM index | DISSECT a "b"';
const { ast } = parse(query);
expect(ast).toMatchObject([
{},
{
type: 'command',
name: 'dissect',
args: [
{
type: 'column',
name: 'a',
},
{
type: 'literal',
literalType: 'keyword',
name: '"b"',
valueUnquoted: 'b',
},
],
},
]);
});
it('GROK', () => {
const query = 'FROM index | GROK a "b"';
const { ast } = parse(query);
@ -319,7 +348,9 @@ describe('commands', () => {
},
{
type: 'literal',
value: '"b"',
literalType: 'keyword',
name: '"b"',
valueUnquoted: 'b',
},
],
},

View file

@ -26,8 +26,8 @@ describe('literal expression', () => {
it('numeric expression captures "value", and "name" fields', () => {
const text = 'ROW 1';
const { ast } = parse(text);
const literal = ast[0].args[0] as ESQLLiteral;
const { root } = parse(text);
const literal = root.commands[0].args[0] as ESQLLiteral;
expect(literal).toMatchObject({
type: 'literal',
@ -39,9 +39,9 @@ describe('literal expression', () => {
it('doubles vs integers', () => {
const text = 'ROW a(1.0, 1)';
const { ast } = parse(text);
const { root } = parse(text);
expect(ast[0]).toMatchObject({
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
@ -61,54 +61,117 @@ describe('literal expression', () => {
});
});
// TODO: Un-skip once string parsing fixed: https://github.com/elastic/kibana/issues/203445
it.skip('single-quoted string', () => {
const text = 'ROW "abc"';
const { root } = parse(text);
describe('string', () => {
describe('single quoted', () => {
it('empty string', () => {
const text = 'ROW "", 1';
const { root } = parse(text);
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
value: 'abc',
},
],
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '""',
valueUnquoted: '',
},
{},
],
});
});
it('short string', () => {
const text = 'ROW "abc", 1';
const { root } = parse(text);
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"abc"',
valueUnquoted: 'abc',
},
{},
],
});
});
it('escaped characters', () => {
const text = 'ROW "a\\nb\\tc\\rd\\\\e\\"f", 1';
const { root } = parse(text);
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"a\\nb\\tc\\rd\\\\e\\"f"',
valueUnquoted: 'a\nb\tc\rd\\e"f',
},
{},
],
});
});
});
});
// TODO: Un-skip once string parsing fixed: https://github.com/elastic/kibana/issues/203445
it.skip('unescapes characters', () => {
const text = 'ROW "a\\nbc"';
const { root } = parse(text);
describe('triple quoted', () => {
it('empty string', () => {
const text = 'ROW """""", 1';
const { root } = parse(text);
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
value: 'a\nbc',
},
],
});
});
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '""""""',
valueUnquoted: '',
},
{},
],
});
});
// TODO: Un-skip once string parsing fixed: https://github.com/elastic/kibana/issues/203445
it.skip('triple-quoted string', () => {
const text = 'ROW """abc"""';
const { root } = parse(text);
it('short string', () => {
const text = 'ROW """abc""", 1';
const { root } = parse(text);
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
value: 'abc',
},
],
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"""abc"""',
valueUnquoted: 'abc',
},
{},
],
});
});
it('characters are not escaped', () => {
const text = 'ROW """a\\nb\\c\\"d""", 1';
const { root } = parse(text);
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"""a\\nb\\c\\"d"""',
valueUnquoted: 'a\\nb\\c\\"d',
},
{},
],
});
});
});
});
});

View file

@ -50,8 +50,6 @@ import {
visitByOption,
collectAllColumnIdentifiers,
visitRenameClauses,
visitDissect,
visitGrok,
collectBooleanExpression,
visitOrderExpressions,
getPolicyName,
@ -60,6 +58,8 @@ import {
} from './walkers';
import type { ESQLAst, ESQLAstMetricsCommand } from '../types';
import { createJoinCommand } from './factories/join';
import { createDissectCommand } from './factories/dissect';
import { createGrokCommand } from './factories/grok';
export class ESQLAstBuilderListener implements ESQLParserListener {
private ast: ESQLAst = [];
@ -262,9 +262,9 @@ export class ESQLAstBuilderListener implements ESQLParserListener {
* @param ctx the parse tree
*/
exitDissectCommand(ctx: DissectCommandContext) {
const command = createCommand('dissect', ctx);
const command = createDissectCommand(ctx);
this.ast.push(command);
command.args.push(...visitDissect(ctx));
}
/**
@ -272,9 +272,9 @@ export class ESQLAstBuilderListener implements ESQLParserListener {
* @param ctx the parse tree
*/
exitGrokCommand(ctx: GrokCommandContext) {
const command = createCommand('grok', ctx);
const command = createGrokCommand(ctx);
this.ast.push(command);
command.args.push(...visitGrok(ctx));
}
/**

View file

@ -32,6 +32,7 @@ import {
InputParamContext,
InputNamedOrPositionalParamContext,
IdentifierOrParameterContext,
StringContext,
} from '../antlr/esql_parser';
import { DOUBLE_TICKS_REGEX, SINGLE_BACKTICK, TICKS_REGEX } from './constants';
import type {
@ -119,6 +120,29 @@ export function createFakeMultiplyLiteral(
};
}
/**
 * Builds a string literal AST node from a parsed `string` rule context.
 *
 * The source text of the `QUOTED_STRING` token is kept as the node `name`,
 * while the un-quoted (and, for single-quoted strings, un-escaped) contents
 * are passed on as the unquoted value. When the token is absent, the text
 * falls back to an empty quoted string `""`.
 *
 * Triple-quoted strings (`"""..."""`) are taken verbatim. In single-quoted
 * strings the escape sequences `\\`, `\"`, `\n`, `\r` and `\t` are decoded.
 *
 * @param ctx The parse tree context of the string.
 * @returns The string literal node created by `Builder.expression.literal.string`.
 */
export function createLiteralString(ctx: StringContext): ESQLLiteral {
  const quotedString: string = ctx.QUOTED_STRING()?.getText() ?? '""';
  const isTripleQuoted = quotedString.startsWith('"""') && quotedString.endsWith('"""');

  const unescapes: Record<string, string> = {
    '\\': '\\',
    '"': '"',
    n: '\n',
    r: '\r',
    t: '\t',
  };

  // Decode escape sequences in a single left-to-right pass. Chaining separate
  // `.replace()` calls is incorrect: an escaped backslash followed by `n`
  // (source text `\\n`) would first collapse to `\n` and then be decoded a
  // second time into a newline character.
  const valueUnquoted = isTripleQuoted
    ? quotedString.slice(3, -3)
    : quotedString
        .slice(1, -1)
        .replace(/\\([\\"nrt])/g, (match: string, ch: string) => unescapes[ch] ?? match);

  return Builder.expression.literal.string(
    valueUnquoted,
    {
      name: quotedString,
    },
    createParserFields(ctx)
  );
}
function isMissingText(text: string) {
return /<missing /.test(text);
}

View file

@ -0,0 +1,65 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import esql_parser, { CommandOptionsContext, DissectCommandContext } from '../../antlr/esql_parser';
import { ESQLCommand, ESQLCommandOption } from '../../types';
import {
createCommand,
createLiteralString,
createOption,
sanitizeIdentifierString,
textExistsAndIsValid,
} from '../factories';
import { getConstant, visitPrimaryExpression } from '../walkers';
/**
 * Converts the options part of a command into a list of option nodes.
 *
 * Each option is named after its lower-cased, sanitized identifier. The
 * option's constant value, when it can be read, becomes the option's single
 * argument.
 *
 * @param ctx The command options parse tree context, if any.
 * @returns One option node per parsed option; empty when `ctx` is missing.
 */
const createDissectOptions = (ctx: CommandOptionsContext | undefined): ESQLCommandOption[] => {
  const result: ESQLCommandOption[] = [];

  if (!ctx) {
    return result;
  }

  const optionContexts = ctx.commandOption_list();

  for (let i = 0; i < optionContexts.length; i++) {
    const current = optionContexts[i];
    const optionName = sanitizeIdentifierString(current.identifier()).toLowerCase();
    const optionNode = createOption(optionName, current);

    result.push(optionNode);

    // Reading the constant may throw on incomplete commands; in that case
    // the option is kept without an argument.
    try {
      const constant = getConstant(current.constant());

      if (constant != null) {
        optionNode.args.push(constant);
      }
    } catch (e) {
      // do nothing here
    }
  }

  return result;
};
/**
 * Creates the AST node of a `DISSECT` command.
 *
 * The command arguments are, in order: the visited primary expression, the
 * pattern string literal, and any command options. The pattern and options
 * are only added when a `QUOTED_STRING` token is present and its text passes
 * `textExistsAndIsValid`.
 *
 * @param ctx The `DISSECT` command parse tree context.
 * @returns The constructed command node.
 */
export const createDissectCommand = (ctx: DissectCommandContext): ESQLCommand => {
  const command = createCommand('dissect', ctx);
  const targetExpression = visitPrimaryExpression(ctx.primaryExpression());
  const patternContext = ctx.string_();
  const patternToken = patternContext.getToken(esql_parser.QUOTED_STRING, 0);
  const hasValidPattern = patternToken && textExistsAndIsValid(patternToken.getText());

  command.args.push(targetExpression);

  // Without a usable pattern, neither the pattern nor the options are parsed.
  if (!hasValidPattern) {
    return command;
  }

  command.args.push(createLiteralString(patternContext));
  command.args.push(...createDissectOptions(ctx.commandOptions()));

  return command;
};

View file

@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import esql_parser, { GrokCommandContext } from '../../antlr/esql_parser';
import { ESQLCommand } from '../../types';
import { createCommand, createLiteralString, textExistsAndIsValid } from '../factories';
import { visitPrimaryExpression } from '../walkers';
/**
 * Creates the AST node of a `GROK` command.
 *
 * The command arguments are, in order: the visited primary expression and
 * the pattern string literal. The pattern is only added when a
 * `QUOTED_STRING` token is present and its text passes `textExistsAndIsValid`.
 *
 * @param ctx The `GROK` command parse tree context.
 * @returns The constructed command node.
 */
export const createGrokCommand = (ctx: GrokCommandContext): ESQLCommand => {
  const command = createCommand('grok', ctx);
  const targetExpression = visitPrimaryExpression(ctx.primaryExpression());
  const patternContext = ctx.string_();
  const patternToken = patternContext.getToken(esql_parser.QUOTED_STRING, 0);
  const hasValidPattern = patternToken && textExistsAndIsValid(patternToken.getText());

  command.args.push(targetExpression);

  // Without a usable pattern, only the primary expression is kept.
  if (!hasValidPattern) {
    return command;
  }

  command.args.push(createLiteralString(patternContext));

  return command;
};

View file

@ -18,14 +18,12 @@ import {
BooleanLiteralContext,
InputParameterContext,
BooleanValueContext,
type CommandOptionsContext,
ComparisonContext,
type ComparisonOperatorContext,
type ConstantContext,
ConstantDefaultContext,
DecimalLiteralContext,
DereferenceContext,
type DissectCommandContext,
type DropCommandContext,
type EnrichCommandContext,
type FieldContext,
@ -33,7 +31,6 @@ import {
type AggFieldsContext,
type FromCommandContext,
FunctionContext,
type GrokCommandContext,
IntegerLiteralContext,
IsNullContext,
type KeepCommandContext,
@ -74,7 +71,6 @@ import {
createFakeMultiplyLiteral,
createList,
createNumericLiteral,
sanitizeIdentifierString,
computeLocationExtends,
createColumnStar,
wrapIdentifierAsArray,
@ -86,6 +82,7 @@ import {
createOrderExpression,
createFunctionCall,
createParam,
createLiteralString,
} from './factories';
import {
@ -331,7 +328,7 @@ function getBooleanValue(ctx: BooleanLiteralContext | BooleanValueContext) {
return createLiteral('boolean', booleanTerminalNode!);
}
function getConstant(ctx: ConstantContext): ESQLAstItem {
export function getConstant(ctx: ConstantContext): ESQLAstItem {
if (ctx instanceof NullLiteralContext) {
return createLiteral('null', ctx.NULL());
}
@ -354,8 +351,7 @@ function getConstant(ctx: ConstantContext): ESQLAstItem {
return getBooleanValue(ctx);
}
if (ctx instanceof StringLiteralContext) {
// String literal covers multiple ES|QL types: text and keyword types
return createLiteral('keyword', ctx.string_().QUOTED_STRING());
return createLiteralString(ctx.string_());
}
if (
ctx instanceof NumericArrayLiteralContext ||
@ -374,11 +370,9 @@ function getConstant(ctx: ConstantContext): ESQLAstItem {
values.push(getBooleanValue(booleanValue)!);
}
for (const string of ctx.getTypedRuleContexts(StringContext)) {
// String literal covers multiple ES|QL types: text and keyword types
const literal = createLiteral('keyword', string.QUOTED_STRING());
if (literal) {
values.push(literal);
}
const literal = createLiteralString(string);
values.push(literal);
}
return createList(ctx, values);
}
@ -484,10 +478,10 @@ function collectRegexExpression(ctx: BooleanExpressionContext): ESQLFunction[] {
const arg = visitValueExpression(regex.valueExpression());
if (arg) {
fn.args.push(arg);
const literal = createLiteral('keyword', regex._pattern.QUOTED_STRING());
if (literal) {
fn.args.push(literal);
}
const literal = createLiteralString(regex._pattern);
fn.args.push(literal);
}
return fn;
})
@ -631,47 +625,3 @@ export function visitOrderExpressions(
return ast;
}
export function visitDissect(ctx: DissectCommandContext) {
const pattern = ctx.string_().getToken(esql_parser.QUOTED_STRING, 0);
return [
visitPrimaryExpression(ctx.primaryExpression()),
...(pattern && textExistsAndIsValid(pattern.getText())
? [createLiteral('keyword', pattern), ...visitDissectOptions(ctx.commandOptions())]
: []),
].filter(nonNullable);
}
export function visitGrok(ctx: GrokCommandContext) {
const pattern = ctx.string_().getToken(esql_parser.QUOTED_STRING, 0);
return [
visitPrimaryExpression(ctx.primaryExpression()),
...(pattern && textExistsAndIsValid(pattern.getText())
? [createLiteral('keyword', pattern)]
: []),
].filter(nonNullable);
}
function visitDissectOptions(ctx: CommandOptionsContext | undefined) {
if (!ctx) {
return [];
}
const options: ESQLCommandOption[] = [];
for (const optionCtx of ctx.commandOption_list()) {
const option = createOption(
sanitizeIdentifierString(optionCtx.identifier()).toLowerCase(),
optionCtx
);
options.push(option);
// it can throw while accessing constant for incomplete commands, so try catch it
try {
const optionValue = getConstant(optionCtx.constant());
if (optionValue != null) {
option.args.push(optionValue);
}
} catch (e) {
// do nothing here
}
}
return options;
}

View file

@ -16,6 +16,7 @@ import {
ESQLParamLiteral,
ESQLProperNode,
ESQLSource,
ESQLStringLiteral,
ESQLTimeInterval,
} from '../types';
@ -81,6 +82,21 @@ export const LeafPrinter = {
return formatted;
},
/**
 * Prints a string literal node as a double-quoted string.
 *
 * The text is taken from `node.valueUnquoted`; backslash, double quote,
 * newline, carriage return and tab characters are escaped.
 */
string: (node: ESQLStringLiteral) => {
  // Order matters: backslashes are escaped first so that the backslashes
  // introduced by the later replacements are not escaped again.
  const replacements: Array<[RegExp, string]> = [
    [/\\/g, '\\\\'],
    [/"/g, '\\"'],
    [/\n/g, '\\n'],
    [/\r/g, '\\r'],
    [/\t/g, '\\t'],
  ];
  const escaped = replacements.reduce<string>(
    (text: string, [pattern, replacement]) => text.replace(pattern, replacement),
    node.valueUnquoted
  );

  return '"' + escaped + '"';
},
literal: (node: ESQLLiteral) => {
switch (node.literalType) {
case 'null': {
@ -93,7 +109,7 @@ export const LeafPrinter = {
return LeafPrinter.param(node);
}
case 'keyword': {
return String(node.value);
return LeafPrinter.string(node);
}
case 'double': {
const isRounded = node.value % 1 === 0;

View file

@ -46,5 +46,5 @@ test('can compose expressions into commands', () => {
const text2 = BasicPrettyPrinter.command(cmd2);
expect(text1).toBe('WHERE a.b.c == "asdf"');
expect(text2).toBe('DISSECT a.b.c """%{date}"""');
expect(text2).toBe('DISSECT a.b.c "%{date}"');
});

View file

@ -57,7 +57,7 @@ test('can generate a function call expression', () => {
{
type: 'literal',
literalType: 'keyword',
value: '"test"',
valueUnquoted: 'test',
},
],
});

View file

@ -369,6 +369,7 @@ export interface ESQLStringLiteral extends ESQLAstBaseItem {
literalType: 'keyword';
value: string;
valueUnquoted: string;
}
// @internal

View file

@ -43,6 +43,9 @@ function checkLikeNode(node: ESQLLikeOperator): QueryCorrection[] {
likeExpression.value = likeExpression.value
.replaceAll(/(?<!\\)%/g, '*')
.replaceAll(/(?<!\\)_/g, '?');
likeExpression.valueUnquoted = likeExpression.valueUnquoted
.replaceAll(/(?<!\\)%/g, '*')
.replaceAll(/(?<!\\)_/g, '?');
if (likeExpression.value !== initialValue) {
likeExpression.name = likeExpression.value;