[ES|QL] Correctly parse source nodes (#190941)

## Summary

Fixes `source` node parsing. Correctly handles cluster part and
unescapes the quoted index string part.

1. First removes the cluster string part.
2. Unquotes and unescapes the index string part (if it is quoted and
escaped).

Neither of these was done before: the index pattern string was unquoted
as a whole (with the cluster part still attached), and the index string
was not unescaped.


### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios


### For maintainers

- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Vadim Kibana 2024-08-22 15:35:14 +02:00 committed by GitHub
parent e932b932f0
commit 08fbd9caae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 332 additions and 0 deletions

View file

@ -0,0 +1,265 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { getAstAndSyntaxErrors as parse } from '../ast_parser';
describe('source nodes', () => {
  /** Fields of a parsed `source` node that each test case pins down. */
  interface ExpectedSource {
    name: unknown;
    cluster: string;
    index: string;
  }

  /**
   * Parses `query` and checks that the AST is a single `from` command whose
   * arguments match the given source descriptions, in order.
   */
  const expectFromSources = (query: string, sources: ExpectedSource[]) => {
    const { ast } = parse(query);
    const args = sources.map((source) => ({ type: 'source', ...source }));

    expect(ast).toMatchObject([{ type: 'command', name: 'from', args }]);
  };

  it('cluster vs quoted source', () => {
    // The same text is a cluster-qualified index when bare, but a plain index
    // name (colon included) when quoted.
    expectFromSources('FROM cluster:index, "cluster:index"', [
      { name: 'cluster:index', cluster: 'cluster', index: 'index' },
      { name: 'cluster:index', cluster: '', index: 'cluster:index' },
    ]);
  });

  it('date-math syntax', () => {
    expectFromSources('FROM <logs-{now/d}>', [
      { name: '<logs-{now/d}>', cluster: '', index: '<logs-{now/d}>' },
    ]);
  });

  describe('unquoted', () => {
    it('basic', () => {
      expectFromSources('FROM a', [{ name: 'a', cluster: '', index: 'a' }]);
    });

    it('with slash', () => {
      expectFromSources('FROM a/b', [{ name: 'a/b', cluster: '', index: 'a/b' }]);
    });

    it('dot and star', () => {
      expectFromSources('FROM a.b-*', [{ name: 'a.b-*', cluster: '', index: 'a.b-*' }]);
    });
  });

  describe('double quoted', () => {
    it('basic', () => {
      expectFromSources('FROM "a"', [{ name: 'a', cluster: '', index: 'a' }]);
    });

    it('allows escaped chars', () => {
      // Only `index` is checked for exact content here; `name` just has to be
      // some string.
      expectFromSources('FROM "a \\" \\r \\n \\t \\\\ b"', [
        { name: expect.any(String), cluster: '', index: 'a " \r \n \t \\ b' },
      ]);
    });
  });

  describe('triple-double quoted', () => {
    it('basic', () => {
      expectFromSources('FROM """a"""', [{ name: 'a', cluster: '', index: 'a' }]);
    });

    it('with double quote in the middle', () => {
      expectFromSources('FROM """a"b"""', [{ name: 'a"b', cluster: '', index: 'a"b' }]);
    });

    it('allows special chars', () => {
      // Backslashes inside triple quotes are kept verbatim (no unescaping).
      expectFromSources('FROM """a:\\/b"""', [
        { name: 'a:\\/b', cluster: '', index: 'a:\\/b' },
      ]);
    });

    it('allows emojis', () => {
      expectFromSources('FROM """a👍b"""', [{ name: 'a👍b', cluster: '', index: 'a👍b' }]);
    });
  });

  describe('cluster string', () => {
    it('basic', () => {
      expectFromSources('FROM cluster:a', [
        { name: 'cluster:a', cluster: 'cluster', index: 'a' },
      ]);
    });
  });
});

View file

@ -12,6 +12,7 @@
import { type Token, type ParserRuleContext, type TerminalNode } from 'antlr4';
import {
IndexPatternContext,
QualifiedNameContext,
type ArithmeticUnaryContext,
type DecimalValueContext,
@ -306,6 +307,34 @@ function sanitizeSourceString(ctx: ParserRuleContext) {
return contextText;
}
/**
 * Removes the surrounding quotes from the index part of a source identifier
 * and, for regular double-quoted strings, resolves its escape sequences.
 *
 * - Unquoted input is returned unchanged.
 * - Triple-double-quoted input (`"""..."""`) only has its delimiters removed;
 *   the content is kept verbatim.
 * - Double-quoted input (`"..."`) has its delimiters removed and the escape
 *   sequences `\"`, `\r`, `\n`, `\t` and `\\` decoded.
 *
 * @param indexString Raw text of the index part, possibly quoted.
 * @returns The unquoted (and, where applicable, unescaped) index string.
 */
const unquoteIndexString = (indexString: string): string => {
  const isStringQuoted = indexString[0] === '"';

  if (!isStringQuoted) {
    return indexString;
  }

  // If wrapped by triple double quotes, simply remove them.
  if (indexString.startsWith(`"""`) && indexString.endsWith(`"""`)) {
    return indexString.slice(3, -3);
  }

  // If wrapped by double quote, remove them and unescape the string.
  if (indexString[indexString.length - 1] === '"') {
    // Decode every escape sequence in ONE left-to-right pass. Chaining a
    // separate `.replace()` per sequence is incorrect: an escaped backslash
    // followed by `n`, `r`, `t` or `"` (e.g. `\\n`) would have its second
    // backslash consumed by the wrong rule and turn into a control character.
    return indexString
      .slice(1, -1)
      .replace(/\\(["rnt\\])/g, (_match: string, escaped: string): string => {
        switch (escaped) {
          case 'r':
            return '\r';
          case 'n':
            return '\n';
          case 't':
            return '\t';
          default:
            // `"` and `\` stand for themselves once the backslash is dropped.
            return escaped;
        }
      });
  }

  // This should never happen, but if it does, return the original string.
  return indexString;
};
export function sanitizeIdentifierString(ctx: ParserRuleContext) {
const result =
getUnquotedText(ctx)?.getText() ||
@ -352,8 +381,27 @@ export function createSource(
type: 'index' | 'policy' = 'index'
): ESQLSource {
const text = sanitizeSourceString(ctx);
let cluster: string = '';
let index: string = '';
if (ctx instanceof IndexPatternContext) {
const clusterString = ctx.clusterString();
const indexString = ctx.indexString();
if (clusterString) {
cluster = clusterString.getText();
}
if (indexString) {
index = indexString.getText();
index = unquoteIndexString(index);
}
}
return {
type: 'source',
cluster,
index,
name: text,
sourceType: type,
text,

View file

@ -175,6 +175,25 @@ export interface ESQLTimeInterval extends ESQLAstBaseItem {
/**
 * AST node for a source identifier, e.g. an argument of the `FROM` command.
 */
export interface ESQLSource extends ESQLAstBaseItem {
  type: 'source';
  sourceType: 'index' | 'policy';

  /**
   * Cluster portion of the source identifier, i.e. the text in front of the
   * colon. An empty string when the identifier carries no cluster prefix.
   *
   * ```
   * FROM [<cluster>:]<index>
   * ```
   */
  cluster?: string;

  /**
   * Index portion of the source identifier, with any surrounding quotes
   * removed and escape sequences already resolved.
   *
   * ```
   * FROM [<cluster>:]<index>
   * ```
   */
  index?: string;
}
export interface ESQLColumn extends ESQLAstBaseItem {