ES|QL: Check if cluster aliases and index patterns are valid before executing query (#122497)

ES|QL index patterns validation: Ensure that the patterns in the query are syntactically and semantically valid

---------

Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Co-authored-by: Andrei Stefan <astefan@users.noreply.github.com>
Co-authored-by: Alexander Spies <alexander.spies@elastic.co>
This commit is contained in:
Pawan Kartik 2025-06-25 16:47:42 +01:00 committed by GitHub
parent af735accda
commit c94c021d0e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 408 additions and 82 deletions

View file

@ -0,0 +1,5 @@
pr: 122497
summary: Check if index patterns conform to valid format before validation
area: CCS
type: enhancement
issues: []

View file

@ -20,17 +20,20 @@ import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IdentifierContext;
import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IndexStringContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.SelectorResolver.SELECTOR_SEPARATOR;
import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR;
import static org.elasticsearch.transport.RemoteClusterAware.isRemoteIndexName;
import static org.elasticsearch.transport.RemoteClusterAware.splitIndexName;
import static org.elasticsearch.xpack.esql.core.util.StringUtils.EXCLUSION;
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
import static org.elasticsearch.xpack.esql.parser.ParserUtils.source;
abstract class IdentifierBuilder extends AbstractBuilder {
private static final String BLANK_INDEX_ERROR_MESSAGE = "Blank index specified in index pattern";
@Override
public String visitIdentifier(IdentifierContext ctx) {
return ctx == null ? null : unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.UNQUOTED_IDENTIFIER());
@ -88,39 +91,21 @@ abstract class IdentifierBuilder extends AbstractBuilder {
String indexPattern = c.unquotedIndexString() != null ? c.unquotedIndexString().getText() : visitIndexString(c.indexString());
String clusterString = visitClusterString(c.clusterString());
String selectorString = visitSelectorString(c.selectorString());
// skip validating index on remote cluster, because the behavior of remote cluster is not consistent with local cluster
// For example, invalid#index is an invalid index name, however FROM *:invalid#index does not return an error
if (clusterString == null) {
hasSeenStar.set(indexPattern.contains(WILDCARD) || hasSeenStar.get());
validateIndexPattern(indexPattern, c, hasSeenStar.get());
// Other instances of Elasticsearch may have differing selectors so only validate selector string if remote cluster
// string is unset
if (selectorString != null) {
try {
// Ensures that the selector provided is one of the valid kinds
IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexPattern, selectorString);
} catch (InvalidIndexNameException e) {
throw new ParsingException(e, source(c), e.getMessage());
}
}
} else {
validateClusterString(clusterString, c);
// Do not allow selectors on remote cluster expressions until they are supported
if (selectorString != null) {
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexPattern, selectorString), c);
}
}
hasSeenStar.set(hasSeenStar.get() || indexPattern.contains(WILDCARD));
validate(clusterString, indexPattern, selectorString, c, hasSeenStar.get());
patterns.add(reassembleIndexName(clusterString, indexPattern, selectorString));
});
return Strings.collectionToDelimitedString(patterns, ",");
}
private static void throwInvalidIndexNameException(String indexPattern, String message, EsqlBaseParser.IndexPatternContext ctx) {
var ie = new InvalidIndexNameException(indexPattern, message);
throw new ParsingException(ie, source(ctx), ie.getMessage());
}
private static void throwOnMixingSelectorWithCluster(String indexPattern, EsqlBaseParser.IndexPatternContext c) {
InvalidIndexNameException ie = new InvalidIndexNameException(
indexPattern,
"Selectors are not yet supported on remote cluster patterns"
);
throw new ParsingException(ie, source(c), ie.getMessage());
throwInvalidIndexNameException(indexPattern, "Selectors are not yet supported on remote cluster patterns", c);
}
private static String reassembleIndexName(String clusterString, String indexPattern, String selectorString) {
@ -144,56 +129,193 @@ abstract class IdentifierBuilder extends AbstractBuilder {
}
}
private static void validateIndexPattern(String indexPattern, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
// multiple index names can be in the same double quote, e.g. indexPattern = "idx1, *, -idx2"
String[] indices = indexPattern.split(",");
boolean hasExclusion = false;
for (String index : indices) {
// Strip spaces off first because validation checks are not written to handle them
index = index.strip();
if (isRemoteIndexName(index)) { // skip the validation if there is remote cluster
// Ensure that there are no selectors as they are not yet supported
if (index.contains(SELECTOR_SEPARATOR)) {
throwOnMixingSelectorWithCluster(index, ctx);
}
continue;
/**
* Takes the parsed constituent strings and validates them.
* @param clusterString Name of the remote cluster. Can be null.
* @param indexPattern Name of the index or pattern; can also have multiple patterns in case of quoting,
* e.g. {@code FROM """index*,-index1"""}.
* @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
* @param ctx Index Pattern Context for generating error messages with offsets.
* @param hasSeenStar If we've seen an asterisk so far.
*/
private static void validate(
String clusterString,
String indexPattern,
String selectorString,
EsqlBaseParser.IndexPatternContext ctx,
boolean hasSeenStar
) {
/*
* At this point, only 3 formats are possible:
* "index_pattern(s)",
* remote:index_pattern, and,
* index_pattern::selector_string.
*
* The grammar prohibits remote:"index_pattern(s)" or "index_pattern(s)"::selector_string as they're
* partially quoted. So if either of cluster string or selector string are present, there's no need
* to split the pattern by comma since comma requires partial quoting.
*/
String[] patterns;
if (clusterString == null && selectorString == null) {
// Pattern could be quoted or is singular like "index_name".
patterns = indexPattern.split(",", -1);
} else {
// Either of cluster string or selector string is present. Pattern is unquoted.
patterns = new String[] { indexPattern };
}
patterns = Arrays.stream(patterns).map(String::strip).toArray(String[]::new);
if (Arrays.stream(patterns).anyMatch(String::isBlank)) {
throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
}
// Edge case: happens when all the index names in a pattern are empty like "FROM ",,,,,"".
if (patterns.length == 0) {
throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx);
} else if (patterns.length == 1) {
// Pattern is either an unquoted string or a quoted string with a single index (no comma sep).
validateSingleIndexPattern(clusterString, patterns[0], selectorString, ctx, hasSeenStar);
} else {
/*
* Presence of multiple patterns requires a comma and comma requires quoting. If quoting is present,
* cluster string and selector string cannot be present; they need to be attached within the quoting.
* So we attempt to extract them later.
*/
for (String pattern : patterns) {
validateSingleIndexPattern(null, pattern, null, ctx, hasSeenStar);
}
}
}
/**
* Validates the constituent strings. Will extract the cluster string and/or selector string from the index
* name if clubbed together inside a quoted string.
*
* @param clusterString Name of the remote cluster. Can be null.
* @param indexName Name of the index.
* @param selectorString Selector string, i.e. "::data" or "::failures". Can be null.
* @param ctx Index Pattern Context for generating error messages with offsets.
* @param hasSeenStar If we've seen an asterisk so far.
*/
private static void validateSingleIndexPattern(
String clusterString,
String indexName,
String selectorString,
EsqlBaseParser.IndexPatternContext ctx,
boolean hasSeenStar
) {
indexName = indexName.strip();
/*
* Precedence:
* 1. Cannot mix cluster and selector strings.
* 2. Cluster string must be valid.
* 3. Index name must be valid.
* 4. Selector string must be valid.
*
* Since cluster string and/or selector string can be clubbed with the index name, we must try to
* manually extract them before we attempt to do #2, #3, and #4.
*/
// It is possible to specify a pattern like "remote_cluster:index_name". Try to extract such details from the index string.
if (clusterString == null && selectorString == null) {
try {
Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(index);
if (splitPattern.v2() != null) {
index = splitPattern.v1();
var split = splitIndexName(indexName);
clusterString = split[0];
indexName = split[1];
} catch (IllegalArgumentException e) {
throw new ParsingException(e, source(ctx), e.getMessage());
}
}
// At the moment, selector strings for remote indices is not allowed.
if (clusterString != null && selectorString != null) {
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexName, selectorString), ctx);
}
// Validation in the right precedence.
if (clusterString != null) {
clusterString = clusterString.strip();
validateClusterString(clusterString, ctx);
}
/*
* It is possible for selector string to be attached to the index: "index_name::selector_string".
* Try to extract the selector string.
*/
try {
Tuple<String, String> splitPattern = IndexNameExpressionResolver.splitSelectorExpression(indexName);
if (splitPattern.v2() != null) {
// Cluster string too was clubbed with the index name like selector string.
if (clusterString != null) {
throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, splitPattern.v1(), splitPattern.v2()), ctx);
} else {
// We've seen a selectorString. Use it.
selectorString = splitPattern.v2();
}
}
indexName = splitPattern.v1();
} catch (InvalidIndexNameException e) {
throw new ParsingException(e, source(ctx), e.getMessage());
}
resolveAndValidateIndex(indexName, ctx, hasSeenStar);
if (selectorString != null) {
selectorString = selectorString.strip();
try {
// Ensures that the selector provided is one of the valid kinds.
IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexName, selectorString);
} catch (InvalidIndexNameException e) {
// throws exception if the selector expression is invalid. Selector resolution does not complain about exclusions
throw new ParsingException(e, source(ctx), e.getMessage());
}
hasSeenStar = index.contains(WILDCARD) || hasSeenStar;
index = index.replace(WILDCARD, "").strip();
if (index.isBlank()) {
continue;
}
}
private static void resolveAndValidateIndex(String index, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) {
// If index name is blank without any replacements, it was likely blank right from the beginning and is invalid.
if (index.isBlank()) {
throwInvalidIndexNameException(index, BLANK_INDEX_ERROR_MESSAGE, ctx);
}
hasSeenStar = hasSeenStar || index.contains(WILDCARD);
index = index.replace(WILDCARD, "").strip();
if (index.isBlank()) {
return;
}
var hasExclusion = index.startsWith(EXCLUSION);
index = removeExclusion(index);
String tempName;
try {
// remove the exclusion outside of <>, from index names with DateMath expression,
// e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
} catch (ElasticsearchParseException e) {
// throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
throw new ParsingException(e, source(ctx), e.getMessage());
}
hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
index = tempName.equals(index) ? index : removeExclusion(tempName);
try {
MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
} catch (InvalidIndexNameException e) {
// ignore invalid index name if it has exclusions and there is an index with wildcard before it
if (hasSeenStar && hasExclusion) {
return;
}
hasExclusion = index.startsWith(EXCLUSION);
index = removeExclusion(index);
String tempName;
try {
// remove the exclusion outside of <>, from index names with DateMath expression,
// e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression
tempName = IndexNameExpressionResolver.resolveDateMathExpression(index);
} catch (ElasticsearchParseException e) {
// throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions
throw new ParsingException(e, source(ctx), e.getMessage());
}
hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion;
index = tempName.equals(index) ? index : removeExclusion(tempName);
try {
MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new);
} catch (InvalidIndexNameException e) {
// ignore invalid index name if it has exclusions and there is an index with wildcard before it
if (hasSeenStar && hasExclusion) {
continue;
}
throw new ParsingException(e, source(ctx), e.getMessage());
InvalidIndexNameException errorToThrow = e;
/*
* We only modify this particular message because it mentions '*' as an invalid char.
* However, we do allow asterisk in the index patterns: wildcarded patterns. Let's not
* mislead the user by mentioning this char in the error message.
*/
if (e.getMessage().contains("must not contain the following characters")) {
errorToThrow = new InvalidIndexNameException(index, e.getMessage().replace("'*',", ""));
}
throw new ParsingException(errorToThrow, source(ctx), errorToThrow.getMessage());
}
}

View file

@ -472,10 +472,12 @@ public class StatementParserTests extends AbstractStatementParserTests {
"foo, test-*, abc, xyz", test123
""");
assertStringAsIndexPattern("foo,test,xyz", command + " foo, test,xyz");
assertStringAsIndexPattern("<logstash-{now/M{yyyy.MM}}>", command + " <logstash-{now/M{yyyy.MM}}>");
assertStringAsIndexPattern(
"<logstash-{now/M{yyyy.MM}}>,<logstash-{now/d{yyyy.MM.dd|+12:00}}>",
command + " <logstash-{now/M{yyyy.MM}}>, \"<logstash-{now/d{yyyy.MM.dd|+12:00}}>\""
);
assertStringAsIndexPattern("<logstash-{now/d{yyyy.MM.dd|+12:00}}>", command + " \"<logstash-{now/d{yyyy.MM.dd|+12:00}}>\"");
assertStringAsIndexPattern(
"-<logstash-{now/M{yyyy.MM}}>,-<-logstash-{now/M{yyyy.MM}}>,"
+ "-<logstash-{now/d{yyyy.MM.dd|+12:00}}>,-<-logstash-{now/d{yyyy.MM.dd|+12:00}}>",
@ -494,18 +496,30 @@ public class StatementParserTests extends AbstractStatementParserTests {
lineNumber,
"mismatched input '\"index|pattern\"' expecting UNQUOTED_SOURCE"
);
assertStringAsIndexPattern("*:index|pattern", command + " \"*:index|pattern\"");
// Entire index pattern is quoted. So it's not a parse error but a semantic error where the index name
// is invalid.
expectError(command + " \"*:index|pattern\"", "Invalid index name [index|pattern], must not contain the following characters");
clusterAndIndexAsIndexPattern(command, "cluster:index");
clusterAndIndexAsIndexPattern(command, "cluster:.index");
clusterAndIndexAsIndexPattern(command, "cluster*:index*");
clusterAndIndexAsIndexPattern(command, "cluster*:<logstash-{now/D}>*");// this is not a valid pattern, * should be inside <>
clusterAndIndexAsIndexPattern(command, "cluster*:<logstash-{now/D}*>");
clusterAndIndexAsIndexPattern(command, "cluster*:<logstash-{now/d}>*");
clusterAndIndexAsIndexPattern(command, "cluster*:*");
clusterAndIndexAsIndexPattern(command, "*:index*");
clusterAndIndexAsIndexPattern(command, "*:*");
expectError(
command + " \"cluster:index|pattern\"",
"Invalid index name [index|pattern], must not contain the following characters"
);
expectError(command + " *:\"index|pattern\"", "expecting UNQUOTED_SOURCE");
if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) {
assertStringAsIndexPattern("foo::data", command + " foo::data");
assertStringAsIndexPattern("foo::failures", command + " foo::failures");
expectErrorWithLineNumber(
command + " *,\"-foo\"::data",
"*,-foo::data",
lineNumber,
"mismatched input '::' expecting {<EOF>, '|', ',', 'metadata'}"
);
expectErrorWithLineNumber(
command + " cluster:\"foo::data\"",
" cluster:\"foo::data\"",
@ -585,6 +599,7 @@ public class StatementParserTests extends AbstractStatementParserTests {
Map<String, String> commands = new HashMap<>();
commands.put("FROM {}", "line 1:6: ");
if (Build.current().isSnapshot()) {
commands.put("TS {}", "line 1:4: ");
commands.put("ROW x = 1 | LOOKUP_🐔 {} ON j", "line 1:22: ");
}
String lineNumber;
@ -625,7 +640,11 @@ public class StatementParserTests extends AbstractStatementParserTests {
expectInvalidIndexNameErrorWithLineNumber(command, "index::failure", lineNumber);
// Cluster name cannot be combined with selector yet.
var parseLineNumber = command.contains("FROM") ? 6 : 9;
int parseLineNumber = 6;
if (command.startsWith("TS")) {
parseLineNumber = 4;
}
expectDoubleColonErrorWithLineNumber(command, "cluster:foo::data", parseLineNumber + 11);
expectDoubleColonErrorWithLineNumber(command, "cluster:foo::failures", parseLineNumber + 11);
@ -633,19 +652,18 @@ public class StatementParserTests extends AbstractStatementParserTests {
expectErrorWithLineNumber(
command,
"cluster:\"foo\"::data",
"line 1:14: ",
command.startsWith("FROM") ? "line 1:14: " : "line 1:12: ",
"mismatched input '\"foo\"' expecting UNQUOTED_SOURCE"
);
expectErrorWithLineNumber(
command,
"cluster:\"foo\"::failures",
"line 1:14: ",
command.startsWith("FROM") ? "line 1:14: " : "line 1:12: ",
"mismatched input '\"foo\"' expecting UNQUOTED_SOURCE"
);
// TODO: Edge case that will be invalidated in follow up (https://github.com/elastic/elasticsearch/issues/122651)
// expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::data", parseLineNumber + 13);
// expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::failures", parseLineNumber + 13);
expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::data", parseLineNumber + 13);
expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::failures", parseLineNumber + 13);
expectErrorWithLineNumber(
command,
@ -689,7 +707,7 @@ public class StatementParserTests extends AbstractStatementParserTests {
expectErrorWithLineNumber(
command,
"cluster:\"index,index2\"::failures",
"line 1:14: ",
command.startsWith("FROM") ? "line 1:14: " : "line 1:12: ",
"mismatched input '\"index,index2\"' expecting UNQUOTED_SOURCE"
);
}
@ -746,16 +764,33 @@ public class StatementParserTests extends AbstractStatementParserTests {
clustersAndIndices(command, "index*", "-index#pattern");
clustersAndIndices(command, "*", "-<--logstash-{now/M{yyyy.MM}}>");
clustersAndIndices(command, "index*", "-<--logstash#-{now/M{yyyy.MM}}>");
expectInvalidIndexNameErrorWithLineNumber(command, "*, index#pattern", lineNumber, "index#pattern", "must not contain '#'");
expectInvalidIndexNameErrorWithLineNumber(
command,
"index*, index#pattern",
indexStarLineNumber,
"index#pattern",
"must not contain '#'"
);
expectDateMathErrorWithLineNumber(command, "cluster*:<logstash-{now/D}*>", commands.get(command), dateMathError);
expectDateMathErrorWithLineNumber(command, "*, \"-<-logstash-{now/D}>\"", lineNumber, dateMathError);
expectDateMathErrorWithLineNumber(command, "*, -<-logstash-{now/D}>", lineNumber, dateMathError);
expectDateMathErrorWithLineNumber(command, "\"*, -<-logstash-{now/D}>\"", commands.get(command), dateMathError);
expectDateMathErrorWithLineNumber(command, "\"*, -<-logst:ash-{now/D}>\"", commands.get(command), dateMathError);
if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) {
clustersAndIndices(command, "*", "-index#pattern::data");
clustersAndIndices(command, "*", "-index#pattern::data");
clustersAndIndices(command, "*", "-index::data");
clustersAndIndices(command, "*", "-index::failures");
clustersAndIndices(command, "*", "-index*pattern::data");
clustersAndIndices(command, "*", "-index*pattern::failures");
// This is by existing design: refer to the comment in IdentifierBuilder#resolveAndValidateIndex() in the last
// catch clause. If there's an index with a wildcard before an invalid index, we don't error out.
clustersAndIndices(command, "index*", "-index#pattern::data");
clustersAndIndices(command, "*", "-<--logstash-{now/M{yyyy.MM}}>::data");
clustersAndIndices(command, "index*", "-<--logstash#-{now/M{yyyy.MM}}>::data");
expectError(command + "index1,<logstash-{now+-/d}>", "unit [-] not supported for date math [+-/d]");
// Throw on invalid date math
expectDateMathErrorWithLineNumber(
command,
@ -3140,6 +3175,122 @@ public class StatementParserTests extends AbstractStatementParserTests {
assertThat(joinType.coreJoin().joinName(), equalTo("LEFT OUTER"));
}
public void testInvalidFromPatterns() {
var sourceCommands = new String[] { "FROM", "TS" };
var indexIsBlank = "Blank index specified in index pattern";
var remoteIsEmpty = "remote part is empty";
var invalidDoubleColonUsage = "invalid usage of :: separator";
expectError(randomFrom(sourceCommands) + " \"\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \" \"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \",,,\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \",,, \"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \", , ,,\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \",,,\",*", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*,\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*,,,\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"index1,,,,\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"index1,index2,,\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"index1,<-+^,index2\",*", "must not contain the following characters");
expectError(randomFrom(sourceCommands) + " \"\",*", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*: ,*,\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*: ,*,\",validIndexName", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"\", \" \", \" \",validIndexName", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"index1\", \"index2\", \" ,index3,index4\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"index1,index2,,index3\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"index1,index2, ,index3\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*, \"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*\", \"\"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*\", \" \"", indexIsBlank);
expectError(randomFrom(sourceCommands) + " \"*\", \":index1\"", remoteIsEmpty);
expectError(randomFrom(sourceCommands) + " \"index1,*,:index2\"", remoteIsEmpty);
expectError(randomFrom(sourceCommands) + " \"*\", \"::data\"", remoteIsEmpty);
expectError(randomFrom(sourceCommands) + " \"*\", \"::failures\"", remoteIsEmpty);
expectError(randomFrom(sourceCommands) + " \"*,index1::\"", invalidDoubleColonUsage);
expectError(randomFrom(sourceCommands) + " \"*\", index1, index2, \"index3:: \"", invalidDoubleColonUsage);
expectError(randomFrom(sourceCommands) + " \"*,index1::*\"", invalidDoubleColonUsage);
}
public void testInvalidPatternsWithIntermittentQuotes() {
// There are 3 ways of crafting invalid index patterns that conforms to the grammar defined through ANTLR.
// 1. Not quoting the pattern,
// 2. Quoting individual patterns ("index1", "index2", ...), and,
// 3. Clubbing all the patterns into a single quoted string ("index1,index2,...).
//
// Note that in these tests, we unquote a pattern and then quote it immediately.
// This is because when randomly generating an index pattern, it may look like: "foo"::data.
// To convert it into a quoted string like "foo::data", we need to unquote and then re-quote it.
// Prohibited char in a quoted cross cluster index pattern should result in an error.
{
var randomIndex = randomIndexPattern();
// Select an invalid char to sneak in.
// Note: some chars like '|' and '"' are excluded to generate a proper invalid name.
Character[] invalidChars = { ' ', '/', '<', '>', '?' };
var randomInvalidChar = randomFrom(invalidChars);
// Construct the new invalid index pattern.
var remoteIndexWithInvalidChar = quote(randomIdentifier() + ":" + "foo" + randomInvalidChar + "bar");
var query = "FROM " + randomIndex + "," + remoteIndexWithInvalidChar;
expectError(query, "must not contain the following characters [' ','\"',',','/','<','>','?','\\','|']");
}
// Colon outside a quoted string should result in an ANTLR error: a comma is expected.
{
var randomIndex = randomIndexPattern();
// In the form of: "*|cluster alias:random string".
var malformedClusterAlias = quote((randomBoolean() ? "*" : randomIdentifier()) + ":" + randomIdentifier());
// We do not generate a cross cluster pattern or else we'd be getting a different error (which is tested in
// the next test).
var remoteIndex = quote(unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER))));
// Format: FROM <some index>, "<cluster alias: random string>":<remote index>
var query = "FROM " + randomIndex + "," + malformedClusterAlias + ":" + remoteIndex;
expectError(query, " mismatched input ':'");
}
// If an explicit cluster string is present, then we expect an unquoted string next.
{
var randomIndex = randomIndexPattern();
var remoteClusterAlias = randomBoolean() ? "*" : randomIdentifier();
// In the form of: random string:random string.
var malformedRemoteIndex = quote(unquoteIndexPattern(randomIndexPattern(CROSS_CLUSTER)));
// Format: FROM <some index>, <cluster alias>:"random string:random string"
var query = "FROM " + randomIndex + "," + remoteClusterAlias + ":" + malformedRemoteIndex;
// Since "random string:random string" is partially quoted, expect a ANTLR's parse error.
expectError(query, "expecting UNQUOTED_SOURCE");
}
if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) {
// If a stream in on a remote and the pattern is entirely quoted, we should be able to validate it.
// Note: invalid selector syntax is covered in a different test.
{
var fromPattern = randomIndexPattern();
var malformedIndexSelectorPattern = quote(
(randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(INDEX_SELECTOR, without(CROSS_CLUSTER)))
);
// Format: FROM <some index>, "<cluster alias>:<some index>::<data|failures>"
var query = "FROM " + fromPattern + "," + malformedIndexSelectorPattern;
expectError(query, "Selectors are not yet supported on remote cluster patterns");
}
// If a stream in on a remote and the cluster alias and index pattern are separately quoted, we should
// still be able to validate it.
// Note: invalid selector syntax is covered in a different test.
{
var fromPattern = randomIndexPattern();
var malformedIndexSelectorPattern = quote(randomIdentifier())
+ ":"
+ quote(unquoteIndexPattern(randomIndexPattern(INDEX_SELECTOR, without(CROSS_CLUSTER))));
// Format: FROM <some index>, "<cluster alias>":"<some index>::<data|failures>"
var query = "FROM " + fromPattern + "," + malformedIndexSelectorPattern;
// Everything after "<cluster alias>" is extraneous input and hence ANTLR's error.
expectError(query, "mismatched input ':'");
}
}
}
public void testInvalidJoinPatterns() {
assumeTrue("LOOKUP JOIN requires corresponding capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
@ -3172,6 +3323,18 @@ public class StatementParserTests extends AbstractStatementParserTests {
// If one or more patterns participating in LOOKUP JOINs are partially quoted, we expect the partial quoting
// error messages to take precedence over any LOOKUP JOIN error messages.
{
// Generate a syntactically invalid (partial quoted) pattern.
var fromPatterns = quote(randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER)));
var joinPattern = randomIndexPattern();
expectError(
"FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
// Since the from pattern is partially quoted, we get an error at the end of the partially quoted string.
" mismatched input ':'"
);
}
{
// Generate a syntactically invalid (partial quoted) pattern.
var fromPatterns = randomIdentifier() + ":" + quote(randomIndexPatterns(without(CROSS_CLUSTER)));
@ -3184,6 +3347,17 @@ public class StatementParserTests extends AbstractStatementParserTests {
);
}
{
var fromPatterns = randomIndexPattern();
// Generate a syntactically invalid (partial quoted) pattern.
var joinPattern = quote(randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER)));
expectError(
"FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
// Since the join pattern is partially quoted, we get an error at the end of the partially quoted string.
"mismatched input ':'"
);
}
{
var fromPatterns = randomIndexPattern();
// Generate a syntactically invalid (partial quoted) pattern.
@ -3251,6 +3425,31 @@ public class StatementParserTests extends AbstractStatementParserTests {
+ "], index pattern selectors are not supported in LOOKUP JOIN"
);
}
{
// Although we don't support selector strings for remote indices, it's alright.
// The parser error message takes precedence.
var fromPatterns = randomIndexPatterns();
var joinPattern = quote(randomIdentifier()) + "::" + randomFrom("data", "failures");
// After the end of the partially quoted string, i.e. the index name, parser now expects "ON..." and not a selector string.
expectError(
"FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
"mismatched input ':' expecting 'on'"
);
}
{
// Although we don't support selector strings for remote indices, it's alright.
// The parser error message takes precedence.
var fromPatterns = randomIndexPatterns();
var joinPattern = randomIdentifier() + "::" + quote(randomFrom("data", "failures"));
// After the index name and "::", parser expects an unquoted string, i.e. the selector string should not be
// partially quoted.
expectError(
"FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(),
" mismatched input ':' expecting UNQUOTED_SOURCE"
);
}
}
}