diff --git a/docs/changelog/122497.yaml b/docs/changelog/122497.yaml new file mode 100644 index 000000000000..46c385ea4ed4 --- /dev/null +++ b/docs/changelog/122497.yaml @@ -0,0 +1,5 @@ +pr: 122497 +summary: Check if index patterns conform to valid format before validation +area: CCS +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java index 91b8606c4030..0a07e06c4c1d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java @@ -20,17 +20,20 @@ import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IdentifierContext; import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IndexStringContext; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import static org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.SelectorResolver.SELECTOR_SEPARATOR; import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR; -import static org.elasticsearch.transport.RemoteClusterAware.isRemoteIndexName; +import static org.elasticsearch.transport.RemoteClusterAware.splitIndexName; import static org.elasticsearch.xpack.esql.core.util.StringUtils.EXCLUSION; import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD; import static org.elasticsearch.xpack.esql.parser.ParserUtils.source; abstract class IdentifierBuilder extends AbstractBuilder { + private static final String BLANK_INDEX_ERROR_MESSAGE = "Blank index specified in index pattern"; + @Override public String visitIdentifier(IdentifierContext ctx) { return ctx == null ? null : unquoteIdentifier(ctx.QUOTED_IDENTIFIER(), ctx.UNQUOTED_IDENTIFIER()); @@ -88,39 +91,21 @@ abstract class IdentifierBuilder extends AbstractBuilder { String indexPattern = c.unquotedIndexString() != null ? c.unquotedIndexString().getText() : visitIndexString(c.indexString()); String clusterString = visitClusterString(c.clusterString()); String selectorString = visitSelectorString(c.selectorString()); - // skip validating index on remote cluster, because the behavior of remote cluster is not consistent with local cluster - // For example, invalid#index is an invalid index name, however FROM *:invalid#index does not return an error - if (clusterString == null) { - hasSeenStar.set(indexPattern.contains(WILDCARD) || hasSeenStar.get()); - validateIndexPattern(indexPattern, c, hasSeenStar.get()); - // Other instances of Elasticsearch may have differing selectors so only validate selector string if remote cluster - // string is unset - if (selectorString != null) { - try { - // Ensures that the selector provided is one of the valid kinds - IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexPattern, selectorString); - } catch (InvalidIndexNameException e) { - throw new ParsingException(e, source(c), e.getMessage()); - } - } - } else { - validateClusterString(clusterString, c); - // Do not allow selectors on remote cluster expressions until they are supported - if (selectorString != null) { - throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexPattern, selectorString), c); - } - } + + hasSeenStar.set(hasSeenStar.get() || indexPattern.contains(WILDCARD)); + validate(clusterString, indexPattern, selectorString, c, hasSeenStar.get()); patterns.add(reassembleIndexName(clusterString, indexPattern, selectorString)); }); return Strings.collectionToDelimitedString(patterns, ","); } + private static void throwInvalidIndexNameException(String indexPattern, String message, EsqlBaseParser.IndexPatternContext ctx) { + var ie = new InvalidIndexNameException(indexPattern, message); + throw new ParsingException(ie, source(ctx), ie.getMessage()); + } + private static void throwOnMixingSelectorWithCluster(String indexPattern, EsqlBaseParser.IndexPatternContext c) { - InvalidIndexNameException ie = new InvalidIndexNameException( - indexPattern, - "Selectors are not yet supported on remote cluster patterns" - ); - throw new ParsingException(ie, source(c), ie.getMessage()); + throwInvalidIndexNameException(indexPattern, "Selectors are not yet supported on remote cluster patterns", c); } private static String reassembleIndexName(String clusterString, String indexPattern, String selectorString) { @@ -144,56 +129,193 @@ abstract class IdentifierBuilder extends AbstractBuilder { } } - private static void validateIndexPattern(String indexPattern, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) { - // multiple index names can be in the same double quote, e.g. indexPattern = "idx1, *, -idx2" - String[] indices = indexPattern.split(","); - boolean hasExclusion = false; - for (String index : indices) { - // Strip spaces off first because validation checks are not written to handle them - index = index.strip(); - if (isRemoteIndexName(index)) { // skip the validation if there is remote cluster - // Ensure that there are no selectors as they are not yet supported - if (index.contains(SELECTOR_SEPARATOR)) { - throwOnMixingSelectorWithCluster(index, ctx); - } - continue; + /** + * Takes the parsed constituent strings and validates them. + * @param clusterString Name of the remote cluster. Can be null. + * @param indexPattern Name of the index or pattern; can also have multiple patterns in case of quoting, + * e.g. {@code FROM """index*,-index1"""}. + * @param selectorString Selector string, i.e. "::data" or "::failures". Can be null. + * @param ctx Index Pattern Context for generating error messages with offsets. + * @param hasSeenStar If we've seen an asterisk so far. + */ + private static void validate( + String clusterString, + String indexPattern, + String selectorString, + EsqlBaseParser.IndexPatternContext ctx, + boolean hasSeenStar + ) { + /* + * At this point, only 3 formats are possible: + * "index_pattern(s)", + * remote:index_pattern, and, + * index_pattern::selector_string. + * + * The grammar prohibits remote:"index_pattern(s)" or "index_pattern(s)"::selector_string as they're + * partially quoted. So if either of cluster string or selector string are present, there's no need + * to split the pattern by comma since comma requires partial quoting. + */ + + String[] patterns; + if (clusterString == null && selectorString == null) { + // Pattern could be quoted or is singular like "index_name". + patterns = indexPattern.split(",", -1); + } else { + // Either of cluster string or selector string is present. Pattern is unquoted. + patterns = new String[] { indexPattern }; + } + + patterns = Arrays.stream(patterns).map(String::strip).toArray(String[]::new); + if (Arrays.stream(patterns).anyMatch(String::isBlank)) { + throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx); + } + + // Edge case: happens when all the index names in a pattern are empty like "FROM ",,,,,"". + if (patterns.length == 0) { + throwInvalidIndexNameException(indexPattern, BLANK_INDEX_ERROR_MESSAGE, ctx); + } else if (patterns.length == 1) { + // Pattern is either an unquoted string or a quoted string with a single index (no comma sep). + validateSingleIndexPattern(clusterString, patterns[0], selectorString, ctx, hasSeenStar); + } else { + /* + * Presence of multiple patterns requires a comma and comma requires quoting. If quoting is present, + * cluster string and selector string cannot be present; they need to be attached within the quoting. + * So we attempt to extract them later. + */ + for (String pattern : patterns) { + validateSingleIndexPattern(null, pattern, null, ctx, hasSeenStar); } + } + } + + /** + * Validates the constituent strings. Will extract the cluster string and/or selector string from the index + * name if clubbed together inside a quoted string. + * + * @param clusterString Name of the remote cluster. Can be null. + * @param indexName Name of the index. + * @param selectorString Selector string, i.e. "::data" or "::failures". Can be null. + * @param ctx Index Pattern Context for generating error messages with offsets. + * @param hasSeenStar If we've seen an asterisk so far. + */ + private static void validateSingleIndexPattern( + String clusterString, + String indexName, + String selectorString, + EsqlBaseParser.IndexPatternContext ctx, + boolean hasSeenStar + ) { + indexName = indexName.strip(); + + /* + * Precedence: + * 1. Cannot mix cluster and selector strings. + * 2. Cluster string must be valid. + * 3. Index name must be valid. + * 4. Selector string must be valid. + * + * Since cluster string and/or selector string can be clubbed with the index name, we must try to + * manually extract them before we attempt to do #2, #3, and #4. + */ + + // It is possible to specify a pattern like "remote_cluster:index_name". Try to extract such details from the index string. + if (clusterString == null && selectorString == null) { try { - Tuple splitPattern = IndexNameExpressionResolver.splitSelectorExpression(index); - if (splitPattern.v2() != null) { - index = splitPattern.v1(); + var split = splitIndexName(indexName); + clusterString = split[0]; + indexName = split[1]; + } catch (IllegalArgumentException e) { + throw new ParsingException(e, source(ctx), e.getMessage()); + } + } + + // At the moment, selector strings for remote indices is not allowed. + if (clusterString != null && selectorString != null) { + throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, indexName, selectorString), ctx); + } + + // Validation in the right precedence. + if (clusterString != null) { + clusterString = clusterString.strip(); + validateClusterString(clusterString, ctx); + } + + /* + * It is possible for selector string to be attached to the index: "index_name::selector_string". + * Try to extract the selector string. + */ + try { + Tuple splitPattern = IndexNameExpressionResolver.splitSelectorExpression(indexName); + if (splitPattern.v2() != null) { + // Cluster string too was clubbed with the index name like selector string. + if (clusterString != null) { + throwOnMixingSelectorWithCluster(reassembleIndexName(clusterString, splitPattern.v1(), splitPattern.v2()), ctx); + } else { + // We've seen a selectorString. Use it. + selectorString = splitPattern.v2(); } + } + + indexName = splitPattern.v1(); + } catch (InvalidIndexNameException e) { + throw new ParsingException(e, source(ctx), e.getMessage()); + } + + resolveAndValidateIndex(indexName, ctx, hasSeenStar); + if (selectorString != null) { + selectorString = selectorString.strip(); + try { + // Ensures that the selector provided is one of the valid kinds. + IndexNameExpressionResolver.SelectorResolver.validateIndexSelectorString(indexName, selectorString); } catch (InvalidIndexNameException e) { - // throws exception if the selector expression is invalid. Selector resolution does not complain about exclusions throw new ParsingException(e, source(ctx), e.getMessage()); } - hasSeenStar = index.contains(WILDCARD) || hasSeenStar; - index = index.replace(WILDCARD, "").strip(); - if (index.isBlank()) { - continue; + } + } + + private static void resolveAndValidateIndex(String index, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) { + // If index name is blank without any replacements, it was likely blank right from the beginning and is invalid. + if (index.isBlank()) { + throwInvalidIndexNameException(index, BLANK_INDEX_ERROR_MESSAGE, ctx); + } + + hasSeenStar = hasSeenStar || index.contains(WILDCARD); + index = index.replace(WILDCARD, "").strip(); + if (index.isBlank()) { + return; + } + var hasExclusion = index.startsWith(EXCLUSION); + index = removeExclusion(index); + String tempName; + try { + // remove the exclusion outside of <>, from index names with DateMath expression, + // e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression + tempName = IndexNameExpressionResolver.resolveDateMathExpression(index); + } catch (ElasticsearchParseException e) { + // throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions + throw new ParsingException(e, source(ctx), e.getMessage()); + } + hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion; + index = tempName.equals(index) ? index : removeExclusion(tempName); + try { + MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new); + } catch (InvalidIndexNameException e) { + // ignore invalid index name if it has exclusions and there is an index with wildcard before it + if (hasSeenStar && hasExclusion) { + return; } - hasExclusion = index.startsWith(EXCLUSION); - index = removeExclusion(index); - String tempName; - try { - // remove the exclusion outside of <>, from index names with DateMath expression, - // e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression - tempName = IndexNameExpressionResolver.resolveDateMathExpression(index); - } catch (ElasticsearchParseException e) { - // throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions - throw new ParsingException(e, source(ctx), e.getMessage()); - } - hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion; - index = tempName.equals(index) ? index : removeExclusion(tempName); - try { - MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new); - } catch (InvalidIndexNameException e) { - // ignore invalid index name if it has exclusions and there is an index with wildcard before it - if (hasSeenStar && hasExclusion) { - continue; - } - throw new ParsingException(e, source(ctx), e.getMessage()); + + InvalidIndexNameException errorToThrow = e; + /* + * We only modify this particular message because it mentions '*' as an invalid char. + * However, we do allow asterisk in the index patterns: wildcarded patterns. Let's not + * mislead the user by mentioning this char in the error message. + */ + if (e.getMessage().contains("must not contain the following characters")) { + errorToThrow = new InvalidIndexNameException(index, e.getMessage().replace("'*',", "")); } + + throw new ParsingException(errorToThrow, source(ctx), errorToThrow.getMessage()); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index cb5c53193949..108059ac2c88 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -472,10 +472,12 @@ public class StatementParserTests extends AbstractStatementParserTests { "foo, test-*, abc, xyz", test123 """); assertStringAsIndexPattern("foo,test,xyz", command + " foo, test,xyz"); + assertStringAsIndexPattern("", command + " "); assertStringAsIndexPattern( ",", command + " , \"\"" ); + assertStringAsIndexPattern("", command + " \"\""); assertStringAsIndexPattern( "-,-<-logstash-{now/M{yyyy.MM}}>," + "-,-<-logstash-{now/d{yyyy.MM.dd|+12:00}}>", @@ -494,18 +496,30 @@ public class StatementParserTests extends AbstractStatementParserTests { lineNumber, "mismatched input '\"index|pattern\"' expecting UNQUOTED_SOURCE" ); - assertStringAsIndexPattern("*:index|pattern", command + " \"*:index|pattern\""); + // Entire index pattern is quoted. So it's not a parse error but a semantic error where the index name + // is invalid. + expectError(command + " \"*:index|pattern\"", "Invalid index name [index|pattern], must not contain the following characters"); clusterAndIndexAsIndexPattern(command, "cluster:index"); clusterAndIndexAsIndexPattern(command, "cluster:.index"); clusterAndIndexAsIndexPattern(command, "cluster*:index*"); - clusterAndIndexAsIndexPattern(command, "cluster*:*");// this is not a valid pattern, * should be inside <> - clusterAndIndexAsIndexPattern(command, "cluster*:"); + clusterAndIndexAsIndexPattern(command, "cluster*:*"); clusterAndIndexAsIndexPattern(command, "cluster*:*"); clusterAndIndexAsIndexPattern(command, "*:index*"); clusterAndIndexAsIndexPattern(command, "*:*"); + expectError( + command + " \"cluster:index|pattern\"", + "Invalid index name [index|pattern], must not contain the following characters" + ); + expectError(command + " *:\"index|pattern\"", "expecting UNQUOTED_SOURCE"); if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) { assertStringAsIndexPattern("foo::data", command + " foo::data"); assertStringAsIndexPattern("foo::failures", command + " foo::failures"); + expectErrorWithLineNumber( + command + " *,\"-foo\"::data", + "*,-foo::data", + lineNumber, + "mismatched input '::' expecting {, '|', ',', 'metadata'}" + ); expectErrorWithLineNumber( command + " cluster:\"foo::data\"", " cluster:\"foo::data\"", @@ -585,6 +599,7 @@ public class StatementParserTests extends AbstractStatementParserTests { Map commands = new HashMap<>(); commands.put("FROM {}", "line 1:6: "); if (Build.current().isSnapshot()) { + commands.put("TS {}", "line 1:4: "); commands.put("ROW x = 1 | LOOKUP_🐔 {} ON j", "line 1:22: "); } String lineNumber; @@ -625,7 +640,11 @@ public class StatementParserTests extends AbstractStatementParserTests { expectInvalidIndexNameErrorWithLineNumber(command, "index::failure", lineNumber); // Cluster name cannot be combined with selector yet. - var parseLineNumber = command.contains("FROM") ? 6 : 9; + int parseLineNumber = 6; + if (command.startsWith("TS")) { + parseLineNumber = 4; + } + expectDoubleColonErrorWithLineNumber(command, "cluster:foo::data", parseLineNumber + 11); expectDoubleColonErrorWithLineNumber(command, "cluster:foo::failures", parseLineNumber + 11); @@ -633,19 +652,18 @@ public class StatementParserTests extends AbstractStatementParserTests { expectErrorWithLineNumber( command, "cluster:\"foo\"::data", - "line 1:14: ", + command.startsWith("FROM") ? "line 1:14: " : "line 1:12: ", "mismatched input '\"foo\"' expecting UNQUOTED_SOURCE" ); expectErrorWithLineNumber( command, "cluster:\"foo\"::failures", - "line 1:14: ", + command.startsWith("FROM") ? "line 1:14: " : "line 1:12: ", "mismatched input '\"foo\"' expecting UNQUOTED_SOURCE" ); - // TODO: Edge case that will be invalidated in follow up (https://github.com/elastic/elasticsearch/issues/122651) - // expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::data", parseLineNumber + 13); - // expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::failures", parseLineNumber + 13); + expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::data", parseLineNumber + 13); + expectDoubleColonErrorWithLineNumber(command, "\"cluster:foo\"::failures", parseLineNumber + 13); expectErrorWithLineNumber( command, @@ -689,7 +707,7 @@ public class StatementParserTests extends AbstractStatementParserTests { expectErrorWithLineNumber( command, "cluster:\"index,index2\"::failures", - "line 1:14: ", + command.startsWith("FROM") ? "line 1:14: " : "line 1:12: ", "mismatched input '\"index,index2\"' expecting UNQUOTED_SOURCE" ); } @@ -746,16 +764,33 @@ public class StatementParserTests extends AbstractStatementParserTests { clustersAndIndices(command, "index*", "-index#pattern"); clustersAndIndices(command, "*", "-<--logstash-{now/M{yyyy.MM}}>"); clustersAndIndices(command, "index*", "-<--logstash#-{now/M{yyyy.MM}}>"); + expectInvalidIndexNameErrorWithLineNumber(command, "*, index#pattern", lineNumber, "index#pattern", "must not contain '#'"); + expectInvalidIndexNameErrorWithLineNumber( + command, + "index*, index#pattern", + indexStarLineNumber, + "index#pattern", + "must not contain '#'" + ); + expectDateMathErrorWithLineNumber(command, "cluster*:", commands.get(command), dateMathError); expectDateMathErrorWithLineNumber(command, "*, \"-<-logstash-{now/D}>\"", lineNumber, dateMathError); expectDateMathErrorWithLineNumber(command, "*, -<-logstash-{now/D}>", lineNumber, dateMathError); expectDateMathErrorWithLineNumber(command, "\"*, -<-logstash-{now/D}>\"", commands.get(command), dateMathError); expectDateMathErrorWithLineNumber(command, "\"*, -<-logst:ash-{now/D}>\"", commands.get(command), dateMathError); if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) { - clustersAndIndices(command, "*", "-index#pattern::data"); - clustersAndIndices(command, "*", "-index#pattern::data"); + clustersAndIndices(command, "*", "-index::data"); + clustersAndIndices(command, "*", "-index::failures"); + clustersAndIndices(command, "*", "-index*pattern::data"); + clustersAndIndices(command, "*", "-index*pattern::failures"); + + // This is by existing design: refer to the comment in IdentifierBuilder#resolveAndValidateIndex() in the last + // catch clause. If there's an index with a wildcard before an invalid index, we don't error out. clustersAndIndices(command, "index*", "-index#pattern::data"); clustersAndIndices(command, "*", "-<--logstash-{now/M{yyyy.MM}}>::data"); clustersAndIndices(command, "index*", "-<--logstash#-{now/M{yyyy.MM}}>::data"); + + expectError(command + "index1,", "unit [-] not supported for date math [+-/d]"); + // Throw on invalid date math expectDateMathErrorWithLineNumber( command, @@ -3140,6 +3175,122 @@ public class StatementParserTests extends AbstractStatementParserTests { assertThat(joinType.coreJoin().joinName(), equalTo("LEFT OUTER")); } + public void testInvalidFromPatterns() { + var sourceCommands = new String[] { "FROM", "TS" }; + var indexIsBlank = "Blank index specified in index pattern"; + var remoteIsEmpty = "remote part is empty"; + var invalidDoubleColonUsage = "invalid usage of :: separator"; + + expectError(randomFrom(sourceCommands) + " \"\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \" \"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \",,,\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \",,, \"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \", , ,,\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \",,,\",*", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*,\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*,,,\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"index1,,,,\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"index1,index2,,\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"index1,<-+^,index2\",*", "must not contain the following characters"); + expectError(randomFrom(sourceCommands) + " \"\",*", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*: ,*,\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*: ,*,\",validIndexName", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"\", \" \", \" \",validIndexName", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"index1\", \"index2\", \" ,index3,index4\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"index1,index2,,index3\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"index1,index2, ,index3\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*, \"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*\", \"\"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*\", \" \"", indexIsBlank); + expectError(randomFrom(sourceCommands) + " \"*\", \":index1\"", remoteIsEmpty); + expectError(randomFrom(sourceCommands) + " \"index1,*,:index2\"", remoteIsEmpty); + expectError(randomFrom(sourceCommands) + " \"*\", \"::data\"", remoteIsEmpty); + expectError(randomFrom(sourceCommands) + " \"*\", \"::failures\"", remoteIsEmpty); + expectError(randomFrom(sourceCommands) + " \"*,index1::\"", invalidDoubleColonUsage); + expectError(randomFrom(sourceCommands) + " \"*\", index1, index2, \"index3:: \"", invalidDoubleColonUsage); + expectError(randomFrom(sourceCommands) + " \"*,index1::*\"", invalidDoubleColonUsage); + } + + public void testInvalidPatternsWithIntermittentQuotes() { + // There are 3 ways of crafting invalid index patterns that conforms to the grammar defined through ANTLR. + // 1. Not quoting the pattern, + // 2. Quoting individual patterns ("index1", "index2", ...), and, + // 3. Clubbing all the patterns into a single quoted string ("index1,index2,...). + // + // Note that in these tests, we unquote a pattern and then quote it immediately. + // This is because when randomly generating an index pattern, it may look like: "foo"::data. + // To convert it into a quoted string like "foo::data", we need to unquote and then re-quote it. + + // Prohibited char in a quoted cross cluster index pattern should result in an error. + { + var randomIndex = randomIndexPattern(); + // Select an invalid char to sneak in. + // Note: some chars like '|' and '"' are excluded to generate a proper invalid name. + Character[] invalidChars = { ' ', '/', '<', '>', '?' }; + var randomInvalidChar = randomFrom(invalidChars); + + // Construct the new invalid index pattern. + var remoteIndexWithInvalidChar = quote(randomIdentifier() + ":" + "foo" + randomInvalidChar + "bar"); + var query = "FROM " + randomIndex + "," + remoteIndexWithInvalidChar; + expectError(query, "must not contain the following characters [' ','\"',',','/','<','>','?','\\','|']"); + } + + // Colon outside a quoted string should result in an ANTLR error: a comma is expected. + { + var randomIndex = randomIndexPattern(); + + // In the form of: "*|cluster alias:random string". + var malformedClusterAlias = quote((randomBoolean() ? "*" : randomIdentifier()) + ":" + randomIdentifier()); + + // We do not generate a cross cluster pattern or else we'd be getting a different error (which is tested in + // the next test). + var remoteIndex = quote(unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER)))); + // Format: FROM , "": + var query = "FROM " + randomIndex + "," + malformedClusterAlias + ":" + remoteIndex; + expectError(query, " mismatched input ':'"); + } + + // If an explicit cluster string is present, then we expect an unquoted string next. + { + var randomIndex = randomIndexPattern(); + var remoteClusterAlias = randomBoolean() ? "*" : randomIdentifier(); + // In the form of: random string:random string. + var malformedRemoteIndex = quote(unquoteIndexPattern(randomIndexPattern(CROSS_CLUSTER))); + // Format: FROM , :"random string:random string" + var query = "FROM " + randomIndex + "," + remoteClusterAlias + ":" + malformedRemoteIndex; + // Since "random string:random string" is partially quoted, expect a ANTLR's parse error. + expectError(query, "expecting UNQUOTED_SOURCE"); + } + + if (EsqlCapabilities.Cap.INDEX_COMPONENT_SELECTORS.isEnabled()) { + // If a stream in on a remote and the pattern is entirely quoted, we should be able to validate it. + // Note: invalid selector syntax is covered in a different test. + { + var fromPattern = randomIndexPattern(); + var malformedIndexSelectorPattern = quote( + (randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(INDEX_SELECTOR, without(CROSS_CLUSTER))) + ); + // Format: FROM , ":::" + var query = "FROM " + fromPattern + "," + malformedIndexSelectorPattern; + expectError(query, "Selectors are not yet supported on remote cluster patterns"); + } + + // If a stream in on a remote and the cluster alias and index pattern are separately quoted, we should + // still be able to validate it. + // Note: invalid selector syntax is covered in a different test. + { + var fromPattern = randomIndexPattern(); + var malformedIndexSelectorPattern = quote(randomIdentifier()) + + ":" + + quote(unquoteIndexPattern(randomIndexPattern(INDEX_SELECTOR, without(CROSS_CLUSTER)))); + // Format: FROM , "":"::" + var query = "FROM " + fromPattern + "," + malformedIndexSelectorPattern; + // Everything after "" is extraneous input and hence ANTLR's error. + expectError(query, "mismatched input ':'"); + } + } + } + public void testInvalidJoinPatterns() { assumeTrue("LOOKUP JOIN requires corresponding capability", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); @@ -3172,6 +3323,18 @@ public class StatementParserTests extends AbstractStatementParserTests { // If one or more patterns participating in LOOKUP JOINs are partially quoted, we expect the partial quoting // error messages to take precedence over any LOOKUP JOIN error messages. + + { + // Generate a syntactically invalid (partial quoted) pattern. + var fromPatterns = quote(randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER))); + var joinPattern = randomIndexPattern(); + expectError( + "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(), + // Since the from pattern is partially quoted, we get an error at the end of the partially quoted string. + " mismatched input ':'" + ); + } + { // Generate a syntactically invalid (partial quoted) pattern. var fromPatterns = randomIdentifier() + ":" + quote(randomIndexPatterns(without(CROSS_CLUSTER))); @@ -3184,6 +3347,17 @@ public class StatementParserTests extends AbstractStatementParserTests { ); } + { + var fromPatterns = randomIndexPattern(); + // Generate a syntactically invalid (partial quoted) pattern. + var joinPattern = quote(randomIdentifier()) + ":" + unquoteIndexPattern(randomIndexPattern(without(CROSS_CLUSTER))); + expectError( + "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(), + // Since the join pattern is partially quoted, we get an error at the end of the partially quoted string. + "mismatched input ':'" + ); + } + { var fromPatterns = randomIndexPattern(); // Generate a syntactically invalid (partial quoted) pattern. @@ -3251,6 +3425,31 @@ public class StatementParserTests extends AbstractStatementParserTests { + "], index pattern selectors are not supported in LOOKUP JOIN" ); } + + { + // Although we don't support selector strings for remote indices, it's alright. + // The parser error message takes precedence. + var fromPatterns = randomIndexPatterns(); + var joinPattern = quote(randomIdentifier()) + "::" + randomFrom("data", "failures"); + // After the end of the partially quoted string, i.e. the index name, parser now expects "ON..." and not a selector string. + expectError( + "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(), + "mismatched input ':' expecting 'on'" + ); + } + + { + // Although we don't support selector strings for remote indices, it's alright. + // The parser error message takes precedence. + var fromPatterns = randomIndexPatterns(); + var joinPattern = randomIdentifier() + "::" + quote(randomFrom("data", "failures")); + // After the index name and "::", parser expects an unquoted string, i.e. the selector string should not be + // partially quoted. + expectError( + "FROM " + fromPatterns + " | LOOKUP JOIN " + joinPattern + " ON " + randomIdentifier(), + " mismatched input ':' expecting UNQUOTED_SOURCE" + ); + } } }