Pushdown for LIKE (LIST) (#129557)

Improved the performance of LIKE (LIST) by pushing an Automaton down to Lucene to do the evaluation.
This commit is contained in:
Julian Kiryakov 2025-06-23 14:35:09 -04:00 committed by GitHub
parent e6347b8ab0
commit caae426cf7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 189 additions and 12 deletions

View file

@ -0,0 +1,5 @@
pr: 129557
summary: Pushdown for LIKE (LIST)
area: ES|QL
type: enhancement
issues: []

View file

@ -0,0 +1,104 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.query;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.automaton.Automaton;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Objects;
/**
 * Query builder that wraps an already-built Lucene {@link Automaton} and, at query-creation
 * time, produces an {@link AutomatonQuery} against a single field.
 * <p>
 * This builder cannot be serialized or rendered as XContent: {@link #getWriteableName()},
 * {@link #doWriteTo(StreamOutput)}, {@link #doXContent(XContentBuilder, Params)} and
 * {@link #getMinimalSupportedVersion()} all throw unconditionally.
 */
public class AutomatonQueryBuilder extends AbstractQueryBuilder<AutomatonQueryBuilder> implements MultiTermQueryBuilder {

    private final String fieldName;
    private final Automaton automaton;
    private final String description;

    /**
     * @param fieldName   name of the field to match against; must be neither {@code null} nor empty
     * @param automaton   automaton handed to the Lucene query; must not be {@code null}
     * @param description text returned by the generated Lucene query's {@code toString}
     * @throws IllegalArgumentException if {@code fieldName} is null/empty or {@code automaton} is null
     */
    public AutomatonQueryBuilder(String fieldName, Automaton automaton, String description) {
        // The field name is validated before the automaton, so it wins when both are invalid.
        if (Strings.isEmpty(fieldName)) {
            throw new IllegalArgumentException("field name is null or empty");
        }
        if (automaton == null) {
            throw new IllegalArgumentException("automaton cannot be null");
        }
        this.fieldName = fieldName;
        this.automaton = automaton;
        this.description = description;
    }

    /** Returns the name of the field this query targets. */
    @Override
    public String fieldName() {
        return fieldName;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public String getWriteableName() {
        throw new UnsupportedOperationException("AutomatonQueryBuilder does not support getWriteableName");
    }

    /**
     * Not supported.
     * <p>
     * NOTE(review): this throws {@link UnsupportedEncodingException} (an {@link IOException} subtype),
     * whereas {@link #getWriteableName()} throws {@link UnsupportedOperationException}; confirm
     * whether the difference is intentional.
     *
     * @throws UnsupportedEncodingException always
     */
    @Override
    protected void doWriteTo(StreamOutput out) throws IOException {
        throw new UnsupportedEncodingException("AutomatonQueryBuilder does not support doWriteTo");
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedEncodingException always
     */
    @Override
    protected void doXContent(XContentBuilder builder, Params params) throws IOException {
        throw new UnsupportedEncodingException("AutomatonQueryBuilder does not support doXContent");
    }

    /**
     * Builds the Lucene query: an {@link AutomatonQueryWithDescription} over a term that carries
     * only the field name, the automaton and the description supplied at construction.
     */
    @Override
    protected Query doToQuery(SearchExecutionContext context) throws IOException {
        final Term fieldOnlyTerm = new Term(fieldName);
        return new AutomatonQueryWithDescription(fieldOnlyTerm, automaton, description);
    }

    /** Hash over the field name, automaton and description. */
    @Override
    protected int doHashCode() {
        return Objects.hash(fieldName, automaton, description);
    }

    /**
     * Two builders are equal when their field name, automaton and description are all equal
     * according to {@link Objects#equals(Object, Object)}.
     */
    @Override
    protected boolean doEquals(AutomatonQueryBuilder other) {
        if (Objects.equals(fieldName, other.fieldName) == false) {
            return false;
        }
        if (Objects.equals(automaton, other.automaton) == false) {
            return false;
        }
        return Objects.equals(description, other.description);
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public TransportVersion getMinimalSupportedVersion() {
        throw new UnsupportedOperationException("AutomatonQueryBuilder does not support getMinimalSupportedVersion");
    }

    /**
     * An {@link AutomatonQuery} whose string form is a caller-supplied description.
     */
    static class AutomatonQueryWithDescription extends AutomatonQuery {

        private final String description;

        AutomatonQueryWithDescription(Term term, Automaton automaton, String description) {
            super(term, automaton);
            this.description = description;
        }

        /**
         * Returns the bare description when {@code field} equals this query's field,
         * otherwise the description prefixed with {@code "<field>:"}.
         */
        @Override
        public String toString(String field) {
            return this.field.equals(field) ? description : this.field + ":" + description;
        }
    }
}

View file

@ -0,0 +1,66 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.core.querydsl.query;
import org.apache.lucene.util.automaton.Automaton;
import org.elasticsearch.index.query.AutomatonQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.core.tree.Source;
import java.util.Objects;
/**
 * ES|QL query node that matches documents on a field using a Lucene {@link Automaton}.
 * It is converted into an {@link AutomatonQueryBuilder} by {@link #asBuilder()}.
 */
public class AutomatonQuery extends Query {

    private final String field;
    private final Automaton automaton;
    private final String automatonDescription;

    /**
     * @param source               source location passed to the {@link Query} superclass
     * @param field                name of the field the automaton is applied to
     * @param automaton            automaton forwarded to the query builder
     * @param automatonDescription description forwarded to the query builder alongside the automaton
     */
    public AutomatonQuery(Source source, String field, Automaton automaton, String automatonDescription) {
        super(source);
        this.field = field;
        this.automaton = automaton;
        this.automatonDescription = automatonDescription;
    }

    /** Returns the name of the field this query targets. */
    public String field() {
        return field;
    }

    /** Creates the {@link AutomatonQueryBuilder} carrying this query's field, automaton and description. */
    @Override
    protected QueryBuilder asBuilder() {
        return new AutomatonQueryBuilder(field, automaton, automatonDescription);
    }

    /** Hash over the field, automaton and description. */
    @Override
    public int hashCode() {
        return Objects.hash(field, automaton, automatonDescription);
    }

    /**
     * Equal to another object of exactly the same class whose field, automaton and
     * description are all equal according to {@link Objects#equals(Object, Object)}.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj != null && obj.getClass() == getClass()) {
            final AutomatonQuery that = (AutomatonQuery) obj;
            return Objects.equals(field, that.field)
                && Objects.equals(automaton, that.automaton)
                && Objects.equals(automatonDescription, that.automatonDescription);
        }
        return false;
    }

    /** String form containing only the field name. */
    @Override
    protected String innerToString() {
        return "AutomatonQuery{field='" + field + "'}";
    }
}

View file

@ -264,13 +264,13 @@ public class PushQueriesIT extends ESRestTestCase {
| WHERE test like ("%value*", "abc*") | WHERE test like ("%value*", "abc*")
"""; """;
String luceneQuery = switch (type) { String luceneQuery = switch (type) {
case KEYWORD, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*"; case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
case KEYWORD -> "test:LIKE(\"%value*\", \"abc*\"), caseInsensitive=false";
}; };
ComputeSignature dataNodeSignature = switch (type) { ComputeSignature dataNodeSignature = switch (type) {
case CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
case AUTO, KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
ComputeSignature.FILTER_IN_COMPUTE;
}; };
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
} }

View file

@ -12,7 +12,9 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern;
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList; import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList;
import org.elasticsearch.xpack.esql.core.querydsl.query.AutomatonQuery;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery; import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@ -23,6 +25,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdow
import org.elasticsearch.xpack.esql.planner.TranslatorHandler; import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
import java.io.IOException; import java.io.IOException;
import java.util.stream.Collectors;
public class WildcardLikeList extends RegexMatch<WildcardPatternList> { public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
@ -89,10 +92,6 @@ public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
*/ */
@Override @Override
public Translatable translatable(LucenePushdownPredicates pushdownPredicates) { public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
if (pattern().patternList().size() != 1) {
// we only support a single pattern in the list for pushdown for now
return Translatable.NO;
}
return pushdownPredicates.isPushableAttribute(field()) ? Translatable.YES : Translatable.NO; return pushdownPredicates.isPushableAttribute(field()) ? Translatable.YES : Translatable.NO;
} }
@ -113,9 +112,12 @@ public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
* Throws an {@link IllegalArgumentException} if the pattern list contains more than one pattern. * Throws an {@link IllegalArgumentException} if the pattern list contains more than one pattern.
*/ */
private Query translateField(String targetFieldName) { private Query translateField(String targetFieldName) {
if (pattern().patternList().size() != 1) { return new AutomatonQuery(source(), targetFieldName, pattern().createAutomaton(caseInsensitive()), getAutomatonDescription());
throw new IllegalArgumentException("WildcardLikeList can only be translated when it has a single pattern");
} }
return new WildcardQuery(source(), targetFieldName, pattern().patternList().getFirst().asLuceneWildcard(), caseInsensitive());
private String getAutomatonDescription() {
// we use the information used to create the automaton to describe the query here
String patternDesc = pattern().patternList().stream().map(WildcardPattern::pattern).collect(Collectors.joining("\", \""));
return "LIKE(\"" + patternDesc + "\"), caseInsensitive=" + caseInsensitive();
} }
} }