mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 01:22:26 -04:00
Pushdown for LIKE (LIST) (#129557)
Improved performance of LIKE (LIST) by pushing an Automaton to do the evaluation down to Lucene.
This commit is contained in:
parent
e6347b8ab0
commit
caae426cf7
5 changed files with 189 additions and 12 deletions
5
docs/changelog/129557.yaml
Normal file
5
docs/changelog/129557.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 129557
|
||||
summary: Pushdown for LIKE (LIST)
|
||||
area: ES|QL
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.elasticsearch.TransportVersion;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Implements an Automaton query, which matches documents based on a Lucene Automaton.
|
||||
* It does not support serialization or XContent representation.
|
||||
*/
|
||||
public class AutomatonQueryBuilder extends AbstractQueryBuilder<AutomatonQueryBuilder> implements MultiTermQueryBuilder {
|
||||
private final String fieldName;
|
||||
private final Automaton automaton;
|
||||
private final String description;
|
||||
|
||||
public AutomatonQueryBuilder(String fieldName, Automaton automaton, String description) {
|
||||
if (Strings.isEmpty(fieldName)) {
|
||||
throw new IllegalArgumentException("field name is null or empty");
|
||||
}
|
||||
if (automaton == null) {
|
||||
throw new IllegalArgumentException("automaton cannot be null");
|
||||
}
|
||||
this.fieldName = fieldName;
|
||||
this.automaton = automaton;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String fieldName() {
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getWriteableName() {
|
||||
throw new UnsupportedOperationException("AutomatonQueryBuilder does not support getWriteableName");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doWriteTo(StreamOutput out) throws IOException {
|
||||
throw new UnsupportedEncodingException("AutomatonQueryBuilder does not support doWriteTo");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
throw new UnsupportedEncodingException("AutomatonQueryBuilder does not support doXContent");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query doToQuery(SearchExecutionContext context) throws IOException {
|
||||
return new AutomatonQueryWithDescription(new Term(fieldName), automaton, description);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int doHashCode() {
|
||||
return Objects.hash(fieldName, automaton, description);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doEquals(AutomatonQueryBuilder other) {
|
||||
return Objects.equals(fieldName, other.fieldName)
|
||||
&& Objects.equals(automaton, other.automaton)
|
||||
&& Objects.equals(description, other.description);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TransportVersion getMinimalSupportedVersion() {
|
||||
throw new UnsupportedOperationException("AutomatonQueryBuilder does not support getMinimalSupportedVersion");
|
||||
}
|
||||
|
||||
static class AutomatonQueryWithDescription extends AutomatonQuery {
|
||||
private final String description;
|
||||
|
||||
AutomatonQueryWithDescription(Term term, Automaton automaton, String description) {
|
||||
super(term, automaton);
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
if (this.field.equals(field)) {
|
||||
return description;
|
||||
}
|
||||
return this.field + ":" + description;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
package org.elasticsearch.xpack.esql.core.querydsl.query;
|
||||
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.elasticsearch.index.query.AutomatonQueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilder;
|
||||
import org.elasticsearch.xpack.esql.core.tree.Source;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Query that matches documents based on a Lucene Automaton.
|
||||
*/
|
||||
public class AutomatonQuery extends Query {
|
||||
|
||||
private final String field;
|
||||
private final Automaton automaton;
|
||||
private final String automatonDescription;
|
||||
|
||||
public AutomatonQuery(Source source, String field, Automaton automaton, String automatonDescription) {
|
||||
super(source);
|
||||
this.field = field;
|
||||
this.automaton = automaton;
|
||||
this.automatonDescription = automatonDescription;
|
||||
}
|
||||
|
||||
public String field() {
|
||||
return field;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected QueryBuilder asBuilder() {
|
||||
return new AutomatonQueryBuilder(field, automaton, automatonDescription);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(field, automaton, automatonDescription);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (obj == null || getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AutomatonQuery other = (AutomatonQuery) obj;
|
||||
return Objects.equals(field, other.field)
|
||||
&& Objects.equals(automaton, other.automaton)
|
||||
&& Objects.equals(automatonDescription, other.automatonDescription);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String innerToString() {
|
||||
return "AutomatonQuery{" + "field='" + field + '\'' + '}';
|
||||
}
|
||||
}
|
|
@ -264,13 +264,13 @@ public class PushQueriesIT extends ESRestTestCase {
|
|||
| WHERE test like ("%value*", "abc*")
|
||||
""";
|
||||
String luceneQuery = switch (type) {
|
||||
case KEYWORD, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
|
||||
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
|
||||
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
|
||||
case KEYWORD -> "test:LIKE(\"%value*\", \"abc*\"), caseInsensitive=false";
|
||||
};
|
||||
ComputeSignature dataNodeSignature = switch (type) {
|
||||
case CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
|
||||
case AUTO, KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD ->
|
||||
ComputeSignature.FILTER_IN_COMPUTE;
|
||||
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
|
||||
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
|
||||
};
|
||||
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
|
||||
}
|
||||
|
|
|
@ -12,7 +12,9 @@ import org.elasticsearch.common.io.stream.StreamInput;
|
|||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.xpack.esql.core.expression.Expression;
|
||||
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
|
||||
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern;
|
||||
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList;
|
||||
import org.elasticsearch.xpack.esql.core.querydsl.query.AutomatonQuery;
|
||||
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
|
||||
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
|
||||
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
|
||||
|
@ -23,6 +25,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdow
|
|||
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
|
||||
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
|
||||
|
@ -89,10 +92,6 @@ public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
|
|||
*/
|
||||
@Override
|
||||
public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
|
||||
if (pattern().patternList().size() != 1) {
|
||||
// we only support a single pattern in the list for pushdown for now
|
||||
return Translatable.NO;
|
||||
}
|
||||
return pushdownPredicates.isPushableAttribute(field()) ? Translatable.YES : Translatable.NO;
|
||||
|
||||
}
|
||||
|
@ -113,9 +112,12 @@ public class WildcardLikeList extends RegexMatch<WildcardPatternList> {
|
|||
* Throws an {@link IllegalArgumentException} if the pattern list contains more than one pattern.
|
||||
*/
|
||||
private Query translateField(String targetFieldName) {
|
||||
if (pattern().patternList().size() != 1) {
|
||||
throw new IllegalArgumentException("WildcardLikeList can only be translated when it has a single pattern");
|
||||
}
|
||||
return new WildcardQuery(source(), targetFieldName, pattern().patternList().getFirst().asLuceneWildcard(), caseInsensitive());
|
||||
return new AutomatonQuery(source(), targetFieldName, pattern().createAutomaton(caseInsensitive()), getAutomatonDescription());
|
||||
}
|
||||
|
||||
private String getAutomatonDescription() {
|
||||
// we use the information used to create the automaton to describe the query here
|
||||
String patternDesc = pattern().patternList().stream().map(WildcardPattern::pattern).collect(Collectors.joining("\", \""));
|
||||
return "LIKE(\"" + patternDesc + "\"), caseInsensitive=" + caseInsensitive();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue