Rewrite match and match_phrase queries to term queries on keyword fields (#82612)

Term queries can, in certain circumstances (e.g. when run against constant keyword
fields), rewrite themselves to match_no_docs queries, which is very useful for filtering
out shards from searches and field_caps requests. Match and match_phrase
queries can also reduce down to simple term queries when there is no fuzziness defined
on them and they are run using a keyword analyzer.

This commit makes simple match and match_phrase queries rewrite themselves to term
queries when run against keyword fields.

Fixes #82515
This commit is contained in:
Alan Woodward 2022-01-17 17:02:07 +00:00 committed by GitHub
parent f287852424
commit 2d77ef57cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 177 additions and 14 deletions

View file

@ -63,26 +63,26 @@ public class QueryBuilderBWCIT extends AbstractFullClusterRestartTestCase {
static { static {
addCandidate(""" addCandidate("""
"match": { "keyword_field": "value"} "match": { "text_field": "value"}
""", new MatchQueryBuilder("keyword_field", "value")); """, new MatchQueryBuilder("text_field", "value"));
addCandidate(""" addCandidate("""
"match": { "keyword_field": {"query": "value", "operator": "and"} } "match": { "text_field": {"query": "value", "operator": "and"} }
""", new MatchQueryBuilder("keyword_field", "value").operator(Operator.AND)); """, new MatchQueryBuilder("text_field", "value").operator(Operator.AND));
addCandidate(""" addCandidate("""
"match": { "keyword_field": {"query": "value", "analyzer": "english"} } "match": { "text_field": {"query": "value", "analyzer": "english"} }
""", new MatchQueryBuilder("keyword_field", "value").analyzer("english")); """, new MatchQueryBuilder("text_field", "value").analyzer("english"));
addCandidate(""" addCandidate("""
"match": { "keyword_field": {"query": "value", "minimum_should_match": 3} } "match": { "text_field": {"query": "value", "minimum_should_match": 3} }
""", new MatchQueryBuilder("keyword_field", "value").minimumShouldMatch("3")); """, new MatchQueryBuilder("text_field", "value").minimumShouldMatch("3"));
addCandidate(""" addCandidate("""
"match": { "keyword_field": {"query": "value", "fuzziness": "auto"} } "match": { "text_field": {"query": "value", "fuzziness": "auto"} }
""", new MatchQueryBuilder("keyword_field", "value").fuzziness(Fuzziness.AUTO)); """, new MatchQueryBuilder("text_field", "value").fuzziness(Fuzziness.AUTO));
addCandidate(""" addCandidate("""
"match_phrase": { "keyword_field": "value"} "match_phrase": { "text_field": "value"}
""", new MatchPhraseQueryBuilder("keyword_field", "value")); """, new MatchPhraseQueryBuilder("text_field", "value"));
addCandidate(""" addCandidate("""
"match_phrase": { "keyword_field": {"query": "value", "slop": 3}} "match_phrase": { "text_field": {"query": "value", "slop": 3}}
""", new MatchPhraseQueryBuilder("keyword_field", "value").slop(3)); """, new MatchPhraseQueryBuilder("text_field", "value").slop(3));
addCandidate(""" addCandidate("""
"range": { "long_field": {"gte": 1, "lte": 9}} "range": { "long_field": {"gte": 1, "lte": 9}}
""", new RangeQueryBuilder("long_field").from(1).to(9)); """, new RangeQueryBuilder("long_field").from(1).to(9));
@ -179,6 +179,11 @@ public class QueryBuilderBWCIT extends AbstractFullClusterRestartTestCase {
mappingsAndSettings.field("type", "keyword"); mappingsAndSettings.field("type", "keyword");
mappingsAndSettings.endObject(); mappingsAndSettings.endObject();
} }
{
mappingsAndSettings.startObject("text_field");
mappingsAndSettings.field("type", "text");
mappingsAndSettings.endObject();
}
{ {
mappingsAndSettings.startObject("long_field"); mappingsAndSettings.startObject("long_field");
mappingsAndSettings.field("type", "long"); mappingsAndSettings.field("type", "long");

View file

@ -8,11 +8,14 @@
package org.elasticsearch.index.query; package org.elasticsearch.index.query;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.search.MatchQueryParser; import org.elasticsearch.index.search.MatchQueryParser;
import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentBuilder;
@ -148,6 +151,35 @@ public class MatchPhraseQueryBuilder extends AbstractQueryBuilder<MatchPhraseQue
builder.endObject(); builder.endObject();
} }
/**
 * Rewrites this match_phrase query into a term query when the analyzer that would be
 * applied to it is a {@link KeywordAnalyzer}.
 *
 * @param queryRewriteContext the rewrite context; nothing is rewritten if it cannot be
 *                            converted to a {@link SearchExecutionContext}
 * @return the rewritten term query, or {@code this} when no rewrite applies
 * @throws IOException if rewriting the replacement term query fails
 */
@Override
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
    final SearchExecutionContext searchContext = queryRewriteContext.convertToSearchExecutionContext();
    if (searchContext != null) {
        // With a keyword analyzer the query can be expressed as a TermQueryBuilder,
        // whose own rewrite may then shortcut further.
        final NamedAnalyzer effectiveAnalyzer = configuredAnalyzer(searchContext);
        final boolean keywordAnalyzed = effectiveAnalyzer != null && effectiveAnalyzer.analyzer() instanceof KeywordAnalyzer;
        if (keywordAnalyzed) {
            return new TermQueryBuilder(fieldName, value).rewrite(searchContext);
        }
    }
    return this;
}
/**
 * Resolves the analyzer to consider for this query: the one named on the query if set,
 * otherwise the search analyzer of the field type the context reports for the field.
 *
 * @param context the search execution context used for analyzer and field type lookups
 * @return the resolved analyzer; {@code null} if no analyzer is named on the query and
 *         the context has no field type for the field
 */
private NamedAnalyzer configuredAnalyzer(SearchExecutionContext context) {
    if (analyzer == null) {
        final MappedFieldType fieldType = context.getFieldType(fieldName);
        return fieldType == null ? null : fieldType.getTextSearchInfo().getSearchAnalyzer();
    }
    // An analyzer explicitly named on the query takes precedence; the lookup result is returned as-is.
    return context.getIndexAnalyzers().get(analyzer);
}
@Override @Override
protected Query doToQuery(SearchExecutionContext context) throws IOException { protected Query doToQuery(SearchExecutionContext context) throws IOException {
// validate context specific fields // validate context specific fields

View file

@ -8,6 +8,7 @@
package org.elasticsearch.index.query; package org.elasticsearch.index.query;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.elasticsearch.Version; import org.elasticsearch.Version;
@ -18,6 +19,8 @@ import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.RestApiVersion;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.query.support.QueryParsers;
import org.elasticsearch.index.search.MatchQueryParser; import org.elasticsearch.index.search.MatchQueryParser;
import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ParseField;
@ -350,6 +353,37 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
builder.endObject(); builder.endObject();
} }
/**
 * Rewrites this match query into a term query when it is neither fuzzy nor lenient and
 * the analyzer that would be applied to it is a {@link KeywordAnalyzer}.
 *
 * @param queryRewriteContext the rewrite context; nothing is rewritten if it cannot be
 *                            converted to a {@link SearchExecutionContext}
 * @return the rewritten term query, or {@code this} when no rewrite applies
 * @throws IOException if rewriting the replacement term query fails
 */
@Override
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
    // Term queries can be neither fuzzy nor lenient, so such queries are left untouched.
    final boolean termCompatible = fuzziness == null && lenient == false;
    if (termCompatible == false) {
        return this;
    }
    final SearchExecutionContext searchContext = queryRewriteContext.convertToSearchExecutionContext();
    if (searchContext != null) {
        // With a keyword analyzer the query can be expressed as a TermQueryBuilder,
        // whose own rewrite may then shortcut further.
        final NamedAnalyzer effectiveAnalyzer = configuredAnalyzer(searchContext);
        if (effectiveAnalyzer != null && effectiveAnalyzer.analyzer() instanceof KeywordAnalyzer) {
            return new TermQueryBuilder(fieldName, value).rewrite(searchContext);
        }
    }
    return this;
}
/**
 * Resolves the analyzer to consider for this query: the one named on the query if set,
 * otherwise the search analyzer of the field type the context reports for the field.
 *
 * @param context the search execution context used for analyzer and field type lookups
 * @return the resolved analyzer; {@code null} if no analyzer is named on the query and
 *         the context has no field type for the field
 */
private NamedAnalyzer configuredAnalyzer(SearchExecutionContext context) {
    if (analyzer == null) {
        final MappedFieldType fieldType = context.getFieldType(fieldName);
        return fieldType == null ? null : fieldType.getTextSearchInfo().getSearchAnalyzer();
    }
    // An analyzer explicitly named on the query takes precedence; the lookup result is returned as-is.
    return context.getIndexAnalyzers().get(analyzer);
}
@Override @Override
protected Query doToQuery(SearchExecutionContext context) throws IOException { protected Query doToQuery(SearchExecutionContext context) throws IOException {
// validate context specific fields // validate context specific fields

View file

@ -16,6 +16,7 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.ParsingException;
import org.elasticsearch.test.AbstractQueryTestCase; import org.elasticsearch.test.AbstractQueryTestCase;
@ -190,4 +191,39 @@ public class MatchPhraseQueryBuilderTests extends AbstractQueryTestCase<MatchPhr
e = expectThrows(ParsingException.class, () -> parseQuery(shortJson)); e = expectThrows(ParsingException.class, () -> parseQuery(shortJson));
assertEquals("[match_phrase] query doesn't support multiple fields, found [message1] and [message2]", e.getMessage()); assertEquals("[match_phrase] query doesn't support multiple fields, found [message1] and [message2]", e.getMessage());
} }
/**
 * A plain match_phrase query on {@code KEYWORD_FIELD_NAME} must rewrite to a term query
 * carrying the same field name and value.
 */
public void testRewriteToTermQueries() throws IOException {
    final QueryBuilder original = new MatchPhraseQueryBuilder(KEYWORD_FIELD_NAME, "value");
    final SearchExecutionContext searchContext = createSearchExecutionContext();
    final QueryBuilder result = original.rewrite(searchContext);

    assertThat(result, instanceOf(TermQueryBuilder.class));
    final TermQueryBuilder termQuery = (TermQueryBuilder) result;
    assertEquals(KEYWORD_FIELD_NAME, termQuery.fieldName);
    assertEquals(new BytesRef("value"), termQuery.value);
}
/**
 * A match_phrase query on {@code TEXT_FIELD_NAME} that explicitly names the "keyword"
 * analyzer must rewrite to a term query carrying the same field name and value.
 */
public void testRewriteToTermQueryWithAnalyzer() throws IOException {
    final MatchPhraseQueryBuilder original = new MatchPhraseQueryBuilder(TEXT_FIELD_NAME, "value");
    original.analyzer("keyword");
    final SearchExecutionContext searchContext = createSearchExecutionContext();
    final QueryBuilder result = original.rewrite(searchContext);

    assertThat(result, instanceOf(TermQueryBuilder.class));
    final TermQueryBuilder termQuery = (TermQueryBuilder) result;
    assertEquals(TEXT_FIELD_NAME, termQuery.fieldName);
    assertEquals(new BytesRef("value"), termQuery.value);
}
/**
 * A match_phrase query on {@code _index} for the value "does_not_exist" must rewrite
 * to a match_none query.
 */
public void testRewriteIndexQueryToMatchNone() throws IOException {
    final QueryBuilder indexQuery = new MatchPhraseQueryBuilder("_index", "does_not_exist");
    final SearchExecutionContext context = createSearchExecutionContext();
    final QueryBuilder result = indexQuery.rewrite(context);
    assertThat(result, instanceOf(MatchNoneQueryBuilder.class));
}
/**
 * A match_phrase query on {@code _index} for the name returned by {@code getIndex()}
 * must rewrite to a match_all query.
 */
public void testRewriteIndexQueryToNotMatchNone() throws IOException {
    final QueryBuilder indexQuery = new MatchPhraseQueryBuilder("_index", getIndex().getName());
    final SearchExecutionContext context = createSearchExecutionContext();
    final QueryBuilder result = indexQuery.rewrite(context);
    assertThat(result, instanceOf(MatchAllQueryBuilder.class));
}
} }

View file

@ -33,6 +33,7 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.search.MatchQueryParser; import org.elasticsearch.index.search.MatchQueryParser;
@ -574,4 +575,59 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
assertNotNull(rewritten.toQuery(context)); assertNotNull(rewritten.toQuery(context));
assertFalse("query should not be cacheable: " + queryBuilder.toString(), context.isCacheable()); assertFalse("query should not be cacheable: " + queryBuilder.toString(), context.isCacheable());
} }
/**
 * A plain match query on {@code KEYWORD_FIELD_NAME} must rewrite to a term query carrying
 * the same field name and value, and the boost set on the match query must be kept.
 */
public void testRewriteToTermQueries() throws IOException {
    final MatchQueryBuilder original = new MatchQueryBuilder(KEYWORD_FIELD_NAME, "value");
    original.boost(2f);
    final SearchExecutionContext searchContext = createSearchExecutionContext();
    final QueryBuilder result = original.rewrite(searchContext);

    assertThat(result, instanceOf(TermQueryBuilder.class));
    final TermQueryBuilder termQuery = (TermQueryBuilder) result;
    assertEquals(KEYWORD_FIELD_NAME, termQuery.fieldName);
    assertEquals(new BytesRef("value"), termQuery.value);
    assertThat(result.boost(), equalTo(2f));
}
/**
 * A match query on {@code TEXT_FIELD_NAME} that explicitly names the "keyword" analyzer
 * must rewrite to a term query carrying the same field name and value.
 */
public void testRewriteToTermQueryWithAnalyzer() throws IOException {
    final MatchQueryBuilder original = new MatchQueryBuilder(TEXT_FIELD_NAME, "value");
    original.analyzer("keyword");
    final SearchExecutionContext searchContext = createSearchExecutionContext();
    final QueryBuilder result = original.rewrite(searchContext);

    assertThat(result, instanceOf(TermQueryBuilder.class));
    final TermQueryBuilder termQuery = (TermQueryBuilder) result;
    assertEquals(TEXT_FIELD_NAME, termQuery.fieldName);
    assertEquals(new BytesRef("value"), termQuery.value);
}
/**
 * A match query with fuzziness configured cannot become a term query, so rewriting
 * must yield a query equal to the original.
 */
public void testRewriteWithFuzziness() throws IOException {
    final MatchQueryBuilder fuzzyQuery = new MatchQueryBuilder(KEYWORD_FIELD_NAME, "value");
    fuzzyQuery.fuzziness(Fuzziness.AUTO);
    final SearchExecutionContext searchContext = createSearchExecutionContext();
    final QueryBuilder result = fuzzyQuery.rewrite(searchContext);
    assertEquals(fuzzyQuery, result);
}
/**
 * A match query with leniency enabled cannot become a term query, so rewriting
 * must yield a query equal to the original.
 */
public void testRewriteWithLeniency() throws IOException {
    final MatchQueryBuilder lenientQuery = new MatchQueryBuilder(KEYWORD_FIELD_NAME, "value");
    lenientQuery.lenient(true);
    final SearchExecutionContext searchContext = createSearchExecutionContext();
    final QueryBuilder result = lenientQuery.rewrite(searchContext);
    assertEquals(lenientQuery, result);
}
/**
 * A match query on {@code _index} for the value "does_not_exist" must rewrite
 * to a match_none query.
 */
public void testRewriteIndexQueryToMatchNone() throws IOException {
    final QueryBuilder indexQuery = new MatchQueryBuilder("_index", "does_not_exist");
    final SearchExecutionContext context = createSearchExecutionContext();
    final QueryBuilder result = indexQuery.rewrite(context);
    assertThat(result, instanceOf(MatchNoneQueryBuilder.class));
}
/**
 * A match query on {@code _index} for the name returned by {@code getIndex()}
 * must rewrite to a match_all query.
 */
public void testRewriteIndexQueryToNotMatchNone() throws IOException {
    final QueryBuilder indexQuery = new MatchQueryBuilder("_index", getIndex().getName());
    final SearchExecutionContext context = createSearchExecutionContext();
    final QueryBuilder result = indexQuery.rewrite(context);
    assertThat(result, instanceOf(MatchAllQueryBuilder.class));
}
} }