mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-24 23:27:25 -04:00
Rewrite match and match_phrase queries to term queries on keyword fields (#82612)
Term queries can in certain circumstances (e.g. when run against constant keyword fields) rewrite themselves to match_no_docs queries, which is very useful for filtering out shards from searches and field_caps requests. Match and match_phrase queries, in turn, reduce to simple term queries when there is no fuzziness defined on them and they are run using a keyword analyzer. This commit makes simple match and match_phrase queries rewrite themselves to term queries when run against keyword fields. Fixes #82515
This commit is contained in:
parent
f287852424
commit
2d77ef57cf
5 changed files with 177 additions and 14 deletions
|
@ -63,26 +63,26 @@ public class QueryBuilderBWCIT extends AbstractFullClusterRestartTestCase {
|
|||
|
||||
static {
|
||||
addCandidate("""
|
||||
"match": { "keyword_field": "value"}
|
||||
""", new MatchQueryBuilder("keyword_field", "value"));
|
||||
"match": { "text_field": "value"}
|
||||
""", new MatchQueryBuilder("text_field", "value"));
|
||||
addCandidate("""
|
||||
"match": { "keyword_field": {"query": "value", "operator": "and"} }
|
||||
""", new MatchQueryBuilder("keyword_field", "value").operator(Operator.AND));
|
||||
"match": { "text_field": {"query": "value", "operator": "and"} }
|
||||
""", new MatchQueryBuilder("text_field", "value").operator(Operator.AND));
|
||||
addCandidate("""
|
||||
"match": { "keyword_field": {"query": "value", "analyzer": "english"} }
|
||||
""", new MatchQueryBuilder("keyword_field", "value").analyzer("english"));
|
||||
"match": { "text_field": {"query": "value", "analyzer": "english"} }
|
||||
""", new MatchQueryBuilder("text_field", "value").analyzer("english"));
|
||||
addCandidate("""
|
||||
"match": { "keyword_field": {"query": "value", "minimum_should_match": 3} }
|
||||
""", new MatchQueryBuilder("keyword_field", "value").minimumShouldMatch("3"));
|
||||
"match": { "text_field": {"query": "value", "minimum_should_match": 3} }
|
||||
""", new MatchQueryBuilder("text_field", "value").minimumShouldMatch("3"));
|
||||
addCandidate("""
|
||||
"match": { "keyword_field": {"query": "value", "fuzziness": "auto"} }
|
||||
""", new MatchQueryBuilder("keyword_field", "value").fuzziness(Fuzziness.AUTO));
|
||||
"match": { "text_field": {"query": "value", "fuzziness": "auto"} }
|
||||
""", new MatchQueryBuilder("text_field", "value").fuzziness(Fuzziness.AUTO));
|
||||
addCandidate("""
|
||||
"match_phrase": { "keyword_field": "value"}
|
||||
""", new MatchPhraseQueryBuilder("keyword_field", "value"));
|
||||
"match_phrase": { "text_field": "value"}
|
||||
""", new MatchPhraseQueryBuilder("text_field", "value"));
|
||||
addCandidate("""
|
||||
"match_phrase": { "keyword_field": {"query": "value", "slop": 3}}
|
||||
""", new MatchPhraseQueryBuilder("keyword_field", "value").slop(3));
|
||||
"match_phrase": { "text_field": {"query": "value", "slop": 3}}
|
||||
""", new MatchPhraseQueryBuilder("text_field", "value").slop(3));
|
||||
addCandidate("""
|
||||
"range": { "long_field": {"gte": 1, "lte": 9}}
|
||||
""", new RangeQueryBuilder("long_field").from(1).to(9));
|
||||
|
@ -179,6 +179,11 @@ public class QueryBuilderBWCIT extends AbstractFullClusterRestartTestCase {
|
|||
mappingsAndSettings.field("type", "keyword");
|
||||
mappingsAndSettings.endObject();
|
||||
}
|
||||
{
|
||||
mappingsAndSettings.startObject("text_field");
|
||||
mappingsAndSettings.field("type", "text");
|
||||
mappingsAndSettings.endObject();
|
||||
}
|
||||
{
|
||||
mappingsAndSettings.startObject("long_field");
|
||||
mappingsAndSettings.field("type", "long");
|
||||
|
|
|
@ -8,11 +8,14 @@
|
|||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.search.MatchQueryParser;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
import org.elasticsearch.xcontent.XContentBuilder;
|
||||
|
@ -148,6 +151,35 @@ public class MatchPhraseQueryBuilder extends AbstractQueryBuilder<MatchPhraseQue
|
|||
builder.endObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
|
||||
SearchExecutionContext sec = queryRewriteContext.convertToSearchExecutionContext();
|
||||
if (sec == null) {
|
||||
return this;
|
||||
}
|
||||
// If we're using the default keyword analyzer then we can rewrite this to a TermQueryBuilder
|
||||
// and possibly shortcut
|
||||
// If we're using a keyword analyzer then we can rewrite this to a TermQueryBuilder
|
||||
// and possibly shortcut
|
||||
NamedAnalyzer configuredAnalyzer = configuredAnalyzer(sec);
|
||||
if (configuredAnalyzer != null && configuredAnalyzer.analyzer() instanceof KeywordAnalyzer) {
|
||||
TermQueryBuilder termQueryBuilder = new TermQueryBuilder(fieldName, value);
|
||||
return termQueryBuilder.rewrite(sec);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
private NamedAnalyzer configuredAnalyzer(SearchExecutionContext context) {
|
||||
if (analyzer != null) {
|
||||
return context.getIndexAnalyzers().get(analyzer);
|
||||
}
|
||||
MappedFieldType mft = context.getFieldType(fieldName);
|
||||
if (mft != null) {
|
||||
return mft.getTextSearchInfo().getSearchAnalyzer();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query doToQuery(SearchExecutionContext context) throws IOException {
|
||||
// validate context specific fields
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.elasticsearch.Version;
|
||||
|
@ -18,6 +19,8 @@ import org.elasticsearch.common.lucene.search.Queries;
|
|||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
|
||||
import org.elasticsearch.core.RestApiVersion;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.query.support.QueryParsers;
|
||||
import org.elasticsearch.index.search.MatchQueryParser;
|
||||
import org.elasticsearch.xcontent.ParseField;
|
||||
|
@ -350,6 +353,37 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
|
|||
builder.endObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
|
||||
if (fuzziness != null || lenient) {
|
||||
// Term queries can be neither fuzzy nor lenient, so don't rewrite under these conditions
|
||||
return this;
|
||||
}
|
||||
SearchExecutionContext sec = queryRewriteContext.convertToSearchExecutionContext();
|
||||
if (sec == null) {
|
||||
return this;
|
||||
}
|
||||
// If we're using a keyword analyzer then we can rewrite this to a TermQueryBuilder
|
||||
// and possibly shortcut
|
||||
NamedAnalyzer configuredAnalyzer = configuredAnalyzer(sec);
|
||||
if (configuredAnalyzer != null && configuredAnalyzer.analyzer() instanceof KeywordAnalyzer) {
|
||||
TermQueryBuilder termQueryBuilder = new TermQueryBuilder(fieldName, value);
|
||||
return termQueryBuilder.rewrite(sec);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
private NamedAnalyzer configuredAnalyzer(SearchExecutionContext context) {
|
||||
if (analyzer != null) {
|
||||
return context.getIndexAnalyzers().get(analyzer);
|
||||
}
|
||||
MappedFieldType mft = context.getFieldType(fieldName);
|
||||
if (mft != null) {
|
||||
return mft.getTextSearchInfo().getSearchAnalyzer();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query doToQuery(SearchExecutionContext context) throws IOException {
|
||||
// validate context specific fields
|
||||
|
|
|
@ -16,6 +16,7 @@ import org.apache.lucene.search.PhraseQuery;
|
|||
import org.apache.lucene.search.PointRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.test.AbstractQueryTestCase;
|
||||
|
||||
|
@ -190,4 +191,39 @@ public class MatchPhraseQueryBuilderTests extends AbstractQueryTestCase<MatchPhr
|
|||
e = expectThrows(ParsingException.class, () -> parseQuery(shortJson));
|
||||
assertEquals("[match_phrase] query doesn't support multiple fields, found [message1] and [message2]", e.getMessage());
|
||||
}
|
||||
|
||||
public void testRewriteToTermQueries() throws IOException {
|
||||
QueryBuilder queryBuilder = new MatchPhraseQueryBuilder(KEYWORD_FIELD_NAME, "value");
|
||||
SearchExecutionContext context = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = queryBuilder.rewrite(context);
|
||||
assertThat(rewritten, instanceOf(TermQueryBuilder.class));
|
||||
TermQueryBuilder tqb = (TermQueryBuilder) rewritten;
|
||||
assertEquals(KEYWORD_FIELD_NAME, tqb.fieldName);
|
||||
assertEquals(new BytesRef("value"), tqb.value);
|
||||
}
|
||||
|
||||
public void testRewriteToTermQueryWithAnalyzer() throws IOException {
|
||||
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder(TEXT_FIELD_NAME, "value");
|
||||
queryBuilder.analyzer("keyword");
|
||||
SearchExecutionContext context = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = queryBuilder.rewrite(context);
|
||||
assertThat(rewritten, instanceOf(TermQueryBuilder.class));
|
||||
TermQueryBuilder tqb = (TermQueryBuilder) rewritten;
|
||||
assertEquals(TEXT_FIELD_NAME, tqb.fieldName);
|
||||
assertEquals(new BytesRef("value"), tqb.value);
|
||||
}
|
||||
|
||||
public void testRewriteIndexQueryToMatchNone() throws IOException {
|
||||
QueryBuilder query = new MatchPhraseQueryBuilder("_index", "does_not_exist");
|
||||
SearchExecutionContext searchExecutionContext = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = query.rewrite(searchExecutionContext);
|
||||
assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class));
|
||||
}
|
||||
|
||||
public void testRewriteIndexQueryToNotMatchNone() throws IOException {
|
||||
QueryBuilder query = new MatchPhraseQueryBuilder("_index", getIndex().getName());
|
||||
SearchExecutionContext searchExecutionContext = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = query.rewrite(searchExecutionContext);
|
||||
assertThat(rewritten, instanceOf(MatchAllQueryBuilder.class));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.elasticsearch.common.Strings;
|
|||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.search.MatchQueryParser;
|
||||
|
@ -574,4 +575,59 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
|
|||
assertNotNull(rewritten.toQuery(context));
|
||||
assertFalse("query should not be cacheable: " + queryBuilder.toString(), context.isCacheable());
|
||||
}
|
||||
|
||||
public void testRewriteToTermQueries() throws IOException {
|
||||
MatchQueryBuilder queryBuilder = new MatchQueryBuilder(KEYWORD_FIELD_NAME, "value");
|
||||
queryBuilder.boost(2f);
|
||||
SearchExecutionContext context = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = queryBuilder.rewrite(context);
|
||||
assertThat(rewritten, instanceOf(TermQueryBuilder.class));
|
||||
TermQueryBuilder tqb = (TermQueryBuilder) rewritten;
|
||||
assertEquals(KEYWORD_FIELD_NAME, tqb.fieldName);
|
||||
assertEquals(new BytesRef("value"), tqb.value);
|
||||
assertThat(rewritten.boost(), equalTo(2f));
|
||||
}
|
||||
|
||||
public void testRewriteToTermQueryWithAnalyzer() throws IOException {
|
||||
MatchQueryBuilder queryBuilder = new MatchQueryBuilder(TEXT_FIELD_NAME, "value");
|
||||
queryBuilder.analyzer("keyword");
|
||||
SearchExecutionContext context = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = queryBuilder.rewrite(context);
|
||||
assertThat(rewritten, instanceOf(TermQueryBuilder.class));
|
||||
TermQueryBuilder tqb = (TermQueryBuilder) rewritten;
|
||||
assertEquals(TEXT_FIELD_NAME, tqb.fieldName);
|
||||
assertEquals(new BytesRef("value"), tqb.value);
|
||||
}
|
||||
|
||||
public void testRewriteWithFuzziness() throws IOException {
|
||||
// If we've configured fuzziness then we can't rewrite to a term query
|
||||
MatchQueryBuilder queryBuilder = new MatchQueryBuilder(KEYWORD_FIELD_NAME, "value");
|
||||
queryBuilder.fuzziness(Fuzziness.AUTO);
|
||||
SearchExecutionContext context = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = queryBuilder.rewrite(context);
|
||||
assertEquals(queryBuilder, rewritten);
|
||||
}
|
||||
|
||||
public void testRewriteWithLeniency() throws IOException {
|
||||
// If we've configured leniency then we can't rewrite to a term query
|
||||
MatchQueryBuilder queryBuilder = new MatchQueryBuilder(KEYWORD_FIELD_NAME, "value");
|
||||
queryBuilder.lenient(true);
|
||||
SearchExecutionContext context = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = queryBuilder.rewrite(context);
|
||||
assertEquals(queryBuilder, rewritten);
|
||||
}
|
||||
|
||||
public void testRewriteIndexQueryToMatchNone() throws IOException {
|
||||
QueryBuilder query = new MatchQueryBuilder("_index", "does_not_exist");
|
||||
SearchExecutionContext searchExecutionContext = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = query.rewrite(searchExecutionContext);
|
||||
assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class));
|
||||
}
|
||||
|
||||
public void testRewriteIndexQueryToNotMatchNone() throws IOException {
|
||||
QueryBuilder query = new MatchQueryBuilder("_index", getIndex().getName());
|
||||
SearchExecutionContext searchExecutionContext = createSearchExecutionContext();
|
||||
QueryBuilder rewritten = query.rewrite(searchExecutionContext);
|
||||
assertThat(rewritten, instanceOf(MatchAllQueryBuilder.class));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue