mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-30 10:23:41 -04:00
Search - make wildcard field use constant scoring queries for wildcard queries and caching fix (#70452)
* Make wildcard field use constant scoring queries for wildcard queries. Add a note about ignoring rewrite parameters on wildcard queries. Also fixes caching issue where case sensitive and case insensitive results were cached as the same Closes #69604
This commit is contained in:
parent
1db2b85e45
commit
2f9c7318c2
4 changed files with 62 additions and 43 deletions
|
@ -130,4 +130,5 @@ The following parameters are accepted by `wildcard` fields:
|
||||||
==== Limitations
|
==== Limitations
|
||||||
|
|
||||||
* `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
|
* `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
|
||||||
|
* When running `wildcard` queries any `rewrite` parameter is ignored. The scoring is always a constant score.
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,6 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.function.Supplier;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Query that runs an Automaton across all binary doc values.
|
* Query that runs an Automaton across all binary doc values.
|
||||||
|
@ -35,20 +34,16 @@ public class AutomatonQueryOnBinaryDv extends Query {
|
||||||
|
|
||||||
private final String field;
|
private final String field;
|
||||||
private final String matchPattern;
|
private final String matchPattern;
|
||||||
private final Supplier<Automaton> automatonSupplier;
|
private final ByteRunAutomaton bytesMatcher;
|
||||||
|
|
||||||
public AutomatonQueryOnBinaryDv(String field, String matchPattern, Supplier<Automaton> automatonSupplier) {
|
public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.matchPattern = matchPattern;
|
this.matchPattern = matchPattern;
|
||||||
this.automatonSupplier = automatonSupplier;
|
bytesMatcher = new ByteRunAutomaton(automaton);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
|
||||||
|
|
||||||
|
|
||||||
ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automatonSupplier.get());
|
|
||||||
|
|
||||||
return new ConstantScoreWeight(this, boost) {
|
return new ConstantScoreWeight(this, boost) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -99,12 +94,13 @@ public class AutomatonQueryOnBinaryDv extends Query {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj;
|
AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj;
|
||||||
return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern);
|
return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern)
|
||||||
|
&& Objects.equals(bytesMatcher, other.bytesMatcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(field, matchPattern);
|
return Objects.hash(field, matchPattern, bytesMatcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -322,21 +322,17 @@ public class WildcardFieldMapper extends FieldMapper {
|
||||||
addClause(string, rewritten, Occur.MUST);
|
addClause(string, rewritten, Occur.MUST);
|
||||||
clauseCount++;
|
clauseCount++;
|
||||||
}
|
}
|
||||||
Supplier<Automaton> deferredAutomatonSupplier = () -> {
|
Automaton automaton = caseInsensitive
|
||||||
if(caseInsensitive) {
|
? AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE)
|
||||||
return AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE);
|
: WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
|
||||||
} else {
|
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton);
|
||||||
return WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, deferredAutomatonSupplier);
|
|
||||||
if (clauseCount > 0) {
|
if (clauseCount > 0) {
|
||||||
// We can accelerate execution with the ngram query
|
// We can accelerate execution with the ngram query
|
||||||
BooleanQuery approxQuery = rewritten.build();
|
BooleanQuery approxQuery = rewritten.build();
|
||||||
BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder();
|
BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder();
|
||||||
verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST));
|
verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST));
|
||||||
verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST));
|
verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST));
|
||||||
return verifyingBuilder.build();
|
return new ConstantScoreQuery(verifyingBuilder.build());
|
||||||
} else if (numWildcardChars == 0 || numWildcardStrings > 0) {
|
} else if (numWildcardChars == 0 || numWildcardStrings > 0) {
|
||||||
// We have no concrete characters and we're not a pure length query e.g. ???
|
// We have no concrete characters and we're not a pure length query e.g. ???
|
||||||
return new DocValuesFieldExistsQuery(name());
|
return new DocValuesFieldExistsQuery(name());
|
||||||
|
@ -362,12 +358,9 @@ public class WildcardFieldMapper extends FieldMapper {
|
||||||
if (approxNgramQuery instanceof MatchAllDocsQuery) {
|
if (approxNgramQuery instanceof MatchAllDocsQuery) {
|
||||||
return existsQuery(context);
|
return existsQuery(context);
|
||||||
}
|
}
|
||||||
Supplier<Automaton> deferredAutomatonSupplier = ()-> {
|
RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
|
||||||
RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
|
Automaton automaton = regex.toAutomaton(maxDeterminizedStates);
|
||||||
return regex.toAutomaton(maxDeterminizedStates);
|
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, automaton);
|
||||||
};
|
|
||||||
|
|
||||||
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, deferredAutomatonSupplier);
|
|
||||||
|
|
||||||
// MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single
|
// MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single
|
||||||
// clause which we can't accelerate at all and needs verification. Example would be ".."
|
// clause which we can't accelerate at all and needs verification. Example would be ".."
|
||||||
|
@ -746,9 +739,8 @@ public class WildcardFieldMapper extends FieldMapper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Supplier <Automaton> deferredAutomatonSupplier
|
Automaton automaton = TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
|
||||||
= () -> TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
|
AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, automaton);
|
||||||
AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, deferredAutomatonSupplier);
|
|
||||||
|
|
||||||
if (accelerationQuery == null) {
|
if (accelerationQuery == null) {
|
||||||
return slowQuery;
|
return slowQuery;
|
||||||
|
@ -831,18 +823,15 @@ public class WildcardFieldMapper extends FieldMapper {
|
||||||
bqBuilder.add(ngramQ, Occur.MUST);
|
bqBuilder.add(ngramQ, Occur.MUST);
|
||||||
}
|
}
|
||||||
|
|
||||||
Supplier <Automaton> deferredAutomatonSupplier = ()->{
|
// Verification query
|
||||||
// Verification query
|
FuzzyQuery fq = new FuzzyQuery(
|
||||||
FuzzyQuery fq = new FuzzyQuery(
|
new Term(name(), searchTerm),
|
||||||
new Term(name(), searchTerm),
|
fuzziness.asDistance(searchTerm),
|
||||||
fuzziness.asDistance(searchTerm),
|
prefixLength,
|
||||||
prefixLength,
|
maxExpansions,
|
||||||
maxExpansions,
|
transpositions
|
||||||
transpositions
|
);
|
||||||
);
|
bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton), Occur.MUST);
|
||||||
return fq.getAutomata().automaton;
|
|
||||||
};
|
|
||||||
bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, deferredAutomatonSupplier), Occur.MUST);
|
|
||||||
|
|
||||||
return bqBuilder.build();
|
return bqBuilder.build();
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.queryparser.classic.QueryParser;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
@ -42,6 +43,7 @@ import org.apache.lucene.util.automaton.RegExp;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetadata;
|
import org.elasticsearch.cluster.metadata.IndexMetadata;
|
||||||
import org.elasticsearch.common.TriFunction;
|
import org.elasticsearch.common.TriFunction;
|
||||||
|
import org.elasticsearch.common.lucene.search.AutomatonQueries;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.unit.Fuzziness;
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
@ -548,13 +550,14 @@ public class WildcardFieldMapperTests extends MapperTestCase {
|
||||||
String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
|
String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
|
||||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
|
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
|
||||||
testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString);
|
testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString);
|
||||||
assertTrue(wildcardFieldQuery instanceof BooleanQuery);
|
assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof BooleanQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO All these expressions have no acceleration at all and could be improved
|
// TODO All these expressions have no acceleration at all and could be improved
|
||||||
String slowPatterns[] = { "??" };
|
String slowPatterns[] = { "??" };
|
||||||
for (String pattern : slowPatterns) {
|
for (String pattern : slowPatterns) {
|
||||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
|
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
|
||||||
|
wildcardFieldQuery = unwrapAnyConstantScore(wildcardFieldQuery);
|
||||||
assertTrue(
|
assertTrue(
|
||||||
pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery),
|
pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery),
|
||||||
wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv
|
wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv
|
||||||
|
@ -563,6 +566,26 @@ public class WildcardFieldMapperTests extends MapperTestCase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testQueryCachingEquality() throws IOException, ParseException {
|
||||||
|
String pattern = "A*b*B?a";
|
||||||
|
// Case sensitivity matters when it comes to caching
|
||||||
|
Automaton caseSensitiveAutomaton = WildcardQuery.toAutomaton(new Term("field", pattern));
|
||||||
|
Automaton caseInSensitiveAutomaton = AutomatonQueries.toCaseInsensitiveWildcardAutomaton(
|
||||||
|
new Term("field", pattern),
|
||||||
|
Integer.MAX_VALUE
|
||||||
|
);
|
||||||
|
AutomatonQueryOnBinaryDv csQ = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton);
|
||||||
|
AutomatonQueryOnBinaryDv ciQ = new AutomatonQueryOnBinaryDv("field", pattern, caseInSensitiveAutomaton);
|
||||||
|
assertNotEquals(csQ, ciQ);
|
||||||
|
assertNotEquals(csQ.hashCode(), ciQ.hashCode());
|
||||||
|
|
||||||
|
// Same query should be equal
|
||||||
|
Automaton caseSensitiveAutomaton2 = WildcardQuery.toAutomaton(new Term("field", pattern));
|
||||||
|
AutomatonQueryOnBinaryDv csQ2 = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton2);
|
||||||
|
assertEquals(csQ, csQ2);
|
||||||
|
assertEquals(csQ.hashCode(), csQ2.hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void minimalMapping(XContentBuilder b) throws IOException {
|
protected void minimalMapping(XContentBuilder b) throws IOException {
|
||||||
b.field("type", "wildcard");
|
b.field("type", "wildcard");
|
||||||
|
@ -719,8 +742,18 @@ public class WildcardFieldMapperTests extends MapperTestCase {
|
||||||
Query expectedAccelerationQuery = qsp.parse(expectedAccelerationQueryString);
|
Query expectedAccelerationQuery = qsp.parse(expectedAccelerationQueryString);
|
||||||
testExpectedAccelerationQuery(regex, combinedQuery, expectedAccelerationQuery);
|
testExpectedAccelerationQuery(regex, combinedQuery, expectedAccelerationQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Query unwrapAnyConstantScore(Query q) {
|
||||||
|
if (q instanceof ConstantScoreQuery) {
|
||||||
|
ConstantScoreQuery csq = (ConstantScoreQuery) q;
|
||||||
|
return csq.getQuery();
|
||||||
|
} else {
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException {
|
void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException {
|
||||||
BooleanQuery cq = (BooleanQuery) combinedQuery;
|
BooleanQuery cq = (BooleanQuery) unwrapAnyConstantScore(combinedQuery);
|
||||||
assert cq.clauses().size() == 2;
|
assert cq.clauses().size() == 2;
|
||||||
Query approximationQuery = null;
|
Query approximationQuery = null;
|
||||||
boolean verifyQueryFound = false;
|
boolean verifyQueryFound = false;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue