Search - make wildcard field use constant scoring queries for wildcard queries and caching fix (#70452)

* Make wildcard field use constant scoring queries for wildcard queries. Add a note about ignoring rewrite parameters on wildcard queries.

Also fixes caching issue where case sensitive and case insensitive results were cached as the same

Closes #69604
This commit is contained in:
markharwood 2021-03-30 10:37:39 +01:00 committed by GitHub
parent 1db2b85e45
commit 2f9c7318c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 43 deletions

View file

@ -130,4 +130,5 @@ The following parameters are accepted by `wildcard` fields:
==== Limitations ==== Limitations
* `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries. * `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.
* When running `wildcard` queries any `rewrite` parameter is ignored. The scoring is always a constant score.

View file

@ -25,7 +25,6 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton;
import java.io.IOException; import java.io.IOException;
import java.util.Objects; import java.util.Objects;
import java.util.function.Supplier;
/** /**
* Query that runs an Automaton across all binary doc values. * Query that runs an Automaton across all binary doc values.
@ -35,20 +34,16 @@ public class AutomatonQueryOnBinaryDv extends Query {
private final String field; private final String field;
private final String matchPattern; private final String matchPattern;
private final Supplier<Automaton> automatonSupplier; private final ByteRunAutomaton bytesMatcher;
public AutomatonQueryOnBinaryDv(String field, String matchPattern, Supplier<Automaton> automatonSupplier) { public AutomatonQueryOnBinaryDv(String field, String matchPattern, Automaton automaton) {
this.field = field; this.field = field;
this.matchPattern = matchPattern; this.matchPattern = matchPattern;
this.automatonSupplier = automatonSupplier; bytesMatcher = new ByteRunAutomaton(automaton);
} }
@Override @Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automatonSupplier.get());
return new ConstantScoreWeight(this, boost) { return new ConstantScoreWeight(this, boost) {
@Override @Override
@ -99,12 +94,13 @@ public class AutomatonQueryOnBinaryDv extends Query {
return false; return false;
} }
AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj; AutomatonQueryOnBinaryDv other = (AutomatonQueryOnBinaryDv) obj;
return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern); return Objects.equals(field, other.field) && Objects.equals(matchPattern, other.matchPattern)
&& Objects.equals(bytesMatcher, other.bytesMatcher);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(field, matchPattern); return Objects.hash(field, matchPattern, bytesMatcher);
} }
} }

View file

@ -322,21 +322,17 @@ public class WildcardFieldMapper extends FieldMapper {
addClause(string, rewritten, Occur.MUST); addClause(string, rewritten, Occur.MUST);
clauseCount++; clauseCount++;
} }
Supplier<Automaton> deferredAutomatonSupplier = () -> { Automaton automaton = caseInsensitive
if(caseInsensitive) { ? AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE)
return AutomatonQueries.toCaseInsensitiveWildcardAutomaton(new Term(name(), wildcardPattern), Integer.MAX_VALUE); : WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
} else { AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, automaton);
return WildcardQuery.toAutomaton(new Term(name(), wildcardPattern));
}
};
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), wildcardPattern, deferredAutomatonSupplier);
if (clauseCount > 0) { if (clauseCount > 0) {
// We can accelerate execution with the ngram query // We can accelerate execution with the ngram query
BooleanQuery approxQuery = rewritten.build(); BooleanQuery approxQuery = rewritten.build();
BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder(); BooleanQuery.Builder verifyingBuilder = new BooleanQuery.Builder();
verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST)); verifyingBuilder.add(new BooleanClause(approxQuery, Occur.MUST));
verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST)); verifyingBuilder.add(new BooleanClause(verifyingQuery, Occur.MUST));
return verifyingBuilder.build(); return new ConstantScoreQuery(verifyingBuilder.build());
} else if (numWildcardChars == 0 || numWildcardStrings > 0) { } else if (numWildcardChars == 0 || numWildcardStrings > 0) {
// We have no concrete characters and we're not a pure length query e.g. ??? // We have no concrete characters and we're not a pure length query e.g. ???
return new DocValuesFieldExistsQuery(name()); return new DocValuesFieldExistsQuery(name());
@ -362,12 +358,9 @@ public class WildcardFieldMapper extends FieldMapper {
if (approxNgramQuery instanceof MatchAllDocsQuery) { if (approxNgramQuery instanceof MatchAllDocsQuery) {
return existsQuery(context); return existsQuery(context);
} }
Supplier<Automaton> deferredAutomatonSupplier = ()-> {
RegExp regex = new RegExp(value, syntaxFlags, matchFlags); RegExp regex = new RegExp(value, syntaxFlags, matchFlags);
return regex.toAutomaton(maxDeterminizedStates); Automaton automaton = regex.toAutomaton(maxDeterminizedStates);
}; AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, automaton);
AutomatonQueryOnBinaryDv verifyingQuery = new AutomatonQueryOnBinaryDv(name(), value, deferredAutomatonSupplier);
// MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single // MatchAllButRequireVerificationQuery is a special case meaning the regex is reduced to a single
// clause which we can't accelerate at all and needs verification. Example would be ".." // clause which we can't accelerate at all and needs verification. Example would be ".."
@ -746,9 +739,8 @@ public class WildcardFieldMapper extends FieldMapper {
} }
} }
} }
Supplier <Automaton> deferredAutomatonSupplier Automaton automaton = TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper);
= () -> TermRangeQuery.toAutomaton(lower, upper, includeLower, includeUpper); AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, automaton);
AutomatonQueryOnBinaryDv slowQuery = new AutomatonQueryOnBinaryDv(name(), lower + "-" + upper, deferredAutomatonSupplier);
if (accelerationQuery == null) { if (accelerationQuery == null) {
return slowQuery; return slowQuery;
@ -831,7 +823,6 @@ public class WildcardFieldMapper extends FieldMapper {
bqBuilder.add(ngramQ, Occur.MUST); bqBuilder.add(ngramQ, Occur.MUST);
} }
Supplier <Automaton> deferredAutomatonSupplier = ()->{
// Verification query // Verification query
FuzzyQuery fq = new FuzzyQuery( FuzzyQuery fq = new FuzzyQuery(
new Term(name(), searchTerm), new Term(name(), searchTerm),
@ -840,9 +831,7 @@ public class WildcardFieldMapper extends FieldMapper {
maxExpansions, maxExpansions,
transpositions transpositions
); );
return fq.getAutomata().automaton; bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, fq.getAutomata().automaton), Occur.MUST);
};
bqBuilder.add(new AutomatonQueryOnBinaryDv(name(), searchTerm, deferredAutomatonSupplier), Occur.MUST);
return bqBuilder.build(); return bqBuilder.build();
} catch (IOException ioe) { } catch (IOException ioe) {

View file

@ -22,6 +22,7 @@ import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
@ -42,6 +43,7 @@ import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
@ -548,13 +550,14 @@ public class WildcardFieldMapperTests extends MapperTestCase {
String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR); String expectedAccelerationQueryString = test[1].replaceAll("_", "" + WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT); Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString); testExpectedAccelerationQuery(pattern, wildcardFieldQuery, expectedAccelerationQueryString);
assertTrue(wildcardFieldQuery instanceof BooleanQuery); assertTrue(unwrapAnyConstantScore(wildcardFieldQuery) instanceof BooleanQuery);
} }
// TODO All these expressions have no acceleration at all and could be improved // TODO All these expressions have no acceleration at all and could be improved
String slowPatterns[] = { "??" }; String slowPatterns[] = { "??" };
for (String pattern : slowPatterns) { for (String pattern : slowPatterns) {
Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT); Query wildcardFieldQuery = wildcardFieldType.fieldType().wildcardQuery(pattern, null, MOCK_CONTEXT);
wildcardFieldQuery = unwrapAnyConstantScore(wildcardFieldQuery);
assertTrue( assertTrue(
pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery), pattern + " was not as slow as we assumed " + formatQuery(wildcardFieldQuery),
wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv
@ -563,6 +566,26 @@ public class WildcardFieldMapperTests extends MapperTestCase {
} }
public void testQueryCachingEquality() throws IOException, ParseException {
String pattern = "A*b*B?a";
// Case sensitivity matters when it comes to caching
Automaton caseSensitiveAutomaton = WildcardQuery.toAutomaton(new Term("field", pattern));
Automaton caseInSensitiveAutomaton = AutomatonQueries.toCaseInsensitiveWildcardAutomaton(
new Term("field", pattern),
Integer.MAX_VALUE
);
AutomatonQueryOnBinaryDv csQ = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton);
AutomatonQueryOnBinaryDv ciQ = new AutomatonQueryOnBinaryDv("field", pattern, caseInSensitiveAutomaton);
assertNotEquals(csQ, ciQ);
assertNotEquals(csQ.hashCode(), ciQ.hashCode());
// Same query should be equal
Automaton caseSensitiveAutomaton2 = WildcardQuery.toAutomaton(new Term("field", pattern));
AutomatonQueryOnBinaryDv csQ2 = new AutomatonQueryOnBinaryDv("field", pattern, caseSensitiveAutomaton2);
assertEquals(csQ, csQ2);
assertEquals(csQ.hashCode(), csQ2.hashCode());
}
@Override @Override
protected void minimalMapping(XContentBuilder b) throws IOException { protected void minimalMapping(XContentBuilder b) throws IOException {
b.field("type", "wildcard"); b.field("type", "wildcard");
@ -719,8 +742,18 @@ public class WildcardFieldMapperTests extends MapperTestCase {
Query expectedAccelerationQuery = qsp.parse(expectedAccelerationQueryString); Query expectedAccelerationQuery = qsp.parse(expectedAccelerationQueryString);
testExpectedAccelerationQuery(regex, combinedQuery, expectedAccelerationQuery); testExpectedAccelerationQuery(regex, combinedQuery, expectedAccelerationQuery);
} }
private Query unwrapAnyConstantScore(Query q) {
if (q instanceof ConstantScoreQuery) {
ConstantScoreQuery csq = (ConstantScoreQuery) q;
return csq.getQuery();
} else {
return q;
}
}
void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException { void testExpectedAccelerationQuery(String regex, Query combinedQuery, Query expectedAccelerationQuery) throws ParseException {
BooleanQuery cq = (BooleanQuery) combinedQuery; BooleanQuery cq = (BooleanQuery) unwrapAnyConstantScore(combinedQuery);
assert cq.clauses().size() == 2; assert cq.clauses().size() == 2;
Query approximationQuery = null; Query approximationQuery = null;
boolean verifyQueryFound = false; boolean verifyQueryFound = false;