ESQL: Push more ==s on text fields to lucene (#126641)

If you do:
```
| WHERE text_field == "cat"
```
we can't push to the text field because it's search index is for
individual words. But most text fields have a `.keyword` sub field and
we *can* query it's index. EXCEPT! It's normal for these fields to have
`ignore_above` in their mapping. In that case we don't push to the
field. Very sad.

With this change we can push down `==`, but only when the right hand
side is shorter than the `ignore_above`.

This has pretty much infinite speed gain. An example using a million
documents:
```
Before:  "took" : 391,
 After:  "took" :   4,
```

But this is going from totally un-indexed linear scans to totally
indexed. You can make the "Before" number as high as you want by loading
more data.
This commit is contained in:
Nik Everett 2025-04-22 15:24:59 -04:00 committed by GitHub
parent 09541c596a
commit b527e4b79e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
49 changed files with 747 additions and 96 deletions

View file

@ -0,0 +1,5 @@
pr: 126641
summary: Push more `==`s on text fields to lucene
area: ES|QL
type: enhancement
issues: []

View file

@ -983,6 +983,21 @@ public final class TextFieldMapper extends FieldMapper {
&& syntheticSourceDelegate.isIndexed();
}
/**
* Returns true if the delegate sub-field can be used for querying only (ie. isIndexed must be true)
*/
public boolean canUseSyntheticSourceDelegateForQueryingEquality(String str) {
if (syntheticSourceDelegate == null
// Can't push equality to an index if there isn't an index
|| syntheticSourceDelegate.isIndexed() == false
// ESQL needs docs values to push equality
|| syntheticSourceDelegate.hasDocValues() == false) {
return false;
}
// Can't push equality if the field we're checking for is so big we'd ignore it.
return str.length() <= syntheticSourceDelegate.ignoreAbove();
}
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (canUseSyntheticSourceDelegateForLoading()) {

View file

@ -0,0 +1,168 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.qa.single_node;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.test.ListMatcher;
import org.elasticsearch.test.MapMatcher;
import org.elasticsearch.test.TestClustersThreadFilter;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.esql.AssertWarnings;
import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase;
import org.junit.ClassRule;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import static org.elasticsearch.test.ListMatcher.matchesList;
import static org.elasticsearch.test.MapMatcher.assertMap;
import static org.elasticsearch.test.MapMatcher.matchesMap;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.entityToMap;
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder;
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.runEsql;
import static org.elasticsearch.xpack.esql.qa.single_node.RestEsqlIT.commonProfile;
import static org.elasticsearch.xpack.esql.qa.single_node.RestEsqlIT.fixTypesOnProfile;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.startsWith;
/**
* Tests for pushing queries to lucene.
*/
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
public class PushQueriesIT extends ESRestTestCase {
@ClassRule
public static ElasticsearchCluster cluster = Clusters.testCluster();
public void testPushEqualityOnDefaults() throws IOException {
String value = "v".repeat(between(0, 256));
testPushQuery(value, """
FROM test
| WHERE test == "%value"
""", "#test.keyword:%value -_ignored:test.keyword", false);
}
public void testPushEqualityOnDefaultsTooBigToPush() throws IOException {
String value = "a".repeat(between(257, 1000));
testPushQuery(value, """
FROM test
| WHERE test == "%value"
""", "*:*", true);
}
public void testPushCaseInsensitiveEqualityOnDefaults() throws IOException {
String value = "a".repeat(between(0, 256));
testPushQuery(value, """
FROM test
| WHERE TO_LOWER(test) == "%value"
""", "*:*", true);
}
private void testPushQuery(String value, String esqlQuery, String luceneQuery, boolean filterInCompute) throws IOException {
indexValue(value);
RestEsqlTestCase.RequestObjectBuilder builder = requestObjectBuilder().query(
esqlQuery.replaceAll("%value", value) + "\n| KEEP test"
);
builder.profile(true);
Map<String, Object> result = runEsql(builder, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC);
assertResultMap(
result,
getResultMatcher(result).entry(
"profile",
matchesMap().entry("drivers", instanceOf(List.class))
.entry("planning", matchesMap().extraOk())
.entry("query", matchesMap().extraOk())
),
matchesList().item(matchesMap().entry("name", "test").entry("type", "text")),
equalTo(List.of(List.of(value)))
);
@SuppressWarnings("unchecked")
List<Map<String, Object>> profiles = (List<Map<String, Object>>) ((Map<String, Object>) result.get("profile")).get("drivers");
for (Map<String, Object> p : profiles) {
fixTypesOnProfile(p);
assertThat(p, commonProfile());
List<String> sig = new ArrayList<>();
@SuppressWarnings("unchecked")
List<Map<String, Object>> operators = (List<Map<String, Object>>) p.get("operators");
for (Map<String, Object> o : operators) {
sig.add(checkOperatorProfile(o, luceneQuery.replaceAll("%value", value)));
}
String description = p.get("description").toString();
switch (description) {
case "data" -> {
ListMatcher matcher = matchesList().item("LuceneSourceOperator").item("ValuesSourceReaderOperator");
if (filterInCompute) {
matcher = matcher.item("FilterOperator").item("LimitOperator");
}
matcher = matcher.item("ProjectOperator").item("ExchangeSinkOperator");
assertMap(sig, matcher);
}
case "node_reduce" -> assertMap(sig, matchesList().item("ExchangeSourceOperator").item("ExchangeSinkOperator"));
case "final" -> assertMap(
sig,
matchesList().item("ExchangeSourceOperator").item("LimitOperator").item("ProjectOperator").item("OutputOperator")
);
default -> throw new IllegalArgumentException("can't match " + description);
}
}
}
private void indexValue(String value) throws IOException {
Request createIndex = new Request("PUT", "test");
createIndex.setJsonEntity("""
{
"settings": {
"index": {
"number_of_shards": 1
}
}
}""");
Response createResponse = client().performRequest(createIndex);
assertThat(
entityToMap(createResponse.getEntity(), XContentType.JSON),
matchesMap().entry("shards_acknowledged", true).entry("index", "test").entry("acknowledged", true)
);
Request bulk = new Request("POST", "/_bulk");
bulk.addParameter("refresh", "");
bulk.setJsonEntity(String.format("""
{"create":{"_index":"test"}}
{"test":"%s"}
""", value));
Response bulkResponse = client().performRequest(bulk);
assertThat(entityToMap(bulkResponse.getEntity(), XContentType.JSON), matchesMap().entry("errors", false).extraOk());
}
private static final Pattern TO_NAME = Pattern.compile("\\[.+", Pattern.DOTALL);
private static String checkOperatorProfile(Map<String, Object> o, String query) {
String name = (String) o.get("operator");
name = TO_NAME.matcher(name).replaceAll("");
if (name.equals("LuceneSourceOperator")) {
MapMatcher expectedOp = matchesMap().entry("operator", startsWith(name))
.entry("status", matchesMap().entry("processed_queries", List.of(query)).extraOk());
assertMap(o, expectedOp);
}
return name;
}
@Override
protected String getTestRestCluster() {
return cluster.getHttpAddresses();
}
}

View file

@ -648,7 +648,7 @@ public class RestEsqlIT extends RestEsqlTestCase {
}
}
private MapMatcher commonProfile() {
static MapMatcher commonProfile() {
return matchesMap() //
.entry("description", any(String.class))
.entry("cluster_name", any(String.class))
@ -669,7 +669,7 @@ public class RestEsqlIT extends RestEsqlTestCase {
* come back as integers and sometimes longs. This just promotes
* them to long every time.
*/
private void fixTypesOnProfile(Map<String, Object> profile) {
static void fixTypesOnProfile(Map<String, Object> profile) {
profile.put("iterations", ((Number) profile.get("iterations")).longValue());
profile.put("cpu_nanos", ((Number) profile.get("cpu_nanos")).longValue());
profile.put("took_nanos", ((Number) profile.get("took_nanos")).longValue());

View file

@ -1235,7 +1235,8 @@ public abstract class RestEsqlTestCase extends ESRestTestCase {
return runEsqlAsync(requestObject, randomBoolean(), new AssertWarnings.NoWarnings());
}
static Map<String, Object> runEsql(RequestObjectBuilder requestObject, AssertWarnings assertWarnings, Mode mode) throws IOException {
public static Map<String, Object> runEsql(RequestObjectBuilder requestObject, AssertWarnings assertWarnings, Mode mode)
throws IOException {
if (mode == ASYNC) {
return runEsqlAsync(requestObject, randomBoolean(), assertWarnings);
} else {

View file

@ -139,6 +139,7 @@ public class CsvTestsDataLoader {
private static final TestDataset BOOKS = new TestDataset("books").withSetting("books-settings.json");
private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true);
private static final TestDataset LOGS = new TestDataset("logs");
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
public static final Map<String, TestDataset> CSV_DATASET_MAP = Map.ofEntries(
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@ -196,7 +197,8 @@ public class CsvTestsDataLoader {
Map.entry(ADDRESSES.indexName, ADDRESSES),
Map.entry(BOOKS.indexName, BOOKS),
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
Map.entry(LOGS.indexName, LOGS)
Map.entry(LOGS.indexName, LOGS),
Map.entry(MV_TEXT.indexName, MV_TEXT)
);
private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");

View file

@ -294,6 +294,11 @@ public final class EsqlTestUtils {
public boolean isSingleValue(String field) {
return false;
}
@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
return false;
}
}
/**

View file

@ -0,0 +1,5 @@
@timestamp:date ,message:text
2023-10-23T13:55:01.543Z,[Connected to 10.1.0.1, Banana]
2023-10-23T13:55:01.544Z,Connected to 10.1.0.1
2023-10-23T13:55:01.545Z,[Connected to 10.1.0.1, More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100]
2023-10-23T13:55:01.546Z,More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100
1 @timestamp:date ,message:text
2 2023-10-23T13:55:01.543Z,[Connected to 10.1.0.1, Banana]
3 2023-10-23T13:55:01.544Z,Connected to 10.1.0.1
4 2023-10-23T13:55:01.545Z,[Connected to 10.1.0.1, More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100]
5 2023-10-23T13:55:01.546Z,More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100

View file

@ -0,0 +1,42 @@
{
"properties" : {
"emp_no" : {
"type" : "integer"
},
"first_name" : {
"type" : "keyword"
},
"gender" : {
"type" : "text"
},
"languages" : {
"type" : "byte"
},
"last_name" : {
"type" : "keyword"
},
"salary" : {
"type" : "integer"
},
"_meta_field": {
"type" : "keyword"
},
"hire_date": {
"type": "date"
},
"job": {
"type": "text",
"fields": {
"raw": {
"type": "keyword",
"ignore_above": 4
}
}
},
"long_noidx": {
"type": "long",
"index": false,
"doc_values": false
}
}
}

View file

@ -0,0 +1,16 @@
{
"properties": {
"@timestamp": {
"type": "date"
},
"message": {
"type": "text",
"fields": {
"raw": {
"type": "keyword",
"ignore_above": 100
}
}
}
}
}

View file

@ -2308,3 +2308,27 @@ message:keyword
foo ( bar
// end::rlikeEscapingTripleQuotes-result[]
;
mvStringEquals
FROM mv_text
| WHERE message == "Connected to 10.1.0.1"
| KEEP @timestamp, message
;
warning:Line 2:9: evaluation of [message == \"Connected to 10.1.0.1\"] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
@timestamp:date | message:text
2023-10-23T13:55:01.544Z|Connected to 10.1.0.1
;
mvStringEqualsLongString
FROM mv_text
| WHERE message == "More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100"
| KEEP @timestamp, message
;
warning:Line 2:9: evaluation of [message == \"More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100\"] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
@timestamp:date | message:text
2023-10-23T13:55:01.546Z|More than one hundred characters long so it isn't indexed by the sub keyword field with ignore_above:100
;

View file

@ -31,7 +31,7 @@ public interface TranslationAware {
* <p>and <b>not</b> this:</p>
* <p>{@code Query childQuery = child.asQuery(handler);}</p>
*/
Query asQuery(TranslatorHandler handler);
Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler);
/**
* Subinterface for expressions that can only process single values (and null out on MVs).

View file

@ -162,7 +162,7 @@ public abstract class FullTextFunction extends Function
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
return queryBuilder != null ? new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(handler);
}

View file

@ -13,6 +13,7 @@ import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.xpack.esql.core.util.Holder;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
@ -76,7 +77,9 @@ public final class QueryBuilderResolver {
Holder<Boolean> updated = new Holder<>(false);
LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> {
QueryBuilder builder = f.queryBuilder(), initial = builder;
builder = builder == null ? f.asQuery(TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() : builder;
builder = builder == null
? f.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
: builder;
try {
builder = builder.rewrite(ctx);
} catch (IOException e) {

View file

@ -184,7 +184,7 @@ public class CIDRMatch extends EsqlScalarFunction implements TranslationAware.Si
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
var fa = LucenePushdownPredicates.checkIsFieldAttribute(ipField);
Check.isTrue(Expressions.foldable(matches), "Expected foldable matches, but got [{}]", matches);

View file

@ -186,7 +186,7 @@ public abstract class SpatialRelatesFunction extends BinarySpatialFunction
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
if (left().foldable()) {
checkSpatialRelatesFunction(left(), queryRelation());
return translate(handler, right(), left());

View file

@ -144,7 +144,7 @@ public class EndsWith extends EsqlScalarFunction implements TranslationAware.Sin
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
LucenePushdownPredicates.checkIsPushableAttribute(str);
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);

View file

@ -112,7 +112,7 @@ public class RLike extends org.elasticsearch.xpack.esql.core.expression.predicat
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
var fa = LucenePushdownPredicates.checkIsFieldAttribute(field());
// TODO: see whether escaping is needed
return new RegexQuery(source(), handler.nameOf(fa.exactAttribute()), pattern().asJavaRegex(), caseInsensitive());

View file

@ -141,7 +141,7 @@ public class StartsWith extends EsqlScalarFunction implements TranslationAware.S
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
LucenePushdownPredicates.checkIsPushableAttribute(str);
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);

View file

@ -124,7 +124,7 @@ public class WildcardLike extends org.elasticsearch.xpack.esql.core.expression.p
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
var field = field();
LucenePushdownPredicates.checkIsPushableAttribute(field);
return translateField(handler.nameOf(field instanceof FieldAttribute fa ? fa.exactAttribute() : field));

View file

@ -220,7 +220,7 @@ public class Range extends ScalarFunction implements TranslationAware.SingleValu
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
return translate(handler);
}

View file

@ -90,8 +90,13 @@ public abstract class BinaryLogic extends BinaryOperator<Boolean, Boolean, Boole
}
@Override
public Query asQuery(TranslatorHandler handler) {
return boolQuery(source(), handler.asQuery(left()), handler.asQuery(right()), this instanceof And);
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
return boolQuery(
source(),
handler.asQuery(pushdownPredicates, left()),
handler.asQuery(pushdownPredicates, right()),
this instanceof And
);
}
public static Query boolQuery(Source source, Query left, Query right, boolean isAnd) {

View file

@ -105,7 +105,7 @@ public class Not extends UnaryScalarFunction implements Negatable<Expression>, T
}
@Override
public Query asQuery(TranslatorHandler handler) {
return handler.asQuery(field()).negate(source());
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
return handler.asQuery(pushdownPredicates, field()).negate(source());
}
}

View file

@ -80,7 +80,7 @@ public class IsNotNull extends UnaryScalarFunction implements Negatable<UnarySca
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
return new ExistsQuery(source(), handler.nameOf(field()));
}
}

View file

@ -81,7 +81,7 @@ public class IsNull extends UnaryScalarFunction implements Negatable<UnaryScalar
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
return new NotQuery(source(), new ExistsQuery(source(), handler.nameOf(field())));
}
}

View file

@ -11,13 +11,20 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.predicate.Negatable;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
import org.elasticsearch.xpack.esql.querydsl.query.EqualsSyntheticSourceDelegate;
import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery;
import java.time.ZoneId;
import java.util.Map;
@ -120,6 +127,32 @@ public class Equals extends EsqlBinaryComparison implements Negatable<EsqlBinary
);
}
@Override
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
if (right() instanceof Literal lit) {
if (left().dataType() == DataType.TEXT && left() instanceof FieldAttribute fa) {
if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(fa, ((BytesRef) lit.value()).utf8ToString())) {
return true;
}
}
}
return super.translatable(pushdownPredicates);
}
@Override
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
if (right() instanceof Literal lit) {
if (left().dataType() == DataType.TEXT && left() instanceof FieldAttribute fa) {
String value = ((BytesRef) lit.value()).utf8ToString();
if (pushdownPredicates.canUseEqualityOnSyntheticSourceDelegate(fa, value)) {
String name = handler.nameOf(fa);
return new SingleValueQuery(new EqualsSyntheticSourceDelegate(source(), name, value), name, true);
}
}
}
return super.asQuery(pushdownPredicates, handler);
}
@Override
public String getWriteableName() {
return ENTRY.name;

View file

@ -356,7 +356,7 @@ public abstract class EsqlBinaryComparison extends BinaryComparison
* input to the operation.
*/
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
Check.isTrue(
right().foldable(),
"Line {}:{}: Comparisons against fields are not (currently) supported; offender [{}] in [{}]",

View file

@ -466,11 +466,11 @@ public class In extends EsqlScalarFunction implements TranslationAware.SingleVal
}
@Override
public Query asQuery(TranslatorHandler handler) {
return translate(handler);
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
return translate(pushdownPredicates, handler);
}
private Query translate(TranslatorHandler handler) {
private Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
logger.trace("Attempting to generate lucene query for IN expression");
TypedAttribute attribute = LucenePushdownPredicates.checkIsPushableAttribute(value());
@ -483,7 +483,7 @@ public class In extends EsqlScalarFunction implements TranslationAware.SingleVal
// delegates to BinaryComparisons translator to ensure consistent handling of date and time values
// TODO:
// Query query = BinaryComparisons.translate(new Equals(in.source(), in.value(), rhs), handler);
Query query = handler.asQuery(new Equals(source(), value(), rhs));
Query query = handler.asQuery(pushdownPredicates, new Equals(source(), value(), rhs));
if (query instanceof TermQuery) {
terms.add(((TermQuery) query).value());

View file

@ -102,7 +102,7 @@ public class InsensitiveEquals extends InsensitiveBinaryComparison {
}
@Override
public Query asQuery(TranslatorHandler handler) {
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
checkInsensitiveComparison();
return translate();
}

View file

@ -47,6 +47,8 @@ public interface LucenePushdownPredicates {
*/
boolean isIndexed(FieldAttribute attr);
boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value);
/**
* We see fields as pushable if either they are aggregatable or they are indexed.
* This covers non-indexed cases like <code>AbstractScriptFieldType</code> which hard-coded <code>isAggregatable</code> to true,
@ -116,6 +118,11 @@ public interface LucenePushdownPredicates {
// TODO: This is the original behaviour, but is it correct? In FieldType isAggregatable usually only means hasDocValues
return attr.field().isAggregatable();
}
@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) {
return false;
}
};
/**
@ -141,6 +148,11 @@ public interface LucenePushdownPredicates {
public boolean isIndexed(FieldAttribute attr) {
return stats.isIndexed(attr.name());
}
@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute attr, String value) {
return stats.canUseEqualityOnSyntheticSourceDelegate(attr.field().getName(), value);
}
};
}
}

View file

@ -53,12 +53,13 @@ public class PushFiltersToSource extends PhysicalOptimizerRules.ParameterizedOpt
}
private static PhysicalPlan planFilterExec(FilterExec filterExec, EsQueryExec queryExec, LocalPhysicalOptimizerContext ctx) {
LucenePushdownPredicates pushdownPredicates = LucenePushdownPredicates.from(ctx.searchStats());
List<Expression> pushable = new ArrayList<>();
List<Expression> nonPushable = new ArrayList<>();
for (Expression exp : splitAnd(filterExec.condition())) {
(canPushToSource(exp, LucenePushdownPredicates.from(ctx.searchStats())) ? pushable : nonPushable).add(exp);
(canPushToSource(exp, pushdownPredicates) ? pushable : nonPushable).add(exp);
}
return rewrite(filterExec, queryExec, pushable, nonPushable, List.of());
return rewrite(pushdownPredicates, filterExec, queryExec, pushable, nonPushable, List.of());
}
private static PhysicalPlan planFilterExec(
@ -67,16 +68,17 @@ public class PushFiltersToSource extends PhysicalOptimizerRules.ParameterizedOpt
EsQueryExec queryExec,
LocalPhysicalOptimizerContext ctx
) {
LucenePushdownPredicates pushdownPredicates = LucenePushdownPredicates.from(ctx.searchStats());
AttributeMap<Attribute> aliasReplacedBy = getAliasReplacedBy(evalExec);
List<Expression> pushable = new ArrayList<>();
List<Expression> nonPushable = new ArrayList<>();
for (Expression exp : splitAnd(filterExec.condition())) {
Expression resExp = exp.transformUp(ReferenceAttribute.class, r -> aliasReplacedBy.resolve(r, r));
(canPushToSource(resExp, LucenePushdownPredicates.from(ctx.searchStats())) ? pushable : nonPushable).add(exp);
(canPushToSource(resExp, pushdownPredicates) ? pushable : nonPushable).add(exp);
}
// Replace field references with their actual field attributes
pushable.replaceAll(e -> e.transformDown(ReferenceAttribute.class, r -> aliasReplacedBy.resolve(r, r)));
return rewrite(filterExec, queryExec, pushable, nonPushable, evalExec.fields());
return rewrite(pushdownPredicates, filterExec, queryExec, pushable, nonPushable, evalExec.fields());
}
static AttributeMap<Attribute> getAliasReplacedBy(EvalExec evalExec) {
@ -90,6 +92,7 @@ public class PushFiltersToSource extends PhysicalOptimizerRules.ParameterizedOpt
}
private static PhysicalPlan rewrite(
LucenePushdownPredicates pushdownPredicates,
FilterExec filterExec,
EsQueryExec queryExec,
List<Expression> pushable,
@ -99,7 +102,7 @@ public class PushFiltersToSource extends PhysicalOptimizerRules.ParameterizedOpt
// Combine GT, GTE, LT and LTE in pushable to Range if possible
List<Expression> newPushable = combineEligiblePushableToRange(pushable);
if (newPushable.size() > 0) { // update the executable with pushable conditions
Query queryDSL = TRANSLATOR_HANDLER.asQuery(Predicates.combineAnd(newPushable));
Query queryDSL = TRANSLATOR_HANDLER.asQuery(pushdownPredicates, Predicates.combineAnd(newPushable));
QueryBuilder planQuery = queryDSL.toQueryBuilder();
Queries.Clause combiningQueryClauseType = queryExec.hasScoring() ? Queries.Clause.MUST : Queries.Clause.FILTER;
var query = Queries.combine(combiningQueryClauseType, asList(queryExec.query(), planQuery));

View file

@ -106,7 +106,7 @@ public class PushStatsToSource extends PhysicalOptimizerRules.ParameterizedOptim
if (canPushToSource(count.filter()) == false) {
return null; // can't push down
}
var countFilter = TRANSLATOR_HANDLER.asQuery(count.filter());
var countFilter = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, count.filter());
query = Queries.combine(Queries.Clause.MUST, asList(countFilter.toQueryBuilder(), query));
}
return new EsStatsQueryExec.Stat(fieldName, COUNT, query);

View file

@ -33,6 +33,7 @@ import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext;
import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer;
import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext;
import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalPlanOptimizer;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.plan.QueryPlan;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.Filter;
@ -250,7 +251,9 @@ public class PlannerUtils {
}
}
if (matches.isEmpty() == false) {
requestFilters.add(TRANSLATOR_HANDLER.asQuery(Predicates.combineAnd(matches)).toQueryBuilder());
requestFilters.add(
TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, Predicates.combineAnd(matches)).toQueryBuilder()
);
}
});
});

View file

@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery;
/**
@ -29,9 +30,9 @@ public final class TranslatorHandler {
private TranslatorHandler() {}
public Query asQuery(Expression e) {
public Query asQuery(LucenePushdownPredicates predicates, Expression e) {
if (e instanceof TranslationAware ta) {
Query query = ta.asQuery(this);
Query query = ta.asQuery(predicates, this);
return ta instanceof TranslationAware.SingleValueTranslationAware sv ? wrapFunctionQuery(sv.singleValueField(), query) : query;
}
@ -39,9 +40,13 @@ public final class TranslatorHandler {
}
private static Query wrapFunctionQuery(Expression field, Query query) {
if (query instanceof SingleValueQuery) {
// Already wrapped
return query;
}
if (field instanceof FieldAttribute fa) {
fa = fa.getExactInfo().hasExact() ? fa.exactAttribute() : fa;
return new SingleValueQuery(query, fa.name());
return new SingleValueQuery(query, fa.name(), false);
}
if (field instanceof MetadataAttribute) {
return query; // MetadataAttributes are always single valued

View file

@ -0,0 +1,60 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.querydsl.query;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.query.BaseTermQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.tree.Source;
public class EqualsSyntheticSourceDelegate extends Query {
private final String fieldName;
private final String value;
public EqualsSyntheticSourceDelegate(Source source, String fieldName, String value) {
super(source);
this.fieldName = fieldName;
this.value = value;
}
@Override
protected QueryBuilder asBuilder() {
return new Builder(fieldName, value);
}
@Override
protected String innerToString() {
return fieldName + "(delegate):" + value;
}
private class Builder extends BaseTermQueryBuilder<Builder> {
private Builder(String name, String value) {
super(name, value);
}
@Override
protected org.apache.lucene.search.Query doToQuery(SearchExecutionContext context) {
TextFieldMapper.TextFieldType ft = (TextFieldMapper.TextFieldType) context.getFieldType(fieldName);
return ft.syntheticSourceDelegate().termQuery(value, context);
}
@Override
public String getWriteableName() {
return "equals_synthetic_source_delegate";
}
@Override
public TransportVersion getMinimalSupportedVersion() {
// This is just translated on the data node and not sent over the wire.
throw new UnsupportedOperationException();
}
}
}

View file

@ -11,6 +11,7 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
@ -19,7 +20,9 @@ import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.Warnings;
import org.elasticsearch.compute.querydsl.query.SingleValueMatchQuery;
import org.elasticsearch.index.mapper.IgnoredFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.MatchNoneQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
@ -57,16 +60,28 @@ public class SingleValueQuery extends Query {
private final Query next;
private final String field;
private final boolean useSyntheticSourceDelegate;
public SingleValueQuery(Query next, String field) {
/**
* Build.
* @param next the query whose documents we should use for single-valued fields
* @param field the name of the field whose values to check
* @param useSyntheticSourceDelegate Should we check the field's synthetic source delegate (true)
* or it's values itself? If the field is a {@code text} field
* we often want to use its delegate.
*/
public SingleValueQuery(Query next, String field, boolean useSyntheticSourceDelegate) {
super(next.source());
this.next = next;
this.field = field;
this.useSyntheticSourceDelegate = useSyntheticSourceDelegate;
}
@Override
protected Builder asBuilder() {
return new Builder(next.toQueryBuilder(), field, next.source());
protected AbstractBuilder asBuilder() {
return useSyntheticSourceDelegate
? new SyntheticSourceDelegateBuilder(next.toQueryBuilder(), field, next.source())
: new Builder(next.toQueryBuilder(), field, next.source());
}
@Override
@ -76,7 +91,7 @@ public class SingleValueQuery extends Query {
@Override
public SingleValueQuery negate(Source source) {
return new SingleValueQuery(next.negate(source), field);
return new SingleValueQuery(next.negate(source), field, useSyntheticSourceDelegate);
}
@Override
@ -85,26 +100,28 @@ public class SingleValueQuery extends Query {
return false;
}
SingleValueQuery other = (SingleValueQuery) o;
return Objects.equals(next, other.next) && Objects.equals(field, other.field);
return Objects.equals(next, other.next)
&& Objects.equals(field, other.field)
&& useSyntheticSourceDelegate == other.useSyntheticSourceDelegate;
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), next, field);
return Objects.hash(super.hashCode(), next, field, useSyntheticSourceDelegate);
}
public static class Builder extends AbstractQueryBuilder<Builder> {
public abstract static class AbstractBuilder extends AbstractQueryBuilder<AbstractBuilder> {
private final QueryBuilder next;
private final String field;
private final Source source;
Builder(QueryBuilder next, String field, Source source) {
AbstractBuilder(QueryBuilder next, String field, Source source) {
this.next = next;
this.field = field;
this.source = source;
}
Builder(StreamInput in) throws IOException {
AbstractBuilder(StreamInput in) throws IOException {
super(in);
this.next = in.readNamedWriteable(QueryBuilder.class);
this.field = in.readString();
@ -126,7 +143,7 @@ public class SingleValueQuery extends Query {
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
protected final void doWriteTo(StreamOutput out) throws IOException {
out.writeNamedWriteable(next);
out.writeString(field);
if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0)) {
@ -148,6 +165,43 @@ public class SingleValueQuery extends Query {
return source;
}
protected abstract AbstractBuilder rewrite(QueryBuilder next);
@Override
protected final QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
QueryBuilder rewritten = next.rewrite(queryRewriteContext);
if (rewritten instanceof MatchNoneQueryBuilder) {
return rewritten;
}
if (rewritten == next) {
return this;
}
return rewrite(rewritten);
}
@Override
protected final boolean doEquals(AbstractBuilder other) {
return next.equals(other.next) && field.equals(other.field);
}
@Override
protected final int doHashCode() {
return Objects.hash(next, field);
}
}
/**
* Builds a {@code bool} query combining the "next" query and a {@link SingleValueMatchQuery}.
*/
public static class Builder extends AbstractBuilder {
Builder(QueryBuilder next, String field, Source source) {
super(next, field, source);
}
Builder(StreamInput in) throws IOException {
super(in);
}
@Override
public String getWriteableName() {
return ENTRY.name;
@ -156,9 +210,9 @@ public class SingleValueQuery extends Query {
@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(ENTRY.name);
builder.field("field", field);
builder.field("next", next, params);
builder.field("source", source.toString());
builder.field("field", field());
builder.field("next", next(), params);
builder.field("source", source().toString());
builder.endObject();
}
@ -168,52 +222,125 @@ public class SingleValueQuery extends Query {
}
@Override
protected org.apache.lucene.search.Query doToQuery(SearchExecutionContext context) throws IOException {
MappedFieldType ft = context.getFieldType(field);
protected final org.apache.lucene.search.Query doToQuery(SearchExecutionContext context) throws IOException {
MappedFieldType ft = context.getFieldType(field());
if (ft == null) {
return new MatchNoDocsQuery("missing field [" + field + "]");
return new MatchNoDocsQuery("missing field [" + field() + "]");
}
SingleValueMatchQuery singleValueQuery = new SingleValueMatchQuery(
context.getForField(ft, MappedFieldType.FielddataOperation.SEARCH),
Warnings.createWarnings(
DriverContext.WarningsMode.COLLECT,
source.source().getLineNumber(),
source.source().getColumnNumber(),
source.text()
source().source().getLineNumber(),
source().source().getColumnNumber(),
source().text()
),
"single-value function encountered multi-value"
);
org.apache.lucene.search.Query rewrite = singleValueQuery.rewrite(context.searcher());
if (rewrite instanceof MatchAllDocsQuery) {
// nothing to filter
return next.toQuery(context);
return next().toQuery(context);
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(next.toQuery(context), BooleanClause.Occur.FILTER);
builder.add(next().toQuery(context), BooleanClause.Occur.FILTER);
builder.add(rewrite, BooleanClause.Occur.FILTER);
return builder.build();
}
@Override
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
QueryBuilder rewritten = next.rewrite(queryRewriteContext);
if (rewritten instanceof MatchNoneQueryBuilder) {
return rewritten;
}
if (rewritten == next) {
return this;
}
return new Builder(rewritten, field, source);
protected AbstractBuilder rewrite(QueryBuilder next) {
return new Builder(next, field(), source());
}
}
/**
* Builds a {@code bool} query combining the "next" query, a {@link SingleValueMatchQuery},
* and a {@link TermQuery} making sure we didn't ignore any values. Three total queries.
* This is only used if the "next" query matches fields that would not be ignored. Read all
* the paragraphs below to understand it. It's tricky!
* <p>
* This is used in the case when you do {@code text_field == "foo"} and {@code text_field}
* has a {@code keyword} sub-field. See, {@code text} typed fields can't do our equality -
* they only do matching. But {@code keyword} fields *can* do the equality. In this case
* the "next" query is a {@link TermQuery} like {@code text_field.raw:foo}.
* </p>
* <p>
* But there's a big wrinkle! If you index a field longer than {@code ignore_above} into
* {@code text_field.raw} field then it'll drop its value on the floor. So the
* {@link SingleValueMatchQuery} isn't enough to emulate {@code ==}. You have to remove
* any matches that ignored a field. Luckily we have {@link IgnoredFieldMapper}! We can
* do a {@link TermQuery} like {@code NOT(_ignored:text_field.raw)} to filter those out.
* </p>
* <p>
* You may be asking, "how would the first {@code text_field.raw:foo} query work if the
* value we're searching for is very long?" In that case we never use this query at all.
* We have to delegate the filtering to the compute engine. No fancy lucene searches in
* that case.
* </p>
*/
public static class SyntheticSourceDelegateBuilder extends AbstractBuilder {
SyntheticSourceDelegateBuilder(QueryBuilder next, String field, Source source) {
super(next, field, source);
}
@Override
protected boolean doEquals(Builder other) {
return next.equals(other.next) && field.equals(other.field);
public String getWriteableName() {
throw new UnsupportedOperationException("Not serialized");
}
@Override
protected int doHashCode() {
return Objects.hash(next, field);
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(ENTRY.name);
builder.field("field", field() + ":synthetic_source_delegate");
builder.field("next", next(), params);
builder.field("source", source().toString());
builder.endObject();
}
@Override
public TransportVersion getMinimalSupportedVersion() {
throw new UnsupportedOperationException("Not serialized");
}
@Override
protected final org.apache.lucene.search.Query doToQuery(SearchExecutionContext context) throws IOException {
MappedFieldType ft = context.getFieldType(field());
if (ft == null) {
return new MatchNoDocsQuery("missing field [" + field() + "]");
}
ft = ((TextFieldMapper.TextFieldType) ft).syntheticSourceDelegate();
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(next().toQuery(context), BooleanClause.Occur.FILTER);
org.apache.lucene.search.Query singleValueQuery = new SingleValueMatchQuery(
context.getForField(ft, MappedFieldType.FielddataOperation.SEARCH),
Warnings.createWarnings(
DriverContext.WarningsMode.COLLECT,
source().source().getLineNumber(),
source().source().getColumnNumber(),
source().text()
),
"single-value function encountered multi-value"
);
singleValueQuery = singleValueQuery.rewrite(context.searcher());
if (singleValueQuery instanceof MatchAllDocsQuery == false) {
builder.add(singleValueQuery, BooleanClause.Occur.FILTER);
}
org.apache.lucene.search.Query ignored = new TermQuery(new org.apache.lucene.index.Term(IgnoredFieldMapper.NAME, ft.name()));
ignored = ignored.rewrite(context.searcher());
if (ignored instanceof MatchNoDocsQuery == false) {
builder.add(ignored, BooleanClause.Occur.MUST_NOT);
}
return builder.build();
}
@Override
protected AbstractBuilder rewrite(QueryBuilder next) {
return new Builder(next, field(), source());
}
}

View file

@ -292,6 +292,24 @@ public class SearchContextStats implements SearchStats {
return false;
}
@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
for (SearchExecutionContext ctx : contexts) {
MappedFieldType type = ctx.getFieldType(name);
if (type == null) {
return false;
}
if (type instanceof TextFieldMapper.TextFieldType t) {
if (t.canUseSyntheticSourceDelegateForQueryingEquality(value) == false) {
return false;
}
} else {
return false;
}
}
return true;
}
private interface DocCountTester {
Boolean test(LeafReader leafReader) throws IOException;
}

View file

@ -37,6 +37,8 @@ public interface SearchStats {
boolean isSingleValue(String field);
boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value);
/**
* When there are no search stats available, for example when there are no search contexts, we have static results.
*/
@ -92,5 +94,9 @@ public interface SearchStats {
return true;
}
@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
return false;
}
}
}

View file

@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase
import org.elasticsearch.xpack.esql.expression.function.ErrorsForCasesWithoutExamplesTestCase;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.hamcrest.Matcher;
import java.util.List;
@ -40,7 +41,7 @@ public class MatchErrorTests extends ErrorsForCasesWithoutExamplesTestCase {
// We need to add the QueryBuilder to the match expression, as it is used to implement equals() and hashCode() and
// thus test the serialization methods. But we can only do this if the parameters make sense .
if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) {
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(match).toQueryBuilder();
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, match).toQueryBuilder();
match.replaceQueryBuilder(queryBuilder);
}
return match;

View file

@ -20,6 +20,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.FunctionName;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import java.util.ArrayList;
import java.util.List;
@ -80,7 +81,7 @@ public class MatchTests extends AbstractMatchFullTextFunctionTests {
// We need to add the QueryBuilder to the match expression, as it is used to implement equals() and hashCode() and
// thus test the serialization methods. But we can only do this if the parameters make sense .
if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) {
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(match).toQueryBuilder();
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, match).toQueryBuilder();
match.replaceQueryBuilder(queryBuilder);
}
return match;

View file

@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.expression.function.FunctionName;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import java.util.List;
import java.util.function.Supplier;
@ -38,7 +39,7 @@ public class MultiMatchTests extends MatchTests {
// We need to add the QueryBuilder to the multi_match expression, as it is used to implement equals() and hashCode() and
// thus test the serialization methods. But we can only do this if the parameters make sense .
if (mm.query().foldable() && mm.fields().stream().allMatch(field -> field instanceof FieldAttribute)) {
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(mm).toQueryBuilder();
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, mm).toQueryBuilder();
mm.replaceQueryBuilder(queryBuilder);
}
return mm;

View file

@ -19,6 +19,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.FunctionName;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import java.util.ArrayList;
import java.util.List;
@ -79,7 +80,7 @@ public class QueryStringTests extends NoneFieldFullTextFunctionTestCase {
// We need to add the QueryBuilder to the match expression, as it is used to implement equals() and hashCode() and
// thus test the serialization methods. But we can only do this if the parameters make sense .
if (args.get(0).foldable()) {
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(qstr).toQueryBuilder();
QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, qstr).toQueryBuilder();
qstr.replaceQueryBuilder(queryBuilder);
}
return qstr;

View file

@ -135,7 +135,7 @@ public class EndsWithTests extends AbstractScalarFunctionTestCase {
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));
var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);
var query = function.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER);
assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "*a\\*b\\?c\\\\")));
}

View file

@ -95,7 +95,7 @@ public class StartsWithTests extends AbstractScalarFunctionTestCase {
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));
var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);
var query = function.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER);
assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "a\\*b\\?c\\\\*")));
}

View file

@ -22,6 +22,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
import org.junit.AfterClass;
@ -93,7 +94,7 @@ public class InTests extends AbstractFunctionTestCase {
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)),
Arrays.asList(ONE, new Literal(Source.EMPTY, null, randomFrom(DataType.types())), THREE)
);
var query = in.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);
var query = in.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER);
assertEquals(new TermsQuery(EMPTY, "field", Set.of(1, 3)), query);
}

View file

@ -134,6 +134,7 @@ import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner;
import org.elasticsearch.xpack.esql.planner.PlannerUtils;
import org.elasticsearch.xpack.esql.planner.mapper.Mapper;
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
import org.elasticsearch.xpack.esql.querydsl.query.EqualsSyntheticSourceDelegate;
import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery;
import org.elasticsearch.xpack.esql.querydsl.query.SpatialRelatesQuery;
import org.elasticsearch.xpack.esql.session.Configuration;
@ -216,6 +217,7 @@ public class PhysicalPlanOptimizerTests extends ESTestCase {
private PhysicalPlanOptimizer physicalPlanOptimizer;
private Mapper mapper;
private TestDataSource testData;
private TestDataSource testDataLimitedRaw;
private int allFieldRowSize; // TODO: Move this into testDataSource so tests that load other indexes can also assert on this
private TestDataSource airports;
private TestDataSource airportsNoDocValues; // Test when spatial field is indexed but has no doc values
@ -236,7 +238,7 @@ public class PhysicalPlanOptimizerTests extends ESTestCase {
private record TestDataSource(Map<String, EsField> mapping, EsIndex index, Analyzer analyzer, SearchStats stats) {}
@ParametersFactory(argumentFormatting = PARAM_FORMATTING)
public static List<Object[]> readScriptSpec() {
public static List<Object[]> params() {
return settings().stream().map(t -> {
var settings = Settings.builder().loadFromMap(t.v2()).build();
return new Object[] { t.v1(), configuration(new QueryPragmas(settings)) };
@ -261,6 +263,7 @@ public class PhysicalPlanOptimizerTests extends ESTestCase {
var enrichResolution = setupEnrichResolution();
// Most tests used data from the test index, so we load it here, and use it in the plan() function.
this.testData = makeTestDataSource("test", "mapping-basic.json", functionRegistry, enrichResolution);
this.testDataLimitedRaw = makeTestDataSource("test", "mapping-basic-limited-raw.json", functionRegistry, enrichResolution);
allFieldRowSize = testData.mapping.values()
.stream()
.mapToInt(
@ -7789,6 +7792,35 @@ public class PhysicalPlanOptimizerTests extends ESTestCase {
assertThat(reductionAggs.estimatedRowSize(), equalTo(58)); // double and keyword
}
public void testEqualsPushdownToDelegate() {
var optimized = optimizedPlan(physicalPlan("""
FROM test
| WHERE job == "v"
""", testDataLimitedRaw), SEARCH_STATS_SHORT_DELEGATES);
var limit = as(optimized, LimitExec.class);
var exchange = as(limit.child(), ExchangeExec.class);
var project = as(exchange.child(), ProjectExec.class);
var extract = as(project.child(), FieldExtractExec.class);
var query = as(extract.child(), EsQueryExec.class);
assertThat(
query.query(),
equalTo(new SingleValueQuery(new EqualsSyntheticSourceDelegate(Source.EMPTY, "job", "v"), "job", true).toQueryBuilder())
);
}
public void testEqualsPushdownToDelegateTooBig() {
var optimized = optimizedPlan(physicalPlan("""
FROM test
| WHERE job == "too_long"
""", testDataLimitedRaw), SEARCH_STATS_SHORT_DELEGATES);
var limit = as(optimized, LimitExec.class);
var exchange = as(limit.child(), ExchangeExec.class);
var project = as(exchange.child(), ProjectExec.class);
var extract = as(project.child(), FieldExtractExec.class);
var limit2 = as(extract.child(), LimitExec.class);
as(limit2.child(), FilterExec.class);
}
@SuppressWarnings("SameParameterValue")
private static void assertFilterCondition(
Filter filter,
@ -8002,4 +8034,16 @@ public class PhysicalPlanOptimizerTests extends ESTestCase {
protected List<String> filteredWarnings() {
return withDefaultLimitWarning(super.filteredWarnings());
}
private static final SearchStats SEARCH_STATS_SHORT_DELEGATES = new EsqlTestUtils.TestSearchStats() {
@Override
public boolean hasExactSubfield(String field) {
return false;
}
@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
return value.length() < 4;
}
};
}

View file

@ -21,12 +21,16 @@ import static org.hamcrest.Matchers.equalTo;
*/
public class SingleValueQueryNegateTests extends ESTestCase {
public void testNot() {
var sv = new SingleValueQuery(new MatchAll(Source.EMPTY), "foo");
assertThat(sv.negate(Source.EMPTY), equalTo(new SingleValueQuery(new NotQuery(Source.EMPTY, new MatchAll(Source.EMPTY)), "foo")));
boolean useSyntheticSourceDelegate = randomBoolean();
var sv = new SingleValueQuery(new MatchAll(Source.EMPTY), "foo", useSyntheticSourceDelegate);
assertThat(
sv.negate(Source.EMPTY),
equalTo(new SingleValueQuery(new NotQuery(Source.EMPTY, new MatchAll(Source.EMPTY)), "foo", useSyntheticSourceDelegate))
);
}
public void testNotNot() {
var sv = new SingleValueQuery(new MatchAll(Source.EMPTY), "foo");
var sv = new SingleValueQuery(new MatchAll(Source.EMPTY), "foo", randomBoolean());
assertThat(sv.negate(Source.EMPTY).negate(Source.EMPTY), equalTo(sv));
}
}

View file

@ -22,15 +22,13 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MapperServiceTestCase;
import org.elasticsearch.index.query.MatchNoneQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xpack.esql.core.querydsl.query.MatchAll;
import org.elasticsearch.xpack.esql.core.querydsl.query.RangeQuery;
import org.elasticsearch.xpack.esql.core.querydsl.query.TermQuery;
import org.elasticsearch.xpack.esql.core.tree.Source;
import java.io.IOException;
@ -46,12 +44,14 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
XContentBuilder mapping(XContentBuilder builder) throws IOException;
List<List<Object>> build(RandomIndexWriter iw) throws IOException;
boolean useSyntheticSourceDelegate();
}
@ParametersFactory
public static List<Object[]> params() {
List<Object[]> params = new ArrayList<>();
for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) {
for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword", "text" }) {
for (boolean multivaluedField : new boolean[] { true, false }) {
for (boolean allowEmpty : new boolean[] { true, false }) {
params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) });
@ -69,45 +69,54 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
}
public void testMatchAll() throws IOException {
testCase(new SingleValueQuery(new MatchAll(Source.EMPTY), "foo").asBuilder(), this::runCase);
testCase(new SingleValueQuery(new MatchAll(Source.EMPTY), "foo", setup.useSyntheticSourceDelegate()).asBuilder(), this::runCase);
}
public void testMatchSome() throws IOException {
int max = between(1, 100);
testCase(
new SingleValueQuery.Builder(new RangeQueryBuilder("i").lt(max), "foo", Source.EMPTY),
new SingleValueQuery(
new RangeQuery(Source.EMPTY, "i", null, false, max, false, randomZone()),
"foo",
setup.useSyntheticSourceDelegate()
).asBuilder(),
(fieldValues, count) -> runCase(fieldValues, count, null, max)
);
}
public void testSubPhrase() throws IOException {
testCase(new SingleValueQuery.Builder(new MatchPhraseQueryBuilder("str", "fox jumped"), "foo", Source.EMPTY), this::runCase);
SingleValueQuery.AbstractBuilder builder = setup.useSyntheticSourceDelegate()
? new SingleValueQuery.SyntheticSourceDelegateBuilder(new MatchPhraseQueryBuilder("str", "fox jumped"), "foo", Source.EMPTY)
: new SingleValueQuery.Builder(new MatchPhraseQueryBuilder("str", "fox jumped"), "foo", Source.EMPTY);
testCase(builder, this::runCase);
}
public void testMatchNone() throws IOException {
testCase(
new SingleValueQuery.Builder(new MatchNoneQueryBuilder(), "foo", Source.EMPTY),
new SingleValueQuery(new MatchAll(Source.EMPTY).negate(Source.EMPTY), "foo", setup.useSyntheticSourceDelegate()).asBuilder(),
(fieldValues, count) -> assertThat(count, equalTo(0))
);
}
public void testRewritesToMatchNone() throws IOException {
testCase(
new SingleValueQuery.Builder(new TermQueryBuilder("missing", 0), "foo", Source.EMPTY),
new SingleValueQuery(new TermQuery(Source.EMPTY, "missing", 0), "foo", setup.useSyntheticSourceDelegate()).asBuilder(),
(fieldValues, count) -> assertThat(count, equalTo(0))
);
}
public void testNotMatchAll() throws IOException {
testCase(
new SingleValueQuery(new MatchAll(Source.EMPTY), "foo").negate(Source.EMPTY).asBuilder(),
new SingleValueQuery(new MatchAll(Source.EMPTY), "foo", setup.useSyntheticSourceDelegate()).negate(Source.EMPTY).asBuilder(),
(fieldValues, count) -> assertThat(count, equalTo(0))
);
}
public void testNotMatchNone() throws IOException {
testCase(
new SingleValueQuery(new MatchAll(Source.EMPTY).negate(Source.EMPTY), "foo").negate(Source.EMPTY).asBuilder(),
new SingleValueQuery(new MatchAll(Source.EMPTY).negate(Source.EMPTY), "foo", setup.useSyntheticSourceDelegate()).negate(
Source.EMPTY
).asBuilder(),
this::runCase
);
}
@ -115,7 +124,11 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
public void testNotMatchSome() throws IOException {
int max = between(1, 100);
testCase(
new SingleValueQuery(new RangeQuery(Source.EMPTY, "i", null, false, max, false, null), "foo").negate(Source.EMPTY).asBuilder(),
new SingleValueQuery(
new RangeQuery(Source.EMPTY, "i", null, false, max, false, null),
"foo",
setup.useSyntheticSourceDelegate()
).negate(Source.EMPTY).asBuilder(),
(fieldValues, count) -> runCase(fieldValues, count, max, 100)
);
}
@ -161,7 +174,7 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
runCase(fieldValues, count, null, null);
}
private void testCase(SingleValueQuery.Builder builder, TestCase testCase) throws IOException {
private void testCase(SingleValueQuery.AbstractBuilder builder, TestCase testCase) throws IOException {
MapperService mapper = createMapperService(mapping(setup::mapping));
try (Directory d = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), d)) {
List<List<Object>> fieldValues = setup.build(iw);
@ -185,7 +198,13 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
public XContentBuilder mapping(XContentBuilder builder) throws IOException {
builder.startObject("i").field("type", "long").endObject();
builder.startObject("str").field("type", "text").endObject();
return builder.startObject("foo").field("type", fieldType).endObject();
builder.startObject("foo").field("type", fieldType);
if (fieldType.equals("text")) {
builder.startObject("fields");
builder.startObject("raw").field("type", "keyword").field("ignore_above", 256).endObject();
builder.endObject();
}
return builder.endObject();
}
@Override
@ -199,6 +218,11 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
return fieldValues;
}
@Override
public boolean useSyntheticSourceDelegate() {
return fieldType.equals("text");
}
private List<Object> values(int i) {
// i == 10 forces at least one multivalued field when we're configured for multivalued fields
boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean());
@ -225,7 +249,7 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
case "byte" -> randomByte();
case "double" -> randomDouble();
case "float" -> randomFloat();
case "keyword" -> randomAlphaOfLength(5);
case "keyword", "text" -> randomAlphaOfLength(5);
default -> throw new UnsupportedOperationException();
};
}
@ -252,6 +276,12 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
fields.add(new KeywordField("foo", v.toString(), Field.Store.NO));
}
}
case "text" -> {
for (Object v : values) {
fields.add(new TextField("foo", v.toString(), Field.Store.NO));
fields.add(new KeywordField("foo.raw", v.toString(), Field.Store.NO));
}
}
default -> throw new UnsupportedOperationException();
}
return fields;
@ -279,5 +309,10 @@ public class SingleValueQueryTests extends MapperServiceTestCase {
}
return fieldValues;
}
@Override
public boolean useSyntheticSourceDelegate() {
return randomBoolean();
}
}
}

View file

@ -61,4 +61,9 @@ public class DisabledSearchStats implements SearchStats {
public boolean isSingleValue(String field) {
return false;
}
@Override
public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value) {
return false;
}
}