From 2a44166a2c21fbd18cc63aaeef2b28a31c21af2c Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 2 Jun 2025 09:50:25 -0400 Subject: [PATCH] Applying Apache Lucene fix: https://github.com/apache/lucene/pull/14732 (#128671) * Applying Apache Lucene fix: https://github.com/apache/lucene/pull/14732 * fixing test * fixing annot --- ...eFieldMapperDocValuesSkipperBenchmark.java | 6 +- .../extras/ScaledFloatFieldTypeTests.java | 4 +- server/src/main/java/module-info.java | 1 + .../index/mapper/DateFieldMapper.java | 4 +- .../index/mapper/NumberFieldMapper.java | 11 +- .../lucene/document/NumericField.java | 52 ++ ...xSortSortedNumericDocValuesRangeQuery.java | 699 ++++++++++++++++++ .../bucket/filter/QueryToFilterAdapter.java | 6 +- .../index/mapper/DateFieldTypeTests.java | 6 +- .../index/mapper/NumberFieldTypeTests.java | 13 +- .../query/MatchPhraseQueryBuilderTests.java | 4 +- .../query/MultiMatchQueryBuilderTests.java | 4 +- .../index/query/TermQueryBuilderTests.java | 4 +- .../lucene/TimeSeriesSourceOperatorTests.java | 4 +- .../unsignedlong/UnsignedLongFieldMapper.java | 4 +- 15 files changed, 787 insertions(+), 35 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/lucene/document/NumericField.java create mode 100644 server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java index 2110d2646326..a732986ebcba 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java @@ -21,7 +21,6 @@ import org.apache.lucene.index.IndexWriter; import 
org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -29,6 +28,7 @@ import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -295,7 +295,7 @@ public class DateFieldMapperDocValuesSkipperBenchmark { /** * Runs the actual Lucene range query, optionally combining a {@link LongPoint} index query * with doc values ({@link SortedNumericDocValuesField}) via {@link IndexOrDocValuesQuery}, - * and then wrapping it with an {@link IndexSortSortedNumericDocValuesRangeQuery} to utilize the index sort. + * and then wrapping it with an {@link XIndexSortSortedNumericDocValuesRangeQuery} to utilize the index sort. 
* * @param searcher the Lucene {@link IndexSearcher} * @param rangeStartTimestamp lower bound of the timestamp range @@ -316,7 +316,7 @@ public class DateFieldMapperDocValuesSkipperBenchmark { ) : SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp); - final Query query = new IndexSortSortedNumericDocValuesRangeQuery( + final Query query = new XIndexSortSortedNumericDocValuesRangeQuery( TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp, diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java index f753d4b91f50..31a150a41b22 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java @@ -11,7 +11,6 @@ package org.elasticsearch.index.mapper.extras; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; -import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; @@ -31,6 +30,7 @@ import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.lucene.document.NumericField; import java.io.IOException; import java.util.Arrays; @@ -48,7 +48,7 @@ public class ScaledFloatFieldTypeTests extends FieldTypeTestCase { ); double value = (randomDouble() * 2 - 1) * 10000; long scaledValue = Math.round(value * ft.getScalingFactor()); - assertEquals(LongField.newExactQuery("scaled_float", scaledValue), ft.termQuery(value, 
MOCK_CONTEXT)); + assertEquals(NumericField.newExactLongQuery("scaled_float", scaledValue), ft.termQuery(value, MOCK_CONTEXT)); MappedFieldType ft2 = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 0.1 + randomDouble() * 100, false); ElasticsearchException e2 = expectThrows(ElasticsearchException.class, () -> ft2.termQuery("42", MOCK_CONTEXT_DISALLOW_EXPENSIVE)); diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 8da4f403c29b..c0411ea9182d 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -477,4 +477,5 @@ module org.elasticsearch.server { exports org.elasticsearch.plugins.internal.rewriter to org.elasticsearch.inference; exports org.elasticsearch.lucene.util.automaton; exports org.elasticsearch.index.codec.perfield; + exports org.elasticsearch.lucene.search; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 3511c8dc1932..db3b1e87fb66 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -22,7 +22,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.geo.ShapeRelation; @@ -50,6 +49,7 @@ import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData; import org.elasticsearch.index.query.DateRangeIncludingNowQuery; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.SearchExecutionContext; +import 
org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.DateFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; @@ -750,7 +750,7 @@ public final class DateFieldMapper extends FieldMapper { query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); } if (hasDocValues() && context.indexSortedOnField(name())) { - query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); + query = new XIndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); } return query; }); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index cc37f7b3f508..e263ebcfeced 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -24,7 +24,6 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -47,6 +46,8 @@ import org.elasticsearch.index.fielddata.plain.SortedDoublesIndexFieldData; import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData; import org.elasticsearch.index.mapper.TimeSeriesParams.MetricType; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.lucene.document.NumericField; +import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.DoubleFieldScript; import org.elasticsearch.script.LongFieldScript; import org.elasticsearch.script.Script; @@ -1146,7 +1147,7 @@ 
public class NumberFieldMapper extends FieldMapper { int v = parse(value, true); if (isIndexed && hasDocValues) { - return IntField.newExactQuery(field, v); + return NumericField.newExactIntQuery(field, v); } else if (isIndexed) { return IntPoint.newExactQuery(field, v); } else { @@ -1223,7 +1224,7 @@ public class NumberFieldMapper extends FieldMapper { query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); } if (hasDocValues && context.indexSortedOnField(field)) { - query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + query = new XIndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } return query; } @@ -1330,7 +1331,7 @@ public class NumberFieldMapper extends FieldMapper { long v = parse(value, true); if (isIndexed && hasDocValues) { - return LongField.newExactQuery(field, v); + return NumericField.newExactLongQuery(field, v); } else if (isIndexed) { return LongPoint.newExactQuery(field, v); } else { @@ -1381,7 +1382,7 @@ public class NumberFieldMapper extends FieldMapper { query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); } if (hasDocValues && context.indexSortedOnField(field)) { - query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + query = new XIndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } return query; }); diff --git a/server/src/main/java/org/elasticsearch/lucene/document/NumericField.java b/server/src/main/java/org/elasticsearch/lucene/document/NumericField.java new file mode 100644 index 000000000000..d60244767dd9 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/lucene/document/NumericField.java @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.lucene.document; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; + +public final class NumericField { + + private NumericField() { + // Utility class, no instantiation + } + + public static Query newExactLongQuery(String field, long value) { + return newRangeLongQuery(field, value, value); + } + + public static Query newRangeLongQuery(String field, long lowerValue, long upperValue) { + PointRangeQuery.checkArgs(field, lowerValue, upperValue); + Query fallbackQuery = new IndexOrDocValuesQuery( + LongPoint.newRangeQuery(field, lowerValue, upperValue), + SortedNumericDocValuesField.newSlowRangeQuery(field, lowerValue, upperValue) + ); + return new XIndexSortSortedNumericDocValuesRangeQuery(field, lowerValue, upperValue, fallbackQuery); + } + + public static Query newExactIntQuery(String field, int value) { + return newRangeIntQuery(field, value, value); + } + + public static Query newRangeIntQuery(String field, int lowerValue, int upperValue) { + PointRangeQuery.checkArgs(field, lowerValue, upperValue); + Query fallbackQuery = new IndexOrDocValuesQuery( + IntPoint.newRangeQuery(field, lowerValue, upperValue), + SortedNumericDocValuesField.newSlowRangeQuery(field, lowerValue, upperValue) + ); + return new XIndexSortSortedNumericDocValuesRangeQuery(field, lowerValue, 
upperValue, fallbackQuery); + } + +} diff --git a/server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java b/server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java new file mode 100644 index 000000000000..bc892a9c778e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java @@ -0,0 +1,699 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Modifications copyright (C) 2025 Elasticsearch B.V. 
+ */ + +package org.elasticsearch.lucene.search; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.PointValues.IntersectVisitor; +import org.apache.lucene.index.PointValues.PointTree; +import org.apache.lucene.index.PointValues.Relation; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafFieldComparator; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Pruning; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortField.Type; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; +import org.apache.lucene.util.Version; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Objects; + +/** + * copied from Lucene + */ +public class XIndexSortSortedNumericDocValuesRangeQuery extends Query { + + private final String field; + private final long lowerValue; + private final long upperValue; + private final Query 
fallbackQuery; + + /** + * Creates a new {@link XIndexSortSortedNumericDocValuesRangeQuery}. + * + * @param field The field name. + * @param lowerValue The lower end of the range (inclusive). + * @param upperValue The upper end of the range (inclusive). + * @param fallbackQuery A query to fall back to if the optimization cannot be applied. + */ + public XIndexSortSortedNumericDocValuesRangeQuery(String field, long lowerValue, long upperValue, Query fallbackQuery) { + // we should only have this while the apache Lucene version is 10.2 or earlier + assert Version.LATEST.major == 10 && Version.LATEST.minor <= 2 + : "This query should only be used with Lucene 10.2 or earlier, but got version: " + Version.LATEST; + this.field = Objects.requireNonNull(field); + this.lowerValue = lowerValue; + this.upperValue = upperValue; + this.fallbackQuery = fallbackQuery; + } + + public Query getFallbackQuery() { + return fallbackQuery; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + XIndexSortSortedNumericDocValuesRangeQuery that = (XIndexSortSortedNumericDocValuesRangeQuery) o; + return lowerValue == that.lowerValue + && upperValue == that.upperValue + && Objects.equals(field, that.field) + && Objects.equals(fallbackQuery, that.fallbackQuery); + } + + @Override + public int hashCode() { + return Objects.hash(field, lowerValue, upperValue, fallbackQuery); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + fallbackQuery.visit(visitor); + } + } + + @Override + public String toString(String field) { + StringBuilder b = new StringBuilder(); + if (this.field.equals(field) == false) { + b.append(this.field).append(":"); + } + return b.append("[").append(lowerValue).append(" TO ").append(upperValue).append("]").toString(); + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if
(lowerValue == Long.MIN_VALUE && upperValue == Long.MAX_VALUE) { + return new FieldExistsQuery(field); + } + + Query rewrittenFallback = fallbackQuery.rewrite(indexSearcher); + if (rewrittenFallback.getClass() == MatchAllDocsQuery.class) { + return new MatchAllDocsQuery(); + } + if (rewrittenFallback == fallbackQuery) { + return this; + } else { + return new XIndexSortSortedNumericDocValuesRangeQuery(field, lowerValue, upperValue, rewrittenFallback); + } + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + Weight fallbackWeight = fallbackQuery.createWeight(searcher, scoreMode, boost); + + return new ConstantScoreWeight(this, boost) { + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context); + if (itAndCount != null) { + DocIdSetIterator disi = itAndCount.it; + return new ScorerSupplier() { + @Override + public Scorer get(long leadCost) throws IOException { + return new ConstantScoreScorer(score(), scoreMode, disi); + } + + @Override + public long cost() { + return disi.cost(); + } + }; + } + return fallbackWeight.scorerSupplier(context); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // Both queries should always return the same values, so we can just check + // if the fallback query is cacheable. 
+ return fallbackWeight.isCacheable(ctx); + } + + @Override + public int count(LeafReaderContext context) throws IOException { + if (context.reader().hasDeletions() == false) { + if (lowerValue > upperValue) { + return 0; + } + IteratorAndCount itAndCount = null; + LeafReader reader = context.reader(); + + // first use bkd optimization if possible + SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(reader, field); + NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues); + PointValues pointValues = reader.getPointValues(field); + if (pointValues != null && pointValues.getDocCount() == reader.maxDoc()) { + itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues); + } + if (itAndCount != null && itAndCount.count != -1) { + return itAndCount.count; + } + + // use index sort optimization if possible + Sort indexSort = reader.getMetaData().sort(); + if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getField().equals(field)) { + final SortField sortField = indexSort.getSort()[0]; + final SortField.Type sortFieldType = getSortFieldType(sortField); + // The index sort optimization is only supported for Type.INT and Type.LONG + if (sortFieldType == Type.INT || sortFieldType == Type.LONG) { + Object missingValue = sortField.getMissingValue(); + // This is the fix + final long missingLongValue = missingValue == null ? 
0L : ((Number) missingValue).longValue(); + // all documents have docValues or missing value falls outside the range + if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc()) + || (missingLongValue < lowerValue || missingLongValue > upperValue)) { + itAndCount = getDocIdSetIterator(sortField, sortFieldType, context, numericValues); + } + if (itAndCount != null && itAndCount.count != -1) { + return itAndCount.count; + } + } + } + } + return fallbackWeight.count(context); + } + }; + } + + private static class ValueAndDoc { + byte[] value; + int docID; + boolean done; + } + + /** + * Move to the minimum leaf node that has at least one value that is greater than (or equal to if + * {@code allowEqual}) {@code value}, and return the next greater value on this block. Upon + * returning, the {@code pointTree} must be on the leaf node where the value was found. + */ + private static ValueAndDoc findNextValue( + PointTree pointTree, + byte[] value, + boolean allowEqual, + ByteArrayComparator comparator, + boolean lastDoc + ) throws IOException { + int cmp = comparator.compare(pointTree.getMaxPackedValue(), 0, value, 0); + if (cmp < 0 || (cmp == 0 && allowEqual == false)) { + return null; + } + if (pointTree.moveToChild() == false) { + ValueAndDoc vd = new ValueAndDoc(); + pointTree.visitDocValues(new IntersectVisitor() { + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + if (vd.value == null) { + int cmp = comparator.compare(packedValue, 0, value, 0); + if (cmp > 0 || (cmp == 0 && allowEqual)) { + vd.value = packedValue.clone(); + vd.docID = docID; + } + } else if (lastDoc && vd.done == false) { + int cmp = comparator.compare(packedValue, 0, vd.value, 0); + assert cmp >= 0; + if (cmp > 0) { + vd.done = true; + } else { + vd.docID = docID; + } + } + } + + @Override + public void visit(int docID) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Relation compare(byte[] 
minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; + } + }); + if (vd.value != null) { + return vd; + } else { + return null; + } + } + + // Recurse + do { + ValueAndDoc vd = findNextValue(pointTree, value, allowEqual, comparator, lastDoc); + if (vd != null) { + return vd; + } + } while (pointTree.moveToSibling()); + + boolean moved = pointTree.moveToParent(); + assert moved; + return null; + } + + /** + * Find the next value that is greater than (or equal to if {@code allowEqual}) and return either + * its first doc ID or last doc ID depending on {@code lastDoc}. This method returns -1 if there + * is no greater value in the dataset. + */ + private static int nextDoc(PointTree pointTree, byte[] value, boolean allowEqual, ByteArrayComparator comparator, boolean lastDoc) + throws IOException { + ValueAndDoc vd = findNextValue(pointTree, value, allowEqual, comparator, lastDoc); + if (vd == null) { + return -1; + } + if (lastDoc == false || vd.done) { + return vd.docID; + } + + // We found the next value, now we need the last doc ID. + int doc = lastDoc(pointTree, vd.value, comparator); + if (doc == -1) { + // vd.docID was actually the last doc ID + return vd.docID; + } else { + return doc; + } + } + + /** + * Compute the last doc ID that matches the given value and is stored on a leaf node that compares + * greater than the current leaf node that the provided {@link PointTree} is positioned on. This + * returns -1 if no other leaf node contains the provided {@code value}. + */ + private static int lastDoc(PointTree pointTree, byte[] value, ByteArrayComparator comparator) throws IOException { + // Create a stack of nodes that may contain value that we'll use to search for the last leaf + // node that contains `value`. + // While the logic looks a bit complicated due to the fact that the PointTree API doesn't allow + // moving back to previous siblings, this effectively performs a binary search. 
+ Deque stack = new ArrayDeque<>(); + + outer: while (true) { + + // Move to the next node + while (pointTree.moveToSibling() == false) { + if (pointTree.moveToParent() == false) { + // No next node + break outer; + } + } + + int cmp = comparator.compare(pointTree.getMinPackedValue(), 0, value, 0); + if (cmp > 0) { + // This node doesn't have `value`, so next nodes can't either + break; + } + + stack.push(pointTree.clone()); + } + + while (stack.isEmpty() == false) { + PointTree next = stack.pop(); + if (next.moveToChild() == false) { + int[] lastDoc = { -1 }; + next.visitDocValues(new IntersectVisitor() { + + @Override + public void visit(int docID) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void visit(int docID, byte[] packedValue) throws IOException { + int cmp = comparator.compare(value, 0, packedValue, 0); + if (cmp == 0) { + lastDoc[0] = docID; + } + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; + } + }); + if (lastDoc[0] != -1) { + return lastDoc[0]; + } + } else { + do { + int cmp = comparator.compare(next.getMinPackedValue(), 0, value, 0); + if (cmp > 0) { + // This node doesn't have `value`, so next nodes can't either + break; + } + stack.push(next.clone()); + } while (next.moveToSibling()); + } + } + + return -1; + } + + private boolean matchNone(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint) throws IOException { + assert points.getNumDimensions() == 1; + final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(points.getBytesPerDimension()); + return comparator.compare(points.getMinPackedValue(), 0, queryUpperPoint, 0) > 0 + || comparator.compare(points.getMaxPackedValue(), 0, queryLowerPoint, 0) < 0; + } + + private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint) throws IOException { + assert points.getNumDimensions() == 1; + final ByteArrayComparator 
comparator = ArrayUtil.getUnsignedComparator(points.getBytesPerDimension()); + return comparator.compare(points.getMinPackedValue(), 0, queryLowerPoint, 0) >= 0 + && comparator.compare(points.getMaxPackedValue(), 0, queryUpperPoint, 0) <= 0; + } + + private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(LeafReaderContext context, DocIdSetIterator delegate) throws IOException { + Sort indexSort = context.reader().getMetaData().sort(); + if (indexSort == null || indexSort.getSort().length == 0 || indexSort.getSort()[0].getField().equals(field) == false) { + return null; + } + + final boolean reverse = indexSort.getSort()[0].getReverse(); + + PointValues points = context.reader().getPointValues(field); + if (points == null) { + return null; + } + + if (points.getNumDimensions() != 1) { + return null; + } + + if (points.getBytesPerDimension() != Long.BYTES && points.getBytesPerDimension() != Integer.BYTES) { + return null; + } + + if (points.size() != points.getDocCount()) { + return null; + } + + assert lowerValue <= upperValue; + byte[] queryLowerPoint; + byte[] queryUpperPoint; + if (points.getBytesPerDimension() == Integer.BYTES) { + queryLowerPoint = IntPoint.pack((int) lowerValue).bytes; + queryUpperPoint = IntPoint.pack((int) upperValue).bytes; + } else { + queryLowerPoint = LongPoint.pack(lowerValue).bytes; + queryUpperPoint = LongPoint.pack(upperValue).bytes; + } + if (matchNone(points, queryLowerPoint, queryUpperPoint)) { + return IteratorAndCount.empty(); + } + if (matchAll(points, queryLowerPoint, queryUpperPoint)) { + int maxDoc = context.reader().maxDoc(); + if (points.getDocCount() == maxDoc) { + return IteratorAndCount.all(maxDoc); + } else { + return IteratorAndCount.sparseRange(0, maxDoc, delegate); + } + } + + int minDocId, maxDocId; + final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(points.getBytesPerDimension()); + + if (reverse) { + minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1; + 
} else { + minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false); + if (minDocId == -1) { + // No matches + return IteratorAndCount.empty(); + } + } + + if (reverse) { + maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1; + if (maxDocId == 0) { + // No matches + return IteratorAndCount.empty(); + } + } else { + maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false); + if (maxDocId == -1) { + maxDocId = context.reader().maxDoc(); + } + } + + if (minDocId == maxDocId) { + return IteratorAndCount.empty(); + } + + if ((points.getDocCount() == context.reader().maxDoc())) { + return IteratorAndCount.denseRange(minDocId, maxDocId); + } else { + return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate); + } + } + + private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) throws IOException { + if (lowerValue > upperValue) { + return IteratorAndCount.empty(); + } + + SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(context.reader(), field); + NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues); + if (numericValues != null) { + IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues); + if (itAndCount != null) { + return itAndCount; + } + Sort indexSort = context.reader().getMetaData().sort(); + if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getField().equals(field)) { + + final SortField sortField = indexSort.getSort()[0]; + final SortField.Type sortFieldType = getSortFieldType(sortField); + // The index sort optimization is only supported for Type.INT and Type.LONG + if (sortFieldType == Type.INT || sortFieldType == Type.LONG) { + return getDocIdSetIterator(sortField, sortFieldType, context, numericValues); + } + } + } + return null; + } + + /** + * Computes the document IDs that lie within the range [lowerValue, upperValue] by 
performing + * binary search on the field's doc values. + * + * <p>Because doc values only allow forward iteration, we need to reload the field comparator + * every time the binary search accesses an earlier element. + * + * <p>
We must also account for missing values when performing the binary search. For this reason, + * we load the {@link FieldComparator} instead of checking the docvalues directly. The returned + * {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with + * no value. + */ + private IteratorAndCount getDocIdSetIterator( + SortField sortField, + SortField.Type sortFieldType, + LeafReaderContext context, + DocIdSetIterator delegate + ) throws IOException { + long lower = sortField.getReverse() ? upperValue : lowerValue; + long upper = sortField.getReverse() ? lowerValue : upperValue; + int maxDoc = context.reader().maxDoc(); + + // Perform a binary search to find the first document with value >= lower. + ValueComparator comparator = loadComparator(sortField, sortFieldType, lower, context); + int low = 0; + int high = maxDoc - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + if (comparator.compare(mid) <= 0) { + high = mid - 1; + comparator = loadComparator(sortField, sortFieldType, lower, context); + } else { + low = mid + 1; + } + } + int firstDocIdInclusive = high + 1; + + // Perform a binary search to find the first document with value > upper. + // Since we know that upper >= lower, we can initialize the lower bound + // of the binary search to the result of the previous search. 
+ comparator = loadComparator(sortField, sortFieldType, upper, context); + low = firstDocIdInclusive; + high = maxDoc - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + if (comparator.compare(mid) < 0) { + high = mid - 1; + comparator = loadComparator(sortField, sortFieldType, upper, context); + } else { + low = mid + 1; + } + } + + int lastDocIdExclusive = high + 1; + + if (firstDocIdInclusive == lastDocIdExclusive) { + return IteratorAndCount.empty(); + } + + Object missingValue = sortField.getMissingValue(); + LeafReader reader = context.reader(); + PointValues pointValues = reader.getPointValues(field); + // this is the fix + final long missingLongValue = missingValue == null ? 0L : ((Number) missingValue).longValue(); + // all documents have docValues or missing value falls outside the range + if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc()) + || (missingLongValue < lowerValue || missingLongValue > upperValue)) { + return IteratorAndCount.denseRange(firstDocIdInclusive, lastDocIdExclusive); + } else { + return IteratorAndCount.sparseRange(firstDocIdInclusive, lastDocIdExclusive, delegate); + } + } + + /** Compares the given document's value with a stored reference value. */ + private interface ValueComparator { + int compare(int docID) throws IOException; + } + + private static ValueComparator loadComparator(SortField sortField, SortField.Type type, long topValue, LeafReaderContext context) + throws IOException { + @SuppressWarnings("unchecked") + FieldComparator fieldComparator = (FieldComparator) sortField.getComparator(1, Pruning.NONE); + if (type == Type.INT) { + fieldComparator.setTopValue((int) topValue); + } else { + // Since we support only Type.INT and Type.LONG, assuming LONG for all other cases + fieldComparator.setTopValue(topValue); + } + + LeafFieldComparator leafFieldComparator = fieldComparator.getLeafComparator(context); + int direction = sortField.getReverse() ? 
-1 : 1; + + return doc -> { + int value = leafFieldComparator.compareTop(doc); + return direction * value; + }; + } + + private static SortField.Type getSortFieldType(SortField sortField) { + // We expect the sortField to be SortedNumericSortField + if (sortField instanceof SortedNumericSortField) { + return ((SortedNumericSortField) sortField).getNumericType(); + } else { + return sortField.getType(); + } + } + + /** + * Provides a {@code DocIdSetIterator} along with an accurate count of documents provided by the + * iterator (or {@code -1} if an accurate count is unknown). + */ + private record IteratorAndCount(DocIdSetIterator it, int count) { + + static IteratorAndCount empty() { + return new IteratorAndCount(DocIdSetIterator.empty(), 0); + } + + static IteratorAndCount all(int maxDoc) { + return new IteratorAndCount(DocIdSetIterator.all(maxDoc), maxDoc); + } + + static IteratorAndCount denseRange(int minDoc, int maxDoc) { + return new IteratorAndCount(DocIdSetIterator.range(minDoc, maxDoc), maxDoc - minDoc); + } + + static IteratorAndCount sparseRange(int minDoc, int maxDoc, DocIdSetIterator delegate) { + return new IteratorAndCount(new BoundedDocIdSetIterator(minDoc, maxDoc, delegate), -1); + } + } + + /** + * A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range + * [firstDocInclusive, lastDoc). 
+ */ + private static class BoundedDocIdSetIterator extends DocIdSetIterator { + private final int firstDoc; + private final int lastDoc; + private final DocIdSetIterator delegate; + + private int docID = -1; + + BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) { + assert delegate != null; + this.firstDoc = firstDoc; + this.lastDoc = lastDoc; + this.delegate = delegate; + } + + @Override + public int docID() { + return docID; + } + + @Override + public int nextDoc() throws IOException { + return advance(docID + 1); + } + + @Override + public int advance(int target) throws IOException { + if (target < firstDoc) { + target = firstDoc; + } + + int result = delegate.advance(target); + if (result < lastDoc) { + docID = result; + } else { + docID = NO_MORE_DOCS; + } + return docID; + } + + @Override + public long cost() { + return Math.min(delegate.cost(), lastDoc - firstDoc); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java index e8e33655d47c..33403c87a27b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java @@ -17,7 +17,6 @@ import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; @@ -28,6 +27,7 @@ import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import 
org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.xcontent.XContentBuilder; @@ -160,8 +160,8 @@ public class QueryToFilterAdapter { query = ((ConstantScoreQuery) query).getQuery(); continue; } - if (query instanceof IndexSortSortedNumericDocValuesRangeQuery) { - query = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); + if (query instanceof XIndexSortSortedNumericDocValuesRangeQuery) { + query = ((XIndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); continue; } if (query instanceof IndexOrDocValuesQuery) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java index 16c2c5ca5ddb..66c324563d33 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java @@ -20,7 +20,6 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.elasticsearch.ElasticsearchParseException; @@ -42,6 +41,7 @@ import org.elasticsearch.index.query.DateRangeIncludingNowQuery; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.query.SearchExecutionContextHelper; +import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.field.DateNanosDocValuesField; import 
org.elasticsearch.search.aggregations.support.CoreValuesSourceType; @@ -374,7 +374,7 @@ public class DateFieldTypeTests extends FieldTypeTestCase { Query pointQuery = LongPoint.newRangeQuery("field", instant1, instant2); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); - Query expected = new IndexSortSortedNumericDocValuesRangeQuery( + Query expected = new XIndexSortSortedNumericDocValuesRangeQuery( "field", instant1, instant2, @@ -383,7 +383,7 @@ public class DateFieldTypeTests extends FieldTypeTestCase { assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context)); ft = new DateFieldType("field", false); - expected = new IndexSortSortedNumericDocValuesRangeQuery("field", instant1, instant2, dvQuery); + expected = new XIndexSortSortedNumericDocValuesRangeQuery("field", instant1, instant2, dvQuery); assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java index 048e292430c5..bd06177e17b9 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java @@ -16,9 +16,7 @@ import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.FloatField; import org.apache.lucene.document.FloatPoint; -import org.apache.lucene.document.IntField; import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; @@ -27,7 +25,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.sandbox.document.HalfFloatPoint; import 
org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; @@ -49,6 +46,8 @@ import org.elasticsearch.index.mapper.NumberFieldMapper.NumberFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.query.SearchExecutionContextHelper; +import org.elasticsearch.lucene.document.NumericField; +import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.search.MultiValueMode; import org.elasticsearch.xcontent.XContentBuilder; @@ -158,7 +157,7 @@ public class NumberFieldTypeTests extends FieldTypeTestCase { public void testTermQuery() { Query[] expectedIntegerQueries = new Query[] { - IntField.newExactQuery("field", 42), + NumericField.newExactIntQuery("field", 42), IntPoint.newExactQuery("field", 42), SortedNumericDocValuesField.newSlowExactQuery("field", 42) }; List testCases = List.of( @@ -168,7 +167,7 @@ public class NumberFieldTypeTests extends FieldTypeTestCase { new TermQueryTestCase( NumberType.LONG, new Query[] { - LongField.newExactQuery("field", 42), + NumericField.newExactLongQuery("field", 42), LongPoint.newExactQuery("field", 42), SortedNumericDocValuesField.newSlowExactQuery("field", 42) } ), @@ -867,8 +866,8 @@ public class NumberFieldTypeTests extends FieldTypeTestCase { context, isIndexed ); - assertThat(query, instanceOf(IndexSortSortedNumericDocValuesRangeQuery.class)); - Query fallbackQuery = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); + assertThat(query, instanceOf(XIndexSortSortedNumericDocValuesRangeQuery.class)); + Query fallbackQuery = 
((XIndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); if (isIndexed) { assertThat(fallbackQuery, instanceOf(IndexOrDocValuesQuery.class)); diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java index 3532751359cf..d02c2ce29bfa 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java @@ -11,7 +11,6 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PhraseQuery; @@ -21,6 +20,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.ParsingException; import org.elasticsearch.core.Strings; +import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; @@ -107,7 +107,7 @@ public class MatchPhraseQueryBuilderTests extends AbstractQueryTestCase